Skip to content

Commit 88799d0

Browse files
author
Hamish Downer
committed
Check if we have enough people in every feature value
before trying to do the selection. Say we have a minimum of 10 in gender/female, but in the respondents we only have 8 females, then there is no point in trying to do the selection.
1 parent 59acec8 commit 88799d0

File tree

5 files changed

+86
-30
lines changed

5 files changed

+86
-30
lines changed

src/sortition_algorithms/core.py

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
standardize_distribution,
1515
)
1616
from sortition_algorithms.features import FeatureCollection, check_desired
17-
from sortition_algorithms.people import People
17+
from sortition_algorithms.people import People, check_enough_people_for_every_feature_value
1818
from sortition_algorithms.people_features import (
1919
iterate_select_collection,
2020
select_from_feature_collection,
@@ -525,23 +525,28 @@ def run_stratification(
525525
RuntimeError: If required solver is not available
526526
InfeasibleQuotasError: If quotas cannot be satisfied
527527
"""
528-
# Check if desired number is within feature constraints
529-
check_desired(features, number_people_wanted)
528+
success = False
529+
report = RunReport()
530+
people_selected: list[frozenset[str]] = []
531+
532+
try:
533+
# Check if desired number is within feature constraints
534+
check_desired(features, number_people_wanted)
535+
check_enough_people_for_every_feature_value(features, people)
536+
except errors.SelectionError as error:
537+
report.add_error(error)
538+
return False, people_selected, report
530539

531540
# Set random seed if specified
532541
# If the seed is zero or None, we use the secrets module, as it is better
533542
# from a security point of view
534543
set_random_provider(settings.random_number_seed)
535544

536-
success = False
537-
report = RunReport()
538-
539545
if test_selection:
540546
report.add_line("WARNING: Panel is not selected at random! Only use for testing!", ReportLevel.CRITICAL)
541547

542548
report.add_line("Initial: (selected = 0)", ReportLevel.IMPORTANT)
543549
report.add_report(_initial_category_info_table(features, people))
544-
people_selected: list[frozenset[str]] = []
545550

546551
tries = 0
547552
for tries in range(settings.max_attempts):

src/sortition_algorithms/features.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -165,14 +165,16 @@ def check_desired(fc: FeatureCollection, desired_number: int) -> None:
165165
"""
166166
Check if the desired number of people is within the min/max of every feature.
167167
"""
168+
errors: list[str] = []
168169
for feature_name, fvalues in fc.items():
169170
if desired_number < _fv_minimum_selection(fvalues) or desired_number > _fv_maximum_selection(fvalues):
170-
msg = (
171+
errors.append(
171172
f"The number of people to select ({desired_number}) is out of the range of "
172173
f"the numbers of people in the {feature_name} feature. It should be within "
173174
f"[{_fv_minimum_selection(fvalues)}, {_fv_maximum_selection(fvalues)}]."
174175
)
175-
raise Exception(msg)
176+
if errors:
177+
raise SelectionMultilineError(errors)
176178

177179

178180
def _safe_max_flex_val(fc: FeatureCollection) -> int:

src/sortition_algorithms/people.py

Lines changed: 32 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from collections import Counter, defaultdict
2-
from collections.abc import ItemsView, Iterable, Iterator, MutableMapping
2+
from collections.abc import Generator, ItemsView, Iterable, Iterator, MutableMapping
33
from typing import Any
44

55
from requests.structures import CaseInsensitiveDict
@@ -11,7 +11,7 @@
1111
SelectionError,
1212
SelectionMultilineError,
1313
)
14-
from sortition_algorithms.features import FeatureCollection
14+
from sortition_algorithms.features import FeatureCollection, iterate_feature_collection
1515
from sortition_algorithms.settings import Settings
1616
from sortition_algorithms.utils import RunReport, normalise_dict
1717

@@ -107,6 +107,14 @@ def matching_address(self, person_key: str, address_columns: list[str]) -> Itera
107107
if person_address == tuple(loop_person[col].lower() for col in address_columns):
108108
yield loop_key
109109

110+
def _iter_matching(self, feature_name: str, feature_value: str) -> Generator[str]:
111+
for person_key, person_dict in self._full_data.items():
112+
if person_dict[feature_name].lower() == feature_value.lower():
113+
yield person_key
114+
115+
def count_feature_value(self, feature_name: str, feature_value: str) -> int:
116+
return len(list(self._iter_matching(feature_name, feature_value)))
117+
110118
def find_person_by_position_in_category(self, feature_name: str, feature_value: str, position: int) -> str:
111119
"""
112120
Find the nth person (1-indexed) in a specific feature category.
@@ -122,18 +130,14 @@ def find_person_by_position_in_category(self, feature_name: str, feature_value:
122130
Raises:
123131
SelectionError: If no person is found at the specified position
124132
"""
125-
current_position = 0
126-
127-
for person_key, person_dict in self._full_data.items():
128-
if person_dict[feature_name].lower() == feature_value.lower():
129-
current_position += 1
130-
if current_position == position:
131-
return person_key
132-
133-
# Should always find someone if position is valid
134-
# If we hit this line it is a bug
135-
msg = f"Failed to find person at position {position} in {feature_name}/{feature_value}"
136-
raise SelectionError(msg)
133+
people_in_category = list(self._iter_matching(feature_name, feature_value))
134+
try:
135+
return people_in_category[position - 1]
136+
except IndexError:
137+
# Should always find someone if position is valid
138+
# If we hit this line it is a bug
139+
msg = f"Failed to find person at position {position} in {feature_name}/{feature_value}"
140+
raise SelectionError(msg) from None
137141

138142

139143
# simple helper function to tidy the code below
@@ -208,6 +212,20 @@ def check_for_duplicate_people(people_body: Iterable[MutableMapping[str, str]],
208212
raise SelectionMultilineError(output)
209213

210214

215+
def check_enough_people_for_every_feature_value(features: FeatureCollection, people: People) -> None:
216+
"""For each feature/value, if the min>0, check there are enough people who have that feature/value"""
217+
error_list: list[str] = []
218+
for fname, fvalue, fv_minmax in iterate_feature_collection(features):
219+
matching_count = people.count_feature_value(fname, fvalue)
220+
if matching_count < fv_minmax.min:
221+
error_list.append(
222+
f"Not enough people with the value '{fvalue}' in category '{fname}' - "
223+
f"the minimum is {fv_minmax.min} but we only have {matching_count}"
224+
)
225+
if error_list:
226+
raise SelectionMultilineError(error_list)
227+
228+
211229
def read_in_people(
212230
people_head: list[str],
213231
people_body: Iterable[dict[str, str] | dict[str, str | int]],

tests/test_core.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -40,13 +40,15 @@ def test_run_stratification_infeasible_quotas():
4040
people = create_simple_people(features, settings, count=2)
4141

4242
# Should raise exception for invalid desired number (can't select 4 from 2 total)
43-
with pytest.raises(Exception, match="out of the range"):
44-
run_stratification(
45-
features=features,
46-
people=people,
47-
number_people_wanted=4, # Impossible: need 1 male + 1 female = 2 max
48-
settings=settings,
49-
)
43+
success, _, report = run_stratification(
44+
features=features,
45+
people=people,
46+
number_people_wanted=4, # Impossible: need 1 male + 1 female = 2 max
47+
settings=settings,
48+
)
49+
50+
assert not success
51+
assert "out of the range" in str(report.last_error())
5052

5153

5254
@pytest.mark.slow

tests/test_people.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,13 @@
55
from sortition_algorithms.people import (
66
People,
77
_check_columns_exist_or_multiple,
8+
check_enough_people_for_every_feature_value,
89
check_for_duplicate_people,
910
read_in_people,
1011
)
1112
from sortition_algorithms.settings import Settings
1213
from sortition_algorithms.utils import normalise_dict
14+
from tests.helpers import create_test_scenario
1315

1416

1517
def create_simple_test_features() -> FeatureCollection:
@@ -1020,3 +1022,30 @@ def test_check_for_duplicate_people_with_dupes_with_mismatching_data(self):
10201022
assert "[email protected]" in combined_messages
10211023

10221024
assert "[email protected]" not in combined_messages
1025+
1026+
1027+
class TestCheckEnoughPeopleForEveryFeatureValue:
1028+
def test_check_enough_people_for_every_feature_value_with_enough(self):
1029+
features, people, _ = create_test_scenario(people_count=4)
1030+
# should not raise an error
1031+
check_enough_people_for_every_feature_value(features, people)
1032+
1033+
def test_check_enough_people_for_every_feature_value_with_zero_in_feature_value(self):
1034+
features, people, _ = create_test_scenario(people_count=4)
1035+
for _, person_dict in people.items():
1036+
person_dict["gender"] = "male"
1037+
# should raise an error now
1038+
with pytest.raises(errors.SelectionMultilineError, match="Not enough people") as excinfo:
1039+
check_enough_people_for_every_feature_value(features, people)
1040+
assert "value 'female' in category 'gender'" in str(excinfo.value)
1041+
assert "minimum is 1 but we only have 0" in str(excinfo.value)
1042+
1043+
def test_check_enough_people_for_every_feature_value_with_high_minimum(self):
1044+
features, people, _ = create_test_scenario(people_count=8)
1045+
features["gender"]["female"].min = 10
1046+
features["gender"]["female"].max = 10
1047+
# should raise an error now
1048+
with pytest.raises(errors.SelectionMultilineError, match="Not enough people") as excinfo:
1049+
check_enough_people_for_every_feature_value(features, people)
1050+
assert "value 'female' in category 'gender'" in str(excinfo.value)
1051+
assert "minimum is 10 but we only have 4" in str(excinfo.value)

0 commit comments

Comments
 (0)