Check if we have enough people in every feature value

Hamish Downer · Hamish Downer · commit 88799d01f499 · 2025-12-03T15:25:31.000Z
Run this check before trying to do the selection.

For example, if gender/female has a minimum of 10 but only 8 of the
respondents are female, then there is no point in trying to do the
selection.
diff --git a/src/sortition_algorithms/core.py b/src/sortition_algorithms/core.py
@@ -14,7 +14,7 @@
     standardize_distribution,
 )
 from sortition_algorithms.features import FeatureCollection, check_desired
-from sortition_algorithms.people import People
+from sortition_algorithms.people import People, check_enough_people_for_every_feature_value
 from sortition_algorithms.people_features import (
     iterate_select_collection,
     select_from_feature_collection,
@@ -525,23 +525,28 @@ def run_stratification(
         RuntimeError: If required solver is not available
         InfeasibleQuotasError: If quotas cannot be satisfied
     """
-    # Check if desired number is within feature constraints
-    check_desired(features, number_people_wanted)
+    success = False
+    report = RunReport()
+    people_selected: list[frozenset[str]] = []
+
+    try:
+        # Check if desired number is within feature constraints
+        check_desired(features, number_people_wanted)
+        check_enough_people_for_every_feature_value(features, people)
+    except errors.SelectionError as error:
+        report.add_error(error)
+        return False, people_selected, report
 
     # Set random seed if specified
     # If the seed is zero or None, we use the secrets module, as it is better
     # from a security point of view
     set_random_provider(settings.random_number_seed)
 
-    success = False
-    report = RunReport()
-
     if test_selection:
         report.add_line("WARNING: Panel is not selected at random! Only use for testing!", ReportLevel.CRITICAL)
 
     report.add_line("Initial: (selected = 0)", ReportLevel.IMPORTANT)
     report.add_report(_initial_category_info_table(features, people))
-    people_selected: list[frozenset[str]] = []
 
     tries = 0
     for tries in range(settings.max_attempts):
diff --git a/src/sortition_algorithms/features.py b/src/sortition_algorithms/features.py
@@ -165,14 +165,16 @@ def check_desired(fc: FeatureCollection, desired_number: int) -> None:
     """
     Check if the desired number of people is within the min/max of every feature.
     """
+    errors: list[str] = []
     for feature_name, fvalues in fc.items():
         if desired_number < _fv_minimum_selection(fvalues) or desired_number > _fv_maximum_selection(fvalues):
-            msg = (
+            errors.append(
                 f"The number of people to select ({desired_number}) is out of the range of "
                 f"the numbers of people in the {feature_name} feature. It should be within "
                 f"[{_fv_minimum_selection(fvalues)}, {_fv_maximum_selection(fvalues)}]."
             )
-            raise Exception(msg)
+    if errors:
+        raise SelectionMultilineError(errors)
 
 
 def _safe_max_flex_val(fc: FeatureCollection) -> int:
diff --git a/src/sortition_algorithms/people.py b/src/sortition_algorithms/people.py
@@ -1,5 +1,5 @@
 from collections import Counter, defaultdict
-from collections.abc import ItemsView, Iterable, Iterator, MutableMapping
+from collections.abc import Generator, ItemsView, Iterable, Iterator, MutableMapping
 from typing import Any
 
 from requests.structures import CaseInsensitiveDict
@@ -11,7 +11,7 @@
     SelectionError,
     SelectionMultilineError,
 )
-from sortition_algorithms.features import FeatureCollection
+from sortition_algorithms.features import FeatureCollection, iterate_feature_collection
 from sortition_algorithms.settings import Settings
 from sortition_algorithms.utils import RunReport, normalise_dict
 
@@ -107,6 +107,14 @@ def matching_address(self, person_key: str, address_columns: list[str]) -> Itera
             if person_address == tuple(loop_person[col].lower() for col in address_columns):
                 yield loop_key
 
+    def _iter_matching(self, feature_name: str, feature_value: str) -> Generator[str]:
+        for person_key, person_dict in self._full_data.items():  # lazily yield the key of each matching person
+            if person_dict[feature_name].lower() == feature_value.lower():  # case-insensitive comparison
+                yield person_key
+
+    def count_feature_value(self, feature_name: str, feature_value: str) -> int:
+        return len(list(self._iter_matching(feature_name, feature_value)))  # number of matching people
+
     def find_person_by_position_in_category(self, feature_name: str, feature_value: str, position: int) -> str:
         """
         Find the nth person (1-indexed) in a specific feature category.
@@ -122,18 +130,14 @@ def find_person_by_position_in_category(self, feature_name: str, feature_value:
         Raises:
             SelectionError: If no person is found at the specified position
         """
-        current_position = 0
-
-        for person_key, person_dict in self._full_data.items():
-            if person_dict[feature_name].lower() == feature_value.lower():
-                current_position += 1
-                if current_position == position:
-                    return person_key
-
-        # Should always find someone if position is valid
-        # If we hit this line it is a bug
-        msg = f"Failed to find person at position {position} in {feature_name}/{feature_value}"
-        raise SelectionError(msg)
+        people_in_category = list(self._iter_matching(feature_name, feature_value))
+        # Positions are 1-indexed: bounds-check explicitly, because indexing with a zero or
+        # negative position would wrap around and return someone from the end of the list.
+        if 1 <= position <= len(people_in_category):
+            return people_in_category[position - 1]
+        # Should always find someone if position is valid - if we hit this line it is a bug
+        msg = f"Failed to find person at position {position} in {feature_name}/{feature_value}"
+        raise SelectionError(msg)
 
 
 # simple helper function to tidy the code below
@@ -208,6 +212,20 @@ def check_for_duplicate_people(people_body: Iterable[MutableMapping[str, str]],
     raise SelectionMultilineError(output)
 
 
+def check_enough_people_for_every_feature_value(features: FeatureCollection, people: People) -> None:
+    """Check every feature value has at least its minimum number of people; raise SelectionMultilineError if not."""
+    error_list: list[str] = []
+    for fname, fvalue, fv_minmax in iterate_feature_collection(features):
+        matching_count = people.count_feature_value(fname, fvalue)
+        if matching_count < fv_minmax.min:  # a minimum of 0 can never fail this test
+            error_list.append(
+                f"Not enough people with the value '{fvalue}' in category '{fname}' - "
+                f"the minimum is {fv_minmax.min} but we only have {matching_count}"
+            )
+    if error_list:  # report every shortfall together rather than stopping at the first
+        raise SelectionMultilineError(error_list)
+
+
 def read_in_people(
     people_head: list[str],
     people_body: Iterable[dict[str, str] | dict[str, str | int]],
diff --git a/tests/test_core.py b/tests/test_core.py
@@ -40,13 +40,15 @@ def test_run_stratification_infeasible_quotas():
     people = create_simple_people(features, settings, count=2)
 
     # Should raise exception for invalid desired number (can't select 4 from 2 total)
-    with pytest.raises(Exception, match="out of the range"):
-        run_stratification(
-            features=features,
-            people=people,
-            number_people_wanted=4,  # Impossible: need 1 male + 1 female = 2 max
-            settings=settings,
-        )
+    success, _, report = run_stratification(
+        features=features,
+        people=people,
+        number_people_wanted=4,  # Impossible: need 1 male + 1 female = 2 max
+        settings=settings,
+    )
+
+    assert not success
+    assert "out of the range" in str(report.last_error())
 
 
 @pytest.mark.slow
diff --git a/tests/test_people.py b/tests/test_people.py
@@ -5,11 +5,13 @@
 from sortition_algorithms.people import (
     People,
     _check_columns_exist_or_multiple,
+    check_enough_people_for_every_feature_value,
     check_for_duplicate_people,
     read_in_people,
 )
 from sortition_algorithms.settings import Settings
 from sortition_algorithms.utils import normalise_dict
+from tests.helpers import create_test_scenario
 
 
 def create_simple_test_features() -> FeatureCollection:
@@ -1020,3 +1022,30 @@ def test_check_for_duplicate_people_with_dupes_with_mismatching_data(self):
         assert "bob42@example.com" in combined_messages
 
         assert "jane@example.com" not in combined_messages
+
+
+class TestCheckEnoughPeopleForEveryFeatureValue:
+    def test_check_enough_people_for_every_feature_value_with_enough(self):
+        features, people, _ = create_test_scenario(people_count=4)
+        # the unmodified scenario is expected to meet every minimum, so this must not raise
+        check_enough_people_for_every_feature_value(features, people)
+
+    def test_check_enough_people_for_every_feature_value_with_zero_in_feature_value(self):
+        features, people, _ = create_test_scenario(people_count=4)
+        for _, person_dict in people.items():
+            person_dict["gender"] = "male"  # overwrite everyone's gender so that no females remain
+        # nobody is female any more, so the female minimum cannot be met and this must raise
+        with pytest.raises(errors.SelectionMultilineError, match="Not enough people") as excinfo:
+            check_enough_people_for_every_feature_value(features, people)
+        assert "value 'female' in category 'gender'" in str(excinfo.value)
+        assert "minimum is 1 but we only have 0" in str(excinfo.value)
+
+    def test_check_enough_people_for_every_feature_value_with_high_minimum(self):
+        features, people, _ = create_test_scenario(people_count=8)
+        features["gender"]["female"].min = 10
+        features["gender"]["female"].max = 10
+        # the female minimum now exceeds the number of females in the scenario, so this must raise
+        with pytest.raises(errors.SelectionMultilineError, match="Not enough people") as excinfo:
+            check_enough_people_for_every_feature_value(features, people)
+        assert "value 'female' in category 'gender'" in str(excinfo.value)
+        assert "minimum is 10 but we only have 4" in str(excinfo.value)