Use "feature" or "category" rather than "feature/category" in error messages

Hamish Downer · Hamish Downer · commit 63d38dd2bb9c · 2025-11-18T21:43:48.000Z
This means finding the column header actually used ("feature" or
"category") and passing it along, so it is available to error messages
throughout the code.

It makes some argument lists a little long, and means a few functions
now return a tuple rather than a single value. Might refactor later,
but will leave it for now.

As a result, quite a lot of tests needed updating.
diff --git a/src/sortition_algorithms/adapters.py b/src/sortition_algorithms/adapters.py
@@ -121,13 +121,17 @@ def __init__(self, data_source: AbstractDataSource, gen_rem_tab: bool = True) ->
         self.data_source = data_source
         # short for "generate remaining tab"
         self.gen_rem_tab = gen_rem_tab  # Added for checkbox in strat select app
+        # record the column headers for feature/category and for value
+        self.feature_column_name = "feature"
+        self.feature_value_column_name = "value"
 
     def load_features(self) -> tuple[FeatureCollection, RunReport]:
         report = RunReport()
         with self.data_source.read_feature_data(report) as headers_body:
-            headers, body = headers_body
+            headers_iter, body = headers_body
+            headers = list(headers_iter)
             try:
-                features = read_in_features(list(headers), body)
+                features, self.feature_column_name, self.feature_value_column_name = read_in_features(headers, body)
             except ParseTableMultiError as error:
                 new_error = self.data_source.customise_features_parse_error(error, headers)
                 raise new_error from error
@@ -137,9 +141,16 @@ def load_features(self) -> tuple[FeatureCollection, RunReport]:
     def load_people(self, settings: Settings, features: FeatureCollection) -> tuple[People, RunReport]:
         report = RunReport()
         with self.data_source.read_people_data(report) as headers_body:
-            headers, body = headers_body
+            headers_iter, body = headers_body
+            headers = list(headers_iter)
             try:
-                people, report = read_in_people(list(headers), body, features, settings)
+                people, report = read_in_people(
+                    people_head=headers,
+                    people_body=body,
+                    features=features,
+                    settings=settings,
+                    feature_column_name=self.feature_column_name,
+                )
             except ParseTableMultiError as error:
                 new_error = self.data_source.customise_people_parse_error(error, headers)
                 raise new_error from error
diff --git a/src/sortition_algorithms/features.py b/src/sortition_algorithms/features.py
@@ -306,7 +306,9 @@ def _clean_row(row: utils.StrippedDict, feature_flex: bool, row_number: int) ->
     return feature_name, feature_value, fv_minmax
 
 
-def read_in_features(features_head: Iterable[str], features_body: Iterable[dict[str, str]]) -> FeatureCollection:
+def read_in_features(
+    features_head: Iterable[str], features_body: Iterable[dict[str, str]]
+) -> tuple[FeatureCollection, str, str]:
     """
     Read in stratified selection features and values
 
@@ -315,8 +317,13 @@ def read_in_features(features_head: Iterable[str], features_body: Iterable[dict[
     features: FeatureCollection = CaseInsensitiveDict()
     features_flex, filtered_headers = _feature_headers_flex(list(features_head))
     combined_error = ParseTableMultiError()
+    feature_column_name = "feature"
+    feature_value_column_name = "value"
     # row 1 is the header, so the body starts on row 2
     for row_number, row in enumerate(features_body, start=2):
+        if row_number == 2:
+            _, feature_column_name = _get_feature_from_row(row)
+            _, feature_value_column_name = _get_feature_value_from_row(row)
         # check the set of keys in the row are the same as the headers
         assert set(filtered_headers) <= set(row.keys())
         stripped_row = utils.StrippedDict(row)
@@ -341,4 +348,4 @@ def read_in_features(features_head: Iterable[str], features_body: Iterable[dict[
     # check feature_flex to see if we need to set the max here
     # this only changes the max_flex value if these (optional) flex values are NOT set already
     set_default_max_flex(features)
-    return CaseInsensitiveDict(features)
+    return CaseInsensitiveDict(features), feature_column_name, feature_value_column_name
diff --git a/src/sortition_algorithms/people.py b/src/sortition_algorithms/people.py
@@ -34,7 +34,14 @@ def __iter__(self) -> Iterator[str]:
     def items(self) -> ItemsView[str, dict[str, str]]:
         return self._full_data.items()
 
-    def add(self, person_key: str, data: StrippedDict, features: FeatureCollection, row_number: int) -> None:
+    def add(
+        self,
+        person_key: str,
+        data: StrippedDict,
+        features: FeatureCollection,
+        row_number: int,
+        feature_column_name: str = "feature",
+    ) -> None:
         person_full_data: dict[str, str] = {}
         errors = ParseErrorsCollector()
         # get the feature values: these are the most important and we must check them
@@ -46,13 +53,12 @@ def add(self, person_key: str, data: StrippedDict, features: FeatureCollection,
             if p_value in feature_values:
                 person_full_data[feature_name] = p_value
             else:
-                errors.add(
-                    msg=f"Value '{p_value}' not in category/feature {feature_name}",
-                    key=feature_name,
-                    value=p_value,
-                    row=row_number,
-                    row_name=person_key,
+                msg = (
+                    f"Value '{p_value}' not in {feature_column_name} {feature_name}"
+                    if p_value
+                    else f"Empty value in {feature_column_name} {feature_name}"
                 )
+                errors.add(msg=msg, key=feature_name, value=p_value, row=row_number, row_name=person_key)
         if errors:
             raise errors.to_error()
         # then get the other column values we need
@@ -202,6 +208,7 @@ def read_in_people(
     people_body: Iterable[dict[str, str] | dict[str, str | int]],
     features: FeatureCollection,
     settings: Settings,
+    feature_column_name: str = "feature",
 ) -> tuple[People, RunReport]:
     report = RunReport()
     _check_people_head(people_head, features, settings)
@@ -218,7 +225,13 @@ def read_in_people(
             report.add_line(f"WARNING: blank cell found in ID column in row {row_number} - skipped that line!")
             continue
         try:
-            people.add(pkey, stripped_row, features, row_number)
+            people.add(
+                person_key=pkey,
+                data=stripped_row,
+                features=features,
+                row_number=row_number,
+                feature_column_name=feature_column_name,
+            )
         except ParseTableMultiError as error:
             # gather all the errors so we can tell the user as many problems as possible in one pass
             combined_error.combine(error)
diff --git a/tests/helpers.py b/tests/helpers.py
@@ -120,7 +120,7 @@ def create_simple_features(
     ]
 
     head = ["feature", "value", "min", "max"]
-    features = read_in_features(head, features_data)
+    features, _, _ = read_in_features(head, features_data)
     return features
 
 
@@ -150,7 +150,7 @@ def create_gender_only_features(min_val: int = 1, max_val: int = 5) -> FeatureCo
     ]
 
     head = ["feature", "value", "min", "max"]
-    features = read_in_features(head, features_data)
+    features, _, _ = read_in_features(head, features_data)
     return features
 
 
diff --git a/tests/test_adapters.py b/tests/test_adapters.py
@@ -454,4 +454,4 @@ def test_csv_load_people_from_file_failure(tmp_path: Path):
     with pytest.raises(SelectionMultilineError) as excinfo:
         file_select_data.load_people(settings, features)
     assert "new_people.csv" in str(excinfo.value)
-    assert "'PictsieLand' not in category/feature geo_bucket" in str(excinfo.value)
+    assert "'PictsieLand' not in feature geo_bucket" in str(excinfo.value)
diff --git a/tests/test_committee_generation.py b/tests/test_committee_generation.py
@@ -60,7 +60,7 @@ def convert_people_data(
         if person_id in columns_data:
             person_data.update(columns_data[person_id])
 
-        people.add(person_id, StrippedDict(person_data), features, 0)
+        people.add(person_key=person_id, data=StrippedDict(person_data), features=features, row_number=0)
 
     return people
 
diff --git a/tests/test_features.py b/tests/test_features.py
@@ -23,49 +23,53 @@ def test_read_in_features_without_flex():
         {"feature": "gender", "value": "female", "min": "4", "max": "6"},
         {"feature": "gender", "value": "non-binary-other", "min": "0", "max": "1"},
     ]
-    features = read_in_features(head, body)
+    features, feature_column_name, feature_value_column_name = read_in_features(head, body)
     assert list(features.keys()) == ["gender"]
     assert sorted(features["gender"].keys()) == ["female", "male", "non-binary-other"]
     assert minimum_selection(features) == 8
     assert maximum_selection(features) == 13
+    assert feature_column_name == "feature"
+    assert feature_value_column_name == "value"
 
 
 def test_read_in_features_with_flex():
     """
     Test a basic import with a single feature/category
     """
-    head = FEATURE_FILE_FIELD_NAMES_FLEX
+    head = FEATURE_FILE_FIELD_NAMES_FLEX_OLD
     body = [
         {
-            "feature": "gender",
-            "value": "male",
+            "category": "gender",
+            "name": "male",
             "min": "4",
             "max": "6",
             "min_flex": "4",
             "max_flex": "6",
         },
         {
-            "feature": "gender",
-            "value": "female",
+            "category": "gender",
+            "name": "female",
             "min": "4",
             "max": "6",
             "min_flex": "4",
             "max_flex": "6",
         },
         {
-            "feature": "gender",
-            "value": "non-binary-other",
+            "category": "gender",
+            "name": "non-binary-other",
             "min": "0",
             "max": "1",
             "min_flex": "0",
             "max_flex": "1",
         },
     ]
-    features = read_in_features(head, body)
+    features, feature_column_name, feature_value_column_name = read_in_features(head, body)
     assert list(features.keys()) == ["gender"]
     assert sorted(features["gender"].keys()) == ["female", "male", "non-binary-other"]
     assert minimum_selection(features) == 8
     assert maximum_selection(features) == 13
+    assert feature_column_name == "category"
+    assert feature_value_column_name == "name"
 
 
 def test_read_in_features_without_flex_old_names():
@@ -78,7 +82,7 @@ def test_read_in_features_without_flex_old_names():
         {"category": "gender", "name": "female", "min": "4", "max": "6"},
         {"category": "gender", "name": "non-binary-other", "min": "0", "max": "1"},
     ]
-    features = read_in_features(head, body)
+    features, _, _ = read_in_features(head, body)
     assert list(features.keys()) == ["gender"]
     assert sorted(features["gender"].keys()) == ["female", "male", "non-binary-other"]
     assert minimum_selection(features) == 8
@@ -116,7 +120,7 @@ def test_read_in_features_with_flex_old_names():
             "max_flex": "1",
         },
     ]
-    features = read_in_features(head, body)
+    features, _, _ = read_in_features(head, body)
     assert list(features.keys()) == ["gender"]
     assert sorted(features["gender"].keys()) == ["female", "male", "non-binary-other"]
     assert minimum_selection(features) == 8
@@ -136,7 +140,7 @@ def test_multiple_features_without_flex(self):
             {"feature": "age", "value": "31-50", "min": "2", "max": "5"},
             {"feature": "age", "value": "51+", "min": "1", "max": "2"},
         ]
-        features = read_in_features(head, body)
+        features, _, _ = read_in_features(head, body)
 
         # Check we have both features
         assert sorted(features.keys()) == ["age", "gender"]
@@ -189,7 +193,7 @@ def test_multiple_features_with_flex(self):
                 "max_flex": "5",
             },
         ]
-        features = read_in_features(head, body)
+        features, _, _ = read_in_features(head, body)
 
         assert sorted(features.keys()) == ["education", "gender"]
         assert minimum_selection(features) == 4  # max(4, 3) = 4
@@ -226,7 +230,7 @@ def test_extra_headers_ignored(self):
             "suggest max",
         ]  # extra "suggest min/max" headers
         body = [{"feature": "gender", "value": "male", "min": "1", "max": "2"}]
-        features = read_in_features(head, body)
+        features, _, _ = read_in_features(head, body)
 
         assert list(features.keys()) == ["gender"]
         assert list(features["gender"].keys()) == ["male"]
@@ -243,7 +247,7 @@ def test_blank_feature_name_skipped(self):
             },  # blank feature, should be skipped
             {"feature": "gender", "value": "female", "min": "2", "max": "3"},
         ]
-        features = read_in_features(head, body)
+        features, _, _ = read_in_features(head, body)
 
         assert list(features.keys()) == ["gender"]
         assert list(features["gender"].keys()) == ["female"]
@@ -441,7 +445,7 @@ def test_string_values_stripped(self):
             {"feature": "  gender  ", "value": "  male  ", "min": "1", "max": "2"},
             {"feature": "gender", "value": "female", "min": "2", "max": "3"},
         ]
-        features = read_in_features(head, body)
+        features, _, _ = read_in_features(head, body)
 
         assert "gender" in features
         assert sorted(features["gender"].keys()) == ["female", "male"]
@@ -453,7 +457,7 @@ def test_numeric_feature_names_and_values(self):
             {"feature": 123, "value": 456, "min": "1", "max": "2"},
             {"feature": 123, "value": 789, "min": "2", "max": "3"},
         ]
-        features = read_in_features(head, body)
+        features, _, _ = read_in_features(head, body)
 
         assert "123" in features
         assert sorted(features["123"].keys()) == ["456", "789"]
@@ -469,7 +473,7 @@ def test_old_column_names_without_flex(self):
             {"category": "gender", "name": "male", "min": "1", "max": "2"},
             {"category": "gender", "name": "female", "min": "2", "max": "3"},
         ]
-        features = read_in_features(head, body)
+        features, _, _ = read_in_features(head, body)
 
         assert "gender" in features
         assert sorted(features["gender"].keys()) == ["female", "male"]
@@ -495,7 +499,7 @@ def test_old_column_names_with_flex(self):
                 "max_flex": "4",
             },
         ]
-        features = read_in_features(head, body)
+        features, _, _ = read_in_features(head, body)
 
         assert "gender" in features
         assert sorted(features["gender"].keys()) == ["female", "male"]
@@ -547,7 +551,7 @@ def test_case_insensitive_features(self):
             {"feature": "gender", "value": "Male", "min": "2", "max": "4"},
             {"feature": "gender", "value": "Female", "min": "2", "max": "4"},
         ]
-        features = read_in_features(head, body)
+        features, _, _ = read_in_features(head, body)
 
         # Should be able to access with different case
         assert "male" in features["gender"]
@@ -570,7 +574,7 @@ def test_case_insensitive_feature_values(self):
             {"feature": "gender", "value": "Male", "min": "2", "max": "4"},
             {"feature": "gender", "value": "Female", "min": "2", "max": "4"},
         ]
-        features = read_in_features(head, body)
+        features, _, _ = read_in_features(head, body)
 
         # Should be able to access with different case
         assert "male" in features["gender"]
@@ -595,7 +599,7 @@ def test_case_insensitive_with_mixed_case_input(self):
             {"feature": "ethnicity", "value": "White British", "min": "1", "max": "2"},
             {"feature": "ETHNICITY", "value": "Asian", "min": "1", "max": "2"},
         ]
-        features = read_in_features(head, body)
+        features, _, _ = read_in_features(head, body)
 
         # Check all variations work
         assert "male" in features["gender"]
diff --git a/tests/test_find_sample.py b/tests/test_find_sample.py
@@ -164,7 +164,7 @@ def test_max_zero_pruning(self):
             {"feature": "gender", "value": "female", "min": "0", "max": "0"},  # Don't want any females
         ]
         head = ["feature", "value", "min", "max"]
-        features = read_in_features(head, features_data)
+        features, _, _ = read_in_features(head, features_data)
 
         settings = create_test_settings(columns_to_keep=["name"])
         people = create_simple_people(features, settings, count=3)
diff --git a/tests/test_people.py b/tests/test_people.py
@@ -19,7 +19,7 @@ def create_simple_test_features() -> FeatureCollection:
         {"feature": "gender", "value": "female", "min": "1", "max": "10"},
     ]
     head = ["feature", "value", "min", "max"]
-    features = read_in_features(head, features_data)
+    features, _, _ = read_in_features(head, features_data)
     return features
 
 
@@ -32,7 +32,7 @@ def create_test_features() -> FeatureCollection:
         {"feature": "age", "value": "old", "min": "1", "max": "3"},
     ]
     head = ["feature", "value", "min", "max"]
-    features = read_in_features(head, features_data)
+    features, _, _ = read_in_features(head, features_data)
     return features
 
 
@@ -89,7 +89,7 @@ def test_people_add_person_with_invalid_feature_value(self):
             "gender": "other",  # Not in allowed values
         })
 
-        with pytest.raises(errors.ParseTableMultiError, match="Value 'other' not in category/feature gender"):
+        with pytest.raises(errors.ParseTableMultiError, match="Value 'other' not in feature gender"):
             people.add("123", person_data, features, 1)
 
     def test_people_remove_person(self):
@@ -283,7 +283,7 @@ def test_read_in_people_invalid_feature_value(self):
         # Change gender to invalid value
         people_body[0]["gender"] = "unknown"
 
-        with pytest.raises(errors.ParseTableMultiError, match="Value 'unknown' not in category/feature gender"):
+        with pytest.raises(errors.ParseTableMultiError, match="Value 'unknown' not in feature gender"):
             read_in_people(people_head, people_body, features, settings)
 
     def test_read_in_people_missing_id_column(self):
diff --git a/tests/test_people_features.py b/tests/test_people_features.py

Original file line number	Diff line number	Diff line change
`@@ -120,7 +120,7 @@ def create_simple_features(`
`120`	`120`	`]`
`121`	`121`
`122`	`122`	`head = ["feature", "value", "min", "max"]`
`123`		`- features = read_in_features(head, features_data)`
	`123`	`+ features, _, _ = read_in_features(head, features_data)`
`124`	`124`	`return features`
`125`	`125`
`126`	`126`
`@@ -150,7 +150,7 @@ def create_gender_only_features(min_val: int = 1, max_val: int = 5) -> FeatureCo`
`150`	`150`	`]`
`151`	`151`
`152`	`152`	`head = ["feature", "value", "min", "max"]`
`153`		`- features = read_in_features(head, features_data)`
	`153`	`+ features, _, _ = read_in_features(head, features_data)`
`154`	`154`	`return features`
`155`	`155`
`156`	`156`
Original file line number	Diff line number	Diff line change
`@@ -164,7 +164,7 @@ def test_max_zero_pruning(self):`
`164`	`164`	`{"feature": "gender", "value": "female", "min": "0", "max": "0"}, # Don't want any females`
`165`	`165`	`]`
`166`	`166`	`head = ["feature", "value", "min", "max"]`
`167`		`- features = read_in_features(head, features_data)`
	`167`	`+ features, _, _ = read_in_features(head, features_data)`
`168`	`168`
`169`	`169`	`settings = create_test_settings(columns_to_keep=["name"])`
`170`	`170`	`people = create_simple_people(features, settings, count=3)`