Skip to content

Commit 63d38dd

Browse files
author
Hamish Downer
committed
Use "feature" or "category" rather than "feature/category" in error messages
This means finding the column header used and passing that around, so it is available for error messages around the code. It makes the list of arguments a little long sometimes, and means we're returning a tuple rather than a single thing here and there. Might refactor later, but will leave it for now. And then quite a lot of tests need updating
1 parent bc4d80e commit 63d38dd

File tree

10 files changed

+83
-48
lines changed

10 files changed

+83
-48
lines changed

src/sortition_algorithms/adapters.py

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -121,13 +121,17 @@ def __init__(self, data_source: AbstractDataSource, gen_rem_tab: bool = True) ->
121121
self.data_source = data_source
122122
# short for "generate remaining tab"
123123
self.gen_rem_tab = gen_rem_tab # Added for checkbox in strat select app
124+
# record the column headers for feature/category and for value
125+
self.feature_column_name = "feature"
126+
self.feature_value_column_name = "value"
124127

125128
def load_features(self) -> tuple[FeatureCollection, RunReport]:
126129
report = RunReport()
127130
with self.data_source.read_feature_data(report) as headers_body:
128-
headers, body = headers_body
131+
headers_iter, body = headers_body
132+
headers = list(headers_iter)
129133
try:
130-
features = read_in_features(list(headers), body)
134+
features, self.feature_column_name, self.feature_value_column_name = read_in_features(headers, body)
131135
except ParseTableMultiError as error:
132136
new_error = self.data_source.customise_features_parse_error(error, headers)
133137
raise new_error from error
@@ -137,9 +141,16 @@ def load_features(self) -> tuple[FeatureCollection, RunReport]:
137141
def load_people(self, settings: Settings, features: FeatureCollection) -> tuple[People, RunReport]:
138142
report = RunReport()
139143
with self.data_source.read_people_data(report) as headers_body:
140-
headers, body = headers_body
144+
headers_iter, body = headers_body
145+
headers = list(headers_iter)
141146
try:
142-
people, report = read_in_people(list(headers), body, features, settings)
147+
people, report = read_in_people(
148+
people_head=headers,
149+
people_body=body,
150+
features=features,
151+
settings=settings,
152+
feature_column_name=self.feature_column_name,
153+
)
143154
except ParseTableMultiError as error:
144155
new_error = self.data_source.customise_people_parse_error(error, headers)
145156
raise new_error from error

src/sortition_algorithms/features.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -306,7 +306,9 @@ def _clean_row(row: utils.StrippedDict, feature_flex: bool, row_number: int) ->
306306
return feature_name, feature_value, fv_minmax
307307

308308

309-
def read_in_features(features_head: Iterable[str], features_body: Iterable[dict[str, str]]) -> FeatureCollection:
309+
def read_in_features(
310+
features_head: Iterable[str], features_body: Iterable[dict[str, str]]
311+
) -> tuple[FeatureCollection, str, str]:
310312
"""
311313
Read in stratified selection features and values
312314
@@ -315,8 +317,13 @@ def read_in_features(features_head: Iterable[str], features_body: Iterable[dict[
315317
features: FeatureCollection = CaseInsensitiveDict()
316318
features_flex, filtered_headers = _feature_headers_flex(list(features_head))
317319
combined_error = ParseTableMultiError()
320+
feature_column_name = "feature"
321+
feature_value_column_name = "value"
318322
# row 1 is the header, so the body starts on row 2
319323
for row_number, row in enumerate(features_body, start=2):
324+
if row_number == 2:
325+
_, feature_column_name = _get_feature_from_row(row)
326+
_, feature_value_column_name = _get_feature_value_from_row(row)
320327
# check the set of keys in the row are the same as the headers
321328
assert set(filtered_headers) <= set(row.keys())
322329
stripped_row = utils.StrippedDict(row)
@@ -341,4 +348,4 @@ def read_in_features(features_head: Iterable[str], features_body: Iterable[dict[
341348
# check feature_flex to see if we need to set the max here
342349
# this only changes the max_flex value if these (optional) flex values are NOT set already
343350
set_default_max_flex(features)
344-
return CaseInsensitiveDict(features)
351+
return CaseInsensitiveDict(features), feature_column_name, feature_value_column_name

src/sortition_algorithms/people.py

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,14 @@ def __iter__(self) -> Iterator[str]:
3434
def items(self) -> ItemsView[str, dict[str, str]]:
3535
return self._full_data.items()
3636

37-
def add(self, person_key: str, data: StrippedDict, features: FeatureCollection, row_number: int) -> None:
37+
def add(
38+
self,
39+
person_key: str,
40+
data: StrippedDict,
41+
features: FeatureCollection,
42+
row_number: int,
43+
feature_column_name: str = "feature",
44+
) -> None:
3845
person_full_data: dict[str, str] = {}
3946
errors = ParseErrorsCollector()
4047
# get the feature values: these are the most important and we must check them
@@ -46,13 +53,12 @@ def add(self, person_key: str, data: StrippedDict, features: FeatureCollection,
4653
if p_value in feature_values:
4754
person_full_data[feature_name] = p_value
4855
else:
49-
errors.add(
50-
msg=f"Value '{p_value}' not in category/feature {feature_name}",
51-
key=feature_name,
52-
value=p_value,
53-
row=row_number,
54-
row_name=person_key,
56+
msg = (
57+
f"Value '{p_value}' not in {feature_column_name} {feature_name}"
58+
if p_value
59+
else f"Empty value in {feature_column_name} {feature_name}"
5560
)
61+
errors.add(msg=msg, key=feature_name, value=p_value, row=row_number, row_name=person_key)
5662
if errors:
5763
raise errors.to_error()
5864
# then get the other column values we need
@@ -202,6 +208,7 @@ def read_in_people(
202208
people_body: Iterable[dict[str, str] | dict[str, str | int]],
203209
features: FeatureCollection,
204210
settings: Settings,
211+
feature_column_name: str = "feature",
205212
) -> tuple[People, RunReport]:
206213
report = RunReport()
207214
_check_people_head(people_head, features, settings)
@@ -218,7 +225,13 @@ def read_in_people(
218225
report.add_line(f"WARNING: blank cell found in ID column in row {row_number} - skipped that line!")
219226
continue
220227
try:
221-
people.add(pkey, stripped_row, features, row_number)
228+
people.add(
229+
person_key=pkey,
230+
data=stripped_row,
231+
features=features,
232+
row_number=row_number,
233+
feature_column_name=feature_column_name,
234+
)
222235
except ParseTableMultiError as error:
223236
# gather all the errors so we can tell the user as many problems as possible in one pass
224237
combined_error.combine(error)

tests/helpers.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ def create_simple_features(
120120
]
121121

122122
head = ["feature", "value", "min", "max"]
123-
features = read_in_features(head, features_data)
123+
features, _, _ = read_in_features(head, features_data)
124124
return features
125125

126126

@@ -150,7 +150,7 @@ def create_gender_only_features(min_val: int = 1, max_val: int = 5) -> FeatureCo
150150
]
151151

152152
head = ["feature", "value", "min", "max"]
153-
features = read_in_features(head, features_data)
153+
features, _, _ = read_in_features(head, features_data)
154154
return features
155155

156156

tests/test_adapters.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -454,4 +454,4 @@ def test_csv_load_people_from_file_failure(tmp_path: Path):
454454
with pytest.raises(SelectionMultilineError) as excinfo:
455455
file_select_data.load_people(settings, features)
456456
assert "new_people.csv" in str(excinfo.value)
457-
assert "'PictsieLand' not in category/feature geo_bucket" in str(excinfo.value)
457+
assert "'PictsieLand' not in feature geo_bucket" in str(excinfo.value)

tests/test_committee_generation.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ def convert_people_data(
6060
if person_id in columns_data:
6161
person_data.update(columns_data[person_id])
6262

63-
people.add(person_id, StrippedDict(person_data), features, 0)
63+
people.add(person_key=person_id, data=StrippedDict(person_data), features=features, row_number=0)
6464

6565
return people
6666

tests/test_features.py

Lines changed: 26 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -23,49 +23,53 @@ def test_read_in_features_without_flex():
2323
{"feature": "gender", "value": "female", "min": "4", "max": "6"},
2424
{"feature": "gender", "value": "non-binary-other", "min": "0", "max": "1"},
2525
]
26-
features = read_in_features(head, body)
26+
features, feature_column_name, feature_value_column_name = read_in_features(head, body)
2727
assert list(features.keys()) == ["gender"]
2828
assert sorted(features["gender"].keys()) == ["female", "male", "non-binary-other"]
2929
assert minimum_selection(features) == 8
3030
assert maximum_selection(features) == 13
31+
assert feature_column_name == "feature"
32+
assert feature_value_column_name == "value"
3133

3234

3335
def test_read_in_features_with_flex():
3436
"""
3537
Test a basic import with a single feature/category
3638
"""
37-
head = FEATURE_FILE_FIELD_NAMES_FLEX
39+
head = FEATURE_FILE_FIELD_NAMES_FLEX_OLD
3840
body = [
3941
{
40-
"feature": "gender",
41-
"value": "male",
42+
"category": "gender",
43+
"name": "male",
4244
"min": "4",
4345
"max": "6",
4446
"min_flex": "4",
4547
"max_flex": "6",
4648
},
4749
{
48-
"feature": "gender",
49-
"value": "female",
50+
"category": "gender",
51+
"name": "female",
5052
"min": "4",
5153
"max": "6",
5254
"min_flex": "4",
5355
"max_flex": "6",
5456
},
5557
{
56-
"feature": "gender",
57-
"value": "non-binary-other",
58+
"category": "gender",
59+
"name": "non-binary-other",
5860
"min": "0",
5961
"max": "1",
6062
"min_flex": "0",
6163
"max_flex": "1",
6264
},
6365
]
64-
features = read_in_features(head, body)
66+
features, feature_column_name, feature_value_column_name = read_in_features(head, body)
6567
assert list(features.keys()) == ["gender"]
6668
assert sorted(features["gender"].keys()) == ["female", "male", "non-binary-other"]
6769
assert minimum_selection(features) == 8
6870
assert maximum_selection(features) == 13
71+
assert feature_column_name == "category"
72+
assert feature_value_column_name == "name"
6973

7074

7175
def test_read_in_features_without_flex_old_names():
@@ -78,7 +82,7 @@ def test_read_in_features_without_flex_old_names():
7882
{"category": "gender", "name": "female", "min": "4", "max": "6"},
7983
{"category": "gender", "name": "non-binary-other", "min": "0", "max": "1"},
8084
]
81-
features = read_in_features(head, body)
85+
features, _, _ = read_in_features(head, body)
8286
assert list(features.keys()) == ["gender"]
8387
assert sorted(features["gender"].keys()) == ["female", "male", "non-binary-other"]
8488
assert minimum_selection(features) == 8
@@ -116,7 +120,7 @@ def test_read_in_features_with_flex_old_names():
116120
"max_flex": "1",
117121
},
118122
]
119-
features = read_in_features(head, body)
123+
features, _, _ = read_in_features(head, body)
120124
assert list(features.keys()) == ["gender"]
121125
assert sorted(features["gender"].keys()) == ["female", "male", "non-binary-other"]
122126
assert minimum_selection(features) == 8
@@ -136,7 +140,7 @@ def test_multiple_features_without_flex(self):
136140
{"feature": "age", "value": "31-50", "min": "2", "max": "5"},
137141
{"feature": "age", "value": "51+", "min": "1", "max": "2"},
138142
]
139-
features = read_in_features(head, body)
143+
features, _, _ = read_in_features(head, body)
140144

141145
# Check we have both features
142146
assert sorted(features.keys()) == ["age", "gender"]
@@ -189,7 +193,7 @@ def test_multiple_features_with_flex(self):
189193
"max_flex": "5",
190194
},
191195
]
192-
features = read_in_features(head, body)
196+
features, _, _ = read_in_features(head, body)
193197

194198
assert sorted(features.keys()) == ["education", "gender"]
195199
assert minimum_selection(features) == 4 # max(4, 3) = 4
@@ -226,7 +230,7 @@ def test_extra_headers_ignored(self):
226230
"suggest max",
227231
] # extra "suggest min/max" headers
228232
body = [{"feature": "gender", "value": "male", "min": "1", "max": "2"}]
229-
features = read_in_features(head, body)
233+
features, _, _ = read_in_features(head, body)
230234

231235
assert list(features.keys()) == ["gender"]
232236
assert list(features["gender"].keys()) == ["male"]
@@ -243,7 +247,7 @@ def test_blank_feature_name_skipped(self):
243247
}, # blank feature, should be skipped
244248
{"feature": "gender", "value": "female", "min": "2", "max": "3"},
245249
]
246-
features = read_in_features(head, body)
250+
features, _, _ = read_in_features(head, body)
247251

248252
assert list(features.keys()) == ["gender"]
249253
assert list(features["gender"].keys()) == ["female"]
@@ -441,7 +445,7 @@ def test_string_values_stripped(self):
441445
{"feature": " gender ", "value": " male ", "min": "1", "max": "2"},
442446
{"feature": "gender", "value": "female", "min": "2", "max": "3"},
443447
]
444-
features = read_in_features(head, body)
448+
features, _, _ = read_in_features(head, body)
445449

446450
assert "gender" in features
447451
assert sorted(features["gender"].keys()) == ["female", "male"]
@@ -453,7 +457,7 @@ def test_numeric_feature_names_and_values(self):
453457
{"feature": 123, "value": 456, "min": "1", "max": "2"},
454458
{"feature": 123, "value": 789, "min": "2", "max": "3"},
455459
]
456-
features = read_in_features(head, body)
460+
features, _, _ = read_in_features(head, body)
457461

458462
assert "123" in features
459463
assert sorted(features["123"].keys()) == ["456", "789"]
@@ -469,7 +473,7 @@ def test_old_column_names_without_flex(self):
469473
{"category": "gender", "name": "male", "min": "1", "max": "2"},
470474
{"category": "gender", "name": "female", "min": "2", "max": "3"},
471475
]
472-
features = read_in_features(head, body)
476+
features, _, _ = read_in_features(head, body)
473477

474478
assert "gender" in features
475479
assert sorted(features["gender"].keys()) == ["female", "male"]
@@ -495,7 +499,7 @@ def test_old_column_names_with_flex(self):
495499
"max_flex": "4",
496500
},
497501
]
498-
features = read_in_features(head, body)
502+
features, _, _ = read_in_features(head, body)
499503

500504
assert "gender" in features
501505
assert sorted(features["gender"].keys()) == ["female", "male"]
@@ -547,7 +551,7 @@ def test_case_insensitive_features(self):
547551
{"feature": "gender", "value": "Male", "min": "2", "max": "4"},
548552
{"feature": "gender", "value": "Female", "min": "2", "max": "4"},
549553
]
550-
features = read_in_features(head, body)
554+
features, _, _ = read_in_features(head, body)
551555

552556
# Should be able to access with different case
553557
assert "male" in features["gender"]
@@ -570,7 +574,7 @@ def test_case_insensitive_feature_values(self):
570574
{"feature": "gender", "value": "Male", "min": "2", "max": "4"},
571575
{"feature": "gender", "value": "Female", "min": "2", "max": "4"},
572576
]
573-
features = read_in_features(head, body)
577+
features, _, _ = read_in_features(head, body)
574578

575579
# Should be able to access with different case
576580
assert "male" in features["gender"]
@@ -595,7 +599,7 @@ def test_case_insensitive_with_mixed_case_input(self):
595599
{"feature": "ethnicity", "value": "White British", "min": "1", "max": "2"},
596600
{"feature": "ETHNICITY", "value": "Asian", "min": "1", "max": "2"},
597601
]
598-
features = read_in_features(head, body)
602+
features, _, _ = read_in_features(head, body)
599603

600604
# Check all variations work
601605
assert "male" in features["gender"]

tests/test_find_sample.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,7 @@ def test_max_zero_pruning(self):
164164
{"feature": "gender", "value": "female", "min": "0", "max": "0"}, # Don't want any females
165165
]
166166
head = ["feature", "value", "min", "max"]
167-
features = read_in_features(head, features_data)
167+
features, _, _ = read_in_features(head, features_data)
168168

169169
settings = create_test_settings(columns_to_keep=["name"])
170170
people = create_simple_people(features, settings, count=3)

tests/test_people.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ def create_simple_test_features() -> FeatureCollection:
1919
{"feature": "gender", "value": "female", "min": "1", "max": "10"},
2020
]
2121
head = ["feature", "value", "min", "max"]
22-
features = read_in_features(head, features_data)
22+
features, _, _ = read_in_features(head, features_data)
2323
return features
2424

2525

@@ -32,7 +32,7 @@ def create_test_features() -> FeatureCollection:
3232
{"feature": "age", "value": "old", "min": "1", "max": "3"},
3333
]
3434
head = ["feature", "value", "min", "max"]
35-
features = read_in_features(head, features_data)
35+
features, _, _ = read_in_features(head, features_data)
3636
return features
3737

3838

@@ -89,7 +89,7 @@ def test_people_add_person_with_invalid_feature_value(self):
8989
"gender": "other", # Not in allowed values
9090
})
9191

92-
with pytest.raises(errors.ParseTableMultiError, match="Value 'other' not in category/feature gender"):
92+
with pytest.raises(errors.ParseTableMultiError, match="Value 'other' not in feature gender"):
9393
people.add("123", person_data, features, 1)
9494

9595
def test_people_remove_person(self):
@@ -283,7 +283,7 @@ def test_read_in_people_invalid_feature_value(self):
283283
# Change gender to invalid value
284284
people_body[0]["gender"] = "unknown"
285285

286-
with pytest.raises(errors.ParseTableMultiError, match="Value 'unknown' not in category/feature gender"):
286+
with pytest.raises(errors.ParseTableMultiError, match="Value 'unknown' not in feature gender"):
287287
read_in_people(people_head, people_body, features, settings)
288288

289289
def test_read_in_people_missing_id_column(self):

0 commit comments

Comments
 (0)