Skip to content

Commit f1555ba

Browse files
author
Hamish Downer
committed
When loading features, check against number to select
and raise an error if the maximum for any feature is less than the number to select, or the minimum for any feature is more than the number to select. This should make it clearer which max or min is out of line with the number you are trying to select.
1 parent 4b239e2 commit f1555ba

File tree

6 files changed

+122
-20
lines changed

6 files changed

+122
-20
lines changed

docs/adapters.md

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -71,15 +71,16 @@ def csv_selection_workflow():
7171
)
7272
select_data = SelectionData(data_source)
7373
settings = Settings()
74+
number_wanted=100
7475

7576
# Load data
76-
features, report = select_data.load_features()
77+
features, report = select_data.load_features(number_wanted)
7778
print(report.as_text())
7879
people, report = select_data.load_people(Settings(), features)
7980
print(report.as_text())
8081

8182
# Run selection
82-
success, panels, msgs = run_stratification(features, people, 100, settings)
83+
success, panels, msgs = run_stratification(features, people, number_wanted, settings)
8384

8485
if success:
8586
# Format results
@@ -129,6 +130,8 @@ print(report.as_text())
129130
people, report = select_data.load_people(settings, features)
130131
print(report.as_text())
131132

133+
# Here, do selection
134+
132135
# Configure output tabs
133136
data_source.selected_tab_name_stub = "Selected Panel"
134137
data_source.remaining_tab_name_stub = "Reserve Pool"
@@ -140,31 +143,36 @@ select_data.output_selected_remaining(selected_rows, remaining_rows, settings)
140143
#### Full Google Sheets Workflow
141144

142145
```python
143-
from sortition_algorithms import GSheetAdapter, run_stratification, selected_remaining_tables, Settings
146+
from sortition_algorithms import GSheetDataSource, SelectionData, run_stratification, selected_remaining_tables, Settings
144147
from pathlib import Path
145148

146149
def gsheet_selection_workflow():
147150
# Initialize
148-
adapter = GSheetAdapter(
149-
auth_json_path=Path("credentials.json"),
150-
gen_rem_tab=True,
151+
data_source = GSheetDataSource(
152+
feature_tab_name="Demographics",
153+
people_tab_name="Candidates",
154+
auth_json_path=Path("/secure/path/credentials.json"),
155+
gen_rem_tab=True, # Generate remaining tab
151156
)
157+
data_source.set_g_sheet_name("My Spreadsheet")
158+
select_data = SelectionData(data_source)
152159
settings = Settings()
160+
number_wanted = 120
153161

154162
# Load data
155163
adapter.set_g_sheet_name("Citizen Panel 2024")
156-
features, report = adapter.load_features("Demographics")
164+
features, report = adapter.load_features(number_wanted)
157165
if features is None:
158166
print("Failed to load features:", "\n".join(msgs))
159167
return
160168

161-
people, report = adapter.load_people("Candidates", settings, features)
169+
people, report = adapter.load_people(settings, features)
162170
if people is None:
163171
print("Failed to load people:", "\n".join(msgs))
164172
return
165173

166174
# Run selection
167-
success, panels, report = run_stratification(features, people, 120, settings)
175+
success, panels, report = run_stratification(features, people, number_wanted, settings)
168176

169177
if success:
170178
# Format results

docs/api-reference.md

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -431,7 +431,7 @@ class SelectionData:
431431
**Methods:**
432432

433433
```python
434-
def load_features(self) -> tuple[FeatureCollection, RunReport]:
434+
def load_features(self, number_to_select: int = 0) -> tuple[FeatureCollection, RunReport]:
435435
# Load feature definitions from data source
436436

437437
def load_people(
@@ -469,14 +469,15 @@ data_source = CSVFileDataSource(
469469

470470
# Wrap in SelectionData
471471
selection_data = SelectionData(data_source)
472+
number_to_select = 100
472473

473474
# Load data
474-
features, report = selection_data.load_features()
475+
features, report = selection_data.load_features(number_to_select)
475476
people, report = selection_data.load_people(settings, features)
476477

477478
# Run stratification (using core.py functions)
478479
from sortition_algorithms.core import run_stratification, selected_remaining_tables
479-
success, panels, report = run_stratification(features, people, 100, settings)
480+
success, panels, report = run_stratification(features, people, number_to_select, settings)
480481

481482
# Format and output results
482483
selected_rows, remaining_rows, _ = selected_remaining_tables(

src/sortition_algorithms/__main__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ def csv(
9191
select_data = adapters.SelectionData(data_source)
9292
settings_obj, report = Settings.load_from_file(Path(settings))
9393
echo_report(report)
94-
features, report = select_data.load_features()
94+
features, report = select_data.load_features(number_wanted)
9595
echo_report(report)
9696
if features is None:
9797
raise click.ClickException("Could not load features, exiting.")
@@ -183,7 +183,7 @@ def gsheet(
183183
echo_report(report)
184184

185185
data_source.set_g_sheet_name(gsheet_name)
186-
features, report = select_data.load_features()
186+
features, report = select_data.load_features(number_wanted)
187187
echo_report(report)
188188
if features is None:
189189
raise click.ClickException("Could not load features, exiting.")

src/sortition_algorithms/adapters.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -125,13 +125,15 @@ def __init__(self, data_source: AbstractDataSource, gen_rem_tab: bool = True) ->
125125
self.feature_column_name = "feature"
126126
self.feature_value_column_name = "value"
127127

128-
def load_features(self) -> tuple[FeatureCollection, RunReport]:
128+
def load_features(self, number_to_select: int = 0) -> tuple[FeatureCollection, RunReport]:
129129
report = RunReport()
130130
with self.data_source.read_feature_data(report) as headers_body:
131131
headers_iter, body = headers_body
132132
headers = list(headers_iter)
133133
try:
134-
features, self.feature_column_name, self.feature_value_column_name = read_in_features(headers, body)
134+
features, self.feature_column_name, self.feature_value_column_name = read_in_features(
135+
headers, body, number_to_select=number_to_select
136+
)
135137
except ParseTableMultiError as error:
136138
new_error = self.data_source.customise_features_parse_error(error, headers)
137139
raise new_error from error

src/sortition_algorithms/features.py

Lines changed: 35 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -122,12 +122,43 @@ def report_min_max_error_details(fc: FeatureCollection, feature_column_name: str
122122
]
123123

124124

125-
def check_min_max(fc: FeatureCollection, feature_column_name: str = "feature") -> None:
125+
def report_min_max_against_number_to_select(
    fc: FeatureCollection, number_to_select: int, feature_column_name: str
) -> list[str]:
    """
    Compare each feature's combined minimum and maximum with number_to_select.

    For every feature in ``fc`` the combined minimum and maximum are obtained
    from ``_fv_minimum_selection`` and ``_fv_maximum_selection``. A message is
    added when the minimum is more than ``number_to_select`` and another when
    the maximum is less than ``number_to_select``.

    Returns the list of messages, which is empty when ``fc`` is empty or when
    nothing is out of range. ``feature_column_name`` is only used to label the
    feature in each message.
    """
    problems: list[str] = []
    if not fc:
        return problems
    for feature_name, feature_values in fc.items():
        lowest = _fv_minimum_selection(feature_values)
        highest = _fv_maximum_selection(feature_values)
        # The minimum message always precedes the maximum message for a feature.
        if lowest > number_to_select:
            problems.append(
                f"Minimum for {feature_column_name} {feature_name} ({lowest}) "
                f"is more than number to select ({number_to_select})"
            )
        if highest < number_to_select:
            problems.append(
                f"Maximum for {feature_column_name} {feature_name} ({highest}) "
                f"is less than number to select ({number_to_select})"
            )
    return problems
149+
150+
151+
def check_min_max(fc: FeatureCollection, number_to_select: int = 0, feature_column_name: str = "feature") -> None:
    """
    Validate the min/max settings of the features, raising if they cannot work.

    Two checks are run and all of their messages are gathered before raising:

    - if ``minimum_selection(fc)`` is bigger than ``maximum_selection(fc)`` there
      is an input error, and the details come from ``report_min_max_error_details``
    - if ``number_to_select`` is non-zero, the messages from
      ``report_min_max_against_number_to_select`` are added; the default of 0
      skips this check

    Raises SelectionMultilineError with every gathered message if there are any.
    """
    problems: list[str] = []
    if minimum_selection(fc) > maximum_selection(fc):
        problems.extend(report_min_max_error_details(fc, feature_column_name))
    if number_to_select:
        problems.extend(report_min_max_against_number_to_select(fc, number_to_select, feature_column_name))
    if problems:
        raise SelectionMultilineError(problems)
131162

132163

133164
def check_desired(fc: FeatureCollection, desired_number: int) -> None:
@@ -307,7 +338,7 @@ def _clean_row(row: utils.StrippedDict, feature_flex: bool, row_number: int) ->
307338

308339

309340
def read_in_features(
310-
features_head: Iterable[str], features_body: Iterable[dict[str, str]]
341+
features_head: Iterable[str], features_body: Iterable[dict[str, str]], number_to_select: int = 0
311342
) -> tuple[FeatureCollection, str, str]:
312343
"""
313344
Read in stratified selection features and values
@@ -344,7 +375,7 @@ def read_in_features(
344375
if combined_error:
345376
raise combined_error
346377

347-
check_min_max(features, feature_column_name)
378+
check_min_max(features, number_to_select=number_to_select, feature_column_name=feature_column_name)
348379
# check feature_flex to see if we need to set the max here
349380
# this only changes the max_flex value if these (optional) flex values are NOT set already
350381
set_default_max_flex(features)

tests/test_features.py

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -434,6 +434,66 @@ def test_inconsistent_min_max_across_features(self):
434434
assert "smallest maximum is 4 for feature 'age'" in context.exconly()
435435
assert "largest minimum is 10 for feature 'gender'" in context.exconly()
436436

437+
def test_min_larger_than_number_to_select(self):
    """Asking for 8 people must fail when the gender minimums add up to 10."""
    # gender totals: min 5 + 5 = 10, max 6 + 6 = 12
    gender_rows = [
        {"feature": "gender", "value": gender, "min": "5", "max": "6"}
        for gender in ("male", "female")
    ]
    with pytest.raises(SelectionMultilineError) as exc_info:
        read_in_features(FEATURE_FILE_FIELD_NAMES, gender_rows, number_to_select=8)
    expected_message = "Minimum for feature gender (10) is more than number to select (8)"
    assert expected_message in exc_info.exconly()
456+
457+
def test_max_smaller_than_number_to_select(self):
    """Asking for 15 people must fail when the gender maximums add up to 12."""
    # gender totals: min 5 + 5 = 10, max 6 + 6 = 12
    gender_rows = [
        {"feature": "gender", "value": gender, "min": "5", "max": "6"}
        for gender in ("male", "female")
    ]
    with pytest.raises(SelectionMultilineError) as exc_info:
        read_in_features(FEATURE_FILE_FIELD_NAMES, gender_rows, number_to_select=15)
    expected_message = "Maximum for feature gender (12) is less than number to select (15)"
    assert expected_message in exc_info.exconly()
476+
477+
def test_no_error_when_number_to_select_is_zero(self):
    """With number_to_select=0 the same features load without an error."""
    # gender totals: min 5 + 5 = 10, max 6 + 6 = 12
    gender_rows = [
        {"feature": "gender", "value": gender, "min": "5", "max": "6"}
        for gender in ("male", "female")
    ]
    result = read_in_features(FEATURE_FILE_FIELD_NAMES, gender_rows, number_to_select=0)
    # The real point is that no error was raised; the column name is a sanity check.
    _, feature_column_name, _ = result
    assert feature_column_name == "feature"
496+
437497

438498
class TestReadInFeaturesDataTypes:
439499
"""Test handling of different data types in input."""

0 commit comments

Comments
 (0)