Skip to content

Commit c27fbd0

Browse files
Fix EBDs with "*" in step_number (#108)
* 🥅 Catch errors in test explicitly (instead of Pokémon style); this doesn't fix anything but ensures that we get rid of the error once and for all * catch UnboundLocalError * catch UnboundLocalError * add test base for E_0097 * like this? * Update src/ebddocx2table/docxtableconverter.py * Update src/ebddocx2table/docxtableconverter.py * 🚧 implemented exception for "*"-step_numbers * fixed extraction test * fix typo in extraction test * reduced code * List -> list (PEP 585) * increased readability --------- Co-authored-by: konstantin <konstantin.klein@hochfrequenz.de>
1 parent 9878a5e commit c27fbd0

3 files changed

Lines changed: 242 additions & 7 deletions

File tree

src/ebddocx2table/docxtableconverter.py

Lines changed: 78 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from docx.table import Table, _Cell, _Row # type:ignore[import]
1212
from ebdtable2graph.models import EbdTable, EbdTableRow, EbdTableSubRow
1313
from ebdtable2graph.models.ebd_table import _STEP_NUMBER_REGEX, EbdCheckResult, EbdTableMetaData, MultiStepInstruction
14-
from more_itertools import first, first_true
14+
from more_itertools import first, first_true, last
1515

1616
_logger = logging.getLogger(__name__)
1717

@@ -219,15 +219,15 @@ def _handle_single_table(
219219
table: Table,
220220
multi_step_instructions: List[MultiStepInstruction],
221221
row_offset: int,
222-
rows: List[EbdTableRow],
223-
sub_rows: List[EbdTableSubRow],
222+
rows: list[EbdTableRow],
223+
sub_rows: list[EbdTableSubRow],
224224
) -> None:
225225
"""
226226
Handles a single table (out of possible multiple tables for 1 EBD).
227227
The results are written into rows, sub_rows and multi_step_instructions. Those will be modified.
228228
"""
229-
use_cases: List[str] = []
230-
for enhanced_table_row in self._enhance_list_view(table=table, row_offset=row_offset):
229+
use_cases: list[str] = []
230+
for row_index, enhanced_table_row in enumerate(self._enhance_list_view(table=table, row_offset=row_offset)):
231231
if enhanced_table_row.sub_row_position == _EbdSubRowPosition.UPPER:
232232
use_cases = _get_use_cases(enhanced_table_row.cells)
233233
sub_rows = [] # clear list every second entry
@@ -236,6 +236,9 @@ def _handle_single_table(
236236
boolean_outcome, subsequent_step_number = _read_subsequent_step_cell(
237237
enhanced_table_row.cells[len(use_cases) + self._column_index_check_result]
238238
)
239+
if step_number.endswith("*"):
240+
self._handle_single_table_star_exception(table, multi_step_instructions, row_offset, rows, row_index)
241+
break
239242
sub_row = EbdTableSubRow(
240243
check_result=EbdCheckResult(subsequent_step_number=subsequent_step_number, result=boolean_outcome),
241244
result_code=enhanced_table_row.cells[len(use_cases) + self._column_index_result_code].text.strip()
@@ -263,6 +266,76 @@ def _handle_single_table(
263266
)
264267
)
265268

269+
# As in _handle_single_table above: boolean_outcome and subsequent_step_number are read from every row; they could be ignored if (and only if) all "*"-numbered steps always follow the same scheme.
270+
# pylint:disable=too-many-locals
271+
def _handle_single_table_star_exception(
    self,
    table: Table,
    multi_step_instructions: list[MultiStepInstruction],
    row_offset: int,
    rows: list[EbdTableRow],
    row_index: int,
) -> None:
    """
    Completes the table after _handle_single_table hit a step number ending with "*".

    Every enhanced row from ``row_index`` up to the end of the table is turned into its own artificial
    EbdTableRow. Each artificial row gets the step number of the last entry in ``rows`` plus one and two sub rows:
    * one that carries the outcome read from the row itself, together with the result code and note of the first
      "*" row (the row at ``row_index``), which are shared by all artificial rows
    * one with the opposite outcome that points to the next artificial step, or to "Ende" for the last row

    The results are written into ``rows`` and ``multi_step_instructions``; both lists are modified in place.

    :param table: the raw docx table that is currently being processed
    :param multi_step_instructions: collects the instructions found on the processed rows (modified)
    :param row_offset: forwarded unchanged to ``_enhance_list_view``
    :param rows: the rows converted so far; the artificial rows are appended to it (modified)
    :param row_index: index (within the enhanced list view) of the first row with a "*" step number
    :raises ValueError: if ``rows`` is empty or the step number of its last entry is not an integer
    :raises IndexError: if ``row_index`` is outside of the enhanced list view
    """
    complete_table = self._enhance_list_view(table=table, row_offset=row_offset)
    first_star_row = complete_table[row_index]
    use_cases = _get_use_cases(first_star_row.cells)
    # the use case columns precede the regular columns, hence all column indexes are shifted by their number
    column_shift = len(use_cases)
    # result code and note are only read once (from the first "*" row) and re-used for all artificial rows
    star_case_result_code = first_star_row.cells[column_shift + self._column_index_result_code].text.strip() or None
    star_case_note = first_star_row.cells[column_shift + self._column_index_note].text.strip() or None
    index_of_last_row = len(complete_table) - 1
    for current_index in range(row_index, len(complete_table)):
        enhanced_table_row = complete_table[current_index]
        # rows grows by one entry per iteration, so this continues the numbering without gaps
        step_number = str(int(last(rows).step_number) + 1)
        description = enhanced_table_row.cells[column_shift + self._column_index_description].text.strip()
        boolean_outcome, subsequent_step_number = _read_subsequent_step_cell(
            enhanced_table_row.cells[column_shift + self._column_index_check_result]
        )
        next_step = "Ende" if current_index == index_of_last_row else str(int(step_number) + 1)
        row = EbdTableRow(
            description=description,
            step_number=step_number,
            sub_rows=[
                EbdTableSubRow(
                    check_result=EbdCheckResult(
                        subsequent_step_number=subsequent_step_number, result=boolean_outcome
                    ),
                    result_code=star_case_result_code,
                    note=star_case_note,
                ),
                # Point to the next step. The outcome has to be the opposite of the one above, otherwise both
                # sub rows would describe the same answer as soon as a "*" row has a positive outcome.
                EbdTableSubRow(
                    check_result=EbdCheckResult(subsequent_step_number=next_step, result=not boolean_outcome),
                    result_code=None,
                    note=None,
                ),
            ],
            use_cases=use_cases or None,
        )
        rows.append(row)
        _logger.debug("Successfully added artificial row #%s ('%s')", step_number, description)

        if enhanced_table_row.multi_step_instruction_text:
            multi_step_instructions.append(
                MultiStepInstruction(
                    first_step_number_affected=step_number,
                    instruction_text=enhanced_table_row.multi_step_instruction_text,
                )
            )
266339
def convert_docx_tables_to_ebd_table(self) -> EbdTable:
267340
"""
268341
Converts the raw docx tables of an EBD to an EbdTable.

unittests/examples.py

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,158 @@
5656
],
5757
)
5858

59+
# E_0097 is an example of a table that has rows with "*"
60+
table_e0097 = EbdTable(
61+
metadata=EbdTableMetaData(
62+
ebd_code="E_0097",
63+
chapter="7.56 AD: Austausch der Lieferantenausfallarbeitsclearingliste (Einzelanforderung)",
64+
sub_chapter="7.56.1 E_0097_Marktlokationen mit LF-AACL abgleichen",
65+
role="LF",
66+
),
67+
rows=[
68+
EbdTableRow(
69+
step_number="1",
70+
description="Entspricht die Gültigkeit (Monat) dem angefragten Zeit-raum?",
71+
sub_rows=[
72+
EbdTableSubRow(
73+
check_result=EbdCheckResult(result=False, subsequent_step_number=None),
74+
result_code="A01",
75+
note="Cluster: Ablehnung der gesamten Liste\nZeitraum nicht plausibel",
76+
),
77+
EbdTableSubRow(
78+
check_result=EbdCheckResult(result=True, subsequent_step_number="2"), result_code=None, note=None
79+
),
80+
],
81+
use_cases=None,
82+
),
83+
EbdTableRow(
84+
step_number="2",
85+
description="Entspricht der MaBiS-ZP dem angefragten MaBiS-ZP?",
86+
sub_rows=[
87+
EbdTableSubRow(
88+
check_result=EbdCheckResult(result=False, subsequent_step_number=None),
89+
result_code="A02",
90+
note="Cluster: Ablehnung der gesamten Liste \nMaBiS-ZP entspricht nicht dem angefragten MaBiS-ZP",
91+
),
92+
EbdTableSubRow(
93+
check_result=EbdCheckResult(result=True, subsequent_step_number="3"), result_code=None, note=None
94+
),
95+
],
96+
use_cases=None,
97+
),
98+
EbdTableRow(
99+
step_number="3",
100+
description="Entspricht die Versionsangabe in der LF-AACL der Versionsangabe der LF-AASZR, zu der eine LF-AACL angefordert wurde?",
101+
sub_rows=[
102+
EbdTableSubRow(
103+
check_result=EbdCheckResult(result=False, subsequent_step_number=None),
104+
result_code="A03",
105+
note="Cluster: Ablehnung der gesamten Liste \nVersion nicht zugelassen",
106+
),
107+
EbdTableSubRow(
108+
check_result=EbdCheckResult(result=True, subsequent_step_number="4"), result_code=None, note=None
109+
),
110+
],
111+
use_cases=None,
112+
),
113+
EbdTableRow(
114+
step_number="4",
115+
description="Ist eine erwartete Marktlokation in der LF-AACL nicht enthalten?",
116+
sub_rows=[
117+
EbdTableSubRow(
118+
check_result=EbdCheckResult(result=True, subsequent_step_number=None),
119+
result_code="A04",
120+
note="Cluster: Korrekturliste wegen Ablehnung\nZusätzlicher Datensatz / ergänzte Marktlokation",
121+
),
122+
EbdTableSubRow(
123+
check_result=EbdCheckResult(result=False, subsequent_step_number="5"), result_code=None, note=None
124+
),
125+
],
126+
use_cases=None,
127+
),
128+
EbdTableRow(
129+
step_number="5",
130+
description="Ist in der LF-AACL eine Marktlokation enthalten, die im Bilanzierungsmonat dem LF zur Bilanzierung nicht zugeordnet ist?",
131+
sub_rows=[
132+
EbdTableSubRow(
133+
check_result=EbdCheckResult(result=True, subsequent_step_number=None),
134+
result_code="A05",
135+
note="Cluster: Korrekturliste wegen Ablehnung\nMarktlokation falschem LF zugeordnet",
136+
),
137+
EbdTableSubRow(
138+
check_result=EbdCheckResult(result=False, subsequent_step_number="6"), result_code=None, note=None
139+
),
140+
],
141+
use_cases=None,
142+
),
143+
EbdTableRow(
144+
step_number="6",
145+
description="Ist die in der LF-AACL enthaltene Marktlokation dem MaBiS-ZP zugeordnet?",
146+
sub_rows=[
147+
EbdTableSubRow(
148+
check_result=EbdCheckResult(result=False, subsequent_step_number=None),
149+
result_code="A06",
150+
note="Cluster: Korrekturliste wegen Ablehnung\nZu viele Marktlokationen enthalten / entfallene Marktlokation",
151+
),
152+
EbdTableSubRow(
153+
check_result=EbdCheckResult(result=True, subsequent_step_number="7"), result_code=None, note=None
154+
),
155+
],
156+
use_cases=None,
157+
),
158+
EbdTableRow(
159+
step_number="7",
160+
description="Entspricht das Bilanzierungsgebiet dem zwischen NB und LF ausgetauschten Bilanzierungsgebiet?",
161+
sub_rows=[
162+
EbdTableSubRow(
163+
check_result=EbdCheckResult(result=False, subsequent_step_number=None),
164+
result_code="A07",
165+
note="Cluster: Korrekturliste wegen Ablehnung\nBilanzierungsrel. Daten nicht korrekt / fehlen",
166+
),
167+
EbdTableSubRow(
168+
check_result=EbdCheckResult(result=True, subsequent_step_number="8"), result_code=None, note=None
169+
),
170+
],
171+
use_cases=None,
172+
),
173+
EbdTableRow(
174+
step_number="8", # artificially incremented step number (was '7*')
175+
description="Entspricht der Bilanzkreis dem zwischen NB und LF ausgetauschten Bilanzkreis?", # todo: check diff for "ausge-tauschten" in original .docx
176+
sub_rows=[
177+
EbdTableSubRow(
178+
check_result=EbdCheckResult(result=False, subsequent_step_number=None),
179+
result_code="A07",
180+
note="Cluster: Korrekturliste wegen Ablehnung\nBilanzierungsrel. Daten nicht korrekt / fehlen",
181+
),
182+
EbdTableSubRow(
183+
check_result=EbdCheckResult(result=True, subsequent_step_number="9"), result_code=None, note=None
184+
),
185+
],
186+
use_cases=None,
187+
),
188+
EbdTableRow(
189+
step_number="9", # artificially incremented step number (was '7*')
190+
description="Entspricht die tatsächliche Ausfallarbeitsmenge der er-warteten Ausfallarbeitsmenge?",
191+
sub_rows=[
192+
EbdTableSubRow(
193+
check_result=EbdCheckResult(result=False, subsequent_step_number=None),
194+
result_code="A07",
195+
note="Cluster: Korrekturliste wegen Ablehnung\nBilanzierungsrel. Daten nicht korrekt / fehlen",
196+
),
197+
EbdTableSubRow(
198+
check_result=EbdCheckResult(result=True, subsequent_step_number="Ende"), result_code=None, note=None
199+
),
200+
],
201+
use_cases=None,
202+
),
203+
],
204+
multi_step_instructions=[
205+
MultiStepInstruction(
206+
first_step_number_affected="4", instruction_text="Je Marktlokation erfolgen die nachfolgenden Prüfungen:"
207+
)
208+
],
209+
)
210+
59211
# E_0901 spans over multiple pages, let the fun begin
60212
table_e0901 = EbdTable(
61213
metadata=EbdTableMetaData(

unittests/test_highlevel.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from ebddocx2table.docxtableconverter import DocxTableConverter
99

1010
from . import get_all_ebd_keys, get_document, get_ebd_docx_tables
11-
from .examples import table_e0003, table_e0453, table_e0462, table_e0901
11+
from .examples import table_e0003, table_e0097, table_e0453, table_e0462, table_e0901
1212

1313

1414
@pytest.fixture
@@ -194,6 +194,14 @@ def test_wrong_encoding_of_rightarrow(self, datafiles, filename: str, ebd_key: s
194194
table_e0462,
195195
id="E_0462 with gray outer lefts",
196196
),
197+
pytest.param(
198+
"ebd20221128.docx",
199+
"E_0097",
200+
"7.56 AD: Austausch der Lieferantenausfallarbeitsclearingliste (Einzelanforderung)",
201+
"7.56.1 E_0097_Marktlokationen mit LF-AACL abgleichen",
202+
table_e0097,
203+
id="E_0097 contains step numbers with *",
204+
),
197205
],
198206
)
199207
def test_convert_docx_table_to_ebd_table(
@@ -278,12 +286,14 @@ def test_extraction(self, datafiles, get_ebd_keys_and_files: List[Tuple[str, str
278286
issue_number = "74"
279287
case _:
280288
raise
289+
error_msg = f"Error while scraping '{ebd_key}' (#{issue_number}): {value_error}"
290+
pytest.skip(error_msg)
281291
except UnboundLocalError as unbound_error:
282292
match unbound_error.args[0]:
283293
case "cannot access local variable 'role' where it is not associated with a value":
284294
# https://github.com/Hochfrequenz/ebd_docx_to_table/issues/22
285295
issue_number = "22"
286296
case _:
287297
raise
288-
error_msg = f"Error while scraping '{ebd_key}' (#{issue_number})"
298+
error_msg = f"Error while scraping '{ebd_key}' (#{issue_number}): {unbound_error}"
289299
pytest.skip(error_msg)

0 commit comments

Comments
 (0)