diff --git a/src/ebddocx2table/docxtableconverter.py b/src/ebddocx2table/docxtableconverter.py
index 93849c0..6b4592d 100644
--- a/src/ebddocx2table/docxtableconverter.py
+++ b/src/ebddocx2table/docxtableconverter.py
@@ -11,7 +11,7 @@
 from docx.table import Table, _Cell, _Row  # type:ignore[import]
 from ebdtable2graph.models import EbdTable, EbdTableRow, EbdTableSubRow
 from ebdtable2graph.models.ebd_table import _STEP_NUMBER_REGEX, EbdCheckResult, EbdTableMetaData, MultiStepInstruction
-from more_itertools import first, first_true
+from more_itertools import first, first_true, last
 
 _logger = logging.getLogger(__name__)
 
@@ -219,15 +219,15 @@ def _handle_single_table(
         table: Table,
         multi_step_instructions: List[MultiStepInstruction],
         row_offset: int,
-        rows: List[EbdTableRow],
-        sub_rows: List[EbdTableSubRow],
+        rows: list[EbdTableRow],
+        sub_rows: list[EbdTableSubRow],
     ) -> None:
         """
         Handles a single table (out of possible multiple tables for 1 EBD).
         The results are written into rows, sub_rows and multi_step_instructions. Those will be modified.
         """
-        use_cases: List[str] = []
-        for enhanced_table_row in self._enhance_list_view(table=table, row_offset=row_offset):
+        use_cases: list[str] = []
+        for row_index, enhanced_table_row in enumerate(self._enhance_list_view(table=table, row_offset=row_offset)):
             if enhanced_table_row.sub_row_position == _EbdSubRowPosition.UPPER:
                 use_cases = _get_use_cases(enhanced_table_row.cells)
                 sub_rows = []  # clear list every second entry
@@ -236,6 +236,9 @@ def _handle_single_table(
             boolean_outcome, subsequent_step_number = _read_subsequent_step_cell(
                 enhanced_table_row.cells[len(use_cases) + self._column_index_check_result]
             )
+            if step_number.endswith("*"):
+                self._handle_single_table_star_exception(table, multi_step_instructions, row_offset, rows, row_index)
+                break
             sub_row = EbdTableSubRow(
                 check_result=EbdCheckResult(subsequent_step_number=subsequent_step_number, result=boolean_outcome),
                 result_code=enhanced_table_row.cells[len(use_cases) + self._column_index_result_code].text.strip()
@@ -263,6 +266,73 @@ def _handle_single_table(
                     )
                 )
 
+    def _handle_single_table_star_exception(  # pylint:disable=too-many-locals
+        self,
+        table: Table,
+        multi_step_instructions: list[MultiStepInstruction],
+        row_offset: int,
+        rows: list[EbdTableRow],
+        row_index: int,
+    ) -> None:
+        """
+        Completes a single table (out of possible multiple tables for 1 EBD) after a step number ending with "*" was
+        hit. Every table row from row_index onwards is turned into an artificial step of its own; the artificial
+        steps are numbered consecutively, starting right after the step number of the last entry of rows.
+        The results are written into rows and multi_step_instructions. Those will be modified.
+        Raises a ValueError if rows is empty or if the step number of its last entry is not an integer.
+        """
+        complete_table = self._enhance_list_view(table=table, row_offset=row_offset)
+        first_star_row = complete_table[row_index]
+        use_cases = _get_use_cases(first_star_row.cells)
+        column_offset = len(use_cases)
+        # result code and note are read from the first "*" row only and are re-used for all artificial steps
+        star_case_result_code = (
+            first_star_row.cells[column_offset + self._column_index_result_code].text.strip() or None
+        )
+        star_case_note = first_star_row.cells[column_offset + self._column_index_note].text.strip() or None
+        if not rows:
+            raise ValueError("A step number ending with '*' requires at least one preceding step")
+        first_artificial_step_number = int(last(rows).step_number) + 1
+        remaining_rows = complete_table[row_index:]
+        for position, enhanced_table_row in enumerate(remaining_rows):
+            step_number = str(first_artificial_step_number + position)
+            is_last_row = position == len(remaining_rows) - 1
+            next_step = "Ende" if is_last_row else str(first_artificial_step_number + position + 1)
+            description = enhanced_table_row.cells[column_offset + self._column_index_description].text.strip()
+            boolean_outcome, subsequent_step_number = _read_subsequent_step_cell(
+                enhanced_table_row.cells[column_offset + self._column_index_check_result]
+            )
+            row = EbdTableRow(
+                description=description,
+                step_number=step_number,
+                sub_rows=[
+                    # the outcome exactly as it is stated in the check result cell of the document
+                    EbdTableSubRow(
+                        check_result=EbdCheckResult(
+                            subsequent_step_number=subsequent_step_number, result=boolean_outcome
+                        ),
+                        result_code=star_case_result_code,
+                        note=star_case_note,
+                    ),
+                    # the opposite outcome points to the next artificial step (or to the end of the table)
+                    EbdTableSubRow(
+                        check_result=EbdCheckResult(subsequent_step_number=next_step, result=not boolean_outcome),
+                        result_code=None,
+                        note=None,
+                    ),
+                ],
+                use_cases=use_cases or None,
+            )
+            rows.append(row)
+            _logger.debug("Successfully added artificial row #%s ('%s')", step_number, description)
+            if enhanced_table_row.multi_step_instruction_text:
+                multi_step_instructions.append(
+                    MultiStepInstruction(
+                        first_step_number_affected=step_number,
+                        instruction_text=enhanced_table_row.multi_step_instruction_text,
+                    )
+                )
+
     def convert_docx_tables_to_ebd_table(self) -> EbdTable:
         """
         Converts the raw docx tables of an EBD to an EbdTable.
diff --git a/unittests/examples.py b/unittests/examples.py index c283fac..678bfbf 100644 --- a/unittests/examples.py +++ b/unittests/examples.py @@ -56,6 +56,158 @@ ], ) +# E_0097 is an example of a table that has rows with "*" +table_e0097 = EbdTable( + metadata=EbdTableMetaData( + ebd_code="E_0097", + chapter="7.56 AD: Austausch der Lieferantenausfallarbeitsclearingliste (Einzelanforderung)", + sub_chapter="7.56.1 E_0097_Marktlokationen mit LF-AACL abgleichen", + role="LF", + ), + rows=[ + EbdTableRow( + step_number="1", + description="Entspricht die Gültigkeit (Monat) dem angefragten Zeit-raum?", + sub_rows=[ + EbdTableSubRow( + check_result=EbdCheckResult(result=False, subsequent_step_number=None), + result_code="A01", + note="Cluster: Ablehnung der gesamten Liste\nZeitraum nicht plausibel", + ), + EbdTableSubRow( + check_result=EbdCheckResult(result=True, subsequent_step_number="2"), result_code=None, note=None + ), + ], + use_cases=None, + ), + EbdTableRow( + step_number="2", + description="Entspricht der MaBiS-ZP dem angefragten MaBiS-ZP?", + sub_rows=[ + EbdTableSubRow( + check_result=EbdCheckResult(result=False, subsequent_step_number=None), + result_code="A02", + note="Cluster: Ablehnung der gesamten Liste \nMaBiS-ZP entspricht nicht dem angefragten MaBiS-ZP", + ), + EbdTableSubRow( + check_result=EbdCheckResult(result=True, subsequent_step_number="3"), result_code=None, note=None + ), + ], + use_cases=None, + ), + EbdTableRow( + step_number="3", + description="Entspricht die Versionsangabe in der LF-AACL der Versionsangabe der LF-AASZR, zu der eine LF-AACL angefordert wurde?", + sub_rows=[ + EbdTableSubRow( + check_result=EbdCheckResult(result=False, subsequent_step_number=None), + result_code="A03", + note="Cluster: Ablehnung der gesamten Liste \nVersion nicht zugelassen", + ), + EbdTableSubRow( + check_result=EbdCheckResult(result=True, subsequent_step_number="4"), result_code=None, note=None + ), + ], + use_cases=None, + ), + EbdTableRow( + 
step_number="4", + description="Ist eine erwartete Marktlokation in der LF-AACL nicht enthalten?", + sub_rows=[ + EbdTableSubRow( + check_result=EbdCheckResult(result=True, subsequent_step_number=None), + result_code="A04", + note="Cluster: Korrekturliste wegen Ablehnung\nZusätzlicher Datensatz / ergänzte Marktlokation", + ), + EbdTableSubRow( + check_result=EbdCheckResult(result=False, subsequent_step_number="5"), result_code=None, note=None + ), + ], + use_cases=None, + ), + EbdTableRow( + step_number="5", + description="Ist in der LF-AACL eine Marktlokation enthalten, die im Bilanzierungsmonat dem LF zur Bilanzierung nicht zugeordnet ist?", + sub_rows=[ + EbdTableSubRow( + check_result=EbdCheckResult(result=True, subsequent_step_number=None), + result_code="A05", + note="Cluster: Korrekturliste wegen Ablehnung\nMarktlokation falschem LF zugeordnet", + ), + EbdTableSubRow( + check_result=EbdCheckResult(result=False, subsequent_step_number="6"), result_code=None, note=None + ), + ], + use_cases=None, + ), + EbdTableRow( + step_number="6", + description="Ist die in der LF-AACL enthaltene Marktlokation dem MaBiS-ZP zugeordnet?", + sub_rows=[ + EbdTableSubRow( + check_result=EbdCheckResult(result=False, subsequent_step_number=None), + result_code="A06", + note="Cluster: Korrekturliste wegen Ablehnung\nZu viele Marktlokationen enthalten / entfallene Marktlokation", + ), + EbdTableSubRow( + check_result=EbdCheckResult(result=True, subsequent_step_number="7"), result_code=None, note=None + ), + ], + use_cases=None, + ), + EbdTableRow( + step_number="7", + description="Entspricht das Bilanzierungsgebiet dem zwischen NB und LF ausgetauschten Bilanzierungsgebiet?", + sub_rows=[ + EbdTableSubRow( + check_result=EbdCheckResult(result=False, subsequent_step_number=None), + result_code="A07", + note="Cluster: Korrekturliste wegen Ablehnung\nBilanzierungsrel. 
Daten nicht korrekt / fehlen", + ), + EbdTableSubRow( + check_result=EbdCheckResult(result=True, subsequent_step_number="8"), result_code=None, note=None + ), + ], + use_cases=None, + ), + EbdTableRow( + step_number="8", # artificially incremented step number (was '7*') + description="Entspricht der Bilanzkreis dem zwischen NB und LF ausgetauschten Bilanzkreis?", # todo: check diff for "ausge-tauschten" in original .docx + sub_rows=[ + EbdTableSubRow( + check_result=EbdCheckResult(result=False, subsequent_step_number=None), + result_code="A07", + note="Cluster: Korrekturliste wegen Ablehnung\nBilanzierungsrel. Daten nicht korrekt / fehlen", + ), + EbdTableSubRow( + check_result=EbdCheckResult(result=True, subsequent_step_number="9"), result_code=None, note=None + ), + ], + use_cases=None, + ), + EbdTableRow( + step_number="9", # artificially incremented step number (was '7*') + description="Entspricht die tatsächliche Ausfallarbeitsmenge der er-warteten Ausfallarbeitsmenge?", + sub_rows=[ + EbdTableSubRow( + check_result=EbdCheckResult(result=False, subsequent_step_number=None), + result_code="A07", + note="Cluster: Korrekturliste wegen Ablehnung\nBilanzierungsrel. Daten nicht korrekt / fehlen", + ), + EbdTableSubRow( + check_result=EbdCheckResult(result=True, subsequent_step_number="Ende"), result_code=None, note=None + ), + ], + use_cases=None, + ), + ], + multi_step_instructions=[ + MultiStepInstruction( + first_step_number_affected="4", instruction_text="Je Marktlokation erfolgen die nachfolgenden Prüfungen:" + ) + ], +) + # E_0901 spans over multiple pages, let the fun begin table_e0901 = EbdTable( metadata=EbdTableMetaData( diff --git a/unittests/test_highlevel.py b/unittests/test_highlevel.py index 82966e4..b6033ca 100644 --- a/unittests/test_highlevel.py +++ b/unittests/test_highlevel.py @@ -8,7 +8,7 @@ from ebddocx2table.docxtableconverter import DocxTableConverter from . 
import get_all_ebd_keys, get_document, get_ebd_docx_tables -from .examples import table_e0003, table_e0453, table_e0462, table_e0901 +from .examples import table_e0003, table_e0097, table_e0453, table_e0462, table_e0901 @pytest.fixture @@ -194,6 +194,14 @@ def test_wrong_encoding_of_rightarrow(self, datafiles, filename: str, ebd_key: s table_e0462, id="E_0462 with gray outer lefts", ), + pytest.param( + "ebd20221128.docx", + "E_0097", + "7.56 AD: Austausch der Lieferantenausfallarbeitsclearingliste (Einzelanforderung)", + "7.56.1 E_0097_Marktlokationen mit LF-AACL abgleichen", + table_e0097, + id="E_0097 contains step numbers with *", + ), ], ) def test_convert_docx_table_to_ebd_table( @@ -278,6 +286,8 @@ def test_extraction(self, datafiles, get_ebd_keys_and_files: List[Tuple[str, str issue_number = "74" case _: raise + error_msg = f"Error while scraping '{ebd_key}' (#{issue_number}): {value_error}" + pytest.skip(error_msg) except UnboundLocalError as unbound_error: match unbound_error.args[0]: case "cannot access local variable 'role' where it is not associated with a value": @@ -285,5 +295,5 @@ def test_extraction(self, datafiles, get_ebd_keys_and_files: List[Tuple[str, str issue_number = "22" case _: raise - error_msg = f"Error while scraping '{ebd_key}' (#{issue_number})" + error_msg = f"Error while scraping '{ebd_key}' (#{issue_number}): {unbound_error}" pytest.skip(error_msg)