Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 78 additions & 5 deletions src/ebddocx2table/docxtableconverter.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from docx.table import Table, _Cell, _Row # type:ignore[import]
from ebdtable2graph.models import EbdTable, EbdTableRow, EbdTableSubRow
from ebdtable2graph.models.ebd_table import _STEP_NUMBER_REGEX, EbdCheckResult, EbdTableMetaData, MultiStepInstruction
from more_itertools import first, first_true
from more_itertools import first, first_true, last

_logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -219,15 +219,15 @@ def _handle_single_table(
table: Table,
multi_step_instructions: List[MultiStepInstruction],
row_offset: int,
rows: List[EbdTableRow],
sub_rows: List[EbdTableSubRow],
rows: list[EbdTableRow],
sub_rows: list[EbdTableSubRow],
) -> None:
"""
Handles a single table (out of possible multiple tables for 1 EBD).
The results are written into rows, sub_rows and multi_step_instructions. Those will be modified.
"""
use_cases: List[str] = []
for enhanced_table_row in self._enhance_list_view(table=table, row_offset=row_offset):
use_cases: list[str] = []
for row_index, enhanced_table_row in enumerate(self._enhance_list_view(table=table, row_offset=row_offset)):
if enhanced_table_row.sub_row_position == _EbdSubRowPosition.UPPER:
use_cases = _get_use_cases(enhanced_table_row.cells)
sub_rows = [] # clear list every second entry
Expand All @@ -236,6 +236,9 @@ def _handle_single_table(
boolean_outcome, subsequent_step_number = _read_subsequent_step_cell(
enhanced_table_row.cells[len(use_cases) + self._column_index_check_result]
)
if step_number.endswith("*"):
self._handle_single_table_star_exception(table, multi_step_instructions, row_offset, rows, row_index)
break
sub_row = EbdTableSubRow(
check_result=EbdCheckResult(subsequent_step_number=subsequent_step_number, result=boolean_outcome),
result_code=enhanced_table_row.cells[len(use_cases) + self._column_index_result_code].text.strip()
Expand Down Expand Up @@ -263,6 +266,76 @@ def _handle_single_table(
)
)

# see above boolean_outcome and subsequent_step_number could be ignored iff schemes of *-numbers are always the same
# pylint:disable=too-many-locals
def _handle_single_table_star_exception(
    self,
    table: Table,
    multi_step_instructions: list[MultiStepInstruction],
    row_offset: int,
    rows: list[EbdTableRow],
    row_index: int,
) -> None:
    """
    Completes the table when the handling of a single table (out of possible multiple tables for 1 EBD) hit a
    step whose step number ends with "*", i.e. a step with several instructions.
    Every row of the enhanced table from row_index up to its end is converted into an individual step.
    The step numbers of those artificial steps continue the numbering of the last entry of rows.
    The result code and the note found in the row at row_index are shared by all artificial steps.
    The results are written into rows and multi_step_instructions. Those will be modified.

    The table is enhanced again (using table and row_offset), row_index refers to a row of that enhanced table.
    Raises an IndexError if row_index is not a valid index of the enhanced table.
    Raises a ValueError if rows is empty or if the step number of its last entry is not an integer.
    """
    complete_table = self._enhance_list_view(table=table, row_offset=row_offset)
    star_row = complete_table[row_index]
    use_cases = _get_use_cases(star_row.cells)
    # every column index is shifted by the number of use cases found in the starred row
    column_offset = len(use_cases)
    star_case_result_code = star_row.cells[column_offset + self._column_index_result_code].text.strip() or None
    star_case_note = star_row.cells[column_offset + self._column_index_note].text.strip() or None
    index_of_last_row = len(complete_table) - 1
    for current_index in range(row_index, len(complete_table)):
        enhanced_table_row = complete_table[current_index]
        # rows grows by one entry per iteration, so this continues the numbering step by step
        step_number = str(int(last(rows).step_number) + 1)
        description = enhanced_table_row.cells[column_offset + self._column_index_description].text.strip()
        boolean_outcome, subsequent_step_number = _read_subsequent_step_cell(
            enhanced_table_row.cells[column_offset + self._column_index_check_result]
        )
        # the last artificial step ends the table, all the others lead to their direct successor
        next_step = "Ende" if current_index == index_of_last_row else str(int(step_number) + 1)
        rows.append(
            EbdTableRow(
                description=description,
                step_number=step_number,
                sub_rows=[
                    # outcome as read from the check result cell, combined with the shared code and note
                    EbdTableSubRow(
                        check_result=EbdCheckResult(
                            subsequent_step_number=subsequent_step_number, result=boolean_outcome
                        ),
                        result_code=star_case_result_code,
                        note=star_case_note,
                    ),
                    # point to next step
                    EbdTableSubRow(
                        check_result=EbdCheckResult(subsequent_step_number=next_step, result=True),
                        result_code=None,
                        note=None,
                    ),
                ],
                use_cases=use_cases or None,
            )
        )
        _logger.debug("Successfully added artificial row #%s ('%s')", step_number, description)

        if enhanced_table_row.multi_step_instruction_text:
            multi_step_instructions.append(
                MultiStepInstruction(
                    first_step_number_affected=step_number,
                    instruction_text=enhanced_table_row.multi_step_instruction_text,
                )
            )

def convert_docx_tables_to_ebd_table(self) -> EbdTable:
"""
Converts the raw docx tables of an EBD to an EbdTable.
Expand Down
152 changes: 152 additions & 0 deletions unittests/examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,158 @@
],
)

# E_0097 is an example of a table that contains step numbers with a "*" suffix (e.g. '7*').
# This is the EbdTable the converter is expected to produce for it: rows that carried a starred step number
# show up with artificially incremented step numbers (see the inline comments on steps 8 and 9).
table_e0097 = EbdTable(
metadata=EbdTableMetaData(
ebd_code="E_0097",
chapter="7.56 AD: Austausch der Lieferantenausfallarbeitsclearingliste (Einzelanforderung)",
sub_chapter="7.56.1 E_0097_Marktlokationen mit LF-AACL abgleichen",
role="LF",
),
rows=[
EbdTableRow(
step_number="1",
description="Entspricht die Gültigkeit (Monat) dem angefragten Zeit-raum?",
sub_rows=[
EbdTableSubRow(
check_result=EbdCheckResult(result=False, subsequent_step_number=None),
result_code="A01",
note="Cluster: Ablehnung der gesamten Liste\nZeitraum nicht plausibel",
),
EbdTableSubRow(
check_result=EbdCheckResult(result=True, subsequent_step_number="2"), result_code=None, note=None
),
],
use_cases=None,
),
EbdTableRow(
step_number="2",
description="Entspricht der MaBiS-ZP dem angefragten MaBiS-ZP?",
sub_rows=[
EbdTableSubRow(
check_result=EbdCheckResult(result=False, subsequent_step_number=None),
result_code="A02",
note="Cluster: Ablehnung der gesamten Liste \nMaBiS-ZP entspricht nicht dem angefragten MaBiS-ZP",
),
EbdTableSubRow(
check_result=EbdCheckResult(result=True, subsequent_step_number="3"), result_code=None, note=None
),
],
use_cases=None,
),
EbdTableRow(
step_number="3",
description="Entspricht die Versionsangabe in der LF-AACL der Versionsangabe der LF-AASZR, zu der eine LF-AACL angefordert wurde?",
sub_rows=[
EbdTableSubRow(
check_result=EbdCheckResult(result=False, subsequent_step_number=None),
result_code="A03",
note="Cluster: Ablehnung der gesamten Liste \nVersion nicht zugelassen",
),
EbdTableSubRow(
check_result=EbdCheckResult(result=True, subsequent_step_number="4"), result_code=None, note=None
),
],
use_cases=None,
),
EbdTableRow(
step_number="4",
description="Ist eine erwartete Marktlokation in der LF-AACL nicht enthalten?",
sub_rows=[
EbdTableSubRow(
check_result=EbdCheckResult(result=True, subsequent_step_number=None),
result_code="A04",
note="Cluster: Korrekturliste wegen Ablehnung\nZusätzlicher Datensatz / ergänzte Marktlokation",
),
EbdTableSubRow(
check_result=EbdCheckResult(result=False, subsequent_step_number="5"), result_code=None, note=None
),
],
use_cases=None,
),
EbdTableRow(
step_number="5",
description="Ist in der LF-AACL eine Marktlokation enthalten, die im Bilanzierungsmonat dem LF zur Bilanzierung nicht zugeordnet ist?",
sub_rows=[
EbdTableSubRow(
check_result=EbdCheckResult(result=True, subsequent_step_number=None),
result_code="A05",
note="Cluster: Korrekturliste wegen Ablehnung\nMarktlokation falschem LF zugeordnet",
),
EbdTableSubRow(
check_result=EbdCheckResult(result=False, subsequent_step_number="6"), result_code=None, note=None
),
],
use_cases=None,
),
EbdTableRow(
step_number="6",
description="Ist die in der LF-AACL enthaltene Marktlokation dem MaBiS-ZP zugeordnet?",
sub_rows=[
EbdTableSubRow(
check_result=EbdCheckResult(result=False, subsequent_step_number=None),
result_code="A06",
note="Cluster: Korrekturliste wegen Ablehnung\nZu viele Marktlokationen enthalten / entfallene Marktlokation",
),
EbdTableSubRow(
check_result=EbdCheckResult(result=True, subsequent_step_number="7"), result_code=None, note=None
),
],
use_cases=None,
),
EbdTableRow(
step_number="7",
description="Entspricht das Bilanzierungsgebiet dem zwischen NB und LF ausgetauschten Bilanzierungsgebiet?",
sub_rows=[
EbdTableSubRow(
check_result=EbdCheckResult(result=False, subsequent_step_number=None),
result_code="A07",
note="Cluster: Korrekturliste wegen Ablehnung\nBilanzierungsrel. Daten nicht korrekt / fehlen",
),
EbdTableSubRow(
check_result=EbdCheckResult(result=True, subsequent_step_number="8"), result_code=None, note=None
),
],
use_cases=None,
),
# steps 8 and 9 repeat the result code and the note of step 7; only their descriptions differ
EbdTableRow(
step_number="8", # artificially incremented step number (was '7*')
description="Entspricht der Bilanzkreis dem zwischen NB und LF ausgetauschten Bilanzkreis?", # todo: check diff for "ausge-tauschten" in original .docx
sub_rows=[
EbdTableSubRow(
check_result=EbdCheckResult(result=False, subsequent_step_number=None),
result_code="A07",
note="Cluster: Korrekturliste wegen Ablehnung\nBilanzierungsrel. Daten nicht korrekt / fehlen",
),
EbdTableSubRow(
check_result=EbdCheckResult(result=True, subsequent_step_number="9"), result_code=None, note=None
),
],
use_cases=None,
),
EbdTableRow(
step_number="9", # artificially incremented step number (was '7*')
description="Entspricht die tatsächliche Ausfallarbeitsmenge der er-warteten Ausfallarbeitsmenge?",
sub_rows=[
EbdTableSubRow(
check_result=EbdCheckResult(result=False, subsequent_step_number=None),
result_code="A07",
note="Cluster: Korrekturliste wegen Ablehnung\nBilanzierungsrel. Daten nicht korrekt / fehlen",
),
# the last step of the table points to "Ende" instead of a follow-up step number
EbdTableSubRow(
check_result=EbdCheckResult(result=True, subsequent_step_number="Ende"), result_code=None, note=None
),
],
use_cases=None,
),
],
# a single instruction text, with step 4 as the first step it affects
multi_step_instructions=[
MultiStepInstruction(
first_step_number_affected="4", instruction_text="Je Marktlokation erfolgen die nachfolgenden Prüfungen:"
)
],
)

# E_0901 spans over multiple pages, let the fun begin
table_e0901 = EbdTable(
metadata=EbdTableMetaData(
Expand Down
14 changes: 12 additions & 2 deletions unittests/test_highlevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from ebddocx2table.docxtableconverter import DocxTableConverter

from . import get_all_ebd_keys, get_document, get_ebd_docx_tables
from .examples import table_e0003, table_e0453, table_e0462, table_e0901
from .examples import table_e0003, table_e0097, table_e0453, table_e0462, table_e0901


@pytest.fixture
Expand Down Expand Up @@ -194,6 +194,14 @@ def test_wrong_encoding_of_rightarrow(self, datafiles, filename: str, ebd_key: s
table_e0462,
id="E_0462 with gray outer lefts",
),
pytest.param(
"ebd20221128.docx",
"E_0097",
"7.56 AD: Austausch der Lieferantenausfallarbeitsclearingliste (Einzelanforderung)",
"7.56.1 E_0097_Marktlokationen mit LF-AACL abgleichen",
table_e0097,
id="E_0097 contains step numbers with *",
),
],
)
def test_convert_docx_table_to_ebd_table(
Expand Down Expand Up @@ -278,12 +286,14 @@ def test_extraction(self, datafiles, get_ebd_keys_and_files: List[Tuple[str, str
issue_number = "74"
case _:
raise
error_msg = f"Error while scraping '{ebd_key}' (#{issue_number}): {value_error}"
pytest.skip(error_msg)
except UnboundLocalError as unbound_error:
match unbound_error.args[0]:
case "cannot access local variable 'role' where it is not associated with a value":
# https://github.com/Hochfrequenz/ebd_docx_to_table/issues/22
issue_number = "22"
case _:
raise
error_msg = f"Error while scraping '{ebd_key}' (#{issue_number})"
error_msg = f"Error while scraping '{ebd_key}' (#{issue_number}): {unbound_error}"
pytest.skip(error_msg)