-
Notifications
You must be signed in to change notification settings - Fork 1
✨Support tables spanning over multiple pages #7
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
cbfb77a
c9a5625
33645af
4059267
a8a4012
6e4b749
c2d49d5
86b2488
8257a7b
2f484ba
f8d9306
91297fe
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -9,6 +9,7 @@ | |
| from docx.table import Table, _Cell, _Row # type:ignore[import] | ||
| from ebdtable2graph import EbdTable, EbdTableRow, EbdTableSubRow | ||
| from ebdtable2graph.models.ebd_table import EbdCheckResult, EbdTableMetaData | ||
| from more_itertools import first | ||
|
|
||
|
|
||
| def _is_pruefende_rolle_cell(cell: _Cell) -> bool: | ||
|
|
@@ -71,11 +72,11 @@ class DocxTableConverter: | |
| converts docx tables to EbdTables | ||
| """ | ||
|
|
||
| def __init__(self, docx_table: Table, ebd_key: str, chapter: str, sub_chapter: str): | ||
| def __init__(self, docx_tables: List[Table], ebd_key: str, chapter: str, sub_chapter: str): | ||
| """ | ||
| the constructor initializes the instance and reads some metadata from the table header | ||
| the constructor initializes the instance and reads some metadata from the (first) table header | ||
| """ | ||
| self._docx_table = docx_table | ||
| self._docx_tables = docx_tables | ||
| self._column_index_step_number: int | ||
| self._column_index_description: int | ||
| self._column_index_check_result: int | ||
|
|
@@ -85,7 +86,7 @@ def __init__(self, docx_table: Table, ebd_key: str, chapter: str, sub_chapter: s | |
| for row_index in range(0, 2): # the first two lines/rows are the header of the table. | ||
| # In the constructor we just want to read the metadata from the table. | ||
| # For this purpose the first two lines are enough. | ||
| for column_index, table_cell in enumerate(docx_table.row_cells(row_index)): | ||
| for column_index, table_cell in enumerate(first(docx_tables).row_cells(row_index)): | ||
| if row_index == 0 and _is_pruefende_rolle_cell(table_cell): | ||
| role = table_cell.text.split(":")[1].strip() | ||
| break # because the prüfende rolle is always a full row with identical column cells | ||
|
|
@@ -104,15 +105,15 @@ def __init__(self, docx_table: Table, ebd_key: str, chapter: str, sub_chapter: s | |
| self._column_index_note = column_index | ||
| self._metadata = EbdTableMetaData(ebd_code=ebd_key, sub_chapter=sub_chapter, chapter=chapter, role=role) | ||
|
|
||
| def convert_docx_table_to_ebd_table(self) -> EbdTable: | ||
| def _handle_single_table( | ||
| self, table: Table, row_offset: int, rows: List[EbdTableRow], sub_rows: List[EbdTableSubRow] | ||
| ) -> None: | ||
| """ | ||
| Converts the raw docx table of an EBD to an EbdTable. | ||
| The latter contains the same data but in an easily accessible format that can be used to e.g. plot real graphs. | ||
| Handles a single table (out of possible multiple tables for 1 EBD). | ||
| The results are written into rows and sub_rows. Those will be modified. | ||
| """ | ||
| rows: List[EbdTableRow] = [] | ||
| sub_rows: List[EbdTableSubRow] = [] | ||
| for table_row, sub_row_position in zip( | ||
| self._docx_table.rows[self._row_index_last_header + 1 :], | ||
| table.rows[row_offset:], | ||
| cycle([_EbdSubRowPosition.UPPER, _EbdSubRowPosition.LOWER]), | ||
| ): | ||
|
Comment on lines
107
to
118
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. die funktion, die vorher die eine einzige tabelle gehandlet hat, ist jetzt eine private methode, die ihre ergebnisse an zwei übergebene listen (rows und sub_rows) anhängt. |
||
| row_cells = list(_sort_columns_in_row(table_row)) | ||
|
|
@@ -124,12 +125,10 @@ def convert_docx_table_to_ebd_table(self) -> EbdTable: | |
| boolean_outcome, subsequent_step_number = _read_subsequent_step_cell( | ||
| row_cells[self._column_index_check_result] | ||
| ) | ||
| result_code = row_cells[self._column_index_result_code].text.strip() | ||
| note = row_cells[self._column_index_note].text.strip() | ||
| sub_row = EbdTableSubRow( | ||
| check_result=EbdCheckResult(subsequent_step_number=subsequent_step_number, result=boolean_outcome), | ||
| result_code=result_code or None, | ||
| note=note or None, | ||
| result_code=row_cells[self._column_index_result_code].text.strip() or None, | ||
| note=row_cells[self._column_index_note].text.strip() or None, | ||
|
Comment on lines
-127
to
+131
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. pylint hat gemeckert: too-many-locals (also zu viele lokale variablen). das ist so ein klassiker: es gefällt zwar dem linter, aber die lesbarkeit leidet vielleicht ein bisschen. |
||
| ) | ||
| sub_rows.append(sub_row) | ||
| if sub_row_position == _EbdSubRowPosition.LOWER: | ||
|
|
@@ -139,6 +138,19 @@ def convert_docx_table_to_ebd_table(self) -> EbdTable: | |
| sub_rows=sub_rows, | ||
| ) | ||
| rows.append(row) | ||
|
|
||
| def convert_docx_tables_to_ebd_table(self) -> EbdTable: | ||
| """ | ||
| Converts the raw docx tables of an EBD to an EbdTable. | ||
| The latter contains the same data but in an easily accessible format that can be used to e.g. plot real graphs. | ||
| """ | ||
| rows: List[EbdTableRow] = [] | ||
| sub_rows: List[EbdTableSubRow] = [] | ||
| for table_index, table in enumerate(self._docx_tables): | ||
| offset: int = 0 | ||
| if table_index == 0: | ||
| offset = self._row_index_last_header + 1 | ||
| self._handle_single_table(table, offset, rows, sub_rows) | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. die vormals einzige konvertierungsfunktion wird jetzt hier aufgerufen. |
||
| result = EbdTable( | ||
| rows=rows, | ||
| metadata=self._metadata, | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
habe den aus der loop rausgezogen, damit wir weiter unten weiter drüber loopen können (an der stelle, wo wir die erste treffende tabelle gefunden haben).