diff --git a/dev_requirements/requirements-coverage.txt b/dev_requirements/requirements-coverage.txt index f0dd84b..c2c489a 100644 --- a/dev_requirements/requirements-coverage.txt +++ b/dev_requirements/requirements-coverage.txt @@ -1,8 +1,9 @@ +# SHA1:6dafbcf610e9f81897b65ee9142715ab2e793f9e # -# This file is autogenerated by pip-compile with python 3.10 +# This file is autogenerated by pip-compile-multi # To update, run: # -# pip-compile requirements-coverage.in +# pip-compile-multi # -coverage==7.5.0 - # via -r dev_requirements/requirements-coverage.in +coverage==7.5.1 + # via -r dev_requirements\requirements-coverage.in diff --git a/dev_requirements/requirements-formatting.txt b/dev_requirements/requirements-formatting.txt index 04287da..1e8089f 100644 --- a/dev_requirements/requirements-formatting.txt +++ b/dev_requirements/requirements-formatting.txt @@ -1,20 +1,23 @@ +# SHA1:2c7ffcd29222de3114c7f7994911f1b69d06b6b3 # -# This file is autogenerated by pip-compile with python 3.10 +# This file is autogenerated by pip-compile-multi # To update, run: # -# pip-compile requirements-formatting.in +# pip-compile-multi # black==24.4.2 - # via -r dev_requirements/requirements-formatting.in + # via -r dev_requirements\requirements-formatting.in click==8.1.7 # via black +colorama==0.4.6 + # via click isort==5.13.2 - # via -r dev_requirements/requirements-formatting.in + # via -r dev_requirements\requirements-formatting.in mypy-extensions==1.0.0 # via black -packaging==23.0 +packaging==24.0 # via black -pathspec==0.11.0 +pathspec==0.12.1 # via black -platformdirs==3.1.0 +platformdirs==4.2.1 # via black diff --git a/dev_requirements/requirements-linting.txt b/dev_requirements/requirements-linting.txt index 722ed2e..1214645 100644 --- a/dev_requirements/requirements-linting.txt +++ b/dev_requirements/requirements-linting.txt @@ -1,20 +1,23 @@ +# SHA1:0e15f8789b9d62fe90d1f1b0b6a7e32f13b99b19 # -# This file is autogenerated by pip-compile with python 3.10 +# This file is 
autogenerated by pip-compile-multi # To update, run: # -# pip-compile requirements-linting.in +# pip-compile-multi # astroid==3.1.0 # via pylint -dill==0.3.7 +colorama==0.4.6 + # via pylint +dill==0.3.8 # via pylint isort==5.13.2 # via pylint mccabe==0.7.0 # via pylint -platformdirs==3.1.0 +platformdirs==4.2.1 # via pylint pylint==3.1.0 - # via -r dev_requirements/requirements-linting.in -tomlkit==0.11.6 + # via -r dev_requirements\requirements-linting.in +tomlkit==0.12.4 # via pylint diff --git a/dev_requirements/requirements-packaging.txt b/dev_requirements/requirements-packaging.txt index 523a05f..908130a 100644 --- a/dev_requirements/requirements-packaging.txt +++ b/dev_requirements/requirements-packaging.txt @@ -1,55 +1,58 @@ +# SHA1:93e4fbf2b6cce1574fe3d5315360512fa9927699 # -# This file is autogenerated by pip-compile with Python 3.12 -# by the following command: +# This file is autogenerated by pip-compile-multi +# To update, run: # -# pip-compile dev_requirements/requirements-packaging.in +# pip-compile-multi # +backports-tarfile==1.1.1 + # via jaraco-context build==1.2.1 - # via -r dev_requirements/requirements-packaging.in + # via -r dev_requirements\requirements-packaging.in certifi==2024.2.2 # via requests -cffi==1.16.0 - # via cryptography charset-normalizer==3.3.2 # via requests -cryptography==42.0.5 - # via secretstorage -docutils==0.20.1 +colorama==0.4.6 + # via build +docutils==0.21.2 # via readme-renderer idna==3.7 # via requests -importlib-metadata==7.0.2 +importlib-metadata==7.1.0 # via # keyring # twine -jaraco-classes==3.3.1 +jaraco-classes==3.4.0 # via keyring -jeepney==0.8.0 - # via - # keyring - # secretstorage -keyring==24.3.1 +jaraco-context==5.3.0 + # via keyring +jaraco-functools==4.0.1 + # via keyring +keyring==25.2.0 # via twine markdown-it-py==3.0.0 # via rich mdurl==0.1.2 # via markdown-it-py more-itertools==10.2.0 - # via jaraco-classes -nh3==0.2.15 + # via + # jaraco-classes + # jaraco-functools +nh3==0.2.17 # via readme-renderer 
packaging==24.0 # via build pkginfo==1.10.0 # via twine -pycparser==2.21 - # via cffi -pygments==2.17.2 +pygments==2.18.0 # via # readme-renderer # rich -pyproject-hooks==1.0.0 +pyproject-hooks==1.1.0 # via build +pywin32-ctypes==0.2.2 + # via keyring readme-renderer==43.0 # via twine requests==2.31.0 @@ -62,10 +65,8 @@ rfc3986==2.0.0 # via twine rich==13.7.1 # via twine -secretstorage==3.3.3 - # via keyring twine==5.0.0 - # via -r dev_requirements/requirements-packaging.in + # via -r dev_requirements\requirements-packaging.in urllib3==2.2.1 # via # requests diff --git a/dev_requirements/requirements-tests.txt b/dev_requirements/requirements-tests.txt index a06e44b..65d05b4 100644 --- a/dev_requirements/requirements-tests.txt +++ b/dev_requirements/requirements-tests.txt @@ -1,23 +1,26 @@ +# SHA1:237de32e0e58475d734882e72523b53d0f1187a3 # -# This file is autogenerated by pip-compile with Python 3.11 -# by the following command: +# This file is autogenerated by pip-compile-multi +# To update, run: # -# pip-compile requirements-tests.in +# pip-compile-multi # attrs==23.2.0 # via pytest-subtests +colorama==0.4.6 + # via pytest iniconfig==2.0.0 # via pytest -packaging==23.0 +packaging==24.0 # via pytest pluggy==1.5.0 # via pytest pytest==8.2.0 # via - # -r dev_requirements/requirements-tests.in + # -r dev_requirements\requirements-tests.in # pytest-datafiles # pytest-subtests pytest-datafiles==3.0.0 - # via -r dev_requirements/requirements-tests.in + # via -r dev_requirements\requirements-tests.in pytest-subtests==0.12.1 - # via -r dev_requirements/requirements-tests.in + # via -r dev_requirements\requirements-tests.in diff --git a/dev_requirements/requirements-type_check.txt b/dev_requirements/requirements-type_check.txt index bdf1283..dac66b5 100644 --- a/dev_requirements/requirements-type_check.txt +++ b/dev_requirements/requirements-type_check.txt @@ -1,12 +1,13 @@ +# SHA1:7983aaa01d64547827c20395d77e248c41b2572f # -# This file is autogenerated by pip-compile with 
Python 3.11 -# by the following command: +# This file is autogenerated by pip-compile-multi +# To update, run: # -# pip-compile requirements-type_check.in +# pip-compile-multi # mypy==1.10.0 - # via -r dev_requirements/requirements-type_check.in + # via -r dev_requirements\requirements-type_check.in mypy-extensions==1.0.0 # via mypy -typing-extensions==4.8.0 +typing-extensions==4.11.0 # via mypy diff --git a/requirements.txt b/requirements.txt index a4c27fc..ecd90f4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,12 +2,12 @@ # This file is autogenerated by pip-compile with Python 3.11 # by the following command: # -# pip-compile '.\requirements.in' +# pip-compile pyproject.toml # attrs==23.2.0 # via - # -r requirements.in # cattrs + # ebdamame (pyproject.toml) # ebdtable2graph cattrs==22.2.0 # via ebdtable2graph @@ -16,9 +16,11 @@ certifi==2023.7.22 charset-normalizer==2.1.1 # via requests click==8.1.7 - # via -r requirements.in + # via ebdamame (pyproject.toml) +colorama==0.4.6 + # via click ebdtable2graph==0.1.20 - # via -r requirements.in + # via ebdamame (pyproject.toml) idna==3.7 # via requests lxml==4.9.3 @@ -27,16 +29,16 @@ lxml==4.9.3 # python-docx # svgutils more-itertools==10.2.0 - # via -r requirements.in + # via ebdamame (pyproject.toml) networkx==2.8.8 # via ebdtable2graph -python-docx==1.1.0 - # via -r requirements.in +python-docx==1.1.2 + # via ebdamame (pyproject.toml) requests==2.31.0 # via ebdtable2graph svgutils==0.3.4 # via ebdtable2graph -typing-extensions==4.8.0 +typing-extensions==4.11.0 # via python-docx urllib3==1.26.18 # via requests diff --git a/src/ebdamame/__init__.py b/src/ebdamame/__init__.py index 29d2bd9..f764191 100644 --- a/src/ebdamame/__init__.py +++ b/src/ebdamame/__init__.py @@ -10,15 +10,16 @@ from typing import Dict, Generator, Iterable, List, Optional, Tuple, Union import attrs -from docx import Document # type:ignore[import] -from docx.oxml import CT_P, CT_Tbl # type:ignore[import] -from docx.table import Table, 
_Cell # type:ignore[import] -from docx.text.paragraph import Paragraph # type:ignore[import] +import docx +from docx.document import Document as DocumentType +from docx.oxml import CT_P, CT_Tbl +from docx.table import Table, _Cell +from docx.text.paragraph import Paragraph _logger = logging.getLogger(__name__) -def get_document(docx_file_path: Path) -> Document: +def get_document(docx_file_path: Path) -> DocumentType: """ opens and returns the document specified in the docx_file_path using python-docx """ @@ -29,14 +30,14 @@ def get_document(docx_file_path: Path) -> Document: # but then switched from StringIO to BytesIO (without explicit 'utf-8') because of: # UnicodeDecodeError: 'charmap' codec can't decode byte 0x81 in position 605: character maps to try: - document = Document(source_stream) + document = docx.Document(source_stream) _logger.info("Successfully read the file '%s'", docx_file_path) return document finally: source_stream.close() -def _get_tables_and_paragaphs(document: Document) -> Generator[Union[Table, Paragraph], None, None]: +def _get_tables_and_paragraphs(document: DocumentType) -> Generator[Union[Table, Paragraph], None, None]: """ Yields tables and paragraphs from the given document in the order in which they occur in the document. 
This is helpful because document.tables and document.paragraphs are de-coupled and give you no information which @@ -116,7 +117,7 @@ def get_ebd_docx_tables(docx_file_path: Path, ebd_key: str) -> List[Table]: is_inside_subsection_of_requested_table: bool = False tables: List[Table] = [] - tables_and_paragraphs = _get_tables_and_paragaphs(document) + tables_and_paragraphs = _get_tables_and_paragraphs(document) for table_or_paragraph in tables_and_paragraphs: if isinstance(table_or_paragraph, Paragraph): paragraph: Paragraph = table_or_paragraph @@ -213,7 +214,8 @@ def _enrich_paragraphs_with_sections( subsection = 1 subsection_title: Optional[str] = None for paragraph in paragraphs: - match paragraph.style.style_id: + # since python-docx 1.1.2 there are type hints; seems like the style is not guaranteed to be not None + match paragraph.style.style_id: # type:ignore[union-attr] case "berschrift1": chapter = next(chapter_counter) chapter_title = paragraph.text.strip() diff --git a/src/ebdamame/docxtableconverter.py b/src/ebdamame/docxtableconverter.py index 2be8cec..c514987 100644 --- a/src/ebdamame/docxtableconverter.py +++ b/src/ebdamame/docxtableconverter.py @@ -9,7 +9,7 @@ from typing import Generator, List, Literal, Optional, Tuple import attrs -from docx.table import Table, _Cell, _Row # type:ignore[import] +from docx.table import Table, _Cell, _Row from ebdtable2graph.models import EbdTable, EbdTableRow, EbdTableSubRow from ebdtable2graph.models.ebd_table import _STEP_NUMBER_REGEX, EbdCheckResult, EbdTableMetaData, MultiStepInstruction from more_itertools import first, first_true, last @@ -46,6 +46,9 @@ def _get_index_of_first_column_with_step_number(cells: List[_Cell]) -> int: first_step_number_cell = first_true( cells, pred=lambda cell: _step_number_pattern.match(cell.text.strip()) is not None ) + if first_step_number_cell is None: + raise ValueError("No cell containing a valid step number found.") + step_number_column_index = 
cells.index(first_step_number_cell) _logger.debug("The step number is in column %i", step_number_column_index) return step_number_column_index diff --git a/unittests/__init__.py b/unittests/__init__.py index 0edf4dd..cff1806 100644 --- a/unittests/__init__.py +++ b/unittests/__init__.py @@ -6,13 +6,13 @@ from pathlib import Path from typing import Dict, List, Tuple -from docx import Document # type:ignore[import] -from docx.table import Table # type:ignore[import] +from docx.document import Document as DocumentType +from docx.table import Table import ebdamame -def get_document(datafiles, filename: str) -> Document: +def get_document(datafiles, filename: str) -> DocumentType: """ a datafiles compatible wrapper around ebddocx2table.get_document """ diff --git a/unittests/test_highlevel.py b/unittests/test_highlevel.py index d3a762f..6435384 100644 --- a/unittests/test_highlevel.py +++ b/unittests/test_highlevel.py @@ -1,7 +1,7 @@ from typing import List, Tuple import pytest # type:ignore[import] -from docx.table import Table # type:ignore[import] +from docx.table import Table from ebdtable2graph.models import EbdTable from ebdamame import EbdChapterInformation, TableNotFoundError