Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions dev_requirements/requirements-coverage.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
# SHA1:6dafbcf610e9f81897b65ee9142715ab2e793f9e
#
# This file is autogenerated by pip-compile with python 3.10
# This file is autogenerated by pip-compile-multi
# To update, run:
#
# pip-compile requirements-coverage.in
# pip-compile-multi
#
coverage==7.5.0
# via -r dev_requirements/requirements-coverage.in
coverage==7.5.1
# via -r dev_requirements\requirements-coverage.in
17 changes: 10 additions & 7 deletions dev_requirements/requirements-formatting.txt
Original file line number Diff line number Diff line change
@@ -1,20 +1,23 @@
# SHA1:2c7ffcd29222de3114c7f7994911f1b69d06b6b3
#
# This file is autogenerated by pip-compile with python 3.10
# This file is autogenerated by pip-compile-multi
# To update, run:
#
# pip-compile requirements-formatting.in
# pip-compile-multi
#
black==24.4.2
# via -r dev_requirements/requirements-formatting.in
# via -r dev_requirements\requirements-formatting.in
click==8.1.7
# via black
colorama==0.4.6
# via click
isort==5.13.2
# via -r dev_requirements/requirements-formatting.in
# via -r dev_requirements\requirements-formatting.in
mypy-extensions==1.0.0
# via black
packaging==23.0
packaging==24.0
# via black
pathspec==0.11.0
pathspec==0.12.1
# via black
platformdirs==3.1.0
platformdirs==4.2.1
# via black
15 changes: 9 additions & 6 deletions dev_requirements/requirements-linting.txt
Original file line number Diff line number Diff line change
@@ -1,20 +1,23 @@
# SHA1:0e15f8789b9d62fe90d1f1b0b6a7e32f13b99b19
#
# This file is autogenerated by pip-compile with python 3.10
# This file is autogenerated by pip-compile-multi
# To update, run:
#
# pip-compile requirements-linting.in
# pip-compile-multi
#
astroid==3.1.0
# via pylint
dill==0.3.7
colorama==0.4.6
# via pylint
dill==0.3.8
# via pylint
isort==5.13.2
# via pylint
mccabe==0.7.0
# via pylint
platformdirs==3.1.0
platformdirs==4.2.1
# via pylint
pylint==3.1.0
# via -r dev_requirements/requirements-linting.in
tomlkit==0.11.6
# via -r dev_requirements\requirements-linting.in
tomlkit==0.12.4
# via pylint
51 changes: 26 additions & 25 deletions dev_requirements/requirements-packaging.txt
Original file line number Diff line number Diff line change
@@ -1,55 +1,58 @@
# SHA1:93e4fbf2b6cce1574fe3d5315360512fa9927699
#
# This file is autogenerated by pip-compile with Python 3.12
# by the following command:
# This file is autogenerated by pip-compile-multi
# To update, run:
#
# pip-compile dev_requirements/requirements-packaging.in
# pip-compile-multi
#
backports-tarfile==1.1.1
# via jaraco-context
build==1.2.1
# via -r dev_requirements/requirements-packaging.in
# via -r dev_requirements\requirements-packaging.in
certifi==2024.2.2
# via requests
cffi==1.16.0
# via cryptography
charset-normalizer==3.3.2
# via requests
cryptography==42.0.5
# via secretstorage
docutils==0.20.1
colorama==0.4.6
# via build
docutils==0.21.2
# via readme-renderer
idna==3.7
# via requests
importlib-metadata==7.0.2
importlib-metadata==7.1.0
# via
# keyring
# twine
jaraco-classes==3.3.1
jaraco-classes==3.4.0
# via keyring
jeepney==0.8.0
# via
# keyring
# secretstorage
keyring==24.3.1
jaraco-context==5.3.0
# via keyring
jaraco-functools==4.0.1
# via keyring
keyring==25.2.0
# via twine
markdown-it-py==3.0.0
# via rich
mdurl==0.1.2
# via markdown-it-py
more-itertools==10.2.0
# via jaraco-classes
nh3==0.2.15
# via
# jaraco-classes
# jaraco-functools
nh3==0.2.17
# via readme-renderer
packaging==24.0
# via build
pkginfo==1.10.0
# via twine
pycparser==2.21
# via cffi
pygments==2.17.2
pygments==2.18.0
# via
# readme-renderer
# rich
pyproject-hooks==1.0.0
pyproject-hooks==1.1.0
# via build
pywin32-ctypes==0.2.2
# via keyring
readme-renderer==43.0
# via twine
requests==2.31.0
Expand All @@ -62,10 +65,8 @@ rfc3986==2.0.0
# via twine
rich==13.7.1
# via twine
secretstorage==3.3.3
# via keyring
twine==5.0.0
# via -r dev_requirements/requirements-packaging.in
# via -r dev_requirements\requirements-packaging.in
urllib3==2.2.1
# via
# requests
Expand Down
17 changes: 10 additions & 7 deletions dev_requirements/requirements-tests.txt
Original file line number Diff line number Diff line change
@@ -1,23 +1,26 @@
# SHA1:237de32e0e58475d734882e72523b53d0f1187a3
#
# This file is autogenerated by pip-compile with Python 3.11
# by the following command:
# This file is autogenerated by pip-compile-multi
# To update, run:
#
# pip-compile requirements-tests.in
# pip-compile-multi
#
attrs==23.2.0
# via pytest-subtests
colorama==0.4.6
# via pytest
iniconfig==2.0.0
# via pytest
packaging==23.0
packaging==24.0
# via pytest
pluggy==1.5.0
# via pytest
pytest==8.2.0
# via
# -r dev_requirements/requirements-tests.in
# -r dev_requirements\requirements-tests.in
# pytest-datafiles
# pytest-subtests
pytest-datafiles==3.0.0
# via -r dev_requirements/requirements-tests.in
# via -r dev_requirements\requirements-tests.in
pytest-subtests==0.12.1
# via -r dev_requirements/requirements-tests.in
# via -r dev_requirements\requirements-tests.in
11 changes: 6 additions & 5 deletions dev_requirements/requirements-type_check.txt
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
# SHA1:7983aaa01d64547827c20395d77e248c41b2572f
#
# This file is autogenerated by pip-compile with Python 3.11
# by the following command:
# This file is autogenerated by pip-compile-multi
# To update, run:
#
# pip-compile requirements-type_check.in
# pip-compile-multi
#
mypy==1.10.0
# via -r dev_requirements/requirements-type_check.in
# via -r dev_requirements\requirements-type_check.in
mypy-extensions==1.0.0
# via mypy
typing-extensions==4.8.0
typing-extensions==4.11.0
# via mypy
18 changes: 10 additions & 8 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@
# This file is autogenerated by pip-compile with Python 3.11
# by the following command:
#
# pip-compile '.\requirements.in'
# pip-compile pyproject.toml
#
attrs==23.2.0
# via
# -r requirements.in
# cattrs
# ebdamame (pyproject.toml)
# ebdtable2graph
cattrs==22.2.0
# via ebdtable2graph
Expand All @@ -16,9 +16,11 @@ certifi==2023.7.22
charset-normalizer==2.1.1
# via requests
click==8.1.7
# via -r requirements.in
# via ebdamame (pyproject.toml)
colorama==0.4.6
# via click
ebdtable2graph==0.1.20
# via -r requirements.in
# via ebdamame (pyproject.toml)
idna==3.7
# via requests
lxml==4.9.3
Expand All @@ -27,16 +29,16 @@ lxml==4.9.3
# python-docx
# svgutils
more-itertools==10.2.0
# via -r requirements.in
# via ebdamame (pyproject.toml)
networkx==2.8.8
# via ebdtable2graph
python-docx==1.1.0
# via -r requirements.in
python-docx==1.1.2
# via ebdamame (pyproject.toml)
requests==2.31.0
# via ebdtable2graph
svgutils==0.3.4
# via ebdtable2graph
typing-extensions==4.8.0
typing-extensions==4.11.0
# via python-docx
urllib3==1.26.18
# via requests
20 changes: 11 additions & 9 deletions src/ebdamame/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,16 @@
from typing import Dict, Generator, Iterable, List, Optional, Tuple, Union

import attrs
from docx import Document # type:ignore[import]
from docx.oxml import CT_P, CT_Tbl # type:ignore[import]
from docx.table import Table, _Cell # type:ignore[import]
from docx.text.paragraph import Paragraph # type:ignore[import]
import docx
from docx.document import Document as DocumentType
from docx.oxml import CT_P, CT_Tbl
from docx.table import Table, _Cell
from docx.text.paragraph import Paragraph

_logger = logging.getLogger(__name__)


def get_document(docx_file_path: Path) -> Document:
def get_document(docx_file_path: Path) -> DocumentType:
"""
opens and returns the document specified in the docx_file_path using python-docx
"""
Expand All @@ -29,14 +30,14 @@ def get_document(docx_file_path: Path) -> Document:
# but then switched from StringIO to BytesIO (without explicit 'utf-8') because of:
# UnicodeDecodeError: 'charmap' codec can't decode byte 0x81 in position 605: character maps to <undefined>
try:
document = Document(source_stream)
document = docx.Document(source_stream)
_logger.info("Successfully read the file '%s'", docx_file_path)
return document
finally:
source_stream.close()


def _get_tables_and_paragaphs(document: Document) -> Generator[Union[Table, Paragraph], None, None]:
def _get_tables_and_paragraphs(document: DocumentType) -> Generator[Union[Table, Paragraph], None, None]:
"""
Yields tables and paragraphs from the given document in the order in which they occur in the document.
This is helpful because document.tables and document.paragraphs are de-coupled and give you no information which
Expand Down Expand Up @@ -116,7 +117,7 @@ def get_ebd_docx_tables(docx_file_path: Path, ebd_key: str) -> List[Table]:

is_inside_subsection_of_requested_table: bool = False
tables: List[Table] = []
tables_and_paragraphs = _get_tables_and_paragaphs(document)
tables_and_paragraphs = _get_tables_and_paragraphs(document)
for table_or_paragraph in tables_and_paragraphs:
if isinstance(table_or_paragraph, Paragraph):
paragraph: Paragraph = table_or_paragraph
Expand Down Expand Up @@ -213,7 +214,8 @@ def _enrich_paragraphs_with_sections(
subsection = 1
subsection_title: Optional[str] = None
for paragraph in paragraphs:
match paragraph.style.style_id:
# since python-docx 1.1.2 there are type hints; it seems the style is not guaranteed to be non-None
match paragraph.style.style_id: # type:ignore[union-attr]
case "berschrift1":
chapter = next(chapter_counter)
chapter_title = paragraph.text.strip()
Expand Down
5 changes: 4 additions & 1 deletion src/ebdamame/docxtableconverter.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from typing import Generator, List, Literal, Optional, Tuple

import attrs
from docx.table import Table, _Cell, _Row # type:ignore[import]
from docx.table import Table, _Cell, _Row
from ebdtable2graph.models import EbdTable, EbdTableRow, EbdTableSubRow
from ebdtable2graph.models.ebd_table import _STEP_NUMBER_REGEX, EbdCheckResult, EbdTableMetaData, MultiStepInstruction
from more_itertools import first, first_true, last
Expand Down Expand Up @@ -46,6 +46,9 @@ def _get_index_of_first_column_with_step_number(cells: List[_Cell]) -> int:
first_step_number_cell = first_true(
cells, pred=lambda cell: _step_number_pattern.match(cell.text.strip()) is not None
)
if first_step_number_cell is None:
raise ValueError("No cell containing a valid step number found.")

step_number_column_index = cells.index(first_step_number_cell)
_logger.debug("The step number is in column %i", step_number_column_index)
return step_number_column_index
Expand Down
6 changes: 3 additions & 3 deletions unittests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@
from pathlib import Path
from typing import Dict, List, Tuple

from docx import Document # type:ignore[import]
from docx.table import Table # type:ignore[import]
from docx.document import Document as DocumentType
from docx.table import Table

import ebdamame


def get_document(datafiles, filename: str) -> Document:
def get_document(datafiles, filename: str) -> DocumentType:
"""
a datafiles-compatible wrapper around ebdamame.get_document
"""
Expand Down
2 changes: 1 addition & 1 deletion unittests/test_highlevel.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from typing import List, Tuple

import pytest # type:ignore[import]
from docx.table import Table # type:ignore[import]
from docx.table import Table
from ebdtable2graph.models import EbdTable

from ebdamame import EbdChapterInformation, TableNotFoundError
Expand Down