Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions src/ebddocx2table/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,23 @@ def _get_tables_and_paragaphs(document: Document) -> Generator[Union[Table, Para
_ebd_key_with_heading_pattern = re.compile(r"^(?P<key>E_\d{4})_(?P<title>.*)\s*$")


class TableNotFoundError(Exception):
    """
    An error that is raised when a requested table was not found.

    The key of the requested EBD is kept in ``ebd_key`` so that callers can
    inspect it programmatically; the human readable message is available via
    ``str(error)``.
    """

    ebd_key: str  #: the key of the requested EBD, e.g. "E_0001"

    def __init__(self, ebd_key: str):
        # Hand a message to the Exception base class. Without this call the
        # exception has empty args when constructed with the keyword argument
        # (TableNotFoundError(ebd_key=...)), so str(error) would be "" and
        # tracebacks would not say which table was missing.
        super().__init__(f"EBD Table '{ebd_key}' was not found.")
        self.ebd_key = ebd_key


def get_ebd_docx_tables(docx_file_path: Path, ebd_key: str) -> List[Table]:
"""
Opens the file specified in docx_file_path and returns the tables that relate to the given ebd_key.
There might be more than 1 docx table for 1 EBD table.
This is because of inconsistencies and manual editing during creation of the documents by EDI@Energy.
Raises an ValueError if the table was not found.
Raises a TableNotFoundError if the table was not found.
"""
if _ebd_key_pattern.match(ebd_key) is None:
raise ValueError(f"The ebd_key '{ebd_key}' does not match {_ebd_key_pattern.pattern}")
Expand Down Expand Up @@ -90,7 +101,7 @@ def get_ebd_docx_tables(docx_file_path: Path, ebd_key: str) -> List[Table]:
# break the outer loop, too; no need to iterate any further
break
if len(tables) == 0:
raise ValueError(f"EBD Table '{ebd_key}' was not found.")
raise TableNotFoundError(ebd_key=ebd_key)
return tables


Expand Down
5 changes: 4 additions & 1 deletion unittests/test_highlevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from docx.table import Table # type:ignore[import]
from ebdtable2graph.models import EbdTable

from ebddocx2table import TableNotFoundError
from ebddocx2table.docxtableconverter import DocxTableConverter

from . import get_all_ebd_keys, get_document, get_ebd_docx_tables
Expand Down Expand Up @@ -120,7 +121,9 @@ def test_extraction(self, datafiles, get_ebd_keys_and_files: List[Tuple[str, str
)
actual = converter.convert_docx_tables_to_ebd_table()
assert isinstance(actual, EbdTable)
except Exception as error:
except TableNotFoundError:
pass # ignore for now
except Exception as error: # for everything which is _not_ a TableNotFoundError
# In the long run, this pokemon catcher shall be removed.
# For now it allows us to quickly get an overview of how well the scraping works for a single docx.
# Simply run the test, then see how many of the subtests pass and which are skipped.
Expand Down