diff --git a/.gitignore b/.gitignore index eefbd78..05374d6 100644 --- a/.gitignore +++ b/.gitignore @@ -133,4 +133,4 @@ dmypy.json # vscode settings .vscode/ -src/_ebddocx2table_version.py +src/_ebdamame_version.py diff --git a/README.md b/README.md index 6bd2d81..97a8cf3 100644 --- a/README.md +++ b/README.md @@ -1,27 +1,24 @@ -# ebddocx2table +# ebdamame -> [!IMPORTANT] -⚠ This is the last version using the name `ebddocx2table`. Both the repository and the Python package will be renamed to `ebdamame`. +![Unittests status badge](https://github.com/Hochfrequenz/ebdamame/workflows/Unittests/badge.svg) +![Coverage status badge](https://github.com/Hochfrequenz/ebdamame/workflows/Coverage/badge.svg) +![Linting status badge](https://github.com/Hochfrequenz/ebdamame/workflows/Linting/badge.svg) +![Black status badge](https://github.com/Hochfrequenz/ebdamame/workflows/Black/badge.svg) +![PyPi Status Badge](https://img.shields.io/pypi/v/ebdamame) -![Unittests status badge](https://github.com/Hochfrequenz/ebd_docx_to_table/workflows/Unittests/badge.svg) -![Coverage status badge](https://github.com/Hochfrequenz/ebd_docx_to_table/workflows/Coverage/badge.svg) -![Linting status badge](https://github.com/Hochfrequenz/ebd_docx_to_table/workflows/Linting/badge.svg) -![Black status badge](https://github.com/Hochfrequenz/ebd_docx_to_table/workflows/Black/badge.svg) -![PyPi Status Badge](https://img.shields.io/pypi/v/ebddocx2table) - -🇩🇪 Dieses Repository enthält ein Python-Paket namens [`ebddocx2table`](https://pypi.org/project/ebddocx2table), das genutzt werden kann, um aus .docx-Dateien maschinenlesbare Tabellen, die einen Entscheidungsbaum (EBD) modellieren, zu extrahieren (scrapen). +🇩🇪 Dieses Repository enthält ein Python-Paket namens [`ebdamame`](https://pypi.org/project/ebdamame) (früher: `ebddocx2table`), das genutzt werden kann, um aus .docx-Dateien maschinenlesbare Tabellen, die einen Entscheidungsbaum (EBD) modellieren, zu extrahieren (scrapen). Diese Entscheidungsbäume sind Teil eines regulatorischen Regelwerks für die deutsche Energiewirtschaft und kommen in der Eingangsprüfung der Marktkommunikation zum Einsatz. Die mit diesem Paket erstellten maschinenlesbaren Tabellen können mit [`ebdtable2graph`](https://pypi.org/project/ebdtable2graph) in echte Graphen und Diagramme umgewandelt werden. Exemplarische Ergebnisse des Scrapings finden sich als .json-Dateien im Repository [`machine-readable_entscheidungsbaumdiagramme`](https://github.com/Hochfrequenz/machine-readable_entscheidungsbaumdiagramme/). -🇬🇧 This repository contains the source code of the Python package [`ebddocx2table`](https://pypi.org/project/ebddocx2table). +🇬🇧 This repository contains the source code of the Python package [`ebdamame`](https://pypi.org/project/ebdamame) (formerly published as `ebddocx2table`). ## Rationale -Assume, that you want to analyse or visualize the Entscheidungsbaumdiagramme (EBD) by EDI@Energy. +Assume that you want to analyse or visualize the Entscheidungsbaumdiagramme (EBD) by EDI@Energy. The website edi-energy.de, as always, only provides you with PDF or Word files instead of _really_ digitized data. -The package `ebddocx2table` scrapes the `.docx` files and returns data in a model defined in the "sister" package [`ebdtable2graph`](https://pypi.org/project/ebdtable2graph). +The package `ebdamame` scrapes the `.docx` files and returns data in a model defined in the "sister" package [`ebdtable2graph`](https://pypi.org/project/ebdtable2graph). Once you scraped the data (using this package) you can plot it with [`ebdtable2graph`](https://pypi.org/project/ebdtable2graph). @@ -30,7 +27,7 @@ Once you scraped the data (using this package) you can plot it with [`ebdtable2g In any case, install the repo from PyPI: ```bash -pip install ebddocx2table +pip install ebdamame ``` ### Use as a library @@ -41,8 +38,8 @@ from pathlib import Path import cattrs -from ebddocx2table import TableNotFoundError, get_all_ebd_keys, get_ebd_docx_tables # type:ignore[import] -from ebddocx2table.docxtableconverter import DocxTableConverter # type:ignore[import] +from ebdamame import TableNotFoundError, get_all_ebd_keys, get_ebd_docx_tables # type:ignore[import] +from ebdamame.docxtableconverter import DocxTableConverter # type:ignore[import] docx_file_path = Path("unittests/test_data/ebd20230629_v34.docx") # download this .docx File from edi-energy.de or find it in the unittests of this repository. diff --git a/main.py b/main.py index b7c1a72..d8e4092 100644 --- a/main.py +++ b/main.py @@ -29,8 +29,8 @@ from ebdtable2graph.plantuml import GraphTooComplexForPlantumlError # pylint:disable=import-error -from ebddocx2table import TableNotFoundError, get_all_ebd_keys, get_ebd_docx_tables # type:ignore[import] -from ebddocx2table.docxtableconverter import DocxTableConverter # type:ignore[import] +from ebdamame import TableNotFoundError, get_all_ebd_keys, get_ebd_docx_tables # type:ignore[import] +from ebdamame.docxtableconverter import DocxTableConverter # type:ignore[import] def _dump_puml(puml_path: Path, ebd_graph: EbdGraph) -> None: diff --git a/pyproject.toml b/pyproject.toml index 61a50de..90b47d1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [project] -name = "ebddocx2table" +name = "ebdamame" description = "A scraper to library to scrape .docx files with 'Entscheidungsbaumdiagramm' tables into a truely machine readable structure" license = { text = "GPL" } requires-python = ">=3.11" @@ -27,8 +27,8 @@ dependencies = [ dynamic = ["readme", "version"] [project.urls] -Changelog = "https://github.com/Hochfrequenz/ebddocx2table/releases" -Homepage = "https://github.com/Hochfrequenz/ebddocx2table" +Changelog = "https://github.com/Hochfrequenz/ebdamame/releases" +Homepage = "https://github.com/Hochfrequenz/ebdamame" [tool.black] line-length = 120 diff --git a/src/_ebddocx2table_version.py b/src/_ebddocx2table_version.py new file mode 100644 index 0000000..a9759b3 --- /dev/null +++ b/src/_ebddocx2table_version.py @@ -0,0 +1 @@ +version = "0.1.dev59+gc9c489e.d20230707" diff --git a/src/ebddocx2table/__init__.py b/src/ebdamame/__init__.py similarity index 100% rename from src/ebddocx2table/__init__.py rename to src/ebdamame/__init__.py diff --git a/src/ebddocx2table/docxtableconverter.py b/src/ebdamame/docxtableconverter.py similarity index 100% rename from src/ebddocx2table/docxtableconverter.py rename to src/ebdamame/docxtableconverter.py diff --git a/src/ebddocx2table/py.typed b/src/ebdamame/py.typed similarity index 100% rename from src/ebddocx2table/py.typed rename to src/ebdamame/py.typed diff --git a/tox.ini b/tox.ini index 8dfea18..e08b88f 100644 --- a/tox.ini +++ b/tox.ini @@ -26,7 +26,7 @@ deps = # add your fixtures like e.g. pytest_datafiles here setenv = PYTHONPATH = {toxinidir}/src commands = - pylint ebddocx2table + pylint ebdamame pylint main.py # add single files (ending with .py) or packages here @@ -37,7 +37,7 @@ deps = -r requirements.txt -r dev_requirements/requirements-type_check.txt commands = - mypy --show-error-codes src/ebddocx2table + mypy --show-error-codes src/ebdamame mypy --show-error-codes unittests mypy --show-error-codes main.py # add single files (ending with .py) or packages here diff --git a/unittests/__init__.py b/unittests/__init__.py index 3e66bbc..0edf4dd 100644 --- a/unittests/__init__.py +++ b/unittests/__init__.py @@ -9,7 +9,7 @@ from docx import Document # type:ignore[import] from docx.table import Table # type:ignore[import] -import ebddocx2table +import ebdamame def get_document(datafiles, filename: str) -> Document: @@ -17,7 +17,7 @@ def get_document(datafiles, filename: str) -> Document: a datafiles compatible wrapper around ebddocx2table.get_document """ path = datafiles / Path(filename) - return ebddocx2table.get_document(path) + return ebdamame.get_document(path) def get_ebd_docx_tables(datafiles, filename: str, ebd_key: str) -> List[Table]: @@ -25,12 +25,12 @@ def get_ebd_docx_tables(datafiles, filename: str, ebd_key: str) -> List[Table]: a datafiles compatible wrapper around ebddocx2table.get_ebd_docx_tables """ path = datafiles / Path(filename) - return ebddocx2table.get_ebd_docx_tables(path, ebd_key=ebd_key) + return ebdamame.get_ebd_docx_tables(path, ebd_key=ebd_key) -def get_all_ebd_keys(datafiles, filename: str) -> Dict[str, Tuple[str, ebddocx2table.EbdChapterInformation]]: +def get_all_ebd_keys(datafiles, filename: str) -> Dict[str, Tuple[str, ebdamame.EbdChapterInformation]]: """ a datafiles compatible wrapper around ebddocx2table.get_all_ebd_keys """ path = datafiles / Path(filename) - return ebddocx2table.get_all_ebd_keys(path) + return ebdamame.get_all_ebd_keys(path) diff --git a/unittests/test_highlevel.py b/unittests/test_highlevel.py index b91aeae..d3a762f 100644 --- a/unittests/test_highlevel.py +++ b/unittests/test_highlevel.py @@ -4,8 +4,8 @@ from docx.table import Table # type:ignore[import] from ebdtable2graph.models import EbdTable -from ebddocx2table import EbdChapterInformation, TableNotFoundError -from ebddocx2table.docxtableconverter import DocxTableConverter +from ebdamame import EbdChapterInformation, TableNotFoundError +from ebdamame.docxtableconverter import DocxTableConverter from . import get_all_ebd_keys, get_document, get_ebd_docx_tables from .examples import table_e0003, table_e0097, table_e0453, table_e0462, table_e0901 @@ -18,7 +18,7 @@ def get_ebd_keys_and_files(datafiles, request) -> List[Tuple[str, str]]: return list(all_keys_and_files) -class TestEbdDocx2Table: +class TestEbdamame: """ A class for tests of the entire package/library """ @@ -263,9 +263,9 @@ def test_extraction(self, datafiles, get_ebd_keys_and_files: List[Tuple[str, str # In the long run, all these catchers shall be removed. except AttributeError as attribute_error: if attribute_error.name == "_column_index_step_number": - pytest.skip("https://github.com/Hochfrequenz/ebddocx2table/issues/71") + pytest.skip("https://github.com/Hochfrequenz/ebdamame/issues/71") except TableNotFoundError: - # https://github.com/Hochfrequenz/ebd_docx_to_table/issues/9 + # https://github.com/Hochfrequenz/ebdamame/issues/9 pass # ignore for now except ValueError as value_error: # Simply run the test, then see how many of the subtests pass and which are skipped. @@ -273,16 +273,16 @@ def test_extraction(self, datafiles, get_ebd_keys_and_files: List[Tuple[str, str # This library has probably reached v1.0.0 if this catch block is not necessary anymore. match value_error.args[0]: case "None is not in list": - # https://github.com/Hochfrequenz/ebd_docx_to_table/issues/20 + # https://github.com/Hochfrequenz/ebdamame/issues/20 issue_number = "20" case "Exactly one of the entries in sub_rows has to have check_result.result True": - # https://github.com/Hochfrequenz/ebd_docx_to_table/issues/21 + # https://github.com/Hochfrequenz/ebdamame/issues/21 issue_number = "21" case "The cell content '--' does not belong to a ja/nein cell": - # https://github.com/Hochfrequenz/ebd_docx_to_table/issues/23 + # https://github.com/Hochfrequenz/ebdamame/issues/23 issue_number = "23" case "The cell content 'gültiges daten-ergebnis' does not belong to a ja/nein cell": - # https://github.com/Hochfrequenz/ebd_docx_to_table/issues/74 + # https://github.com/Hochfrequenz/ebdamame/issues/74 issue_number = "74" case _: raise @@ -291,7 +291,7 @@ def test_extraction(self, datafiles, get_ebd_keys_and_files: List[Tuple[str, str except UnboundLocalError as unbound_error: match unbound_error.args[0]: case "cannot access local variable 'role' where it is not associated with a value": - # https://github.com/Hochfrequenz/ebd_docx_to_table/issues/22 + # https://github.com/Hochfrequenz/ebdamame/issues/22 issue_number = "22" case _: raise