From 772254dfa1ac3463790c8fced1a549eeeb8d5dfc Mon Sep 17 00:00:00 2001 From: konstantin Date: Fri, 7 Jul 2023 16:51:57 +0200 Subject: [PATCH 1/5] =?UTF-8?q?=F0=9F=8F=97=20Switch=20to=20hatchling=20as?= =?UTF-8?q?=20build=20tool?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/packaging.yml | 23 +++++++ .gitignore | 2 + dev_requirements/requirements-packaging.in | 3 + dev_requirements/requirements-packaging.txt | 70 +++++++++++++++++++++ pyproject.toml | 59 +++++++++++++++-- tox.ini | 7 +++ 6 files changed, 158 insertions(+), 6 deletions(-) create mode 100644 .github/workflows/packaging.yml create mode 100644 dev_requirements/requirements-packaging.in create mode 100644 dev_requirements/requirements-packaging.txt diff --git a/.github/workflows/packaging.yml b/.github/workflows/packaging.yml new file mode 100644 index 0000000..764a7bb --- /dev/null +++ b/.github/workflows/packaging.yml @@ -0,0 +1,23 @@ +name: "Packaging Test" + +on: [ pull_request ] +jobs: + check_packaging: + runs-on: ${{ matrix.os }} + strategy: + matrix: + python-version: [ "3.11" ] + os: [ ubuntu-latest ] + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install tox + - name: Run Packaging Test + run: | + tox -e test_packaging diff --git a/.gitignore b/.gitignore index c2d86e1..eefbd78 100644 --- a/.gitignore +++ b/.gitignore @@ -132,3 +132,5 @@ dmypy.json # vscode settings .vscode/ + +src/_ebddocx2table_version.py diff --git a/dev_requirements/requirements-packaging.in b/dev_requirements/requirements-packaging.in new file mode 100644 index 0000000..9fef062 --- /dev/null +++ b/dev_requirements/requirements-packaging.in @@ -0,0 +1,3 @@ +# requirements for the hatchling build system +build +twine diff --git 
a/dev_requirements/requirements-packaging.txt b/dev_requirements/requirements-packaging.txt new file mode 100644 index 0000000..97fefe2 --- /dev/null +++ b/dev_requirements/requirements-packaging.txt @@ -0,0 +1,70 @@ +# +# This file is autogenerated by pip-compile with Python 3.11 +# by the following command: +# +# pip-compile requirements-packaging.in +# +bleach==6.0.0 + # via readme-renderer +build==0.10.0 + # via -r requirements-packaging.in +certifi==2023.5.7 + # via requests +charset-normalizer==3.1.0 + # via requests +colorama==0.4.6 + # via build +docutils==0.20.1 + # via readme-renderer +idna==3.4 + # via requests +importlib-metadata==6.7.0 + # via + # keyring + # twine +jaraco-classes==3.2.3 + # via keyring +keyring==24.2.0 + # via twine +markdown-it-py==3.0.0 + # via rich +mdurl==0.1.2 + # via markdown-it-py +more-itertools==9.1.0 + # via jaraco-classes +packaging==23.1 + # via build +pkginfo==1.9.6 + # via twine +pygments==2.15.1 + # via + # readme-renderer + # rich +pyproject-hooks==1.0.0 + # via build +pywin32-ctypes==0.2.2 + # via keyring +readme-renderer==40.0 + # via twine +requests==2.31.0 + # via + # requests-toolbelt + # twine +requests-toolbelt==1.0.0 + # via twine +rfc3986==2.0.0 + # via twine +rich==13.4.2 + # via twine +six==1.16.0 + # via bleach +twine==4.0.2 + # via -r requirements-packaging.in +urllib3==2.0.3 + # via + # requests + # twine +webencodings==0.5.1 + # via bleach +zipp==3.15.0 + # via importlib-metadata diff --git a/pyproject.toml b/pyproject.toml index be10d94..588b555 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,34 @@ +[project] +name = "ebddocx2table" +description = "A library to scrape .docx files with 'Entscheidungsbaumdiagramm' tables into a truly machine-readable structure" +license = { text = "GPL" } +requires-python = ">=3.11" +authors = [{ name = "Hochfrequenz Unternehmensberatung GmbH", email = "info@hochfrequenz.de" }] +keywords = ["EBD", "Energiewirtschaft", "Marktkommunikation"]
+classifiers = [ + "Development Status :: 4 - Beta", + "Environment :: Console", + "Intended Audience :: Developers", + "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", + "Operating System :: OS Independent", + "Programming Language :: Python", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.11", +] +dependencies = [ + "ebdtable2graph>=0.1.9", + "python-docx", + "more_itertools", + "attrs", + "click" + # add all the dependencies from requirements.in here, too +] +dynamic = ["readme", "version"] + +[project.urls] +Changelog = "https://github.com/Hochfrequenz/ebddocx2table/releases" +Homepage = "https://github.com/Hochfrequenz/ebddocx2table" + [tool.black] line-length = 120 @@ -8,10 +39,26 @@ profile = "black" [tool.pylint."MESSAGES CONTROL"] max-line-length = 120 -# the following lines are needed if you would like to build a python package -# and you want to use semantic versioning -# [build-system] -# requires = ["setuptools>=41.0", "wheel", "setuptools_scm[toml]>=3.4"] -# build-backend = "setuptools.build_meta" +[build-system] +requires = ["hatchling>=1.8.0", "hatch-vcs", "hatch-fancy-pypi-readme"] +build-backend = "hatchling.build" + +[tool.hatch.metadata.hooks.fancy-pypi-readme] +content-type = "text/markdown" +fragments = [{ path = "README.md" }] + +[tool.hatch.version] +source = "vcs" + +[tool.hatch.build.hooks.vcs] +version-file = "src/_ebddocx2table_version.py" +template = ''' +version = "{version}" +''' + +[tool.hatch.build.targets.sdist] +exclude = ["/unittests"] -# [tool.setuptools_scm] +[tool.hatch.build.targets.wheel] +only-include = ["src"] +sources = ["src"] diff --git a/tox.ini b/tox.ini index 491d3df..8dfea18 100644 --- a/tox.ini +++ b/tox.ini @@ -53,6 +53,13 @@ commands = coverage html --omit .tox/*,unittests/* coverage report --fail-under 80 --omit .tox/*,unittests/* +[testenv:test_packaging] +skip_install = true +deps = + -r dev_requirements/requirements-packaging.txt +commands = + python 
-m build + twine check dist/* [testenv:dev] # the dev environment contains everything you need to start developing on your local machine. From b94f260c23b4a11e6e940d6c8fa5b3ac342fb3f5 Mon Sep 17 00:00:00 2001 From: konstantin Date: Fri, 7 Jul 2023 16:59:39 +0200 Subject: [PATCH 2/5] Add executable --- pyproject.toml | 3 +++ main.py => src/ebddocx2table/main.py | 0 2 files changed, 3 insertions(+) rename main.py => src/ebddocx2table/main.py (100%) diff --git a/pyproject.toml b/pyproject.toml index 588b555..2ca061f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,6 +25,9 @@ dependencies = [ ] dynamic = ["readme", "version"] +[project.scripts] +ebd2docx2table = "ebddocx2table.main:main" + [project.urls] Changelog = "https://github.com/Hochfrequenz/ebddocx2table/releases" Homepage = "https://github.com/Hochfrequenz/ebddocx2table" diff --git a/main.py b/src/ebddocx2table/main.py similarity index 100% rename from main.py rename to src/ebddocx2table/main.py From 0948d86cb2588997a551f54c638cf3ec6fc82262 Mon Sep 17 00:00:00 2001 From: konstantin Date: Fri, 7 Jul 2023 17:05:36 +0200 Subject: [PATCH 3/5] Add publish workflow --- .github/workflows/python-publish.yml | 57 ++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 .github/workflows/python-publish.yml diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml new file mode 100644 index 0000000..10d5a68 --- /dev/null +++ b/.github/workflows/python-publish.yml @@ -0,0 +1,57 @@ +# This GitHub workflow is only needed for python package releases which are supposed to be published on pypi. 
+ +# This workflow builds the package and uploads it to PyPI when a release is created +# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries + +name: Upload Python Package +on: + release: + branches: main + types: [ created, edited ] + +jobs: + tests: + if: startsWith(github.ref, 'refs/tags/v') + runs-on: ${{ matrix.os }} + strategy: + matrix: + python-version: [ "3.11" ] + os: [ ubuntu-latest ] + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install tox + run: | + python -m pip install --upgrade pip + pip install tox + - name: Run tox + run: | + tox + build-n-publish: + name: Build and publish Python 🐍 distributions 📦 to PyPI and TestPyPI + runs-on: ubuntu-latest + needs: tests + steps: + - uses: actions/checkout@v3 + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: "3.11" # this job defines no matrix, so the version is pinned explicitly + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install build twine + - name: Build a binary wheel and a source tarball + run: | + python -m build # hatchling is the build backend declared in pyproject.toml; there is no setup.py + - name: Publish distribution 📦 to PyPI + if: startsWith(github.ref, 'refs/tags/v') + uses: pypa/gh-action-pypi-publish@release/v1 + with: + user: __token__ + password: ${{ secrets.PYPI_API_TOKEN }} + # to update visit + # https://github.com/Hochfrequenz/ebddocx2table/settings/secrets/actions/PYPI_API_TOKEN From 3fa6911701ccb05c5c3bd4b8eac393f5a49da674 Mon Sep 17 00:00:00 2001 From: konstantin Date: Fri, 7 Jul 2023 16:59:39 +0200 Subject: [PATCH 4/5] Revert "Add executable" This reverts commit b94f260c23b4a11e6e940d6c8fa5b3ac342fb3f5.
--- src/ebddocx2table/main.py => main.py | 0 pyproject.toml | 3 --- 2 files changed, 3 deletions(-) rename src/ebddocx2table/main.py => main.py (100%) diff --git a/src/ebddocx2table/main.py b/main.py similarity index 100% rename from src/ebddocx2table/main.py rename to main.py diff --git a/pyproject.toml b/pyproject.toml index 2ca061f..588b555 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,9 +25,6 @@ dependencies = [ ] dynamic = ["readme", "version"] -[project.scripts] -ebd2docx2table = "ebddocx2table.main:main" - [project.urls] Changelog = "https://github.com/Hochfrequenz/ebddocx2table/releases" Homepage = "https://github.com/Hochfrequenz/ebddocx2table" From f889de0b180703d3e933729b484e9b7716e4aec6 Mon Sep 17 00:00:00 2001 From: konstantin Date: Fri, 7 Jul 2023 17:25:31 +0200 Subject: [PATCH 5/5] Add MWE Example to README --- README.md | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/README.md b/README.md index 8ebcaaf..161976a 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,39 @@ The package `ebddocx2table` scrapes the `.docx` files and returns data in a mode Once you scraped the data (using this package) you can plot it with [`ebdtable2graph`](https://pypi.org/project/ebdtable2graph). +## How to use the package +In any case, install the package from PyPI first (`pip install ebddocx2table`). + +### Use as a library +```python +import json +from pathlib import Path + +import cattrs + +from ebddocx2table import TableNotFoundError, get_all_ebd_keys, get_ebd_docx_tables # type:ignore[import] +from ebddocx2table.docxtableconverter import DocxTableConverter # type:ignore[import] + +docx_file_path = Path("unittests/test_data/ebd20230629_v34.docx") +# download this .docx File from edi-energy.de or find it in the unittests of this repository.
+# https://github.com/Hochfrequenz/ebddocx2table/blob/main/unittests/test_data/ebd20230629_v34.docx +docx_tables = get_ebd_docx_tables(docx_file_path, ebd_key="E_0003") +converter = DocxTableConverter( + docx_tables, + ebd_key="E_0003", + chapter="MaBiS", + sub_chapter="7.42.1: AD: Bestellung der Aggregationsebene der Bilanzkreissummenzeitreihe auf Ebene der Regelzone", +) +result = converter.convert_docx_tables_to_ebd_table() +with open(Path("E_0003.json"), "w+", encoding="utf-8") as result_file: + # the result file can be found here: + # https://github.com/Hochfrequenz/machine-readable_entscheidungsbaumdiagramme/tree/main/FV2310 + json.dump(cattrs.unstructure(result), result_file, ensure_ascii=False, indent=2, sort_keys=True) +``` + +### Use as a CLI tool +_to be written_ + ## How to use this Repository on Your Machine (for development) Please follow the instructions in our