diff --git a/.github/workflows/packaging.yml b/.github/workflows/packaging.yml
new file mode 100644
index 0000000..764a7bb
--- /dev/null
+++ b/.github/workflows/packaging.yml
@@ -0,0 +1,23 @@
+name: "Packaging Test"
+
+on: [ pull_request ]
+jobs:
+  check_packaging:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        python-version: [ "3.11" ]
+        os: [ ubuntu-latest ]
+    steps:
+      - uses: actions/checkout@v3
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install tox
+      - name: Run Packaging Test
+        run: |
+          tox -e test_packaging
diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml
new file mode 100644
--- /dev/null
+++ b/.github/workflows/python-publish.yml
@@ -0,0 +1,59 @@
+# This GitHub workflow is only needed for python package releases which are supposed to be published on pypi.
+
+# This workflow will upload a Python Package using Twine when a release is created
+# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
+
+name: Upload Python Package
+on:
+  release:
+    branches: main
+    types: [ created, edited ]
+
+jobs:
+  tests:
+    if: startsWith(github.ref, 'refs/tags/v')
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        python-version: [ "3.11" ]
+        os: [ ubuntu-latest ]
+    steps:
+      - uses: actions/checkout@v3
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install tox
+        run: |
+          python -m pip install --upgrade pip
+          pip install tox
+      - name: Run tox
+        run: |
+          tox
+  build-n-publish:
+    name: Build and publish Python 🐍 distributions 📦 to PyPI and TestPyPI
+    runs-on: ubuntu-latest
+    needs: tests
+    steps:
+      - uses: actions/checkout@v3
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          # this job defines no matrix, so the version is pinned to the one the tests job uses
+          python-version: "3.11"
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install build twine
+      # pyproject.toml declares hatchling as build backend, so build via the PEP 517 frontend
+      - name: Build a binary wheel and a source tarball
+        run: |
+          python -m build
+      - name: Publish distribution 📦 to PyPI
+        if: startsWith(github.ref, 'refs/tags/v')
+        uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          user: __token__
+          password: ${{ secrets.PYPI_API_TOKEN }}
+          # to update visit
+          # https://github.com/Hochfrequenz/ebddocx2table/settings/secrets/actions/PYPI_API_TOKEN
diff --git a/.gitignore b/.gitignore
index c2d86e1..eefbd78 100644
--- a/.gitignore
+++ b/.gitignore
@@ -132,3 +132,5 @@ dmypy.json
 
 # vscode settings
 .vscode/
+
+src/_ebddocx2table_version.py
diff --git a/README.md b/README.md
index 8ebcaaf..161976a 100644
--- a/README.md
+++ b/README.md
@@ -19,6 +19,39 @@ The package `ebddocx2table` scrapes the `.docx` files and returns data in a mode
 
 Once you scraped the data (using this package) you can plot it with [`ebdtable2graph`](https://pypi.org/project/ebdtable2graph).
 
+## How to use the package
+In any case, install the repo from PyPI:
+
+### Use as a library
+```python
+import json
+from pathlib import Path
+
+import cattrs
+
+from ebddocx2table import TableNotFoundError, get_all_ebd_keys, get_ebd_docx_tables  # type:ignore[import]
+from ebddocx2table.docxtableconverter import DocxTableConverter  # type:ignore[import]
+
+docx_file_path = Path("unittests/test_data/ebd20230629_v34.docx")
+# download this .docx File from edi-energy.de or find it in the unittests of this repository.
+# https://github.com/Hochfrequenz/ebddocx2table/blob/main/unittests/test_data/ebd20230629_v34.docx
+docx_tables = get_ebd_docx_tables(docx_file_path, ebd_key="E_0003")
+converter = DocxTableConverter(
+    docx_tables,
+    ebd_key="E_0003",
+    chapter="MaBiS",
+    sub_chapter="7.42.1: AD: Bestellung der Aggregationsebene der Bilanzkreissummenzeitreihe auf Ebene der Regelzone",
+)
+result = converter.convert_docx_tables_to_ebd_table()
+with open(Path("E_0003.json"), "w+", encoding="utf-8") as result_file:
+    # the result file can be found here:
+    # https://github.com/Hochfrequenz/machine-readable_entscheidungsbaumdiagramme/tree/main/FV2310
+    json.dump(cattrs.unstructure(result), result_file, ensure_ascii=False, indent=2, sort_keys=True)
+```
+
+### Use as a CLI tool
+_to be written_
+
 ## How to use this Repository on Your Machine (for development)
 
 Please follow the instructions in our
diff --git a/dev_requirements/requirements-packaging.in b/dev_requirements/requirements-packaging.in
new file mode 100644
index 0000000..9fef062
--- /dev/null
+++ b/dev_requirements/requirements-packaging.in
@@ -0,0 +1,3 @@
+# requirements for the hatchling build system
+build
+twine
diff --git a/dev_requirements/requirements-packaging.txt b/dev_requirements/requirements-packaging.txt
new file mode 100644
index 0000000..97fefe2
--- /dev/null
+++ b/dev_requirements/requirements-packaging.txt
@@ -0,0 +1,70 @@
+#
+# This file is autogenerated by pip-compile with Python 3.11
+# by the following command:
+#
+#    pip-compile requirements-packaging.in
+#
+bleach==6.0.0
+    # via readme-renderer
+build==0.10.0
+    # via -r requirements-packaging.in
+certifi==2023.5.7
+    # via requests
+charset-normalizer==3.1.0
+    # via requests
+colorama==0.4.6
+    # via build
+docutils==0.20.1
+    # via readme-renderer
+idna==3.4
+    # via requests
+importlib-metadata==6.7.0
+    # via
+    #   keyring
+    #   twine
+jaraco-classes==3.2.3
+    # via keyring
+keyring==24.2.0
+    # via twine
+markdown-it-py==3.0.0
+    # via rich
+mdurl==0.1.2
+    # via markdown-it-py
+more-itertools==9.1.0
+    # via jaraco-classes
+packaging==23.1
+    # via build
+pkginfo==1.9.6
+    # via twine
+pygments==2.15.1
+    # via
+    #   readme-renderer
+    #   rich
+pyproject-hooks==1.0.0
+    # via build
+pywin32-ctypes==0.2.2
+    # via keyring
+readme-renderer==40.0
+    # via twine
+requests==2.31.0
+    # via
+    #   requests-toolbelt
+    #   twine
+requests-toolbelt==1.0.0
+    # via twine
+rfc3986==2.0.0
+    # via twine
+rich==13.4.2
+    # via twine
+six==1.16.0
+    # via bleach
+twine==4.0.2
+    # via -r requirements-packaging.in
+urllib3==2.0.3
+    # via
+    #   requests
+    #   twine
+webencodings==0.5.1
+    # via bleach
+zipp==3.15.0
+    # via importlib-metadata
diff --git a/pyproject.toml b/pyproject.toml
index be10d94..588b555 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,3 +1,34 @@
+[project]
+name = "ebddocx2table"
+description = "A scraper to library to scrape .docx files with 'Entscheidungsbaumdiagramm' tables into a truely machine readable structure"
+license = { text = "GPL" }
+requires-python = ">=3.11"
+authors = [{ name = "Hochfrequenz Unternehmensberatung GmbH", email = "info@hochfrequenz.de" }]
+keywords = ["EBD", "Energiewirtschaft", "Marktkommunikation"]
+classifiers = [
+    "Development Status :: 4 - Beta",
+    "Environment :: Console",
+    "Intended Audience :: Developers",
+    "License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
+    "Operating System :: OS Independent",
+    "Programming Language :: Python",
+    "Programming Language :: Python :: 3 :: Only",
+    "Programming Language :: Python :: 3.11",
+]
+dependencies = [
+    "ebdtable2graph>=0.1.9",
+    "python-docx",
+    "more_itertools",
+    "attrs",
+    "click"
+    # add all the dependencies from requirements.in here, too
+]
+dynamic = ["readme", "version"]
+
+[project.urls]
+Changelog = "https://github.com/Hochfrequenz/ebddocx2table/releases"
+Homepage = "https://github.com/Hochfrequenz/ebddocx2table"
+
 [tool.black]
 line-length = 120
 
@@ -8,10 +39,26 @@ profile = "black"
 [tool.pylint."MESSAGES CONTROL"]
 max-line-length = 120
 
-# the following lines are needed if you would like to build a python package
-# and you want to use semantic versioning
-# [build-system]
-# requires = ["setuptools>=41.0", "wheel", "setuptools_scm[toml]>=3.4"]
-# build-backend = "setuptools.build_meta"
+[build-system]
+requires = ["hatchling>=1.8.0", "hatch-vcs", "hatch-fancy-pypi-readme"]
+build-backend = "hatchling.build"
+
+[tool.hatch.metadata.hooks.fancy-pypi-readme]
+content-type = "text/markdown"
+fragments = [{ path = "README.md" }]
+
+[tool.hatch.version]
+source = "vcs"
+
+[tool.hatch.build.hooks.vcs]
+version-file = "src/_ebddocx2table_version.py"
+template = '''
+version = "{version}"
+'''
+
+[tool.hatch.build.targets.sdist]
+exclude = ["/unittests"]
 
-# [tool.setuptools_scm]
+[tool.hatch.build.targets.wheel]
+only-include = ["src"]
+sources = ["src"]
diff --git a/tox.ini b/tox.ini
index 491d3df..8dfea18 100644
--- a/tox.ini
+++ b/tox.ini
@@ -53,6 +53,13 @@ commands =
     coverage html --omit .tox/*,unittests/*
     coverage report --fail-under 80 --omit .tox/*,unittests/*
 
+[testenv:test_packaging]
+skip_install = true
+deps =
+    -r dev_requirements/requirements-packaging.txt
+commands =
+    python -m build
+    twine check dist/*
 
 [testenv:dev]
 # the dev environment contains everything you need to start developing on your local machine.