-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathmain.py
More file actions
140 lines (125 loc) · 6 KB
/
main.py
File metadata and controls
140 lines (125 loc) · 6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
"""
A small click-based script to extract all EBDs from a given .docx file.
"""
# invoke like this:
# main.py -i unittests/test_data/ebd20230619_v33.docx
# -o ../machine-readable_entscheidungsbaumdiagramme/FV2304
# -t json -t dot -t svg -t puml
# or
# main.py -i unittests/test_data/ebd20230629_v34.docx
# -o ../machine-readable_entscheidungsbaumdiagramme/FV2310
# -t json -t dot -t svg -t puml
import json
from pathlib import Path
from typing import Literal
import cattrs
import click
from ebdtable2graph import convert_graph_to_plantuml, convert_table_to_graph
from ebdtable2graph.graphviz import convert_dot_to_svg_kroki, convert_graph_to_dot
from ebdtable2graph.models import EbdGraph, EbdTable
from ebdtable2graph.plantuml import GraphToComplexForPlantumlError
# pylint:disable=import-error
from ebddocx2table import TableNotFoundError, get_all_ebd_keys, get_ebd_docx_tables # type:ignore[import]
from ebddocx2table.docxtableconverter import DocxTableConverter # type:ignore[import]
def _dump_puml(puml_path: Path, ebd_graph: EbdGraph) -> None:
    """
    Convert ``ebd_graph`` to PlantUML code and write it to ``puml_path`` (UTF-8).

    The conversion runs before the file is opened, so an error raised by the
    conversion does not touch the target file.
    """
    rendered_puml = convert_graph_to_plantuml(ebd_graph)
    with open(puml_path, mode="w+", encoding="utf-8") as target_file:
        target_file.write(rendered_puml)
def _dump_dot(dot_path: Path, ebd_graph: EbdGraph) -> None:
    """
    Convert ``ebd_graph`` to DOT code and write it to ``dot_path`` (UTF-8).

    The conversion runs before the file is opened, so an error raised by the
    conversion does not touch the target file.
    """
    rendered_dot = convert_graph_to_dot(ebd_graph)
    with open(dot_path, mode="w+", encoding="utf-8") as dot_file:
        dot_file.write(rendered_dot)
def _dump_svg(svg_path: Path, ebd_graph: EbdGraph) -> None:
    """
    Convert ``ebd_graph`` to DOT code, turn that into SVG and write it to ``svg_path`` (UTF-8).

    Both conversion steps run before the file is opened, so an error raised by
    either of them does not touch the target file.
    """
    # NOTE(review): judging by its name, convert_dot_to_svg_kroki presumably renders via a
    # kroki service - confirm whether this step needs network access.
    rendered_svg = convert_dot_to_svg_kroki(convert_graph_to_dot(ebd_graph))
    with open(svg_path, mode="w+", encoding="utf-8") as target_file:
        target_file.write(rendered_svg)
def _dump_json(json_path: Path, ebd_table: EbdTable) -> None:
    """
    Serialize ``ebd_table`` as JSON into ``json_path`` (UTF-8).

    The table is unstructured with cattrs and dumped with sorted keys, an
    indentation of 2 and without escaping non-ASCII characters.
    """
    with open(json_path, mode="w+", encoding="utf-8") as target_file:
        plain_table = cattrs.unstructure(ebd_table)
        json.dump(plain_table, target_file, ensure_ascii=False, indent=2, sort_keys=True)
@click.command()
@click.option(
    "-i",
    "--input_path",
    type=click.Path(exists=True, dir_okay=False, file_okay=True, path_type=Path),
    prompt="Input DOCX File",
    help="Path of a .docx file from which the EBDs shall be extracted",
)
@click.option(
    "-o",
    "--output_path",
    type=click.Path(exists=False, dir_okay=True, file_okay=False, path_type=Path),
    default="output",
    prompt="Output directory",
    help="Define the path where you want to save the generated files",
)
@click.option(
    "-t",
    "--export_types",
    type=click.Choice(["puml", "dot", "json", "svg"], case_sensitive=False),
    multiple=True,
    help="Choose which file you'd like to create",
)
# pylint:disable=too-many-locals, too-many-branches
def main(input_path: Path, output_path: Path, export_types: list[Literal["puml", "dot", "json", "svg"]]) -> None:
    """
    A program to get a machine-readable version of the EBDs from the docx files published by edi@energy.
    """
    # NOTE: click uses the docstring above as the ``--help`` text of the command, which is why the
    # parameters are documented in this comment instead:
    #   input_path:   existing .docx file from which the EBDs are read
    #   output_path:  directory that receives the exported files; created if it does not exist yet
    #   export_types: the formats chosen with ``-t``; because the option is declared with
    #                 ``multiple=True`` click hands over a (possibly empty) tuple, so only
    #                 membership tests are used on it
    # A failure for a single EBD is reported on the console and that EBD (or just the affected
    # export format) is skipped; the remaining EBDs are still processed.
    if output_path.exists():
        click.secho(f"The output directory '{output_path}' exists already.", fg="yellow")
    else:
        output_path.mkdir(parents=True)
        click.secho(f"Created a new directory at {output_path}", fg="yellow")
    all_ebd_keys = get_all_ebd_keys(input_path)
    for ebd_key, (ebd_title, ebd_kapitel) in all_ebd_keys.items():
        click.secho(f"Processing EBD {ebd_kapitel} '{ebd_key}' ({ebd_title})")
        try:
            docx_tables = get_ebd_docx_tables(docx_file_path=input_path, ebd_key=ebd_key)
        except TableNotFoundError as table_not_found_error:
            click.secho(f"Table not found: {ebd_key}: {str(table_not_found_error)}; Skip!", fg="red")
            continue
        if ebd_kapitel is None:
            # An explicit check instead of an ``assert``: asserts are removed when Python runs with
            # -O, and a single EBD without chapter information should not abort the whole batch.
            click.secho(f"No chapter information found for {ebd_key}; Skip!", fg="red")
            continue
        try:
            converter = DocxTableConverter(
                docx_tables,
                ebd_key=ebd_key,
                chapter=ebd_kapitel.chapter_title,  # type:ignore[arg-type]
                # pylint:disable=line-too-long
                sub_chapter=f"{ebd_kapitel.chapter}.{ebd_kapitel.section}.{ebd_kapitel.subsection}: {ebd_kapitel.section_title}",
            )
            ebd_table = converter.convert_docx_tables_to_ebd_table()
        except Exception as scraping_error:  # pylint:disable=broad-except
            click.secho(f"Error while scraping {ebd_key}: {str(scraping_error)}; Skip!", fg="red")
            continue
        # The JSON export only needs the table, so it is written before the graph conversion,
        # which may still fail for this EBD.
        if "json" in export_types:
            _dump_json(output_path / Path(f"{ebd_key}.json"), ebd_table)
            click.secho(f"💾 Successfully exported '{ebd_key}.json'")
        try:
            ebd_graph = convert_table_to_graph(ebd_table)
        except Exception as graphing_error:  # pylint:disable=broad-except
            click.secho(f"Error while graphing {ebd_key}: {str(graphing_error)}; Skip!", fg="yellow")
            continue
        if "puml" in export_types:
            try:
                _dump_puml(output_path / Path(f"{ebd_key}.puml"), ebd_graph)
                click.secho(f"💾 Successfully exported '{ebd_key}.puml'")
            except AssertionError as assertion_error:
                # https://github.com/Hochfrequenz/ebdtable2graph/issues/35
                click.secho(str(assertion_error), fg="red")
            except GraphToComplexForPlantumlError as too_complex_error:
                click.secho(str(too_complex_error), fg="red")
        # both the SVG and dot path require graphviz to work, hence the common error handling block
        try:
            if "dot" in export_types:
                _dump_dot(output_path / Path(f"{ebd_key}.dot"), ebd_graph)
                click.secho(f"💾 Successfully exported '{ebd_key}.dot'")
            if "svg" in export_types:
                _dump_svg(output_path / Path(f"{ebd_key}.svg"), ebd_graph)
                click.secho(f"💾 Successfully exported '{ebd_key}.svg'")
        except AssertionError as assertion_error:
            # e.g. AssertionError: If indegree > 1, the number of paths should always be greater than 1 too.
            click.secho(str(assertion_error), fg="red")
    click.secho("🏁Finished")
if __name__ == "__main__":
    # No arguments are passed here: click reads them from the command line
    # (or prompts for them) when the command is invoked.
    main()  # pylint:disable=no-value-for-parameter