Skip to content

Commit 5660ddf

Browse files
committed
get local file, web endpoints working
1 parent 7d8b79c commit 5660ddf

File tree

8 files changed

+76
-31
lines changed

8 files changed

+76
-31
lines changed

dcpy/connectors/registry.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -167,5 +167,8 @@ def get_subregistry(self, cls: Type[_C2]) -> ConnectorRegistry[_C2]:
167167
connectors = {
168168
t: conn for (t, conn) in self._connectors.items() if isinstance(conn, cls)
169169
}
170-
print(connectors)
171-
return ConnectorRegistry(connectors=connectors) # type: ignore
170+
return ConnectorRegistry(connectors=connectors)
171+
172+
@property
173+
def nonversioned(self) -> ConnectorRegistry[NonVersionedConnector]:
174+
return self.get_subregistry(NonVersionedConnector)

dcpy/connectors/web.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import requests
33

44
from dcpy.utils.logging import logger
5+
from dcpy.connectors import NonVersionedConnector
56

67

78
def download_file(url: str, path: Path) -> None:
@@ -14,3 +15,22 @@ def download_file(url: str, path: Path) -> None:
1415
response.raise_for_status()
1516
with open(path, "wb") as f:
1617
f.write(response.content)
18+
19+
20+
class Connector(NonVersionedConnector):
21+
conn_type = "file_download"
22+
23+
def push(self, key: str, version, push_conf: dict | None = {}) -> dict:
24+
raise NotImplementedError("Sorry :)")
25+
26+
def pull(
27+
self,
28+
key: str,
29+
destination_path: Path,
30+
pull_conf: dict | None = None,
31+
) -> dict:
32+
download_file(key, destination_path)
33+
return {"path": destination_path}
34+
35+
def get_current_version(self, key: str) -> str | None:
36+
return None

dcpy/lifecycle/connector_registry.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
from dcpy.connectors.edm import recipes, publishing
22
from dcpy.connectors.socrata import connector as soc_connector
33
from dcpy.connectors.esri import arcgis_feature_service
4+
from dcpy.connectors import web
45
from dcpy.connectors.registry import (
56
ConnectorRegistry,
6-
Connector,
7-
VersionedConnector,
87
NonVersionedConnector,
8+
Connector,
99
)
1010
from dcpy.utils.logging import logger
1111

@@ -20,11 +20,9 @@ def _set_default_connectors():
2020
connectors.register(connector=publishing.GisDatasetsConnector())
2121
connectors.register(connector=soc_connector.Connector())
2222
connectors.register(connector=arcgis_feature_service.Connector())
23+
connectors.register(connector=web.Connector())
24+
connectors.register(connector=web.Connector(), conn_type="api")
2325
logger.info(f"Registered Connectors: {connectors.list_registered()}")
2426

2527

2628
_set_default_connectors()
27-
28-
29-
versioned_connectors = connectors.get_subregistry(VersionedConnector)
30-
nonversioned_connectors = connectors.get_subregistry(NonVersionedConnector)

dcpy/lifecycle/ingest/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,16 @@
11
from pathlib import Path
2+
import shutil
23
import typer
34

45
from .run import ingest
6+
from .connector_registry import LocalFileConnector
57
from dcpy.configuration import TEMPLATE_DIR
8+
from dcpy.lifecycle.connector_registry import connectors
69

710
app = typer.Typer(add_completion=False)
811

12+
connectors.register(LocalFileConnector())
13+
914

1015
@app.command("ingest")
1116
def _cli_wrapper_run(

dcpy/lifecycle/ingest/configure.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,13 @@
55
from pathlib import Path
66
from urllib.parse import urlparse
77
import yaml
8-
from dcpy.lifecycle.connector_registry import nonversioned_connectors
8+
from dcpy.lifecycle.connector_registry import connectors
99

1010
from dcpy.models.lifecycle.ingest import (
1111
ArchivalMetadata,
1212
Ingestion,
1313
LocalFileSource,
14-
ConnectorSource,
14+
_Source,
1515
S3Source,
1616
GisDataset,
1717
FileDownloadSource,
@@ -59,8 +59,8 @@ def read_template(
5959

6060
def get_version(source: Source, timestamp: datetime) -> str:
6161
match source:
62-
case ConnectorSource():
63-
connector = nonversioned_connectors[source.type]
62+
case _Source():
63+
connector = connectors.nonversioned[source.type]
6464
version = connector.get_version(source.key, source.model_dump())
6565
case _:
6666
version = None
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
from pathlib import Path
2+
import shutil
3+
4+
from dcpy.lifecycle.connector_registry import NonVersionedConnector
5+
6+
7+
class LocalFileConnector(NonVersionedConnector):
8+
conn_type = "local_file"
9+
10+
def pull(
11+
self, key: str, destination_path: Path, pull_conf: dict | None = None
12+
) -> dict:
13+
if Path(key) != destination_path:
14+
shutil.copy(key, destination_path)
15+
return {"path": destination_path}

dcpy/lifecycle/ingest/extract.py

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,13 @@
11
from pathlib import Path
2-
import shutil
32

43
from dcpy.models.lifecycle.ingest import Source
5-
from dcpy.lifecycle.connector_registry import nonversioned_connectors
4+
from dcpy.lifecycle.connector_registry import connectors
65

76

87
def download_file_from_source(source: Source, filename: str, dir: Path) -> None:
98
"""
109
From parsed config template and version, download raw data from source to provided path
1110
"""
1211
path = dir / filename
13-
if source.type == "local_file":
14-
if source.path != path:
15-
shutil.copy(source.path, path)
16-
else:
17-
connector = nonversioned_connectors[source.type]
18-
print(source.model_dump())
19-
connector.pull(
20-
key=source.key, destination_path=path, pull_conf=source.model_dump()
21-
)
12+
connector = connectors.nonversioned[source.type]
13+
connector.pull(key=source.key, destination_path=path, pull_conf=source.model_dump())

dcpy/models/lifecycle/ingest.py

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
from dcpy.connectors.esri import arcgis_feature_service
1717

1818

19-
class ConnectorSource(ABC):
19+
class _Source(ABC):
2020
type: str
2121

2222
@property
@@ -25,29 +25,41 @@ def key(self) -> str:
2525
return ""
2626

2727

28-
class LocalFileSource(BaseModel, extra="forbid"):
28+
class LocalFileSource(_Source, BaseModel, extra="forbid"):
2929
type: Literal["local_file"]
3030
path: Path
3131

32+
@property
33+
def key(self) -> str:
34+
return str(self.path)
35+
3236

3337
class S3Source(BaseModel, extra="forbid"):
3438
type: Literal["s3"]
3539
bucket: str
3640
key: str
3741

3842

39-
class FileDownloadSource(BaseModel, extra="forbid"):
43+
class FileDownloadSource(_Source, BaseModel, extra="forbid"):
4044
type: Literal["file_download"]
4145
url: str
4246

47+
@property
48+
def key(self) -> str:
49+
return self.url
4350

44-
class GenericApiSource(BaseModel, extra="forbid"):
51+
52+
class GenericApiSource(_Source, BaseModel, extra="forbid"):
4553
type: Literal["api"]
4654
endpoint: str
4755
format: Literal["json", "csv"]
4856

57+
@property
58+
def key(self) -> str:
59+
return self.endpoint
60+
4961

50-
class DEPublished(BaseModel, ConnectorSource, extra="forbid"):
62+
class DEPublished(_Source, BaseModel, extra="forbid"):
5163
type: Literal["edm.publishing.published"]
5264
product: str
5365
filename: str
@@ -57,7 +69,7 @@ def key(self) -> str:
5769
return self.product
5870

5971

60-
class GisDataset(BaseModel, ConnectorSource, extra="forbid"):
72+
class GisDataset(_Source, BaseModel, extra="forbid"):
6173
"""Dataset published by GIS in edm-publishing/datasets"""
6274

6375
# Some datasets here will phased out if we eventually get data
@@ -70,7 +82,7 @@ def key(self) -> str:
7082
return self.name
7183

7284

73-
class SocrataSource(BaseModel, ConnectorSource, extra="forbid"):
85+
class SocrataSource(_Source, BaseModel, extra="forbid"):
7486
type: Literal["socrata"]
7587
org: socrata.Org
7688
uid: str
@@ -88,7 +100,7 @@ def key(self) -> str:
88100
return self.uid
89101

90102

91-
class ESRIFeatureServer(BaseModel, ConnectorSource, extra="forbid"):
103+
class ESRIFeatureServer(_Source, BaseModel, extra="forbid"):
92104
type: Literal["arcgis_feature_server"]
93105
server: esri.Server
94106
dataset: str

0 commit comments

Comments
 (0)