Skip to content

Commit 0f02b15

Browse files
committed
Support Google Cloud Storage
1 parent 7f6d421 commit 0f02b15

File tree

14 files changed

+250
-28
lines changed

14 files changed

+250
-28
lines changed

.devcontainer/.env

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,5 +22,10 @@ AZURE_TEST_READ_WRITE_SAS="se=2100-05-05&sp=rcw&sv=2022-11-02&sr=c&sig=TPz2jEz0t
2222
ALLOW_HTTP=true
2323
HTTP_ENDPOINT=http://localhost:8080
2424

25+
# GCS tests
26+
GOOGLE_TEST_BUCKET=testbucket
27+
GOOGLE_SERVICE_ACCOUNT_KEY='{"gcs_base_url": "http://localhost:4443","disable_oauth": true,"client_email": "","private_key_id": "","private_key": ""}'
28+
GOOGLE_SERVICE_ENDPOINT=http://localhost:4443
29+
2530
# Others
2631
RUST_TEST_THREADS=1

.devcontainer/docker-compose.yml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ services:
1212
- ${USERPROFILE}${HOME}/.gitconfig:/home/rust/.gitconfig:ro
1313
- ${USERPROFILE}${HOME}/.aws:/home/rust/.aws:rw
1414
- ${USERPROFILE}${HOME}/.azure:/home/rust/.azure:rw
15+
- ${USERPROFILE}${HOME}/.config/gcloud:/home/rust/.config/gcloud:rw
1516
- ./entrypoint.sh:/entrypoint.sh
1617
env_file:
1718
- .env
@@ -21,6 +22,7 @@ services:
2122
- minio
2223
- azurite
2324
- webdav
25+
- fake-gcs-server
2426

2527
minio:
2628
image: minio/minio
@@ -61,3 +63,16 @@ services:
6163
interval: 6s
6264
timeout: 2s
6365
retries: 3
66+
67+
fake-gcs-server:
68+
image: tustvold/fake-gcs-server
69+
env_file:
70+
- .env
71+
network_mode: host
72+
command: -scheme http -public-host localhost:4443
73+
restart: unless-stopped
74+
healthcheck:
75+
test: ["CMD", "nc", "-z", "localhost", "4443"]
76+
interval: 6s
77+
timeout: 2s
78+
retries: 3

.devcontainer/entrypoint.sh

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,8 @@ trap "echo 'Caught termination signal. Exiting...'; exit 0" SIGINT SIGTERM
66
az storage container create -n $AZURE_TEST_CONTAINER_NAME --connection-string $AZURE_STORAGE_CONNECTION_STRING
77
az storage container create -n ${AZURE_TEST_CONTAINER_NAME}2 --connection-string $AZURE_STORAGE_CONNECTION_STRING
88

9+
# create fake-gcs bucket
10+
curl -v -X POST --data-binary "{\"name\":\"$GOOGLE_TEST_BUCKET\"}" -H "Content-Type: application/json" "$GOOGLE_SERVICE_ENDPOINT/storage/v1/b"
11+
curl -v -X POST --data-binary "{\"name\":\"${GOOGLE_TEST_BUCKET}2\"}" -H "Content-Type: application/json" "$GOOGLE_SERVICE_ENDPOINT/storage/v1/b"
12+
913
sleep infinity

.github/workflows/ci.yml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,22 @@ jobs:
152152
sleep 1
153153
done
154154
155+
- name: Start fake-gcs-server for Google Cloud Storage emulator tests
156+
run: |
157+
docker run -d \
158+
--env-file .devcontainer/.env \
159+
-p 4443:4443 \
160+
tustvold/fake-gcs-server -scheme http -public-host localhost:4443
161+
162+
while ! curl $GOOGLE_SERVICE_ENDPOINT; do
163+
echo "Waiting for $GOOGLE_SERVICE_ENDPOINT..."
164+
sleep 1
165+
done
166+
167+
# create bucket
168+
curl -v -X POST --data-binary "{\"name\":\"$GOOGLE_TEST_BUCKET\"}" -H "Content-Type: application/json" "$GOOGLE_SERVICE_ENDPOINT/storage/v1/b"
169+
curl -v -X POST --data-binary "{\"name\":\"${GOOGLE_TEST_BUCKET}2\"}" -H "Content-Type: application/json" "$GOOGLE_SERVICE_ENDPOINT/storage/v1/b"
170+
155171
- name: Run tests
156172
run: |
157173
# Run tests with coverage tool

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ aws-credential-types = {version = "1", default-features = false}
2828
azure_storage = {version = "0.21", default-features = false}
2929
futures = "0.3"
3030
home = "0.5"
31-
object_store = {version = "0.11", default-features = false, features = ["aws", "azure", "http"]}
31+
object_store = {version = "0.11", default-features = false, features = ["aws", "azure", "gcp", "http"]}
3232
once_cell = "1"
3333
parquet = {version = "54", default-features = false, features = [
3434
"arrow",

README.md

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ SELECT * FROM parquet.column_stats('/tmp/product_example.parquet')
183183
```
184184

185185
## Object Store Support
186-
`pg_parquet` supports reading and writing Parquet files from/to `S3`, `Azure Blob Storage` and `http(s)` object stores.
186+
`pg_parquet` supports reading and writing Parquet files from/to `S3`, `Azure Blob Storage`, `http(s)` and `Google Cloud Storage` object stores.
187187

188188
> [!NOTE]
189189
> To be able to write into an object store location, you need to grant `parquet_object_store_write` role to your current postgres user.
@@ -270,6 +270,28 @@ Supported authorization methods' priority order is shown below:
270270

271271
`Https` uris are supported by default. You can set `ALLOW_HTTP` environment variable to allow `http` uris.
272272

273+
#### Google Cloud Storage
274+
275+
The simplest way to configure object storage is by creating a JSON config file like `/tmp/gcs.json`:
276+
277+
```bash
278+
$ cat /tmp/gcs.json
279+
{
280+
"gcs_base_url": "http://localhost:4443",
281+
"disable_oauth": true,
282+
"client_email": "",
283+
"private_key_id": "",
284+
"private_key": ""
285+
}
286+
```
287+
288+
Alternatively, you can use the following environment variables when starting postgres to configure the Google Cloud Storage client:
289+
- `GOOGLE_SERVICE_ACCOUNT_KEY`: json serialized service account key **(only via environment variables)**
290+
- `GOOGLE_SERVICE_ACCOUNT_PATH`: an alternative location for the config file **(only via environment variables)**
291+
292+
Supported Google Cloud Storage uri formats are shown below:
293+
- gs:// \<bucket\> / \<path\>
294+
273295
## Copy Options
274296
`pg_parquet` supports the following options in the `COPY TO` command:
275297
- `format parquet`: you need to specify this option to read or write Parquet files which does not end with `.parquet[.<compression>]` extension,

src/arrow_parquet/uri_utils.rs

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@ use url::Url;
1919

2020
use crate::{
2121
object_store::{
22-
aws::parse_s3_bucket, azure::parse_azure_blob_container, http::parse_http_base_uri,
23-
object_store_cache::get_or_create_object_store,
22+
aws::parse_s3_bucket, azure::parse_azure_blob_container, gcs::parse_gcs_bucket,
23+
http::parse_http_base_uri, object_store_cache::get_or_create_object_store,
2424
},
2525
PG_BACKEND_TOKIO_RUNTIME,
2626
};
@@ -50,7 +50,7 @@ impl ParsedUriInfo {
5050
fn try_parse_scheme(uri: &Url) -> Result<(ObjectStoreScheme, Path), String> {
5151
ObjectStoreScheme::parse(uri).map_err(|_| {
5252
format!(
53-
"unrecognized uri {}. pg_parquet supports local paths, https://, s3:// or az:// schemes.",
53+
"unrecognized uri {}. pg_parquet supports local paths, https://, s3://, az:// or gs:// schemes.",
5454
uri
5555
)
5656
})
@@ -67,8 +67,11 @@ impl ParsedUriInfo {
6767
ObjectStoreScheme::Http => parse_http_base_uri(uri).
6868
ok_or(format!("unsupported http storage uri: {uri}"))
6969
.map(Some),
70+
ObjectStoreScheme::GoogleCloudStorage => parse_gcs_bucket(uri)
71+
.ok_or(format!("unsupported gcs uri {uri}"))
72+
.map(Some),
7073
ObjectStoreScheme::Local => Ok(None),
71-
_ => Err(format!("unsupported scheme {} in uri {}. pg_parquet supports local paths, https://, s3:// or az:// schemes.",
74+
_ => Err(format!("unsupported scheme {} in uri {}. pg_parquet supports local paths, https://, s3://, az:// or gs:// schemes.",
7275
uri.scheme(), uri))
7376
}
7477
}

src/object_store.rs

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,6 @@
1-
use crate::{
2-
arrow_parquet::uri_utils::uri_as_string,
3-
object_store::{
4-
aws::create_s3_object_store, azure::create_azure_object_store,
5-
local_file::create_local_file_object_store,
6-
},
7-
PG_BACKEND_TOKIO_RUNTIME,
8-
};
9-
101
pub(crate) mod aws;
112
pub(crate) mod azure;
3+
pub(crate) mod gcs;
124
pub(crate) mod http;
135
pub(crate) mod local_file;
146
pub(crate) mod object_store_cache;

src/object_store/aws.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,9 @@ use aws_credential_types::provider::ProvideCredentials;
55
use object_store::aws::AmazonS3Builder;
66
use url::Url;
77

8-
use super::{object_store_cache::ObjectStoreWithExpiration, PG_BACKEND_TOKIO_RUNTIME};
8+
use crate::PG_BACKEND_TOKIO_RUNTIME;
9+
10+
use super::object_store_cache::ObjectStoreWithExpiration;
911

1012
// create_s3_object_store creates an AmazonS3 object store with the given bucket name.
1113
// It is configured by environment variables and aws config files as fallback method.

0 commit comments

Comments
 (0)