Commit bab2208

Support AWS_ENDPOINT_URL (#84)
- [x] Supports `AWS_ENDPOINT_URL`, e.g. you can set `AWS_ENDPOINT_URL=http://localhost:9000` and `AWS_ALLOW_HTTP=true` for a local MinIO server.
- [x] We were already supporting `AWS_SESSION_TOKEN`, just without documentation and tests; both are added now. As a side note, we do not automatically call `AssumeRole` to fetch a token, but when a token is passed via environment variables or config files, authentication succeeds.

Closes #83.
1 parent 4086ea4 commit bab2208
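
For context, the two new settings map directly onto `object_store`'s S3 builder. Below is a minimal sketch (not part of this commit) of what `AWS_ENDPOINT_URL=http://localhost:9000` plus `AWS_ALLOW_HTTP=true` amount to when the client is configured by hand; `local_minio_store` is a hypothetical helper, and the credentials/region are the MinIO defaults from `.devcontainer/.env`.

```rust
use object_store::aws::{AmazonS3, AmazonS3Builder};

// Hypothetical helper, not part of this commit: builds the same kind of client
// that get_s3_object_store now derives from AWS_* environment variables and
// config files, hard-coded here for a local MinIO server.
fn local_minio_store(bucket_name: &str) -> AmazonS3 {
    AmazonS3Builder::new()
        .with_bucket_name(bucket_name)
        .with_endpoint("http://localhost:9000") // AWS_ENDPOINT_URL
        .with_allow_http(true)                  // AWS_ALLOW_HTTP (MinIO serves plain http)
        .with_access_key_id("minioadmin")       // AWS_ACCESS_KEY_ID
        .with_secret_access_key("minioadmin")   // AWS_SECRET_ACCESS_KEY
        .with_region("us-east-1")               // AWS_REGION
        .build()
        .unwrap_or_else(|e| panic!("{}", e))
}
```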

12 files changed: +178 −82 lines changed

.devcontainer/.env

Lines changed: 2 additions & 1 deletion
@@ -2,10 +2,11 @@
 AWS_ACCESS_KEY_ID=minioadmin
 AWS_SECRET_ACCESS_KEY=minioadmin
 AWS_REGION=us-east-1
+AWS_ENDPOINT_URL=http://localhost:9000
+AWS_ALLOW_HTTP=true
 AWS_S3_TEST_BUCKET=testbucket
 MINIO_ROOT_USER=minioadmin
 MINIO_ROOT_PASSWORD=minioadmin

 # Others
 RUST_TEST_THREADS=1
-PG_PARQUET_TEST=true

.devcontainer/create-test-buckets.sh

Lines changed: 0 additions & 3 deletions
This file was deleted.

.devcontainer/devcontainer.json

Lines changed: 0 additions & 1 deletion
@@ -3,7 +3,6 @@
   "dockerComposeFile": "docker-compose.yml",
   "service": "app",
   "workspaceFolder": "/workspace",
-  "postStartCommand": "bash .devcontainer/create-test-buckets.sh",
   "postAttachCommand": "sudo chown -R rust /workspace",
   "customizations": {
     "vscode": {

.devcontainer/docker-compose.yml

Lines changed: 3 additions & 1 deletion
@@ -23,10 +23,12 @@ services:
     env_file:
       - .env
     network_mode: host
-    command: server /data
+    entrypoint: "./entrypoint.sh"
     restart: unless-stopped
     healthcheck:
       test: ["CMD", "curl", "http://localhost:9000"]
       interval: 6s
       timeout: 2s
       retries: 3
+    volumes:
+      - ./minio-entrypoint.sh:/entrypoint.sh

.devcontainer/minio-entrypoint.sh

Lines changed: 20 additions & 0 deletions
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+trap "echo 'Caught termination signal. Exiting...'; exit 0" SIGINT SIGTERM
+
+minio server /data &
+
+minio_pid=$!
+
+while ! curl $AWS_ENDPOINT_URL; do
+  echo "Waiting for $AWS_ENDPOINT_URL..."
+  sleep 1
+done
+
+# set access key and secret key
+mc alias set local $AWS_ENDPOINT_URL $MINIO_ROOT_USER $MINIO_ROOT_PASSWORD
+
+# create bucket
+mc mb local/$AWS_S3_TEST_BUCKET
+
+wait $minio_pid

.github/workflows/ci.yml

Lines changed: 14 additions & 10 deletions
@@ -89,7 +89,7 @@ jobs:
     - name: Install and configure pgrx
       run: |
        cargo install --locked [email protected]
-        cargo pgrx init --pg${{ env.PG_MAJOR }} $(which pg_config)
+        cargo pgrx init --pg${{ env.PG_MAJOR }} /usr/lib/postgresql/${{ env.PG_MAJOR }}/bin/pg_config

     - name: Install cargo-llvm-cov for coverage report
       run: cargo install --locked [email protected]
@@ -101,21 +101,25 @@

     - name: Set up permissions for PostgreSQL
       run: |
-        sudo chmod a+rwx $(pg_config --pkglibdir) \
-                         $(pg_config --sharedir)/extension \
+        sudo chmod a+rwx $(/usr/lib/postgresql/${{ env.PG_MAJOR }}/bin/pg_config --pkglibdir) \
+                         $(/usr/lib/postgresql/${{ env.PG_MAJOR }}/bin/pg_config --sharedir)/extension \
                          /var/run/postgresql/

     - name: Start Minio for s3 emulator tests
       run: |
-        docker run -d --env-file .devcontainer/.env -p 9000:9000 minio/minio server /data
-
-        while ! nc -z localhost 9000; do
-          echo "Waiting for localhost:9000..."
-          sleep 1
+        docker run -d \
+          --env-file .devcontainer/.env \
+          -p 9000:9000 \
+          --entrypoint "./entrypoint.sh" \
+          --volume ./.devcontainer/minio-entrypoint.sh:/entrypoint.sh \
+          --name miniocontainer \
+          minio/minio
+
+        while ! curl $AWS_ENDPOINT_URL; do
+          echo "Waiting for $AWS_ENDPOINT_URL..."
+          sleep 1
         done

-        aws --endpoint-url http://localhost:9000 s3 mb s3://$AWS_S3_TEST_BUCKET
-
     - name: Run tests
       run: |
        # Run tests with coverage tool
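
The CI step above only waits for the MinIO endpoint to answer; whether the test bucket is actually usable only surfaces later in the test run. A hypothetical smoke test along these lines (not part of this commit, assuming an async runtime such as tokio and the variables from `.devcontainer/.env` being exported) would exercise the same client path the extension uses:

```rust
use object_store::{aws::AmazonS3Builder, path::Path, ObjectStore};

// Hypothetical smoke test, not part of this commit: round-trips one object through
// the MinIO container started above, using the same AWS_* environment variables.
async fn minio_round_trip() -> object_store::Result<()> {
    let bucket = std::env::var("AWS_S3_TEST_BUCKET").expect("AWS_S3_TEST_BUCKET is set");

    // from_env() fills the builder from AWS_* variables (access key, secret key,
    // AWS_ALLOW_HTTP, ...). The endpoint is set explicitly below in case this
    // object_store version does not read AWS_ENDPOINT_URL itself.
    let mut builder = AmazonS3Builder::from_env().with_bucket_name(bucket);
    if let Ok(endpoint) = std::env::var("AWS_ENDPOINT_URL") {
        builder = builder.with_endpoint(endpoint).with_allow_http(true);
    }
    let store = builder.build()?;

    let location = Path::from("ci-smoke-test.txt");
    store.put(&location, b"hello".to_vec().into()).await?;

    let bytes = store.get(&location).await?.bytes().await?;
    assert_eq!(&bytes[..], &b"hello"[..]);
    Ok(())
}
```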

Cargo.lock

Lines changed: 23 additions & 10 deletions
Some generated files are not rendered by default.

Cargo.toml

Lines changed: 3 additions & 2 deletions
@@ -23,8 +23,9 @@ pg_test = []
 arrow = {version = "53", default-features = false}
 arrow-cast = {version = "53", default-features = false}
 arrow-schema = {version = "53", default-features = false}
-aws-config = { version = "1.5", default-features = false, features = ["rustls"]}
-aws-credential-types = {version = "1.2", default-features = false}
+aws-config = { version = "1", default-features = false, features = ["rustls"]}
+aws-credential-types = {version = "1", default-features = false}
+aws-sdk-sts = "1"
 futures = "0.3"
 object_store = {version = "0.11", default-features = false, features = ["aws"]}
 once_cell = "1"

README.md

Lines changed: 7 additions & 3 deletions
@@ -174,10 +174,14 @@ region = eu-central-1
 Alternatively, you can use the following environment variables when starting postgres to configure the S3 client:
 - `AWS_ACCESS_KEY_ID`: the access key ID of the AWS account
 - `AWS_SECRET_ACCESS_KEY`: the secret access key of the AWS account
+- `AWS_SESSION_TOKEN`: the session token for the AWS account
 - `AWS_REGION`: the default region of the AWS account
-- `AWS_SHARED_CREDENTIALS_FILE`: an alternative location for the credentials file
-- `AWS_CONFIG_FILE`: an alternative location for the config file
-- `AWS_PROFILE`: the name of the profile from the credentials and config file (default profile name is `default`)
+- `AWS_ENDPOINT_URL`: the endpoint
+- `AWS_SHARED_CREDENTIALS_FILE`: an alternative location for the credentials file **(only via environment variables)**
+- `AWS_CONFIG_FILE`: an alternative location for the config file **(only via environment variables)**
+- `AWS_PROFILE`: the name of the profile from the credentials and config file (default profile name is `default`) **(only via environment variables)**
+- `AWS_ALLOW_HTTP`: allows http endpoints **(only via environment variables)**
+

 > [!NOTE]
 > To be able to write into a object store location, you need to grant `parquet_object_store_write` role to your current postgres user.

src/arrow_parquet/uri_utils.rs

Lines changed: 30 additions & 42 deletions
@@ -1,11 +1,7 @@
 use std::{sync::Arc, sync::LazyLock};

 use arrow::datatypes::SchemaRef;
-use aws_config::{
-    environment::{EnvironmentVariableCredentialsProvider, EnvironmentVariableRegionProvider},
-    meta::{credentials::CredentialsProviderChain, region::RegionProviderChain},
-    profile::{ProfileFileCredentialsProvider, ProfileFileRegionProvider},
-};
+use aws_config::BehaviorVersion;
 use aws_credential_types::provider::ProvideCredentials;
 use object_store::{
     aws::{AmazonS3, AmazonS3Builder},
@@ -89,51 +85,43 @@ fn object_store_with_location(uri: &Url, copy_from: bool) -> (Arc<dyn ObjectStor
     }
 }

+// get_s3_object_store creates an AmazonS3 object store with the given bucket name.
+// It is configured by environment variables and aws config files as fallback method.
+// We need to read the config files to make the fallback method work since object_store
+// does not provide a way to read them. Currently, we only support to extract
+// "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_SESSION_TOKEN", "AWS_ENDPOINT_URL",
+// and "AWS_REGION" from the config files.
 async fn get_s3_object_store(bucket_name: &str) -> AmazonS3 {
-    let mut aws_s3_builder = AmazonS3Builder::new().with_bucket_name(bucket_name);
-
-    let is_test_running = std::env::var("PG_PARQUET_TEST").is_ok();
+    let mut aws_s3_builder = AmazonS3Builder::from_env().with_bucket_name(bucket_name);

-    if is_test_running {
-        // use minio for testing
-        aws_s3_builder = aws_s3_builder.with_endpoint("http://localhost:9000");
-        aws_s3_builder = aws_s3_builder.with_allow_http(true);
-    }
+    // first tries environment variables and then the config files
+    let sdk_config = aws_config::defaults(BehaviorVersion::v2024_03_28())
+        .load()
+        .await;

-    let aws_profile_name = std::env::var("AWS_PROFILE").unwrap_or("default".to_string());
-
-    let region_provider = RegionProviderChain::first_try(EnvironmentVariableRegionProvider::new())
-        .or_else(
-            ProfileFileRegionProvider::builder()
-                .profile_name(aws_profile_name.clone())
-                .build(),
-        );
+    if let Some(credential_provider) = sdk_config.credentials_provider() {
+        if let Ok(credentials) = credential_provider.provide_credentials().await {
+            // AWS_ACCESS_KEY_ID
+            aws_s3_builder = aws_s3_builder.with_access_key_id(credentials.access_key_id());

-    let region = region_provider.region().await;
+            // AWS_SECRET_ACCESS_KEY
+            aws_s3_builder = aws_s3_builder.with_secret_access_key(credentials.secret_access_key());

-    if let Some(region) = region {
-        aws_s3_builder = aws_s3_builder.with_region(region.to_string());
+            if let Some(token) = credentials.session_token() {
+                // AWS_SESSION_TOKEN
+                aws_s3_builder = aws_s3_builder.with_token(token);
+            }
+        }
     }

-    let credential_provider = CredentialsProviderChain::first_try(
-        "Environment",
-        EnvironmentVariableCredentialsProvider::new(),
-    )
-    .or_else(
-        "Profile",
-        ProfileFileCredentialsProvider::builder()
-            .profile_name(aws_profile_name)
-            .build(),
-    );
-
-    if let Ok(credentials) = credential_provider.provide_credentials().await {
-        aws_s3_builder = aws_s3_builder.with_access_key_id(credentials.access_key_id());
-
-        aws_s3_builder = aws_s3_builder.with_secret_access_key(credentials.secret_access_key());
+    // AWS_ENDPOINT_URL
+    if let Some(aws_endpoint_url) = sdk_config.endpoint_url() {
+        aws_s3_builder = aws_s3_builder.with_endpoint(aws_endpoint_url);
+    }

-        if let Some(token) = credentials.session_token() {
-            aws_s3_builder = aws_s3_builder.with_token(token);
-        }
+    // AWS_REGION
+    if let Some(aws_region) = sdk_config.region() {
+        aws_s3_builder = aws_s3_builder.with_region(aws_region.as_ref());
     }

     aws_s3_builder.build().unwrap_or_else(|e| panic!("{}", e))
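
Since the builder is now fed from the SDK's resolved configuration, a quick way to see what the environment-then-config-file fallback chain actually produced is to inspect the `SdkConfig` directly. A hypothetical debugging snippet, not part of this commit:

```rust
use aws_config::BehaviorVersion;

// Hypothetical debugging helper, not part of this commit: prints what the AWS SDK
// resolved from environment variables and the ~/.aws/{config,credentials} files,
// i.e. the same values get_s3_object_store copies into AmazonS3Builder.
async fn dump_resolved_aws_config() {
    let sdk_config = aws_config::defaults(BehaviorVersion::v2024_03_28())
        .load()
        .await;

    println!("endpoint: {:?}", sdk_config.endpoint_url()); // AWS_ENDPOINT_URL / endpoint_url
    println!("region:   {:?}", sdk_config.region());       // AWS_REGION / region
}
```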
