From 80b71058230154d6417b85e6c53b7306c5a63868 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Artjoms=20I=C5=A1kovs?= Date: Fri, 16 Feb 2024 06:32:45 +0000 Subject: [PATCH] Add support for public S3 buckets If the access_key/secret_key aren't set, skip signing the requests and treat the bucket as public. --- docker-compose.yml | 5 +++- src/config/context.rs | 12 +++++++-- src/config/schema.rs | 51 +++++++++++++++++++++++++------------ src/object_store/wrapped.rs | 4 +-- tests/statements/ddl.rs | 6 ++++- tests/statements/mod.rs | 12 +++++++++ 6 files changed, 68 insertions(+), 22 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index f72efeb9..bfbf6d77 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -20,7 +20,10 @@ services: /usr/bin/mc rm -r --force test-minio/seafowl-test-bucket; /usr/bin/mc mb test-minio/seafowl-test-bucket; /usr/bin/mc cp test-data/table_with_ns_column.parquet test-minio/seafowl-test-bucket/table_with_ns_column.parquet; /usr/bin/mc anonymous set public - test-minio/seafowl-test-bucket/table_with_ns_column.parquet; exit 0; " + test-minio/seafowl-test-bucket/table_with_ns_column.parquet; + + /usr/bin/mc mb test-minio/seafowl-test-bucket-public; /usr/bin/mc anonymous set public + test-minio/seafowl-test-bucket-public; exit 0; " fake-gcs: image: tustvold/fake-gcs-server diff --git a/src/config/context.rs b/src/config/context.rs index 0b794c20..98ef2a0b 100644 --- a/src/config/context.rs +++ b/src/config/context.rs @@ -102,8 +102,6 @@ pub fn build_object_store( .. }) => { let mut builder = AmazonS3Builder::new() - .with_access_key_id(access_key_id) - .with_secret_access_key(secret_access_key) .with_region(region.clone().unwrap_or_default()) .with_bucket_name(bucket) .with_allow_http(true); @@ -112,6 +110,16 @@ pub fn build_object_store( builder = builder.with_endpoint(endpoint); } + if let (Some(access_key_id), Some(secret_access_key)) = + (&access_key_id, &secret_access_key) + { + builder = builder + .with_access_key_id(access_key_id) + .with_secret_access_key(secret_access_key) + } else { + builder = builder.with_skip_signature(true) + } + let store = builder.build()?; if let Some(props) = cache_properties { diff --git a/src/config/schema.rs b/src/config/schema.rs index b56ad137..24cf526d 100644 --- a/src/config/schema.rs +++ b/src/config/schema.rs @@ -126,8 +126,8 @@ pub struct InMemory {} #[derive(Deserialize, Debug, PartialEq, Eq, Clone)] pub struct S3 { pub region: Option, - pub access_key_id: String, - pub secret_access_key: String, + pub access_key_id: Option, + pub secret_access_key: Option, pub endpoint: Option, pub bucket: String, pub prefix: Option, @@ -141,18 +141,8 @@ impl S3 { ) -> Result { Ok(S3 { region: map.get("region").cloned(), - access_key_id: map - .remove("access_key_id") - .ok_or(ConfigError::Message( - "'access_key_id' not found in provided options".to_string(), - ))? - .clone(), - secret_access_key: map - .remove("secret_access_key") - .ok_or(ConfigError::Message( - "'secret_access_key' not found in provided options".to_string(), - ))? - .clone(), + access_key_id: map.remove("access_key_id"), + secret_access_key: map.remove("secret_access_key"), endpoint: map.remove("endpoint"), bucket, prefix: None, @@ -531,6 +521,17 @@ secret_access_key = "ABC..." endpoint = "https://s3.amazonaws.com:9000" bucket = "seafowl" +[catalog] +type = "postgres" +dsn = "postgresql://user:pass@localhost:5432/somedb" +"#; + + const TEST_CONFIG_S3_PUBLIC: &str = r#" +[object_store] +type = "s3" +endpoint = "https://s3.amazonaws.com:9000" +bucket = "seafowl" + [catalog] type = "postgres" dsn = "postgresql://user:pass@localhost:5432/somedb" @@ -629,8 +630,8 @@ cache_control = "private, max-age=86400" config.object_store, ObjectStore::S3(S3 { region: None, - access_key_id: "AKI...".to_string(), - secret_access_key: "ABC...".to_string(), + access_key_id: Some("AKI...".to_string()), + secret_access_key: Some("ABC...".to_string()), endpoint: Some("https://s3.amazonaws.com:9000".to_string()), bucket: "seafowl".to_string(), prefix: None, @@ -639,6 +640,24 @@ cache_control = "private, max-age=86400" ); } + #[test] + fn test_parse_public_config_with_s3() { + let config = load_config_from_string(TEST_CONFIG_S3_PUBLIC, false, None).unwrap(); + + assert_eq!( + config.object_store, + ObjectStore::S3(S3 { + region: None, + access_key_id: None, + secret_access_key: None, + endpoint: Some("https://s3.amazonaws.com:9000".to_string()), + bucket: "seafowl".to_string(), + prefix: None, + cache_properties: None, + }) + ); + } + #[test] fn test_parse_config_basic() { let config = load_config_from_string(TEST_CONFIG_BASIC, false, None).unwrap(); diff --git a/src/object_store/wrapped.rs b/src/object_store/wrapped.rs index 467b0303..d8524a97 100644 --- a/src/object_store/wrapped.rs +++ b/src/object_store/wrapped.rs @@ -358,8 +358,8 @@ mod tests { ) -> Result<()> { let config = ObjectStore::S3(S3 { region: None, - access_key_id: "access_key_id".to_string(), - secret_access_key: "secret_access_key".to_string(), + access_key_id: Some("access_key_id".to_string()), + secret_access_key: Some("secret_access_key".to_string()), bucket: bucket.to_string(), prefix: prefix.map(|p| p.to_string()), endpoint: endpoint.clone(), diff --git a/tests/statements/ddl.rs b/tests/statements/ddl.rs index 69b4dac7..6ce76d6d 100644 --- a/tests/statements/ddl.rs +++ b/tests/statements/ddl.rs @@ -3,7 +3,11 @@ use crate::statements::*; #[rstest] #[tokio::test] async fn test_create_table( - #[values(ObjectStoreType::InMemory, ObjectStoreType::Gcs)] + #[values( + ObjectStoreType::InMemory, + ObjectStoreType::Gcs, + ObjectStoreType::S3Public + )] object_store_type: ObjectStoreType, ) { let (context, _) = make_context_with_pg(object_store_type).await; diff --git a/tests/statements/mod.rs b/tests/statements/mod.rs index 668b94c4..202b6f20 100644 --- a/tests/statements/mod.rs +++ b/tests/statements/mod.rs @@ -50,6 +50,8 @@ enum ObjectStoreType { InMemory, // S3 object store with an optional path to the actual data folder S3(Option<&'static str>), + // Publicly-accessible S3 bucket + S3Public, } /// Make a SeafowlContext that's connected to a real PostgreSQL database @@ -92,6 +94,16 @@ ttl = 30 ), None, ), + ObjectStoreType::S3Public => ( + r#"type = "s3" +endpoint = "http://127.0.0.1:9000" +bucket = "seafowl-test-bucket-public" +[object_store.cache_properties] +ttl = 30 +"# + .to_string(), + None, + ), ObjectStoreType::Gcs => { let creds_json = json!({"gcs_base_url": "http://localhost:4443", "disable_oauth": true, "client_email": "", "private_key": "", "private_key_id": ""}); // gcs_base_url should match docker-compose.yml:fake-gcs-server