From a3ac24d122ef73252475babd0cf94af51e381f33 Mon Sep 17 00:00:00 2001 From: Marko Grujic Date: Fri, 18 Oct 2024 13:41:04 +0200 Subject: [PATCH] Don't override passed object store client options from env unless missing --- Cargo.lock | 14 +++++++++++- object_store_factory/Cargo.toml | 2 ++ object_store_factory/src/aws.rs | 9 ++++---- object_store_factory/src/google.rs | 25 ++++++++++++++++++--- object_store_factory/src/lib.rs | 36 +++++++++++++++++++++++++++++- 5 files changed, 76 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 25cec8d2..224c92df 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,6 +1,6 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. -version = 3 +version = 4 [[package]] name = "addr2line" @@ -4487,8 +4487,10 @@ dependencies = [ "async-trait", "futures", "object_store", + "rstest", "serde", "serde_json", + "temp-env", "tempfile", "tokio", "toml", @@ -6739,6 +6741,16 @@ version = "0.12.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" +[[package]] +name = "temp-env" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96374855068f47402c3121c6eed88d29cb1de8f3ab27090e273e420bdabcf050" +dependencies = [ + "futures", + "parking_lot", +] + [[package]] name = "tempfile" version = "3.13.0" diff --git a/object_store_factory/Cargo.toml b/object_store_factory/Cargo.toml index 547daa28..47f4f160 100644 --- a/object_store_factory/Cargo.toml +++ b/object_store_factory/Cargo.toml @@ -17,6 +17,8 @@ tracing-subscriber = { workspace = true } url = { workspace = true } [dev-dependencies] +rstest = "*" +temp-env = { version = "0.3", features = ["async_closure"] } tokio = { workspace = true } diff --git a/object_store_factory/src/aws.rs b/object_store_factory/src/aws.rs index 42e8e671..ecd4f2bd 100644 --- a/object_store_factory/src/aws.rs +++ b/object_store_factory/src/aws.rs @@ -224,7 +224,7 @@ pub fn add_amazon_s3_environment_variables( if let (Some(key), Some(value)) = (os_key.to_str(), os_value.to_str()) { if key.starts_with("AWS_") { if let Ok(config_key) = key.to_ascii_lowercase().parse() { - options.insert(config_key, value.to_string()); + options.entry(config_key).or_insert(value.to_string()); } } } @@ -236,10 +236,9 @@ pub fn add_amazon_s3_environment_variables( .to_uppercase(), ) == Ok("true".to_string()) { - options.insert( - AmazonS3ConfigKey::Client(ClientConfigKey::AllowHttp), - "true".to_string(), - ); + options + .entry(AmazonS3ConfigKey::Client(ClientConfigKey::AllowHttp)) + .or_insert("true".to_string()); } } diff --git a/object_store_factory/src/google.rs b/object_store_factory/src/google.rs index b97f5099..f6b37c41 100644 --- a/object_store_factory/src/google.rs +++ b/object_store_factory/src/google.rs @@ -107,15 +107,34 @@ pub fn map_options_into_google_config_keys( pub fn add_google_cloud_storage_environment_variables( options: &mut HashMap, ) { - if let Ok(service_account_path) = env::var("SERVICE_ACCOUNT") { - options.insert(GoogleConfigKey::ServiceAccount, service_account_path); + // Insert the service account {path, key} from env only if neither the path nor the key are + // already supplied. + // + // This is because if the same config was already passed we don't want to override it, and if + // the other config was passed the client builder will error out (doesn't support both file and + // key). + if !options.contains_key(&GoogleConfigKey::ServiceAccount) + && !options.contains_key(&GoogleConfigKey::ServiceAccountKey) + { + if let Ok(service_account_path) = env::var("GOOGLE_SERVICE_ACCOUNT") { + options.insert(GoogleConfigKey::ServiceAccount, service_account_path); + } else if let Ok(service_account_path) = env::var("GOOGLE_SERVICE_ACCOUNT_KEY") { + options.insert(GoogleConfigKey::ServiceAccountKey, service_account_path); + } } + // Override all other configs from env vars for (os_key, os_value) in env::vars_os() { if let (Some(key), Some(value)) = (os_key.to_str(), os_value.to_str()) { if key.starts_with("GOOGLE_") { if let Ok(config_key) = key.to_ascii_lowercase().parse() { - options.insert(config_key, value.to_string()); + if !matches!( + config_key, + GoogleConfigKey::ServiceAccount + | GoogleConfigKey::ServiceAccountKey + ) { + options.entry(config_key).or_insert(value.to_string()); + } } } } diff --git a/object_store_factory/src/lib.rs b/object_store_factory/src/lib.rs index cbe1bac4..d37865a4 100644 --- a/object_store_factory/src/lib.rs +++ b/object_store_factory/src/lib.rs @@ -145,7 +145,7 @@ pub async fn build_object_store_from_opts( Ok(store) } ObjectStoreScheme::Local => { - let store = local::LocalConfig { + let store = LocalConfig { data_dir: url.path().to_string(), disable_hardlinks: false, } @@ -197,6 +197,7 @@ pub async fn build_storage_location_info_from_opts( #[cfg(test)] mod tests { use super::*; + use rstest::rstest; use serde_json::json; #[tokio::test] @@ -294,4 +295,37 @@ mod tests { let result = ObjectStoreConfig::build_from_json(&url, &json_str).await; assert!(result.is_err()); } + + #[rstest] + #[tokio::test] + async fn test_build_aws_object_store(#[values(true, false)] use_env: bool) { + let url = Url::parse("s3://my-bucket").unwrap(); + let options: HashMap = if use_env { + HashMap::new() + } else { + HashMap::from([ + ("access_key_id".to_string(), "my-key".to_string()), + ("secret_access_key".to_string(), "my-secret".to_string()), + ]) + }; + + let store = temp_env::async_with_vars( + [ + ("AWS_ACCESS_KEY_ID", Some("env-key")), + ("AWS_SECRET_ACCESS_KEY", Some("env-secret")), + ], + build_object_store_from_opts(&url, options), + ) + .await; + + let debug_output = format!("{store:?}"); + assert!(debug_output.contains("bucket: \"my-bucket\"")); + if use_env { + assert!(debug_output.contains("key_id: \"env-key\"")); + assert!(debug_output.contains("secret_key: \"env-secret\"")); + } else { + assert!(debug_output.contains("key_id: \"my-key\"")); + assert!(debug_output.contains("secret_key: \"my-secret\"")); + } + } }