diff --git a/ballista/rust/client/Cargo.toml b/ballista/rust/client/Cargo.toml index a7ab2164a..450b61f47 100644 --- a/ballista/rust/client/Cargo.toml +++ b/ballista/rust/client/Cargo.toml @@ -41,7 +41,9 @@ tempfile = "3" tokio = "1.0" [features] +azure = ["ballista-core/azure"] default = [] +gcs = ["ballista-core/gcs"] hdfs = ["ballista-core/hdfs"] s3 = ["ballista-core/s3"] standalone = ["ballista-executor", "ballista-scheduler"] diff --git a/ballista/rust/core/Cargo.toml b/ballista/rust/core/Cargo.toml index 6699af013..5827b3ae9 100644 --- a/ballista/rust/core/Cargo.toml +++ b/ballista/rust/core/Cargo.toml @@ -14,7 +14,6 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. - [package] name = "ballista-core" description = "Ballista Distributed Compute" @@ -31,8 +30,10 @@ build = "build.rs" rustc-args = ["--cfg", "docsrs"] [features] +azure = ["object_store/azure"] # Used for testing ONLY: causes all values to hash to the same value (test for collisions) force_hash_collisions = ["datafusion/force_hash_collisions"] +gcs = ["object_store/gcp"] # Used to enable hdfs to be registered in the ObjectStoreRegistry by default hdfs = ["datafusion-objectstore-hdfs"] s3 = ["object_store/aws"] @@ -40,7 +41,6 @@ simd = ["datafusion/simd"] [dependencies] ahash = { version = "0.8", default-features = false } - arrow-flight = { version = "23.0.0", features = ["flight-sql-experimental"] } async-trait = "0.1.41" chrono = { version = "0.4", default-features = false } @@ -50,13 +50,11 @@ datafusion-objectstore-hdfs = { version = "0.1.0", optional = true } datafusion-proto = { git = "https://github.com/apache/arrow-datafusion", rev = "06a4f79f02fcb6ea85303925b7c5a9b0231e3fee" } futures = "0.3" hashbrown = "0.12" - itertools = "0.10" libloading = "0.7.3" log = "0.4" object_store = "0.5.0" once_cell = "1.9.0" - parking_lot = "0.12" parse_arg = "0.1.3" prost = "0.11" @@ -76,4 +74,7 @@ tempfile = "3" [build-dependencies] rustc_version = "0.4.0" -tonic-build = { version = "0.8", default-features = false, features = ["transport", "prost"] } +tonic-build = { version = "0.8", default-features = false, features = [ + "transport", + "prost", +] } diff --git a/ballista/rust/core/src/utils.rs b/ballista/rust/core/src/utils.rs index 21626c555..697567fe0 100644 --- a/ballista/rust/core/src/utils.rs +++ b/ballista/rust/core/src/utils.rs @@ -52,6 +52,10 @@ use futures::StreamExt; use log::error; #[cfg(feature = "s3")] use object_store::aws::AmazonS3Builder; +#[cfg(feature = "azure")] +use object_store::azure::MicrosoftAzureBuilder; +#[cfg(feature = "gcs")] +use object_store::gcp::GoogleCloudStorageBuilder; use object_store::ObjectStore; use std::io::{BufWriter, Write}; use std::marker::PhantomData; @@ -109,6 +113,30 @@ impl ObjectStoreProvider for FeatureBasedObjectStoreProvider { } } + #[cfg(feature = "gcs")] + { + if url.to_string().starts_with("gcs://") { + if let Some(bucket_name) = url.host_str() { + let store = GoogleCloudStorageBuilder::from_env() + .with_bucket_name(bucket_name) + .build()?; + return Ok(Arc::new(store)); + } + } + } + + #[cfg(feature = "azure")] + { + if url.to_string().starts_with("azure://") { + if let Some(bucket_name) = url.host_str() { + let store = MicrosoftAzureBuilder::from_env() + .with_container_name(bucket_name) + .build()?; + return Ok(Arc::new(store)); + } + } + } + Err(DataFusionError::Execution(format!( "No object store available for {}", url