diff --git a/python/python/lance/dataset.py b/python/python/lance/dataset.py index 6fb011d136..3c7123dd36 100644 --- a/python/python/lance/dataset.py +++ b/python/python/lance/dataset.py @@ -1589,6 +1589,7 @@ def commit( operation: LanceOperation.BaseOperation, read_version: Optional[int] = None, commit_lock: Optional[CommitLock] = None, + storage_options: Optional[Dict[str, str]] = None, ) -> LanceDataset: """Create a new version of dataset @@ -1623,6 +1624,9 @@ def commit( commit_lock : CommitLock, optional A custom commit lock. Only needed if your object store does not support atomic commits. See the user guide for more details. + storage_options : optional, dict + Extra options that make sense for a particular storage connection. This is + used to store connection parameters like credentials, endpoint, etc. Returns ------- @@ -1660,8 +1664,14 @@ def commit( f"commit_lock must be a function, got {type(commit_lock)}" ) - _Dataset.commit(base_uri, operation._to_inner(), read_version, commit_lock) - return LanceDataset(base_uri) + _Dataset.commit( + base_uri, + operation._to_inner(), + read_version, + commit_lock, + storage_options=storage_options, + ) + return LanceDataset(base_uri, storage_options=storage_options) def validate(self): """ diff --git a/python/python/lance/fragment.py b/python/python/lance/fragment.py index fb78cd4d8a..fea94b4869 100644 --- a/python/python/lance/fragment.py +++ b/python/python/lance/fragment.py @@ -147,6 +147,7 @@ def create( mode: str = "append", *, use_legacy_format=True, + storage_options: Optional[Dict[str, str]] = None, ) -> FragmentMetadata: """Create a :class:`FragmentMetadata` from the given data. @@ -180,6 +181,9 @@ def create( use_legacy_format: bool, default True Use the legacy format to write Lance files. The default is True while the v2 format is still in beta. + storage_options : optional, dict + Extra options that make sense for a particular storage connection. This is + used to store connection parameters like credentials, endpoint, etc. See Also -------- @@ -219,6 +223,7 @@ def create( progress=progress, mode=mode, use_legacy_format=use_legacy_format, + storage_options=storage_options, ) return FragmentMetadata(inner_meta.json()) diff --git a/python/src/dataset.rs b/python/src/dataset.rs index ab3b0dca9d..b37c7139ff 100644 --- a/python/src/dataset.rs +++ b/python/src/dataset.rs @@ -994,7 +994,13 @@ impl Dataset { operation: Operation, read_version: Option, commit_lock: Option<&PyAny>, + storage_options: Option>, ) -> PyResult { + let object_store_params = storage_options.map(|storage_options| ObjectStoreParams { + storage_options: Some(storage_options), + ..Default::default() + }); + let commit_handler = commit_lock.map(|commit_lock| { Arc::new(PyCommitLock::new(commit_lock.to_object(commit_lock.py()))) as Arc @@ -1008,8 +1014,14 @@ impl Dataset { }; let manifest = dataset.as_ref().map(|ds| ds.manifest()); validate_operation(manifest, &operation.0)?; - LanceDataset::commit(dataset_uri, operation.0, read_version, None, commit_handler) - .await + LanceDataset::commit( + dataset_uri, + operation.0, + read_version, + object_store_params, + commit_handler, + ) + .await })? .map_err(|e| PyIOError::new_err(e.to_string()))?; Ok(Self { diff --git a/rust/lance/src/dataset/fragment/write.rs b/rust/lance/src/dataset/fragment/write.rs index 28edf4f21e..66ed2644b6 100644 --- a/rust/lance/src/dataset/fragment/write.rs +++ b/rust/lance/src/dataset/fragment/write.rs @@ -147,7 +147,11 @@ impl<'a> FragmentCreateBuilder<'a> { Self::validate_schema(&schema, stream.schema().as_ref())?; - let (object_store, base_path) = ObjectStore::from_uri(self.dataset_uri).await?; + let (object_store, base_path) = ObjectStore::from_uri_and_params( + self.dataset_uri, + ¶ms.store_params.clone().unwrap_or_default(), + ) + .await?; let filename = format!("{}.lance", Uuid::new_v4()); let mut fragment = Fragment::with_file_legacy(id, &filename, &schema, None); let full_path = base_path.child(DATA_DIR).child(filename.clone());