Skip to content

Commit

Permalink
Configurable on C++ side
Browse files Browse the repository at this point in the history
  • Loading branch information
Tom-Newton committed Feb 11, 2024
1 parent d989191 commit 788f0d2
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 4 deletions.
14 changes: 10 additions & 4 deletions cpp/src/arrow/filesystem/azurefs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -504,11 +504,14 @@ class ObjectInputFile final : public io::RandomAccessFile {
public:
ObjectInputFile(std::shared_ptr<Blobs::BlobClient> blob_client,
const io::IOContext& io_context, AzureLocation location,
int64_t size = kNoSize)
const AzureOptions& options, int64_t size = kNoSize)
: blob_client_(std::move(blob_client)),
io_context_(io_context),
location_(std::move(location)),
content_length_(size) {}
content_length_(size),
initial_chunk_size_(options.initial_chunk_size),
chunk_size_(options.chunk_size),
concurrency_(options.concurrency) {}

Status Init() {
if (content_length_ != kNoSize) {
Expand Down Expand Up @@ -596,8 +599,8 @@ class ObjectInputFile final : public io::RandomAccessFile {

// Read the desired range of bytes
Http::HttpRange range{position, nbytes};
Storage::Blobs::DownloadBlobToOptions download_options;
download_options.Range = range;
Storage::Blobs::DownloadBlobToOptions download_options{
range, {initial_chunk_size_, chunk_size_, concurrency_}};
try {
return blob_client_
->DownloadTo(reinterpret_cast<uint8_t*>(out), nbytes, download_options)
Expand Down Expand Up @@ -649,6 +652,9 @@ class ObjectInputFile final : public io::RandomAccessFile {
int64_t pos_ = 0;
int64_t content_length_ = kNoSize;
std::shared_ptr<const KeyValueMetadata> metadata_;
int64_t initial_chunk_size_;
int64_t chunk_size_;
int32_t concurrency_;
};

Status CreateEmptyBlockBlob(const Blobs::BlockBlobClient& block_blob_client) {
Expand Down
8 changes: 8 additions & 0 deletions cpp/src/arrow/filesystem/azurefs.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,14 @@ struct ARROW_EXPORT AzureOptions {
/// This will be ignored if non-empty metadata is passed to OpenOutputStream.
std::shared_ptr<const KeyValueMetadata> default_metadata;

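/// \brief Options controlling the parallel transfer performed by each read
/// call on ObjectInputFile.
///
/// The three values below are passed through as the initial chunk size, chunk
/// size and concurrency of
/// Azure::Storage::Blobs::DownloadBlobToOptions::TransferOptions.
/// Defaults are taken from the Azure SDK.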
int64_t initial_chunk_size = 256 * 1024 * 1024;
int64_t chunk_size = 4 * 1024 * 1024;
int32_t concurrency = 5;

private:
enum class CredentialKind {
kDefault,
Expand Down

0 comments on commit 788f0d2

Please sign in to comment.