Skip to content

Commit

Permalink
Merge pull request #379 from Azure/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
zezha-msft authored Nov 3, 2017
2 parents 5bd9677 + f1032ee commit 9b5e870
Show file tree
Hide file tree
Showing 553 changed files with 27,672 additions and 27,105 deletions.
6 changes: 6 additions & 0 deletions azure-storage-blob/ChangeLog.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
# Change Log azure-storage-blob

> See [BreakingChanges](BreakingChanges.md) for a detailed list of API breaks.
## Version 0.37.1:

- Enabling MD5 validation no longer uses the memory-efficient algorithm for large block blobs, since computing the MD5 hash requires reading the entire block into memory.
- Fixed a bug in the _SubStream class which was at risk of causing data corruption when using the memory-efficient algorithm for large block blobs.
- Support for AccessTierChangeTime to get the last time a tier was modified on an individual blob.
5 changes: 4 additions & 1 deletion azure-storage-blob/azure/storage/blob/_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,10 @@
# --------------------------------------------------------------------------

__author__ = 'Microsoft Corp. <[email protected]>'
__version__ = '0.37.0'
__version__ = '0.37.1'

# x-ms-version for storage service.
X_MS_VERSION = '2017-04-17'

# internal configurations, should not be changed
_LARGE_BLOB_UPLOAD_MAX_READ_BUFFER_SIZE = 4 * 1024 * 1024
6 changes: 5 additions & 1 deletion azure-storage-blob/azure/storage/blob/_deserialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,9 @@ def _convert_xml_to_containers(response):
'CopyCompletionTime': ('copy', 'completion_time', _to_str),
'CopyStatusDescription': ('copy', 'status_description', _to_str),
'AccessTier': (None, 'blob_tier', _to_str),
'ArchiveStatus': (None, 'rehydration_status', _to_str)
'AccessTierChangeTime': (None, 'blob_tier_change_time', parser.parse),
'AccessTierInferred': (None, 'blob_tier_inferred', _bool),
'ArchiveStatus': (None, 'rehydration_status', _to_str),
}


Expand Down Expand Up @@ -281,6 +283,8 @@ def _convert_xml_to_blob_list(response):
<CopyCompletionTime>datetime</CopyCompletionTime>
<CopyStatusDescription>error string</CopyStatusDescription>
<AccessTier>P4 | P6 | P10 | P20 | P30 | P40 | P50 | P60 | Archive | Cool | Hot</AccessTier>
<AccessTierChangeTime>date-time-value</AccessTierChangeTime>
<AccessTierInferred>true</AccessTierInferred>
</Properties>
<Metadata>
<Name>value</Name>
Expand Down
44 changes: 36 additions & 8 deletions azure-storage-blob/azure/storage/blob/_upload_chunking.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@
_get_blob_encryptor_and_padder,
)
from .models import BlobBlock
from ._constants import (
_LARGE_BLOB_UPLOAD_MAX_READ_BUFFER_SIZE
)


def _upload_blob_chunks(blob_service, container_name, blob_name,
Expand Down Expand Up @@ -342,6 +345,7 @@ def __init__(self, wrapped_stream, stream_begin_index, length, lockObj):
# derivations of io.IOBase and thus do not implement seekable().
# Python > 3.0: file-like objects created with open() are derived from io.IOBase.
try:
            # only the main thread runs this, so there's no need to grab the lock
wrapped_stream.seek(0, SEEK_CUR)
except:
raise ValueError("Wrapped stream must support seek().")
Expand All @@ -351,9 +355,14 @@ def __init__(self, wrapped_stream, stream_begin_index, length, lockObj):
self._position = 0
self._stream_begin_index = stream_begin_index
self._length = length
self._count = 0
self._buffer = BytesIO()
self._read_buffer_size = 4 * 1024 * 1024

# we must avoid buffering more than necessary, and also not use up too much memory
# so the max buffer size is capped at 4MB
self._max_buffer_size = length if length < _LARGE_BLOB_UPLOAD_MAX_READ_BUFFER_SIZE \
else _LARGE_BLOB_UPLOAD_MAX_READ_BUFFER_SIZE
self._current_buffer_start = 0
self._current_buffer_size = 0

def __len__(self):
return self._length
Expand Down Expand Up @@ -382,35 +391,45 @@ def read(self, n):
if n is 0 or self._buffer.closed:
return b''

# attempt first read from the read buffer
# attempt first read from the read buffer and update position
read_buffer = self._buffer.read(n)
bytes_read = len(read_buffer)
bytes_remaining = n - bytes_read
self._position += bytes_read

# repopulate the read buffer from the underlying stream to fulfill the request
# ensure the seek and read operations are done atomically (only if a lock is provided)
if bytes_remaining > 0:
with self._buffer:
# either read in the max buffer size specified on the class
# or read in just enough data for the current block/sub stream
current_max_buffer_size = min(self._max_buffer_size, self._length - self._position)

# lock is only defined if max_connections > 1 (parallel uploads)
if self._lock:
with self._lock:
# reposition the underlying stream to match the start of the substream
# reposition the underlying stream to match the start of the data to read
absolute_position = self._stream_begin_index + self._position
self._wrapped_stream.seek(absolute_position, SEEK_SET)
# If we can't seek to the right location, our read will be corrupted so fail fast.
if self._wrapped_stream.tell() != absolute_position:
raise IOError("Stream failed to seek to the desired location.")
buffer_from_stream = self._wrapped_stream.read(self._read_buffer_size)
buffer_from_stream = self._wrapped_stream.read(current_max_buffer_size)
else:
buffer_from_stream = self._wrapped_stream.read(self._read_buffer_size)
buffer_from_stream = self._wrapped_stream.read(current_max_buffer_size)

if buffer_from_stream:
# update the buffer with new data from the wrapped stream
# we need to note down the start position and size of the buffer, in case seek is performed later
self._buffer = BytesIO(buffer_from_stream)
self._current_buffer_start = self._position
self._current_buffer_size = len(buffer_from_stream)

# read the remaining bytes from the new buffer and update position
second_read_buffer = self._buffer.read(bytes_remaining)
bytes_read += len(second_read_buffer)
read_buffer += second_read_buffer
self._position += len(second_read_buffer)

self._position += bytes_read
return read_buffer

def readable(self):
Expand All @@ -437,6 +456,15 @@ def seek(self, offset, whence=0):
elif pos < 0:
pos = 0

# check if buffer is still valid
# if not, drop buffer
if pos < self._current_buffer_start or pos >= self._current_buffer_start + self._current_buffer_size:
self._buffer.close()
self._buffer = BytesIO()
else: # if yes seek to correct position
delta = pos - self._current_buffer_start
self._buffer.seek(delta, SEEK_SET)

self._position = pos
return pos

Expand Down
13 changes: 9 additions & 4 deletions azure-storage-blob/azure/storage/blob/blockblobservice.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,7 +348,9 @@ def create_blob_from_path(
that was sent. This is primarily valuable for detecting bitflips on
the wire if using http instead of https as https (the default) will
already validate. Note that this MD5 hash is not stored with the
blob.
blob. Also note that if enabled, the memory-efficient upload algorithm
will not be used, because computing the MD5 hash requires buffering
entire blocks, and doing so defeats the purpose of the memory-efficient algorithm.
:param progress_callback:
Callback for progress with signature function(current, total) where
current is the number of bytes transferred so far, and total is the
Expand Down Expand Up @@ -441,7 +443,9 @@ def create_blob_from_stream(
that was sent. This is primarily valuable for detecting bitflips on
the wire if using http instead of https as https (the default) will
already validate. Note that this MD5 hash is not stored with the
blob.
blob. Also note that if enabled, the memory-efficient upload algorithm
will not be used, because computing the MD5 hash requires buffering
entire blocks, and doing so defeats the purpose of the memory-efficient algorithm.
:param progress_callback:
Callback for progress with signature function(current, total) where
current is the number of bytes transferred so far, and total is the
Expand Down Expand Up @@ -507,6 +511,7 @@ def create_blob_from_stream(
if (self.key_encryption_key is not None) and (adjusted_count is not None):
adjusted_count += (16 - (count % 16))

# Do single put if the size is smaller than MAX_SINGLE_PUT_SIZE
if adjusted_count is not None and (adjusted_count < self.MAX_SINGLE_PUT_SIZE):
if progress_callback:
progress_callback(0, count)
Expand All @@ -530,10 +535,10 @@ def create_blob_from_stream(
progress_callback(count, count)

return resp
else:
else: # Size is larger than MAX_SINGLE_PUT_SIZE, must upload with multiple put_block calls
cek, iv, encryption_data = None, None, None

use_original_upload_path = use_byte_buffer or self.require_encryption or \
use_original_upload_path = use_byte_buffer or validate_content or self.require_encryption or \
self.MAX_BLOCK_SIZE < self.MIN_LARGE_BLOCK_UPLOAD_THRESHOLD or \
hasattr(stream, 'seekable') and not stream.seekable() or \
not hasattr(stream, 'seek') or not hasattr(stream, 'tell')
Expand Down
15 changes: 15 additions & 0 deletions azure-storage-blob/azure/storage/blob/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,18 @@ class BlobProperties(object):
Stores all the content settings for the blob.
:ivar ~azure.storage.blob.models.LeaseProperties lease:
Stores all the lease information for the blob.
:ivar StandardBlobTier blob_tier:
Indicates the access tier of the blob. The hot tier is optimized
for storing data that is accessed frequently. The cool storage tier
is optimized for storing data that is infrequently accessed and stored
for at least a month. The archive tier is optimized for storing
data that is rarely accessed and stored for at least six months
with flexible latency requirements.
:ivar datetime blob_tier_change_time:
Indicates when the access tier was last changed.
:ivar bool blob_tier_inferred:
Indicates whether the access tier was inferred by the service.
If false, it indicates that the tier was set explicitly.
'''

def __init__(self):
Expand All @@ -129,6 +141,9 @@ def __init__(self):
self.copy = CopyProperties()
self.content_settings = ContentSettings()
self.lease = LeaseProperties()
self.blob_tier = None
self.blob_tier_change_time = None
self.blob_tier_inferred = False


class ContentSettings(object):
Expand Down
4 changes: 2 additions & 2 deletions azure-storage-blob/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@

setup(
name='azure-storage-blob',
version='0.37.0',
version='0.37.1',
description='Microsoft Azure Storage Blob Client Library for Python',
long_description=open('README.rst', 'r').read(),
license='Apache License 2.0',
Expand All @@ -86,7 +86,7 @@
'cryptography',
'python-dateutil',
'requests',
'azure-storage-common>=0.37.0,<0.38.0'
'azure-storage-common>=0.37.1,<0.38.0'
] + (['futures'] if sys.version_info < (3, 0) else []),
cmdclass=cmdclass
)
5 changes: 5 additions & 0 deletions azure-storage-common/ChangeLog.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,8 @@

> See [BreakingChanges](BreakingChanges.md) for a detailed list of API breaks.
## Version 0.37.1:
- Fixed the return type of __add__ and __or__ methods on the AccountPermissions class
- Added the captured exception to retry_context, in case the user wants more info in retry_callback or wants to implement their own retry class.
- Added random jitter to retry intervals, in order to avoid multiple retries happening at exactly the same time

2 changes: 1 addition & 1 deletion azure-storage-common/azure/storage/common/_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import platform

__author__ = 'Microsoft Corp. <[email protected]>'
__version__ = '0.37.0'
__version__ = '0.37.1'

# UserAgent string sample: 'Azure-Storage/0.37.0-0.38.0 (Python CPython 3.4.2; Windows 8)'
# First version (0.37.0) is the common package, and the second version (0.38.0) is the service package
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ def _get_download_size(start_range, end_range, resource_size):
'x-ms-blob-sequence-number': (None, 'page_blob_sequence_number', _int_to_str),
'x-ms-blob-committed-block-count': (None, 'append_blob_committed_block_count', _int_to_str),
'x-ms-access-tier': (None, 'blob_tier', _to_str),
'x-ms-access-tier-change-time': (None, 'blob_tier_change_time', parser.parse),
'x-ms-access-tier-inferred': (None, 'blob_tier_inferred', _bool),
'x-ms-archive-status': (None, 'rehydration_status', _to_str),
'x-ms-share-quota': (None, 'quota', _int_to_str),
Expand Down
10 changes: 7 additions & 3 deletions azure-storage-common/azure/storage/common/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,14 +124,18 @@ class RetryContext(object):
The request sent to the storage service.
:ivar ~azure.storage.common._http.HTTPResponse response:
The response returned by the storage service.
:ivar LocationMode location_mode:
:ivar LocationMode location_mode:
The location the request was sent to.
:ivar Exception exception:
The exception that just occurred. The type could either be AzureException (for HTTP errors),
or other Exception types from lower layers, which are kept unwrapped for easier processing.
'''

def __init__(self):
self.request = None
self.response = None
self.location_mode = None
self.exception = None


class LocationMode(object):
Expand Down Expand Up @@ -593,10 +597,10 @@ def __init__(self, read=False, write=False, delete=False, list=False,
self.process = process or ('p' in _str)

def __or__(self, other):
return ResourceTypes(_str=str(self) + str(other))
return AccountPermissions(_str=str(self) + str(other))

def __add__(self, other):
return ResourceTypes(_str=str(self) + str(other))
return AccountPermissions(_str=str(self) + str(other))

def __str__(self):
return (('r' if self.read else '') +
Expand Down
26 changes: 22 additions & 4 deletions azure-storage-common/azure/storage/common/retry.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
# --------------------------------------------------------------------------
from abc import ABCMeta
from math import pow
import random

from .models import LocationMode

Expand Down Expand Up @@ -152,7 +153,7 @@ class ExponentialRetry(_Retry):
'''

def __init__(self, initial_backoff=15, increment_power=3, max_attempts=3,
retry_to_secondary=False):
retry_to_secondary=False, random_jitter_range=3):
'''
Constructs an Exponential retry object. The initial_backoff is used for
the first retry. Subsequent retries are retried after initial_backoff +
Expand All @@ -171,9 +172,13 @@ def __init__(self, initial_backoff=15, increment_power=3, max_attempts=3,
Whether the request should be retried to secondary, if able. This should
only be enabled if RA-GRS accounts are used and potentially stale data
can be handled.
:param int random_jitter_range:
A number in seconds which indicates a range to jitter/randomize for the back-off interval.
For example, a random_jitter_range of 3 causes the back-off interval x to vary between x-3 and x+3.
'''
self.initial_backoff = initial_backoff
self.increment_power = increment_power
self.random_jitter_range = random_jitter_range
super(ExponentialRetry, self).__init__(max_attempts, retry_to_secondary)

'''
Expand Down Expand Up @@ -201,15 +206,19 @@ def retry(self, context):
'''

def _backoff(self, context):
return self.initial_backoff + (0 if context.count == 0 else pow(self.increment_power, context.count))
random_generator = random.Random()
backoff = self.initial_backoff + (0 if context.count == 0 else pow(self.increment_power, context.count))
random_range_start = backoff - self.random_jitter_range if backoff > self.random_jitter_range else 0
random_range_end = backoff + self.random_jitter_range
return random_generator.uniform(random_range_start, random_range_end)


class LinearRetry(_Retry):
'''
Linear retry.
'''

def __init__(self, backoff=15, max_attempts=3, retry_to_secondary=False):
def __init__(self, backoff=15, max_attempts=3, retry_to_secondary=False, random_jitter_range=3):
'''
Constructs a Linear retry object.
Expand All @@ -221,9 +230,13 @@ def __init__(self, backoff=15, max_attempts=3, retry_to_secondary=False):
Whether the request should be retried to secondary, if able. This should
only be enabled if RA-GRS accounts are used and potentially stale data
can be handled.
:param int random_jitter_range:
A number in seconds which indicates a range to jitter/randomize for the back-off interval.
For example, a random_jitter_range of 3 causes the back-off interval x to vary between x-3 and x+3.
'''
self.backoff = backoff
self.max_attempts = max_attempts
self.random_jitter_range = random_jitter_range
super(LinearRetry, self).__init__(max_attempts, retry_to_secondary)

'''
Expand Down Expand Up @@ -251,7 +264,12 @@ def retry(self, context):
'''

def _backoff(self, context):
return self.backoff
random_generator = random.Random()
# the backoff interval normally does not change, however there is the possibility
# that it was modified by accessing the property directly after initializing the object
self.random_range_start = self.backoff - self.random_jitter_range if self.backoff > self.random_jitter_range else 0
self.random_range_end = self.backoff + self.random_jitter_range
return random_generator.uniform(self.random_range_start, self.random_range_end)


def no_retry(context):
Expand Down
2 changes: 2 additions & 0 deletions azure-storage-common/azure/storage/common/storageclient.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,8 +293,10 @@ def _perform_request(self, request, parser=None, parser_args=None, operation_con
else:
return
except AzureException as ex:
retry_context.exception = ex
raise ex
except Exception as ex:
retry_context.exception = ex
if sys.version_info >= (3,):
# Automatic chaining in Python 3 means we keep the trace
raise AzureException(ex.args[0])
Expand Down
2 changes: 1 addition & 1 deletion azure-storage-common/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@

setup(
name='azure-storage-common',
version='0.37.0',
version='0.37.1',
description='Microsoft Azure Storage Common Client Library for Python',
long_description=open('README.rst', 'r').read(),
license='Apache License 2.0',
Expand Down
Loading

0 comments on commit 9b5e870

Please sign in to comment.