Add IncludeExtension and ExcludeExtension for filtering objects/files #235

Open
wants to merge 6 commits into master

7 changes: 7 additions & 0 deletions README.md
@@ -318,6 +318,9 @@ Specifies the customer-provided encryption key for Amazon S3 to use to decrypt t
##### `--API-ETag=[string]`
Entity tag returned when the part was uploaded.

##### `--API-ExcludeExtension=[string]`
Includes all objects except those with the specified extension.

##### `--API-Expires=[datetime]`
The date and time at which the object is no longer cacheable.

@@ -345,6 +348,9 @@ Return the object only if its entity tag (ETag) is different from the one specif
##### `--API-IfUnmodifiedSince=[datetime]`
Return the object only if it has not been modified since the specified time, otherwise return a 412 (precondition failed).

##### `--API-IncludeExtension=[string]`
Includes only objects with the specified extension.

##### `--API-Metadata=[dict]`
A map (in json string) of metadata to store with the object in S3

@@ -465,3 +471,4 @@ still have to download and verify the MD5 directly.

* Bloomreach http://www.bloomreach.com
* Onera http://www.onera.com
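
The two new options follow the same `--API-<Name>=[value]` pass-through convention as the rest of this section, so a transfer could be limited to a single extension with, for example, `--API-IncludeExtension=.csv`, or temporary files skipped with `--API-ExcludeExtension=.tmp`. Below is a minimal standalone sketch of the suffix matching the patch relies on; the helper name and sample values are illustrative, not part of the change:

```python
def passes_extension_filter(name, include_ext=None, exclude_ext=None):
    """Return True if `name` should be processed.

    Mirrors the patch's str.endswith() checks: an excluded suffix always
    causes a skip, and when an include suffix is given, anything that does
    not end with it is skipped as well.
    """
    if exclude_ext is not None and name.endswith(exclude_ext):
        return False
    if include_ext is not None and not name.endswith(include_ext):
        return False
    return True

# Illustrative file names and option values:
assert passes_extension_filter("reports/2020.csv", include_ext=".csv")
assert not passes_extension_filter("reports/2020.csv.bak", include_ext=".csv")
assert not passes_extension_filter("scratch/cache.tmp", exclude_ext=".tmp")
```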

28 changes: 27 additions & 1 deletion s4cmd.py
@@ -328,6 +328,8 @@ class BotoClient(object):
"Specifies the customer-provided encryption key for Amazon S3 to use to decrypt the source object. The encryption key provided in this header must be one that was used when the source object was created."),
("ETag", "string",
"Entity tag returned when the part was uploaded."),
("ExcludeExtension", "string",
"Includes all objects except objects with a specified extension."),
("Expires", "datetime",
"The date and time at which the object is no longer cacheable."),
("GrantFullControl", "string",
@@ -346,6 +348,8 @@ class BotoClient(object):
"Return the object only if its entity tag (ETag) is different from the one specified, otherwise return a 304 (not modified)."),
("IfUnmodifiedSince", "datetime",
"Return the object only if it has not been modified since the specified time, otherwise return a 412 (precondition failed)."),
("IncludeExtension", "string",
"Includes objects with a specified extension."),
("Metadata", "dict",
"A map (in json string) of metadata to store with the object in S3"),
("MetadataDirective", "string",
@@ -783,6 +787,9 @@ def source_expand(self, source):

return result




@log_calls
def put_single_file(self, pool, source, target):
'''Upload a single file or a directory by adding a task into queue'''
@@ -1079,6 +1086,7 @@ def get_md5(self):
self.md5 = self.file_hash(self.filename)
return self.md5


class ThreadUtil(S3Handler, ThreadPool.Worker):
'''Thread workers for S3 operations.
This class contains all thread workers for S3 operations.
@@ -1222,6 +1230,14 @@ def conditional(self, result, obj):

result.append(obj)

def extension_check(self, file):
'''Return True if the file should be skipped based on the IncludeExtension/ExcludeExtension options.'''
if self.opt.ExcludeExtension is not None and file.endswith(self.opt.ExcludeExtension):
return True
if self.opt.IncludeExtension is not None and not file.endswith(self.opt.IncludeExtension):
return True
return False

class MultipartItem:
'''Utility class for multiple part upload/download.
This class is used to keep track of a single upload/download file, so
@@ -1309,6 +1325,8 @@ def read_file_chunk(self, source, pos, chunk):
@log_calls
def upload(self, source, target, mpi=None, pos=0, chunk=0, part=0):
'''Thread worker for upload operation.'''
if self.extension_check(source):
return
s3url = S3URL(target)
obj = self.lookup(s3url)

@@ -1385,6 +1403,8 @@ def write_file_chunk(self, target, pos, chunk, body):
@log_calls
def download(self, source, target, mpi=None, pos=0, chunk=0, part=0):
'''Thread worker for download operation.'''
if self.extension_check(source):
return
s3url = S3URL(source)
obj = self.lookup(s3url)
if obj is None:
@@ -1444,7 +1464,8 @@ def download(self, source, target, mpi=None, pos=0, chunk=0, part=0):
@log_calls
def copy(self, source, target, mpi=None, pos=0, chunk=0, part=0, delete_source=False):
'''Copy a single file from source to target using boto S3 library.'''

if self.extension_check(source):
return
if self.opt.dry_run:
message('%s => %s' % (source, target))
return
@@ -1499,6 +1520,9 @@ def copy(self, source, target, mpi=None, pos=0, chunk=0, part=0, delete_source=F
@log_calls
def delete(self, source):
'''Thread worker for delete operation.'''
if self.extension_check(source):
return

s3url = S3URL(source)

message('Delete %s', source)
@@ -1521,6 +1545,8 @@ def batch_delete(self, sources):
bucket = S3URL(sources[0]).bucket
deletes = []
for source in sources:
if self.extension_check(source):
continue
s3url = S3URL(source)
if s3url.bucket != bucket:
raise Failure('Unable to delete keys in different bucket %s and %s.' % (s3url.bucket, bucket))
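
As the hunks above show, the same early-return guard is added to upload, download, copy and delete, and batch_delete applies the same check with continue, so every worker skips a source that fails the filter. Note the inverted sense: extension_check() answers "should this file be skipped?" and returns True for the files that are filtered out. Matching is a plain str.endswith() on the option value, with no glob or regex support and no implied dot, so `.csv` (with the leading dot) is the safer form; a bare `csv` would also match keys that merely end in those three letters.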