Skip to content
This repository has been archived by the owner on Jun 30, 2022. It is now read-only.

Commit

Permalink
Augment file utils with recursive copy
Browse files Browse the repository at this point in the history
----Release Notes----
[]
-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=124379580
  • Loading branch information
robertwb authored and aaltay committed Jun 10, 2016
1 parent 15bef52 commit 9782343
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 0 deletions.
17 changes: 17 additions & 0 deletions google/cloud/dataflow/io/fileio.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,23 @@ def rename(src, dst):
except OSError as err:
raise IOError(err)

@staticmethod
def copytree(src, dst):
    """Recursively copies the directory src to dst, replacing any existing dst.

    Paths starting with 'gs://' are delegated to gcsio.GcsIO().copytree.
    For any other path an existing dst is removed and src is then copied
    with shutil.copytree.

    Args:
      src: Source directory. A 'gs://' source must end with '/'.
      dst: Destination directory. When src is a 'gs://' path, dst must also
        be a 'gs://' path ending with '/'.

    Raises:
      IOError: if the local source is not an existing directory, if removing
        dst would also remove src, or if the local remove/copy fails with an
        OSError.
    """
    if src.startswith('gs://'):
        assert dst.startswith('gs://'), dst
        assert src.endswith('/'), src
        assert dst.endswith('/'), dst
        # pylint: disable=g-import-not-at-top
        from google.cloud.dataflow.io import gcsio
        gcsio.GcsIO().copytree(src, dst)
    else:
        # Validate before the destructive rmtree below: a bad source must not
        # cost the caller the existing destination.
        if not os.path.isdir(src):
            raise IOError('Source is not an existing directory: %s' % src)
        real_src = os.path.realpath(src)
        real_dst = os.path.realpath(dst)
        dst_prefix = real_dst.rstrip(os.sep) + os.sep
        # If dst is src itself or one of its ancestors, removing dst would
        # delete the very files that are about to be copied.
        if real_src == real_dst or real_src.startswith(dst_prefix):
            raise IOError(
                'Destination %s contains the source %s' % (dst, src))
        try:
            if os.path.exists(dst):
                shutil.rmtree(dst)
            shutil.copytree(src, dst)
        except OSError as err:
            raise IOError(err)

@staticmethod
def exists(path):
if path.startswith('gs://'):
Expand Down
16 changes: 16 additions & 0 deletions google/cloud/dataflow/io/gcsio.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,22 @@ def copy(self, src, dest):
destinationObject=dest_path)
self.client.objects.Copy(request)

# We intentionally do not decorate this method with a retry, since the
# underlying copy operation is already an idempotent operation protected by
# a retry decorator.
def copytree(self, src, dest):
    """Copies the given GCS "directory" recursively from src to dest.

    Every object returned by self.glob(src + '*') that lies under the literal
    prefix src is copied to the same relative name under dest. The source
    objects are left in place.

    Args:
      src: GCS file path pattern in the form gs://<bucket>/<name>/.
      dest: GCS file path pattern in the form gs://<bucket>/<name>/.
    """
    assert src.endswith('/')
    assert dest.endswith('/')
    prefix_len = len(src)
    for entry in self.glob(src + '*'):
        # The relative name is obtained by slicing off len(src) characters,
        # which is only meaningful for entries that literally start with src.
        # If glob treats characters inside src as wildcards it can return
        # objects outside the source "directory"; those are skipped rather
        # than copied under a wrongly computed name.
        if not entry.startswith(src):
            continue
        self.copy(entry, dest + entry[prefix_len:])

# We intentionally do not decorate this method with a retry, since the
# underlying copy and delete operations are already idempotent operations
# protected by retry decorators.
Expand Down
25 changes: 25 additions & 0 deletions google/cloud/dataflow/io/gcsio_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,31 @@ def test_copy(self):
self.assertTrue(gcsio.parse_gcs_path(dest_file_name) in
self.client.objects.files)

def test_copytree(self):
    """Checks that copytree duplicates nested objects and keeps the sources."""
    source_root = 'gs://gcsio-test/source/'
    target_root = 'gs://gcsio-test/dest/'
    relative_names = ['a', 'b/c', 'b/d']
    size_in_bytes = 1024

    # Populate the source "directory"; the matching destination objects must
    # not exist before the copy.
    for name in relative_names:
        source_object = source_root + name
        target_object = target_root + name
        self._insert_random_file(self.client, source_object, size_in_bytes)
        self.assertTrue(
            gcsio.parse_gcs_path(source_object) in self.client.objects.files)
        self.assertFalse(
            gcsio.parse_gcs_path(target_object) in self.client.objects.files)

    self.gcs.copytree(source_root, target_root)

    # After the copy each object must be present under both roots: the
    # originals stay in place and the copies appear under the destination.
    for name in relative_names:
        for root in (source_root, target_root):
            self.assertTrue(
                gcsio.parse_gcs_path(root + name) in self.client.objects.files)

def test_rename(self):
src_file_name = 'gs://gcsio-test/source'
dest_file_name = 'gs://gcsio-test/dest'
Expand Down

0 comments on commit 9782343

Please sign in to comment.