def test_one(tempdir):
    """
    Test that missing parent directories are created when pulling an upload.
    """
    with working_directory(tempdir.path):
        files = pull_upload({"path": __file__}, "a/b/some.txt")

    assert list(files.keys()) == ["a/b/some.txt"]
    tempdir.compare(["a/", "a/b/", "a/b/some.txt"])


def test_overwrite(tempdir):
    """
    Test that an existing file at the destination path is overwritten.
    """
    with working_directory(tempdir.path):
        target = Path("some.txt")
        target.write_text("wateva")
        # `read_text` closes the file itself; a bare `open(...).read()` leaves
        # the handle open until garbage collection.
        assert target.read_text() == "wateva"

        files = pull_upload({"path": __file__}, "some.txt")
        # The pulled source is this test module, which starts with its imports
        assert target.read_text().startswith("from pathlib")
        assert list(files.keys()) == ["some.txt"]
        tempdir.compare(["some.txt"])
def move_files(source: str, dest: str = ".", cleanup: bool = True) -> None:
    """
    Move files from `source` to `dest` directories.

    The directory tree under `source` is recreated under `dest`, merging with
    any directories that already exist there. Files that already exist in
    `dest` are overwritten; anything else already in `dest` is left alone.
    If `cleanup` is true the `source` directory is removed afterwards.

    Raises `IsADirectoryError` if a file in `source` would have to replace a
    directory in `dest`.
    """
    for path, _dirs, files in os.walk(source):
        dest_dir = os.path.join(dest, os.path.relpath(path, source))
        # `exist_ok` merges into existing directories without a separate
        # (race-prone) existence check.
        os.makedirs(dest_dir, exist_ok=True)
        for name in files:
            source_file = os.path.join(path, name)
            dest_file = os.path.join(dest_dir, name)
            if os.path.isdir(dest_file):
                # `shutil.move` would silently put the file *inside* an
                # existing directory; fail loudly instead.
                raise IsADirectoryError(
                    f"Cannot overwrite directory with file: {dest_file}"
                )
            # Always a real move (never a copy that leaves the source file
            # behind), overwriting existing files and falling back to
            # copy-and-delete when `dest` is on another filesystem.
            shutil.move(source_file, dest_file)

    if cleanup:
        remove_dir(source)