From 28021445a71b8c8e7cc1f7fdac6d89457095b81d Mon Sep 17 00:00:00 2001 From: Joanne Bogart Date: Wed, 23 Oct 2024 17:01:03 -0700 Subject: [PATCH] allow duplicate relative_path in case dataset is not copied --- src/dataregistry/registrar/dataset.py | 27 +++++++++++++------ .../test_register_dataset_real_data.py | 8 ++++-- 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/src/dataregistry/registrar/dataset.py b/src/dataregistry/registrar/dataset.py index 85034977..e6bbabe5 100644 --- a/src/dataregistry/registrar/dataset.py +++ b/src/dataregistry/registrar/dataset.py @@ -414,6 +414,7 @@ def register( # Make sure the relative_path in the `root_dir` is avaliable if kwargs_dict["location_type"] in ["dataregistry", "dummy"]: + will_copy = kwargs_dict["old_location"] previous_datasets = self._find_previous( None, None, @@ -433,17 +434,27 @@ def register( root_dir=self._root_dir, ) + warned = False if get_dataset_status(previous_datasets[-1].status, "archived"): - raise ValueError( - f"Relative path {dest} is reserved " - f"for archived datasetid={previous_datasets[-1].dataset_id}" - ) + if will_copy: + raise ValueError( + f"Relative path {dest} is reserved " + f"for archived datasetid={previous_datasets[-1].dataset_id}" + ) + else: + print(f"Warning: found existing entry with path {kwargs_dict['relative_path']}") + warned = True if not get_dataset_status(previous_datasets[-1].status, "deleted"): - raise ValueError( - f"Relative path {dest} is taken by " - f"datasetid={previous_datasets[-1].dataset_id}" - ) + if will_copy: + raise ValueError( + f"Relative path {dest} is taken by " + f"datasetid={previous_datasets[-1].dataset_id}" + ) + else: + if not warned: + print(f"Warning: found existing entry with path {kwargs_dict['relative_path']}") + # Make sure there is not already a database entry with this # name/version combination diff --git a/tests/end_to_end_tests/test_register_dataset_real_data.py b/tests/end_to_end_tests/test_register_dataset_real_data.py index 015b54ca..22210731 100644 --- a/tests/end_to_end_tests/test_register_dataset_real_data.py +++ b/tests/end_to_end_tests/test_register_dataset_real_data.py @@ -54,6 +54,7 @@ def test_copy_data(dummy_file, data_org): [ ("file", "file1.txt"), ("directory", "dummy_dir"), + ("same_directory", "dummy_dir") ], ) def test_on_location_data(dummy_file, data_org, data_path): @@ -84,9 +85,12 @@ def test_on_location_data(dummy_file, data_org, data_path): [f], ) - assert len(results["dataset.data_org"]) == 1 + if data_org == "same_directory": + assert len(results["dataset.data_org"]) == 2 + else: + assert len(results["dataset.data_org"]) == 1 - assert results["dataset.data_org"][0] == data_org + assert results["dataset.data_org"][0].endswith(data_org) assert results["dataset.nfiles"][0] == 1 assert results["dataset.total_disk_space"][0] > 0