diff --git a/pyproject.toml b/pyproject.toml index 632995b..47b2f37 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,10 +20,10 @@ dependencies = [ "boto3", "boto3-stubs[s3]", "pydantic-settings>=2.0", - "pydantic>=2.0,<2.7", + "pydantic>=2.7,<2.9", "pymongo==4.3.3", "dask==2023.5.0", - "aind-data-schema==0.33.3", + "aind-data-schema==1.0.0", "aind-codeocean-api==0.5.0", ] diff --git a/src/aind_data_asset_indexer/codeocean_bucket_indexer.py b/src/aind_data_asset_indexer/codeocean_bucket_indexer.py index 643e423..6b67787 100644 --- a/src/aind_data_asset_indexer/codeocean_bucket_indexer.py +++ b/src/aind_data_asset_indexer/codeocean_bucket_indexer.py @@ -248,7 +248,7 @@ def _process_codeocean_record( """ location = codeocean_record["location"] created = codeocean_record["created"] - external_links = [codeocean_record["external_links"]] + external_links = codeocean_record["external_links"] name = codeocean_record["name"] url_parts = get_s3_bucket_and_prefix(location) bucket = url_parts["bucket"] diff --git a/src/aind_data_asset_indexer/utils.py b/src/aind_data_asset_indexer/utils.py index e1d7877..39ebf83 100644 --- a/src/aind_data_asset_indexer/utils.py +++ b/src/aind_data_asset_indexer/utils.py @@ -11,7 +11,7 @@ from aind_codeocean_api.codeocean import CodeOceanClient from aind_data_schema.core.data_description import DataLevel, DataRegex -from aind_data_schema.core.metadata import Metadata +from aind_data_schema.core.metadata import ExternalPlatforms, Metadata from aind_data_schema.utils.json_writer import SchemaWriter from botocore.exceptions import ClientError from mypy_boto3_s3 import S3Client @@ -567,7 +567,7 @@ def build_metadata_record_from_prefix( s3_client: S3Client, optional_name: Optional[str] = None, optional_created: Optional[datetime] = None, - optional_external_links: Optional[List[dict]] = None, + optional_external_links: Optional[Dict[str, List[str]]] = None, ) -> Optional[str]: """ For a given bucket and prefix, this method will return a JSON string @@ -585,7 +585,7 @@ def build_metadata_record_from_prefix( s3_prefix. Default is None. optional_created: Optional[datetime] User can override created datetime. Default is None. - optional_external_links: Optional[List[dict]] + optional_external_links: Optional[Dict[str, List[str]]] User can provide external_links. Default is None. Returns @@ -1048,7 +1048,7 @@ def get_all_processed_codeocean_asset_records( {"name": data_asset_name, "location": data_asset_location, "created": data_asset_created, - "external_links": {"Code Ocean": data_asset_id} + "external_links": {"Code Ocean": [data_asset_id]} } } @@ -1091,7 +1091,9 @@ def get_all_processed_codeocean_asset_records( "name": data_asset_name, "location": location, "created": created_datetime, - "external_links": {"Code Ocean": data_asset_id}, + "external_links": { + ExternalPlatforms.CODEOCEAN.value: [data_asset_id] + }, } # Occasionally, there are duplicate items returned. This is one # way to remove the duplicates. diff --git a/tests/resources/utils/example_metadata.nd.json b/tests/resources/utils/example_metadata.nd.json index 254c99d..96d9124 100644 --- a/tests/resources/utils/example_metadata.nd.json +++ b/tests/resources/utils/example_metadata.nd.json @@ -87,8 +87,9 @@ "pipeline_version": null, "schema_version": "0.1.0" }, + "quality_control": null, "rig": null, - "schema_version": "0.2.7", + "schema_version": "1.0.0", "session": null, "subject": { "background_strain": null, diff --git a/tests/resources/utils/example_metadata1.nd.json b/tests/resources/utils/example_metadata1.nd.json index 4c75e61..798da14 100644 --- a/tests/resources/utils/example_metadata1.nd.json +++ b/tests/resources/utils/example_metadata1.nd.json @@ -95,8 +95,9 @@ "pipeline_version": null, "schema_version": "0.2.5" }, + "quality_control": null, "rig": null, - "schema_version": "0.2.7", + "schema_version": "1.0.0", "session": null, "subject": { "background_strain": null, diff --git a/tests/resources/utils/example_metadata2.nd.json b/tests/resources/utils/example_metadata2.nd.json index 6606e70..6f7b08f 100644 --- a/tests/resources/utils/example_metadata2.nd.json +++ b/tests/resources/utils/example_metadata2.nd.json @@ -173,8 +173,9 @@ }, "schema_version": "0.3.1" }, + "quality_control": null, "rig": null, - "schema_version": "0.2.7", + "schema_version": "1.0.0", "session": null, "subject": { "background_strain": null, diff --git a/tests/test_aind_bucket_indexer.py b/tests/test_aind_bucket_indexer.py index 460aecc..4a9e61d 100644 --- a/tests/test_aind_bucket_indexer.py +++ b/tests/test_aind_bucket_indexer.py @@ -73,7 +73,7 @@ def test_write_root_file_with_record_info_same_hash( "last_modified": datetime( 2024, 5, 15, 17, 41, 28, tzinfo=timezone.utc ), - "e_tag": '"e6dd2b7ab819f7a0fc21dba512a4071b"', + "e_tag": '"275d922d2a1e547f2e0f35b5cc54f493"', "version_id": "version_id", }, prefix="ecephys_642478_2023-01-17_13-56-29", diff --git a/tests/test_codeocean_bucket_indexer.py b/tests/test_codeocean_bucket_indexer.py index 8c7eff3..4b17fe9 100644 --- a/tests/test_codeocean_bucket_indexer.py +++ b/tests/test_codeocean_bucket_indexer.py @@ -54,7 +54,7 @@ def setUpClass(cls) -> None: 2024, 6, 12, 21, 21, 28, tzinfo=timezone.utc ), "external_links": { - "Code Ocean": "11ee1e1e-11e1-1111-1111-e11eeeee1e11" + "Code Ocean": ["11ee1e1e-11e1-1111-1111-e11eeeee1e11"] }, }, { @@ -69,7 +69,7 @@ def setUpClass(cls) -> None: 2024, 6, 12, 19, 45, 59, tzinfo=timezone.utc ), "external_links": { - "Code Ocean": "666666cc-66cc-6c66-666c-6c66c6666666" + "Code Ocean": ["666666cc-66cc-6c66-666c-6c66c6666666"] }, }, ] @@ -79,10 +79,10 @@ def setUpClass(cls) -> None: "ecephys_642478_2023-01-17_13-56-29/instrument.json": None, "ecephys_642478_2023-01-17_13-56-29/procedures.json": None, "ecephys_642478_2023-01-17_13-56-29/processing.json": None, + "ecephys_642478_2023-01-17_13-56-29/quality_control.json": None, "ecephys_642478_2023-01-17_13-56-29/rig.json": None, "ecephys_642478_2023-01-17_13-56-29/session.json": None, "ecephys_642478_2023-01-17_13-56-29/subject.json": None, - "ecephys_642478_2023-01-17_13-56-29/mri_session.json": None, } cls.example_docdb_records = [ { diff --git a/tests/test_utils.py b/tests/test_utils.py index 5f6ad9d..cfe011a 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -64,10 +64,10 @@ def load_json_file(filename: str) -> dict: "instrument", "procedures", "processing", + "quality_control", "rig", "session", "subject", - "mri_session", ] cls.example_core_files = example_core_files example_pages = load_json_file("example_pages_response.json") @@ -187,7 +187,7 @@ def test__log_message_false(self, mock_log: MagicMock): def test_compute_md5_hash(self): """Tests compute_md5_hash method""" md5_hash = compute_md5_hash(json.dumps(self.example_metadata_nd)) - self.assertEqual("e6dd2b7ab819f7a0fc21dba512a4071b", md5_hash) + self.assertEqual("275d922d2a1e547f2e0f35b5cc54f493", md5_hash) def test_is_dict_corrupt(self): """Tests is_dict_corrupt method""" @@ -746,6 +746,7 @@ def test_build_metadata_record_from_prefix( "e_tag": '"f4827f025e79bafeb6947e14c4e3b51a"', "version_id": "jWWT0Xrb8_nE9t5C.nTlLElpYJoURbv_", }, + "ecephys_642478_2023-01-17_13-56-29/quality_control.json": None, "ecephys_642478_2023-01-17_13-56-29/rig.json": None, "ecephys_642478_2023-01-17_13-56-29/session.json": None, "ecephys_642478_2023-01-17_13-56-29/subject.json": { @@ -755,7 +756,6 @@ def test_build_metadata_record_from_prefix( "e_tag": '"92734946c64fc87408ef79e5e92937bc"', "version_id": "XS0p7m6wWNTHG_F3P76D7AUXtE23BakR", }, - "ecephys_642478_2023-01-17_13-56-29/mri_session.json": None, } mock_download_json_file.side_effect = [ self.example_processing, @@ -811,10 +811,10 @@ def test_build_metadata_record_from_prefix_with_optional_fields( "ecephys_642478_2023-01-17_13-56-29/instrument.json": None, "ecephys_642478_2023-01-17_13-56-29/procedures.json": None, "ecephys_642478_2023-01-17_13-56-29/processing.json": None, + "ecephys_642478_2023-01-17_13-56-29/quality_control.json": None, "ecephys_642478_2023-01-17_13-56-29/rig.json": None, "ecephys_642478_2023-01-17_13-56-29/session.json": None, "ecephys_642478_2023-01-17_13-56-29/subject.json": None, - "ecephys_642478_2023-01-17_13-56-29/mri_session.json": None, } # noinspection PyTypeChecker md = json.loads( @@ -824,7 +824,7 @@ def test_build_metadata_record_from_prefix_with_optional_fields( s3_client=mock_s3_client, optional_name="ecephys_642478_2023-01-17_13-56-29", optional_created=datetime(2020, 1, 2, 3, 4, 5), - optional_external_links=[{"Code Ocean": "123-456"}], + optional_external_links={"Code Ocean": ["123-456"]}, ) ) mock_get_dict_of_file_info.assert_called_once() @@ -832,7 +832,7 @@ def test_build_metadata_record_from_prefix_with_optional_fields( self.assertEqual("s3://code-ocean-bucket/abc-123", md["location"]) self.assertEqual("ecephys_642478_2023-01-17_13-56-29", md["name"]) self.assertEqual("2020-01-02T03:04:05", md["created"]) - self.assertEqual([{"Code Ocean": "123-456"}], md["external_links"]) + self.assertEqual({"Code Ocean": ["123-456"]}, md["external_links"]) @patch("aind_data_asset_indexer.utils.Metadata.model_construct") @patch("boto3.client") @@ -859,6 +859,7 @@ def test_build_metadata_record_from_prefix_error( "e_tag": '"f4827f025e79bafeb6947e14c4e3b51a"', "version_id": "jWWT0Xrb8_nE9t5C.nTlLElpYJoURbv_", }, + "ecephys_642478_2023-01-17_13-56-29/quality_control.json": None, "ecephys_642478_2023-01-17_13-56-29/rig.json": None, "ecephys_642478_2023-01-17_13-56-29/session.json": None, "ecephys_642478_2023-01-17_13-56-29/subject.json": { @@ -868,7 +869,6 @@ def test_build_metadata_record_from_prefix_error( "e_tag": '"92734946c64fc87408ef79e5e92937bc"', "version_id": "XS0p7m6wWNTHG_F3P76D7AUXtE23BakR", }, - "ecephys_642478_2023-01-17_13-56-29/mri_session.json": None, } mock_download_json_file.side_effect = [ self.example_processing, @@ -949,6 +949,7 @@ def test_sync_core_json_files( "e_tag": f'"{md5_hash_processing_unchanged}"', "version_id": "jWWT0Xrb8_nE9t5C.nTlLElpYJoURbv_", }, + f"{pfx}/quality_control.json": None, f"{pfx}/rig.json": { "last_modified": datetime( 2023, 11, 4, 1, 13, 41, tzinfo=timezone.utc @@ -964,7 +965,6 @@ def test_sync_core_json_files( "e_tag": f'"{md5_hash_subject_unchanged}"', "version_id": "XS0p7m6wWNTHG_F3P76D7AUXtE23BakR", }, - f"{pfx}/mri_session.json": None, } mock_upload_core_record.return_value = "mock_upload_response" mock_s3_client.delete_object.return_value = "mock_delete_response" @@ -1020,6 +1020,10 @@ def test_sync_core_json_files( f"processing is up-to-date in {s3_loc}/processing.json. " "Skipping." ), + ( + f"quality_control not found in metadata.nd.json for {pfx} nor " + f"in {s3_loc}/quality_control.json! Skipping." + ), ( f"rig not found in metadata.nd.json for {pfx} but {s3_loc}/" "rig.json exists! Deleting." @@ -1030,10 +1034,6 @@ def test_sync_core_json_files( f"{s3_loc}/session.json! Skipping." ), f"subject is up-to-date in {s3_loc}/subject.json. Skipping.", - ( - f"mri_session not found in metadata.nd.json for {pfx} nor in " - f"{s3_loc}/mri_session.json! Skipping." - ), ] actual_log_messages = [ c[1]["message"] for c in mock_log_message.call_args_list @@ -1072,6 +1072,7 @@ def test_cond_copy_then_sync_core_json_files( "e_tag": '"7ebb961de9e9b00accfd1358e4561ec1"', "version_id": "jWWT0Xrb8_nE9t5C.nTlLElpYJoURbv_", }, + f"{pfx}/quality_control.json": None, f"{pfx}/rig.json": None, f"{pfx}/session.json": None, f"{pfx}/subject.json": { @@ -1081,7 +1082,6 @@ def test_cond_copy_then_sync_core_json_files( "e_tag": '"8b8cd50a6cf1f3f667be98a69db2ad89"', "version_id": "XS0p7m6wWNTHG_F3P76D7AUXtE23BakR", }, - f"{pfx}/mri_session.json": None, } cond_copy_then_sync_core_json_files( metadata_json=json.dumps(self.example_metadata_nd), @@ -1164,6 +1164,7 @@ def test_cond_copy_then_sync_core_json_files_mismatch( "e_tag": '"7ebb961de9e9b00accfd1358e4561ec1"', "version_id": "jWWT0Xrb8_nE9t5C.nTlLElpYJoURbv_", }, + f"{pfx}/quality_control.json": None, f"{pfx}/rig.json": { "last_modified": datetime( 2022, 5, 5, 1, 13, 41, tzinfo=timezone.utc @@ -1179,7 +1180,6 @@ def test_cond_copy_then_sync_core_json_files_mismatch( "e_tag": '"8b8cd50a6cf1f3f667be98a69db2ad89"', "version_id": "XS0p7m6wWNTHG_F3P76D7AUXtE23BakR", }, - f"{pfx}/mri_session.json": None, } cond_copy_then_sync_core_json_files( @@ -1436,7 +1436,7 @@ def test_get_all_processed_codeocean_asset_records( 2024, 6, 12, 21, 21, 28, tzinfo=timezone.utc ), "external_links": { - "Code Ocean": "11ee1e1e-11e1-1111-1111-e11eeeee1e11" + "Code Ocean": ["11ee1e1e-11e1-1111-1111-e11eeeee1e11"] }, }, "s3://some_co_bucket/666666cc-66cc-6c66-666c-6c66c6666666": { @@ -1451,7 +1451,7 @@ def test_get_all_processed_codeocean_asset_records( 2024, 6, 12, 19, 45, 59, tzinfo=timezone.utc ), "external_links": { - "Code Ocean": "666666cc-66cc-6c66-666c-6c66c6666666" + "Code Ocean": ["666666cc-66cc-6c66-666c-6c66c6666666"] }, }, }