From 6793c82a45b0df9b6f02a3d0c7c9039f0e6b0cfb Mon Sep 17 00:00:00 2001 From: matthewpeterkort Date: Tue, 7 Jan 2025 15:56:42 -0800 Subject: [PATCH] Revert "Expand/resource (#39)" This reverts commit 9aa79a0cebdcaf4c338c24c380bbd52c0e9bce20. --- config.yaml | 6 ++-- iceberg_tools/cli/schema.py | 3 +- iceberg_tools/graph/__init__.py | 26 +++++++---------- .../unit/link-description-object/conftest.py | 29 ------------------- .../test_vertex_schema.py | 16 +++++----- 5 files changed, 22 insertions(+), 58 deletions(-) diff --git a/config.yaml b/config.yaml index 7468481..9169ebc 100644 --- a/config.yaml +++ b/config.yaml @@ -16,14 +16,12 @@ dependency_order: # - Organization # - Location - - Group - Practitioner - PractitionerRole - ResearchStudy - Patient - ResearchSubject - Substance - - SubstanceDefinition - Specimen # - Encounter - Observation @@ -31,8 +29,8 @@ dependency_order: - Condition - Medication - MedicationAdministration - - MedicationStatement - - MedicationRequest +# - MedicationStatement +# - MedicationRequest - Procedure - DocumentReference - Task diff --git a/iceberg_tools/cli/schema.py b/iceberg_tools/cli/schema.py index ae6d39d..7e4d5da 100644 --- a/iceberg_tools/cli/schema.py +++ b/iceberg_tools/cli/schema.py @@ -58,8 +58,7 @@ def generate_bmeg(output_path, config_path, stats): with SchemaLinkWriter() as mgr: for klass, schema in schemas.items(): with open(output_path / pathlib.Path(klass + ".yaml"), "w") as fp: - dependency_order = gen3_config["dependency_order"] - schema = mgr.insert_links(schema, classes, dependency_order) + schema = mgr.insert_links(schema, classes) yaml.dump(schema, fp) logger.info(f"Individual yaml schemas written to {output_path}/*.yaml") diff --git a/iceberg_tools/graph/__init__.py b/iceberg_tools/graph/__init__.py index 1929e2d..a43cedf 100644 --- a/iceberg_tools/graph/__init__.py +++ b/iceberg_tools/graph/__init__.py @@ -67,7 +67,7 @@ def _extract_target_hints(schema_link): return directionality, multiplicity, association -def _generate_links_from_fhir_references(schema, classes, dependency_order) -> List[dict]: +def _generate_links_from_fhir_references(schema, classes) -> List[dict]: """Generate links for a schema. Parameters @@ -79,15 +79,14 @@ def _generate_links_from_fhir_references(schema, classes, dependency_order) -> L # Direct links from {schema['title']}" links = [] - dependency_order = [elem for elem in dependency_order if elem not in ["_definitions.yaml", "_terms.yaml", "Program", "Project"]] - links.extend(_extract_links(schema, classes, dependency_order)) + links.extend(_extract_links(schema, classes)) # Nested links nested_links = [] for nested_schema, path in _extract_nested_schemas(schema): if nested_schema['title'] in NESTED_OBJECTS_IGNORE: continue - extracted_links = _extract_links(nested_schema, classes, dependency_order) + extracted_links = _extract_links(nested_schema, classes) if len(extracted_links) == 0: continue @@ -138,12 +137,11 @@ def _extract_nested_schemas(schema) -> Iterator[tuple[dict, str]]: yield sub_schema, match -def _extract_links(schema: dict, classes, dependency_order) -> List[dict]: +def _extract_links(schema: dict, classes) -> List[dict]: """Extract Link Description Object (LDO) from a schema. see https://json-schema.org/draft/2019-09/json-schema-hypermedia.html#rfc.section.6 """ - refs_finder = RefFinder(schema, refs=True) matches = sorted(refs_finder.find_refs()) @@ -157,10 +155,8 @@ def _extract_links(schema: dict, classes, dependency_order) -> List[dict]: multiplicity = 'has_many' property_name = match.split('.')[1] property_ = schema['properties'][property_name] - - if 'enum_reference_types' not in property_ or\ - (len(property_["enum_reference_types"]) == 1 and property_["enum_reference_types"][0] == "Resource"): - property_['enum_reference_types'] = dependency_order + if 'enum_reference_types' not in property_: + property_['enum_reference_types'] = ['__ANY__'] append_postscript = len(property_['enum_reference_types']) > 1 _path = '.'.join(match.split('.')[1:-1]) _path = _path + '.reference' @@ -398,7 +394,7 @@ def _extract_link_parts(self, schema_link: dict, instance: dict) -> dict: class VertexSchemaDecorator: """Adds links to vertex schema.""" - def __init__(self, schema: dict, classes: list, dependency_order: list): + def __init__(self, schema: dict, classes: list): """Load and compile a JSON schema.""" self.schema = _load_schema(schema) # add links property @@ -411,7 +407,7 @@ def __init__(self, schema: dict, classes: list, dependency_order: list): } } # add links element - links, nested_links = _generate_links_from_fhir_references(schema, classes, dependency_order) + links, nested_links = _generate_links_from_fhir_references(schema, classes) self.schema['links'] = links + nested_links # check schema jsonschema.Draft202012Validator.check_schema(schema) # Draft202012Validator.check_schema(schema) @@ -475,6 +471,7 @@ def insert_links(self, vertex: dict) -> dict: return vertex for schema_link in _schema['links']: + keys = self._extract_href_keys(schema_link['href']) values = self._extract_values(schema_link, vertex) @@ -609,7 +606,7 @@ def __exit__(self, exc_type, exc_value, exc_tb): pass @staticmethod - def insert_links(schema, classes, dependency_order) -> dict: + def insert_links(schema, classes) -> dict: """Insert links into a schema. Parameters: @@ -619,9 +616,8 @@ def insert_links(schema, classes, dependency_order) -> dict: dict: schema with links inserted """ - schema = _load_schema(schema) - links, nested_links = _generate_links_from_fhir_references(schema, classes, dependency_order) + links, nested_links = _generate_links_from_fhir_references(schema, classes) schema['links'] = links + nested_links schema['properties']['links'] = { 'type': 'array', diff --git a/tests/unit/link-description-object/conftest.py b/tests/unit/link-description-object/conftest.py index 1e2d4dc..e36c371 100644 --- a/tests/unit/link-description-object/conftest.py +++ b/tests/unit/link-description-object/conftest.py @@ -18,30 +18,6 @@ def python_source_directories() -> List[str]: return ["tools", "tests"] -_DEPENDENCY_LIST = [ - "Organization", - "Practitioner", - "PractitionerRole", - "ResearchStudy", - "Patient", - "ResearchSubject", - "Substance", - "Specimen", - "Observation", - "DiagnosticReport", - "Condition", - "Medication", - "MedicationAdministration", - "MedicationStatement", - "MedicationRequest", - "Procedure", - "DocumentReference", - "Task", - "ImagingStudy", - "FamilyMemberHistory", - "BodyStructure" -] - _SCHEMA = yaml.safe_load(""" --- "$schema": https://json-schema.org/draft/2020-12/schema @@ -186,11 +162,6 @@ def python_source_directories() -> List[str]: """) -@pytest.fixture -def dependency_list(): - return _DEPENDENCY_LIST - - @pytest.fixture def nested_references(): return _NESTED_REFERENCES['references'] diff --git a/tests/unit/link-description-object/test_vertex_schema.py b/tests/unit/link-description-object/test_vertex_schema.py index 26e9804..3e3fba5 100644 --- a/tests/unit/link-description-object/test_vertex_schema.py +++ b/tests/unit/link-description-object/test_vertex_schema.py @@ -13,20 +13,20 @@ EXPECTED_LINKS = ['collection_collector_Patient', 'note_authorReference_Patient', 'parent', 'subject_Patient'] -def test_specimen_schema_decorator(dependency_list): +def test_specimen_schema_decorator(): """Ensure links are discovered from properties.""" schemas = extract_schemas([Specimen, Patient], BASE_URI) - specimen_schema = VertexSchemaDecorator(schemas['Specimen'], [Specimen, Patient], dependency_list) + specimen_schema = VertexSchemaDecorator(schemas['Specimen'], [Specimen, Patient]) assert len(specimen_schema.schema['links']) == 4, ("Specimen should have 4 links", yaml.dump(specimen_schema.schema, sort_keys=False)) actual_links = sorted([_['rel'] for _ in specimen_schema.schema['links']]) print(sorted(actual_links)) assert actual_links == EXPECTED_LINKS, ("Specimen links should match", actual_links, EXPECTED_LINKS) -def test_vertex_link_writer_polymorphic(dependency_list): +def test_vertex_link_writer_polymorphic(): """Ensure links are discovered from properties. Use a context manager for throughput.""" schemas = extract_schemas([Specimen], BASE_URI) - specimen_schema = VertexSchemaDecorator(schemas['Specimen'], [Specimen, Patient, Device, Group], dependency_list) + specimen_schema = VertexSchemaDecorator(schemas['Specimen'], [Specimen, Patient, Device, Group]) with VertexLinkWriter(specimen_schema) as mgr: for specimen in [ {'id': 's-p1', 'resourceType': 'Specimen', 'subject': {'reference': 'Patient/p1'}}, @@ -42,11 +42,11 @@ def test_vertex_link_writer_polymorphic(dependency_list): assert specimen['links'][0] == {'rel': f'subject_{entity_type}', 'href': ref}, "Links should be added to specimen" -def test_vertex_link_writer_nested(dependency_list): +def test_vertex_link_writer_nested(): """Ensure links are discovered from properties. Use a context manager for throughput.""" schemas = extract_schemas([Specimen], BASE_URI) - specimen_schema = VertexSchemaDecorator(schemas['Specimen'], [Specimen, Patient, Substance], dependency_list) + specimen_schema = VertexSchemaDecorator(schemas['Specimen'], [Specimen, Patient, Substance]) with VertexLinkWriter(specimen_schema) as mgr: @@ -80,13 +80,13 @@ def test_vertex_link_writer_nested(dependency_list): assert specimen['links'][i] == {'rel': 'processing_additive', 'href': specimen['processing'][i]['additive'][i]['reference']}, "Links should be added to specimen" -def test_schema_link_writer_nested(dependency_list): +def test_schema_link_writer_nested(): """Ensure links are discovered from schema. Use a context manager for throughput.""" schemas = extract_schemas([Specimen], BASE_URI) specimen_schema = schemas['Specimen'] with SchemaLinkWriter() as mgr: - specimen_schema = mgr.insert_links(specimen_schema, [Specimen, Patient], dependency_list) + specimen_schema = mgr.insert_links(specimen_schema, [Specimen, Patient]) assert specimen_schema['links'] is not None, "Links should be added to specimen" assert specimen_schema['properties']['links'] is not None, "Links should be added to specimen properties" assert 'links' not in specimen_schema['properties']['links'], "Links should not be double nested"