diff --git a/data/adapters/AFGR_caqtl_adapter.py b/data/adapters/AFGR_caqtl_adapter.py index d62c943f..36903c9f 100644 --- a/data/adapters/AFGR_caqtl_adapter.py +++ b/data/adapters/AFGR_caqtl_adapter.py @@ -23,6 +23,9 @@ class AFGRCAQtl: CLASS_NAME = 'accessible_dna_element' ONTOLOGY_TERM_ID = 'EFO_0005292' # lymphoblastoid cell line ONTOLOGY_TERM_NAME = 'lymphoblastoid cell line' + EDGE_COLLECTION_NAME = 'modulates accessibility of' + EDGE_COLLECTION_INVERSR_NAME = 'accessibility modulated by' + EDGE_COLLECTION_METHOD = 'BAO_0040027' # chromatin accessibility method def __init__(self, filepath, label, dry_run=True, writer: Optional[Writer] = None, **kwargs): if label not in AFGRCAQtl.ALLOWED_LABELS: @@ -90,8 +93,9 @@ def process_file(self): 'source_url': AFGRCAQtl.SOURCE_URL, 'biosample_term': 'ontology_terms/' + AFGRCAQtl.ONTOLOGY_TERM_ID, 'biological_context': AFGRCAQtl.ONTOLOGY_TERM_NAME, - 'name': 'associated with', - 'inverse_name': 'associated with' + 'name': AFGRCAQtl.EDGE_COLLECTION_NAME, + 'inverse_name': AFGRCAQtl.EDGE_COLLECTION_INVERSR_NAME, + 'method': 'ontology_terms/' + AFGRCAQtl.EDGE_COLLECTION_METHOD } self.writer.write(json.dumps(_props)) diff --git a/data/adapters/encode_caqtl_adapter.py b/data/adapters/encode_caqtl_adapter.py index 8e844d83..92d7fbd2 100644 --- a/data/adapters/encode_caqtl_adapter.py +++ b/data/adapters/encode_caqtl_adapter.py @@ -37,6 +37,9 @@ class CAQtl: 'term_name': 'liver' } } + EDGE_COLLECTION_NAME = 'modulates accessibility of' + EDGE_COLLECTION_INVERSR_NAME = 'accessibility modulated by' + EDGE_COLLECTION_METHOD = 'BAO_0040027' # chromatin accessibility method def __init__(self, filepath, source, label, dry_run=True, writer: Optional[Writer] = None, **kwargs): if label not in CAQtl.ALLOWED_LABELS: @@ -89,8 +92,9 @@ def process_file(self): 'source_url': 'https://www.encodeproject.org/files/' + os.path.basename(self.filepath).split('.')[0], 'biological_context': CAQtl.CELL_ONTOLOGY[cell_name]['term_name'], 'biosample_term': 
'ontology_terms/' + CAQtl.CELL_ONTOLOGY[cell_name]['term_id'], - 'name': 'associated with', - 'inverse_name': 'associated with' + 'name': CAQtl.EDGE_COLLECTION_NAME, + 'inverse_name': CAQtl.EDGE_COLLECTION_INVERSR_NAME, + 'method': 'ontology_terms/' + CAQtl.EDGE_COLLECTION_METHOD, } self.writer.write(json.dumps(_props)) diff --git a/data/data_loading_support_files/AFGR/chromatin_acessibility_method_term.jsonl b/data/data_loading_support_files/AFGR/chromatin_acessibility_method_term.jsonl new file mode 100644 index 00000000..f6b245c2 --- /dev/null +++ b/data/data_loading_support_files/AFGR/chromatin_acessibility_method_term.jsonl @@ -0,0 +1 @@ +{"_key":"BAO_0040027","uri":"http://www.bioassayontology.org/bao#BAO_0040027","term_id":"BAO_0040027","name":"chromatin acessibility method","synonyms":[],"description":"A method that is used to identify open / accessible genomic sites of remodeled chromatin","source":"BAO","subontology":null} diff --git a/data/db/schema/clickhouse_import.yaml b/data/db/schema/clickhouse_import.yaml index 2ff41f6a..bc612368 100644 --- a/data/db/schema/clickhouse_import.yaml +++ b/data/db/schema/clickhouse_import.yaml @@ -305,7 +305,7 @@ mm_genomic_elements: | variants_genomic_elements: | INSERT INTO variants_genomic_elements - SELECT label, log10pvalue, p_value, beta, source, source_url, biological_context, biosample_term, name, inverse_name, _key as id, splitByString('/', assumeNotNull(_from))[2] as variants_id, splitByString('/', assumeNotNull(_to))[2] as genomic_elements_id + SELECT label, log10pvalue, p_value, beta, source, source_url, biological_context, biosample_term, name, method, inverse_name, _key as id, splitByString('/', assumeNotNull(_from))[2] as variants_id, splitByString('/', assumeNotNull(_to))[2] as genomic_elements_id FROM s3('s3://igvf-catalog-parsed-collections/variants_genomic_elements/*.jsonl', 'JSONEachRow') genomic_elements_genes: | diff --git a/data/db/schema/clickhouse_schema.sql 
b/data/db/schema/clickhouse_schema.sql index 8e7231ba..b9182c90 100644 --- a/data/db/schema/clickhouse_schema.sql +++ b/data/db/schema/clickhouse_schema.sql @@ -759,6 +759,7 @@ CREATE TABLE IF NOT EXISTS variants_genomic_elements ( biological_context String, biosample_term String, name String, + method String, inverse_name String, id String PRIMARY KEY, variants_id String, diff --git a/data/schema-config.yaml b/data/schema-config.yaml index 293915de..be00c156 100644 --- a/data/schema-config.yaml +++ b/data/schema-config.yaml @@ -469,6 +469,7 @@ AFGR variant to genomic element: biosample_term: str name: str inverse_name: str + method: str encode variant to genomic element: description: >- diff --git a/data/tests/test_AFGR_caqtl_adapter.py b/data/tests/test_AFGR_caqtl_adapter.py index acf8f929..1b02b2ff 100644 --- a/data/tests/test_AFGR_caqtl_adapter.py +++ b/data/tests/test_AFGR_caqtl_adapter.py @@ -22,6 +22,6 @@ def test_AFGR_caqtl_adapter_AFGR_caqtl(): adapter.process_file() first_item = json.loads(writer.contents[0]) assert len(writer.contents) == 200 - assert len(first_item) == 13 + assert len(first_item) == 14 assert '_from' in first_item assert first_item['_key'] == '701f175a69d51e1e7c526f8c8ca2b2165ba7a58aadfa797dfa737916120b8ce5_accessible_dna_element_1_906596_907043_GRCh38_AFGR' diff --git a/data/tests/test_encode_caqtl_adapter.py b/data/tests/test_encode_caqtl_adapter.py index 2d2be46c..43bebb0a 100644 --- a/data/tests/test_encode_caqtl_adapter.py +++ b/data/tests/test_encode_caqtl_adapter.py @@ -29,8 +29,8 @@ def test_caqtl_adapter_encode_caqtl(): assert '_from' in first_item assert '_to' in first_item assert first_item['label'] == 'caQTL' - assert first_item['name'] == 'associated with' - assert first_item['inverse_name'] == 'associated with' + assert first_item['name'] == 'modulates accessibility of' + assert first_item['inverse_name'] == 'accessibility modulated by' def test_caqtl_adapter_invalid_label():