-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
6 changed files
with
310 additions
and
32 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
"""Build a .txt dictionary of ontology classes under chosen ancestors.

Loads a local .owl ontology with owlready2, prints the ancestors of every
class, and provides helpers to write a tab-separated dictionary file
('<label>\\tLABEL=<new label>\\tID=<class>') for all classes having a wanted
ancestor, optionally followed by their exact synonyms.
"""
import owlready2

# --- input to provide ---------------------------------------------------------
# Example configuration for the helper functions below:
#   wanted_classes_and_new_labels = {'Abnormal Cell': 'Cell',
#                                    'Tissue Culture': 'ResearchTechnique',
#                                    'Cellular Process': 'AssociatedBiologicalProcess'}
#   new_dictionary = 'name.txt'

# Path of the local ontology to load (NCIt in the original run).
ontology = '/Users/oriol/Desktop/ncit.owl'
onto = owlready2.get_ontology(ontology).load()
class_list = list(onto.classes())


def write_dictionary(wanted_classes_and_new_labels, new_dictionary):
    """Append one dictionary line per class that has a wanted ancestor.

    wanted_classes_and_new_labels maps an ancestor label to the new label to
    emit; new_dictionary is the path of the .txt file (opened in append mode).
    Returns the list of classes written, for later synonym expansion.
    """
    relevant_terms = []
    for c in class_list:
        for a in c.ancestors():
            # str(label) renders like "['Some Label']" -> strip the wrapping.
            ancestor_label = str(a.label)[2:-2]
            new_label = wanted_classes_and_new_labels.get(ancestor_label)
            if new_label is not None and c not in relevant_terms:
                relevant_terms.append(c)
                with open(new_dictionary, 'a') as n:
                    n.write(c.label[0] + '\t' + 'LABEL=' + new_label
                            + '\t' + 'ID=' + str(c) + '\n')
    return relevant_terms


def write_synonyms(relevant_terms, wanted_classes_and_new_labels, new_dictionary):
    """Append exact synonyms (with their preferred label) for written classes.

    For each class returned by write_dictionary, looks up hasExactSynonym and
    appends one line per synonym, tagging the class's preferred label.
    """
    for c in relevant_terms:
        for a in c.ancestors():
            ancestor_label = str(a.label)[2:-2]
            new_label = wanted_classes_and_new_labels.get(ancestor_label)
            if new_label is None:
                continue
            try:
                synonyms = list(onto.search_one(label=c.label).hasExactSynonym)
                print(synonyms)
                for s in synonyms:
                    with open(new_dictionary, 'a') as n:
                        n.write(s + '\t' + 'LABEL=' + new_label
                                + '\t' + 'ID=' + str(c)
                                + '\t' + 'PrefSynonym=' + c.label[0] + '\n')
            except AttributeError:
                # search_one may return None, or a class with no exact synonyms.
                print('no synonym')


# Exploration output kept from the original script: print every class's
# ancestors.
for c in class_list:
    print(list(c.ancestors()))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,200 @@ | ||
import os.path | ||
import urllib.request | ||
from typing import List, Dict, Optional | ||
import re | ||
from urllib.error import HTTPError | ||
|
||
import requests | ||
import owlready2 | ||
from pymongo.mongo_client import MongoClient | ||
import progressbar | ||
from bson.objectid import ObjectId | ||
from owlready2 import OwlReadyOntologyParsingError | ||
from tqdm import tqdm | ||
import obonet | ||
|
||
# CURIE-like pattern: an alphabetic/underscore prefix, a colon, and a local id.
# NOTE(review): '^' inside the character class matches a literal caret, which
# looks unintended for ontology ids — confirm against the data.
ONTOLOGY_REGEX = re.compile(r"([_A-Za-z]+):([_A-Za-z0-9^\-]+)")

# Local MongoDB instance holding the 'beacon' database used throughout.
client = MongoClient(
    "mongodb://127.0.0.1:27017/"
)
|
||
class MyProgressBar:
    """Reporthook for urllib.request.urlretrieve that renders a progress bar."""

    def __init__(self):
        # The bar is created lazily on the first callback, once the total
        # download size is known.
        self.pbar = None

    def __call__(self, block_num: int, block_size: int, total_size: int):
        """Update (or finish) the bar after each downloaded block."""
        if not self.pbar:
            bar = progressbar.ProgressBar(maxval=total_size)
            bar.start()
            self.pbar = bar

        downloaded = block_num * block_size
        if downloaded >= total_size:
            self.pbar.finish()
        else:
            self.pbar.update(downloaded)
|
||
|
||
def insert_all_ontology_terms_used():
    """Scan every data collection for ontology terms and store them.

    Iterates all collections of the 'beacon' database except
    'filtering_terms' itself, and inserts the terms found by
    find_ontology_terms_used into the 'filtering_terms' collection.
    """
    names = [c for c in client.beacon.list_collection_names()
             if c != 'filtering_terms']
    print("Collections:", names)
    for name in names:
        found = find_ontology_terms_used(name)
        if len(found) > 0:
            client.beacon.filtering_terms.insert_many(found)
|
||
|
||
def get_ontology_name(ontology: str) -> Optional[str]:
    """Return the human-readable name of a locally downloaded ontology.

    Reads ontologies/<ontology>.obo and takes the first 'remark' header
    entry, truncated at the first '.' when present.  `ontology` is the
    ontology id string returned by load_ontology (the original
    owlready2.Ontology annotation was wrong — the value is formatted into a
    path).  Returns None when the file is missing, unparsable, or has no
    remark header.
    """
    path = "ontologies/{}.obo".format(ontology)
    try:
        remark = obonet.read_obo(path).graph['remark'][0]
    except (OSError, KeyError, IndexError, ValueError):
        # Missing/invalid .obo file or no 'remark' header: no name available.
        # (Previously a bare `except: pass` swallowed every error.)
        return None
    # BUG FIX: the original returned the whole remark *list* when it
    # contained no '.', and a str otherwise; now always a str.
    return remark.split('.')[0] if '.' in remark else remark
|
||
def load_ontology(ontology_id: str) -> Optional[str]:
    """Ensure the .obo file for `ontology_id` exists locally.

    Downloads http://purl.obolibrary.org/obo/<id>.obo into ontologies/ when
    it is not already present, showing a progress bar.  Returns the ontology
    id on success (NOTE: a str, not an owlready2.Ontology — the original
    return annotation was wrong), or None when the id is not purely
    alphabetic or the download fails.
    """
    if not ontology_id.isalpha():
        # Non-alphabetic prefixes (regex false positives) are skipped.
        return None
    url = "http://purl.obolibrary.org/obo/{}.obo".format(ontology_id.lower())
    path = "ontologies/{}.obo".format(ontology_id)
    try:
        if not os.path.exists(path):
            urllib.request.urlretrieve(url, path, MyProgressBar())
        return ontology_id
    except HTTPError as err:
        # BUG FIX: the original printed the HTTPError *class*, not the
        # actual error instance.
        print("ERROR", err)
    except ValueError as err:
        print("ERROR", err)
    return None
|
||
|
||
def get_ontology_term_label(ontology: str, term: str) -> Optional[str]:
    """Return the human-readable name of term <ontology>:<term>.

    Looks the term up in the locally downloaded ontologies/<ontology>.obo
    file.  `ontology` is the ontology id string, e.g. 'NCIT' (the original
    owlready2.Ontology annotation was wrong — `.lower()` is called on it).
    Returns None when the file is missing/unparsable or the term is unknown.
    """
    path = "ontologies/{}.obo".format(ontology.lower())
    try:
        graph = obonet.read_obo(path)
        id_to_name = {id_: data.get('name') for id_, data in graph.nodes(data=True)}
        return id_to_name['{}:{}'.format(ontology, term)]
    except (OSError, KeyError, ValueError):
        # Missing .obo file or unknown term id -> no label available.
        # (Previously a bare `except: pass` swallowed every error.)
        return None
|
||
|
||
def get_ontology_term_count(collection_name: str, term: str) -> int:
    """Count documents of `collection_name` whose text index matches `term`.

    The term is quoted so the $text search is an exact-phrase match.
    """
    phrase_query = {'$text': {'$search': '"' + term + '"'}}
    collection = client.beacon.get_collection(collection_name)
    return collection.count_documents(phrase_query)
|
||
|
||
def find_ontology_terms_used(collection_name: str) -> List[Dict]:
    """Collect every distinct CURIE-like term used in a collection.

    Scans the string form of each document for ONTOLOGY_REGEX matches
    (e.g. 'NCIT:C1234'), downloads each referenced ontology once, and
    returns one descriptor dict per distinct term (type, id, label, count,
    collection).  Terms whose ontology could not be loaded are skipped.
    """
    collection = client.beacon.get_collection(collection_name)
    total = collection.estimated_document_count()
    seen = set()            # CURIEs already emitted
    loaded = dict()         # ontology prefix -> load_ontology() result
    descriptors = []
    for doc in tqdm(collection.find(), total=total):
        for prefix, local_id in ONTOLOGY_REGEX.findall(str(doc)):
            curie = '{}:{}'.format(prefix, local_id)
            print(curie, prefix)
            if curie in seen:
                continue
            seen.add(curie)
            if prefix not in loaded:
                loaded[prefix] = load_ontology(prefix)
            if loaded[prefix] is None:
                continue
            descriptors.append({
                'type': get_ontology_name(loaded[prefix]),
                'id': curie,
                'label': get_ontology_term_label(prefix, local_id),
                # TODO: Use conf.py -> beaconGranularity to not disclose
                # counts in the filtering terms
                'count': get_ontology_term_count(collection_name, curie),
                'collection': collection_name,
            })
    return descriptors
|
||
|
||
def get_alphanumeric_term_count(collection_name: str, key: str) -> int:
    """Number of distinct values stored under dotted path `key` in the collection."""
    distinct_values = client.beacon.get_collection(collection_name).distinct(key)
    return len(distinct_values)
|
||
|
||
def get_properties_of_document(document, prefix="") -> List[str]:
    """Return the dotted path of every scalar leaf in a nested document.

    A leaf is a dict value that is None, int (incl. bool), float, or str;
    its path is the '.'-joined chain of dict keys leading to it.  Lists are
    traversed without contributing a path segment; ObjectId values are
    skipped.  A bare scalar/None document yields [].

    BUG FIX: float leaves previously fell through to the catch-all branch,
    which called exit(0) — silently killing the process with a success
    code.  Floats are now treated as scalars, and genuinely unsupported
    types raise TypeError instead of exiting.
    """
    if document is None or isinstance(document, (str, int, float)):
        # A bare scalar has no named properties of its own.
        return []
    if isinstance(document, list):
        properties = []
        for item in document:
            properties += get_properties_of_document(item, prefix)
        return properties
    if isinstance(document, dict):
        properties = []
        for key, value in document.items():
            path = prefix + '.' + key if prefix else key
            if value is None or isinstance(value, (int, float, str)):
                properties.append(path)
            elif isinstance(value, (list, dict)):
                properties += get_properties_of_document(value, path)
            elif isinstance(value, ObjectId):
                # Mongo internal ids are not filtering properties.
                continue
            else:
                raise TypeError('Unknown type: {} ({})'.format(value, type(value)))
        return properties
    raise TypeError('Unknown type: {} ({})'.format(document, type(document)))
|
||
|
||
def find_alphanumeric_terms_used(collection_name: str) -> List[Dict]:
    """Return one descriptor per distinct dotted property path in a collection.

    Walks every document with get_properties_of_document and emits, for
    each new path, a dict with type 'alphanumeric', the path as id, its
    distinct-value count, and the collection name.
    """
    collection = client.beacon.get_collection(collection_name)
    total = collection.estimated_document_count()
    seen = set()
    descriptors = []
    for doc in tqdm(collection.find(), total=total):
        for path in get_properties_of_document(doc):
            if path in seen:
                continue
            seen.add(path)
            descriptors.append({
                'type': 'alphanumeric',
                'id': path,
                'count': get_alphanumeric_term_count(collection_name, path),
                'collection': collection_name,
            })
    return descriptors
|
||
|
||
def insert_all_alphanumeric_terms_used():
    """Print the alphanumeric filtering terms of every data collection.

    The actual insert into 'filtering_terms' is intentionally disabled
    (commented out below), so this currently only reports what would be
    stored.
    """
    names = client.beacon.list_collection_names()
    if 'filtering_terms' in names:
        names.remove('filtering_terms')
    print("Collections:", names)
    for name in names:
        terms = find_alphanumeric_terms_used(name)
        print(terms)
        # if len(terms) > 0:
        #     client.beacon.filtering_terms.insert_many(terms)
|
||
|
||
# Ad-hoc calls kept from development, for reference:
# insert_all_ontology_terms_used()
# insert_all_alphanumeric_terms_used()
# terms = find_ontology_terms_used("individuals")
# print(terms)
# hola = get_ontology_term_label('NCIT', 'C173381')
# print(hola)

if __name__ == '__main__':
    # BUG FIX: this ran unconditionally at import time; guard it so merely
    # importing the module no longer scans the 'analyses' collection.
    find_alphanumeric_terms_used('analyses')
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
"""Scratch script: explore a local NCIt .obo file with obonet."""
import networkx
import obonet

# Local copy of the NCIt ontology in OBO format.
url = '/Users/oriol/Desktop/beacon_canada/deploy/ontologies/ncit.obo'
graph = obonet.read_obo(url)

# Other things worth inspecting on the graph (kept for reference):
#   len(graph)                                  -> number of nodes
#   graph.number_of_edges()                     -> number of edges
#   networkx.is_directed_acyclic_graph(graph)
#   graph.graph['data-version']                 -> version, after 'x/y' split
#   graph.graph['remark'][0]                    -> ontology name, before '.'
#   networkx.ancestors(graph, 'NCIT:C173381')   -> related terms

# Map every term id to its human-readable name, then look one term up.
id_to_name = {term_id: attrs.get('name') for term_id, attrs in graph.nodes(data=True)}
print(id_to_name['NCIT:C173381'])