Pin numpy #126

Closed
wants to merge 3 commits into from
98 changes: 34 additions & 64 deletions Compound.ipynb
@@ -46,7 +46,8 @@
"from tempfile import NamedTemporaryFile\n",
"\n",
"import scipy\n",
"import numpy\n",
"import numpy as np\n",
"np.bool = np.bool_\n",
"from skimage import filters\n",
"import matplotlib.pyplot as plt\n",
"from idr import connection\n",
@@ -103,20 +104,16 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"### Set up base URLS so can use shorter variable names later on"
"### Set up base URL so can use shorter variable names later on"
]
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"URL = \"https://idr.openmicroscopy.org/mapr/api/{key}/?value={value}&case_sensitive=false&orphaned=true\"\n",
"SCREENS_PROJECTS_URL = \"https://idr.openmicroscopy.org/mapr/api/{key}/?value={value}&case_sensitive=false&id={compound_id}\"\n",
"PLATES_URL = \"https://idr.openmicroscopy.org/mapr/api/{key}/plates/?value={value}&id={screen_id}&case_sensitive=false\"\n",
"IMAGES_URL = \"https://idr.openmicroscopy.org/mapr/api/{key}/images/?value={value}&node={parent_type}&id={parent_id}&case_sensitive=false\"\n",
"ATTRIBUTES_URL = \"https://idr.openmicroscopy.org/webclient/api/annotations/?type=map&image={image_id}\""
"SEARCH_URL = \"https://idr.openmicroscopy.org/searchengine/api/v1/resources/image/search/?key={key}&value={value}\""
]
},
{
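
The cell above swaps the old mapr/webclient URL templates for a single IDR search-engine endpoint. A hedged sketch of how that `SEARCH_URL` is expected to be queried on its own: the `requests` session mirrors the notebook, the result layout (`results` → `results` → `key_values`) is taken from the parsing code later in this diff, and the compound value is a placeholder, not a guaranteed hit.

```python
# Sketch only: query the searchengine endpoint for one compound and peek at the hits.
import requests

SEARCH_URL = ("https://idr.openmicroscopy.org/searchengine/api/v1/"
              "resources/image/search/?key={key}&value={value}")

with requests.Session() as session:
    url = SEARCH_URL.format(key="Compound Name", value="blebbistatin")  # placeholder value
    payload = session.get(url).json()
    for hit in payload["results"]["results"][:5]:  # first few images only
        key_values = {kv["name"]: kv["value"] for kv in hit["key_values"]}
        print(hit["id"], hit.get("screen_name", "-"), key_values.get("InChIKey", "unknown"))
```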
@@ -138,16 +135,7 @@
},
"outputs": [],
"source": [
"TYPE = \"compound\"\n",
"KEYS = {TYPE:\n",
" (\"InChIKey\",\n",
" \"PubChem InChIKey\",\n",
" \"Compound Concentration (microMolar)\",\n",
" \"Concentration (microMolar)\",\n",
" \"Dose\",\n",
" \"Compound MoA\",\n",
" \"Compound Action\")\n",
"}"
"COMPOUND_NAME = \"Compound Name\""
]
},
{
@@ -168,43 +156,34 @@
},
"outputs": [],
"source": [
"def parse_annotation(writer, json_data, name, data_type):\n",
" plate_name = \"-\"\n",
" screen_name = name\n",
" for p in json_data[data_type]:\n",
" parent_id = p['id']\n",
" plate_name = p['name']\n",
" qs3 = {'key': TYPE, 'value': compound,\n",
" 'parent_type': data_type[:-1], 'parent_id': parent_id}\n",
" url3 = IMAGES_URL.format(**qs3)\n",
"def parse_annotation(writer, json_data, compound):\n",
" for p in json_data:\n",
" plate_name = p['plate_name']\n",
" screen_name = p['screen_name']\n",
" c = compound.lower()\n",
" if c.startswith(\"ml\"):\n",
" c = 'ml9'\n",
" for i in session.get(url3).json()['images']:\n",
" image_id = i['id']\n",
" url4 = ATTRIBUTES_URL.format(**{'image_id': image_id})\n",
" row = {}\n",
" inchikey = \"unknown\"\n",
" concentration = \"unknown\"\n",
" moa = \"unknown\"\n",
" for a in session.get(url4).json()['annotations']:\n",
" for v in a['values']:\n",
" key = str(v[0])\n",
" if key in KEYS[TYPE]:\n",
" if key in ['InChIKey', 'PubChem InChIKey']:\n",
" inchikey = v[1]\n",
" elif key in ['Dose', 'Compound Concentration (microMolar)', 'Concentration (microMolar)']:\n",
" concentration = float(v[1].replace(' micromolar', ''))\n",
" elif key in ['Compound MoA', 'Compound Action']:\n",
" moa = v[1]\n",
" row.update({'Compound': c,\n",
" 'Screen': screen_name,\n",
" 'Plate': plate_name,\n",
" 'Image': image_id,\n",
" 'InChIKey': inchikey,\n",
" 'Concentration (microMolar)': concentration,\n",
" 'MoA': moa})\n",
" writer.writerow(row)"
" image_id = p['id']\n",
" inchikey = \"unknown\"\n",
" concentration = \"unknown\"\n",
" moa = \"unknown\"\n",
" for v in p['key_values']:\n",
" key = v[\"name\"]\n",
" value = v[\"value\"]\n",
" if key in ['InChIKey', 'PubChem InChIKey']:\n",
" inchikey = value\n",
" elif key in ['Dose', 'Compound Concentration (microMolar)', 'Concentration (microMolar)']:\n",
" concentration = float(value.replace(' micromolar', ''))\n",
" elif key in ['Compound MoA', 'Compound Action']:\n",
" moa = value\n",
" row = {'Compound': c,\n",
" 'Screen': screen_name,\n",
" 'Plate': plate_name,\n",
" 'Image': image_id,\n",
" 'InChIKey': inchikey,\n",
" 'Concentration (microMolar)': concentration,\n",
" 'MoA': moa}\n",
" writer.writerow(row)"
]
},
{
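
To make the input shape of the rewritten `parse_annotation` concrete, a hedged sketch that feeds it a single hand-made hit and writes the resulting row to stdout. It assumes `parse_annotation` from the cell above is already defined; every value below is an invented placeholder, not real IDR data.

```python
# Sketch only: exercise parse_annotation with a hand-made search hit.
import csv
import sys

fake_results = [{
    "id": 12345,                                   # placeholder image id
    "plate_name": "example-plate",
    "screen_name": "idr0000-example/screenA",      # placeholder screen
    "key_values": [
        {"name": "PubChem InChIKey", "value": "XXXXXXXXXXXXXX-XXXXXXXXXX-X"},
        {"name": "Compound Concentration (microMolar)", "value": "10 micromolar"},
        {"name": "Compound MoA", "value": "example mechanism"},
    ],
}]

fieldnames = ["Compound", "Screen", "Plate", "Image",
              "InChIKey", "Concentration (microMolar)", "MoA"]
writer = csv.DictWriter(sys.stdout, fieldnames=fieldnames)
writer.writeheader()
parse_annotation(writer, fake_results, "example-compound")  # placeholder compound name
```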
@@ -237,18 +216,9 @@
" writer = csv.DictWriter(csvfile, fieldnames=fieldnames)\n",
" writer.writeheader()\n",
" for compound in compounds:\n",
" qs1 = {'key': TYPE, 'value': compound}\n",
" url1 = URL.format(**qs1)\n",
" json_data = session.get(url1).json()\n",
" for m in json_data['maps']:\n",
" qs2 = {'key': TYPE, 'value': compound, 'compound_id': m['id']}\n",
" url2 = SCREENS_PROJECTS_URL.format(**qs2)\n",
" json_data = session.get(url2).json()\n",
" for s in json_data['screens']:\n",
" compound = s['extra']['value']\n",
" qs3 = {'key': TYPE, 'value': compound, 'screen_id': s['id']}\n",
" url3 = PLATES_URL.format(**qs3)\n",
" parse_annotation(writer, session.get(url3).json(), s['name'], 'plates')\n",
" url = SEARCH_URL.format(**{'key': COMPOUND_NAME, 'value': compound})\n",
" json_data = session.get(url).json()\n",
" parse_annotation(writer, json_data['results']['results'], compound)\n",
"finally:\n",
" csvfile.close()"
]
162 changes: 58 additions & 104 deletions GenesToPhenotypes.ipynb
@@ -118,16 +118,11 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"URL = \"https://idr.openmicroscopy.org/mapr/api/{key}/?value={value}&case_sensitive=false&orphaned=true\"\n",
"SCREENS_PROJECTS_URL = \"https://idr.openmicroscopy.org/mapr/api/{key}/?value={value}\"\n",
"PLATES_URL = \"https://idr.openmicroscopy.org/mapr/api/{key}/plates/?value={value}&id={screen_id}\"\n",
"DATASETS_URL = \"https://idr.openmicroscopy.org/mapr/api/{key}/datasets/?value={value}&id={project_id}\"\n",
"IMAGES_URL = \"https://idr.openmicroscopy.org/mapr/api/{key}/images/?value={value}&node={parent_type}&id={parent_id}\"\n",
"ATTRIBUTES_URL = \"https://idr.openmicroscopy.org/webclient/api/annotations/?type=map&image={image_id}\""
"SEARCH_URL = \"https://idr.openmicroscopy.org/searchengine/api/v1/resources/image/search/?key={key}&value={value}\""
]
},
{
@@ -141,17 +136,11 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"TYPE = \"gene\"\n",
"KEYS = {\"phenotype\":\n",
" (\"Phenotype\",\n",
" \"Phenotype Term Name\",\n",
" \"Phenotype Term Accession\",\n",
" \"Phenotype Term Accession URL\")\n",
"}"
"GENE_SYMBOL = \"Gene Symbol\""
]
},
{
@@ -164,75 +153,60 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"def parse_annotation(writer, json_data, name, data_type):\n",
" screen_name = \"-\"\n",
" plate_name = \"-\"\n",
" project_name = \"-\"\n",
" dataset_name = \"-\"\n",
" if data_type == 'datasets':\n",
" project_name = name\n",
" else:\n",
" screen_name = name\n",
" \n",
" for p in json_data[data_type]:\n",
" parent_id = p['id']\n",
" if data_type == 'datasets':\n",
" dataset_name = p['name']\n",
" else:\n",
" plate_name = p['name']\n",
" qs3 = {'key': TYPE, 'value': gene,\n",
" 'parent_type': data_type[:-1], 'parent_id': parent_id}\n",
" url3 = IMAGES_URL.format(**qs3)\n",
" for i in session.get(url3).json()['images']:\n",
"\n",
" image_id = i['id']\n",
" url4 = ATTRIBUTES_URL.format(**{'image_id': image_id})\n",
" for a in session.get(url4).json()['annotations']:\n",
" ontologies = [] # for ontology terms for a phenotype\n",
" row = {}\n",
" for v in a['values']:\n",
" if str(v[0]) in KEYS['phenotype']:\n",
" if str(v[0]) in ['Phenotype']: # has phenotype\n",
" row[str(v[0])] = v[1] # so create row\n",
"def parse_annotation(writer, json_data, gene):\n",
"\n",
" # if there are ontology mappings for the\n",
" # phenotype, add them to the ontologies list\n",
" ontList = ['Phenotype Term Name',\n",
" 'Phenotype Term Accession',\n",
" 'Phenotype Term Accession URL']\n",
" for p in json_data:\n",
" screen_name = p[\"screen_name\"] if p[\"screen_name\"] else \"-\"\n",
" plate_name = p[\"plate_name\"] if p[\"plate_name\"] else \"-\"\n",
" project_name = p[\"project_name\"] if p[\"project_name\"] else \"-\"\n",
" dataset_name = p[\"dataset_name\"] if p[\"dataset_name\"] else \"-\"\n",
" image_id = p['id']\n",
" ontologies = [] # for ontology terms for a phenotype\n",
" row = {}\n",
" for v in p['key_values']:\n",
" key = v['name']\n",
" value = v['value']\n",
" # if there are ontology mappings for the\n",
" # phenotype, add them to the ontologies list\n",
" ontList = ['Phenotype Term Name',\n",
" 'Phenotype Term Accession',\n",
" 'Phenotype Term Accession URL']\n",
" \n",
" if key == 'Phenotype': # has phenotype\n",
" row[key] = value # so create row\n",
"\n",
" if str(v[0]) in ontList:\n",
" ontologies.extend([str(v[0]), str(v[1])])\n",
" if row:\n",
" if (len(ontologies) > 0): # 1+ ontology mapping\n",
" row.update({'Gene': gene,\n",
" 'Screen': screen_name,\n",
" 'Plate': plate_name,\n",
" 'Image': image_id,\n",
" 'Project' : project_name,\n",
" 'Dataset': dataset_name})\n",
" # we have the start of a row now\n",
" # but we want to print out as many rows\n",
" # as there are ontology mappings\n",
" # so if there is mapping to 1 ontology term\n",
" # print 1 row, if there are 2 ontology terms\n",
" # print 2 rows etc\n",
" numberOfRows = len(ontologies)/6\n",
" # this is 3 pairs of ontology values per\n",
" # mapping, add the ontology mappings and print\n",
" n = 1\n",
" while (n <= numberOfRows):\n",
" row.update({ontologies[0]: ontologies[1],\n",
" ontologies[2]: ontologies[3],\n",
" ontologies[4]: ontologies[5]})\n",
" # remove that set of ontology mappings\n",
" ontologies = ontologies[6:]\n",
" writer.writerow(row)\n",
" n = n + 1"
" elif key in ontList:\n",
" ontologies.extend([key, value])\n",
" if row:\n",
" if (len(ontologies) > 0): # 1+ ontology mapping\n",
" row.update({'Gene': gene,\n",
" 'Screen': screen_name,\n",
" 'Plate': plate_name,\n",
" 'Image': image_id,\n",
" 'Project' : project_name,\n",
" 'Dataset': dataset_name})\n",
" # we have the start of a row now\n",
" # but we want to print out as many rows\n",
" # as there are ontology mappings\n",
" # so if there is mapping to 1 ontology term\n",
" # print 1 row, if there are 2 ontology terms\n",
" # print 2 rows etc\n",
" numberOfRows = len(ontologies)/6\n",
" # this is 3 pairs of ontology values per\n",
" # mapping, add the ontology mappings and print\n",
" n = 1\n",
" while (n <= numberOfRows):\n",
" row.update({ontologies[0]: ontologies[1],\n",
" ontologies[2]: ontologies[3],\n",
" ontologies[4]: ontologies[5]})\n",
" # remove that set of ontology mappings\n",
" ontologies = ontologies[6:]\n",
" writer.writerow(row)\n",
" n = n + 1"
]
},
{
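
One detail in the cell above worth spelling out: each phenotype can map to several ontology terms, each mapping contributing three key/value pairs (six list entries), and one CSV row is written per mapping. Note that `len(ontologies)/6` is float division in Python 3; the loop still terminates correctly because the integer counter is compared against that float. A hedged, self-contained sketch of the same expansion using slicing instead of a counter (term names, accessions, and URLs below are invented placeholders):

```python
# Sketch only: expand a flat list of ontology key/value pairs into one row per mapping.
ontologies = [
    "Phenotype Term Name", "example phenotype term",                # placeholder term
    "Phenotype Term Accession", "CMPO_0000000",                     # placeholder accession
    "Phenotype Term Accession URL", "https://example.org/cmpo/CMPO_0000000",
]

row = {"Phenotype": "example phenotype", "Gene": "example-gene"}    # placeholder values
while ontologies:
    mapping, ontologies = ontologies[:6], ontologies[6:]            # take one mapping of 3 pairs
    row.update(dict(zip(mapping[0::2], mapping[1::2])))             # names -> values
    print(row)  # the notebook calls writer.writerow(row) here instead
```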
@@ -258,32 +232,12 @@
" 'Phenotype Term Accession URL']\n",
" writer = csv.DictWriter(csvfile, fieldnames=fieldnames)\n",
" writer.writeheader()\n",
" new_genes = []\n",
" for g in genes:\n",
" qs1 = {'key': TYPE, 'value': g}\n",
" url1 = URL.format(**qs1)\n",
" json = session.get(url1).json()\n",
" for m in json['maps']: \n",
" new_genes.append(m['id'])\n",
" \n",
" for gene in new_genes:\n",
" qs1 = {'key': TYPE, 'value': gene}\n",
" url1 = URL.format(**qs1)\n",
" json = session.get(url1).json()\n",
" for m in json['maps']:\n",
" qs2 = {'key': TYPE, 'value': gene, 'compound_id': m['id']}\n",
" url2 = SCREENS_PROJECTS_URL.format(**qs2)\n",
" json = session.get(url2).json()\n",
" for s in json['screens']:\n",
" gene = s['extra']['value']\n",
" qs3 = {'key': TYPE, 'value': gene, 'screen_id': s['id']}\n",
" url3 = PLATES_URL.format(**qs3)\n",
" parse_annotation(writer, session.get(url3).json(), s['name'], 'plates')\n",
" for p in json['projects']:\n",
" gene = s['extra']['value']\n",
" qs3 = {'key': TYPE, 'value': gene, 'project_id': p['id']}\n",
" url3 = DATASETS_URL.format(**qs3)\n",
" parse_annotation(writer, session.get(url3).json(), p['name'], 'datasets') \n",
" for gene in genes:\n",
" url = SEARCH_URL.format(**{'key': GENE_SYMBOL, 'value': gene})\n",
" json_data = session.get(url).json()\n",
" parse_annotation(writer, json_data['results']['results'], gene)\n",
" \n",
"finally:\n",
" csvfile.close()"
]