Pin numpy #126

Closed
wants to merge 3 commits into from
98 changes: 34 additions & 64 deletions Compound.ipynb
@@ -46,7 +46,8 @@
"from tempfile import NamedTemporaryFile\n",
"\n",
"import scipy\n",
"import numpy\n",
"import numpy as np\n",
"np.bool = np.bool_\n",
"from skimage import filters\n",
"import matplotlib.pyplot as plt\n",
"from idr import connection\n",
@@ -103,20 +104,16 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"### Set up base URLS so can use shorter variable names later on"
"### Set up base URL so can use shorter variable names later on"
]
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"URL = \"https://idr.openmicroscopy.org/mapr/api/{key}/?value={value}&case_sensitive=false&orphaned=true\"\n",
"SCREENS_PROJECTS_URL = \"https://idr.openmicroscopy.org/mapr/api/{key}/?value={value}&case_sensitive=false&id={compound_id}\"\n",
"PLATES_URL = \"https://idr.openmicroscopy.org/mapr/api/{key}/plates/?value={value}&id={screen_id}&case_sensitive=false\"\n",
"IMAGES_URL = \"https://idr.openmicroscopy.org/mapr/api/{key}/images/?value={value}&node={parent_type}&id={parent_id}&case_sensitive=false\"\n",
"ATTRIBUTES_URL = \"https://idr.openmicroscopy.org/webclient/api/annotations/?type=map&image={image_id}\""
"SEARCH_URL = \"https://idr.openmicroscopy.org/searchengine/api/v1/resources/image/search/?key={key}&value={value}\""
]
},
{
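
The cell above swaps the old mapr/webclient URL templates for a single IDR search-engine endpoint. A hedged sketch of how that `SEARCH_URL` is expected to be queried on its own: the `requests` session mirrors the notebook, the result layout (`results` → `results` → `key_values`) is taken from the parsing code later in this diff, and the compound value is a placeholder, not a guaranteed hit.

```python
# Sketch only: query the searchengine endpoint for one compound and peek at the hits.
import requests

SEARCH_URL = ("https://idr.openmicroscopy.org/searchengine/api/v1/"
              "resources/image/search/?key={key}&value={value}")

with requests.Session() as session:
    url = SEARCH_URL.format(key="Compound Name", value="blebbistatin")  # placeholder value
    payload = session.get(url).json()
    for hit in payload["results"]["results"][:5]:  # first few images only
        key_values = {kv["name"]: kv["value"] for kv in hit["key_values"]}
        print(hit["id"], hit.get("screen_name", "-"), key_values.get("InChIKey", "unknown"))
```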
@@ -138,16 +135,7 @@
},
"outputs": [],
"source": [
"TYPE = \"compound\"\n",
"KEYS = {TYPE:\n",
" (\"InChIKey\",\n",
" \"PubChem InChIKey\",\n",
" \"Compound Concentration (microMolar)\",\n",
" \"Concentration (microMolar)\",\n",
" \"Dose\",\n",
" \"Compound MoA\",\n",
" \"Compound Action\")\n",
"}"
"COMPOUND_NAME = \"Compound Name\""
]
},
{
@@ -168,43 +156,34 @@
},
"outputs": [],
"source": [
"def parse_annotation(writer, json_data, name, data_type):\n",
" plate_name = \"-\"\n",
" screen_name = name\n",
" for p in json_data[data_type]:\n",
" parent_id = p['id']\n",
" plate_name = p['name']\n",
" qs3 = {'key': TYPE, 'value': compound,\n",
" 'parent_type': data_type[:-1], 'parent_id': parent_id}\n",
" url3 = IMAGES_URL.format(**qs3)\n",
"def parse_annotation(writer, json_data, compound):\n",
" for p in json_data:\n",
" plate_name = p['plate_name']\n",
" screen_name = p['screen_name']\n",
" c = compound.lower()\n",
" if c.startswith(\"ml\"):\n",
" c = 'ml9'\n",
" for i in session.get(url3).json()['images']:\n",
" image_id = i['id']\n",
" url4 = ATTRIBUTES_URL.format(**{'image_id': image_id})\n",
" row = {}\n",
" inchikey = \"unknown\"\n",
" concentration = \"unknown\"\n",
" moa = \"unknown\"\n",
" for a in session.get(url4).json()['annotations']:\n",
" for v in a['values']:\n",
" key = str(v[0])\n",
" if key in KEYS[TYPE]:\n",
" if key in ['InChIKey', 'PubChem InChIKey']:\n",
" inchikey = v[1]\n",
" elif key in ['Dose', 'Compound Concentration (microMolar)', 'Concentration (microMolar)']:\n",
" concentration = float(v[1].replace(' micromolar', ''))\n",
" elif key in ['Compound MoA', 'Compound Action']:\n",
" moa = v[1]\n",
" row.update({'Compound': c,\n",
" 'Screen': screen_name,\n",
" 'Plate': plate_name,\n",
" 'Image': image_id,\n",
" 'InChIKey': inchikey,\n",
" 'Concentration (microMolar)': concentration,\n",
" 'MoA': moa})\n",
" writer.writerow(row)"
" image_id = p['id']\n",
" inchikey = \"unknown\"\n",
" concentration = \"unknown\"\n",
" moa = \"unknown\"\n",
" for v in p['key_values']:\n",
" key = v[\"name\"]\n",
" value = v[\"value\"]\n",
" if key in ['InChIKey', 'PubChem InChIKey']:\n",
" inchikey = value\n",
" elif key in ['Dose', 'Compound Concentration (microMolar)', 'Concentration (microMolar)']:\n",
" concentration = float(value.replace(' micromolar', ''))\n",
" elif key in ['Compound MoA', 'Compound Action']:\n",
" moa = value\n",
" row = {'Compound': c,\n",
" 'Screen': screen_name,\n",
" 'Plate': plate_name,\n",
" 'Image': image_id,\n",
" 'InChIKey': inchikey,\n",
" 'Concentration (microMolar)': concentration,\n",
" 'MoA': moa}\n",
" writer.writerow(row)"
]
},
{
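
To make the input shape of the rewritten `parse_annotation` concrete, a hedged sketch that feeds it a single hand-made hit and writes the resulting row to stdout. It assumes `parse_annotation` from the cell above is already defined; every value below is an invented placeholder, not real IDR data.

```python
# Sketch only: exercise parse_annotation with a hand-made search hit.
import csv
import sys

fake_results = [{
    "id": 12345,                                   # placeholder image id
    "plate_name": "example-plate",
    "screen_name": "idr0000-example/screenA",      # placeholder screen
    "key_values": [
        {"name": "PubChem InChIKey", "value": "XXXXXXXXXXXXXX-XXXXXXXXXX-X"},
        {"name": "Compound Concentration (microMolar)", "value": "10 micromolar"},
        {"name": "Compound MoA", "value": "example mechanism"},
    ],
}]

fieldnames = ["Compound", "Screen", "Plate", "Image",
              "InChIKey", "Concentration (microMolar)", "MoA"]
writer = csv.DictWriter(sys.stdout, fieldnames=fieldnames)
writer.writeheader()
parse_annotation(writer, fake_results, "example-compound")  # placeholder compound name
```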
@@ -237,18 +216,9 @@
" writer = csv.DictWriter(csvfile, fieldnames=fieldnames)\n",
" writer.writeheader()\n",
" for compound in compounds:\n",
" qs1 = {'key': TYPE, 'value': compound}\n",
" url1 = URL.format(**qs1)\n",
" json_data = session.get(url1).json()\n",
" for m in json_data['maps']:\n",
" qs2 = {'key': TYPE, 'value': compound, 'compound_id': m['id']}\n",
" url2 = SCREENS_PROJECTS_URL.format(**qs2)\n",
" json_data = session.get(url2).json()\n",
" for s in json_data['screens']:\n",
" compound = s['extra']['value']\n",
" qs3 = {'key': TYPE, 'value': compound, 'screen_id': s['id']}\n",
" url3 = PLATES_URL.format(**qs3)\n",
" parse_annotation(writer, session.get(url3).json(), s['name'], 'plates')\n",
" url = SEARCH_URL.format(**{'key': COMPOUND_NAME, 'value': compound})\n",
" json_data = session.get(url).json()\n",
" parse_annotation(writer, json_data['results']['results'], compound)\n",
"finally:\n",
" csvfile.close()"
]
162 changes: 58 additions & 104 deletions GenesToPhenotypes.ipynb
@@ -118,16 +118,11 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"URL = \"https://idr.openmicroscopy.org/mapr/api/{key}/?value={value}&case_sensitive=false&orphaned=true\"\n",
"SCREENS_PROJECTS_URL = \"https://idr.openmicroscopy.org/mapr/api/{key}/?value={value}\"\n",
"PLATES_URL = \"https://idr.openmicroscopy.org/mapr/api/{key}/plates/?value={value}&id={screen_id}\"\n",
"DATASETS_URL = \"https://idr.openmicroscopy.org/mapr/api/{key}/datasets/?value={value}&id={project_id}\"\n",
"IMAGES_URL = \"https://idr.openmicroscopy.org/mapr/api/{key}/images/?value={value}&node={parent_type}&id={parent_id}\"\n",
"ATTRIBUTES_URL = \"https://idr.openmicroscopy.org/webclient/api/annotations/?type=map&image={image_id}\""
"SEARCH_URL = \"https://idr.openmicroscopy.org/searchengine/api/v1/resources/image/search/?key={key}&value={value}\""
]
},
{
@@ -141,17 +136,11 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"TYPE = \"gene\"\n",
"KEYS = {\"phenotype\":\n",
" (\"Phenotype\",\n",
" \"Phenotype Term Name\",\n",
" \"Phenotype Term Accession\",\n",
" \"Phenotype Term Accession URL\")\n",
"}"
"GENE_SYMBOL = \"Gene Symbol\""
]
},
{
@@ -164,75 +153,60 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"def parse_annotation(writer, json_data, name, data_type):\n",
" screen_name = \"-\"\n",
" plate_name = \"-\"\n",
" project_name = \"-\"\n",
" dataset_name = \"-\"\n",
" if data_type == 'datasets':\n",
" project_name = name\n",
" else:\n",
" screen_name = name\n",
" \n",
" for p in json_data[data_type]:\n",
" parent_id = p['id']\n",
" if data_type == 'datasets':\n",
" dataset_name = p['name']\n",
" else:\n",
" plate_name = p['name']\n",
" qs3 = {'key': TYPE, 'value': gene,\n",
" 'parent_type': data_type[:-1], 'parent_id': parent_id}\n",
" url3 = IMAGES_URL.format(**qs3)\n",
" for i in session.get(url3).json()['images']:\n",
"\n",
" image_id = i['id']\n",
" url4 = ATTRIBUTES_URL.format(**{'image_id': image_id})\n",
" for a in session.get(url4).json()['annotations']:\n",
" ontologies = [] # for ontology terms for a phenotype\n",
" row = {}\n",
" for v in a['values']:\n",
" if str(v[0]) in KEYS['phenotype']:\n",
" if str(v[0]) in ['Phenotype']: # has phenotype\n",
" row[str(v[0])] = v[1] # so create row\n",
"def parse_annotation(writer, json_data, gene):\n",
"\n",
" # if there are ontology mappings for the\n",
" # phenotype, add them to the ontologies list\n",
" ontList = ['Phenotype Term Name',\n",
" 'Phenotype Term Accession',\n",
" 'Phenotype Term Accession URL']\n",
" for p in json_data:\n",
" screen_name = p[\"screen_name\"] if p[\"screen_name\"] else \"-\"\n",
" plate_name = p[\"plate_name\"] if p[\"plate_name\"] else \"-\"\n",
" project_name = p[\"project_name\"] if p[\"project_name\"] else \"-\"\n",
" dataset_name = p[\"dataset_name\"] if p[\"dataset_name\"] else \"-\"\n",
" image_id = p['id']\n",
" ontologies = [] # for ontology terms for a phenotype\n",
" row = {}\n",
" for v in p['key_values']:\n",
" key = v['name']\n",
" value = v['value']\n",
" # if there are ontology mappings for the\n",
" # phenotype, add them to the ontologies list\n",
" ontList = ['Phenotype Term Name',\n",
" 'Phenotype Term Accession',\n",
" 'Phenotype Term Accession URL']\n",
" \n",
" if key == 'Phenotype': # has phenotype\n",
" row[key] = value # so create row\n",
"\n",
" if str(v[0]) in ontList:\n",
" ontologies.extend([str(v[0]), str(v[1])])\n",
" if row:\n",
" if (len(ontologies) > 0): # 1+ ontology mapping\n",
" row.update({'Gene': gene,\n",
" 'Screen': screen_name,\n",
" 'Plate': plate_name,\n",
" 'Image': image_id,\n",
" 'Project' : project_name,\n",
" 'Dataset': dataset_name})\n",
" # we have the start of a row now\n",
" # but we want to print out as many rows\n",
" # as there are ontology mappings\n",
" # so if there is mapping to 1 ontology term\n",
" # print 1 row, if there are 2 ontology terms\n",
" # print 2 rows etc\n",
" numberOfRows = len(ontologies)/6\n",
" # this is 3 pairs of ontology values per\n",
" # mapping, add the ontology mappings and print\n",
" n = 1\n",
" while (n <= numberOfRows):\n",
" row.update({ontologies[0]: ontologies[1],\n",
" ontologies[2]: ontologies[3],\n",
" ontologies[4]: ontologies[5]})\n",
" # remove that set of ontology mappings\n",
" ontologies = ontologies[6:]\n",
" writer.writerow(row)\n",
" n = n + 1"
" elif key in ontList:\n",
" ontologies.extend([key, value])\n",
" if row:\n",
" if (len(ontologies) > 0): # 1+ ontology mapping\n",
" row.update({'Gene': gene,\n",
" 'Screen': screen_name,\n",
" 'Plate': plate_name,\n",
" 'Image': image_id,\n",
" 'Project' : project_name,\n",
" 'Dataset': dataset_name})\n",
" # we have the start of a row now\n",
" # but we want to print out as many rows\n",
" # as there are ontology mappings\n",
" # so if there is mapping to 1 ontology term\n",
" # print 1 row, if there are 2 ontology terms\n",
" # print 2 rows etc\n",
" numberOfRows = len(ontologies)/6\n",
" # this is 3 pairs of ontology values per\n",
" # mapping, add the ontology mappings and print\n",
" n = 1\n",
" while (n <= numberOfRows):\n",
" row.update({ontologies[0]: ontologies[1],\n",
" ontologies[2]: ontologies[3],\n",
" ontologies[4]: ontologies[5]})\n",
" # remove that set of ontology mappings\n",
" ontologies = ontologies[6:]\n",
" writer.writerow(row)\n",
" n = n + 1"
]
},
{
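
One detail in the cell above worth spelling out: each phenotype can map to several ontology terms, each mapping contributing three key/value pairs (six list entries), and one CSV row is written per mapping. Note that `len(ontologies)/6` is float division in Python 3; the loop still terminates correctly because the integer counter is compared against that float. A hedged, self-contained sketch of the same expansion using slicing instead of a counter (term names, accessions, and URLs below are invented placeholders):

```python
# Sketch only: expand a flat list of ontology key/value pairs into one row per mapping.
ontologies = [
    "Phenotype Term Name", "example phenotype term",                # placeholder term
    "Phenotype Term Accession", "CMPO_0000000",                     # placeholder accession
    "Phenotype Term Accession URL", "https://example.org/cmpo/CMPO_0000000",
]

row = {"Phenotype": "example phenotype", "Gene": "example-gene"}    # placeholder values
while ontologies:
    mapping, ontologies = ontologies[:6], ontologies[6:]            # take one mapping of 3 pairs
    row.update(dict(zip(mapping[0::2], mapping[1::2])))             # names -> values
    print(row)  # the notebook calls writer.writerow(row) here instead
```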
@@ -258,32 +232,12 @@
" 'Phenotype Term Accession URL']\n",
" writer = csv.DictWriter(csvfile, fieldnames=fieldnames)\n",
" writer.writeheader()\n",
" new_genes = []\n",
" for g in genes:\n",
" qs1 = {'key': TYPE, 'value': g}\n",
" url1 = URL.format(**qs1)\n",
" json = session.get(url1).json()\n",
" for m in json['maps']: \n",
" new_genes.append(m['id'])\n",
" \n",
" for gene in new_genes:\n",
" qs1 = {'key': TYPE, 'value': gene}\n",
" url1 = URL.format(**qs1)\n",
" json = session.get(url1).json()\n",
" for m in json['maps']:\n",
" qs2 = {'key': TYPE, 'value': gene, 'compound_id': m['id']}\n",
" url2 = SCREENS_PROJECTS_URL.format(**qs2)\n",
" json = session.get(url2).json()\n",
" for s in json['screens']:\n",
" gene = s['extra']['value']\n",
" qs3 = {'key': TYPE, 'value': gene, 'screen_id': s['id']}\n",
" url3 = PLATES_URL.format(**qs3)\n",
" parse_annotation(writer, session.get(url3).json(), s['name'], 'plates')\n",
" for p in json['projects']:\n",
" gene = s['extra']['value']\n",
" qs3 = {'key': TYPE, 'value': gene, 'project_id': p['id']}\n",
" url3 = DATASETS_URL.format(**qs3)\n",
" parse_annotation(writer, session.get(url3).json(), p['name'], 'datasets') \n",
" for gene in genes:\n",
" url = SEARCH_URL.format(**{'key': GENE_SYMBOL, 'value': gene})\n",
" json_data = session.get(url).json()\n",
" parse_annotation(writer, json_data['results']['results'], gene)\n",
" \n",
"finally:\n",
" csvfile.close()"
]