Skip to content

Commit

Permalink
Merge pull request #31 from ur-whitelab/chemspace-tools
Browse files Browse the repository at this point in the history
Chemspace tools, rearranged, and smiles->name
  • Loading branch information
doncamilom authored Mar 13, 2024
2 parents 61ecc0b + c003976 commit aed5e07
Show file tree
Hide file tree
Showing 16 changed files with 578 additions and 353 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,6 @@ jobs:
SEMANTIC_SCHOLAR_API_KEY: ${{ secrets.SEMANTIC_SCHOLAR_API_KEY }}
RXN4CHEM_API_KEY: ${{ secrets.RXN4CHEM_API_KEY }}
SERPAPI_KEY: ${{ secrets.SERPAPI_KEY }}
CHEMSPACE_API_KEY: ${{ secrets.CHEMSPACE_KEY }}
CHEMSPACE_API_KEY: ${{ secrets.CHEMSPACE_API_KEY }}
run: |
pytest -m "not skip" tests
108 changes: 0 additions & 108 deletions app.py

This file was deleted.

1 change: 0 additions & 1 deletion chemcrow/agents/.#tool_list.py

This file was deleted.

14 changes: 11 additions & 3 deletions chemcrow/agents/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,12 @@


def make_tools(llm: BaseLanguageModel, api_keys: dict = {}, verbose=True):
serp_key = api_keys.get("SERP_API_KEY") or os.getenv("SERP_API_KEY")
serp_api_key = api_keys.get("SERP_API_KEY") or os.getenv("SERP_API_KEY")
rxn4chem_api_key = api_keys.get("RXN4CHEM_API_KEY") or os.getenv("RXN4CHEM_API_KEY")
openai_api_key = api_keys.get("OPENAI_API_KEY") or os.getenv("OPENAI_API_KEY")
chemspace_api_key = api_keys.get("CHEMSPACE_API_KEY") or os.getenv(
"CHEMSPACE_API_KEY"
)

all_tools = agents.load_tools(
[
Expand All @@ -21,18 +24,23 @@ def make_tools(llm: BaseLanguageModel, api_keys: dict = {}, verbose=True):
)

all_tools += [
Query2SMILES(),
Query2SMILES(chemspace_api_key),
Query2CAS(),
SMILES2Name(),
PatentCheck(),
MolSimilarity(),
SMILES2Weight(),
FuncGroups(),
ExplosiveCheck(),
ControlChemCheck(),
SimilarControlChemCheck(),
Scholar2ResultLLM(llm=llm),
SafetySummary(llm=llm),
# LitSearch(llm=llm, verbose=verbose),
]
if chemspace_api_key:
all_tools += [GetMoleculePrice(chemspace_api_key)]
if serp_api_key:
all_tools += [WebSearch(serp_api_key)]
if rxn4chem_api_key:
all_tools += [
RXNPredict(rxn4chem_api_key),
Expand Down
2 changes: 2 additions & 0 deletions chemcrow/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,5 @@
from .search import * # noqa
from .rxn4chem import * # noqa
from .safety import * # noqa
from .chemspace import * # noqa
from .converters import * # noqa
195 changes: 195 additions & 0 deletions chemcrow/tools/chemspace.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
import os

import molbloom
import pandas as pd
import requests
from langchain.tools import BaseTool

from chemcrow.utils import is_smiles


class ChemSpace:
    """Small client for the chem-space.com REST API.

    Exchanges an API key for an access token on construction and offers two
    public operations: converting a molecule query to another representation
    (``convert_mol_rep``) and looking up the cheapest purchase offer
    (``buy_mol``).
    """

    # Seconds to wait for the Chemspace server before a request is abandoned;
    # without a timeout a stalled connection would block the caller forever.
    REQUEST_TIMEOUT = 30

    # Product categories searched for each supported request type.
    _CATEGORIES_BY_REQUEST_TYPE = {
        "exact": "CSMB,CSSB",
        "sim": "CSSS,CSMS",
        "sub": "CSSS,CSMS",
    }

    def __init__(self, chemspace_api_key=None):
        """Store the API key and immediately request an access token."""
        self.chemspace_api_key = chemspace_api_key
        self._renew_token()  # Create token

    def _renew_token(self):
        """Fetch a fresh access token and store it on ``self.chemspace_token``.

        Raises:
            KeyError: if the response carries no ``access_token`` field.
        """
        response = requests.get(
            url="https://api.chem-space.com/auth/token",
            headers={
                "Accept": "application/json",
                "Authorization": f"Bearer {self.chemspace_api_key}",
            },
            timeout=self.REQUEST_TIMEOUT,
        )
        self.chemspace_token = response.json()["access_token"]

    def _make_api_request(
        self,
        query,
        request_type,
        count,
        categories,
    ):
        """
        Make a generic request to chem-space API.
        Categories request.
        CSCS: Custom Request: Could be useful for requesting whole synthesis
        CSMB: Make-On-Demand Building Blocks
        CSSB: In-Stock Building Blocks
        CSSS: In-stock Screening Compounds
        CSMS: Make-On-Demand Screening Compounds

        Returns the decoded JSON response. If the server reports invalid
        credentials, the token is renewed once and the request is repeated.
        """

        def _do_request():
            return requests.post(
                url=f"https://api.chem-space.com/v3/search/{request_type}?count={count}&page=1&categories={categories}",
                headers={
                    "Accept": "application/json; version=3.1",
                    "Authorization": f"Bearer {self.chemspace_token}",
                },
                data={"SMILES": f"{query}"},
                timeout=self.REQUEST_TIMEOUT,
            ).json()

        data = _do_request()

        # renew token if token is invalid
        if (
            isinstance(data, dict)
            and data.get("message")
            == "Your request was made with invalid credentials."
        ):
            self._renew_token()
            data = _do_request()
        return data

    def _convert_single(self, query, search_type: str):
        """Do query for a single molecule"""
        data = self._make_api_request(query, "exact", 1, "CSCS,CSMB,CSSB")
        if data["count"] > 0:
            return data["items"][0][search_type]
        return "No data was found for this compound."

    def convert_mol_rep(self, query, search_type: str = "smiles"):
        """Convert one or several molecules to the ``search_type`` field.

        ``query`` is a single molecule or several separated by ``", "``.
        Returns one ``"<molecule>'s <search_type> is: <value>"`` line per
        molecule, or an explanatory message if any lookup fails.
        """
        try:
            # Label each result with its own molecule (not the whole query)
            # and keep the results on separate lines.
            return "\n".join(
                f"{q}'s {search_type} is: {self._convert_single(q, search_type)}"
                for q in query.split(", ")
            )
        except Exception:
            return "The input provided is wrong. Input either a single molecule, or multiple molecules separated by a ', '"

    def _is_purchasable(self, query):
        """Checks if molecule is available for purchase (ZINC20).

        A query that is not a SMILES string is first resolved through the
        API. Returns False whenever no valid SMILES can be obtained or the
        lookup fails.
        """
        smiles = query
        if not is_smiles(smiles):
            try:
                # Use the raw lookup: convert_mol_rep returns a descriptive
                # sentence, which is not a SMILES string.
                smiles = self._convert_single(query, "smiles")
            except Exception:
                return False
            if not is_smiles(smiles):
                return False
        try:
            return bool(molbloom.buy(smiles, canonicalize=True))
        except Exception:
            return False

    @staticmethod
    def _cheapest_offer(items):
        """Return the cheapest priced pack among ``items`` or None.

        ``items`` is the ``"items"`` list of a search response. The result is
        a row with ``quantity``, ``priceUsd`` and ``vendorName``. Prices that
        cannot be read as numbers are ignored.
        """
        rows = [
            {
                "quantity": f"{price['pack']}{price['uom']}",
                "priceUsd": price["priceUsd"],
                "vendorName": offer["vendorName"],
            }
            for item in items
            for offer in (item.get("offers") or [])
            for price in (offer.get("prices") or [])
        ]
        if not rows:
            return None

        df = pd.DataFrame(rows)
        # Coerce instead of str.isnumeric so decimal prices ("12.5") are kept
        # and placeholders become NaN.
        numeric_price = pd.to_numeric(df["priceUsd"], errors="coerce")
        if not numeric_price.notna().any():
            return None
        # idxmin returns an index *label*, so select with .loc, not .iloc.
        return df.loc[numeric_price.idxmin()]

    def buy_mol(
        self,
        smiles,
        request_type="exact",
        count=1,
    ):
        """
        Get data about purchasing compounds.
        smiles: smiles string of the molecule you want to buy
        request_type: one of "exact", "sim" (search by similarity), "sub" (search by substructure).
        count: retrieve data for this many substances max.

        Returns a human-readable sentence describing the cheapest offer, or
        a message explaining why no price could be given.

        Raises:
            ValueError: if ``request_type`` is not one of the supported values.
        """
        try:
            categories = self._CATEGORIES_BY_REQUEST_TYPE[request_type]
        except KeyError:
            raise ValueError(
                f"Unknown request_type {request_type!r}; expected one of "
                f"{sorted(self._CATEGORIES_BY_REQUEST_TYPE)}."
            ) from None

        purchasable = self._is_purchasable(smiles)

        data = self._make_api_request(smiles, request_type, count, categories)

        try:
            found = data["count"]
        except (KeyError, TypeError):
            return "Invalid query, try something else. "

        if found == 0:
            if purchasable:
                return "Compound is purchasable, but price is unknown."
            return "Compound is not purchasable."

        print(f"Obtaining data for {found} substances.")

        cheapest = self._cheapest_offer(data["items"])
        if cheapest is None:
            # Substances were found but none carries a usable price.
            return "Compound is purchasable, but price is unknown."
        return f"{cheapest['quantity']} of this molecule cost {cheapest['priceUsd']} USD and can be purchased at {cheapest['vendorName']}."


class GetMoleculePrice(BaseTool):
    """Tool that reports the cheapest available price of a molecule.

    The lookup is delegated to ``ChemSpace.buy_mol``; a Chemspace API key is
    required for the tool to return anything but an explanatory message.
    """

    name = "GetMoleculePrice"
    description = "Get the cheapest available price of a molecule."
    chemspace_api_key: str = None
    url: str = None

    def __init__(self, chemspace_api_key: str = None):
        """Initialise the tool and remember the Chemspace API key."""
        super().__init__()
        self.url = "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{}/{}"
        self.chemspace_api_key = chemspace_api_key

    def _run(self, query: str) -> str:
        """Return the price sentence for ``query``.

        Any exception raised during the lookup is returned as its string
        message instead of being propagated.
        """
        if self.chemspace_api_key:
            try:
                client = ChemSpace(self.chemspace_api_key)
                return client.buy_mol(query)
            except Exception as err:
                return str(err)
        return "No Chemspace API key found. This tool may not be used without a Chemspace API key."

    async def _arun(self, query: str) -> str:
        """Use the tool asynchronously."""
        raise NotImplementedError()
Loading

0 comments on commit aed5e07

Please sign in to comment.