Skip to content

Commit

Permalink
Nle lc (#133)
Browse files Browse the repository at this point in the history
* Added langchain and improve NLE

* Tweaked prompt

* Updated notebook

* Added correlation direction

* More work on prompt

* Updated prompt for single vs multi molecules

* More prompt refinement

* Model changes

* Removed oai key dependency

* Updated notebook experiments

* Fixed old text generate code

* Added OAI key

* Added openai to dev requirements

* Added OAI Key and uncommented explains
  • Loading branch information
whitead authored Mar 10, 2023
1 parent e0d0bea commit 1aa7e14
Show file tree
Hide file tree
Showing 11 changed files with 144 additions and 133 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ jobs:
run: |
pip install .
- name: Run Test
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
run: |
pytest tests --doctest-modules --junitxml=junit/test-results.xml --cov=com --cov-report=xml --cov-report=html
# mypy -p exmol --ignore-missing-imports
Expand Down
6 changes: 6 additions & 0 deletions .github/workflows/paper.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,20 @@ jobs:
run: |
pip install -r paper1_CFs/requirements.txt
- name: Run paper1 experiments
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
run: jupyter nbconvert --ExecutePreprocessor.timeout=-1 --execute "paper1_CFs/*.ipynb" --to notebook --output-dir='temp' --clear-output
- name: Install paper2 depends
run: |
pip install -r paper2_LIME/requirements.txt
- name: Run paper2 experiments
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
run: jupyter nbconvert --ExecutePreprocessor.timeout=-1 --execute "paper2_LIME/*.ipynb" --to notebook --output-dir='temp' --clear-output
- name: Install paper3 depends
run: |
pip install -r paper3_Scents/requirements.txt
- name: Run paper3 experiments
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
run: jupyter nbconvert --ExecutePreprocessor.timeout=-1 --execute "paper3_Scents/*.ipynb" --to notebook --output-dir='temp' --clear-output
2 changes: 2 additions & 0 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ jobs:
run: |
pip install .
- name: Run Test
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
run: |
pytest tests
# mypy -p exmol --ignore-missing-imports
4 changes: 2 additions & 2 deletions docs/source/changelog.rst
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
Change Log
==========

v3.0.2
v3.0.2 (2023-02-23)
-------------------
* Now try to find largest component with multiple compounds separated via `.` in SMILES

* Added langchain for text explanations


v3.0.1 (2023-02-02)
Expand Down
96 changes: 59 additions & 37 deletions exmol/exmol.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@
from rdkit.Chem.Draw import MolToImage as mol2img, DrawMorganBit # type: ignore
from rdkit.Chem import rdchem # type: ignore
from rdkit.DataStructs.cDataStructs import BulkTanimotoSimilarity, TanimotoSimilarity # type: ignore

import langchain.llms as llms
import langchain.prompts as prompts

from . import stoned
from .plot_utils import _mol_images, _image_scatter, _bit2atoms
Expand Down Expand Up @@ -1331,45 +1332,66 @@ def merge_text_explains(
return pos + joint


_text_prompt = """
The following are a series of questions about molecules that connect their structure to a property, along with how important each question is for the molecular property. An answer of "Yes" means that the question was true and that attribute of structure contributed to the molecular property. An answer of "Counterfactual" means the lack of that attribute contributed to the molecular property. A summary paragraph is given below, which only summarizes on the most important structure-property relationships.
Property: [PROPERTY]
[TEXT]
Summary: The molecular property "[PROPERTY]" can be explained"""


def text_prompt(
_multi_prompt = (
"The following is information about molecules that connect their structures "
'to the property called "{property}." '
"The information is attributes of molecules expressed as questions with answers and "
"relative importance. "
"Using all aspects of this information, propose an explanation (50-150 words) "
'for the molecular property "{property}." '
"Only use the information below. Answer in a scientific "
'tone and make use of counterfactuals (e.g., "If X were present, {property} would be negatively...").'
"\n\n"
"{text}\n\n"
"Explanation:"
)

_single_prompt = (
"The following is information about a specific molecule that connects its structure "
'to the property "{property}." '
"The information is structural attributes expressed as questions with answers and "
"relative importance. "
"Using all aspects of this information, propose an explanation (50-150 words) "
'for this molecule\'s property "{property}." '
"Only use the information below. Answer in a scientific "
'tone and make use of counterfactuals (e.g., "If X were present, its {property} would be negatively...").'
"\n\n"
"{text}\n\n"
"Explanation:"
)


def text_explain_generate(
text_explanations: List[Tuple[str, float]],
property_name: str,
open_ai_key: Optional[str] = None,
llm: Optional[llms.BaseLLM] = None,
single: bool = True,
) -> str:
"""Insert text explanations into template, and optionally send to OpenAI."""
result = _text_prompt.replace("[PROPERTY]", property_name)
"""Insert text explanations into template, and generate explanation.
Args:
text_explanations: List of text explanations.
property_name: Name of property.
llm: Language model to use.
single: Whether to use a prompt about a single molecule or multiple molecules.
"""
# want to have negative examples at the end
text_explanations.sort(key=lambda x: x[1], reverse=True)
result = result.replace("[TEXT]", "".join([f"{t[0]}" for t in text_explanations]))
if open_ai_key is not None:
import openai

openai.api_key = open_ai_key
response = openai.Completion.create(
model="text-davinci-003",
prompt=result,
temperature=0.7,
max_tokens=256,
top_p=1,
frequency_penalty=0,
presence_penalty=0,
)
completion = response["choices"][0]["text"]
return (
'The molecular property "'
+ property_name
+ '" can be explained'
+ completion
)
return result
text = "\n".join(
[
# f"{x[0][:-1]} {'Positive' if x[1] > 0 else 'Negative'} correlation."
f"{x[0][:-1]}."
for x in text_explanations
]
)
prompt_template = prompts.PromptTemplate(
input_variables=["property", "text"],
template=_single_prompt if single else _multi_prompt,
)
prompt = prompt_template.format(property=property_name, text=text)
if llm is None:
llm = llms.OpenAI(temperature=0.05)
return llm(prompt)


def text_explain(
Expand Down Expand Up @@ -1444,10 +1466,10 @@ def text_explain(
if neg_count == count - 2:
# don't want to have only negative examples
continue
kind = "No (Counterfactual)."
kind = "No and it would be negatively correlated with property (counterfactual)."
neg_count += 1
elif present / nbases > presence_thresh and v > 0:
kind = "Yes."
kind = "Yes and this is positively correlated with property."
pos_count += 1
else:
continue
Expand Down
Loading

0 comments on commit 1aa7e14

Please sign in to comment.