Skip to content

Commit

Permalink
revert code changes, keep just nbs and fig plotting
Browse files Browse the repository at this point in the history
  • Loading branch information
qcampbel committed Feb 13, 2025
1 parent c9f6b95 commit f58ce54
Show file tree
Hide file tree
Showing 7 changed files with 56 additions and 120 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ pip install git+https://github.com/ur-whitelab/MDCrow.git

## Usage
The next step is to set up your API keys in your environment. An API key for an LLM provider is required for this project. Supported LLM providers are OpenAI, TogetherAI, Fireworks, and Anthropic.
We recommend setting up api keys in a .env file. You can use the provided .env.example file as a template.
Some tools, such as paper-qa for literature searches, require additional API keys. We recommend setting up the keys in a .env file. You can use the provided .env.example file as a template.
1. Copy the `.env.example` file and rename it to `.env`: `cp .env.example .env`
2. Replace the placeholder values in `.env` with your actual keys

Expand Down
44 changes: 2 additions & 42 deletions mdcrow/agent/agent.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import os
from datetime import datetime

from dotenv import load_dotenv
from langchain.agents import AgentExecutor, OpenAIFunctionsAgent
Expand Down Expand Up @@ -47,7 +46,6 @@ def __init__(
uploaded_files=[], # user input files to add to path registry
run_id="",
use_memory=False,
safe_mode=False,
paper_dir=None, # papers for pqa, relative path within repo
):
self.llm = _make_llm(model, temp, streaming)
Expand All @@ -62,8 +60,8 @@ def __init__(
self.run_id = self.memory.run_id

self.uploaded_files = uploaded_files
# for file in uploaded_files: # todo -> allow users to add descriptions?
# self.path_registry.map_path(file, file, description="User uploaded file")
for file in uploaded_files: # todo -> allow users to add descriptions?
self.path_registry.map_path(file, file, description="User uploaded file")

self.agent = None
self.agent_type = agent_type
Expand All @@ -72,43 +70,6 @@ def __init__(
self.user_tools = tools
self.verbose = verbose

if self.uploaded_files:
self.add_file(self.uploaded_files)
self.safe_mode = safe_mode

def _add_single_file(self, file_path, description=None):
    """Register one user-uploaded file in the path registry under a fresh ID.

    The ID has the form ``UPL_<i><YYYYMMDD_HHMMSS>``, where ``<i>`` is the
    smallest non-negative integer for which the ID is not already reported
    by ``self.path_registry.list_path_names()``.

    Args:
        file_path: Path of the file to register.
        description: Optional description stored with the file. When it is
            empty or ``None``, "User uploaded file" is used instead.
    """
    # Format the current date and time as "YYYYMMDD_HHMMSS".
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    # Nothing is added to the registry while we search for a free ID, so the
    # existing names only need to be read once.
    existing_names = self.path_registry.list_path_names()
    counter = 0
    file_id = f"UPL_{counter}{timestamp}"
    while file_id in existing_names:  # bump the counter until the ID is unused
        counter += 1
        file_id = f"UPL_{counter}{timestamp}"
    if not description:
        description = "User uploaded file"
    print(f"Adding file {file_path} with ID {file_id}\n")
    self.path_registry.map_path(file_id, file_path, description=description)

def add_file(self, uploaded_files):
    """Register user-supplied file(s) in the path registry.

    Args:
        uploaded_files: One of
            * a ``str`` file path,
            * a ``(file_path, description)`` tuple, or
            * a ``list`` whose items are any of the accepted forms
              (nested lists are handled recursively).

    Raises:
        ValueError: If ``uploaded_files`` (or any list item) is not a
            ``str``, ``tuple`` or ``list``.
    """
    # isinstance (rather than ``type(x) == T``) also accepts subclasses.
    if isinstance(uploaded_files, str):
        self._add_single_file(uploaded_files)
    elif isinstance(uploaded_files, tuple):
        self._add_single_file(uploaded_files[0], description=uploaded_files[1])
    elif isinstance(uploaded_files, list):
        for entry in uploaded_files:
            print(f"Adding file {entry}\n")
            self.add_file(entry)
    else:
        raise ValueError(
            "Invalid input. Please provide a file path "
            "or list of file paths. Optionally, tuple or list of tuples "
            "of file path and description"
        )

def _initialize_tools_and_agent(self, user_input=None):
"""Retrieve tools and initialize the agent."""
if self.user_tools is not None:
Expand All @@ -127,7 +88,6 @@ def _initialize_tools_and_agent(self, user_input=None):
self.tools = make_all_tools(
self.tools_llm,
human=self.use_human_tool,
safe_mode=self.safe_mode,
)
return AgentExecutor.from_agent_and_tools(
tools=self.tools,
Expand Down
2 changes: 1 addition & 1 deletion mdcrow/tools/base_tools/preprocess_tools/pdb_get.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@ def small_molecule_pdb(self, mol_str: str) -> str:
except Exception as e:
print(
"There was an error getting pdb. Please input a single molecule name."
f"{mol_str}"
f"{mol_str},{mol_name}"
)
return (
"Failed. There was an error getting pdb. "
Expand Down
77 changes: 24 additions & 53 deletions mdcrow/tools/base_tools/simulation_tools/create_simulation.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import os
import textwrap
from typing import Optional

Expand All @@ -17,7 +16,7 @@ class ModifyScriptUtils:
def __init__(self, llm):
self.llm = llm

def _prompt_summary(self, task: dict):
def _prompt_summary(self, query: str):
if not self.llm:
raise ValueError("No language model provided at ModifyScriptTool")

Expand Down Expand Up @@ -51,7 +50,7 @@ def _prompt_summary(self, task: dict):
)
llm_chain = prompt | self.llm | StrOutputParser()

return llm_chain.invoke(task)
return llm_chain.invoke(query)

# Remove leading spaces for proper formatting

Expand All @@ -62,16 +61,15 @@ def remove_leading_spaces(self, text):


class ModifyScriptInput(BaseModel):
script_id: str = Field(..., description=" File ID of the simulation script file")
query: str = Field(
...,
description=(
"simulation required by the user. Be as descriptive as possible"
" including requirements of the simulation, such as the forcefields, "
"integrator, and constraints. Also, mention the protein you are working on."
"simulation required by the user.You MUST "
"specify the objective, requirements of the simulation as well "
"as on what protein you are working."
),
)
script: str = Field(..., description=" simulation ID of the base script file")


class ModifyBaseSimulationScriptTool(BaseTool):
Expand All @@ -84,33 +82,25 @@ class ModifyBaseSimulationScriptTool(BaseTool):
args_schema = ModifyScriptInput
llm: Optional[BaseLanguageModel]
path_registry: Optional[PathRegistry]
safe_mode: Optional[bool]

def __init__(self, path_registry, llm, safe_mode=False):
def __init__(self, path_registry: Optional[PathRegistry], llm):
super().__init__()
self.path_registry = path_registry
self.llm = llm
self.safe_mode = safe_mode

def _run(self, script_id: str, query: str) -> str:
# if len(args) > 0:
# return (
# "Failed. This tool expects you to provide the input as a "
# "dictionary: {'query': 'your query', 'script': 'script id'}"
# )

def _run(self, *args, **input):
if len(args) > 0:
return (
"Failed. This tool expects you to provide the input as a "
"dictionary: {'query': 'your query', 'script': 'script id'}"
)
if not self.path_registry:
return "Failed. No path registry provided" # this should not happen
base_script_id = script_id
base_script_id = input.get("script")
if not base_script_id:
return (
"Failed. No id provided. The keys for the input are: "
"query' and 'script_id'"
)
current_ids = self.path_registry.list_path_names()
if base_script_id not in current_ids:
return (
f"Failed. File ID not found: {base_script_id}, make sure "
"the script ID is correct"
"query' and 'script'"
)
try:
base_script_path = self.path_registry.get_mapped_path(base_script_id)
Expand All @@ -119,24 +109,18 @@ def _run(self, script_id: str, query: str) -> str:
parts[-1]
except Exception as e:
return f"Failed. Error getting path from file id: {e}"
if os.path.exists(base_script_path):
with open(base_script_path, "r") as file:
base_script = file.read()
else:
return f"Failed. File not found: {base_script_id}"

with open(base_script_path, "r") as file:
base_script = file.read()
base_script = "".join(base_script)
utils = ModifyScriptUtils(self.llm)

description = query
description = input.get("query")
answer = utils._prompt_summary(
task={"base_script": base_script, "query": description}
query={"base_script": base_script, "query": description}
)
print("This the answer from the LLM\n\n", answer)
# script = answer["text"]
thoughts, new_script = answer.split("SCRIPT:")
# script_content = utils.remove_leading_spaces(new_script)
script_content = new_script
script = answer["text"]
thoughts, new_script = script.split("SCRIPT:")
script_content = utils.remove_leading_spaces(new_script)
if "FINAL THOUGHTS:" in script_content:
script_content, final_thoughts = script_content.split("FINAL THOUGHTS:")
# replace ''' with #
Expand All @@ -151,21 +135,8 @@ def _run(self, script_id: str, query: str) -> str:
with open(f"{directory}/{filename}", "w") as file:
file.write(script_content)

self.path_registry.map_path(file_id, f"{directory}/{filename}", description)
# if safe mode is on, return the file id
if self.safe_mode:
return f"Succeeded. Script modified successfully. Modified Script ID: {file_id}"
# if safe mode is off, try to run the script
try:
exec(script_content)
return f"Succeeded. Script modified and ran \
successfully. Modified Script ID: {file_id}"
except Exception as e:
return (
f"Failed. Error running the script: {e}."
"Modified Script ID: {file_id}. If you want to try to correct the "
"script, use the file id of the modified to correct the script."
)
self.path_registry.map_path(file_id, filename, description)
return f"Succeeded. Script modified successfully. Modified Script ID: {file_id}"

async def _arun(self, query) -> str:
"""Use the tool asynchronously."""
Expand Down
21 changes: 2 additions & 19 deletions mdcrow/tools/base_tools/simulation_tools/setup_and_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -723,22 +723,14 @@ def _construct_script_content(
system.addForce(MonteCarloBarostat(pressure, temperature, barostatInterval))
"""

if (
integrator_type == "LangevinMiddle"
and constraints != "None"
and constraints
):
print("Constraints must be set to 'None' for LangevinMiddle integrator.")
print(integrator_type, "constraints: ", constraints)
if integrator_type == "LangevinMiddle" and constraints != "None":
script_content += """
integrator = LangevinMiddleIntegrator(temperature, friction, dt)
integrator.setConstraintTolerance(constraintTolerance)
simulation = Simulation(modeller.topology, system, integrator, platform)
simulation.context.setPositions(modeller.positions)
"""
if integrator_type == "LangevinMiddle" and (
constraints == "None" or constraints is None
):
if integrator_type == "LangevinMiddle" and constraints == "None":
script_content += """
integrator = LangevinMiddleIntegrator(temperature, friction, dt)
simulation = Simulation(modeller.topology, system, integrator, platform)
Expand All @@ -750,15 +742,6 @@ def _construct_script_content(
print('Performing energy minimization...')
simulation.minimizeEnergy()
## Save initial positions
top_name = 'simulation_initial_positions.pdb'
top_description = 'Initial positions of the simulation'
with open(top_name, "w") as f:
\tPDBFile.writeFile(
\tsimulation.topology,
\tsimulation.context.getState(getPositions=True).getPositions(),
\tf,
\t)
print('Equilibrating...')
simulation.context.setVelocitiesToTemperature(temperature)
simulation.step(equilibrationSteps)
Expand Down
5 changes: 1 addition & 4 deletions mdcrow/tools/maketools.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,6 @@
def make_all_tools(
llm: BaseLanguageModel,
human=False,
safe_mode=False,
):
load_dotenv()
all_tools = []
Expand All @@ -72,9 +71,7 @@ def make_all_tools(
all_tools += agents.load_tools(["llm-math"], llm)
# all_tools += [PythonREPLTool()]
all_tools += [
ModifyBaseSimulationScriptTool(
path_registry=path_instance, llm=llm, safe_mode=safe_mode
),
ModifyBaseSimulationScriptTool(path_registry=path_instance, llm=llm),
]
if path_instance.ckpt_papers:
all_tools += [Scholar2ResultLLM(llm=llm, path_registry=path_instance)]
Expand Down
25 changes: 25 additions & 0 deletions notebooks/experiments/prompts.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
Simulate pdb 1MBN at two different temperatures: 300K, 400K for 1ns each. Plot RMSD of both over time, and compare the final secondary structures at the end of the simulations.
Download the pdb file for PDB ID 1LYZ.
Download the PDB file for PDB ID 1GZX. Then, analyze the secondary structure of the protein and tell me how many chains, sheets, etc. there are.
What are common parameters to simulate fibronectin?
Simulate 1XQ8 for 1ns at temperature 300K. Then tell me if the secondary structure changed from before the simulation to after.
Simulate 1A3N and 7VDE, two PDB IDs for hemoglobin with the same parameters. Find the appropriate parameters from literature. Then, plot the radius of gyration throughout both simulations.
Simulate 1ZNI for 1ns at temp=300K.
Simulate 4RMB at 100K, 200K, and 300K. Then plot the radius of gyration over time for all three simulations. Lastly, compare the change in secondary structure for the three analyses throughout the simulation.
What are the known interactions of protein 1BDG?
Download the PDB file for 1AEE. Then, tell me how many chains and atoms are in the protein.
Simulate protein 1ZNI at 300K for 1ns and calculate RMSD.
Download the PDB files for 8PFK and 8PFQ. Then, compare the secondary structure of the two proteins, including number of atoms, secondary structure, number of chains, etc.
Simulate fibronectin (PDB ID 1FNF) for 1ns. Use an appropriate temperature from literature.
Compare the RMSF of 1UBQ at high pressure and low pressure. Perform the simulation for 1 ns and vary only the pressure.
Simulate hemoglobin oxygenated (1A3N) and de-oxygenated (6BB5)
Simulate Trypsin (1TRN) for 1ns at 300K and compute SASA.
Download the pdb file for 1C3W and describe the secondary structure. Then simulate the protein at 300K for 1ns. Plot RMSD over time and radius of gyration over time.
Download the PDB file for 1XQ8. Then, save the visualization for it.
Download the PDB for 2YXF. Tell me about its stability, as found in literature. Then, simulate it for 1ns and plot its RMSD over time.
Simulate 1MBN in water and in methanol solutions.
Download Protein 1ATN
Download and clean protein 1A3N
Perform a short simulation of protein 1PQ2
Analyze the rdf of the simulation of 1A3N solvated in water
Make an rdf analysis of both oxygenated and deoxygenated hemoglobin structures

0 comments on commit f58ce54

Please sign in to comment.