Skip to content

Commit

Permalink
rearranged & tidied up result folders
Browse files Browse the repository at this point in the history
  • Loading branch information
qcampbel committed Oct 23, 2024
1 parent 65eee23 commit a9404bf
Show file tree
Hide file tree
Showing 481 changed files with 533,557 additions and 60,879 deletions.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.

Large diffs are not rendered by default.

32,127 changes: 32,127 additions & 0 deletions notebooks/experiments/Robustness/gpt-4o-2024-08-06/prompt10.ipynb

Large diffs are not rendered by default.

368 changes: 368 additions & 0 deletions notebooks/experiments/Robustness/gpt-4o-2024-08-06/prompt2.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,368 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from mdagent import MDAgent\n",
"import os"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"descriptive_prompt_2 = \"Complete all of the following tasks: 1. Download the PDB file 1LYZ. 2. Tell me how many residues are in the chains and beta sheets.\"\n",
"non_descriptive_prompt_2 = \"Download the PDB file 1LYZ. Tell me how many residues are in the chains and beta sheets.\""
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"llm_model = \"gpt-4o-2024-08-06\"\n",
"tools = \"all\""
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"date: 2024-10-18\n",
"time: 00:50:38\n"
]
}
],
"source": [
"#todays date and time\n",
"import datetime\n",
"now = datetime.datetime.now()\n",
"date = now.strftime(\"%Y-%m-%d\")\n",
"print(\"date:\",date)\n",
"time = now.strftime(\"%H:%M:%S\")\n",
"print(\"time:\",time)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# descriptive prompt"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"agent_1 = MDAgent(agent_type=\"Structured\", model=llm_model, top_k_tools=tools)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Thought: To complete the tasks, I need to first download the PDB file for 1LYZ. After downloading, I will analyze the structure to determine the number of residues in the chains and beta sheets.\n",
"\n",
"Action:\n",
"```json\n",
"{\n",
" \"action\": \"PDBFileDownloader\",\n",
" \"action_input\": \"1LYZ\"\n",
"}\n",
"```PDB file found with this ID: 1LYZ\n",
"To determine the number of residues in the chains and beta sheets, I need to analyze the downloaded PDB file for 1LYZ.\n",
"\n",
"Action:\n",
"```json\n",
"{\n",
" \"action\": \"SummarizeProteinStructure\",\n",
" \"action_input\": {\n",
" \"traj_file\": \"1LYZ_005041\"\n",
" }\n",
"}\n",
"```Action:\n",
"```json\n",
"{\n",
" \"action\": \"ComputeDSSP\",\n",
" \"action_input\": {\n",
" \"traj_file\": \"1LYZ_005041\",\n",
" \"target_frames\": \"all\"\n",
" }\n",
"}\n",
"```To complete the tasks, I need to provide the number of residues in the chains and beta sheets based on the observations from the DSSP analysis.\n",
"\n",
"Final Answer: The PDB file 1LYZ contains 230 residues in total, distributed across 2 chains. According to the DSSP analysis, there are 14 residues in beta sheets (strands)."
]
},
{
"data": {
"text/plain": [
"({'input': '\\n You are an expert molecular dynamics scientist, and\\n your task is to respond to the question or\\n solve the problem to the best of your ability using\\n the provided tools.\\n\\n You can only respond with a single complete\\n \\'Thought, Action, Action Input\\' format\\n OR a single \\'Final Answer\\' format.\\n\\n Complete format:\\n Thought: (reflect on your progress and decide what to do next)\\n Action:\\n ```\\n {\\n \"action\": (the action name, it should be the name of a tool),\\n \"action_input\": (the input string for the action)\\n }\\n \\'\\'\\'\\n\\n OR\\n\\n Final Answer: (the final response to the original input\\n question, once all steps are complete)\\n\\n You are required to use the tools provided,\\n using the most specific tool\\n available for each action.\\n Your final answer should contain all information\\n necessary to answer the question and its subquestions.\\n Before you finish, reflect on your progress and make\\n sure you have addressed the question in its entirety.\\n\\n If you are asked to continue\\n or reference previous runs,\\n the context will be provided to you.\\n If context is provided, you should assume\\n you are continuing a chat.\\n\\n Here is the input:\\n Previous Context: None\\n Question: Complete all of the following tasks: 1. Download the PDB file 1LYZ. 2. Tell me how many residues are in the chains and beta sheets. ',\n",
" 'output': 'To complete the tasks, I need to provide the number of residues in the chains and beta sheets based on the observations from the DSSP analysis.\\n\\nFinal Answer: The PDB file 1LYZ contains 230 residues in total, distributed across 2 chains. According to the DSSP analysis, there are 14 residues in beta sheets (strands).'},\n",
" 'ZRVGGF82')"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"agent_1.run(descriptive_prompt_2)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"ckpt_dir: ckpt_21\n",
"Files found in registry: 1LYZ_005041: PDB file downloaded from RSCB\n",
" PDBFile ID: 1LYZ_005041\n",
" rec0_005045: dssp values for trajectory with id: 1LYZ_005041\n"
]
}
],
"source": [
"registry = agent_1.path_registry\n",
"print(\"ckpt_dir:\",os.path.basename(registry.ckpt_dir))\n",
"paths_and_descriptions = registry.list_path_names_and_descriptions()\n",
"print(\"\\n\".join(paths_and_descriptions.split(\",\")))"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"It is asserted that file path for 1LYZ_005041 exists\n",
"Number of chains: 2\n",
"Number of sheets: 14\n",
"Number of helices: 50\n",
"Number of coils: 65\n"
]
}
],
"source": [
"import mdtraj as md\n",
"import os\n",
"import re\n",
"\n",
"match = re.search(rf\"1LYZ_\\d+\", paths_and_descriptions)\n",
"file_id = match.group(0)\n",
"pdb_path = registry.get_mapped_path(file_id)\n",
"assert os.path.exists(pdb_path)\n",
"print(f'It is asserted that file path for {file_id} exists')\n",
"\n",
"traj = md.load(pdb_path)\n",
"#get dssp \n",
"number_of_chains = traj.n_chains\n",
"secondary_structure = md.compute_dssp(traj,simplified=True)\n",
"print(\"Number of chains: \",number_of_chains)\n",
"print(\"Number of sheets: \",len([i for i in secondary_structure[0] if i == 'E']))\n",
"print(\"Number of helices: \",len([i for i in secondary_structure[0] if i == 'H']))\n",
"print(\"Number of coils: \",len([i for i in secondary_structure[0] if i == 'C']))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# non-descriptive prompt"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"agent_2 = MDAgent(agent_type=\"Structured\", model=llm_model, top_k_tools=tools)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Thought: To answer the question, I need to first download the PDB file for 1LYZ. Then, I will analyze the structure to determine the number of residues in the chains and beta sheets. I will start by downloading the PDB file.\n",
"\n",
"Action:\n",
"```json\n",
"{\n",
" \"action\": \"PDBFileDownloader\",\n",
" \"action_input\": \"1LYZ\"\n",
"}\n",
"```PDB file found with this ID: 1LYZ\n",
"To determine the number of residues in the chains and beta sheets, I need to analyze the downloaded PDB file. I will use the tool to get the number and location of turns, beta sheets, and helices for the protein.\n",
"\n",
"Action:\n",
"```json\n",
"{\n",
" \"action\": \"GetTurnsBetaSheetsHelices\",\n",
" \"action_input\": {\n",
" \"query\": \"1LYZ\",\n",
" \"primary_accession\": \"\"\n",
" }\n",
"}\n",
"```To accurately determine the number of residues in the chains and beta sheets, I need to analyze the PDB file directly for structural information. I will summarize the protein structure to get the number of residues in the chains and then specifically look for beta sheets.\n",
"\n",
"Action:\n",
"```json\n",
"{\n",
" \"action\": \"SummarizeProteinStructure\",\n",
" \"action_input\": {\n",
" \"traj_file\": \"1LYZ_005049\"\n",
" }\n",
"}\n",
"```To determine the number of residues in the beta sheets, I need to analyze the secondary structure of the protein. Since the previous attempt to get the beta sheets information returned empty, I will use the DSSP tool to compute the secondary structure assignment for the protein trajectory, which will help identify the beta sheets.\n",
"\n",
"Action:\n",
"```json\n",
"{\n",
" \"action\": \"ComputeDSSP\",\n",
" \"action_input\": {\n",
" \"traj_file\": \"1LYZ_005049\",\n",
" \"target_frames\": \"all\"\n",
" }\n",
"}\n",
"```To provide a complete answer, I need to summarize the findings from the analysis of the PDB file 1LYZ. The analysis revealed the following:\n",
"\n",
"- The protein has 230 residues in total.\n",
"- There are 2 chains in the protein.\n",
"- The secondary structure analysis using DSSP indicates that there are 14 residues in beta sheets (strands).\n",
"\n",
"Final Answer: The PDB file 1LYZ contains 230 residues in total, distributed across 2 chains. Among these, 14 residues are part of beta sheets."
]
},
{
"data": {
"text/plain": [
"({'input': '\\n You are an expert molecular dynamics scientist, and\\n your task is to respond to the question or\\n solve the problem to the best of your ability using\\n the provided tools.\\n\\n You can only respond with a single complete\\n \\'Thought, Action, Action Input\\' format\\n OR a single \\'Final Answer\\' format.\\n\\n Complete format:\\n Thought: (reflect on your progress and decide what to do next)\\n Action:\\n ```\\n {\\n \"action\": (the action name, it should be the name of a tool),\\n \"action_input\": (the input string for the action)\\n }\\n \\'\\'\\'\\n\\n OR\\n\\n Final Answer: (the final response to the original input\\n question, once all steps are complete)\\n\\n You are required to use the tools provided,\\n using the most specific tool\\n available for each action.\\n Your final answer should contain all information\\n necessary to answer the question and its subquestions.\\n Before you finish, reflect on your progress and make\\n sure you have addressed the question in its entirety.\\n\\n If you are asked to continue\\n or reference previous runs,\\n the context will be provided to you.\\n If context is provided, you should assume\\n you are continuing a chat.\\n\\n Here is the input:\\n Previous Context: None\\n Question: Download the PDB file 1LYZ. Tell me how many residues are in the chains and beta sheets. ',\n",
" 'output': 'To provide a complete answer, I need to summarize the findings from the analysis of the PDB file 1LYZ. The analysis revealed the following:\\n\\n- The protein has 230 residues in total.\\n- There are 2 chains in the protein.\\n- The secondary structure analysis using DSSP indicates that there are 14 residues in beta sheets (strands).\\n\\nFinal Answer: The PDB file 1LYZ contains 230 residues in total, distributed across 2 chains. Among these, 14 residues are part of beta sheets.'},\n",
" 'U7AP098N')"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"agent_2.run(non_descriptive_prompt_2)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"ckpt_dir: ckpt_22\n",
"Files found in registry: 1LYZ_005049: PDB file downloaded from RSCB\n",
" PDBFile ID: 1LYZ_005049\n",
" rec0_005057: dssp values for trajectory with id: 1LYZ_005049\n"
]
}
],
"source": [
"registry = agent_2.path_registry\n",
"print(\"ckpt_dir:\",os.path.basename(registry.ckpt_dir))\n",
"paths_and_descriptions = registry.list_path_names_and_descriptions()\n",
"print(\"\\n\".join(paths_and_descriptions.split(\",\")))"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"It is asserted that file path for 1LYZ_005049 exists\n",
"Number of chains: 2\n",
"Number of sheets: 14\n",
"Number of helices: 50\n",
"Number of coils: 65\n"
]
}
],
"source": [
"import mdtraj as md\n",
"import os\n",
"import re\n",
"\n",
"match = re.search(rf\"1LYZ_\\d+\", paths_and_descriptions)\n",
"file_id = match.group(0)\n",
"pdb_path = registry.get_mapped_path(file_id)\n",
"assert os.path.exists(pdb_path)\n",
"print(f'It is asserted that file path for {file_id} exists')\n",
"\n",
"traj = md.load(pdb_path)\n",
"#get dssp \n",
"number_of_chains = traj.n_chains\n",
"secondary_structure = md.compute_dssp(traj,simplified=True)\n",
"print(\"Number of chains: \",number_of_chains)\n",
"print(\"Number of sheets: \",len([i for i in secondary_structure[0] if i == 'E']))\n",
"print(\"Number of helices: \",len([i for i in secondary_structure[0] if i == 'H']))\n",
"print(\"Number of coils: \",len([i for i in secondary_structure[0] if i == 'C']))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "mdagent",
"language": "python",
"name": "mdagent"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.4"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Loading

0 comments on commit a9404bf

Please sign in to comment.