new notebooks for robustness

ur-whitelab · Oct 18, 2024 · 9ab3bfd · 9ab3bfd
1 parent e38277e
commit 9ab3bfd
Show file tree

Hide file tree

Showing 34 changed files with 3,115 additions and 101 deletions.
diff --git a/notebooks/experiments/Robustness/gpt-4o-2024-05-13/prompt1.ipynb b/notebooks/experiments/Robustness/gpt-4o-2024-05-13/prompt1.ipynb
diff --git a/notebooks/experiments/Robustness/gpt-4o-2024-08-06/prompt1.ipynb b/notebooks/experiments/Robustness/gpt-4o-2024-08-06/prompt1.ipynb
@@ -0,0 +1,283 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from mdagent import MDAgent\n",
+    "import os"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Both prompts request the same task; the descriptive one adds a numbered-task preamble.\n",
+    "non_descriptive_prompt_1 = \"Download the PDB file 1LYZ.\"\n",
+    "descriptive_prompt_1 = f\"Complete all of the following tasks: 1. {non_descriptive_prompt_1}\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Model name handed to MDAgent through its `model` argument.\n",
+    "llm_model = \"gpt-4o-2024-08-06\"\n",
+    "# Value handed to MDAgent through its `top_k_tools` argument.\n",
+    "tools = \"all\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "date: 2024-10-18\n",
+      "time: 00:49:00\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Record today's date and time for this run.\n",
+    "import datetime\n",
+    "now = datetime.datetime.now()\n",
+    "date = format(now, \"%Y-%m-%d\")\n",
+    "print(\"date:\",date)\n",
+    "time = format(now, \"%H:%M:%S\")\n",
+    "print(\"time:\",time)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# descriptive prompt"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Agent instance used for the descriptive-prompt run.\n",
+    "agent_1 = MDAgent(\n",
+    "    agent_type=\"Structured\",\n",
+    "    model=llm_model,\n",
+    "    top_k_tools=tools,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Thought: To complete the task, I need to download the PDB file for the protein with the ID \"1LYZ\". I will use the PDBFileDownloader tool to accomplish this.\n",
+      "\n",
+      "Action:\n",
+      "```json\n",
+      "{\n",
+      "  \"action\": \"PDBFileDownloader\",\n",
+      "  \"action_input\": {\n",
+      "    \"query\": \"1LYZ\"\n",
+      "  }\n",
+      "}\n",
+      "```PDB file found with this ID: 1LYZ\n",
+      "To complete the task, I have successfully downloaded the PDB file for the protein with the ID \"1LYZ\". \n",
+      "\n",
+      "Final Answer: The PDB file for 1LYZ has been successfully downloaded. The file ID is 1LYZ_004903."
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "({'input': '\\n    You are an expert molecular dynamics scientist, and\\n    your task is to respond to the question or\\n    solve the problem to the best of your ability using\\n    the provided tools.\\n\\n    You can only respond with a single complete\\n    \\'Thought, Action, Action Input\\' format\\n    OR a single \\'Final Answer\\' format.\\n\\n    Complete format:\\n    Thought: (reflect on your progress and decide what to do next)\\n    Action:\\n    ```\\n    {\\n        \"action\": (the action name, it should be the name of a tool),\\n        \"action_input\": (the input string for the action)\\n    }\\n    \\'\\'\\'\\n\\n    OR\\n\\n    Final Answer: (the final response to the original input\\n    question, once all steps are complete)\\n\\n    You are required to use the tools provided,\\n    using the most specific tool\\n    available for each action.\\n    Your final answer should contain all information\\n    necessary to answer the question and its subquestions.\\n    Before you finish, reflect on your progress and make\\n    sure you have addressed the question in its entirety.\\n\\n    If you are asked to continue\\n    or reference previous runs,\\n    the context will be provided to you.\\n    If context is provided, you should assume\\n    you are continuing a chat.\\n\\n    Here is the input:\\n    Previous Context: None\\n    Question: Complete all of the following tasks: 1. Download the PDB file 1LYZ. ',\n",
+       "  'output': 'To complete the task, I have successfully downloaded the PDB file for the protein with the ID \"1LYZ\". \\n\\nFinal Answer: The PDB file for 1LYZ has been successfully downloaded. The file ID is 1LYZ_004903.'},\n",
+       " 'X180EV5E')"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Run the agent on the descriptive prompt; as the cell's last expression, the return value is displayed.\n",
+    "agent_1.run(descriptive_prompt_1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "ckpt_dir: ckpt_19\n",
+      "Files found in registry: 1LYZ_004903: PDB file downloaded from RSCB\n",
+      " PDBFile ID: 1LYZ_004903\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Print the checkpoint directory name, then each comma-separated registry entry on its own line.\n",
+    "registry = agent_1.path_registry\n",
+    "print(\"ckpt_dir:\",os.path.basename(registry.ckpt_dir))\n",
+    "paths_and_descriptions = registry.list_path_names_and_descriptions()\n",
+    "print(*paths_and_descriptions.split(\",\"), sep=\"\\n\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "It is asserted that file path for 1LYZ_004903 exists\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Verify that the downloaded 1LYZ file is registered and present on disk.\n",
+    "import re\n",
+    "import os\n",
+    "match = re.search(r\"1LYZ_\\d+\", paths_and_descriptions)\n",
+    "# re.search returns None when nothing matches; fail with a clear message instead of an AttributeError.\n",
+    "assert match is not None, \"No 1LYZ file ID found in the path registry\"\n",
+    "file_id = match.group(0)\n",
+    "pdb_path = registry.get_mapped_path(file_id)\n",
+    "assert os.path.exists(pdb_path), f\"Mapped path for {file_id} does not exist: {pdb_path}\"\n",
+    "print(f'It is asserted that file path for {file_id} exists')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# non-descriptive prompt"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Agent instance used for the non-descriptive-prompt run.\n",
+    "agent_2 = MDAgent(\n",
+    "    agent_type=\"Structured\",\n",
+    "    model=llm_model,\n",
+    "    top_k_tools=tools,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Thought: To download the PDB file for the protein with the ID \"1LYZ\", I will use the PDBFileDownloader tool. \n",
+      "\n",
+      "Action:\n",
+      "```json\n",
+      "{\n",
+      "  \"action\": \"PDBFileDownloader\",\n",
+      "  \"action_input\": \"1LYZ\"\n",
+      "}\n",
+      "```PDB file found with this ID: 1LYZ\n",
+      "The PDB file for the protein with the ID \"1LYZ\" has been successfully downloaded. The file is named \"1LYZ_004906\".\n",
+      "\n",
+      "Final Answer: The PDB file for 1LYZ has been successfully downloaded and is named \"1LYZ_004906\"."
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "({'input': '\\n    You are an expert molecular dynamics scientist, and\\n    your task is to respond to the question or\\n    solve the problem to the best of your ability using\\n    the provided tools.\\n\\n    You can only respond with a single complete\\n    \\'Thought, Action, Action Input\\' format\\n    OR a single \\'Final Answer\\' format.\\n\\n    Complete format:\\n    Thought: (reflect on your progress and decide what to do next)\\n    Action:\\n    ```\\n    {\\n        \"action\": (the action name, it should be the name of a tool),\\n        \"action_input\": (the input string for the action)\\n    }\\n    \\'\\'\\'\\n\\n    OR\\n\\n    Final Answer: (the final response to the original input\\n    question, once all steps are complete)\\n\\n    You are required to use the tools provided,\\n    using the most specific tool\\n    available for each action.\\n    Your final answer should contain all information\\n    necessary to answer the question and its subquestions.\\n    Before you finish, reflect on your progress and make\\n    sure you have addressed the question in its entirety.\\n\\n    If you are asked to continue\\n    or reference previous runs,\\n    the context will be provided to you.\\n    If context is provided, you should assume\\n    you are continuing a chat.\\n\\n    Here is the input:\\n    Previous Context: None\\n    Question: Download the PDB file 1LYZ. ',\n",
+       "  'output': 'The PDB file for the protein with the ID \"1LYZ\" has been successfully downloaded. The file is named \"1LYZ_004906\".\\n\\nFinal Answer: The PDB file for 1LYZ has been successfully downloaded and is named \"1LYZ_004906\".'},\n",
+       " 'BDDANVWX')"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Run the agent on the non-descriptive prompt; as the cell's last expression, the return value is displayed.\n",
+    "agent_2.run(non_descriptive_prompt_1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "ckpt_dir: ckpt_20\n",
+      "Files found in registry: 1LYZ_004906: PDB file downloaded from RSCB\n",
+      " PDBFile ID: 1LYZ_004906\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Print the checkpoint directory name, then each comma-separated registry entry on its own line.\n",
+    "registry = agent_2.path_registry\n",
+    "print(\"ckpt_dir:\",os.path.basename(registry.ckpt_dir))\n",
+    "paths_and_descriptions = registry.list_path_names_and_descriptions()\n",
+    "print(*paths_and_descriptions.split(\",\"), sep=\"\\n\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "It is asserted that file path for 1LYZ_004906 exists\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Verify that the downloaded 1LYZ file is registered and present on disk.\n",
+    "import re\n",
+    "import os\n",
+    "match = re.search(r\"1LYZ_\\d+\", paths_and_descriptions)\n",
+    "# re.search returns None when nothing matches; fail with a clear message instead of an AttributeError.\n",
+    "assert match is not None, \"No 1LYZ file ID found in the path registry\"\n",
+    "file_id = match.group(0)\n",
+    "pdb_path = registry.get_mapped_path(file_id)\n",
+    "assert os.path.exists(pdb_path), f\"Mapped path for {file_id} does not exist: {pdb_path}\"\n",
+    "print(f'It is asserted that file path for {file_id} exists')"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "mdagent",
+   "language": "python",
+   "name": "mdagent"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}