Update tutorials and examples to use OptoPrime

microsoft · Jul 22, 2024 · 7801cf6 · 7801cf6
1 parent ecdb780
commit 7801cf6
Show file tree

Hide file tree

Showing 12 changed files with 1,190 additions and 2,866 deletions.
diff --git a/docs/examples/code/code_optimization.ipynb b/docs/examples/code/code_optimization.ipynb
@@ -35,7 +35,7 @@
     "import numpy as np\n",
     "from datetime import datetime\n",
     "import opto.trace as trace\n",
-    "from opto.optimizers import FunctionOptimizerV2Memory\n",
+    "from opto.optimizers import OptoPrime\n",
     "from opto.trace.bundle import ExceptionNode\n",
     "from opto.trace.errors import ExecutionError"
    ]
@@ -139,7 +139,7 @@
     "        try:\n",
     "            action = controller(controller_input)\n",
     "            next_obs, reward, termination, truncation, info = env.step(action)\n",
-    "        except trace.TraceExecutionError as e:\n",
+    "        except ExecutionError as e:\n",
     "            error = e\n",
     "            break\n",
     "\n",
@@ -189,7 +189,7 @@
     "        \"\"\"A feedback controller that computes the action based on the observation.\"\"\"\n",
     "        return [0, 0, 0, 0]\n",
     "\n",
-    "    optimizer = FunctionOptimizerV2Memory(controller.parameters(), config_list=config_list_from_json(\"OAI_CONFIG_LIST\"))\n",
+    "    optimizer = OptoPrime(controller.parameters(), config_list=config_list_from_json(\"OAI_CONFIG_LIST\"))\n",
     "\n",
     "    env = TracedEnv(env_name, seed=seed, relative=relative)\n",
     "\n",
@@ -203,8 +203,8 @@
     "            target = traj[\"observation\"][-1][\"observation\"]\n",
     "            returns = [sum(traj[\"reward\"]) for _ in range(n_episodes)]\n",
     "        else:\n",
-    "            feedback = str(error)\n",
     "            target = error.exception_node\n",
+    "            feedback = target.data\n",
     "\n",
     "        optimizer.objective = f\"The goal is to optimize the pick-and-place task. {optimizer.default_objective}\"\n",
     "        optimizer.zero_feedback()\n",

diff --git a/docs/examples/game/joint_code_optimization.ipynb b/docs/examples/game/joint_code_optimization.ipynb
diff --git a/docs/examples/game/joint_prompt_optimization.ipynb b/docs/examples/game/joint_prompt_optimization.ipynb
@@ -27,7 +27,7 @@
     "import json\n",
     "\n",
     "import opto.trace as trace\n",
-    "from opto.optimizers import FunctionOptimizerV2Memory\n",
+    "from opto.optimizers import OptoPrime\n",
     "from autogen import config_list_from_json\n",
     "\n",
     "config = config_list_from_json(\"OAI_CONFIG_LIST\")\n",
@@ -286,49 +286,55 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
       "ITERATION 1\n",
-      "Alice STOCKPILE THESE RESOURCES: WOOD\n",
-      "Bob STOCKPILE THESE RESOURCES: GOLD\n",
-      "The game has ended. Alice has inventory with value of 13 and Bob has inventory with value of 11.\n",
-      "OVERALL SCORE: 24\n",
+      "Alice STOCKPILE THESE RESOURCES: N/A\n",
+      "Bob STOCKPILE THESE RESOURCES: N/A\n",
+      "The game has ended. Alice has inventory with value of 13 and Bob has inventory with value of 15.\n",
+      "OVERALL SCORE: 28\n",
       "OVERALL SCORE is less than optimal. Find better trades to increase the OVERALL SCORE.\n",
       "ITERATION 2\n",
-      "Alice STOCKPILE THESE RESOURCES: STONE\n",
-      "Bob STOCKPILE THESE RESOURCES: WOOD\n",
-      "The game has ended. Alice has inventory with value of 13 and Bob has inventory with value of 12.\n",
-      "OVERALL SCORE: 25\n",
+      "Alice STOCKPILE THESE RESOURCES: GOLD, STONE\n",
+      "Bob STOCKPILE THESE RESOURCES: GOLD, WOOD\n",
+      "The game has ended. Alice has inventory with value of 13 and Bob has inventory with value of 15.\n",
+      "OVERALL SCORE: 28\n",
       "OVERALL SCORE is less than optimal. Find better trades to increase the OVERALL SCORE.\n",
       "ITERATION 3\n",
-      "Alice STOCKPILE THESE RESOURCES: GOLD\n",
-      "Bob STOCKPILE THESE RESOURCES: GOLD\n",
-      "The game has ended. Alice has inventory with value of 15 and Bob has inventory with value of 34.\n",
-      "OVERALL SCORE: 49\n",
+      "Alice STOCKPILE THESE RESOURCES: GOLD, WOOD\n",
+      "Bob STOCKPILE THESE RESOURCES: GOLD, STONE\n",
+      "The game has ended. Alice has inventory with value of 13 and Bob has inventory with value of 22.\n",
+      "OVERALL SCORE: 35\n",
       "OVERALL SCORE is less than optimal. Find better trades to increase the OVERALL SCORE.\n",
+      "Cannot extract suggestion from LLM's response:\n",
+      "{\n",
+      "\"reasoning\": \"The feedback indicates that the sum of values in the players' inventories at the end of the game is not optimal. The goal is to increase the OVERALL SCORE by making better trade decisions through the chat interactions modeled by the prompts and the responses in the chat variables (chat8, chat9, etc.). The trading decisions are based on the prompts 'STOCKPILE THESE RESOURCES: GOLD, WOOD' for Alice and 'STOCKPILE THESE RESOURCES: GOLD, STONE' for Bob. The trading actions 'TRADE', 'REJECT', and 'ACCEPT' suggest whether a proposed trade between Alice and Bob is successful or not. To optimize the overall score, we need to adjust the trading strategy, which could involve modifying the resources Alice and Bob are aiming to stockpile, to encourage more successful and beneficial trades. Since the only variables we can adjust are str2 and str3, which define the resources each player is trying to accumulate, changing these could potentially lead to better trade outcomes, increasing the overall value of the inventories. However, the instructions and feedback suggest that the strategy and prompts should be changed, rather than specific values. Without specific instructions on what values to change to what new values, there is no direct recommendation to improve the results other than considering changing the trading strategies.\",\n",
+      "\"answer\": \"\",\n",
+      "\"suggestion\": {}\n",
+      "}\n",
       "ITERATION 4\n",
-      "Alice STOCKPILE A VARIETY OF RESOURCES\n",
-      "Bob STOCKPILE THESE RESOURCES: STONE\n",
-      "The game has ended. Alice has inventory with value of 13 and Bob has inventory with value of 15.\n",
-      "OVERALL SCORE: 28\n",
-      "OVERALL SCORE is less than optimal. Find better trades to increase the OVERALL SCORE.\n",
-      "ITERATION 5\n",
-      "Alice SUGGESTED_CHANGE\n",
-      "Bob SUGGESTED_CHANGE\n",
+      "Alice STOCKPILE THESE RESOURCES: GOLD, WOOD\n",
+      "Bob STOCKPILE THESE RESOURCES: GOLD, STONE\n",
       "The game has ended. Alice has inventory with value of 13 and Bob has inventory with value of 22.\n",
       "OVERALL SCORE: 35\n",
+      "OVERALL SCORE is less than optimal. Find better trades to increase the OVERALL SCORE.\n",
+      "ITERATION 5\n",
+      "Alice STOCKPILE THESE RESOURCES: STONE, GOLD\n",
+      "Bob STOCKPILE THESE RESOURCES: WOOD, GOLD\n",
+      "The game has ended. Alice has inventory with value of 13 and Bob has inventory with value of 12.\n",
+      "OVERALL SCORE: 25\n",
       "OVERALL SCORE is less than optimal. Find better trades to increase the OVERALL SCORE.\n"
      ]
     }
    ],
    "source": [
     "# Initialize optimizer\n",
-    "optimizer = FunctionOptimizerV2Memory(\n",
+    "optimizer = OptoPrime(\n",
     "                [p1_prompt, p2_prompt], memory_size=0, config_list=config_list_from_json(\"OAI_CONFIG_LIST\")\n",
     "            )\n",
     "\n",