Skip to content

Commit

Permalink
Update tutorials and examples to use OptoPrime
Browse files: browse the repository at this point in the history
  • Loading branch information
adith387 committed Jul 22, 2024
1 parent ecdb780 commit 7801cf6
Show file tree
Hide file tree
Showing 12 changed files with 1,190 additions and 2,866 deletions.
8 changes: 4 additions & 4 deletions docs/examples/code/code_optimization.ipynb
Original file line number | Diff line number | Diff line change
Expand Up @@ -35,7 +35,7 @@
"import numpy as np\n",
"from datetime import datetime\n",
"import opto.trace as trace\n",
"from opto.optimizers import FunctionOptimizerV2Memory\n",
"from opto.optimizers import OptoPrime\n",
"from opto.trace.bundle import ExceptionNode\n",
"from opto.trace.errors import ExecutionError"
]
Expand Down Expand Up @@ -139,7 +139,7 @@
" try:\n",
" action = controller(controller_input)\n",
" next_obs, reward, termination, truncation, info = env.step(action)\n",
" except trace.TraceExecutionError as e:\n",
" except ExecutionError as e:\n",
" error = e\n",
" break\n",
"\n",
Expand Down Expand Up @@ -189,7 +189,7 @@
" \"\"\"A feedback controller that computes the action based on the observation.\"\"\"\n",
" return [0, 0, 0, 0]\n",
"\n",
" optimizer = FunctionOptimizerV2Memory(controller.parameters(), config_list=config_list_from_json(\"OAI_CONFIG_LIST\"))\n",
" optimizer = OptoPrime(controller.parameters(), config_list=config_list_from_json(\"OAI_CONFIG_LIST\"))\n",
"\n",
" env = TracedEnv(env_name, seed=seed, relative=relative)\n",
"\n",
Expand All @@ -203,8 +203,8 @@
" target = traj[\"observation\"][-1][\"observation\"]\n",
" returns = [sum(traj[\"reward\"]) for _ in range(n_episodes)]\n",
" else:\n",
" feedback = str(error)\n",
" target = error.exception_node\n",
" feedback = target.data\n",
"\n",
" optimizer.objective = f\"The goal is to optimize the pick-and-place task. {optimizer.default_objective}\"\n",
" optimizer.zero_feedback()\n",
Expand Down
613 changes: 588 additions & 25 deletions docs/examples/game/joint_code_optimization.ipynb

Large diffs are not rendered by default.

52 changes: 29 additions & 23 deletions docs/examples/game/joint_prompt_optimization.ipynb
Original file line number | Diff line number | Diff line change
Expand Up @@ -27,7 +27,7 @@
"import json\n",
"\n",
"import opto.trace as trace\n",
"from opto.optimizers import FunctionOptimizerV2Memory\n",
"from opto.optimizers import OptoPrime\n",
"from autogen import config_list_from_json\n",
"\n",
"config = config_list_from_json(\"OAI_CONFIG_LIST\")\n",
Expand Down Expand Up @@ -286,49 +286,55 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"ITERATION 1\n",
"Alice STOCKPILE THESE RESOURCES: WOOD\n",
"Bob STOCKPILE THESE RESOURCES: GOLD\n",
"The game has ended. Alice has inventory with value of 13 and Bob has inventory with value of 11.\n",
"OVERALL SCORE: 24\n",
"Alice STOCKPILE THESE RESOURCES: N/A\n",
"Bob STOCKPILE THESE RESOURCES: N/A\n",
"The game has ended. Alice has inventory with value of 13 and Bob has inventory with value of 15.\n",
"OVERALL SCORE: 28\n",
"OVERALL SCORE is less than optimal. Find better trades to increase the OVERALL SCORE.\n",
"ITERATION 2\n",
"Alice STOCKPILE THESE RESOURCES: STONE\n",
"Bob STOCKPILE THESE RESOURCES: WOOD\n",
"The game has ended. Alice has inventory with value of 13 and Bob has inventory with value of 12.\n",
"OVERALL SCORE: 25\n",
"Alice STOCKPILE THESE RESOURCES: GOLD, STONE\n",
"Bob STOCKPILE THESE RESOURCES: GOLD, WOOD\n",
"The game has ended. Alice has inventory with value of 13 and Bob has inventory with value of 15.\n",
"OVERALL SCORE: 28\n",
"OVERALL SCORE is less than optimal. Find better trades to increase the OVERALL SCORE.\n",
"ITERATION 3\n",
"Alice STOCKPILE THESE RESOURCES: GOLD\n",
"Bob STOCKPILE THESE RESOURCES: GOLD\n",
"The game has ended. Alice has inventory with value of 15 and Bob has inventory with value of 34.\n",
"OVERALL SCORE: 49\n",
"Alice STOCKPILE THESE RESOURCES: GOLD, WOOD\n",
"Bob STOCKPILE THESE RESOURCES: GOLD, STONE\n",
"The game has ended. Alice has inventory with value of 13 and Bob has inventory with value of 22.\n",
"OVERALL SCORE: 35\n",
"OVERALL SCORE is less than optimal. Find better trades to increase the OVERALL SCORE.\n",
"Cannot extract suggestion from LLM's response:\n",
"{\n",
"\"reasoning\": \"The feedback indicates that the sum of values in the players' inventories at the end of the game is not optimal. The goal is to increase the OVERALL SCORE by making better trade decisions through the chat interactions modeled by the prompts and the responses in the chat variables (chat8, chat9, etc.). The trading decisions are based on the prompts 'STOCKPILE THESE RESOURCES: GOLD, WOOD' for Alice and 'STOCKPILE THESE RESOURCES: GOLD, STONE' for Bob. The trading actions 'TRADE', 'REJECT', and 'ACCEPT' suggest whether a proposed trade between Alice and Bob is successful or not. To optimize the overall score, we need to adjust the trading strategy, which could involve modifying the resources Alice and Bob are aiming to stockpile, to encourage more successful and beneficial trades. Since the only variables we can adjust are str2 and str3, which define the resources each player is trying to accumulate, changing these could potentially lead to better trade outcomes, increasing the overall value of the inventories. However, the instructions and feedback suggest that the strategy and prompts should be changed, rather than specific values. Without specific instructions on what values to change to what new values, there is no direct recommendation to improve the results other than considering changing the trading strategies.\",\n",
"\"answer\": \"\",\n",
"\"suggestion\": {}\n",
"}\n",
"ITERATION 4\n",
"Alice STOCKPILE A VARIETY OF RESOURCES\n",
"Bob STOCKPILE THESE RESOURCES: STONE\n",
"The game has ended. Alice has inventory with value of 13 and Bob has inventory with value of 15.\n",
"OVERALL SCORE: 28\n",
"OVERALL SCORE is less than optimal. Find better trades to increase the OVERALL SCORE.\n",
"ITERATION 5\n",
"Alice SUGGESTED_CHANGE\n",
"Bob SUGGESTED_CHANGE\n",
"Alice STOCKPILE THESE RESOURCES: GOLD, WOOD\n",
"Bob STOCKPILE THESE RESOURCES: GOLD, STONE\n",
"The game has ended. Alice has inventory with value of 13 and Bob has inventory with value of 22.\n",
"OVERALL SCORE: 35\n",
"OVERALL SCORE is less than optimal. Find better trades to increase the OVERALL SCORE.\n",
"ITERATION 5\n",
"Alice STOCKPILE THESE RESOURCES: STONE, GOLD\n",
"Bob STOCKPILE THESE RESOURCES: WOOD, GOLD\n",
"The game has ended. Alice has inventory with value of 13 and Bob has inventory with value of 12.\n",
"OVERALL SCORE: 25\n",
"OVERALL SCORE is less than optimal. Find better trades to increase the OVERALL SCORE.\n"
]
}
],
"source": [
"# Initialize optimizer\n",
"optimizer = FunctionOptimizerV2Memory(\n",
"optimizer = OptoPrime(\n",
" [p1_prompt, p2_prompt], memory_size=0, config_list=config_list_from_json(\"OAI_CONFIG_LIST\")\n",
" )\n",
"\n",
Expand Down
Loading

0 comments on commit 7801cf6

Please sign in to comment.