use gpt-4o-mini for tests

microsoft · Oct 1, 2024 · 0448fe6 · 0448fe6
1 parent 00941b0
commit 0448fe6
Show file tree

Hide file tree

Showing 10 changed files with 12 additions and 33 deletions.
diff --git a/notebook/agentchat_function_call_currency_calculator.ipynb b/notebook/agentchat_function_call_currency_calculator.ipynb
@@ -65,7 +65,7 @@
     "\n",
     "config_list = autogen.config_list_from_json(\n",
     "    \"OAI_CONFIG_LIST\",\n",
-    "    filter_dict={\"tags\": [\"tools\"]},  # comment out to get all\n",
+    "    filter_dict={\"tags\": [\"tool\"]},  # comment out to get all\n",
     ")"
    ]
   },

diff --git a/test/agentchat/contrib/agent_eval/test_agent_eval.py b/test/agentchat/contrib/agent_eval/test_agent_eval.py
@@ -32,13 +32,8 @@ def remove_ground_truth(test_case: str):
         filter_dict={
             "api_type": ["openai"],
             "model": [
-                "gpt-4-turbo",
-                "gpt-4-turbo-preview",
-                "gpt-4-0125-preview",
-                "gpt-4-1106-preview",
+                "gpt-4o-mini",
                 "gpt-3.5-turbo",
-                "gpt-3.5-turbo-0125",
-                "gpt-3.5-turbo-1106",
             ],
         },
     )

diff --git a/test/agentchat/contrib/capabilities/chat_with_teachable_agent.py b/test/agentchat/contrib/capabilities/chat_with_teachable_agent.py
@@ -11,7 +11,8 @@
 from test_assistant_agent import KEY_LOC, OAI_CONFIG_LIST  # noqa: E402
 
 # Specify the model to use. GPT-3.5 is less reliable than GPT-4 at learning from user input.
-filter_dict = {"model": ["gpt-4-0125-preview"]}
+filter_dict = {"model": ["gpt-4o-mini"]}
+# filter_dict = {"model": ["gpt-4-0125-preview"]}
 # filter_dict = {"model": ["gpt-3.5-turbo-1106"]}
 # filter_dict = {"model": ["gpt-4-0613"]}
 # filter_dict = {"model": ["gpt-3.5-turbo"]}

diff --git a/test/agentchat/contrib/capabilities/test_image_generation_capability.py b/test/agentchat/contrib/capabilities/test_image_generation_capability.py
@@ -26,8 +26,6 @@
 sys.path.append(os.path.join(os.path.dirname(__file__), "../.."))
 from conftest import MOCK_OPEN_AI_API_KEY, skip_openai  # noqa: E402
 
-filter_dict = {"model": ["gpt-35-turbo-16k", "gpt-3.5-turbo-16k"]}
-
 RESOLUTIONS = ["256x256", "512x512", "1024x1024"]
 QUALITIES = ["standard", "hd"]
 PROMPTS = [

diff --git a/test/agentchat/contrib/capabilities/test_teachable_agent.py b/test/agentchat/contrib/capabilities/test_teachable_agent.py
@@ -28,7 +28,8 @@
 # filter_dict={"model": ["gpt-3.5-turbo-1106"]}
 # filter_dict={"model": ["gpt-3.5-turbo-0613"]}
 # filter_dict={"model": ["gpt-4"]}
-filter_dict = {"tags": ["gpt-35-turbo-16k", "gpt-3.5-turbo-16k"]}
+# filter_dict = {"tags": ["gpt-35-turbo-16k", "gpt-3.5-turbo-16k"]}
+filter_dict={"model": ["gpt-4o-mini"]}
 
 
 def create_teachable_agent(reset_db=False, verbosity=0):

diff --git a/test/agentchat/test_tool_calls.py b/test/agentchat/test_tool_calls.py
@@ -144,7 +144,7 @@ def test_update_tool():
     config_list_gpt4 = autogen.config_list_from_json(
         OAI_CONFIG_LIST,
         filter_dict={
-            "tags": ["gpt-4"],
+            "tags": ["gpt-4o-mini"],
         },
         file_location=KEY_LOC,
     )

diff --git a/test/io/test_websockets.py b/test/io/test_websockets.py
@@ -97,14 +97,8 @@ def on_connect(iostream: IOWebsockets, success_dict: Dict[str, bool] = success_d
                 OAI_CONFIG_LIST,
                 filter_dict={
                     "model": [
+                        "gpt-4o-mini",
                         "gpt-3.5-turbo",
-                        "gpt-3.5-turbo-16k",
-                        "gpt-4",
-                        "gpt-4-0314",
-                        "gpt4",
-                        "gpt-4-32k",
-                        "gpt-4-32k-0314",
-                        "gpt-4-32k-v0314",
                     ],
                 },
                 file_location=KEY_LOC,

diff --git a/test/oai/_test_completion.py b/test/oai/_test_completion.py
@@ -143,13 +143,8 @@ def test_nocontext():
             file_location=KEY_LOC,
             filter_dict={
                 "model": {
+                    "gpt-4o-mini",
                     "gpt-3.5-turbo",
-                    "gpt-3.5-turbo-16k",
-                    "gpt-3.5-turbo-16k-0613",
-                    "gpt-3.5-turbo-0301",
-                    "chatgpt-35-turbo-0301",
-                    "gpt-35-turbo-v0301",
-                    "gpt",
                 },
             },
         ),
@@ -179,13 +174,8 @@ def test_humaneval(num_samples=1):
         env_or_file=OAI_CONFIG_LIST,
         filter_dict={
             "model": {
+                "gpt-4o-mini",
                 "gpt-3.5-turbo",
-                "gpt-3.5-turbo-16k",
-                "gpt-3.5-turbo-16k-0613",
-                "gpt-3.5-turbo-0301",
-                "chatgpt-35-turbo-0301",
-                "gpt-35-turbo-v0301",
-                "gpt",
             },
         },
         file_location=KEY_LOC,

diff --git a/test/oai/test_client.py b/test/oai/test_client.py
@@ -89,7 +89,7 @@ def test_aoai_chat_completion():
     print(client.extract_text_or_completion_object(response))
 
 
-# @pytest.mark.skipif(skip or not TOOL_ENABLED, reason="openai>=1.1.0 not installed")
+@pytest.mark.skipif(skip or not TOOL_ENABLED, reason="openai>=1.1.0 not installed")
 @pytest.mark.skip(reason="This test is not working until Azure settings are updated")
 def test_oai_tool_calling_extraction():
     config_list = config_list_from_json(

diff --git a/test/oai/test_client_stream.py b/test/oai/test_client_stream.py
@@ -237,7 +237,7 @@ def test_chat_tools_stream() -> None:
     config_list = config_list_from_json(
         env_or_file=OAI_CONFIG_LIST,
         file_location=KEY_LOC,
-        filter_dict={"tags": ["multitool"]},
+        filter_dict={"tags": ["tool"]},
     )
     tools = [
         {