[FEAT][Readme]
Kye committed Dec 1, 2023
1 parent 3ae305e commit a7a6d54
Showing 2 changed files with 32 additions and 17 deletions.
35 changes: 24 additions & 11 deletions README.md
@@ -55,10 +55,10 @@ llm = OpenAIChat(
)


## Initialize the workflow
## Initialize the Agent
agent = Agent(llm=llm, max_loops=1, dashboard=True)

# Run the workflow on a task
# Run the Agent on a task
out = agent.run("Generate a 10,000 word blog on health and wellness.")


@@ -129,14 +129,25 @@ for task in workflow.tasks:
- Run the agent with multiple modalities, which is useful for various real-world tasks in manufacturing, logistics, and healthcare.

```python
from swarms.structs import Agent
# Description: This is an example of how to use the Agent class to run a multi-modal workflow
import os
from dotenv import load_dotenv
from swarms.models.gpt4_vision_api import GPT4VisionAPI
from swarms.prompts.multi_modal_autonomous_instruction_prompt import (
MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1,
)
from swarms.structs import Agent

# Load the environment variables
load_dotenv()

llm = GPT4VisionAPI()
# Get the API key from the environment
api_key = os.environ.get("OPENAI_API_KEY")

# Initialize the language model
llm = GPT4VisionAPI(
openai_api_key=api_key,
max_tokens=500,
)

# Initialize the task
task = (
"Analyze this image of an assembly line and identify any issues such as"
" misaligned parts, defects, or deviations from the standard assembly"
@@ -148,13 +159,15 @@ img = "assembly_line.jpg"
## Initialize the workflow
agent = Agent(
llm=llm,
max_loops='auto'
sop=MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1,
max_loops="auto",
autosave=True,
dashboard=True,
multi_modal=True
)

agent.run(task=task, img=img)

# Run the workflow on a task
out = agent.run(task=task, img=img)
print(out)


```
14 changes: 8 additions & 6 deletions multi_modal_auto_agent.py
@@ -1,32 +1,34 @@
# Description: This is an example of how to use the Agent class to run a multi-modal workflow
import os

from dotenv import load_dotenv

from swarms.models.gpt4_vision_api import GPT4VisionAPI
from swarms.prompts.multi_modal_autonomous_instruction_prompt import (
MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1,
)
from swarms.structs import Agent

# Load the environment variables
load_dotenv()

# Get the API key from the environment
api_key = os.environ.get("OPENAI_API_KEY")

# Initialize the language model
llm = GPT4VisionAPI(
openai_api_key=api_key,
max_tokens=500,
)

# Initialize the task
task = "What is the color of the object?"
img = "images/swarms.jpeg"

## Initialize the workflow
agent = Agent(
llm=llm,
max_loops="auto",
sop=MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1,
autosave=True,
dashboard=True,
multi_modal=True
)

# Run the workflow on a task
out = agent.run(task=task, img=img)
print(out)
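
For reference, both the updated README example and multi_modal_auto_agent.py expect OPENAI_API_KEY to be supplied through the environment, loaded from a .env file by python-dotenv. A minimal pre-flight sketch, not part of this commit, that checks the key is visible before running the script:

```python
# Minimal sketch (not part of this commit): confirm the environment is set up
# the way multi_modal_auto_agent.py expects before running it.
import os

from dotenv import load_dotenv

# Reads OPENAI_API_KEY from a local .env file, if one is present
load_dotenv()

if os.environ.get("OPENAI_API_KEY") is None:
    raise SystemExit(
        "OPENAI_API_KEY is not set; add it to a .env file or export it first."
    )
print("OPENAI_API_KEY found; the multi-modal agent script can start.")
```

If the check passes, running `python multi_modal_auto_agent.py` should execute the example end to end.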
