feat: OpenAI Image Generation Tool (#628)

* feat: dall-e-3 * chore: remove checkpoints * lint * cr * chore: use multi-modal as an example * chore: delete checkpoint * chore: fix tests and lint * cr * lint
run-llama · Nov 18, 2023 · a96e996 · a96e996
1 parent c128839
commit a96e996
Showing 13 changed files with 426 additions and 7 deletions.
diff --git a/.gitignore b/.gitignore
@@ -8,3 +8,4 @@
 .idea/
 llama-hub.iml
 llamahub/
+img_cache/
diff --git a/llama_hub/microsoft_onedrive/base.py b/llama_hub/microsoft_onedrive/base.py
@@ -501,7 +501,6 @@ def load_data(
             List[Document]: A list of documents.
         """
         try:
-
             with tempfile.TemporaryDirectory() as temp_dir:
                 self._downloaded_files_metadata = self._init_download_and_get_metadata(
                     temp_dir=temp_dir,

diff --git a/llama_hub/openalex/demo.ipynb b/llama_hub/openalex/demo.ipynb
@@ -53,7 +53,9 @@
     "    citation_chunk_size=1024,\n",
     ")\n",
     "# query the index\n",
-    "response = query_engine.query(\"list the biases in large language models in a markdown table\")"
+    "response = query_engine.query(\n",
+    "    \"list the biases in large language models in a markdown table\"\n",
+    ")"
    ]
   },
   {

diff --git a/llama_hub/tools/library.json b/llama_hub/tools/library.json
@@ -150,5 +150,16 @@
       "symbolic",
       "embedding"
     ]
-  } 
+  },
+  "OpenAIImageGenerationToolSpec": {
+    "id": "tools/openai/image_generation",
+    "author": "manelferreira_",
+    "keywords": [
+      "image",
+      "vision",
+      "cv",
+      "openai",
+      "gpt-3"
+    ]
+  }
 }
diff --git a/llama_hub/tools/notebooks/cogniswitch.ipynb b/llama_hub/tools/notebooks/cogniswitch.ipynb
@@ -145,7 +145,7 @@
    ],
    "source": [
     "store_response = toolspec.store_data(\n",
-    "    file = \"sample_file.txt\",\n",
+    "    file=\"sample_file.txt\",\n",
     "    document_name=\"Cogniswitch file\",\n",
     "    document_description=\"Cogniswitch website\",\n",
     ")\n",

diff --git a/llama_hub/tools/notebooks/multimodal_openai_image.ipynb b/llama_hub/tools/notebooks/multimodal_openai_image.ipynb
diff --git a/llama_hub/tools/notebooks/openai_image_generation_agent.ipynb b/llama_hub/tools/notebooks/openai_image_generation_agent.ipynb
diff --git a/llama_hub/tools/notebooks/tavily.ipynb b/llama_hub/tools/notebooks/tavily.ipynb
@@ -196,7 +196,11 @@
     }
    ],
    "source": [
-    "print(agent.chat(\"Write a deep analysis in markdown syntax about the latest burning man floods\"))"
+    "print(\n",
+    "    agent.chat(\n",
+    "        \"Write a deep analysis in markdown syntax about the latest burning man floods\"\n",
+    "    )\n",
+    ")"
    ]
   },
   {

diff --git a/llama_hub/tools/openai/image_generation/README.md b/llama_hub/tools/openai/image_generation/README.md
@@ -0,0 +1,45 @@
+# OpenAI Image Generation Tool
+
+This tool allows Agents to generate images using OpenAI's DALL-E model. To see more and get started, visit https://openai.com/blog/dall-e/
+
+## Usage
+
+This tool has a more extensive example usage documented in a Jupyter notebook [here](https://github.com/emptycrown/llama-hub/tree/main/llama_hub/tools/notebooks/openai_image_generation_agent.ipynb)
+
+### Usage with Agent
+```python
+from llama_hub.tools.openai.image_generation import OpenAIImageGenerationToolSpec
+
+image_generation_tool = OpenAIImageGenerationToolSpec(api_key=os.environ["OPENAI_API_KEY"])
+
+agent = OpenAIAgent.from_tools(
+    [*image_generation_tool.to_tool_list()],
+    verbose=True,
+)
+
+response = agent.query('A pink and blue llama in a black background with the output')
+
+print(response)
+```
+
+### Usage directly
+```python
+from llama_hub.tools.openai.image_generation import OpenAIImageGenerationToolSpec
+
+image_generation_tool = OpenAIImageGenerationToolSpec(api_key=os.environ["OPENAI_API_KEY"])
+
+image_path = image_generation_tool.image_generation(
+    text="A pink and blue llama with a black background",
+)
+
+# `image_generation` saves the generated image in its cache directory and returns the file path.
+print(image_path)
+
+img = Image.open(image_path)
+
+display(img)
+```
+
+`image_generation`: Takes a text input and generates an image
+
+This tool is designed to be used as a Tool in an Agent. See [here](https://github.com/emptycrown/llama-hub/tree/main) for examples.
diff --git a/llama_hub/tools/openai/image_generation/__init__.py b/llama_hub/tools/openai/image_generation/__init__.py
@@ -0,0 +1,6 @@
+## init file
+from llama_hub.tools.openai.image_generation.base import (
+    OpenAIImageGenerationToolSpec,
+)
+
+__all__ = ["OpenAIImageGenerationToolSpec"]
diff --git a/llama_hub/tools/openai/image_generation/base.py b/llama_hub/tools/openai/image_generation/base.py
@@ -0,0 +1,90 @@
+"""OpenAI Image Generation tool spec."""
+
+import os
+import base64
+import time
+
+from typing import Optional
+from llama_index.tools.tool_spec.base import BaseToolSpec
+
+DEFAULT_CACHE_DIR = "../../../img_cache"
+DEFAULT_SIZE = "1024x1024"  # the only size supported by both dall-e-2 and dall-e-3
+
+
+class OpenAIImageGenerationToolSpec(BaseToolSpec):
+    """OpenAI Image Generation tool spec."""
+
+    spec_functions = ["image_generation"]
+
+    def __init__(self, api_key: str, cache_dir: Optional[str] = None) -> None:
+        try:
+            from openai import OpenAI
+        except ImportError:
+            raise ImportError(
+                "Please install openai with `pip install openai` to use this tool"
+            )
+
+        """Initialize with parameters."""
+        self.client = OpenAI(api_key=api_key)
+        self.cache_dir = cache_dir or DEFAULT_CACHE_DIR
+
+    def get_cache_dir(self):
+        return self.cache_dir
+
+    def save_base64_image(self, base64_str, image_name):
+        try:
+            from PIL import Image
+            from io import BytesIO
+        except ImportError:
+            raise ImportError(
+                "Please install Pillow with `pip install Pillow` to use this tool"
+            )
+        cache_dir = self.cache_dir
+
+        # Create cache directory if it doesn't exist
+        if not os.path.exists(cache_dir):
+            os.makedirs(cache_dir)
+
+        # Decode the base64 string
+        image_data = base64.b64decode(base64_str)
+
+        # Create an image from the decoded bytes and save it
+        image_path = os.path.join(cache_dir, image_name)
+        with Image.open(BytesIO(image_data)) as img:
+            img.save(image_path)
+
+        return image_path
+
+    def image_generation(
+        self,
+        text: str,
+        model: Optional[str] = "dall-e-3",
+        quality: Optional[str] = "standard",
+        num_images: Optional[int] = 1,
+    ) -> str:
+        """
+        This tool accepts a natural language string and will use OpenAI's DALL-E model to generate an image.
+        The image is always generated at 1024x1024, saved to the cache directory, and its file path is returned.
+
+        args:
+            text (str): The text to generate an image from.
+            model (str): The model to use to generate the image (dall-e-3, dall-e-2).
+            quality (str): The quality of the image to generate (standard, hd).
+            num_images (int): The number of images to generate (only the first one is saved and returned).
+        """
+        response = self.client.images.generate(
+            model=model,
+            prompt=text,
+            size=DEFAULT_SIZE,
+            quality=quality,
+            n=num_images,
+            response_format="b64_json",
+        )
+
+        image_bytes = response.data[0].b64_json
+
+        filename = f"{time.time()}.jpg"
+
+        saved_image_path = self.save_base64_image(image_bytes, filename)
+
+        return saved_image_path
diff --git a/llama_hub/web/trafilatura_web/base.py b/llama_hub/web/trafilatura_web/base.py
@@ -14,7 +14,6 @@ class TrafilaturaWebReader(BaseReader):
     """
 
     def __init__(self) -> None:
-
         if find_spec("trafilatura") is None:
             raise ImportError(
                 "Missing package: trafilatura.\n"

diff --git a/tests/tests_microsoft_onedrive/test_onedrivereader_base.py b/tests/tests_microsoft_onedrive/test_onedrivereader_base.py
@@ -12,7 +12,6 @@
 
 
 def test_onedrivereader_init():
-
     client_id = "test_client_id"
     client_secret = "test_client_secret"
     tenant_id = "test_tenant_id"