Skip to content
This repository has been archived by the owner on Mar 1, 2024. It is now read-only.

feat: OpenAI Image Generation Tool #628

Merged
merged 10 commits into from
Nov 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@
.idea/
llama-hub.iml
llamahub/
img_cache/
1 change: 0 additions & 1 deletion llama_hub/microsoft_onedrive/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -501,7 +501,6 @@ def load_data(
List[Document]: A list of documents.
"""
try:

with tempfile.TemporaryDirectory() as temp_dir:
self._downloaded_files_metadata = self._init_download_and_get_metadata(
temp_dir=temp_dir,
Expand Down
4 changes: 3 additions & 1 deletion llama_hub/openalex/demo.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,9 @@
" citation_chunk_size=1024,\n",
")\n",
"# query the index\n",
"response = query_engine.query(\"list the biases in large language models in a markdown table\")"
"response = query_engine.query(\n",
" \"list the biases in large language models in a markdown table\"\n",
")"
]
},
{
Expand Down
13 changes: 12 additions & 1 deletion llama_hub/tools/library.json
Original file line number Diff line number Diff line change
Expand Up @@ -150,5 +150,16 @@
"symbolic",
"embedding"
]
}
},
"OpenAIImageGenerationToolSpec": {
"id": "tools/openai/image_generation",
"author": "manelferreira_",
"keywords": [
"image",
"vision",
"cv",
"openai",
"gpt-3"
]
}
}
2 changes: 1 addition & 1 deletion llama_hub/tools/notebooks/cogniswitch.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@
],
"source": [
"store_response = toolspec.store_data(\n",
" file = \"sample_file.txt\",\n",
" file=\"sample_file.txt\",\n",
" document_name=\"Cogniswitch file\",\n",
" document_description=\"Cogniswitch website\",\n",
")\n",
Expand Down
138 changes: 138 additions & 0 deletions llama_hub/tools/notebooks/multimodal_openai_image.ipynb

Large diffs are not rendered by default.

125 changes: 125 additions & 0 deletions llama_hub/tools/notebooks/openai_image_generation_agent.ipynb

Large diffs are not rendered by default.

6 changes: 5 additions & 1 deletion llama_hub/tools/notebooks/tavily.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,11 @@
}
],
"source": [
"print(agent.chat(\"Write a deep analysis in markdown syntax about the latest burning man floods\"))"
"print(\n",
" agent.chat(\n",
" \"Write a deep analysis in markdown syntax about the latest burning man floods\"\n",
" )\n",
")"
]
},
{
Expand Down
45 changes: 45 additions & 0 deletions llama_hub/tools/openai/image_generation/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# OpenAI Image Generation Tool

This tool allows Agents to generate images using OpenAI's DALL-E model. To see more and get started, visit https://openai.com/blog/dall-e/

## Usage

This tool has a more extensive example usage documented in a Jupyter notebook [here](https://github.com/emptycrown/llama-hub/tree/main/llama_hub/tools/notebooks/openai_image_generation_agent.ipynb)

### Usage with Agent
```python
from llama_hub.tools.openai.image_generation import OpenAIImageGenerationToolSpec

image_generation_tool = OpenAIImageGenerationToolSpec(api_key=os.environ["OPENAI_API_KEY"])

agent = OpenAIAgent.from_tools(
[*image_generation_tool.to_tool_list()],
verbose=True,
)

response = agent.query('A pink and blue llama in a black background with the output')

print(response)
```

### Usage directly
```python
import os

from PIL import Image

from llama_hub.tools.openai.image_generation import OpenAIImageGenerationToolSpec

image_generation_tool = OpenAIImageGenerationToolSpec(api_key=os.environ["OPENAI_API_KEY"])

# image_generation saves the image in the cache directory and returns its path
image_path = image_generation_tool.image_generation(
    text="A pink and blue llama with a black background",
)

img = Image.open(image_path)

display(img)
```

`image_generation`: Takes a text input, generates an image, and returns the path of the saved image file

This loader is designed to be used as a way to load data as a Tool in an Agent. See [here](https://github.com/emptycrown/llama-hub/tree/main) for examples.
6 changes: 6 additions & 0 deletions llama_hub/tools/openai/image_generation/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
## init file
from llama_hub.tools.openai.image_generation.base import (
OpenAIImageGenerationToolSpec,
)

__all__ = ["OpenAIImageGenerationToolSpec"]
90 changes: 90 additions & 0 deletions llama_hub/tools/openai/image_generation/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
"""OpenAI Image Generation tool spec."""

import os
import base64
import time

from typing import Optional
from llama_index.tools.tool_spec.base import BaseToolSpec

# Directory generated images are saved to when the tool spec is created
# without an explicit cache_dir. The path is relative, so it is resolved
# against the process's current working directory when an image is saved.
DEFAULT_CACHE_DIR = "../../../img_cache"
# Default image size sent to the OpenAI images API.
# NOTE(review): the original note said "Dall-e-3 only supports 1024x1024";
# OpenAI's API reference also lists 1792x1024 and 1024x1792 for dall-e-3 --
# confirm before relying on this.
DEFAULT_SIZE = "1024x1024"


class OpenAIImageGenerationToolSpec(BaseToolSpec):
    """OpenAI Image Generation tool spec.

    Exposes ``image_generation`` to agents: it sends a text prompt to the
    OpenAI images API, saves the returned image in a local cache directory
    and hands back the path of the saved file.
    """

    spec_functions = ["image_generation"]

    def __init__(
        self, api_key: Optional[str] = None, cache_dir: Optional[str] = None
    ) -> None:
        """Initialize with parameters.

        Args:
            api_key (Optional[str]): OpenAI API key. Optional because the key
                may already be in the environment: when ``None`` is passed,
                the OpenAI client falls back to the ``OPENAI_API_KEY``
                environment variable.
            cache_dir (Optional[str]): Directory generated images are saved
                to. Falls back to ``DEFAULT_CACHE_DIR`` when not given.

        Raises:
            ImportError: If the ``openai`` package is not installed.
        """
        # Imported lazily so the package is only required when the tool is used.
        try:
            from openai import OpenAI
        except ImportError:
            raise ImportError(
                "Please install openai with `pip install openai` to use this tool"
            )

        self.client = OpenAI(api_key=api_key)
        self.cache_dir = cache_dir or DEFAULT_CACHE_DIR

    def get_cache_dir(self) -> str:
        """Return the directory generated images are saved to."""
        return self.cache_dir

    def save_base64_image(self, base64_str: str, image_name: str) -> str:
        """Decode a base64-encoded image and save it in the cache directory.

        Args:
            base64_str (str): The base64-encoded image data.
            image_name (str): File name, including extension, to save the
                image under inside the cache directory.

        Returns:
            str: The path of the saved image file.

        Raises:
            ImportError: If the ``Pillow`` package is not installed.
        """
        try:
            from PIL import Image
            from io import BytesIO
        except ImportError:
            raise ImportError(
                "Please install Pillow with `pip install Pillow` to use this tool"
            )
        cache_dir = self.cache_dir

        # Create the cache directory if it doesn't exist. exist_ok avoids the
        # race between checking for the directory and creating it.
        os.makedirs(cache_dir, exist_ok=True)

        # Decode the base64 string
        image_data = base64.b64decode(base64_str)

        # Create an image from the decoded bytes and save it
        image_path = os.path.join(cache_dir, image_name)
        with Image.open(BytesIO(image_data)) as img:
            img.save(image_path)

        return image_path

    def image_generation(
        self,
        text: str,
        model: Optional[str] = "dall-e-3",
        quality: Optional[str] = "standard",
        num_images: Optional[int] = 1,
        size: Optional[str] = DEFAULT_SIZE,
    ) -> str:
        """
        This tool accepts a natural language string and will use OpenAI's DALL-E model to generate an image.

        args:
            text (str): The text to generate an image from.
            model (str): The model to use to generate the image (dall-e-3, dall-e-2).
            quality (str): The quality of the image to generate (standard, hd).
            num_images (int): The number of images to generate. Only the first generated image is saved and returned.
            size (str): The size of the image to generate. Defaults to 1024x1024, which both models accept (dall-e-2 also accepts 256x256 and 512x512; dall-e-3 also accepts 1792x1024 and 1024x1792).

        returns:
            str: The path of the saved image file.
        """
        response = self.client.images.generate(
            model=model,
            prompt=text,
            # A caller passing size=None still gets the default size.
            size=size or DEFAULT_SIZE,
            quality=quality,
            n=num_images,
            response_format="b64_json",
        )

        # Only the first image of the response is kept.
        image_b64 = response.data[0].b64_json

        # Timestamp-based name keeps successive generations from overwriting
        # each other in the cache directory.
        filename = f"{time.time()}.jpg"

        return self.save_base64_image(image_b64, filename)
1 change: 0 additions & 1 deletion llama_hub/web/trafilatura_web/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ class TrafilaturaWebReader(BaseReader):
"""

def __init__(self) -> None:

if find_spec("trafilatura") is None:
raise ImportError(
"Missing package: trafilatura.\n"
Expand Down
1 change: 0 additions & 1 deletion tests/tests_microsoft_onedrive/test_onedrivereader_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@


def test_onedrivereader_init():

client_id = "test_client_id"
client_secret = "test_client_secret"
tenant_id = "test_tenant_id"
Expand Down