Skip to content
This repository has been archived by the owner on Mar 1, 2024. It is now read-only.

Commit

Permalink
feat: OpenAI Image Generation Tool (#628)
Browse files Browse the repository at this point in the history
* feat: dall-e-3

* chore: remove checkpoints

* lint

* cr

* chore: use multi-modal as an example

* chore: delete checkpoint

* chore: fix tests and lint

* cr

* lint
EmanuelCampos authored Nov 18, 2023
1 parent c128839 commit a96e996
Showing 13 changed files with 426 additions and 7 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -8,3 +8,4 @@
.idea/
llama-hub.iml
llamahub/
img_cache/
1 change: 0 additions & 1 deletion llama_hub/microsoft_onedrive/base.py
Original file line number Diff line number Diff line change
@@ -501,7 +501,6 @@ def load_data(
List[Document]: A list of documents.
"""
try:

with tempfile.TemporaryDirectory() as temp_dir:
self._downloaded_files_metadata = self._init_download_and_get_metadata(
temp_dir=temp_dir,
4 changes: 3 additions & 1 deletion llama_hub/openalex/demo.ipynb
Original file line number Diff line number Diff line change
@@ -53,7 +53,9 @@
" citation_chunk_size=1024,\n",
")\n",
"# query the index\n",
"response = query_engine.query(\"list the biases in large language models in a markdown table\")"
"response = query_engine.query(\n",
" \"list the biases in large language models in a markdown table\"\n",
")"
]
},
{
13 changes: 12 additions & 1 deletion llama_hub/tools/library.json
Original file line number Diff line number Diff line change
@@ -150,5 +150,16 @@
"symbolic",
"embedding"
]
}
},
"OpenAIImageGenerationToolSpec": {
"id": "tools/openai/image_generation",
"author": "manelferreira_",
"keywords": [
"image",
"vision",
"cv",
"openai",
"gpt-3"
]
}
}
2 changes: 1 addition & 1 deletion llama_hub/tools/notebooks/cogniswitch.ipynb
Original file line number Diff line number Diff line change
@@ -145,7 +145,7 @@
],
"source": [
"store_response = toolspec.store_data(\n",
" file = \"sample_file.txt\",\n",
" file=\"sample_file.txt\",\n",
" document_name=\"Cogniswitch file\",\n",
" document_description=\"Cogniswitch website\",\n",
")\n",
138 changes: 138 additions & 0 deletions llama_hub/tools/notebooks/multimodal_openai_image.ipynb

Large diffs are not rendered by default.

125 changes: 125 additions & 0 deletions llama_hub/tools/notebooks/openai_image_generation_agent.ipynb

Large diffs are not rendered by default.

6 changes: 5 additions & 1 deletion llama_hub/tools/notebooks/tavily.ipynb
Original file line number Diff line number Diff line change
@@ -196,7 +196,11 @@
}
],
"source": [
"print(agent.chat(\"Write a deep analysis in markdown syntax about the latest burning man floods\"))"
"print(\n",
" agent.chat(\n",
" \"Write a deep analysis in markdown syntax about the latest burning man floods\"\n",
" )\n",
")"
]
},
{
45 changes: 45 additions & 0 deletions llama_hub/tools/openai/image_generation/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# OpenAI Image Generation Tool

This tool allows Agents to generate images using OpenAI's DALL-E model. To see more and get started, visit https://openai.com/blog/dall-e/

## Usage

This tool has a more extensive example usage documented in a Jupyter notebook [here](https://github.com/emptycrown/llama-hub/tree/main/llama_hub/tools/notebooks/openai_image_generation_agent.ipynb)

### Usage with Agent
```python
from llama_hub.tools.openai.image_generation import OpenAIImageGenerationToolSpec

image_generation_tool = OpenAIImageGenerationToolSpec(api_key=os.environ["OPENAI_API_KEY"])

agent = OpenAIAgent.from_tools(
[*image_generation_tool.to_tool_list()],
verbose=True,
)

response = agent.query('A pink and blue llama in a black background with the output')

print(response)
```

### Usage directly
```python
from llama_hub.tools.openai.image_generation import OpenAIImageGenerationToolSpec

image_generation_tool = OpenAIImageGenerationToolSpec(api_key=os.environ["OPENAI_API_KEY"])

# `image_generation` saves the image to the cache directory and returns its path
image_path = image_generation_tool.image_generation(
    text="A pink and blue llama with a black background",
)

img = Image.open(image_path)

display(img)
```

`image_generation`: Takes a text input, generates an image, and returns the path of the saved image file

This tool is designed to be used as a Tool in an Agent. See [here](https://github.com/emptycrown/llama-hub/tree/main) for examples.
6 changes: 6 additions & 0 deletions llama_hub/tools/openai/image_generation/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Package entry point: re-export the tool spec so that it can be imported
# directly from `llama_hub.tools.openai.image_generation`.
from llama_hub.tools.openai.image_generation.base import (
    OpenAIImageGenerationToolSpec,
)

__all__ = ["OpenAIImageGenerationToolSpec"]
90 changes: 90 additions & 0 deletions llama_hub/tools/openai/image_generation/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
"""OpenAI Image Generation tool spec."""

import os
import base64
import time

from typing import Optional
from llama_index.tools.tool_spec.base import BaseToolSpec

# Fallback directory for generated images, used when no `cache_dir` is given.
# It is a relative path, so it is resolved against the process's current
# working directory at the time an image is written.
DEFAULT_CACHE_DIR = "../../../img_cache"
DEFAULT_SIZE = "1024x1024"  # Dall-e-3 only supports 1024x1024


class OpenAIImageGenerationToolSpec(BaseToolSpec):
    """OpenAI Image Generation tool spec.

    Exposes ``image_generation`` to agents: it sends a text prompt to the
    OpenAI Images API, writes the returned image into a local cache directory
    and hands back the path of the written file.
    """

    spec_functions = ["image_generation"]

    def __init__(self, api_key: str, cache_dir: Optional[str] = None) -> None:
        """Initialize with parameters.

        Args:
            api_key (str): OpenAI API key used to build the client.
            cache_dir (Optional[str]): Directory in which generated images are
                stored. ``DEFAULT_CACHE_DIR`` is used when omitted or empty.

        Raises:
            ImportError: If the ``openai`` package is not installed.
        """
        # Imported lazily so the package can be imported without `openai`.
        try:
            from openai import OpenAI
        except ImportError as err:
            raise ImportError(
                "Please install openai with `pip install openai` to use this tool"
            ) from err

        self.client = OpenAI(api_key=api_key)
        self.cache_dir = cache_dir or DEFAULT_CACHE_DIR

    def get_cache_dir(self) -> str:
        """Return the directory in which generated images are stored."""
        return self.cache_dir

    def save_base64_image(self, base64_str: str, image_name: str) -> str:
        """Decode a base64-encoded image and save it in the cache directory.

        Args:
            base64_str (str): Base64-encoded image data.
            image_name (str): File name (including extension) to save under.

        Returns:
            str: Path of the saved image, i.e. ``cache_dir`` joined with
            ``image_name``.

        Raises:
            ImportError: If Pillow is not installed.
        """
        # Imported lazily so the package can be imported without Pillow.
        try:
            from PIL import Image
            from io import BytesIO
        except ImportError as err:
            raise ImportError(
                "Please install Pillow with `pip install Pillow` to use this tool"
            ) from err

        cache_dir = self.cache_dir

        # exist_ok avoids the check-then-create race when several callers
        # generate images into the same directory at once.
        os.makedirs(cache_dir, exist_ok=True)

        # Decode the base64 string
        image_data = base64.b64decode(base64_str)

        # Create an image from the decoded bytes and save it. Pillow infers
        # the output format from the extension of `image_name`.
        image_path = os.path.join(cache_dir, image_name)
        with Image.open(BytesIO(image_data)) as img:
            img.save(image_path)

        return image_path

    def image_generation(
        self,
        text: str,
        model: Optional[str] = "dall-e-3",
        quality: Optional[str] = "standard",
        num_images: Optional[int] = 1,
        size: Optional[str] = DEFAULT_SIZE,
    ) -> str:
        """
        This tool accepts a natural language string and will use OpenAI's DALL-E model to generate an image.

        Args:
            text (str): The text to generate an image from.
            model (str): The model to use to generate the image (dall-e-3, dall-e-2).
            quality (str): The quality of the image to generate (standard, hd).
            num_images (int): The number of images to generate. Only the first generated image is saved and returned.
            size (str): The size of the image to generate, e.g. 1024x1024 (the default). The accepted sizes depend on the model.

        Returns:
            str: The path of the saved image file.
        """
        response = self.client.images.generate(
            model=model,
            prompt=text,
            # Fall back to the default when a caller explicitly passes None/"".
            size=size or DEFAULT_SIZE,
            quality=quality,
            n=num_images,
            response_format="b64_json",
        )

        # The return type is a single path, so only the first image is kept.
        image_bytes = response.data[0].b64_json

        # Timestamp-based name keeps successive generations from overwriting
        # each other in the cache directory.
        filename = f"{time.time()}.jpg"

        saved_image_path = self.save_base64_image(image_bytes, filename)

        return saved_image_path
1 change: 0 additions & 1 deletion llama_hub/web/trafilatura_web/base.py
Original file line number Diff line number Diff line change
@@ -14,7 +14,6 @@ class TrafilaturaWebReader(BaseReader):
"""

def __init__(self) -> None:

if find_spec("trafilatura") is None:
raise ImportError(
"Missing package: trafilatura.\n"
1 change: 0 additions & 1 deletion tests/tests_microsoft_onedrive/test_onedrivereader_base.py
Original file line number Diff line number Diff line change
@@ -12,7 +12,6 @@


def test_onedrivereader_init():

client_id = "test_client_id"
client_secret = "test_client_secret"
tenant_id = "test_tenant_id"

0 comments on commit a96e996

Please sign in to comment.