Skip to content

Commit

Permalink
Testing shrunken images to speed up OS mode
Browse files Browse the repository at this point in the history
  • Loading branch information
KillianLucas committed Dec 15, 2023
1 parent c492887 commit 8f2ebde
Show file tree
Hide file tree
Showing 5 changed files with 32 additions and 12 deletions.
7 changes: 0 additions & 7 deletions interpreter/core/computer/display/display.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,6 @@ def screenshot(self, show=True, quadrant=None):
else:
raise ValueError("Invalid quadrant. Choose between 1 and 4.")

# Shrink screenshot so it's no more than 1024 wide
screenshot_width, screenshot_height = screenshot.size
if screenshot_width > 1024:
ratio = 1024.0 / screenshot_width
new_height = int(screenshot_height * ratio)
screenshot = screenshot.resize((1024, new_height))

screenshot.save(temp_file.name)

# Open the image file with PIL
Expand Down
6 changes: 4 additions & 2 deletions interpreter/core/llm/convert_to_coding_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
from .setup_text_llm import setup_text_llm


def convert_to_coding_llm(text_llm, debug_mode=False, vision=False):
def convert_to_coding_llm(
text_llm, debug_mode=False, vision=False, shrink_images=False
):
"""
Takes a text_llm
returns an OI Coding LLM.
Expand Down Expand Up @@ -49,7 +51,7 @@ def coding_llm(messages):
'''

messages = convert_to_openai_messages(
messages, function_calling=False, vision=vision
messages, function_calling=False, vision=vision, shrink_images=shrink_images
)

inside_code_block = False
Expand Down
5 changes: 4 additions & 1 deletion interpreter/core/llm/setup_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,10 @@ def setup_llm(interpreter):
# Non-function-calling LLM
text_llm = setup_text_llm(interpreter)
coding_llm = convert_to_coding_llm(
text_llm, debug_mode=interpreter.debug_mode, vision=interpreter.vision
text_llm,
debug_mode=interpreter.debug_mode,
vision=interpreter.vision,
shrink_images=interpreter.os,
)

return coding_llm
23 changes: 22 additions & 1 deletion interpreter/core/utils/convert_to_openai_messages.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
import base64
import io
import json

from PIL import Image

def convert_to_openai_messages(messages, function_calling=True, vision=False):

def convert_to_openai_messages(
messages, function_calling=True, vision=False, shrink_images=True
):
"""
Converts LMC messages into OpenAI messages
"""
Expand Down Expand Up @@ -79,6 +84,22 @@ def convert_to_openai_messages(messages, function_calling=True, vision=False):
# Construct the content string
content = f"data:image/{extension};base64,{message['content']}"

if shrink_images:
# Decode the base64 image
img_data = base64.b64decode(message["content"])
img = Image.open(io.BytesIO(img_data))

# Resize the image if its width is more than 1024
if img.width > 1024:
new_height = int(img.height * 1024 / img.width)
img = img.resize((1024, new_height))

# Convert the image back to base64
buffered = io.BytesIO()
img.save(buffered, format=extension)
img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
content = f"data:image/{extension};base64,{img_str}"

elif message["format"] == "path":
# Convert to base64
image_path = message["content"]
Expand Down
3 changes: 2 additions & 1 deletion interpreter/terminal_interface/terminal_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -377,8 +377,9 @@ def terminal_interface(interpreter, message):
# Display action notifications if we're in OS mode
if interpreter.os and active_block.active_line != None:
action = active_block.code.split("\n")[
active_block.active_line
active_block.active_line - 1
].strip()
print(action)
if action.startswith("computer"):
description = None

Expand Down

0 comments on commit 8f2ebde

Please sign in to comment.