Skip to content

Commit

Permalink
The New Computer Update
Browse files Browse the repository at this point in the history
  • Loading branch information
KillianLucas committed Dec 10, 2023
1 parent aa1d2a8 commit 1996b97
Show file tree
Hide file tree
Showing 12 changed files with 154 additions and 96 deletions.
39 changes: 23 additions & 16 deletions interpreter/core/computer/keyboard/keyboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,36 +5,43 @@

import pyautogui

pyautogui.FAILSAFE = False


class Keyboard:
def write(self, text):
    """Type `text` on the keyboard, word by word, with a short random
    pause after each word to simulate human typing.

    Args:
        text: The string to type. It is split on single spaces; the
            spaces are re-inserted between words as they are typed.
    """
    # Split the text into words
    words = text.split(" ")

    # Type each word with a space after it, unless it's the last word
    for i, word in enumerate(words):
        # Type the word
        pyautogui.write(word)
        # Re-insert the space that split() consumed, except after the last word
        if i != len(words) - 1:
            pyautogui.write(" ")
        # Add a delay after each word to simulate ChatGPT
        time.sleep(random.uniform(0.1, 0.3))

def press(self, keys):
    """Press the given key (or list of keys) once via pyautogui.

    `keys` uses pyautogui key names — presumably a single key string or a
    list of them; confirm against pyautogui.press's accepted forms.
    """
    pyautogui.press(keys)

def hotkey(self, *args):
    """Press a key combination (e.g. hotkey("command", "space")).

    On macOS, pyautogui.hotkey seems to not work for two-key combos, so we
    fall back to AppleScript via System Events; everywhere else (and for
    combos of other sizes) we delegate to pyautogui.hotkey.
    """
    modifiers = {"command", "control", "option", "shift"}
    if "darwin" in platform.system().lower() and len(args) == 2:
        # Determine which argument is the keystroke and which is the modifier
        keystroke, modifier = args if args[0] not in modifiers else args[::-1]

        # Create the AppleScript. AppleScript modifier flags must be written
        # as e.g. "command down", not bare "command" — the previous working
        # version used `using {command down}`.
        script = f"""
        tell application "System Events"
            keystroke "{keystroke}" using {modifier} down
        end tell
        """

        # Execute the AppleScript
        os.system("osascript -e '{}'".format(script))
    else:
        pyautogui.hotkey(*args)

def down(self, key):
    """Hold `key` down without releasing it (pyautogui.keyDown)."""
    pyautogui.keyDown(key)
Expand Down
4 changes: 1 addition & 3 deletions interpreter/core/computer/mouse/mouse.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@

from ..utils.computer_vision import find_text_in_image

pyautogui.FAILSAFE = False


class Mouse:
def __init__(self, computer):
Expand All @@ -33,7 +31,7 @@ def move(self, *args, x=None, y=None, index=None, svg=None):
if len(centers) > 1:
if index == None:
print(
f"This text ('{text}') was found multiple times on screen. Please try 'click()' again, but pass in an `index` int to identify which one you want to click. The indices have been drawn on the attached image."
f"(Message for language model) This text ('{text}') was found multiple times on screen. Please try 'click()' again, but pass in an `index` int to identify which one you want to click. The indices have been drawn on the image."
)
# Show the image using matplotlib
plt.imshow(np.array(bounding_box_image))
Expand Down
57 changes: 45 additions & 12 deletions interpreter/core/computer/terminal/languages/react.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"""
Test this more— I don't think it understands the environment it's in. It tends to write "require" for example. Also make sure errors go back into it (console.log type stuff)
"""
import re

from ...utils.html_to_png_base64 import html_to_png_base64
from ..base_language import BaseLanguage

template = """<!DOCTYPE html>
Expand All @@ -27,18 +26,52 @@
</html>"""


class HTML(BaseLanguage):
file_extension = "html"
proper_name = "React"
def is_incompatible(code):
    """Return True if `code` uses module-loading syntax that the in-page
    Babel/React template cannot execute (CommonJS `require(...)` or ES6
    `import`/`export` statements).

    Only the first five lines are inspected, since module statements
    conventionally appear at the top of a snippet; a `require(...)` buried
    deeper (or not at line start) is not detected.
    """
    lines = code.split("\n")

    # Check for require statements or ES6 import/export statements at the
    # start of any of the first few lines
    for line in lines[:5]:
        if re.match(r"\s*require\(", line):
            return True
        if re.match(r"\s*import\s", line) or re.match(r"\s*export\s", line):
            return True

    return False


class React(BaseLanguage):
    """Runs React/JSX snippets by splicing them into an HTML template
    (React, ReactDOM and Babel are loaded by the template) and yielding
    that HTML to the user plus a rendered PNG to the assistant.
    """

    name = "React"
    file_extension = "html"
    system_message = "When you execute code with `react`, your react code will be run in a script tag after being inserted into the HTML template, following the installation of React, ReactDOM, and Babel for JSX parsing. **We will handle this! Don't make an HTML file to run React, just execute `react`.**"

    def run(self, code):
        # Reject module syntax (require/import/export) the template can't run.
        if is_incompatible(code):
            yield {
                "type": "console",
                "format": "output",
                "content": f"Error: React format not supported. {self.system_message} Therefore some things like `require` and 'import' aren't supported.",
                "recipient": "assistant",
            }
            return

        # Splice the user's code into the page template.
        code = template.replace("{insert_react_code}", code)

        yield {
            "type": "console",
            "format": "output",
            "content": "React is being displayed on the user's machine...",
            "recipient": "assistant",
        }

        # User sees interactive HTML
        yield {"type": "code", "format": "html", "content": code, "recipient": "user"}

        # Assistant sees image
        base64 = html_to_png_base64(code)
        yield {
            "type": "image",
            "format": "base64.png",
            "content": base64,
            "recipient": "assistant",
        }
1 change: 1 addition & 0 deletions interpreter/core/computer/terminal/languages/shell.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
class Shell(SubprocessLanguage):
file_extension = "sh"
name = "Shell"
aliases = ["bash", "sh", "zsh"]

def __init__(
self,
Expand Down
35 changes: 20 additions & 15 deletions interpreter/core/computer/terminal/terminal.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,30 +4,35 @@
from .languages.powershell import PowerShell
from .languages.python import Python
from .languages.r import R
from .languages.react import React
from .languages.shell import Shell

# Maps a user-facing language name (lowercase) to the class that executes it.
# Several names alias the same runner (bash/shell/sh/zsh all map to Shell).
language_map = {
    "python": Python,
    "bash": Shell,
    "shell": Shell,
    "sh": Shell,
    "zsh": Shell,
    "javascript": JavaScript,
    "html": HTML,
    "applescript": AppleScript,
    "r": R,
    "powershell": PowerShell,
}


class Terminal:
def __init__(self):
    """Set up the registry of runnable language classes and the map of
    already-started language instances.
    """
    # Language classes this terminal can instantiate on demand.
    self.languages = [
        Python,
        Shell,
        JavaScript,
        HTML,
        AppleScript,
        R,
        PowerShell,
        React,
    ]
    # Maps a language key to its live (already constructed) instance.
    self._active_languages = {}

def get_language(self, language):
    """Return the language class whose name (case-insensitive) or alias
    list matches `language`, or None if nothing matches.
    """
    target = language.lower()
    for candidate in self.languages:
        if target == candidate.name.lower():
            return candidate
        # Aliases are matched as given (not lowercased), same as before.
        if hasattr(candidate, "aliases") and language in candidate.aliases:
            return candidate
    return None

def run(self, language, code):
if language not in self._active_languages:
self._active_languages[language] = language_map[language]()
self._active_languages[language] = self.get_language(language)()
try:
yield from self._active_languages[language].run(code)
except GeneratorExit:
Expand Down
15 changes: 9 additions & 6 deletions interpreter/core/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,13 +124,16 @@ def _streaming_chat(self, message=None, display=True):
elif isinstance(message, list):
self.messages = message

# DISABLED because I think we should just not transmit images to non-multimodal models?
# REENABLE this when multimodal becomes more common:

# Make sure we're using a model that can handle this
if not self.vision:
for message in self.messages:
if message["type"] == "image":
raise Exception(
"Use a multimodal model and set `interpreter.vision` to True to handle image messages."
)
# if not self.vision:
# for message in self.messages:
# if message["type"] == "image":
# raise Exception(
# "Use a multimodal model and set `interpreter.vision` to True to handle image messages."
# )

# This is where it all happens!
yield from self._respond_and_store()
Expand Down
7 changes: 5 additions & 2 deletions interpreter/core/generate_system_message.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,12 @@ def generate_system_message(interpreter):
try:
system_message += "\n" + get_relevant_procedures_string(interpreter)
except:
raise
if interpreter.debug_mode:
print(traceback.format_exc())
# It's okay if they can't. This just fixes some common mistakes it makes.

return system_message
for language in interpreter.computer.terminal.languages:
if hasattr(language, "system_message"):
system_message += "\n\n" + language.system_message

return system_message.strip()
4 changes: 3 additions & 1 deletion interpreter/core/llm/setup_text_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,10 @@ def base_llm(messages):
except TypeError as e:
if interpreter.vision and str(e) == "expected string or buffer":
# There's just no way to use tokentrim on vision-enabled models yet.
# We instead handle this outside setup_text_llm!

if interpreter.debug_mode:
print("Couldn't token trim image messages. Error:", e)
print("Won't token trim image messages. ", e)

### DISABLED image trimming
# To maintain the order of messages while simulating trimming, we will iterate through the messages
Expand Down
25 changes: 23 additions & 2 deletions interpreter/core/respond.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,26 @@ def respond(interpreter):
messages_for_llm = interpreter.messages.copy()
messages_for_llm = [system_message] + messages_for_llm

# Trim image messages if they're there
if interpreter.vision:
image_messages = [msg for msg in messages_for_llm if msg["type"] == "image"]

if interpreter.os:
# Keep only the last image if the interpreter is running in OS mode
if len(image_messages) > 1:
for img_msg in image_messages[:-1]:
messages_for_llm.remove(img_msg)
if interpreter.debug_mode:
print("Removing image message!")
else:
# Delete all the middle ones (leave only the first and last 2 images) from messages_for_llm
if len(image_messages) > 3:
for img_msg in image_messages[1:-2]:
messages_for_llm.remove(img_msg)
if interpreter.debug_mode:
print("Removing image message!")
# Idea: we could set detail: low for the middle messages, instead of deleting them

### RUN THE LLM ###

try:
Expand Down Expand Up @@ -74,7 +94,8 @@ def respond(interpreter):
)
elif interpreter.local:
raise Exception(
str(e)
"Error occurred. "
+ str(e)
+ """
Please make sure LM Studio's local server is running by following the steps above, if you're using LM Studio (recommended).
Expand Down Expand Up @@ -133,7 +154,7 @@ def respond(interpreter):
break

# don't let it import computer on os mode — we handle that!
if interpreter.os:
if interpreter.os and language == "python":
code = code.replace("import computer", "")

# yield each line
Expand Down
30 changes: 3 additions & 27 deletions interpreter/core/utils/scan_code.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import os
import subprocess

from ..computer.terminal.terminal import language_map
from .temporary_file import cleanup_temporary_file, create_temporary_file

try:
Expand All @@ -11,37 +10,14 @@
pass


def get_language_file_extension(language_name):
    """Get the file extension for a given language.

    Falls back to returning the language class itself when its
    `file_extension` attribute is falsy (preserves historical behavior).
    """
    language = language_map[language_name.lower()]
    return language.file_extension if language.file_extension else language


def get_language_name(language_name):
    """Get the proper name for a given language.

    Falls back to returning the language class itself when its `name`
    attribute is falsy (preserves historical behavior).
    """
    language = language_map[language_name.lower()]
    return language.name if language.name else language


def scan_code(code, language, interpreter):
"""
Scan code with semgrep
"""
language_class = interpreter.computer.terminal.get_language(language)

temp_file = create_temporary_file(
code, get_language_file_extension(language), verbose=interpreter.debug_mode
code, language_class.file_extension, verbose=interpreter.debug_mode
)

temp_path = os.path.dirname(temp_file)
Expand All @@ -65,7 +41,7 @@ def scan_code(code, language, interpreter):
)

if scan.returncode == 0:
language_name = get_language_name(language)
language_name = language_class.name
print(
f" {'Code Scanner: ' if interpreter.safe_mode == 'auto' else ''}No issues were found in this {language_name} code."
)
Expand Down
Loading

0 comments on commit 1996b97

Please sign in to comment.