Fix #83 Read image data within the convo with the bot #87

Closed
wants to merge 17 commits

Changes from 7 commits
89 changes: 66 additions & 23 deletions app/bolt_listeners.py
@@ -1,5 +1,6 @@
import json
import logging
import os
import re
import time
from typing import Optional
@@ -35,7 +36,7 @@
build_thread_replies_as_combined_text,
)

-from app.utils import redact_string
+from app.utils import download_and_encode_image, redact_string

#
# Listener functions
@@ -66,9 +67,11 @@ def respond_to_app_mention(
client: WebClient,
logger: logging.Logger,
):
-    if payload.get("thread_ts") is not None:
+    logger.info(f"Received an app mention: {payload}")
+    thread_ts = payload.get("thread_ts")
+    if thread_ts is not None:
        parent_message = find_parent_message(
-            client, context.channel_id, payload.get("thread_ts")
+            client, context.channel_id, thread_ts
)
if parent_message is not None and is_this_app_mentioned(
context, parent_message
@@ -91,41 +94,54 @@ def respond_to_app_mention(
return

user_id = context.actor_user_id or context.user_id

-    if payload.get("thread_ts") is not None:
+    if thread_ts is not None:
        # Mentioning the bot user in a thread
        replies_in_thread = client.conversations_replies(
            channel=context.channel_id,
-            ts=payload.get("thread_ts"),
+            ts=thread_ts,
            include_all_metadata=True,
            limit=1000,
        ).get("messages", [])
        for reply in replies_in_thread:
            reply_text = redact_string(reply.get("text"))
-            messages.append(
+            content = [
                {
-                    "role": (
-                        "assistant"
-                        if "user" in reply and reply["user"] == context.bot_user_id
-                        else "user"
-                    ),
-                    "content": (
-                        f"<@{reply['user'] if 'user' in reply else reply['username']}>: "
-                        + format_openai_message_content(
-                            reply_text, TRANSLATE_MARKDOWN
-                        )
-                    ),
+                    "type": "text",
+                    "text": f"<@{reply['user'] if 'user' in reply else reply['username']}>: "
+                    + format_openai_message_content(reply_text, TRANSLATE_MARKDOWN),
                }
-            )
+            ]
+
+            # Add images if any
+            files = reply.get("files")
+            if files:
+                append_image_content(files, content)
+
+            messages.append(
+                {
+                    "role": "assistant" if "user" in reply and reply["user"] == context.bot_user_id else "user",
+                    "content": content,
+                }
+            )
    else:
        # Strip bot Slack user ID from initial message
        msg_text = re.sub(f"<@{context.bot_user_id}>\\s*", "", payload["text"])
        msg_text = redact_string(msg_text)
+        text_item = {
+            "type": "text",
+            "text": f"<@{user_id}>: "
+            + format_openai_message_content(msg_text, TRANSLATE_MARKDOWN),
+        }
+        content = [text_item]
+        # Also attach any images from the message; content holds both text and image_url items
+        files = payload.get("files")
+        if files:
+            append_image_content(files, content)
+
        messages.append(
            {
                "role": "user",
-                "content": f"<@{user_id}>: "
-                + format_openai_message_content(msg_text, TRANSLATE_MARKDOWN),
+                "content": content,
            }
        )
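For reference, both branches above now build a multimodal content list rather than a plain string. A minimal sketch (not from the PR) of the message this produces for a post carrying one image; the user ID and base64 payload are placeholders:

# Illustrative shape only; values below are placeholders.
example_message = {
    "role": "user",
    "content": [
        {"type": "text", "text": "<@U123ABC>: what is in this picture?"},
        {
            "type": "image_url",
            "image_url": {"url": "data:image/png;base64,iVBORw0KGgo..."},
        },
    ],
}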

@@ -223,6 +239,23 @@ def respond_to_app_mention(
)


def append_image_content(files, content):
    # The listener loggers are not in scope here; fall back to a module-level logger
    logger = logging.getLogger(__name__)
    for file in files:
        mime_type = file.get("mimetype")
        if mime_type and mime_type.startswith("image"):
            encoded_image, image_format = download_and_encode_image(
                file.get("url_private"), os.environ["SLACK_BOT_TOKEN"]
            )
            if image_format.lower() in ["jpeg", "png", "gif"]:
                image_item = {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:{mime_type};base64,{encoded_image}"
                    },
                }
                content.append(image_item)
            else:
                logger.debug(f"Image format {image_format} is not supported")
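A usage sketch for append_image_content, assuming a hypothetical Slack file entry; mimetype and url_private are the real Slack file fields the helper reads, but the values here are placeholders, and the call performs a real download via download_and_encode_image:

# Assumes SLACK_BOT_TOKEN is set and url_private points at a reachable Slack file.
content = [{"type": "text", "text": "<@U123ABC>: describe this image"}]
files = [
    {"mimetype": "image/png", "url_private": "https://files.slack.com/files-pri/T000-F000/cat.png"},
]
append_image_content(files, content)
# On success, content gains one image_url item carrying a base64 data URL.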


def respond_to_new_message(
context: BoltContext,
payload: dict,
@@ -245,6 +278,7 @@ def respond_to_new_message(
if is_in_dm_with_bot is False and thread_ts is None:
return

logger.debug(f"Received an new message: {payload}")
messages_in_context = []
if is_in_dm_with_bot is True and thread_ts is None:
# In the DM with the bot; this is not within a thread
@@ -341,10 +375,19 @@
for reply in filtered_messages_in_context:
msg_user_id = reply.get("user")
reply_text = redact_string(reply.get("text"))
-            messages.append(
+            content = [
                {
-                    "content": f"<@{msg_user_id}>: "
+                    "type": "text",
+                    "text": f"<@{msg_user_id}>: "
                    + format_openai_message_content(reply_text, TRANSLATE_MARKDOWN),
+                }
+            ]
+            files = reply.get("files")
+            if files:

Owner: same as above

+                append_image_content(files, content)
+            messages.append(
+                {
+                    "content": content,
"role": (
"assistant"
if "user" in reply and reply["user"] == context.bot_user_id
140 changes: 81 additions & 59 deletions app/openai_ops.py
@@ -359,76 +359,98 @@ def context_length(
raise NotImplementedError(error)


-# Adapted from https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb

Owner: please don't completely delete this comment; you can say "initially adapted from ... and then we customized it to support image file scenarios" or something like that.

def get_encoding_for_model(model: str):
try:
return tiktoken.encoding_for_model(model)
except KeyError:
return tiktoken.get_encoding("cl100k_base")


def get_tokens_per_message(model: str):
model_specific_tokens = {
GPT_3_5_TURBO_0613_MODEL: (3, 1),
GPT_3_5_TURBO_16K_0613_MODEL: (3, 1),
GPT_3_5_TURBO_1106_MODEL: (3, 1),
GPT_3_5_TURBO_0125_MODEL: (3, 1),
GPT_4_0314_MODEL: (3, 1),
GPT_4_32K_0314_MODEL: (3, 1),
GPT_4_0613_MODEL: (3, 1),
GPT_4_32K_0613_MODEL: (3, 1),
GPT_4_1106_PREVIEW_MODEL: (3, 1),
GPT_4_0125_PREVIEW_MODEL: (3, 1),
GPT_4_TURBO_PREVIEW_MODEL: (3, 1),
GPT_4_TURBO_2024_04_09_MODEL: (3, 1),
GPT_4O_2024_05_13_MODEL: (3, 1),
GPT_3_5_TURBO_0301_MODEL: (4, -1),
}
return model_specific_tokens.get(model, None)


def handle_fallback_models(model: str, messages: List[Dict[str, Union[str, Dict[str, str]]]]):
model_fallbacks = {
GPT_3_5_TURBO_MODEL: GPT_3_5_TURBO_0125_MODEL,
GPT_3_5_TURBO_16K_MODEL: GPT_3_5_TURBO_16K_0613_MODEL,
GPT_4_MODEL: GPT_4_0613_MODEL,
GPT_4_TURBO_MODEL: GPT_4_TURBO_2024_04_09_MODEL,
GPT_4_32K_MODEL: GPT_4_32K_0613_MODEL,
GPT_4O_MODEL: GPT_4O_2024_05_13_MODEL,
}
if model in model_fallbacks:
return calculate_num_tokens(messages, model=model_fallbacks[model])
return None


def raise_not_supported_error(model: str):

Owner: this change is not bad, but please avoid introducing non-essential changes beyond your goal. We can do this separately; keep your diff as small as possible.

error = (
f"Calculating the number of tokens for model {model} is not yet supported. "
"See https://github.com/openai/openai-python/blob/main/chatml.md "
"for information on how messages are converted to tokens."
)
raise NotImplementedError(error)


def encode_and_count_tokens(value, encoding):
if isinstance(value, str):
return len(encoding.encode(value))
elif isinstance(value, list):

Owner: this pattern is not consistent with calculate_num_tokens

return sum(encode_and_count_tokens(item, encoding) for item in value)
elif isinstance(value, dict):
return sum(encode_and_count_tokens(v, encoding) for k, v in value.items() if k != 'image_url')
return 0


def calculate_num_tokens(
messages: List[Dict[str, Union[str, Dict[str, str]]]],
-    model: str = GPT_3_5_TURBO_0613_MODEL,
+    model: str = GPT_3_5_TURBO_0613_MODEL
) -> int:
"""Returns the number of tokens used by a list of messages."""

Owner: I prefer fewer changes here. You can extract the common parts needed to support the new patterns, but there is no need to completely rewrite this.

Author: ok, just used AI to help refactor; I think it's too long.

-    try:
-        encoding = tiktoken.encoding_for_model(model)
-    except KeyError:
-        encoding = tiktoken.get_encoding("cl100k_base")
-    if model in {
-        GPT_3_5_TURBO_0613_MODEL,
-        GPT_3_5_TURBO_16K_0613_MODEL,
-        GPT_3_5_TURBO_1106_MODEL,
-        GPT_3_5_TURBO_0125_MODEL,
-        GPT_4_0314_MODEL,
-        GPT_4_32K_0314_MODEL,
-        GPT_4_0613_MODEL,
-        GPT_4_32K_0613_MODEL,
-        GPT_4_1106_PREVIEW_MODEL,
-        GPT_4_0125_PREVIEW_MODEL,
-        GPT_4_TURBO_PREVIEW_MODEL,
-        GPT_4_TURBO_2024_04_09_MODEL,
-        GPT_4O_2024_05_13_MODEL,
-    }:
-        tokens_per_message = 3
-        tokens_per_name = 1
-    elif model == GPT_3_5_TURBO_0301_MODEL:
-        tokens_per_message = (
-            4  # every message follows <|start|>{role/name}\n{content}<|end|>\n
-        )
-        tokens_per_name = -1  # if there's a name, the role is omitted
-    elif model == GPT_3_5_TURBO_MODEL:
-        # Note that GPT_3_5_TURBO_MODEL may change over time. Return num tokens assuming GPT_3_5_TURBO_0125_MODEL.
-        return calculate_num_tokens(messages, model=GPT_3_5_TURBO_0125_MODEL)
-    elif model == GPT_3_5_TURBO_16K_MODEL:
-        # Note that GPT_3_5_TURBO_16K_MODEL may change over time. Return num tokens assuming GPT_3_5_TURBO_16K_0613_MODEL.
-        return calculate_num_tokens(messages, model=GPT_3_5_TURBO_16K_0613_MODEL)
-    elif model == GPT_4_MODEL:
-        # Note that GPT_4_MODEL may change over time. Return num tokens assuming GPT_4_0613_MODEL.
-        return calculate_num_tokens(messages, model=GPT_4_0613_MODEL)
-    elif model == GPT_4_TURBO_MODEL:
-        # Note that GPT_4_TURBO_MODEL may change over time. Return num tokens assuming GPT_4_TURBO_2024_04_09_MODEL.
-        return calculate_num_tokens(messages, model=GPT_4_TURBO_2024_04_09_MODEL)
-    elif model == GPT_4_32K_MODEL:
-        # Note that GPT_4_32K_MODEL may change over time. Return num tokens assuming GPT_4_32K_0613_MODEL.
-        return calculate_num_tokens(messages, model=GPT_4_32K_0613_MODEL)
-    elif model == GPT_4O_MODEL:
-        # Note that GPT_4O_MODEL may change over time. Return num tokens assuming GPT_4O_2024_05_13_MODEL.
-        return calculate_num_tokens(messages, model=GPT_4O_2024_05_13_MODEL)
-    else:
-        error = (
-            f"Calculating the number of tokens for model {model} is not yet supported. "
-            "See https://github.com/openai/openai-python/blob/main/chatml.md "
-            "for information on how messages are converted to tokens."
-        )
-        raise NotImplementedError(error)
+    encoding = get_encoding_for_model(model)
    num_tokens = 0

+    # Handle model-specific tokens per message and name
+    token_numbers = get_tokens_per_message(model)
+    if token_numbers is None:
+        fallback_result = handle_fallback_models(model, messages)
+        if fallback_result is not None:
+            return fallback_result
+        raise_not_supported_error(model)
+
+    tokens_per_message, tokens_per_name = token_numbers

for message in messages:
num_tokens += tokens_per_message
for key, value in message.items():
if key == "function_call":
-                num_tokens += 1
-                num_tokens += len(encoding.encode(value["name"]))
-                num_tokens += len(encoding.encode(value["arguments"]))
+                num_tokens += (
+                    1 + len(encoding.encode(value["name"]))
+                    + len(encoding.encode(value["arguments"]))
+                )
else:
-                num_tokens += len(encoding.encode(value))
+                num_tokens += encode_and_count_tokens(value, encoding)
if key == "name":
num_tokens += tokens_per_name
-    num_tokens += 3  # every reply is primed with <|start|>assistant<|message|>
+
+    num_tokens += 3  # every reply is primed with <|im_start|>assistant<|im_sep|>

return num_tokens
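
A usage sketch of the refactored counter on a vision-style message, shaped like the ones bolt_listeners.py now builds; the ID and payload are placeholders, and image data is deliberately excluded from the count (the API meters image inputs separately):

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "<@U123ABC>: what is in this image?"},
            {"type": "image_url", "image_url": {"url": "data:image/png;base64,AAAA"}},
        ],
    },
]
total = calculate_num_tokens(messages)  # defaults to GPT_3_5_TURBO_0613_MODEL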


24 changes: 24 additions & 0 deletions app/utils.py
@@ -1,4 +1,9 @@
import base64
from io import BytesIO
import re

from PIL import Image
import requests
from app.env import (
REDACT_EMAIL_PATTERN,
REDACT_PHONE_PATTERN,
@@ -30,3 +35,22 @@ def redact_string(input_string: str) -> str:
output_string = re.sub(REDACT_USER_DEFINED_PATTERN, "[REDACTED]", output_string)

return output_string

def download_and_encode_image(image_url, token):
headers = {
'Authorization': f'Bearer {token}'
}
response = requests.get(image_url, headers=headers)
if response.status_code != 200:
raise ValueError(f"Request to {image_url} failed with status code {response.status_code}")
else:
content_type = response.headers['content-type']
if 'image' not in content_type:
raise ValueError(f"Content type {content_type} is not an image")

try:
image = Image.open(BytesIO(response.content))
image_format = image.format
except Exception as e:
raise ValueError(f"Error opening image: {e}")
return base64.b64encode(response.content).decode('utf-8'), image_format
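
A minimal usage sketch; the URL and token below are placeholders, and in the PR the caller (append_image_content) builds the data URL from the Slack-reported mimetype rather than from the returned format:

encoded, image_format = download_and_encode_image(
    "https://files.slack.com/files-pri/T000-F000/example.png",  # hypothetical url_private
    "xoxb-placeholder-token",
)
data_url = f"data:image/{image_format.lower()};base64,{encoded}"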
2 changes: 2 additions & 0 deletions requirements.txt
@@ -4,3 +4,5 @@ openai>=1.30.1,<2
tiktoken>=0.6,<0.7
# https://github.com/Yelp/elastalert/issues/2306
urllib3<2
# https://github.com/python-pillow/Pillow
pillow==0.4.0

Owner: which code requires this?

Author: sorry, updated, should be 9.4.0
