Skip to content

Commit

Permalink
restructure examples
Browse files Browse the repository at this point in the history
  • Loading branch information
harshitgupta412 committed Nov 18, 2024
1 parent 5d56fb6 commit 60cccf3
Show file tree
Hide file tree
Showing 13 changed files with 71 additions and 67 deletions.
19 changes: 0 additions & 19 deletions examples/multimodal_data/filter.py

This file was deleted.

22 changes: 0 additions & 22 deletions examples/multimodal_data/join.py

This file was deleted.

19 changes: 0 additions & 19 deletions examples/multimodal_data/map.py

This file was deleted.

21 changes: 21 additions & 0 deletions examples/op_examples/multimodal_ops/filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import os

import pandas as pd

import lotus
from lotus.dtype_extensions import ImageArray
from lotus.models import LM

lotus.settings.configure(lm=LM(model="gpt-4o-mini"))

# The images folder contains images of digits taken from the MNIST dataset.
# Resolve it relative to this script (not the current working directory) so
# the example runs no matter where it is launched from.
images_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "images")

# os.listdir returns entries in arbitrary order; sort them so the resulting
# DataFrame (and the printed output) is reproducible across runs and platforms.
image_file_names = sorted(os.listdir(images_dir))

# Each file is named after the digit it depicts, so the stem is the label
# (e.g. "1.png" -> "1").
labels = [os.path.splitext(image)[0] for image in image_file_names]
image_paths = [os.path.join(images_dir, image) for image in image_file_names]

df = pd.DataFrame({"image": ImageArray(image_paths), "label": labels, "image_path": image_paths})

# Semantic filter over the {image} column using the LM configured above.
df = df.sem_filter("{image} represents number 1")
print(df)
Binary file added examples/op_examples/multimodal_ops/images/0.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added examples/op_examples/multimodal_ops/images/1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added examples/op_examples/multimodal_ops/images/4.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added examples/op_examples/multimodal_ops/images/5.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added examples/op_examples/multimodal_ops/images/9.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
22 changes: 22 additions & 0 deletions examples/op_examples/multimodal_ops/join.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import os

import pandas as pd

import lotus
from lotus.dtype_extensions import ImageArray
from lotus.models import LM

lotus.settings.configure(lm=LM(model="gpt-4o-mini"))

# The images folder contains images of digits taken from the MNIST dataset.
# Resolve it relative to this script (not the current working directory) so
# the example runs no matter where it is launched from.
images_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "images")

# os.listdir returns entries in arbitrary order; sort them so the resulting
# DataFrame (and the printed output) is reproducible across runs and platforms.
image_file_names = sorted(os.listdir(images_dir))

# Only the paths are needed here: the candidate labels come from labels_df,
# not from the file names.
image_paths = [os.path.join(images_dir, image) for image in image_file_names]

image_df = pd.DataFrame({"image": ImageArray(image_paths), "image_path": image_paths})
labels_df = pd.DataFrame({"label": [0, 1]})

# Semantic join of images against candidate labels using the LM configured
# above; "zs-cot" is the strategy name passed through to sem_join.
df = image_df.sem_join(labels_df, "{image} represents the number {label}", strategy="zs-cot")

print(df)
21 changes: 21 additions & 0 deletions examples/op_examples/multimodal_ops/map.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import os

import pandas as pd

import lotus
from lotus.dtype_extensions import ImageArray
from lotus.models import LM

lotus.settings.configure(lm=LM(model="gpt-4o-mini"))

# The images folder contains images of digits taken from the MNIST dataset.
# Resolve it relative to this script (not the current working directory) so
# the example runs no matter where it is launched from.
images_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "images")

# os.listdir returns entries in arbitrary order; sort them so the resulting
# DataFrame (and the printed output) is reproducible across runs and platforms.
image_file_names = sorted(os.listdir(images_dir))

# Each file is named after the digit it depicts, so the stem is the label
# (e.g. "1.png" -> "1").
labels = [os.path.splitext(image)[0] for image in image_file_names]
image_paths = [os.path.join(images_dir, image) for image in image_file_names]

df = pd.DataFrame({"image": ImageArray(image_paths), "label": labels, "image_path": image_paths})

# Semantic map over the {image} column using the LM configured above.
df = df.sem_map("convert {image} to the number it represents")
print(df)
4 changes: 4 additions & 0 deletions lotus/sem_ops/sem_topk.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import pandas as pd

import lotus
from lotus.dtype_extensions import ImageDtype
from lotus.templates import task_instructions
from lotus.types import LMOutput, SemanticTopKOutput

Expand Down Expand Up @@ -374,6 +375,9 @@ def __call__(

if method == "quick-sem":
assert len(col_li) == 1, "Only one column can be used for embedding optimization"
assert not isinstance(
self._obj[col_li[0]].dtype, ImageDtype
), "Image columns are not supported for embedding optimization"
col_name = col_li[0]
# Sort the dataframe by the column to be used for embedding optimization
self._obj = self._obj.sem_index(col_name, f"{col_name}_lotus_index").sem_search(
Expand Down
10 changes: 3 additions & 7 deletions lotus/templates/task_instructions.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,9 @@ def user_message_formatter(
}
return {
"role": "user",
"content": [
{
"type": "text",
"text": f"{user_instruction_with_tag}\n\nContext:\n{text}",
},
]
+ image_inputs,
"content": [{"type": "text", "text": f"Context:\n{text}"}]
+ image_inputs
+ [{"type": "text", "text": f"\n\n{user_instruction_with_tag}"}],
}


Expand Down

0 comments on commit 60cccf3

Please sign in to comment.