diff --git a/examples/multimodal_data/filter.py b/examples/multimodal_data/filter.py deleted file mode 100644 index a1b4ab85..00000000 --- a/examples/multimodal_data/filter.py +++ /dev/null @@ -1,19 +0,0 @@ -import pandas as pd -from torchvision import datasets - -import lotus -from lotus.dtype_extensions import ImageArray -from lotus.models import LM - -lm = LM(model="gpt-4o-mini") -lotus.settings.configure(lm=lm) - -mnist_data = datasets.MNIST(root="mnist_data", train=True, download=True, transform=None) - -images = [image for image, _ in mnist_data] -labels = [label for _, label in mnist_data] - -df = pd.DataFrame({"image": ImageArray(images), "label": labels}) - -df = df.sem_filter("{image} represents number 1") -print(df) diff --git a/examples/multimodal_data/join.py b/examples/multimodal_data/join.py deleted file mode 100644 index 6de5d1fb..00000000 --- a/examples/multimodal_data/join.py +++ /dev/null @@ -1,22 +0,0 @@ -import pandas as pd -from torchvision import datasets - -import lotus -from lotus.dtype_extensions import ImageArray -from lotus.models import LM - -lm = LM(model="gpt-4o-mini") -lotus.settings.configure(lm=lm) - -mnist_data = datasets.MNIST(root="mnist_data", train=True, download=True, transform=None) - -images = [image for image, _ in mnist_data] -labels = [label for _, label in mnist_data] - -df = pd.DataFrame({"image": ImageArray(images[:5]), "label": labels[:5]}) - -df2 = pd.DataFrame({"image": ImageArray(images[5:10]), "label": labels[5:10]}) - -df = df.sem_join(df2, "{image:left} represents the same number as {image:right}", strategy="zs-cot") - -print(df) diff --git a/examples/multimodal_data/map.py b/examples/multimodal_data/map.py deleted file mode 100644 index d8794835..00000000 --- a/examples/multimodal_data/map.py +++ /dev/null @@ -1,19 +0,0 @@ -import pandas as pd -from torchvision import datasets - -import lotus -from lotus.dtype_extensions import ImageArray -from lotus.models import LM - -lm = LM(model="gpt-4o-mini") 
-lotus.settings.configure(lm=lm) - -mnist_data = datasets.MNIST(root="mnist_data", train=True, download=True, transform=None) - -images = [image for image, _ in mnist_data] -labels = [label for _, label in mnist_data] - -df = pd.DataFrame({"image": ImageArray(images[:5]), "label": labels[:5]}) - -df = df.sem_map("convert {image} to the number it represents") -print(df) diff --git a/examples/op_examples/multimodal_ops/filter.py b/examples/op_examples/multimodal_ops/filter.py new file mode 100644 index 00000000..3fbb0fdb --- /dev/null +++ b/examples/op_examples/multimodal_ops/filter.py @@ -0,0 +1,21 @@ +import os + +import pandas as pd + +import lotus +from lotus.dtype_extensions import ImageArray +from lotus.models import LM + +lotus.settings.configure(lm=LM(model="gpt-4o-mini")) + +# The images folder contains images representing digits taken from MNIST dataset +image_file_names = os.listdir("images") # get all files in the folder + +# file names are the same as the digit represented by image +labels = [os.path.splitext(image)[0] for image in image_file_names] +image_paths = [os.path.join("images", image) for image in image_file_names] + +df = pd.DataFrame({"image": ImageArray(image_paths), "label": labels, "image_path": image_paths}) + +df = df.sem_filter("{image} represents number 1") +print(df) diff --git a/examples/op_examples/multimodal_ops/images/0.png b/examples/op_examples/multimodal_ops/images/0.png new file mode 100644 index 00000000..789ddac9 Binary files /dev/null and b/examples/op_examples/multimodal_ops/images/0.png differ diff --git a/examples/op_examples/multimodal_ops/images/1.png b/examples/op_examples/multimodal_ops/images/1.png new file mode 100644 index 00000000..e44e0c9c Binary files /dev/null and b/examples/op_examples/multimodal_ops/images/1.png differ diff --git a/examples/op_examples/multimodal_ops/images/4.png b/examples/op_examples/multimodal_ops/images/4.png new file mode 100644 index 00000000..7d87808b Binary files /dev/null and 
b/examples/op_examples/multimodal_ops/images/4.png differ diff --git a/examples/op_examples/multimodal_ops/images/5.png b/examples/op_examples/multimodal_ops/images/5.png new file mode 100644 index 00000000..9878c632 Binary files /dev/null and b/examples/op_examples/multimodal_ops/images/5.png differ diff --git a/examples/op_examples/multimodal_ops/images/9.png b/examples/op_examples/multimodal_ops/images/9.png new file mode 100644 index 00000000..405b2f66 Binary files /dev/null and b/examples/op_examples/multimodal_ops/images/9.png differ diff --git a/examples/op_examples/multimodal_ops/join.py b/examples/op_examples/multimodal_ops/join.py new file mode 100644 index 00000000..9e490ea9 --- /dev/null +++ b/examples/op_examples/multimodal_ops/join.py @@ -0,0 +1,22 @@ +import os + +import pandas as pd + +import lotus +from lotus.dtype_extensions import ImageArray +from lotus.models import LM + +lotus.settings.configure(lm=LM(model="gpt-4o-mini")) + +# The images folder contains images representing digits taken from MNIST dataset +image_file_names = os.listdir("images") # get all files in the folder + +# file names are the same as the digit represented by image +image_paths = [os.path.join("images", image) for image in image_file_names] + +image_df = pd.DataFrame({"image": ImageArray(image_paths), "image_path": image_paths}) +labels_df = pd.DataFrame({"label": [0, 1]}) + +df = image_df.sem_join(labels_df, "{image} represents the number {label}", strategy="zs-cot") + +print(df) diff --git a/examples/op_examples/multimodal_ops/map.py b/examples/op_examples/multimodal_ops/map.py new file mode 100644 index 00000000..be3fe1ff --- /dev/null +++ b/examples/op_examples/multimodal_ops/map.py @@ -0,0 +1,21 @@ +import os + +import pandas as pd + +import lotus +from lotus.dtype_extensions import ImageArray +from lotus.models import LM + +lotus.settings.configure(lm=LM(model="gpt-4o-mini")) + +# The images folder contains images representing digits taken from MNIST dataset 
+image_file_names = os.listdir("images") # get all files in the folder + +# file names are the same as the digit represented by image +labels = [os.path.splitext(image)[0] for image in image_file_names] +image_paths = [os.path.join("images", image) for image in image_file_names] + +df = pd.DataFrame({"image": ImageArray(image_paths), "label": labels, "image_path": image_paths}) + +df = df.sem_map("convert {image} to the number it represents") +print(df) diff --git a/lotus/sem_ops/sem_topk.py b/lotus/sem_ops/sem_topk.py index 1db8b514..944ac88c 100644 --- a/lotus/sem_ops/sem_topk.py +++ b/lotus/sem_ops/sem_topk.py @@ -6,6 +6,7 @@ import pandas as pd import lotus +from lotus.dtype_extensions import ImageDtype from lotus.templates import task_instructions from lotus.types import LMOutput, SemanticTopKOutput @@ -374,6 +375,9 @@ def __call__( if method == "quick-sem": assert len(col_li) == 1, "Only one column can be used for embedding optimization" + assert not isinstance( + self._obj[col_li[0]].dtype, ImageDtype + ), "Image columns are not supported for embedding optimization" col_name = col_li[0] # Sort the dataframe by the column to be used for embedding optimization self._obj = self._obj.sem_index(col_name, f"{col_name}_lotus_index").sem_search( diff --git a/lotus/templates/task_instructions.py b/lotus/templates/task_instructions.py index 283f0ef4..f80b30d1 100644 --- a/lotus/templates/task_instructions.py +++ b/lotus/templates/task_instructions.py @@ -39,13 +39,9 @@ def user_message_formatter( } return { "role": "user", - "content": [ - { - "type": "text", - "text": f"{user_instruction_with_tag}\n\nContext:\n{text}", - }, - ] - + image_inputs, + "content": [{"type": "text", "text": f"Context:\n{text}"}] + + image_inputs + + [{"type": "text", "text": f"\n\n{user_instruction_with_tag}"}], }