diff --git a/examples/multimodal_data/filter.py b/examples/multimodal_data/filter.py
deleted file mode 100644
index a1b4ab85..00000000
--- a/examples/multimodal_data/filter.py
+++ /dev/null
@@ -1,19 +0,0 @@
-import pandas as pd
-from torchvision import datasets
-
-import lotus
-from lotus.dtype_extensions import ImageArray
-from lotus.models import LM
-
-lm = LM(model="gpt-4o-mini")
-lotus.settings.configure(lm=lm)
-
-mnist_data = datasets.MNIST(root="mnist_data", train=True, download=True, transform=None)
-
-images = [image for image, _ in mnist_data]
-labels = [label for _, label in mnist_data]
-
-df = pd.DataFrame({"image": ImageArray(images), "label": labels})
-
-df = df.sem_filter("{image} represents number 1")
-print(df)
diff --git a/examples/multimodal_data/join.py b/examples/multimodal_data/join.py
deleted file mode 100644
index 6de5d1fb..00000000
--- a/examples/multimodal_data/join.py
+++ /dev/null
@@ -1,22 +0,0 @@
-import pandas as pd
-from torchvision import datasets
-
-import lotus
-from lotus.dtype_extensions import ImageArray
-from lotus.models import LM
-
-lm = LM(model="gpt-4o-mini")
-lotus.settings.configure(lm=lm)
-
-mnist_data = datasets.MNIST(root="mnist_data", train=True, download=True, transform=None)
-
-images = [image for image, _ in mnist_data]
-labels = [label for _, label in mnist_data]
-
-df = pd.DataFrame({"image": ImageArray(images[:5]), "label": labels[:5]})
-
-df2 = pd.DataFrame({"image": ImageArray(images[5:10]), "label": labels[5:10]})
-
-df = df.sem_join(df2, "{image:left} represents the same number as {image:right}", strategy="zs-cot")
-
-print(df)
diff --git a/examples/multimodal_data/map.py b/examples/multimodal_data/map.py
deleted file mode 100644
index d8794835..00000000
--- a/examples/multimodal_data/map.py
+++ /dev/null
@@ -1,19 +0,0 @@
-import pandas as pd
-from torchvision import datasets
-
-import lotus
-from lotus.dtype_extensions import ImageArray
-from lotus.models import LM
-
-lm = LM(model="gpt-4o-mini")
-lotus.settings.configure(lm=lm)
-
-mnist_data = datasets.MNIST(root="mnist_data", train=True, download=True, transform=None)
-
-images = [image for image, _ in mnist_data]
-labels = [label for _, label in mnist_data]
-
-df = pd.DataFrame({"image": ImageArray(images[:5]), "label": labels[:5]})
-
-df = df.sem_map("convert {image} to the number it represents")
-print(df)
diff --git a/examples/op_examples/multimodal_ops/filter.py b/examples/op_examples/multimodal_ops/filter.py
new file mode 100644
index 00000000..3fbb0fdb
--- /dev/null
+++ b/examples/op_examples/multimodal_ops/filter.py
@@ -0,0 +1,21 @@
+import os
+
+import pandas as pd
+
+import lotus
+from lotus.dtype_extensions import ImageArray
+from lotus.models import LM
+
+lotus.settings.configure(lm=LM(model="gpt-4o-mini"))
+
+# The images folder contains images of digits taken from the MNIST dataset
+image_file_names = os.listdir("images")  # "images" is resolved against the current working directory
+
+# Each file name (without its extension) is the digit shown in that image, so it is used as the label
+labels = [os.path.splitext(image)[0] for image in image_file_names]
+image_paths = [os.path.join("images", image) for image in image_file_names]
+
+df = pd.DataFrame({"image": ImageArray(image_paths), "label": labels, "image_path": image_paths})
+
+df = df.sem_filter("{image} represents number 1")
+print(df)
diff --git a/examples/op_examples/multimodal_ops/images/0.png b/examples/op_examples/multimodal_ops/images/0.png
new file mode 100644
index 00000000..789ddac9
Binary files /dev/null and b/examples/op_examples/multimodal_ops/images/0.png differ
diff --git a/examples/op_examples/multimodal_ops/images/1.png b/examples/op_examples/multimodal_ops/images/1.png
new file mode 100644
index 00000000..e44e0c9c
Binary files /dev/null and b/examples/op_examples/multimodal_ops/images/1.png differ
diff --git a/examples/op_examples/multimodal_ops/images/4.png b/examples/op_examples/multimodal_ops/images/4.png
new file mode 100644
index 00000000..7d87808b
Binary files /dev/null and b/examples/op_examples/multimodal_ops/images/4.png differ
diff --git a/examples/op_examples/multimodal_ops/images/5.png b/examples/op_examples/multimodal_ops/images/5.png
new file mode 100644
index 00000000..9878c632
Binary files /dev/null and b/examples/op_examples/multimodal_ops/images/5.png differ
diff --git a/examples/op_examples/multimodal_ops/images/9.png b/examples/op_examples/multimodal_ops/images/9.png
new file mode 100644
index 00000000..405b2f66
Binary files /dev/null and b/examples/op_examples/multimodal_ops/images/9.png differ
diff --git a/examples/op_examples/multimodal_ops/join.py b/examples/op_examples/multimodal_ops/join.py
new file mode 100644
index 00000000..9e490ea9
--- /dev/null
+++ b/examples/op_examples/multimodal_ops/join.py
@@ -0,0 +1,22 @@
+import os
+
+import pandas as pd
+
+import lotus
+from lotus.dtype_extensions import ImageArray
+from lotus.models import LM
+
+lotus.settings.configure(lm=LM(model="gpt-4o-mini"))
+
+# The images folder contains images of digits taken from the MNIST dataset
+image_file_names = os.listdir("images")  # "images" is resolved against the current working directory
+
+# Build the path of every image; the candidate labels to join against are listed in labels_df below
+image_paths = [os.path.join("images", image) for image in image_file_names]
+
+image_df = pd.DataFrame({"image": ImageArray(image_paths), "image_path": image_paths})
+labels_df = pd.DataFrame({"label": [0, 1]})
+
+df = image_df.sem_join(labels_df, "{image} represents the number {label}", strategy="zs-cot")
+
+print(df)
diff --git a/examples/op_examples/multimodal_ops/map.py b/examples/op_examples/multimodal_ops/map.py
new file mode 100644
index 00000000..be3fe1ff
--- /dev/null
+++ b/examples/op_examples/multimodal_ops/map.py
@@ -0,0 +1,21 @@
+import os
+
+import pandas as pd
+
+import lotus
+from lotus.dtype_extensions import ImageArray
+from lotus.models import LM
+
+lotus.settings.configure(lm=LM(model="gpt-4o-mini"))
+
+# The images folder contains images of digits taken from the MNIST dataset
+image_file_names = os.listdir("images")  # "images" is resolved against the current working directory
+
+# Each file name (without its extension) is the digit shown in that image, so it is used as the label
+labels = [os.path.splitext(image)[0] for image in image_file_names]
+image_paths = [os.path.join("images", image) for image in image_file_names]
+
+df = pd.DataFrame({"image": ImageArray(image_paths), "label": labels, "image_path": image_paths})
+
+df = df.sem_map("convert {image} to the number it represents")
+print(df)
diff --git a/lotus/sem_ops/sem_topk.py b/lotus/sem_ops/sem_topk.py
index 1db8b514..944ac88c 100644
--- a/lotus/sem_ops/sem_topk.py
+++ b/lotus/sem_ops/sem_topk.py
@@ -6,6 +6,7 @@
 import pandas as pd
 
 import lotus
+from lotus.dtype_extensions import ImageDtype
 from lotus.templates import task_instructions
 from lotus.types import LMOutput, SemanticTopKOutput
 
@@ -374,6 +375,9 @@ def __call__(
 
         if method == "quick-sem":
             assert len(col_li) == 1, "Only one column can be used for embedding optimization"
+            assert not isinstance(
+                self._obj[col_li[0]].dtype, ImageDtype
+            ), "Image columns are not supported for embedding optimization"
             col_name = col_li[0]
             # Sort the dataframe by the column to be used for embedding optimization
             self._obj = self._obj.sem_index(col_name, f"{col_name}_lotus_index").sem_search(
diff --git a/lotus/templates/task_instructions.py b/lotus/templates/task_instructions.py
index 283f0ef4..f80b30d1 100644
--- a/lotus/templates/task_instructions.py
+++ b/lotus/templates/task_instructions.py
@@ -39,13 +39,9 @@ def user_message_formatter(
         }
     return {
         "role": "user",
-        "content": [
-            {
-                "type": "text",
-                "text": f"{user_instruction_with_tag}\n\nContext:\n{text}",
-            },
-        ]
-        + image_inputs,
+        "content": [{"type": "text", "text": f"Context:\n{text}"}]
+        + image_inputs
+        + [{"type": "text", "text": f"\n\n{user_instruction_with_tag}"}],
     }