Fix doc_vqa lmms_eval card: convert images to RGB before ToImage

Signed-off-by: elronbandel <[email protected]>
IBM · Oct 20, 2024 · 6b5216f · 6b5216f
1 parent 3d027e9
commit 6b5216f
Show file tree

Hide file tree

Showing 3 changed files with 11 additions and 1 deletion.
diff --git a/prepare/cards/doc_vqa.py b/prepare/cards/doc_vqa.py
@@ -1,7 +1,7 @@
 from unitxt.blocks import LoadHF, Set, TaskCard
 from unitxt.catalog import add_to_catalog
 from unitxt.collections_operators import Explode, Wrap
-from unitxt.image_operators import ToImage
+from unitxt.image_operators import ToImage, ToRGB
 from unitxt.operators import Copy
 from unitxt.splitters import RenameSplits
 from unitxt.test_utils.card import test_card
@@ -43,6 +43,7 @@
     ),
     preprocess_steps=[
         RenameSplits(mapper={"validation": "test"}),
+        ToRGB(field="image"),
         ToImage(field="image", to_field="context"),
         Set(fields={"context_type": "image"}),
     ],

diff --git a/src/unitxt/catalog/cards/doc_vqa/lmms_eval.json b/src/unitxt/catalog/cards/doc_vqa/lmms_eval.json
@@ -15,6 +15,10 @@
                 "validation": "test"
             }
         },
+        {
+            "__type__": "to_rgb",
+            "field": "image"
+        },
         {
             "__type__": "to_image",
             "field": "image",

diff --git a/src/unitxt/image_operators.py b/src/unitxt/image_operators.py
@@ -73,3 +73,8 @@ def process_image(self, image):
 
         # Convert back to a PIL image with 3 channels
         return self.image.fromarray(grayscale_array)
+
+
+class ToRGB(ImageFieldOperator):  # Image operator whose per-image step converts the image to RGB mode.
+    def process_image(self, image):  # image: presumably a PIL image (must expose .convert) - confirm against ImageFieldOperator
+        return image.convert("RGB")  # returns the result of convert("RGB"); the argument itself is not reassigned here