landing-ai · CamiloInx · Nov 11, 2024 · Nov 12, 2024 · Nov 12, 2024 · Nov 13, 2024
diff --git a/tests/models/data/results/florence2sam2_image_screw_ft_results.json b/tests/models/data/results/florence2sam2_image_screw_ft_results.json
diff --git a/tests/models/data/results/florence2sam2_image_tomato_results.json b/tests/models/data/results/florence2sam2_image_tomato_results.json
diff --git a/tests/models/data/results/florence2sam2_video_ft_results.json b/tests/models/data/results/florence2sam2_video_ft_results.json
diff --git a/tests/models/data/results/florence2sam2_video_results.json b/tests/models/data/results/florence2sam2_video_results.json
diff --git a/tests/models/florence2/data/results/caption_to_phrase_grounding_video_results.json b/tests/models/florence2/data/results/caption_to_phrase_grounding_video_results.json
diff --git a/tests/models/florence2/test_caption_to_phrase_grounding.py b/tests/models/florence2/test_caption_to_phrase_grounding.py
@@ -23,6 +23,7 @@ def test_caption_to_phrase_grounding_cereal(shared_model):
         {
             "bboxes": [],
             "labels": [],
+            "scores": [],
         }
     ]
 
@@ -65,6 +66,7 @@ def test_caption_to_phrase_grounding_car_with_nms(shared_model):
                     373.1999816894531,
                 ]
             ],
+            "scores": [1.0],
         }
     ]
 
@@ -125,5 +127,6 @@ def test_caption_to_phrase_grounding_image_ft(unzip_model):
                 ]
             ],
             "labels": ["screw"],
+            "scores": [1.0],
         }
     ]
diff --git a/tests/models/florence2/test_dense_region_caption.py b/tests/models/florence2/test_dense_region_caption.py
@@ -15,7 +15,7 @@ def test_dense_region_caption(shared_model):
         "task": task,
     }
     response = shared_model(**payload)
-    assert response == [{"labels": [], "bboxes": []}]
+    assert response == [{"labels": [], "bboxes": [], "scores": []}]
 
 
 def test_dense_region_caption_ft(unzip_model):

diff --git a/tests/models/florence2/test_od.py b/tests/models/florence2/test_od.py
@@ -17,6 +17,7 @@ def test_large_model_od_image(shared_large_model):
         {
             "bboxes": [],
             "labels": [],
+            "scores": [],
         }
     ]
 
@@ -34,6 +35,7 @@ def test_small_model_od_image(shared_model):
         {
             "bboxes": [],
             "labels": [],
+            "scores": [],
         }
     ]
 
@@ -64,6 +66,7 @@ def test_od_ft(unzip_model):
                 [738.3040161132812, 1373.18408203125, 881.6640625, 1557.5040283203125]
             ],
             "labels": ["screw"],
+            "scores": [1.0],
         }
     ]
 
@@ -94,6 +97,7 @@ def test_large_model_base_with_small_model_od_ft(unzip_model):
                 [738.3040161132812, 1373.18408203125, 881.6640625, 1557.5040283203125]
             ],
             "labels": ["screw"],
+            "scores": [1.0],
         }
     ]
 
@@ -124,6 +128,7 @@ def test_od_ft_and_base_and_ft(unzip_model):
                 [738.3040161132812, 1373.18408203125, 881.6640625, 1557.5040283203125]
             ],
             "labels": ["screw"],
+            "scores": [1.0],
         }
     ]
 
@@ -139,6 +144,7 @@ def test_od_ft_and_base_and_ft(unzip_model):
         {
             "bboxes": [],
             "labels": [],
+            "scores": [],
         }
     ]
 
@@ -156,5 +162,6 @@ def test_od_ft_and_base_and_ft(unzip_model):
                 [738.3040161132812, 1373.18408203125, 881.6640625, 1557.5040283203125]
             ],
             "labels": ["screw"],
+            "scores": [1.0],
         }
     ]
diff --git a/tests/models/florence2/test_region_proposal.py b/tests/models/florence2/test_region_proposal.py
@@ -15,7 +15,7 @@ def test_region_proposal(shared_model):
         "task": task,
     }
     response = shared_model(**payload)
-    assert response == [{"bboxes": [], "labels": []}]
+    assert response == [{"bboxes": [], "labels": [], "scores": []}]
 
 
 def test_region_proposal_ft(unzip_model):

diff --git a/tests/models/test_sam2.py b/tests/models/test_sam2.py
@@ -28,7 +28,7 @@ def test_sam2_point_segmentation_image(shared_model, rle_decode_array):
     assert len(frame) == 1  # annotations
     annotations = frame[0]
 
-    assert annotations.keys() == {"id", "score", "mask", "logits"}
+    assert annotations.keys() == {"id", "label", "score", "bbox", "mask", "logits"}
     assert annotations["id"] == 0
     assert annotations["score"] == 0.9140625
     reverted_masks = rle_decode_array(annotations["mask"])
@@ -63,7 +63,7 @@ def test_sam2_box_segmentation_image(shared_model, rle_decode_array):
     assert len(frame) == 2  # annotations
     expected_scores = [0.953125, 0.921875]
     for idx, (score, annotation) in enumerate(zip(expected_scores, frame)):
-        assert annotation.keys() == {"id", "score", "mask", "logits"}
+        assert annotation.keys() == {"id", "label", "score", "bbox", "mask", "logits"}
         assert annotation["id"] == idx
         assert annotation["score"] == score
         reverted_masks = rle_decode_array(annotation["mask"])
@@ -97,7 +97,7 @@ def test_sam2_video_detection_segmentation(shared_model, rle_decode_array):
     for frame in response:
         assert len(frame) == 1  # annotations
         annotation = frame[0]
-        assert annotation.keys() == {"id", "score", "mask", "logits"}
+        assert annotation.keys() == {"id", "label", "score", "bbox", "mask", "logits"}
         assert annotation["id"] == 0
         assert annotation["score"] is None
         reverted_masks = rle_decode_array(annotation["mask"])

diff --git a/vision_agent_tools/models/florence2.py b/vision_agent_tools/models/florence2.py
@@ -15,7 +15,7 @@
     Device,
     Florence2ResponseType,
     BaseMLModel,
-    ODResponse,
+    ODWithScoreResponse,
     Florence2OCRResponse,
     Florence2TextResponse,
     Florence2OpenVocabularyResponse,
@@ -360,7 +360,11 @@ def _serialize(
                 | PromptTask.REGION_PROPOSAL
             ):
                 detections.append(
-                    ODResponse(bboxes=detection["bboxes"], labels=detection["labels"])
+                    ODWithScoreResponse(
+                        bboxes=detection["bboxes"],
+                        labels=detection["labels"],
+                        scores=[1.0] * len(detection["labels"]),
+                    )
                 )
             case PromptTask.OCR_WITH_REGION:
                 detections.append(

diff --git a/vision_agent_tools/models/florence2_sam2.py b/vision_agent_tools/models/florence2_sam2.py
@@ -10,7 +10,7 @@
     BaseMLModel,
     VideoNumpy,
     PromptTask,
-    ODResponse,
+    ODWithScoreResponse,
 )
 from vision_agent_tools.models.sam2 import Sam2, Sam2Config
 from vision_agent_tools.models.florence2 import Florence2, Florence2Config
@@ -149,7 +149,7 @@ def __call__(
 
         florence2_response = self._florence2(**florence2_payload)
         od_response = [
-            ODResponse(**item) if item is not None else None
+            ODWithScoreResponse(**item) if item is not None else None
             for item in florence2_response
         ]