Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: text-to-od-sam2 tool #92

Draft
wants to merge 14 commits into
base: main
Choose a base branch
from
691 changes: 345 additions & 346 deletions tests/models/data/results/florence2sam2_image_screw_ft_results.json

Large diffs are not rendered by default.

7,380 changes: 3,673 additions & 3,707 deletions tests/models/data/results/florence2sam2_image_tomato_results.json

Large diffs are not rendered by default.

696 changes: 345 additions & 351 deletions tests/models/data/results/florence2sam2_video_ft_results.json

Large diffs are not rendered by default.

7,386 changes: 3,620 additions & 3,766 deletions tests/models/data/results/florence2sam2_video_results.json

Large diffs are not rendered by default.

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions tests/models/florence2/test_caption_to_phrase_grounding.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ def test_caption_to_phrase_grounding_cereal(shared_model):
{
"bboxes": [],
"labels": [],
"scores": [],
}
]

Expand Down Expand Up @@ -65,6 +66,7 @@ def test_caption_to_phrase_grounding_car_with_nms(shared_model):
373.1999816894531,
]
],
"scores": [1.0],
}
]

Expand Down Expand Up @@ -125,5 +127,6 @@ def test_caption_to_phrase_grounding_image_ft(unzip_model):
]
],
"labels": ["screw"],
"scores": [1.0],
}
]
2 changes: 1 addition & 1 deletion tests/models/florence2/test_dense_region_caption.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def test_dense_region_caption(shared_model):
"task": task,
}
response = shared_model(**payload)
assert response == [{"labels": [], "bboxes": []}]
assert response == [{"labels": [], "bboxes": [], "scores": []}]


def test_dense_region_caption_ft(unzip_model):
Expand Down
7 changes: 7 additions & 0 deletions tests/models/florence2/test_od.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ def test_large_model_od_image(shared_large_model):
{
"bboxes": [],
"labels": [],
"scores": [],
}
]

Expand All @@ -34,6 +35,7 @@ def test_small_model_od_image(shared_model):
{
"bboxes": [],
"labels": [],
"scores": [],
}
]

Expand Down Expand Up @@ -64,6 +66,7 @@ def test_od_ft(unzip_model):
[738.3040161132812, 1373.18408203125, 881.6640625, 1557.5040283203125]
],
"labels": ["screw"],
"scores": [1.0],
}
]

Expand Down Expand Up @@ -94,6 +97,7 @@ def test_large_model_base_with_small_model_od_ft(unzip_model):
[738.3040161132812, 1373.18408203125, 881.6640625, 1557.5040283203125]
],
"labels": ["screw"],
"scores": [1.0],
}
]

Expand Down Expand Up @@ -124,6 +128,7 @@ def test_od_ft_and_base_and_ft(unzip_model):
[738.3040161132812, 1373.18408203125, 881.6640625, 1557.5040283203125]
],
"labels": ["screw"],
"scores": [1.0],
}
]

Expand All @@ -139,6 +144,7 @@ def test_od_ft_and_base_and_ft(unzip_model):
{
"bboxes": [],
"labels": [],
"scores": [],
}
]

Expand All @@ -156,5 +162,6 @@ def test_od_ft_and_base_and_ft(unzip_model):
[738.3040161132812, 1373.18408203125, 881.6640625, 1557.5040283203125]
],
"labels": ["screw"],
"scores": [1.0],
}
]
2 changes: 1 addition & 1 deletion tests/models/florence2/test_region_proposal.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def test_region_proposal(shared_model):
"task": task,
}
response = shared_model(**payload)
assert response == [{"bboxes": [], "labels": []}]
assert response == [{"bboxes": [], "labels": [], "scores": []}]


def test_region_proposal_ft(unzip_model):
Expand Down
6 changes: 3 additions & 3 deletions tests/models/test_sam2.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def test_sam2_point_segmentation_image(shared_model, rle_decode_array):
assert len(frame) == 1 # annotations
annotations = frame[0]

assert annotations.keys() == {"id", "score", "mask", "logits"}
assert annotations.keys() == {"id", "label", "score", "bbox", "mask", "logits"}
assert annotations["id"] == 0
assert annotations["score"] == 0.9140625
reverted_masks = rle_decode_array(annotations["mask"])
Expand Down Expand Up @@ -63,7 +63,7 @@ def test_sam2_box_segmentation_image(shared_model, rle_decode_array):
assert len(frame) == 2 # annotations
expected_scores = [0.953125, 0.921875]
for idx, (score, annotation) in enumerate(zip(expected_scores, frame)):
assert annotation.keys() == {"id", "score", "mask", "logits"}
assert annotation.keys() == {"id", "label", "score", "bbox", "mask", "logits"}
assert annotation["id"] == idx
assert annotation["score"] == score
reverted_masks = rle_decode_array(annotation["mask"])
Expand Down Expand Up @@ -97,7 +97,7 @@ def test_sam2_video_detection_segmentation(shared_model, rle_decode_array):
for frame in response:
assert len(frame) == 1 # annotations
annotation = frame[0]
assert annotation.keys() == {"id", "score", "mask", "logits"}
assert annotation.keys() == {"id", "label", "score", "bbox", "mask", "logits"}
assert annotation["id"] == 0
assert annotation["score"] is None
reverted_masks = rle_decode_array(annotation["mask"])
Expand Down
8 changes: 6 additions & 2 deletions vision_agent_tools/models/florence2.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
Device,
Florence2ResponseType,
BaseMLModel,
ODResponse,
ODWithScoreResponse,
Florence2OCRResponse,
Florence2TextResponse,
Florence2OpenVocabularyResponse,
Expand Down Expand Up @@ -360,7 +360,11 @@ def _serialize(
| PromptTask.REGION_PROPOSAL
):
detections.append(
ODResponse(bboxes=detection["bboxes"], labels=detection["labels"])
ODWithScoreResponse(
bboxes=detection["bboxes"],
labels=detection["labels"],
scores=[1.0] * len(detection["labels"]),
)
)
case PromptTask.OCR_WITH_REGION:
detections.append(
Expand Down
4 changes: 2 additions & 2 deletions vision_agent_tools/models/florence2_sam2.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
BaseMLModel,
VideoNumpy,
PromptTask,
ODResponse,
ODWithScoreResponse,
)
from vision_agent_tools.models.sam2 import Sam2, Sam2Config
from vision_agent_tools.models.florence2 import Florence2, Florence2Config
Expand Down Expand Up @@ -149,7 +149,7 @@ def __call__(

florence2_response = self._florence2(**florence2_payload)
od_response = [
ODResponse(**item) if item is not None else None
ODWithScoreResponse(**item) if item is not None else None
for item in florence2_response
]

Expand Down
Loading
Loading