Skip to content

Commit

Permalink
fixed a split list
Browse files Browse the repository at this point in the history
Signed-off-by: dafnapension <[email protected]>
  • Loading branch information
dafnapension committed Feb 9, 2025
1 parent c5a8a5a commit 3e990f3
Show file tree
Hide file tree
Showing 5 changed files with 38 additions and 46 deletions.
2 changes: 1 addition & 1 deletion prepare/cards/chart_qa.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@


card = TaskCard(
loader=LoadHF(path="lmms-lab/ChartQA", splits=["train", "val", "test"]),
loader=LoadHF(path="lmms-lab/ChartQA", splits=["test"]),
preprocess_steps=[
Wrap(field="answer", inside="list", to_field="answers"),
ToImage(field="image", to_field="context"),
Expand Down
33 changes: 33 additions & 0 deletions prepare/cards/doc_vqa.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from unitxt.collections_operators import Explode, Wrap
from unitxt.image_operators import ToImage
from unitxt.operators import Copy
from unitxt.splitters import RenameSplits
from unitxt.templates import MultiReferenceTemplate
from unitxt.test_utils.card import test_card

Expand Down Expand Up @@ -47,3 +48,35 @@

test_card(card)
add_to_catalog(card, f"cards.doc_vqa.{language}", overwrite=True)


# Second DocVQA card: loads the "DocVQA" configuration of lmms-lab/DocVQA,
# exposes the image as the QA context, and registers the result in the catalog
# as "cards.doc_vqa.lmms_eval". This rebinds the module-level `card` name and
# reuses `template` / `templates`, which are defined earlier in this file.
card = TaskCard(
loader=LoadHF(
path="lmms-lab/DocVQA",
name="DocVQA",
data_classification_policy=["public"],
# Only these two splits are requested from the hub.
splits=["test", "validation"],
),
preprocess_steps=[
# Expose the "validation" split under the name "test".
# NOTE(review): "test" is also loaded above, so two streams compete for the
# same name after this step — confirm which one RenameSplits keeps, and
# whether loading "test" is needed at all.
RenameSplits(mapper={"validation": "test"}),
# Convert the raw "image" field and store it in "context".
ToImage(field="image", to_field="context"),
# Mark the context as an image for the with_context QA task.
Set(fields={"context_type": "image"}),
],
# Abstractive QA with context, with the metric list overridden to ANLS.
task="tasks.qa.with_context.abstractive[metrics=[metrics.anls]]",
# `template` is listed first and is also the default template.
templates=[template, *templates.items],
default_template=template,
__tags__={
"license": "apache-2.0",
"multilinguality": "monolingual",
"modalities": ["image", "text"],
"size_categories": "10K<n<100K",
"task_categories": "question-answering",
"task_ids": "extractive-qa",
},
# NOTE(review): the description below mentions English and French Q&A generated
# with Gemini 1.5 Pro, which conflicts with the "monolingual" tag above. It
# looks carried over from another doc_vqa card — confirm against the
# lmms-lab/DocVQA dataset card and replace if it does not apply.
__description__=(
"The doc-vqa Dataset integrates images from the Infographic_vqa dataset sourced from HuggingFaceM4 The Cauldron dataset, as well as images from the dataset AFTDB (Arxiv Figure Table Database) curated by cmarkea. This dataset consists of pairs of images and corresponding text, with each image linked to an average of five questions and answers available in both English and French. These questions and answers were generated using Gemini 1.5 Pro, thereby rendering the dataset well-suited for multimodal tasks involving image-text pairing and multilingual question answering."
),
)

# Validate the card, then write it to the catalog (overwriting any existing entry).
test_card(card)
add_to_catalog(card, "cards.doc_vqa.lmms_eval", overwrite=True)
43 changes: 0 additions & 43 deletions prepare/cards/doc_vqa_lmms.py

This file was deleted.

2 changes: 0 additions & 2 deletions src/unitxt/catalog/cards/chart_qa_lmms_eval.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@
"__type__": "load_hf",
"path": "lmms-lab/ChartQA",
"splits": [
"train",
"val",
"test"
]
},
Expand Down
4 changes: 4 additions & 0 deletions src/unitxt/catalog/cards/doc_vqa/lmms_eval.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@
"name": "DocVQA",
"data_classification_policy": [
"public"
],
"splits": [
"test",
"validation"
]
},
"preprocess_steps": [
Expand Down

0 comments on commit 3e990f3

Please sign in to comment.