Skip to content

Commit

Permalink
Merge branch 'main' into add-non-verify-option-to-api-loader
Browse files Browse the repository at this point in the history
  • Loading branch information
elronbandel authored Feb 17, 2025
2 parents 762d1ef + fe79da3 commit cd379b3
Show file tree
Hide file tree
Showing 34 changed files with 249 additions and 188 deletions.
9 changes: 6 additions & 3 deletions .github/workflows/catalog_preparation.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ on:
concurrency:
group: ${{ github.workflow }}-${{ github.event_name == 'pull_request' && github.event.pull_request.number || github.ref_name }}
cancel-in-progress: true

jobs:
preparation:

Expand All @@ -35,8 +35,11 @@ jobs:

- run: curl -LsSf https://astral.sh/uv/install.sh | sh
- run: uv pip install --system ".[tests]"
- run: huggingface-cli login --token ${{ secrets.UNITXT_READ_HUGGINGFACE_HUB_FOR_TESTS }}

- name: Hugging Face Login
  # Authenticates the runner against the Hugging Face Hub, retrying with
  # exponential backoff. The token is handed over through the environment
  # rather than being spliced into the script text, so characters in the
  # secret can never be interpreted by the shell.
  env:
    HF_LOGIN_TOKEN: ${{ secrets.UNITXT_READ_HUGGINGFACE_HUB_FOR_TESTS }}
  run: |
    max_attempts=5
    for attempt in $(seq 1 "$max_attempts"); do
      if huggingface-cli login --token "$HF_LOGIN_TOKEN"; then
        exit 0
      fi
      # Back off 2, 4, 8, 16 seconds; no point sleeping after the last try.
      if [ "$attempt" -lt "$max_attempts" ]; then
        sleep $((2 ** attempt))
      fi
    done
    # The previous `cmd && break || sleep` form ended with sleep's exit
    # status (0), so a login that never succeeded still passed the step.
    echo "Hugging Face login failed after ${max_attempts} attempts" >&2
    exit 1
- name: Run Tests
run: |
modulo="${{ matrix.modulo }}"
Expand Down
7 changes: 6 additions & 1 deletion .github/workflows/examples_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,12 @@ jobs:

- run: curl -LsSf https://astral.sh/uv/install.sh | sh
- run: uv pip install --system ".[tests,watsonx,inference_tests]"
- run: huggingface-cli login --token ${{ secrets.UNITXT_READ_HUGGINGFACE_HUB_FOR_TESTS }}

- name: Hugging Face Login
  # Authenticates the runner against the Hugging Face Hub, retrying with
  # exponential backoff. The token is handed over through the environment
  # rather than being spliced into the script text, so characters in the
  # secret can never be interpreted by the shell.
  env:
    HF_LOGIN_TOKEN: ${{ secrets.UNITXT_READ_HUGGINGFACE_HUB_FOR_TESTS }}
  run: |
    max_attempts=5
    for attempt in $(seq 1 "$max_attempts"); do
      if huggingface-cli login --token "$HF_LOGIN_TOKEN"; then
        exit 0
      fi
      # Back off 2, 4, 8, 16 seconds; no point sleeping after the last try.
      if [ "$attempt" -lt "$max_attempts" ]; then
        sleep $((2 ** attempt))
      fi
    done
    # The previous `cmd && break || sleep` form ended with sleep's exit
    # status (0), so a login that never succeeded still passed the step.
    echo "Hugging Face login failed after ${max_attempts} attempts" >&2
    exit 1
- name: Run Tests
run: python -m unittest discover -s tests/examples -p "test_*.py"
7 changes: 5 additions & 2 deletions .github/workflows/inference_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,10 @@ jobs:

- run: curl -LsSf https://astral.sh/uv/install.sh | sh
- run: uv pip install --system ".[tests,watsonx,inference-tests]"
- run: huggingface-cli login --token ${{ secrets.UNITXT_READ_HUGGINGFACE_HUB_FOR_TESTS }}

- name: Hugging Face Login
  # Authenticates the runner against the Hugging Face Hub, retrying with
  # exponential backoff. The token is handed over through the environment
  # rather than being spliced into the script text, so characters in the
  # secret can never be interpreted by the shell.
  env:
    HF_LOGIN_TOKEN: ${{ secrets.UNITXT_READ_HUGGINGFACE_HUB_FOR_TESTS }}
  run: |
    max_attempts=5
    for attempt in $(seq 1 "$max_attempts"); do
      if huggingface-cli login --token "$HF_LOGIN_TOKEN"; then
        exit 0
      fi
      # Back off 2, 4, 8, 16 seconds; no point sleeping after the last try.
      if [ "$attempt" -lt "$max_attempts" ]; then
        sleep $((2 ** attempt))
      fi
    done
    # The previous `cmd && break || sleep` form ended with sleep's exit
    # status (0), so a login that never succeeded still passed the step.
    echo "Hugging Face login failed after ${max_attempts} attempts" >&2
    exit 1
- name: Run Tests
run: python -m unittest discover -s tests/inference -p "test_*.py"
9 changes: 5 additions & 4 deletions .github/workflows/performance.yml
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
name: Test Performance

on:
push:
branches: [ main ]
pull_request:
branches: [ main ]

Expand Down Expand Up @@ -35,8 +33,11 @@ jobs:
uv pip install --system ".[tests,watsonx,inference-tests]"
uv pip install --system litellm
uv pip install --system diskcache
huggingface-cli login --token ${{ secrets.UNITXT_READ_HUGGINGFACE_HUB_FOR_TESTS }}
- name: Hugging Face Login
  # Authenticates the runner against the Hugging Face Hub, retrying with
  # exponential backoff. The token is handed over through the environment
  # rather than being spliced into the script text, so characters in the
  # secret can never be interpreted by the shell.
  env:
    HF_LOGIN_TOKEN: ${{ secrets.UNITXT_READ_HUGGINGFACE_HUB_FOR_TESTS }}
  run: |
    max_attempts=5
    for attempt in $(seq 1 "$max_attempts"); do
      if huggingface-cli login --token "$HF_LOGIN_TOKEN"; then
        exit 0
      fi
      # Back off 2, 4, 8, 16 seconds; no point sleeping after the last try.
      if [ "$attempt" -lt "$max_attempts" ]; then
        sleep $((2 ** attempt))
      fi
    done
    # The previous `cmd && break || sleep` form ended with sleep's exit
    # status (0), so a login that never succeeded still passed the step.
    echo "Hugging Face login failed after ${max_attempts} attempts" >&2
    exit 1
- name: Prepare the dirs for performance evaluation in main
run: |
mkdir -p performance_action
Expand Down
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,6 @@ src/unitxt/catalog/temp_recipe_name.json
prod_env/*
benchmark_output/*
.litellm_cache

src.lock
docs/_static/data.js
cache
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
"questions": "data/arena-hard-v0.1/question.jsonl",
"model_answer": "data/arena-hard-v0.1/model_answer/gpt-4-0314.jsonl",
},
data_classification_policy = ["public"],
),
preprocess_steps=[
# region Question file
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
"model_answer": "data/arena-hard-v0.1/model_answer/*.jsonl",
"judgment": "data/arena-hard-v0.1/model_judgment/gpt-4-1106-preview/*.jsonl",
},
data_classification_policy = ["public"]
),
preprocess_steps=[
"operators.arena_hard_hf_space_processing_steps",
Expand Down Expand Up @@ -69,7 +70,7 @@
],
)

test_card(card, demos_taken_from="test", strict=False, loader_limit=100000)
test_card(card, demos_taken_from="test", strict=False, loader_limit=15000)
add_to_catalog(
card,
"cards.arena_hard.response_assessment.pairwise_comparative_rating.both_games_gpt_4_judge",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
"model_answer": "data/arena-hard-v0.1/model_answer/*.jsonl",
"judgment": "data/arena-hard-v0.1/model_judgment/gpt-4-1106-preview/*.jsonl",
},
data_classification_policy = ["public"]
),
preprocess_steps=[
"operators.arena_hard_hf_space_processing_steps",
Expand Down Expand Up @@ -54,7 +55,7 @@
],
)

test_card(card, demos_taken_from="test", strict=False, loader_limit=100000)
test_card(card, demos_taken_from="test", strict=False, loader_limit=15000)
add_to_catalog(
card,
"cards.arena_hard.response_assessment.pairwise_comparative_rating.both_games_mean_judgment_gpt4_judge",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
"model_answer": "data/arena-hard-v0.1/model_answer/*.jsonl",
"judgment": "data/arena-hard-v0.1/model_judgment/gpt-4-1106-preview/*.jsonl",
},
data_classification_policy = ["public"]
),
preprocess_steps=[
"operators.arena_hard_hf_space_processing_steps",
Expand Down Expand Up @@ -47,7 +48,7 @@
],
)

test_card(card, demos_taken_from="test", strict=False, loader_limit=100000)
test_card(card, demos_taken_from="test", strict=False, loader_limit=15000)
add_to_catalog(
card,
"cards.arena_hard.response_assessment.pairwise_comparative_rating.first_game_only_gpt_4_judge",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from unitxt.catalog import add_to_catalog
from unitxt.loaders import LoadFromHFSpace
from unitxt.operators import (
Fillna,
FilterByCondition,
InterleaveListsToDialogOperator,
MapInstanceValues,
Expand All @@ -20,10 +21,12 @@
"model_answer": "data/mt_bench/model_answer/*.jsonl",
"judgment": "data/mt_bench/model_judgment/gpt-4_pair.jsonl",
},
data_classification_policy = ["public"],
),
preprocess_steps=[
"operators.mt_bench.pairwise_hf_space_processing_steps",
FilterByCondition(values={"turn": 2}, condition="eq"),
Fillna(field="reference", value=None),
FilterByCondition(values={"reference": None}, condition="eq"),
FilterByCondition(
values={"winner": ["model_1", "tie", "model_2"]}, condition="in"
Expand Down Expand Up @@ -55,7 +58,7 @@
],
)

test_card(card, demos_taken_from="test", strict=False, loader_limit=1000)
test_card(card, demos_taken_from="test", strict=False, loader_limit=15000)
add_to_catalog(
card,
"cards.mt_bench.response_assessment.pairwise_comparison.multi_turn_gpt4_judgement",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from unitxt.catalog import add_to_catalog
from unitxt.loaders import LoadFromHFSpace
from unitxt.operators import (
Fillna,
FilterByCondition,
InterleaveListsToDialogOperator,
MapInstanceValues,
Expand All @@ -20,10 +21,12 @@
"model_answer": "data/mt_bench/model_answer/*.jsonl",
"judgment": "data/mt_bench/model_judgment/gpt-4_pair.jsonl",
},
data_classification_policy = ["public"]
),
preprocess_steps=[
"operators.mt_bench.pairwise_hf_space_processing_steps",
FilterByCondition(values={"turn": 2}, condition="eq"),
Fillna(field="reference", value=None),
FilterByCondition(values={"reference": None}, condition="ne"),
FilterByCondition(
values={"winner": ["model_1", "tie", "model_2"]}, condition="in"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,7 @@
)
from unitxt.catalog import add_to_catalog
from unitxt.loaders import LoadFromHFSpace
from unitxt.operators import (
Copy,
FilterByCondition,
MapInstanceValues,
Rename,
)
from unitxt.operators import Copy, Fillna, FilterByCondition, MapInstanceValues, Rename
from unitxt.test_utils.card import test_card

card = TaskCard(
Expand All @@ -20,10 +15,13 @@
"model_answer": "data/mt_bench/model_answer/*.jsonl",
"judgment": "data/mt_bench/model_judgment/gpt-4_pair.jsonl",
},
data_classification_policy = ["public"],

),
preprocess_steps=[
"operators.mt_bench.pairwise_hf_space_processing_steps",
FilterByCondition(values={"turn": 1}, condition="eq"),
Fillna(field="reference", value=None),
FilterByCondition(values={"reference": None}, condition="eq"),
FilterByCondition(
values={"winner": ["model_1", "tie", "model_2"]}, condition="in"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,7 @@
)
from unitxt.catalog import add_to_catalog
from unitxt.loaders import LoadFromHFSpace
from unitxt.operators import (
Copy,
FilterByCondition,
MapInstanceValues,
Rename,
)
from unitxt.operators import Copy, Fillna, FilterByCondition, MapInstanceValues, Rename
from unitxt.test_utils.card import test_card

card = TaskCard(
Expand All @@ -20,10 +15,12 @@
"model_answer": "data/mt_bench/model_answer/*.jsonl",
"judgment": "data/mt_bench/model_judgment/gpt-4_pair.jsonl",
},
data_classification_policy = ["public"]
),
preprocess_steps=[
"operators.mt_bench.pairwise_hf_space_processing_steps",
FilterByCondition(values={"turn": 1}, condition="eq"),
Fillna(field="reference", value=None),
FilterByCondition(values={"reference": None}, condition="ne"),
FilterByCondition(
values={"winner": ["model_1", "tie", "model_2"]}, condition="in"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from unitxt.catalog import add_to_catalog
from unitxt.loaders import LoadFromHFSpace
from unitxt.operators import (
Fillna,
FilterByCondition,
InterleaveListsToDialogOperator,
Rename,
Expand All @@ -19,10 +20,13 @@
"model_answer": "data/mt_bench/model_answer/*.jsonl",
"judgment": "data/mt_bench/model_judgment/gpt-4_single.jsonl",
},
data_classification_policy = ["public"],

),
preprocess_steps=[
"operators.mt_bench.rating_hf_space_processing_steps",
FilterByCondition(values={"turn": 2}, condition="eq"),
Fillna(field="reference", value=None),
FilterByCondition(values={"reference": None}, condition="eq"),
Rename(field_to_field={"score": "rating", "category": "group"}),
InterleaveListsToDialogOperator(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from unitxt.catalog import add_to_catalog
from unitxt.loaders import LoadFromHFSpace
from unitxt.operators import (
Fillna,
FilterByCondition,
InterleaveListsToDialogOperator,
Rename,
Expand All @@ -19,10 +20,12 @@
"model_answer": "data/mt_bench/model_answer/*.jsonl",
"judgment": "data/mt_bench/model_judgment/gpt-4_single.jsonl",
},
data_classification_policy = ["public"]
),
preprocess_steps=[
"operators.mt_bench.rating_hf_space_processing_steps",
FilterByCondition(values={"turn": 2}, condition="eq"),
Fillna(field="reference", value=None),
FilterByCondition(values={"reference": None}, condition="ne"),
Rename(field_to_field={"score": "rating", "category": "group"}),
InterleaveListsToDialogOperator(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
)
from unitxt.catalog import add_to_catalog
from unitxt.loaders import LoadFromHFSpace
from unitxt.operators import Copy, FilterByCondition, Rename
from unitxt.operators import Copy, Fillna, FilterByCondition, Rename
from unitxt.test_utils.card import test_card

card = TaskCard(
Expand All @@ -15,10 +15,12 @@
"model_answer": "data/mt_bench/model_answer/*.jsonl",
"judgment": "data/mt_bench/model_judgment/gpt-4_single.jsonl",
},
data_classification_policy = ["public"],
),
preprocess_steps=[
"operators.mt_bench.rating_hf_space_processing_steps",
FilterByCondition(values={"turn": 1}, condition="eq"),
Fillna(field="reference", value=None),
FilterByCondition(values={"reference": None}, condition="eq"),
Rename(
field_to_field={
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,7 @@
)
from unitxt.catalog import add_to_catalog
from unitxt.loaders import LoadFromHFSpace
from unitxt.operators import (
Copy,
FilterByCondition,
Rename,
)
from unitxt.operators import Copy, Fillna, FilterByCondition, Rename
from unitxt.test_utils.card import test_card

card = TaskCard(
Expand All @@ -19,10 +15,12 @@
"model_answer": "data/mt_bench/model_answer/*.jsonl",
"judgment": "data/mt_bench/model_judgment/gpt-4_single.jsonl",
},
data_classification_policy = ["public"]
),
preprocess_steps=[
"operators.mt_bench.rating_hf_space_processing_steps",
FilterByCondition(values={"turn": 1}, condition="eq"),
Fillna(field="reference", value=None),
FilterByCondition(values={"reference": None}, condition="ne"),
Rename(
field_to_field={
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@
"data_files": {
"questions": "data/arena-hard-v0.1/question.jsonl",
"model_answer": "data/arena-hard-v0.1/model_answer/gpt-4-0314.jsonl"
}
},
"data_classification_policy": [
"public"
]
},
"preprocess_steps": [
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,10 @@
"questions": "data/arena-hard-v0.1/question.jsonl",
"model_answer": "data/arena-hard-v0.1/model_answer/*.jsonl",
"judgment": "data/arena-hard-v0.1/model_judgment/gpt-4-1106-preview/*.jsonl"
}
},
"data_classification_policy": [
"public"
]
},
"preprocess_steps": [
"operators.arena_hard_hf_space_processing_steps",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,10 @@
"questions": "data/arena-hard-v0.1/question.jsonl",
"model_answer": "data/arena-hard-v0.1/model_answer/*.jsonl",
"judgment": "data/arena-hard-v0.1/model_judgment/gpt-4-1106-preview/*.jsonl"
}
},
"data_classification_policy": [
"public"
]
},
"preprocess_steps": [
"operators.arena_hard_hf_space_processing_steps",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,10 @@
"questions": "data/arena-hard-v0.1/question.jsonl",
"model_answer": "data/arena-hard-v0.1/model_answer/*.jsonl",
"judgment": "data/arena-hard-v0.1/model_judgment/gpt-4-1106-preview/*.jsonl"
}
},
"data_classification_policy": [
"public"
]
},
"preprocess_steps": [
"operators.arena_hard_hf_space_processing_steps",
Expand Down
Loading

0 comments on commit cd379b3

Please sign in to comment.