From 70f75cabd9500950c6b673b48e8d5dc688f4c299 Mon Sep 17 00:00:00 2001 From: elronbandel Date: Thu, 13 Feb 2025 15:37:44 +0200 Subject: [PATCH 1/2] Use faster model for examples Signed-off-by: elronbandel --- examples/evaluate_using_metrics_ensemble.py | 4 +- examples/ner_evaluation.py | 4 +- examples/qa_evaluation.py | 4 +- .../standalone_evaluation_llm_as_judge.py | 4 +- examples/standalone_qa_evaluation.py | 4 +- utils/.secrets.baseline | 69 ++++++++++++++----- 6 files changed, 62 insertions(+), 27 deletions(-) diff --git a/examples/evaluate_using_metrics_ensemble.py b/examples/evaluate_using_metrics_ensemble.py index ecf6dedb3b..ea18b6ef23 100644 --- a/examples/evaluate_using_metrics_ensemble.py +++ b/examples/evaluate_using_metrics_ensemble.py @@ -27,9 +27,9 @@ split="test", ) -# Infer using Llama-3.2-1B base using HF API +# Infer using SmolLM2 using HF API model = HFPipelineBasedInferenceEngine( - model_name="Qwen/Qwen1.5-0.5B-Chat", max_new_tokens=32 + model_name="HuggingFaceTB/SmolLM2-135M-Instruct", max_new_tokens=32 ) # Change to this to infer with external APIs: # CrossProviderInferenceEngine(model="llama-3-2-1b-instruct", provider="watsonx") diff --git a/examples/ner_evaluation.py b/examples/ner_evaluation.py index 62fe3b5ed1..fdecf51c67 100644 --- a/examples/ner_evaluation.py +++ b/examples/ner_evaluation.py @@ -35,9 +35,9 @@ format="formats.chat_api", ) -# Infer using Llama-3.2-1B base using HF API +# Infer using SmolLM2 using HF API # model = HFPipelineBasedInferenceEngine( -# model_name="Qwen/Qwen1.5-0.5B-Chat", max_new_tokens=32 +# model_name="HuggingFaceTB/SmolLM2-135M-Instruct", max_new_tokens=32 # ) # Change to this to infer with external APIs: diff --git a/examples/qa_evaluation.py b/examples/qa_evaluation.py index 1db776aabd..79b9663987 100644 --- a/examples/qa_evaluation.py +++ b/examples/qa_evaluation.py @@ -30,9 +30,9 @@ format="formats.chat_api", ) -# Infer using Llama-3.2-1B base using HF API +# Infer using SmolLM2 using HF API model = HFPipelineBasedInferenceEngine( - model_name="Qwen/Qwen1.5-0.5B-Chat", max_new_tokens=32 + model_name="HuggingFaceTB/SmolLM2-135M-Instruct", max_new_tokens=32 ) # Change to this to infer with external APIs: # from unitxt.inference import CrossProviderInferenceEngine diff --git a/examples/standalone_evaluation_llm_as_judge.py b/examples/standalone_evaluation_llm_as_judge.py index 7f37859cf2..2d0fb0a3c2 100644 --- a/examples/standalone_evaluation_llm_as_judge.py +++ b/examples/standalone_evaluation_llm_as_judge.py @@ -89,9 +89,9 @@ max_test_instances=10, ) -# Infer using Llama-3.2-1B base using HF API +# Infer using SmolLM2 using HF API model = HFPipelineBasedInferenceEngine( - model_name="Qwen/Qwen1.5-0.5B-Chat", max_new_tokens=32 + model_name="HuggingFaceTB/SmolLM2-135M-Instruct", max_new_tokens=32 ) predictions = model(dataset) diff --git a/examples/standalone_qa_evaluation.py b/examples/standalone_qa_evaluation.py index 7172567caa..d6f6163c82 100644 --- a/examples/standalone_qa_evaluation.py +++ b/examples/standalone_qa_evaluation.py @@ -37,9 +37,9 @@ ) -# Infer using Llama-3.2-1B base using HF API +# Infer using SmolLM2 using HF API model = HFPipelineBasedInferenceEngine( - model_name="Qwen/Qwen1.5-0.5B-Chat", max_new_tokens=32 + model_name="HuggingFaceTB/SmolLM2-135M-Instruct", max_new_tokens=32 ) # Change to this to infer with external APIs: # from unitxt.inference import CrossProviderInferenceEngine diff --git a/utils/.secrets.baseline b/utils/.secrets.baseline index 1226ff7479..c544bced1f 100644 --- a/utils/.secrets.baseline +++ b/utils/.secrets.baseline @@ -90,10 +90,6 @@ { "path": "detect_secrets.filters.allowlist.is_line_allowlisted" }, - { - "path": "detect_secrets.filters.common.is_baseline_file", - "filename": "utils/.secrets.baseline" - }, { "path": "detect_secrets.filters.common.is_ignored_due_to_verification_policies", "min_level": 2 @@ -127,22 +123,65 @@ } ], "results": { + "examples/evaluate_using_metrics_ensemble.py": [ + { + "type": "Base64 High Entropy String", + "filename": "examples/evaluate_using_metrics_ensemble.py", + "hashed_secret": "bed3655d44736098fa59a0661d724a73da7c5654", + "is_verified": false, + "line_number": 32 + } + ], + "examples/ner_evaluation.py": [ + { + "type": "Base64 High Entropy String", + "filename": "examples/ner_evaluation.py", + "hashed_secret": "bed3655d44736098fa59a0661d724a73da7c5654", + "is_verified": false, + "line_number": 40 + } + ], + "examples/qa_evaluation.py": [ + { + "type": "Base64 High Entropy String", + "filename": "examples/qa_evaluation.py", + "hashed_secret": "bed3655d44736098fa59a0661d724a73da7c5654", + "is_verified": false, + "line_number": 35 + } + ], + "examples/standalone_evaluation_llm_as_judge.py": [ + { + "type": "Base64 High Entropy String", + "filename": "examples/standalone_evaluation_llm_as_judge.py", + "hashed_secret": "bed3655d44736098fa59a0661d724a73da7c5654", + "is_verified": false, + "line_number": 94 + } + ], + "examples/standalone_qa_evaluation.py": [ + { + "type": "Base64 High Entropy String", + "filename": "examples/standalone_qa_evaluation.py", + "hashed_secret": "bed3655d44736098fa59a0661d724a73da7c5654", + "is_verified": false, + "line_number": 42 + } + ], "src/unitxt/inference.py": [ { "type": "Secret Keyword", "filename": "src/unitxt/inference.py", "hashed_secret": "aa6cd2a77de22303be80e1f632195d62d211a729", "is_verified": false, - "line_number": 1294, - "is_secret": false + "line_number": 1294 }, { "type": "Secret Keyword", "filename": "src/unitxt/inference.py", "hashed_secret": "c8f16a194efc59559549c7bd69f7bea038742e79", "is_verified": false, - "line_number": 1779, - "is_secret": false + "line_number": 1779 } ], "src/unitxt/loaders.py": [ @@ -151,8 +190,7 @@ "filename": "src/unitxt/loaders.py", "hashed_secret": "840268f77a57d5553add023cfa8a4d1535f49742", "is_verified": false, - "line_number": 595, - "is_secret": false + "line_number": 595 } ], "src/unitxt/metrics.py": [ @@ -161,8 +199,7 @@ "filename": "src/unitxt/metrics.py", "hashed_secret": "fa172616e9af3d2a24b5597f264eab963fe76889", "is_verified": false, - "line_number": 70, - "is_secret": false + "line_number": 70 } ], "tests/library/test_loaders.py": [ @@ -171,18 +208,16 @@ "filename": "tests/library/test_loaders.py", "hashed_secret": "8d814baafe5d8412572dc520dcab83f60ce1375c", "is_verified": false, - "line_number": 125, - "is_secret": false + "line_number": 125 }, { "type": "Secret Keyword", "filename": "tests/library/test_loaders.py", "hashed_secret": "42a472ac88cd8d43a2c5ae0bd0bdf4626cdaba31", "is_verified": false, - "line_number": 135, - "is_secret": false + "line_number": 135 } ] }, - "generated_at": "2025-02-12T09:37:42Z" + "generated_at": "2025-02-13T13:37:19Z" } From 4edb36dbcb95852650f9b8ab26902e602fc9abd4 Mon Sep 17 00:00:00 2001 From: elronbandel Date: Thu, 13 Feb 2025 16:45:40 +0200 Subject: [PATCH 2/2] Update model to SmolLM2-1.7B-Instruct in evaluation examples Signed-off-by: elronbandel --- examples/evaluate_using_metrics_ensemble.py | 2 +- examples/ner_evaluation.py | 2 +- examples/qa_evaluation.py | 2 +- .../standalone_evaluation_llm_as_judge.py | 2 +- examples/standalone_qa_evaluation.py | 2 +- utils/.secrets.baseline | 51 ++----------------- 6 files changed, 10 insertions(+), 51 deletions(-) diff --git a/examples/evaluate_using_metrics_ensemble.py b/examples/evaluate_using_metrics_ensemble.py index ea18b6ef23..37b00d0ed8 100644 --- a/examples/evaluate_using_metrics_ensemble.py +++ b/examples/evaluate_using_metrics_ensemble.py @@ -29,7 +29,7 @@ # Infer using SmolLM2 using HF API model = HFPipelineBasedInferenceEngine( - model_name="HuggingFaceTB/SmolLM2-135M-Instruct", max_new_tokens=32 + model_name="HuggingFaceTB/SmolLM2-1.7B-Instruct", max_new_tokens=32 ) # Change to this to infer with external APIs: # CrossProviderInferenceEngine(model="llama-3-2-1b-instruct", provider="watsonx") diff --git a/examples/ner_evaluation.py b/examples/ner_evaluation.py index fdecf51c67..cbcdef03c2 100644 --- a/examples/ner_evaluation.py +++ b/examples/ner_evaluation.py @@ -37,7 +37,7 @@ # Infer using SmolLM2 using HF API # model = HFPipelineBasedInferenceEngine( -# model_name="HuggingFaceTB/SmolLM2-135M-Instruct", max_new_tokens=32 +# model_name="HuggingFaceTB/SmolLM2-1.7B-Instruct", max_new_tokens=32 # ) # Change to this to infer with external APIs: diff --git a/examples/qa_evaluation.py b/examples/qa_evaluation.py index 79b9663987..50c6a9f215 100644 --- a/examples/qa_evaluation.py +++ b/examples/qa_evaluation.py @@ -32,7 +32,7 @@ # Infer using SmolLM2 using HF API model = HFPipelineBasedInferenceEngine( - model_name="HuggingFaceTB/SmolLM2-135M-Instruct", max_new_tokens=32 + model_name="HuggingFaceTB/SmolLM2-1.7B-Instruct", max_new_tokens=32 ) # Change to this to infer with external APIs: # from unitxt.inference import CrossProviderInferenceEngine diff --git a/examples/standalone_evaluation_llm_as_judge.py b/examples/standalone_evaluation_llm_as_judge.py index 2d0fb0a3c2..b4f16f6943 100644 --- a/examples/standalone_evaluation_llm_as_judge.py +++ b/examples/standalone_evaluation_llm_as_judge.py @@ -91,7 +91,7 @@ # Infer using SmolLM2 using HF API model = HFPipelineBasedInferenceEngine( - model_name="HuggingFaceTB/SmolLM2-135M-Instruct", max_new_tokens=32 + model_name="HuggingFaceTB/SmolLM2-1.7B-Instruct", max_new_tokens=32 ) predictions = model(dataset) diff --git a/examples/standalone_qa_evaluation.py b/examples/standalone_qa_evaluation.py index d6f6163c82..8871aecb5a 100644 --- a/examples/standalone_qa_evaluation.py +++ b/examples/standalone_qa_evaluation.py @@ -39,7 +39,7 @@ # Infer using SmolLM2 using HF API model = HFPipelineBasedInferenceEngine( - model_name="HuggingFaceTB/SmolLM2-135M-Instruct", max_new_tokens=32 + model_name="HuggingFaceTB/SmolLM2-1.7B-Instruct", max_new_tokens=32 ) # Change to this to infer with external APIs: # from unitxt.inference import CrossProviderInferenceEngine diff --git a/utils/.secrets.baseline b/utils/.secrets.baseline index c544bced1f..9b476c7904 100644 --- a/utils/.secrets.baseline +++ b/utils/.secrets.baseline @@ -90,6 +90,10 @@ { "path": "detect_secrets.filters.allowlist.is_line_allowlisted" }, + { + "path": "detect_secrets.filters.common.is_baseline_file", + "filename": "utils/.secrets.baseline" + }, { "path": "detect_secrets.filters.common.is_ignored_due_to_verification_policies", "min_level": 2 @@ -123,51 +127,6 @@ } ], "results": { - "examples/evaluate_using_metrics_ensemble.py": [ - { - "type": "Base64 High Entropy String", - "filename": "examples/evaluate_using_metrics_ensemble.py", - "hashed_secret": "bed3655d44736098fa59a0661d724a73da7c5654", - "is_verified": false, - "line_number": 32 - } - ], - "examples/ner_evaluation.py": [ - { - "type": "Base64 High Entropy String", - "filename": "examples/ner_evaluation.py", - "hashed_secret": "bed3655d44736098fa59a0661d724a73da7c5654", - "is_verified": false, - "line_number": 40 - } - ], - "examples/qa_evaluation.py": [ - { - "type": "Base64 High Entropy String", - "filename": "examples/qa_evaluation.py", - "hashed_secret": "bed3655d44736098fa59a0661d724a73da7c5654", - "is_verified": false, - "line_number": 35 - } - ], - "examples/standalone_evaluation_llm_as_judge.py": [ - { - "type": "Base64 High Entropy String", - "filename": "examples/standalone_evaluation_llm_as_judge.py", - "hashed_secret": "bed3655d44736098fa59a0661d724a73da7c5654", - "is_verified": false, - "line_number": 94 - } - ], - "examples/standalone_qa_evaluation.py": [ - { - "type": "Base64 High Entropy String", - "filename": "examples/standalone_qa_evaluation.py", - "hashed_secret": "bed3655d44736098fa59a0661d724a73da7c5654", - "is_verified": false, - "line_number": 42 - } - ], "src/unitxt/inference.py": [ { "type": "Secret Keyword", @@ -219,5 +178,5 @@ } ] }, - "generated_at": "2025-02-13T13:37:19Z" + "generated_at": "2025-02-13T14:45:34Z" }