Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

include inference time in performance report only when not-mocked #1572

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 25 additions & 3 deletions performance/bluebench_profiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,14 @@ def profiler_do_the_profiling(self, dataset_query: str, split: str, **kwargs):
benchmark_recipe=benchmark_recipe, split=split, **kwargs
)

logger.critical(f"length of bluebench generated dataset: {len(dataset)}")
model = self.profiler_instantiate_model()

predictions = self.profiler_infer_predictions(model=model, dataset=dataset)

evaluation_result = self.profiler_evaluate_predictions(
predictions=predictions, dataset=dataset
)
logger.critical(f"length of evaluation_result: {len(evaluation_result)}")


dataset_query = "benchmarks.bluebench[loader_limit=30,max_samples_per_subset=30]"
Expand Down Expand Up @@ -147,29 +154,44 @@ def main():
pst.strip_dirs()
pst.sort_stats("name") # sort by function name
pst.print_stats(
"profile_benchmark_blue_bench|profiler_instantiate_benchmark_recipe|profiler_generate_benchmark_dataset|load_data|load_iterables"
"profile_benchmark_blue_bench|profiler_instantiate_benchmark_recipe|profiler_generate_benchmark_dataset|profiler_instantiate_model|profiler_infer_predictions|profiler_evaluate_predictions|load_data|load_iterables"
)
s = f.getvalue()
assert s.split("\n")[7].split()[3] == "cumtime"
overall_tot_time = find_cummtime_of(
"profile_benchmark_blue_bench", "bluebench_profiler.py", s
)
load_time = find_cummtime_of("load_data", "loaders.py", s)

just_load_no_initial_ms_time = find_cummtime_of(
"load_iterables", "loaders.py", s
)
instantiate_benchmark_time = find_cummtime_of(
"profiler_instantiate_benchmark_recipe", "bluebench_profiler.py", s
)
generate_benchmark_dataset_time = find_cummtime_of(
"profiler_generate_benchmark_dataset", "bluebench_profiler.py", s
)
instantiate_model_time = find_cummtime_of(
"profiler_instantiate_model", "bluebench_profiler.py", s
)
inference_time = find_cummtime_of(
"profiler_infer_predictions", "bluebench_profiler.py", s
)
evaluation_time = find_cummtime_of(
"profiler_evaluate_predictions", "bluebench_profiler.py", s
)

# Data to be written
dictionary = {
"dataset_query": dataset_query,
"total_time": overall_tot_time,
"load_time": load_time,
"load_time_no_initial_ms": just_load_no_initial_ms_time,
"instantiate_benchmark_time": instantiate_benchmark_time,
"generate_benchmark_dataset_time": generate_benchmark_dataset_time,
"instantiate_model_time": instantiate_model_time,
"inference_time": inference_time,
"evaluation_time": evaluation_time,
"used_eager_mode": settings.use_eager_execution,
"performance.prof file": temp_prof_file_path,
}
Expand Down
62 changes: 34 additions & 28 deletions src/unitxt/operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -1484,28 +1484,32 @@ class IntersectCorrespondingFields(InstanceOperator):

Assume the instances contain a field of 'labels' and a field with the labels' corresponding 'positions' in the text.

IntersectCorrespondingFields(field="label",
allowed_values=["b", "f"],
corresponding_fields_to_intersect=["position"])
.. code-block:: text

IntersectCorrespondingFields(field="label",
allowed_values=["b", "f"],
corresponding_fields_to_intersect=["position"])

would keep only "b" and "f" values in 'labels' field and
their respective values in the 'position' field.
(All other fields are not affected)

Given this input:
.. code-block:: text

[
{"label": ["a", "b"],"position": [0,1],"other" : "not"},
{"label": ["a", "c", "d"], "position": [0,1,2], "other" : "relevant"},
{"label": ["a", "b", "f"], "position": [0,1,2], "other" : "field"}
]
Given this input:

So the output would be:
[
{"label": ["b"], "position":[1],"other" : "not"},
{"label": [], "position": [], "other" : "relevant"},
{"label": ["b", "f"],"position": [1,2], "other" : "field"},
]
[
{"label": ["a", "b"],"position": [0,1],"other" : "not"},
{"label": ["a", "c", "d"], "position": [0,1,2], "other" : "relevant"},
{"label": ["a", "b", "f"], "position": [0,1,2], "other" : "field"}
]

So the output would be:
[
{"label": ["b"], "position":[1],"other" : "not"},
{"label": [], "position": [], "other" : "relevant"},
{"label": ["b", "f"],"position": [1,2], "other" : "field"},
]

Args:
field - the field to be intersected (must contain list values)
Expand Down Expand Up @@ -2367,21 +2371,23 @@ class CollateInstancesByField(StreamOperator):
Example:
Collate the instances based on field "category" and aggregate fields "value" and "id".

CollateInstancesByField(by_field="category", aggregate_fields=["value", "id"])
.. code-block:: text

CollateInstancesByField(by_field="category", aggregate_fields=["value", "id"])

given input:
[
{"id": 1, "category": "A", "value": 10, "flag" : True},
{"id": 2, "category": "B", "value": 20, "flag" : False},
{"id": 3, "category": "A", "value": 30, "flag" : True},
{"id": 4, "category": "B", "value": 40, "flag" : False}
]
given input:
[
{"id": 1, "category": "A", "value": 10, "flag" : True},
{"id": 2, "category": "B", "value": 20, "flag" : False},
{"id": 3, "category": "A", "value": 30, "flag" : True},
{"id": 4, "category": "B", "value": 40, "flag" : False}
]

the output is:
[
{"category": "A", "id": [1, 3], "value": [10, 30], "info": True},
{"category": "B", "id": [2, 4], "value": [20, 40], "info": False}
]
the output is:
[
{"category": "A", "id": [1, 3], "value": [10, 30], "info": True},
{"category": "B", "id": [2, 4], "value": [20, 40], "info": False}
]

Note that the "flag" field is not aggregated, and must be the same
in all instances in the same category, or an error is raised.
Expand Down
Loading