feat(components): add LLM Eval pipeline parameter for customizing eval dataset reference ground truth field

PiperOrigin-RevId: 579029514
kubeflow · Nov 6, 2023 · 1ad5553 · 1ad5553
1 parent 0948561
commit 1ad5553
Show file tree

Hide file tree

Showing 3 changed files with 8 additions and 6 deletions.
diff --git a/components/google-cloud/RELEASE.md b/components/google-cloud/RELEASE.md
@@ -1,5 +1,6 @@
 ## Upcoming release
 * Bump supported KFP versions to kfp>=2.0.0b10,<=2.4.0
+* Add LLM Eval pipeline parameter for customizing eval dataset reference ground truth field
 
 ## Release 2.5.0
 * Upload tensorboard metrics from `preview.llm.rlhf_pipeline` if a `tensorboard_resource_id` is provided at runtime.

diff --git a/...le_cloud_pipeline_components/_implementation/model_evaluation/llm_evaluation/component.py b/...le_cloud_pipeline_components/_implementation/model_evaluation/llm_evaluation/component.py
@@ -32,7 +32,7 @@ def model_evaluation_text_generation(
     project: str,
     location: str,
     evaluation_task: str = 'text-generation',
-    target_field_name: str = 'instance.ground_truth',
+    target_field_name: str = 'instance.output_text',
     prediction_field_name: str = 'predictions.content',
     predictions_format: str = 'jsonl',
     joined_predictions_gcs_source: dsl.Input[Artifact] = None,

diff --git a/...d_pipeline_components/preview/model_evaluation/evaluation_llm_text_generation_pipeline.py b/...d_pipeline_components/preview/model_evaluation/evaluation_llm_text_generation_pipeline.py
@@ -30,11 +30,12 @@ def evaluation_llm_text_generation_pipeline(  # pylint: disable=dangerous-defaul
     location: str,
     batch_predict_gcs_source_uris: List[str],
     batch_predict_gcs_destination_output_uri: str,
-    batch_predict_model_parameters: Dict[str, str] = {},
     model_name: str = 'publishers/google/models/text-bison@001',
     evaluation_task: str = 'text-generation',
+    target_field_name: str = 'output_text',
     batch_predict_instances_format: str = 'jsonl',
     batch_predict_predictions_format: str = 'jsonl',
+    batch_predict_model_parameters: Dict[str, str] = {},
     machine_type: str = 'e2-highmem-16',
     service_account: str = '',
     network: str = '',
@@ -52,10 +53,11 @@ def evaluation_llm_text_generation_pipeline(  # pylint: disable=dangerous-defaul
   Args:
     project: The GCP project that runs the pipeline components.
     location: The GCP region that runs the pipeline components.
-    batch_predict_gcs_source_uris: Google Cloud Storage URI(-s) to your instances data to run batch prediction on. The instances data should also contain the ground truth (target) data, used for evaluation. May contain wildcards. For more information on wildcards, see https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.
-    batch_predict_gcs_destination_output_uri: The Google Cloud Storage location of the directory where the output is to be written to.
+    batch_predict_gcs_source_uris: Google Cloud Storage URI(-s) to your eval dataset instances data to run batch prediction on. The instances data should also contain the ground truth (target) data, used for evaluation. May contain wildcards. For more information on wildcards, see https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.
+    batch_predict_gcs_destination_output_uri: The Google Cloud Storage location of the directory where the eval pipeline output is to be written to.
     model_name: The Model name used to run evaluation. Must be a publisher Model or a managed Model sharing the same ancestor location. Starting this job has no impact on any existing deployments of the Model and their resources.
     evaluation_task: The task that the large language model will be evaluated on. The evaluation component computes a set of metrics relevant to that specific task. Currently supported tasks are: `summarization`, `question-answering`, `text-generation`.
+    target_field_name: The field name of the eval dataset instance that contains an example reference text response. Alternatively referred to as the ground truth (or ground_truth_column) field. If not set, defaults to `output_text`.
     batch_predict_instances_format: The format in which instances are given, must be one of the Model's supportedInputStorageFormats. Only "jsonl" is currently supported. For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.
     batch_predict_predictions_format: The format in which Vertex AI gives the predictions. Must be one of the Model's supportedOutputStorageFormats. Only "jsonl" is currently supported. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.
     batch_predict_model_parameters: A map of parameters that govern the predictions. Some acceptable parameters include: maxOutputTokens, topK, topP, and temperature.
@@ -102,8 +104,7 @@ def evaluation_llm_text_generation_pipeline(  # pylint: disable=dangerous-defaul
       project=project,
       location=location,
       evaluation_task=evaluation_task,
-      target_field_name='instance.ground_truth',
-      prediction_field_name='predictions.content',
+      target_field_name=f'instance.{target_field_name}',
       predictions_format=batch_predict_predictions_format,
       joined_predictions_gcs_source=batch_predict_task.outputs[
           'gcs_output_directory'