kubeflow · google-oss-prow · Aug 8, 2024 · Jul 2, 2024 · Jul 15, 2024 · Aug 1, 2024
diff --git a/backend/src/v2/compiler/argocompiler/argo.go b/backend/src/v2/compiler/argocompiler/argo.go
@@ -21,6 +21,7 @@ import (
 	wfapi "github.com/argoproj/argo-workflows/v3/pkg/apis/workflow/v1alpha1"
 	"github.com/kubeflow/pipelines/api/v2alpha1/go/pipelinespec"
 	"github.com/kubeflow/pipelines/backend/src/v2/compiler"
+	log "github.com/sirupsen/logrus"
 	"google.golang.org/protobuf/proto"
 	"google.golang.org/protobuf/types/known/structpb"
 	k8score "k8s.io/api/core/v1"
@@ -63,7 +64,7 @@ func Compile(jobArg *pipelinespec.PipelineJob, kubernetesSpecArg *pipelinespec.S
 	if err != nil {
 		return nil, err
 	}
-	// fill root component default paramters to PipelineJob
+	// fill root component default parameters to PipelineJob
 	specParams := spec.GetRoot().GetInputDefinitions().GetParameters()
 	for name, param := range specParams {
 		_, ok := job.RuntimeConfig.ParameterValues[name]
@@ -108,6 +109,9 @@ func Compile(jobArg *pipelinespec.PipelineJob, kubernetesSpecArg *pipelinespec.S
 					"pipelines.kubeflow.org/v2_component": "true",
 				},
 			},
+			Arguments: wfapi.Arguments{
+				Parameters: []wfapi.Parameter{},
+			},
 			ServiceAccountName: "pipeline-runner",
 			Entrypoint:         tmplEntrypoint,
 		},
@@ -180,69 +184,127 @@ func (c *workflowCompiler) templateName(componentName string) string {
 	return componentName
 }
 
-// WIP: store component spec, task spec and executor spec in annotations
-
 const (
-	annotationComponents     = "pipelines.kubeflow.org/components-"
-	annotationContainers     = "pipelines.kubeflow.org/implementations-"
-	annotationKubernetesSpec = "pipelines.kubeflow.org/kubernetes-"
+	argumentsComponents     = "components-"
+	argumentsContainers     = "implementations-"
+	argumentsKubernetesSpec = "kubernetes-"
 )
 
 func (c *workflowCompiler) saveComponentSpec(name string, spec *pipelinespec.ComponentSpec) error {
-	return c.saveProtoToAnnotation(annotationComponents+name, spec)
+	functionName := c.extractFunctionName(name)
+
+	return c.saveProtoToArguments(argumentsComponents+functionName, spec)
 }
 
 // useComponentSpec returns a placeholder we can refer to the component spec
 // in argo workflow fields.
 func (c *workflowCompiler) useComponentSpec(name string) (string, error) {
-	return c.annotationPlaceholder(annotationComponents + name)
+	functionName := c.extractFunctionName(name)
+
+	return c.argumentsPlaceholder(argumentsComponents + functionName)
 }
 
 func (c *workflowCompiler) saveComponentImpl(name string, msg proto.Message) error {
-	return c.saveProtoToAnnotation(annotationContainers+name, msg)
+	functionName := c.extractFunctionName(name)
+
+	return c.saveProtoToArguments(argumentsContainers+functionName, msg)
 }
 
 func (c *workflowCompiler) useComponentImpl(name string) (string, error) {
-	return c.annotationPlaceholder(annotationContainers + name)
+	functionName := c.extractFunctionName(name)
+
+	return c.argumentsPlaceholder(argumentsContainers + functionName)
 }
 
 func (c *workflowCompiler) saveKubernetesSpec(name string, spec *structpb.Struct) error {
-	return c.saveProtoToAnnotation(annotationKubernetesSpec+name, spec)
+	return c.saveProtoToArguments(argumentsKubernetesSpec+name, spec)
 }
 
 func (c *workflowCompiler) useKubernetesImpl(name string) (string, error) {
-	return c.annotationPlaceholder(annotationKubernetesSpec + name)
+	return c.argumentsPlaceholder(argumentsKubernetesSpec + name)
 }
 
-// TODO(Bobgy): sanitize component name
-func (c *workflowCompiler) saveProtoToAnnotation(name string, msg proto.Message) error {
+// saveProtoToArguments saves a proto message to the workflow arguments. The
+// message is serialized to JSON and stored in the workflow arguments and then
+// referenced by the workflow templates using AWF templating syntax. The reason
+// for storing it in the workflow arguments is that there is a one-to-many
+// relationship between components and tasks that reference them. The workflow
+// arguments allow us to deduplicate the component logic (implementation & spec
+// in IR), significantly reducing the size of the argo workflow manifest.
+func (c *workflowCompiler) saveProtoToArguments(componentName string, msg proto.Message) error {
 	if c == nil {
 		return fmt.Errorf("compiler is nil")
 	}
-	if c.wf.Annotations == nil {
-		c.wf.Annotations = make(map[string]string)
+	if c.wf.Spec.Arguments.Parameters == nil {
+		c.wf.Spec.Arguments = wfapi.Arguments{Parameters: []wfapi.Parameter{}}
 	}
-	if _, alreadyExists := c.wf.Annotations[name]; alreadyExists {
-		return fmt.Errorf("annotation %q already exists", name)
+	if c.wf.Spec.Arguments.GetParameterByName(componentName) != nil {
+		return nil
 	}
 	json, err := stablyMarshalJSON(msg)
 	if err != nil {
-		return fmt.Errorf("saving component spec of %q to annotations: %w", name, err)
+		return fmt.Errorf("saving component spec of %q to arguments: %w", componentName, err)
 	}
-	// TODO(Bobgy): verify name adheres to Kubernetes annotation restrictions: https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/#syntax-and-character-set
-	c.wf.Annotations[name] = json
+	c.wf.Spec.Arguments.Parameters = append(c.wf.Spec.Arguments.Parameters, wfapi.Parameter{
+		Name:  componentName,
+		Value: wfapi.AnyStringPtr(json),
+	})
 	return nil
 }
 
-func (c *workflowCompiler) annotationPlaceholder(name string) (string, error) {
+// argumentsPlaceholder checks for the unique component name within the workflow
+// arguments and returns a template tag that references the component in the
+// workflow arguments.
+func (c *workflowCompiler) argumentsPlaceholder(componentName string) (string, error) {
 	if c == nil {
 		return "", fmt.Errorf("compiler is nil")
 	}
-	if _, exists := c.wf.Annotations[name]; !exists {
-		return "", fmt.Errorf("using component spec: failed to find annotation %q", name)
+	if c.wf.Spec.Arguments.GetParameterByName(componentName) == nil {
+		return "", fmt.Errorf("using component spec: failed to find workflow parameter %q", componentName)
+	}
+
+	return workflowParameter(componentName), nil
+}
+
+// extractFunctionName extracts the function name of a component by looking it
+// up in the pipeline spec.
+func (c *workflowCompiler) extractFunctionName(componentName string) string {
 comp-condition-13: 
   dag: 
     tasks: 
       print-text-8: 
         cachingOptions: 
           enableCache: true 
         componentRef: 
           name: comp-print-text-8 
         inputs: 
           parameters: 
             msg: 
               runtimeValue: 
                 constant: '1' 
         taskInfo: 
           name: print-text-8 
   inputDefinitions: 
     parameters: 
       pipelinechannel--flip-coin-op-Output: 
         parameterType: STRING 
       pipelinechannel--loop-item-param-11: 
         parameterType: STRING 
 comp-inner-pipeline: 
   dag: 
     tasks: 
       condition-1: 
         componentRef: 
           name: comp-condition-1 
         dependentTasks: 
         - print-op1 
         inputs: 
           parameters: 
             pipelinechannel--print-op1-Output: 
               taskOutputParameter: 
                 outputParameterKey: Output 
                 producerTask: print-op1 
         taskInfo: 
           name: condition-1 
         triggerPolicy: 
           condition: inputs.parameter_values['pipelinechannel--print-op1-Output'] 
             == 'Hello' 
 exec-container-io: 
   container: 
     args: 
     - --output_path 
     - '{{$.outputs.parameters[''output_path''].output_file}}' 
     command: 
     - my_program 
     - '{{$.inputs.parameters[''text'']}}' 
     image: python:3.7 
 comp-condition-13: 
   dag: 
     tasks: 
       print-text-8: 
         cachingOptions: 
           enableCache: true 
         componentRef: 
           name: comp-print-text-8 
         inputs: 
           parameters: 
             msg: 
               runtimeValue: 
                 constant: '1' 
         taskInfo: 
           name: print-text-8 
   inputDefinitions: 
     parameters: 
       pipelinechannel--flip-coin-op-Output: 
         parameterType: STRING 
       pipelinechannel--loop-item-param-11: 
         parameterType: STRING 
 comp-inner-pipeline: 
   dag: 
     tasks: 
       condition-1: 
         componentRef: 
           name: comp-condition-1 
         dependentTasks: 
         - print-op1 
         inputs: 
           parameters: 
             pipelinechannel--print-op1-Output: 
               taskOutputParameter: 
                 outputParameterKey: Output 
                 producerTask: print-op1 
         taskInfo: 
           name: condition-1 
         triggerPolicy: 
           condition: inputs.parameter_values['pipelinechannel--print-op1-Output'] 
             == 'Hello' 
 exec-container-io: 
   container: 
     args: 
     - --output_path 
     - '{{$.outputs.parameters[''output_path''].output_file}}' 
     command: 
     - my_program 
     - '{{$.inputs.parameters[''text'']}}' 
     image: python:3.7 
+	log.Debug("componentName: ", componentName)
+	// The root component is a DAG and therefore doesn't have a corresponding
+	// executor or function name. The final return statement in this function
+	// would cover this edge case, but this saves us some unnecessary iteration.
+	if componentName == "root" {
+		return componentName
 	}
-	// Reference: https://argoproj.github.io/argo-workflows/variables/
-	return fmt.Sprintf("{{workflow.annotations.%s}}", name), nil
+	executorLabel := c.spec.Components[componentName].GetExecutorLabel()
+	log.Debug("executorLabel: ", executorLabel)
+	// There are more nested conditionals here than we would prefer, but we
+	// don't want to make any assumptions about the presence of specific fields
+	// in the IR.
+	if c.executors != nil {
+		for executorName, executorValue := range c.executors {
+			log.Debug("executorName: ", executorName)
+			if executorName == executorLabel {
+				args := executorValue.GetContainer().GetArgs()
+				if args != nil {
+					if len(args) > 1 {
+						penultimateArg := args[len(args)-2]
+						if penultimateArg == "--function_to_execute" {
+							componentFunctionName := args[len(args)-1]
+							log.Debug("componentFunctionName: ", componentFunctionName)
+							return componentFunctionName
+						}
+					}
+				}
+			}
+		}
+	}
+
+	log.Debug("No corresponding executor for component: ", componentName)
+	// We could theoretically return an error here, but since the only
+	// consequence of not finding a matching executor is reduced deduplication,
+	// this doesn't result in application failure and we therefore continue.
+	return componentName
 }
 
 const (

diff --git a/backend/src/v2/compiler/argocompiler/container.go b/backend/src/v2/compiler/argocompiler/container.go
@@ -358,9 +358,11 @@ func (c *workflowCompiler) addContainerExecutorTemplate(refName string) string {
 		},
 	}
 	// Update pod metadata if it defined in the Kubernetes Spec
-	if kubernetesConfigString, ok := c.wf.Annotations[annotationKubernetesSpec+refName]; ok {
+	kubernetesConfigParam := c.wf.Spec.Arguments.GetParameterByName(argumentsKubernetesSpec + refName)
+
+	if kubernetesConfigParam != nil {
 		k8sExecCfg := &kubernetesplatform.KubernetesExecutorConfig{}
-		if err := jsonpb.UnmarshalString(kubernetesConfigString, k8sExecCfg); err == nil {
+		if err := jsonpb.UnmarshalString(string(*kubernetesConfigParam.Value), k8sExecCfg); err == nil {
 			extendPodMetadata(&executor.Metadata, k8sExecCfg)
 		}
 	}