diff --git a/components/google-cloud/google_cloud_pipeline_components/aiplatform/__init__.py b/components/google-cloud/google_cloud_pipeline_components/aiplatform/__init__.py index dfc72b45cc4..bc119268c72 100644 --- a/components/google-cloud/google_cloud_pipeline_components/aiplatform/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/aiplatform/__init__.py @@ -162,7 +162,7 @@ ModelDeployOp = load_component_from_file( os.path.join( - os.path.dirname(__file__), 'endpoint/deploy_model/component.yaml')) + os.path.dirname(__file__), 'model/deploy_model/component.yaml')) ModelUndeployOp = load_component_from_file( os.path.join( diff --git a/components/google-cloud/google_cloud_pipeline_components/aiplatform/endpoint/create_endpoint/component.yaml b/components/google-cloud/google_cloud_pipeline_components/aiplatform/endpoint/create_endpoint/component.yaml index 80afaee2978..d55fcc1c662 100644 --- a/components/google-cloud/google_cloud_pipeline_components/aiplatform/endpoint/create_endpoint/component.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/aiplatform/endpoint/create_endpoint/component.yaml @@ -8,6 +8,8 @@ description: | Required. Project to create the endpoint. location (Optional[str]): Location to create the endpoint. If not set, default to us-central1. + endpoint_name (str): + Required. The resource name of the Endpoint. display_name (str): Required. The user-defined name of the Endpoint. The name can be up to 128 characters long and can be consist @@ -33,14 +35,6 @@ description: | resource is created. If set, this Endpoint and all sub-resources of this Endpoint will be secured by this key. - network (Optional[str]): - The full name of the Google Compute Engine network to which the - Endpoint should be peered. Private services access must already be configured for - the network. If left unspecified, the Endpoint is not peered with any network. 
- - [Format](https://cloud.google.com/compute/docs/reference/rest/v1/networks/insert): - projects/{project}/global/networks/{network}. - Where {project} is a project number, as in '12345', and {network} is network name. Returns: endpoint (google.VertexEndpoint): Artifact tracking the created endpoint. @@ -51,32 +45,47 @@ description: | inputs: - {name: project, type: String} - {name: location, type: String, default: "us-central1"} +- {name: endpoint_name, type: String} - {name: display_name, type: String} - {name: description, type: String, optional: true, default: ''} - {name: labels, type: JsonObject, optional: true, default: '{}'} -- {name: encryption_spec_key_name, type: String, optional: true, default: ''} -- {name: network, type: String, optional: true, default: ''} +- {name: encryption_spec_key_name, type: String, optional: true} outputs: - {name: endpoint, type: google.VertexEndpoint} - {name: gcp_resources, type: String} implementation: container: image: gcr.io/ml-pipeline/google-cloud-pipeline-components:latest - command: [python3, -u, -m, google_cloud_pipeline_components.container.v1.endpoint.create_endpoint.launcher] - args: [ - --type, CreateEndpoint, - --payload, - concat: [ - '{', - '"display_name": "', {inputValue: display_name}, '"', - ', "description": "', {inputValue: description}, '"', - ', "labels": ', {inputValue: labels}, - ', "encryption_spec": {"kms_key_name":"', {inputValue: encryption_spec_key_name}, '"}', - ', "network": "', {inputValue: network}, '"', - '}' - ], - --project, {inputValue: project}, - --location, {inputValue: location}, - --gcp_resources, {outputPath: gcp_resources}, - --executor_input, "{{$}}", - ] + command: [python3, -u, -m, google_cloud_pipeline_components.container.aiplatform.remote_runner, --cls_name, Endpoint, --method_name, create] + args: + - --init.project + - {inputValue: project} + - --init.location + - {inputValue: location} + - --init.endpoint_name + - {inputValue: endpoint_name} + - --method.project + - 
{inputValue: project} + - --method.location + - {inputValue: location} + - --method.display_name + - {inputValue: display_name} + - if: + cond: {isPresent: description} + then: + - --method.description + - {inputValue: description} + - if: + cond: {isPresent: labels} + then: + - --method.labels + - {inputValue: labels} + - if: + cond: {isPresent: encryption_spec_key_name} + then: + - --method.encryption_spec_key_name + - {inputValue: encryption_spec_key_name} + - --gcp_resources + - {outputPath: gcp_resources} + - --executor_input + - "{{$}}" diff --git a/components/google-cloud/google_cloud_pipeline_components/aiplatform/endpoint/delete_endpoint/component.yaml b/components/google-cloud/google_cloud_pipeline_components/aiplatform/endpoint/delete_endpoint/component.yaml index f52b51ce400..906f6a6197f 100644 --- a/components/google-cloud/google_cloud_pipeline_components/aiplatform/endpoint/delete_endpoint/component.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/aiplatform/endpoint/delete_endpoint/component.yaml @@ -6,6 +6,10 @@ description: | Args: endpoint (google.VertexEndpoint): Required. The endpoint to be deleted. + force (Optional[bool]): + Optional. If force is set to True, all deployed models on this Endpoint will be undeployed first. Default is False. + sync (Optional[bool]): + Optional. Whether to execute this method synchronously. Returns: gcp_resources (str): @@ -14,21 +18,28 @@ description: | For more details, see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. 
inputs:
- {name: endpoint, type: google.VertexEndpoint}
+- {name: force, type: Boolean, optional: true, default: False}
+- {name: sync, type: Boolean, optional: true, default: True}
outputs:
- {name: gcp_resources, type: String}
implementation:
  container:
    image: gcr.io/ml-pipeline/google-cloud-pipeline-components:latest
-    command: [python3, -u, -m, google_cloud_pipeline_components.container.v1.endpoint.delete_endpoint.launcher]
-    args: [
-      --type, DeleteEndpoint,
-      --payload,
-      concat: [
-        '{',
-        '"endpoint": "', "{{$.inputs.artifacts['endpoint'].metadata['resourceName']}}", '"',
-        '}'
-      ],
-      --project, '', # not being used
-      --location, '', # not being used
-      --gcp_resources, {outputPath: gcp_resources},
-    ]
+    command: [python3, -u, -m, google_cloud_pipeline_components.container.aiplatform.remote_runner, --cls_name, Endpoint, --method_name, delete]
+    args:
+    - --init.endpoint_name
+    - "{{$.inputs.artifacts['endpoint'].metadata['resourceName']}}"
+    - if:
+        cond: {isPresent: force}
+        then:
+        - --method.force
+        - {inputValue: force}
+    - if:
+        cond: {isPresent: sync}
+        then:
+        - --method.sync
+        - {inputValue: sync}
+    - --gcp_resources
+    - {outputPath: gcp_resources}
+    - --executor_input
+    - "{{$}}"
diff --git a/components/google-cloud/google_cloud_pipeline_components/aiplatform/endpoint/deploy_model/component.yaml b/components/google-cloud/google_cloud_pipeline_components/aiplatform/endpoint/deploy_model/component.yaml
deleted file mode 100644
index 7fffb1db9f1..00000000000
--- a/components/google-cloud/google_cloud_pipeline_components/aiplatform/endpoint/deploy_model/component.yaml
+++ /dev/null
@@ -1,170 +0,0 @@
-name: model_deploy
-description: |
-  Deploys a Google Cloud Vertex Model to the Endpoint, creating a DeployedModel within it.
-  For more details, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/deployModel.
-
-  Args:
-    model (google.VertexModel):
-      Required. The model to be deployed.
- endpoint (google.VertexEndpoint): - Required. The endpoint to be deployed to. - deployed_model_display_name (Optional[str]): - The display name of the DeployedModel. If not provided - upon creation, the Model's display_name is used. - traffic_split (Optional[Dict[str, int]]): - A map from a DeployedModel's ID to the percentage - of this Endpoint's traffic that should be forwarded to that DeployedModel. - - If this field is non-empty, then the Endpoint's trafficSplit - will be overwritten with it. To refer to the ID of the just - being deployed Model, a "0" should be used, and the actual ID - of the new DeployedModel will be filled in its place by this method. - The traffic percentage values must add up to 100. - - If this field is empty, then the Endpoint's trafficSplit is not updated. - dedicated_resources_machine_type (Optional[str]): - The specification of a single machine used by the prediction. - - This field is required if `automatic_resources_min_replica_count` is not specified. - - For more details, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints#dedicatedresources. - dedicated_resources_accelerator_type (Optional[str]): - Hardware accelerator type. Must also set accelerator_count if used. - See https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#AcceleratorType - for available options. - - This field is required if `dedicated_resources_machine_type` is specified. - dedicated_resources_accelerator_count (Optional[int]): - The number of accelerators to attach to a worker replica. - dedicated_resources_min_replica_count (Optional[int]): - The minimum number of machine replicas this DeployedModel will be - always deployed on. This value must be greater than or equal to 1. - If traffic against the DeployedModel increases, it may dynamically be deployed - onto more replicas, and as traffic decreases, some of these extra replicas may be freed. 
- dedicated_resources_max_replica_count (Optional[int]): - The maximum number of replicas this deployed model may - the larger value of min_replica_count or 1 will - be used. If value provided is smaller than min_replica_count, it - will automatically be increased to be min_replica_count. - The maximum number of replicas this deployed model may - be deployed on when the traffic against it increases. If requested - value is too large, the deployment will error, but if deployment - succeeds then the ability to scale the model to that many replicas - is guaranteed (barring service outages). If traffic against the - deployed model increases beyond what its replicas at maximum may - handle, a portion of the traffic will be dropped. If this value - is not provided, will use dedicated_resources_min_replica_count as - the default value. - automatic_resources_min_replica_count (Optional[int]): - The minimum number of replicas this DeployedModel - will be always deployed on. If traffic against it increases, - it may dynamically be deployed onto more replicas up to - automatic_resources_max_replica_count, and as traffic decreases, - some of these extra replicas may be freed. If the requested value - is too large, the deployment will error. - - This field is required if `dedicated_resources_machine_type` is not specified. - automatic_resources_max_replica_count (Optional[int]): - The maximum number of replicas this DeployedModel may - be deployed on when the traffic against it increases. If the requested - value is too large, the deployment will error, but if deployment - succeeds then the ability to scale the model to that many replicas - is guaranteed (barring service outages). If traffic against the - DeployedModel increases beyond what its replicas at maximum may handle, - a portion of the traffic will be dropped. 
If this value is not provided, - a no upper bound for scaling under heavy traffic will be assume, - though Vertex AI may be unable to scale beyond certain replica number. - service_account (Optional[str]): - The service account that the DeployedModel's container runs as. Specify the - email address of the service account. If this service account is not - specified, the container runs as a service account that doesn't have access - to the resource project. - - Users deploying the Model must have the `iam.serviceAccounts.actAs` - permission on this service account. - disable_container_logging (Optional[bool]): - For custom-trained Models and AutoML Tabular Models, the container of the - DeployedModel instances will send stderr and stdout streams to Stackdriver - Logging by default. Please note that the logs incur cost, which are subject - to Cloud Logging pricing. - - User can disable container logging by setting this flag to true. - enable_access_logging (Optional[bool]): - These logs are like standard server access logs, containing information like - timestamp and latency for each prediction request. - - Note that Stackdriver logs may incur a cost, especially if your project - receives prediction requests at a high queries per second rate (QPS). - Estimate your costs before enabling this option. - explanation_metadata (Optional[dict]): - Metadata describing the Model's input and output for explanation. - - For more details, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata. - explanation_parameters (Optional[dict]): - Parameters that configure explaining information of the Model's predictions. - - For more details, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata. - Returns: - gcp_resources (str): - Serialized gcp_resources proto tracking the deploy model's long running operation. 
- - For more details, see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. -inputs: -- {name: model, type: google.VertexModel} -- {name: endpoint, type: google.VertexEndpoint, optional: true} -- {name: deployed_model_display_name, type: String, optional: true, default: ''} -- {name: traffic_split, type: JsonObject, optional: true, default: '{}'} -- {name: dedicated_resources_machine_type, type: String, optional: true, default: ''} -- {name: dedicated_resources_min_replica_count, type: Integer, optional: true, default: 0} -- {name: dedicated_resources_max_replica_count, type: Integer, optional: true, default: 0} -- {name: dedicated_resources_accelerator_type, type: String, optional: true, default: ''} -- {name: dedicated_resources_accelerator_count, type: Integer, optional: true, default: 0} -- {name: automatic_resources_min_replica_count, type: Integer, optional: true, default: 0} -- {name: automatic_resources_max_replica_count, type: Integer, optional: true, default: 0} -- {name: service_account, type: String, optional: true, default: ''} -- {name: disable_container_logging, type: Boolean, optional: true, default: False} -- {name: enable_access_logging, type: Boolean, optional: true, default: False} -- {name: explanation_metadata, type: JsonObject, optional: true, default: '{}'} -- {name: explanation_parameters, type: JsonObject, optional: true, default: '{}'} -outputs: -- {name: gcp_resources, type: String} -implementation: - container: - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:latest - command: [python3, -u, -m, google_cloud_pipeline_components.container.v1.endpoint.deploy_model.launcher] - args: [ - --type, DeployModel, - --payload, - concat: [ - '{', - '"endpoint": "', "{{$.inputs.artifacts['endpoint'].metadata['resourceName']}}", '"', - ', "traffic_split": ', {inputValue: traffic_split}, - ', "deployed_model": {', - '"model": "', 
"{{$.inputs.artifacts['model'].metadata['resourceName']}}", '"', - ', "dedicated_resources": {', - '"machine_spec": {', - '"machine_type": "',{inputValue: dedicated_resources_machine_type}, '"', - ', "accelerator_type": "',{inputValue: dedicated_resources_accelerator_type}, '"', - ', "accelerator_count": ',{inputValue: dedicated_resources_accelerator_count}, - '}', - ', "min_replica_count": ', {inputValue: dedicated_resources_min_replica_count}, - ', "max_replica_count": ', {inputValue: dedicated_resources_max_replica_count}, - '}', - ', "automatic_resources": {', - '"min_replica_count": ',{inputValue: automatic_resources_min_replica_count}, - ', "max_replica_count": ',{inputValue: automatic_resources_max_replica_count}, - '}', - ', "service_account": "', {inputValue: service_account}, '"', - ', "disable_container_logging": ', {inputValue: disable_container_logging}, - ', "enable_access_logging": ', {inputValue: enable_access_logging}, - ', "explanation_spec": {', - '"parameters": ', {inputValue: explanation_parameters}, - ', "metadata": ', {inputValue: explanation_metadata}, - '}', - '}', - '}' - ], - --project, '', # not being used - --location, '', # not being used - --gcp_resources, {outputPath: gcp_resources}, - ] diff --git a/components/google-cloud/google_cloud_pipeline_components/aiplatform/model/deploy_model/component.yaml b/components/google-cloud/google_cloud_pipeline_components/aiplatform/model/deploy_model/component.yaml new file mode 100644 index 00000000000..866b3b670ec --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/aiplatform/model/deploy_model/component.yaml @@ -0,0 +1,211 @@ +name: model_deploy +description: | + Deploys a Google Cloud Vertex Model to the Endpoint, creating a DeployedModel within it. + For more details, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/deployModel. + + Args: + model (google.VertexModel): + Required. The model to be deployed. 
+ endpoint (Optional[google.VertexEndpoint]): + Optional. The endpoint to be deployed to. + deployed_model_display_name (Optional[str]): + The display name of the DeployedModel. If not provided + upon creation, the Model's display_name is used. + traffic_split (Optional[Dict[str, int]]): + A map from a DeployedModel's ID to the percentage + of this Endpoint's traffic that should be forwarded to that DeployedModel. + + If this field is non-empty, then the Endpoint's trafficSplit + will be overwritten with it. To refer to the ID of the just + being deployed Model, a "0" should be used, and the actual ID + of the new DeployedModel will be filled in its place by this method. + The traffic percentage values must add up to 100. + + If this field is empty, then the Endpoint's trafficSplit is not updated. + machine_type (Optional[str]): + The specification of a single machine used by the prediction. + + This field is required if `min_replica_count` is not specified. + + For more details, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints#dedicatedresources. + accelerator_type (Optional[str]): + Hardware accelerator type. Must also set accelerator_count if used. + See https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#AcceleratorType + for available options. + + This field is required if `machine_type` is specified. + accelerator_count (Optional[int]): + The number of accelerators to attach to a worker replica. + min_replica_count (Optional[int]): + The minimum number of machine replicas this DeployedModel will be + always deployed on. This value must be greater than or equal to 1. + If traffic against the DeployedModel increases, it may dynamically be deployed + onto more replicas, and as traffic decreases, some of these extra replicas may be freed. + max_replica_count (Optional[int]): + The maximum number of replicas this deployed model may + the larger value of min_replica_count or 1 will + be used. 
If value provided is smaller than min_replica_count, it + will automatically be increased to be min_replica_count. + The maximum number of replicas this deployed model may + be deployed on when the traffic against it increases. If requested + value is too large, the deployment will error, but if deployment + succeeds then the ability to scale the model to that many replicas + is guaranteed (barring service outages). If traffic against the + deployed model increases beyond what its replicas at maximum may + handle, a portion of the traffic will be dropped. If this value + is not provided, will use min_replica_count as + the default value. + service_account (Optional[str]): + The service account that the DeployedModel's container runs as. Specify the + email address of the service account. If this service account is not + specified, the container runs as a service account that doesn't have access + to the resource project. + + Users deploying the Model must have the `iam.serviceAccounts.actAs` + permission on this service account. + explanation_metadata (Optional[dict]): + Metadata describing the Model's input and output for explanation. + + For more details, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata. + explanation_parameters (Optional[dict]): + Parameters that configure explaining information of the Model's predictions. + + For more details, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata. + encryption_spec_key_name (Optional[str]): + Optional. The Cloud KMS resource identifier of the customer managed encryption key used to protect the model. + network (Optional[str]) + Optional. The full name of the Compute Engine network to which the Endpoint, if created, will be peered to. + deploy_request_timeout (Optional[float]) + Optional. The timeout for the deploy request in seconds. + autoscaling_target_cpu_utilization (Optional[int]) + Optional. 
Target CPU Utilization to use for Autoscaling Replicas. + autoscaling_target_accelerator_duty_cycle (Optional[int]) + Optional. Target Accelerator Duty Cycle. + sync (Optional[bool]) + Optional. Whether to execute this method synchronously. + Returns: + endpoint (google.VertexEndpoint): + Endpoint with the deployed model. + gcp_resources (str): + Serialized gcp_resources proto tracking the deploy model's long running operation. + + For more details, see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. +inputs: +- {name: model, type: google.VertexModel} +- {name: endpoint, type: google.VertexEndpoint, optional: true} +- {name: deployed_model_display_name, type: String, optional: true, default: ''} +- {name: traffic_split, type: JsonObject, optional: true, default: '{}'} +- {name: machine_type, type: String, optional: true, default: ''} +- {name: min_replica_count, type: Integer, optional: true, default: 0} +- {name: max_replica_count, type: Integer, optional: true, default: 0} +- {name: accelerator_type, type: String, optional: true, default: ''} +- {name: accelerator_count, type: Integer, optional: true, default: 0} +- {name: service_account, type: String, optional: true, default: ''} +- {name: explanation_metadata, type: JsonObject, optional: true, default: '{}'} +- {name: explanation_parameters, type: JsonObject, optional: true, default: '{}'} +- {name: encryption_spec_key_name, type: String, optional: true, default: ''} +- {name: network, type: String, optional: true, default: ''} +- {name: sync, type: Boolean, optional: true, default: True} +- {name: deploy_request_timeout, type: Float, optional: true} +- {name: autoscaling_target_cpu_utilization, type: Integer, optional: true, default: 60} +- {name: autoscaling_target_accelerator_duty_cycle, type: Integer, optional: true, default: 60} +outputs: +- {name: endpoint, type: google.VertexEndpoint} +- {name: gcp_resources, type: String} 
+implementation: + container: + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:latest + command: [python3, -m, google_cloud_pipeline_components.container.aiplatform.remote_runner, + --cls_name, Model, --method_name, deploy] + args: + - --init.model_name + - "{{$.inputs.artifacts['model'].metadata['resourceName']}}" + - if: + cond: {isPresent: endpoint} + then: + - --method.endpoint + - "{{$.inputs.artifacts['endpoint'].metadata['resourceName']}}" + - if: + cond: {isPresent: deployed_model_display_name} + then: + - --method.deployed_model_display_name + - {inputValue: deployed_model_display_name} + - if: + cond: {isPresent: traffic_split} + then: + - --method.traffic_split + - {inputValue: traffic_split} + - if: + cond: {isPresent: machine_type} + then: + - --method.machine_type + - {inputValue: machine_type} + - if: + cond: {isPresent: min_replica_count} + then: + - --method.min_replica_count + - {inputValue: min_replica_count} + - if: + cond: {isPresent: max_replica_count} + then: + - --method.max_replica_count + - {inputValue: max_replica_count} + - if: + cond: {isPresent: accelerator_type} + then: + - --method.accelerator_type + - {inputValue: accelerator_type} + - if: + cond: {isPresent: accelerator_count} + then: + - --method.accelerator_count + - {inputValue: accelerator_count} + - if: + cond: {isPresent: service_account} + then: + - --method.service_account + - {inputValue: service_account} + - if: + cond: {isPresent: explanation_metadata} + then: + - --method.explanation_metadata + - {inputValue: explanation_metadata} + - if: + cond: {isPresent: explanation_parameters} + then: + - --method.explanation_parameters + - {inputValue: explanation_parameters} + - if: + cond: {isPresent: encryption_spec_key_name} + then: + - --method.encryption_spec_key_name + - {inputValue: encryption_spec_key_name} + - if: + cond: {isPresent: network} + then: + - --method.network + - {inputValue: network} + - if: + cond: {isPresent: sync} + then: + - --method.sync + - 
{inputValue: sync} + - if: + cond: {isPresent: deploy_request_timeout} + then: + - --method.deploy_request_timeout + - {inputValue: deploy_request_timeout} + - if: + cond: {isPresent: autoscaling_target_cpu_utilization} + then: + - --method.autoscaling_target_cpu_utilization + - {inputValue: autoscaling_target_cpu_utilization} + - if: + cond: {isPresent: autoscaling_target_accelerator_duty_cycle} + then: + - --method.autoscaling_target_accelerator_duty_cycle + - {inputValue: autoscaling_target_accelerator_duty_cycle} + - --gcp_resources + - {outputPath: gcp_resources} + - --executor_input + - "{{$}}" diff --git a/components/google-cloud/tests/aiplatform/integration/test_components_compile.py b/components/google-cloud/tests/aiplatform/integration/test_components_compile.py index 82b68d8ce18..6639cab48db 100644 --- a/components/google-cloud/tests/aiplatform/integration/test_components_compile.py +++ b/components/google-cloud/tests/aiplatform/integration/test_components_compile.py @@ -386,14 +386,16 @@ def pipeline(): create_endpoint_op = EndpointCreateOp( project=self._project, location=self._location, + endpoint_name="endpoint-name", display_name=self._display_name, description="some description", labels={"foo": "bar"}, - network="abc", encryption_spec_key_name="some encryption_spec_key_name") delete_endpoint_op = EndpointDeleteOp( - endpoint=create_endpoint_op.outputs["endpoint"]) + endpoint=create_endpoint_op.outputs["endpoint"], + force=False, + sync=True,) compiler.Compiler().compile( pipeline_func=pipeline, package_path=self._package_path) @@ -455,16 +457,20 @@ def pipeline(): endpoint=create_endpoint_op.outputs["endpoint"], deployed_model_display_name="deployed_model_display_name", traffic_split={}, - dedicated_resources_machine_type="n1-standard-4", - dedicated_resources_min_replica_count=1, - dedicated_resources_max_replica_count=2, - dedicated_resources_accelerator_type="fake-accelerator", - dedicated_resources_accelerator_count=1, - 
automatic_resources_min_replica_count=1, - automatic_resources_max_replica_count=2, + machine_type="n1-standard-4", + min_replica_count=1, + max_replica_count=2, + accelerator_type="fake-accelerator", + accelerator_count=1, service_account="fake-sa", explanation_metadata={"xai_m": "bar"}, explanation_parameters={"xai_p": "foo"}, + encryption_spec_key_name="some encryption_spec_key_name", + network="fake-network", + sync=True, + deploy_request_timeout=600, + autoscaling_target_cpu_utilization=1, + autoscaling_target_accelerator_duty_cycle=1, ) _ = ModelUndeployOp( diff --git a/components/google-cloud/tests/aiplatform/testdata/create_and_delete_endpoint_pipeline.json b/components/google-cloud/tests/aiplatform/testdata/create_and_delete_endpoint_pipeline.json index ecb78027c44..0ef910118a9 100644 --- a/components/google-cloud/tests/aiplatform/testdata/create_and_delete_endpoint_pipeline.json +++ b/components/google-cloud/tests/aiplatform/testdata/create_and_delete_endpoint_pipeline.json @@ -13,15 +13,15 @@ "encryption_spec_key_name":{ "parameterType":"STRING" }, + "endpoint_name":{ + "parameterType":"STRING" + }, "labels":{ "parameterType":"STRUCT" }, "location":{ "parameterType":"STRING" }, - "network":{ - "parameterType":"STRING" - }, "project":{ "parameterType":"STRING" } @@ -53,6 +53,14 @@ "schemaVersion":"0.0.1" } } + }, + "parameters":{ + "force":{ + "parameterType":"BOOLEAN" + }, + "sync":{ + "parameterType":"BOOLEAN" + } } }, "outputDefinitions":{ @@ -69,14 +77,24 @@ "exec-endpoint-create":{ "container":{ "args":[ - "--type", - "CreateEndpoint", - "--payload", - "{\"display_name\": \"{{$.inputs.parameters['display_name']}}\", \"description\": \"{{$.inputs.parameters['description']}}\", \"labels\": {{$.inputs.parameters['labels']}}, \"encryption_spec\": {\"kms_key_name\":\"{{$.inputs.parameters['encryption_spec_key_name']}}\"}, \"network\": \"{{$.inputs.parameters['network']}}\"}", - "--project", + "--init.project", + "{{$.inputs.parameters['project']}}", + 
"--init.location", + "{{$.inputs.parameters['location']}}", + "--init.endpoint_name", + "{{$.inputs.parameters['endpoint_name']}}", + "--method.project", "{{$.inputs.parameters['project']}}", - "--location", + "--method.location", "{{$.inputs.parameters['location']}}", + "--method.display_name", + "{{$.inputs.parameters['display_name']}}", + "--method.description", + "{{$.inputs.parameters['description']}}", + "--method.labels", + "{{$.inputs.parameters['labels']}}", + "--method.encryption_spec_key_name", + "{{$.inputs.parameters['encryption_spec_key_name']}}", "--gcp_resources", "{{$.outputs.parameters['gcp_resources'].output_file}}", "--executor_input", @@ -86,7 +104,7 @@ "python3", "-u", "-m", - "google_cloud_pipeline_components.container.v1.endpoint.create_endpoint.launcher" + "google_cloud_pipeline_components.container.aiplatform.remote_runner, --cls_name, Endpoint, --method_name, create" ], "image":"gcr.io/ml-pipeline/google-cloud-pipeline-components:latest" } @@ -94,14 +112,12 @@ "exec-endpoint-delete":{ "container":{ "args":[ - "--type", - "DeleteEndpoint", - "--payload", - "{\"endpoint\": \"{{$.inputs.artifacts['endpoint'].metadata['resourceName']}}\"}", - "--project", - "", - "--location", - "", + "--init.endpoint_name", + "{{$.inputs.artifacts['endpoint'].metadata['resourceName']}}", + "--method.force", + "{{$.inputs.parameters['force']}}", + "--method.sync", + "{{$.inputs.parameters['sync']}}", "--gcp_resources", "{{$.outputs.parameters['gcp_resources'].output_file}}" ], @@ -109,7 +125,7 @@ "python3", "-u", "-m", - "google_cloud_pipeline_components.container.v1.endpoint.delete_endpoint.launcher" + "google_cloud_pipeline_components.container.aiplatform.remote_runner, --cls_name, Endpoint, --method_name, delete" ], "image":"gcr.io/ml-pipeline/google-cloud-pipeline-components:latest" } @@ -146,6 +162,11 @@ "constant":"some encryption_spec_key_name" } }, + "endpoint_name":{ + "runtimeValue":{ + "constant":"endpoint-name" + } + }, "labels":{ "runtimeValue":{ 
"constant":{ @@ -158,11 +179,6 @@ "constant":"us-central1" } }, - "network":{ - "runtimeValue":{ - "constant":"abc" - } - }, "project":{ "runtimeValue":{ "constant":"test_project" @@ -192,6 +208,18 @@ "producerTask":"endpoint-create" } } + }, + "parameters":{ + "force":{ + "runtimeValue":{ + "constant":0.0 + } + }, + "sync":{ + "runtimeValue":{ + "constant":1.0 + } + } } }, "taskInfo":{ diff --git a/components/google-cloud/tests/aiplatform/testdata/model_deploy_and_undeploy_pipeline.json b/components/google-cloud/tests/aiplatform/testdata/model_deploy_and_undeploy_pipeline.json index 3f605abf0c2..3df4dfc73b9 100644 --- a/components/google-cloud/tests/aiplatform/testdata/model_deploy_and_undeploy_pipeline.json +++ b/components/google-cloud/tests/aiplatform/testdata/model_deploy_and_undeploy_pipeline.json @@ -7,6 +7,9 @@ "display_name":{ "parameterType":"STRING" }, + "endpoint_name":{ + "parameterType":"STRING" + }, "location":{ "parameterType":"STRING" }, @@ -49,25 +52,19 @@ } }, "parameters":{ - "automatic_resources_max_replica_count":{ + "accelerator_count":{ "parameterType":"NUMBER_INTEGER" }, - "automatic_resources_min_replica_count":{ - "parameterType":"NUMBER_INTEGER" - }, - "dedicated_resources_accelerator_count":{ - "parameterType":"NUMBER_INTEGER" - }, - "dedicated_resources_accelerator_type":{ + "accelerator_type":{ "parameterType":"STRING" }, - "dedicated_resources_machine_type":{ + "machine_type":{ "parameterType":"STRING" }, - "dedicated_resources_max_replica_count":{ + "max_replica_count":{ "parameterType":"NUMBER_INTEGER" }, - "dedicated_resources_min_replica_count":{ + "min_replica_count":{ "parameterType":"NUMBER_INTEGER" }, "deployed_model_display_name":{ @@ -84,10 +81,36 @@ }, "traffic_split":{ "parameterType":"STRUCT" + }, + "encryption_spec_key_name":{ + "parameterType":"STRING" + }, + "network":{ + "parameterType":"STRING" + }, + "sync":{ + "parameterType":"BOOLEAN" + }, + "deploy_request_timeout":{ + "parameterType":"NUMBER_FLOAT" + }, + 
"autoscaling_target_cpu_utilization":{ + "parameterType":"NUMBER_INTEGER" + }, + "autoscaling_target_accelerator_duty_cycle":{ + "parameterType":"NUMBER_INTEGER" } } }, "outputDefinitions":{ + "artifacts":{ + "endpoint":{ + "artifactType":{ + "schemaTitle":"google.VertexEndpoint", + "schemaVersion":"0.0.1" + } + } + }, "parameters":{ "gcp_resources":{ "parameterType":"STRING" @@ -161,14 +184,24 @@ "exec-endpoint-create":{ "container":{ "args":[ - "--type", - "CreateEndpoint", - "--payload", - "{\"display_name\": \"{{$.inputs.parameters['display_name']}}\", \"description\": \"\", \"labels\": , \"encryption_spec\": {\"kms_key_name\":\"\"}, \"network\": \"\"}", - "--project", + "--init.project", "{{$.inputs.parameters['project']}}", - "--location", + "--init.location", + "{{$.inputs.parameters['location']}}", + "--init.endpoint_name", + "{{$.inputs.parameters['endpoint_name']}}", + "--method.project", + "{{$.inputs.parameters['project']}}", + "--method.location", "{{$.inputs.parameters['location']}}", + "--method.display_name", + "{{$.inputs.parameters['display_name']}}", + "--method.description", + "{{$.inputs.parameters['description']}}", + "--method.labels", + "{{$.inputs.parameters['labels']}}", + "--method.encryption_spec_key_name", + "{{$.inputs.parameters['encryption_spec_key_name']}}", "--gcp_resources", "{{$.outputs.parameters['gcp_resources'].output_file}}", "--executor_input", @@ -178,7 +211,7 @@ "python3", "-u", "-m", - "google_cloud_pipeline_components.container.v1.endpoint.create_endpoint.launcher" + "google_cloud_pipeline_components.container.aiplatform.remote_runner, --cls_name, Endpoint, --method_name, create" ], "image":"gcr.io/ml-pipeline/google-cloud-pipeline-components:latest" } @@ -186,22 +219,52 @@ "exec-model-deploy":{ "container":{ "args":[ - "--type", - "DeployModel", - "--payload", - "{\"endpoint\": \"{{$.inputs.artifacts['endpoint'].metadata['resourceName']}}\", \"traffic_split\": {{$.inputs.parameters['traffic_split']}}, 
\"deployed_model\": {\"model\": \"{{$.inputs.artifacts['model'].metadata['resourceName']}}\", \"dedicated_resources\": {\"machine_spec\": {\"machine_type\": \"{{$.inputs.parameters['dedicated_resources_machine_type']}}\", \"accelerator_type\": \"{{$.inputs.parameters['dedicated_resources_accelerator_type']}}\", \"accelerator_count\": {{$.inputs.parameters['dedicated_resources_accelerator_count']}}}, \"min_replica_count\": {{$.inputs.parameters['dedicated_resources_min_replica_count']}}, \"max_replica_count\": {{$.inputs.parameters['dedicated_resources_max_replica_count']}}}, \"automatic_resources\": {\"min_replica_count\": {{$.inputs.parameters['automatic_resources_min_replica_count']}}, \"max_replica_count\": {{$.inputs.parameters['automatic_resources_max_replica_count']}}}, \"service_account\": \"{{$.inputs.parameters['service_account']}}\", \"disable_container_logging\": , \"enable_access_logging\": , \"explanation_spec\": {\"parameters\": {{$.inputs.parameters['explanation_parameters']}}, \"metadata\": {{$.inputs.parameters['explanation_metadata']}}}}}", - "--project", - "", - "--location", - "", + "--init.model_name", + "{{$.inputs.artifacts['model'].metadata['resourceName']}}", + "--method.endpoint", + "{{$.inputs.artifacts['endpoint'].metadata['resourceName']}}", + "--method.deployed_model_display_name", + "{{$.inputs.parameters['deployed_model_display_name']}}", + "--method.traffic_split", + "{{$.inputs.parameters['traffic_split']}}", + "--method.machine_type", + "{{$.inputs.parameters['machine_type']}}", + "--method.min_replica_count", + "{{$.inputs.parameters['min_replica_count']}}", + "--method.max_replica_count", + "{{$.inputs.parameters['max_replica_count']}}", + "--method.accelerator_type", + "{{$.inputs.parameters['accelerator_type']}}", + "--method.accelerator_count", + "{{$.inputs.parameters['accelerator_count']}}", + "--method.service_account", + "{{$.inputs.parameters['service_account']}}", + "--method.explanation_metadata", + 
"{{$.inputs.parameters['explanation_metadata']}}", + "--method.explanation_parameters", + "{{$.inputs.parameters['explanation_parameters']}}", + "--method.encryption_spec_key_name", + "{{$.inputs.parameters['encryption_spec_key_name']}}", + "--method.network", + "{{$.inputs.parameters['network']}}", + "--method.sync", + "{{$.inputs.parameters['sync']}}", + "--method.deploy_request_timeout", + "{{$.inputs.parameters['deploy_request_timeout']}}", + "--method.autoscaling_target_cpu_utilization", + "{{$.inputs.parameters['autoscaling_target_cpu_utilization']}}", + "--method.autoscaling_target_accelerator_duty_cycle", + "{{$.inputs.parameters['autoscaling_target_accelerator_duty_cycle']}}", "--gcp_resources", - "{{$.outputs.parameters['gcp_resources'].output_file}}" + "{{$.outputs.parameters['gcp_resources'].output_file}}", + "--executor_input", + "{{$}}" ], "command":[ "python3", "-u", "-m", - "google_cloud_pipeline_components.container.v1.endpoint.deploy_model.launcher" + "google_cloud_pipeline_components.container.aiplatform.remote_runner, --cls_name, Model, --method_name, deploy" ], "image":"gcr.io/ml-pipeline/google-cloud-pipeline-components:latest" } @@ -318,37 +381,27 @@ } }, "parameters":{ - "automatic_resources_max_replica_count":{ - "runtimeValue":{ - "constant":2.0 - } - }, - "automatic_resources_min_replica_count":{ + "accelerator_count":{ "runtimeValue":{ "constant":1.0 } }, - "dedicated_resources_accelerator_count":{ - "runtimeValue":{ - "constant":1.0 - } - }, - "dedicated_resources_accelerator_type":{ + "accelerator_type":{ "runtimeValue":{ "constant":"fake-accelerator" } }, - "dedicated_resources_machine_type":{ + "machine_type":{ "runtimeValue":{ "constant":"n1-standard-4" } }, - "dedicated_resources_max_replica_count":{ + "max_replica_count":{ "runtimeValue":{ "constant":2.0 } }, - "dedicated_resources_min_replica_count":{ + "min_replica_count":{ "runtimeValue":{ "constant":1.0 } @@ -383,6 +436,36 @@ } } + }, + "encryption_spec_key_name":{ + 
"runtimeValue":{ + "constant":"some encryption_spec_key_name" + } + }, + "network":{ + "runtimeValue":{ + "constant":"fake-network" + } + }, + "sync":{ + "runtimeValue":{ + "constant":1.0 + } + }, + "deploy_request_timeout":{ + "runtimeValue":{ + "constant":600.0 + } + }, + "autoscaling_target_cpu_utilization":{ + "runtimeValue":{ + "constant":1.0 + } + }, + "autoscaling_target_accelerator_duty_cycle":{ + "runtimeValue":{ + "constant":1.0 + } } } }, diff --git a/requirements.txt b/requirements.txt index 4ff1b327119..1f92d8a8d20 100644 --- a/requirements.txt +++ b/requirements.txt @@ -137,7 +137,7 @@ pyyaml==5.4.1 # via -r -, kfp, kubernetes, papermill, tfx, yamale pyzmq==22.2.1 # via jupyter-client, notebook regex==2021.8.3 # via black requests-oauthlib==1.3.0 # via google-auth-oauthlib, kubernetes -requests-toolbelt==0.9.1 # via -r -, kfp +requests-toolbelt==1.1.0 # via -r -, kfp requests==2.26.0 # via apache-beam, docker, google-api-core, google-cloud-profiler, google-cloud-storage, hdfs, keras-tuner, kubernetes, papermill, requests-oauthlib, requests-toolbelt, tensorboard rsa==4.7.2 # via google-auth, oauth2client scikit-learn==0.24.2 # via keras-tuner