Skip to content

Commit

Permalink
feat(vertexai): Support google_vertex_ai_deployment_resource_pool (#9518
Browse files Browse the repository at this point in the history
)
  • Loading branch information
shotarok authored Mar 27, 2024
1 parent ae8c6b7 commit 83d97a1
Show file tree
Hide file tree
Showing 3 changed files with 201 additions and 0 deletions.
161 changes: 161 additions & 0 deletions mmv1/products/vertexai/DeploymentResourcePool.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
# Copyright 2024 Google Inc.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

--- !ruby/object:Api::Resource
name: 'DeploymentResourcePool'
base_url: 'projects/{{project}}/locations/{{region}}/deploymentResourcePools'
create_url: 'projects/{{project}}/locations/{{region}}/deploymentResourcePools'
self_link: 'projects/{{project}}/locations/{{region}}/deploymentResourcePools/{{name}}'
# No method is not defined for DeploymentResourcePools
immutable: true
create_verb: :POST
references: !ruby/object:Api::Resource::ReferenceLinks
api: https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.deploymentResourcePools
async: !ruby/object:Api::OpAsync
actions:
- create
- delete
operation: !ruby/object:Api::OpAsync::Operation
path: 'name'
base_url: '{{op_id}}'
wait_ms: 1000
result: !ruby/object:Api::OpAsync::Result
path: 'response'
resource_inside_response: true
status: !ruby/object:Api::OpAsync::Status
path: 'done'
complete: true
allowed:
- true
- false
error: !ruby/object:Api::OpAsync::Error
path: 'error'
message: 'message'
description: |-
'DeploymentResourcePool can be shared by multiple deployed models,
whose underlying specification consists of dedicated resources.'
custom_code: !ruby/object:Provider::Terraform::CustomCode
encoder: templates/terraform/encoders/wrap_object_with_deployment_resource_pool_id.go.erb
examples:
- !ruby/object:Provider::Terraform::Examples
name: 'vertex_ai_deployment_resource_pool'
primary_resource_id: 'deployment_resource_pool'
vars:
deployment_resource_pool_id: example-deployment-resource-pool
parameters:
- !ruby/object:Api::Type::String
name: region
description: The region of deployment resource pool. eg us-central1
url_param_only: true
immutable: true
properties:
- !ruby/object:Api::Type::String
name: 'name'
description: The resource name of deployment resource pool. The maximum length is 63 characters, and valid characters are `/^[a-z]([a-z0-9-]{0,61}[a-z0-9])?$/`.
immutable: true
required: true
custom_flatten: templates/terraform/custom_flatten/name_from_self_link.erb
- !ruby/object:Api::Type::NestedObject
name: 'dedicatedResources'
description: The underlying dedicated resources that the deployment resource pool uses.
properties:
- !ruby/object:Api::Type::NestedObject
name: 'machineSpec'
required: true
immutable: true
description: The specification of a single machine used by the prediction
properties:
- !ruby/object:Api::Type::String
name: machineType
description: The type of the machine. See the [list of machine types supported for prediction](https://cloud.google.com/vertex-ai/docs/predictions/configure-compute#machine-types).
immutable: true
- !ruby/object:Api::Type::String
name: acceleratorType
description:
The type of accelerator(s) that may be attached to the
machine as per accelerator_count. See possible values
[here](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#AcceleratorType).
- !ruby/object:Api::Type::Integer
name: acceleratorCount
description: The number of accelerators to attach to the machine.
- !ruby/object:Api::Type::Integer
name: minReplicaCount
required: true
immutable: true
description:
The minimum number of machine replicas this DeployedModel will
be always deployed on. This value must be greater than or equal
to 1. If traffic against the DeployedModel increases, it may
dynamically be deployed onto more replicas, and as traffic
decreases, some of these extra replicas may be freed.
- !ruby/object:Api::Type::Integer
name: maxReplicaCount
immutable: true
description:
The maximum number of replicas this DeployedModel may be
deployed on when the traffic against it increases. If the
requested value is too large, the deployment will error, but if
deployment succeeds then the ability to scale the model to that
many replicas is guaranteed (barring service outages). If
traffic against the DeployedModel increases beyond what its
replicas at maximum may handle, a portion of the traffic will be
dropped. If this value is not provided, will use
min_replica_count as the default value. The value of this field
impacts the charge against Vertex CPU and GPU quotas.
Specifically, you will be charged for max_replica_count * number
of cores in the selected machine type) and (max_replica_count *
number of GPUs per replica in the selected machine type).
- !ruby/object:Api::Type::Array
name: autoscalingMetricSpecs
immutable: true
description: A list of the metric specifications that overrides a resource utilization metric.
item_type: !ruby/object:Api::Type::NestedObject
name: autoscalingMetricSpecs
description:
The metric specifications that overrides a resource
utilization metric (CPU utilization, accelerator's duty cycle,
and so on) target value (default to 60 if not set). At most
one entry is allowed per metric. If
machine_spec.accelerator_count is above 0, the autoscaling
will be based on both CPU utilization and accelerator's duty
cycle metrics and scale up when either metrics exceeds its
target value while scale down if both metrics are under their
target value. The default target value is 60 for both metrics.
If machine_spec.accelerator_count is 0, the autoscaling will
be based on CPU utilization metric only with default target
value 60 if not explicitly set. For example, in the case of
Online Prediction, if you want to override target CPU
utilization to 80, you should set
autoscaling_metric_specs.metric_name to
`aiplatform.googleapis.com/prediction/online/cpu/utilization`
and autoscaling_metric_specs.target to `80`.
properties:
- !ruby/object:Api::Type::String
name: metricName
required: true
description:
'The resource metric name. Supported metrics: For Online Prediction:
* `aiplatform.googleapis.com/prediction/online/accelerator/duty_cycle`
* `aiplatform.googleapis.com/prediction/online/cpu/utilization`'
- !ruby/object:Api::Type::Integer
name: target
description:
The target resource utilization in percentage (1% - 100%)
for the given metric; once the real usage deviates from
the target by a certain percentage, the machine replicas
change. The default value is 60 (representing 60%) if not
provided.
- !ruby/object:Api::Type::String
name: 'createTime'
output: true
description: A timestamp in RFC3339 UTC "Zulu" format, with nanosecond resolution and up to nine fractional digits.
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
<%# The license inside this block applies to this file.
# Copyright 2024 Google Inc.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-%>
newObj := make(map[string]interface{})
newObj["deploymentResourcePool"] = obj
nameProp, ok := d.GetOk("name")
if ok && nameProp != nil {
newObj["deploymentResourcePoolId"] = nameProp
}
return newObj, nil
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
resource "google_vertex_ai_deployment_resource_pool" "<%= ctx[:primary_resource_id] %>" {
region = "us-central1"
name = "<%= ctx[:vars]['deployment_resource_pool_id'] %>"
dedicated_resources {
machine_spec {
machine_type = "n1-standard-4"
accelerator_type = "NVIDIA_TESLA_K80"
accelerator_count = 1
}

min_replica_count = 1
max_replica_count = 2

autoscaling_metric_specs {
metric_name = "aiplatform.googleapis.com/prediction/online/accelerator/duty_cycle"
target = 60
}
}
}

0 comments on commit 83d97a1

Please sign in to comment.