Skip to content

Commit

Permalink
AKS cluster (#1)
Browse files Browse the repository at this point in the history
* AKS cluster
  • Loading branch information
matas-cast authored May 26, 2022
1 parent ddee280 commit e4623b2
Show file tree
Hide file tree
Showing 8 changed files with 340 additions and 0 deletions.
1 change: 1 addition & 0 deletions .github/CODEOWNERS
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
* @castai/cast-core
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
terraform.d
37 changes: 37 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
<a href="https://cast.ai">
<img src="https://cast.ai/wp-content/themes/cast/img/cast-logo-dark-blue.svg" align="right" height="100" />
</a>

Terraform module for connecting an AKS cluster to CAST AI
==================


Website: https://www.cast.ai

Requirements
------------

- [Terraform](https://www.terraform.io/downloads.html) 0.13+

Using the module
------------

A module to create an Azure role and a service principal that can be used to connect to CAST AI.

Requires `castai/castai`, `hashicorp/azurerm`, `hashicorp/azuread`, `hashicorp/helm` providers to be configured.

```hcl
module "castai-aks-cluster" {
  source = "castai/aks-cluster/castai"

  aks_cluster_name           = var.aks_cluster_name
  aks_cluster_region         = var.aks_cluster_region
  subscription_id            = data.azurerm_subscription.current.subscription_id
  tenant_id                  = data.azurerm_subscription.current.tenant_id
  resource_group             = azurerm_kubernetes_cluster.example.resource_group_name
  node_resource_group        = azurerm_kubernetes_cluster.example.node_resource_group
  delete_nodes_on_disconnect = true
}
```
86 changes: 86 additions & 0 deletions iam.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
// Names derived from the module inputs.
locals {
  // Display name for the Azure AD application; substr keeps at most the
  // first 64 characters of the combined string.
  app_name = substr("CAST AI ${var.aks_cluster_name}-${var.resource_group}", 0, 64)

  // Name of the custom Azure role definition.
  role_name = "CastAKSRole-${var.aks_cluster_name}-tf"
}

// Azure RM
// Custom role definition ("Role used by CAST AI"). It is scoped to the
// cluster resource group and may be assigned in both the cluster resource
// group and the node resource group.
resource "azurerm_role_definition" "castai" {
  name        = local.role_name
  description = "Role used by CAST AI"
  scope       = "/subscriptions/${var.subscription_id}/resourceGroups/${var.resource_group}"

  // Resource groups in which this role may be assigned.
  assignable_scopes = [
    "/subscriptions/${var.subscription_id}/resourceGroups/${var.resource_group}",
    "/subscriptions/${var.subscription_id}/resourceGroups/${var.node_resource_group}",
  ]

  permissions {
    not_actions = []

    actions = [
      // Microsoft.Compute
      "Microsoft.Compute/*/read",
      "Microsoft.Compute/virtualMachines/*",
      "Microsoft.Compute/virtualMachineScaleSets/*",
      "Microsoft.Compute/disks/write",
      "Microsoft.Compute/disks/delete",

      // Microsoft.Network
      "Microsoft.Network/*/read",
      "Microsoft.Network/networkInterfaces/write",
      "Microsoft.Network/networkInterfaces/delete",
      "Microsoft.Network/networkInterfaces/join/action",
      "Microsoft.Network/networkSecurityGroups/join/action",
      "Microsoft.Network/publicIPAddresses/write",
      "Microsoft.Network/publicIPAddresses/delete",
      "Microsoft.Network/publicIPAddresses/join/action",
      "Microsoft.Network/virtualNetworks/subnets/join/action",
      "Microsoft.Network/virtualNetworks/subnets/write",
      "Microsoft.Network/applicationGateways/backendhealth/action",
      "Microsoft.Network/applicationGateways/backendAddressPools/join/action",
      "Microsoft.Network/applicationSecurityGroups/joinIpConfiguration/action",
      "Microsoft.Network/loadBalancers/backendAddressPools/write",
      "Microsoft.Network/loadBalancers/backendAddressPools/join/action",

      // Microsoft.ContainerService
      "Microsoft.ContainerService/*/read",
      "Microsoft.ContainerService/managedClusters/start/action",
      "Microsoft.ContainerService/managedClusters/stop/action",
      "Microsoft.ContainerService/managedClusters/runCommand/action",
      "Microsoft.ContainerService/managedClusters/agentPools/write",

      // Microsoft.Resources
      "Microsoft.Resources/*/read",
      "Microsoft.Resources/tags/write",

      // Microsoft.Authorization
      "Microsoft.Authorization/roleAssignments/read",
      "Microsoft.Authorization/roleDefinitions/read",

      // Microsoft.ManagedIdentity
      "Microsoft.ManagedIdentity/userAssignedIdentities/assign/action",
    ]
  }
}


// Assigns the custom CAST AI role to the CAST AI service principal on the
// cluster resource group.
resource "azurerm_role_assignment" "castai_resource_group" {
  // principal_id must be the directory object ID of the principal. Use the
  // explicit object_id attribute rather than the Terraform resource id, which
  // is only guaranteed to equal the object ID in azuread 2.x.
  principal_id       = azuread_service_principal.castai.object_id
  role_definition_id = azurerm_role_definition.castai.role_definition_resource_id

  scope = "/subscriptions/${var.subscription_id}/resourceGroups/${var.resource_group}"
}

// Assigns the custom CAST AI role to the CAST AI service principal on the
// node resource group.
resource "azurerm_role_assignment" "castai_node_resource_group" {
  // principal_id must be the directory object ID of the principal. Use the
  // explicit object_id attribute rather than the Terraform resource id, which
  // is only guaranteed to equal the object ID in azuread 2.x.
  principal_id       = azuread_service_principal.castai.object_id
  role_definition_id = azurerm_role_definition.castai.role_definition_resource_id

  scope = "/subscriptions/${var.subscription_id}/resourceGroups/${var.node_resource_group}"
}

// Azure AD

// Client configuration of the azuread provider; its object_id is used as the
// owner of the service principal.
data "azuread_client_config" "current" {
}

// Azure AD application whose ID is passed to CAST AI as the client ID.
resource "azuread_application" "castai" {
  display_name = local.app_name
}

// Password for the Azure AD application; its value is passed to CAST AI as
// the client secret.
resource "azuread_application_password" "castai" {
  application_object_id = azuread_application.castai.object_id
}

// Service principal for the CAST AI application. It is owned by the identity
// the azuread provider is running as and receives the role assignments.
resource "azuread_service_principal" "castai" {
  application_id = azuread_application.castai.application_id
  owners         = [data.azuread_client_config.current.object_id]

  app_role_assignment_required = false
}
143 changes: 143 additions & 0 deletions main.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
// Registers the AKS cluster with CAST AI using the credentials of the Azure AD
// application defined in this module.
resource "castai_aks_cluster" "castai_cluster" {
  name   = var.aks_cluster_name
  region = var.aks_cluster_region

  // Azure location of the cluster.
  tenant_id           = var.tenant_id
  subscription_id     = var.subscription_id
  node_resource_group = var.node_resource_group

  // Credentials of the Azure AD application.
  client_id     = azuread_application.castai.application_id
  client_secret = azuread_application_password.castai.value

  delete_nodes_on_disconnect = var.delete_nodes_on_disconnect
}

// Installs the castai-agent chart from the CAST AI Helm repository into the
// castai-agent namespace.
resource "helm_release" "castai_agent" {
  name       = "castai-agent"
  chart      = "castai-agent"
  repository = "https://castai.github.io/helm-charts"

  namespace        = "castai-agent"
  create_namespace = true

  wait            = true
  cleanup_on_fail = true

  set {
    name  = "provider"
    value = "aks"
  }

  set {
    name  = "createNamespace"
    value = "false"
  }

  // apiURL is only set when var.api_url is a non-empty string.
  dynamic "set" {
    for_each = var.api_url == "" ? [] : [var.api_url]
    content {
      name  = "apiURL"
      value = set.value
    }
  }

  // The cluster token is passed to the chart as a sensitive value.
  set_sensitive {
    name  = "apiKey"
    value = castai_aks_cluster.castai_cluster.cluster_token
  }
}

// Installs the castai-evictor chart with zero replicas, after the agent
// release. Later changes to `set` values and the chart version are ignored
// by Terraform.
resource "helm_release" "castai_evictor" {
  name       = "castai-evictor"
  chart      = "castai-evictor"
  repository = "https://castai.github.io/helm-charts"

  namespace        = "castai-agent"
  create_namespace = true

  wait            = true
  cleanup_on_fail = true

  set {
    name  = "replicaCount"
    value = "0"
  }

  lifecycle {
    ignore_changes = [set, version]
  }

  depends_on = [helm_release.castai_agent]
}

// Installs the castai-cluster-controller chart (release name
// "cluster-controller") into the castai-agent namespace, after the agent
// release.
resource "helm_release" "castai_cluster_controller" {
  name       = "cluster-controller"
  chart      = "castai-cluster-controller"
  repository = "https://castai.github.io/helm-charts"

  namespace        = "castai-agent"
  create_namespace = true

  wait            = true
  cleanup_on_fail = true

  set {
    name  = "aks.enabled"
    value = "true"
  }

  set {
    name  = "castai.clusterID"
    value = castai_aks_cluster.castai_cluster.id
  }

  // castai.apiURL is only set when var.api_url is a non-empty string.
  dynamic "set" {
    for_each = var.api_url == "" ? [] : [var.api_url]
    content {
      name  = "castai.apiURL"
      value = set.value
    }
  }

  // The cluster token is passed to the chart as a sensitive value.
  set_sensitive {
    name  = "castai.apiKey"
    value = castai_aks_cluster.castai_cluster.cluster_token
  }

  depends_on = [helm_release.castai_agent]
}

// Installs the castai-spot-handler chart into the castai-agent namespace,
// after the agent release.
resource "helm_release" "castai_spot_handler" {
  name       = "castai-spot-handler"
  chart      = "castai-spot-handler"
  repository = "https://castai.github.io/helm-charts"

  namespace        = "castai-agent"
  create_namespace = true

  wait            = true
  cleanup_on_fail = true

  set {
    name  = "castai.provider"
    value = "azure"
  }

  set {
    name  = "castai.clusterID"
    value = castai_aks_cluster.castai_cluster.id
  }

  set {
    name  = "createNamespace"
    value = "false"
  }

  // castai.apiURL is only set when var.api_url is a non-empty string.
  dynamic "set" {
    for_each = var.api_url == "" ? [] : [var.api_url]
    content {
      name  = "castai.apiURL"
      value = set.value
    }
  }

  depends_on = [helm_release.castai_agent]
}

// Applies the autoscaler policies JSON to the connected cluster once the
// agent and evictor releases exist.
resource "castai_autoscaler" "castai_autoscaler_policies" {
  cluster_id               = castai_aks_cluster.castai_cluster.id
  autoscaler_policies_json = var.autoscaler_policies_json

  depends_on = [
    helm_release.castai_agent,
    helm_release.castai_evictor,
  ]
}
5 changes: 5 additions & 0 deletions output.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// ID of the cluster resource created by this module; marked sensitive.
output "cluster_id" {
  description = "CAST.AI cluster id, which can be used for accessing cluster data using API"
  sensitive   = true
  value       = castai_aks_cluster.castai_cluster.id
}
44 changes: 44 additions & 0 deletions variables.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
// CAST AI API endpoint. An empty string makes the Helm releases omit their
// API URL setting.
variable "api_url" {
  description = "URL of alternative CAST AI API to be used during development or testing"
  type        = string
  default     = "https://api.cast.ai"
}

// Required. Also used to build the role name and the application display name.
variable "aks_cluster_name" {
  description = "Name of the cluster to be connected to CAST AI."
  type        = string
}

// Required. Passed to the CAST AI cluster resource as its region.
variable "aks_cluster_region" {
  description = "Region of the AKS cluster"
  type        = string
}

// Required. Used to build the resource group scopes for the role definition
// and role assignments.
variable "subscription_id" {
  description = "Azure subscription ID"
  type        = string
}

// Optional. Defaults to an empty string.
variable "autoscaler_policies_json" {
  description = "Optional json object to override CAST AI cluster autoscaler policies"
  type        = string
  default     = ""
}

// Optional. Disabled by default.
variable "delete_nodes_on_disconnect" {
  description = "Optionally delete Cast AI created nodes when the cluster is destroyed"
  type        = bool
  default     = false
}

// Required. Interpolated into "/subscriptions/<id>/resourceGroups/<name>"
// scopes, so this must be the resource group name, not its full resource ID.
variable "resource_group" {
  type        = string
  description = "Name of the Azure resource group that contains the AKS cluster"
}

// Required. Interpolated into "/subscriptions/<id>/resourceGroups/<name>"
// scopes, so this must be the resource group name, not its full resource ID.
variable "node_resource_group" {
  type        = string
  description = "Name of the AKS node resource group (the cluster's node_resource_group attribute)"
}

// Required. Passed to the CAST AI cluster resource together with the
// application credentials.
variable "tenant_id" {
  type        = string
  description = "Azure tenant ID of the subscription that contains the AKS cluster"
}
23 changes: 23 additions & 0 deletions version.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// Terraform and provider requirements for this module.
//
// A reusable module should not pin providers to one exact version: an exact
// pin conflicts with any caller or sibling module that needs a different
// release. Each provider is instead constrained to a range starting at the
// version the module was written against.
terraform {
  required_version = ">= 0.13"

  required_providers {
    azurerm = {
      source  = "hashicorp/azurerm"
      version = ">= 3.7.0, < 4.0.0"
    }
    azuread = {
      source = "hashicorp/azuread"
      // Stay on 2.x: the module uses the 2.x attribute names application_id
      // and application_object_id.
      version = ">= 2.22.0, < 3.0.0"
    }
    castai = {
      source  = "castai/castai"
      version = ">= 0.18.0"
    }
    helm = {
      source = "hashicorp/helm"
      // Stay on 2.x: the module uses `set { ... }` / `set_sensitive { ... }`
      // block syntax. NOTE(review): confirm against the helm provider 3.x
      // upgrade guide before raising this bound.
      version = ">= 2.0.0, < 3.0.0"
    }
  }
}

0 comments on commit e4623b2

Please sign in to comment.