+## References
+* The Databricks Terraform provider docs are [here](https://registry.terraform.io/providers/databricks/databricks/latest/docs).
+## Requirements
+| Name | Version |
+|------|---------|
+| [terraform](#requirement\_terraform) | >= 1.3.0 |
+## Providers
+| Name | Version |
+|------|---------|
+| [aws](#provider\_aws) | n/a |
+| [databricks](#provider\_databricks) | n/a |
+## Modules
+| Name | Source | Version |
+|------|--------|---------|
+| [databricks\_bucket](#module\_databricks\_bucket) | github.com/chanzuckerberg/cztack//aws-s3-private-bucket | v0.60.1 |
+## Resources
+| Name | Type |
+|------|------|
+| [aws_iam_role.databricks](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role) | resource |
+| [aws_iam_role_policy.policy](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy) | resource |
+| [aws_security_group.databricks](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/security_group) | resource |
+| [databricks_mws_credentials.databricks](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/mws_credentials) | resource |
+| [databricks_mws_networks.networking](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/mws_networks) | resource |
+| [databricks_mws_storage_configurations.databricks](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/mws_storage_configurations) | resource |
+| [databricks_mws_workspaces.databricks](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/mws_workspaces) | resource |
+| [aws_caller_identity.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/caller_identity) | data source |
+| [aws_iam_policy_document.databricks-s3](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source |
+| [aws_iam_policy_document.databricks-setup-assume-role](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source |
+| [aws_iam_policy_document.policy](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source |
+| [aws_region.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/region) | data source |
+## Inputs
+| Name | Description | Type | Default | Required |
+|------|-------------|------|---------|:--------:|
+| [audit\_log\_bucket\_name](#input\_audit\_log\_bucket\_name) | Name of bucket to write cluster logs to - also where the audit logs go, too | `string` | n/a | yes |
+| [databricks\_external\_id](#input\_databricks\_external\_id) | The ID of a Databricks root account. | `string` | n/a | yes |
+| [env](#input\_env) | The environment / stage. Aka staging, dev, prod. | `string` | n/a | yes |
+| [object\_ownership](#input\_object\_ownership) | Set default owner of all objects within bucket (e.g., bucket vs. object owner) | `string` | `null` | no |
+| [owner](#input\_owner) | n/a | `string` | n/a | yes |
+| [passable\_role\_arn](#input\_passable\_role\_arn) | A role to allow the cross-account role to pass to other accounts | `string` | `""` | no |
+| [private\_subnets](#input\_private\_subnets) | List of private subnets. | `list(string)` | n/a | yes |
+| [project](#input\_project) | A high level name, typically the name of the site. | `string` | n/a | yes |
+| [service](#input\_service) | The service. Aka databricks-workspace. | `string` | n/a | yes |
+| [vpc\_id](#input\_vpc\_id) | ID of the VPC. | `string` | n/a | yes |
+| [workspace\_name\_override](#input\_workspace\_name\_override) | Override the workspace name. If not set, the workspace name will be set to the project, env, and service. | `string` | `null` | no |
+## Outputs
+| Name | Description |
+|------|-------------|
+| [role\_arn](#output\_role\_arn) | ARN of the AWS IAM role. |
+| [workspace\_id](#output\_workspace\_id) | ID of the workspace. |
+| [workspace\_url](#output\_workspace\_url) | Url of the deployed workspace. |
+# NOTE(review): presumably the S3 key prefix under which cluster logs are written; nothing in the visible
+# portion of the module references this local — confirm where it is consumed.
+locals {
+ cluster_log_bucket_prefix = "databricks-cluster-logs"
+# Trust policy for the cross-account role: the Databricks AWS account
+# (local.databricks_aws_account) may assume the role, but only when the request
+# carries var.databricks_external_id as the STS external ID.
+data "aws_iam_policy_document" "databricks-setup-assume-role" {
+ statement {
+ principals {
+ type = "AWS"
+ identifiers = ["arn:aws:iam::${local.databricks_aws_account}:root"]
+ }
+ actions = ["sts:AssumeRole"]
+ condition {
+ # Exact match on purpose: the external ID is a literal value, so the wildcard-capable StringLike operator is not wanted here.
+ test = "StringEquals"
+ variable = "sts:ExternalId"
+ values = [var.databricks_external_id]
+ }
+ }
+# Cross-account IAM role for the workspace. Its trust policy is the
+# databricks-setup-assume-role document; its ARN is registered with Databricks
+# through databricks_mws_credentials and exported as the role_arn output.
+resource "aws_iam_role" "databricks" {
+ name = local.name
+ assume_role_policy = data.aws_iam_policy_document.databricks-setup-assume-role.json
+ tags = local.tags
+# Permissions policy for the cross-account role; rendered to JSON and attached
+# by aws_iam_role_policy.policy. Statements that omit `effect` rely on the data
+# source's default of "Allow".
+data "aws_iam_policy_document" "policy" {
+ # EC2 describe, tagging, key-pair, placement-group and spot-request actions, granted on all resources.
+ statement {
+ sid = "NonResourceBasedPermissions"
+ actions = [
+ "ec2:CancelSpotInstanceRequests",
+ "ec2:DescribeAvailabilityZones",
+ "ec2:DescribeIamInstanceProfileAssociations",
+ "ec2:DescribeInstanceStatus",
+ "ec2:DescribeInstances",
+ "ec2:DescribeInternetGateways",
+ "ec2:DescribeNatGateways",
+ "ec2:DescribeNetworkAcls",
+ "ec2:DescribePlacementGroups",
+ "ec2:DescribePrefixLists",
+ "ec2:DescribeReservedInstancesOfferings",
+ "ec2:DescribeRouteTables",
+ "ec2:DescribeSecurityGroups",
+ "ec2:DescribeSpotInstanceRequests",
+ "ec2:DescribeSpotPriceHistory",
+ "ec2:DescribeSubnets",
+ "ec2:DescribeVolumes",
+ "ec2:DescribeVpcAttribute",
+ "ec2:DescribeVpcs",
+ "ec2:CreatePlacementGroup",
+ "ec2:DeletePlacementGroup",
+ "ec2:CreateKeyPair",
+ "ec2:DeleteKeyPair",
+ "ec2:CreateTags",
+ "ec2:DeleteTags",
+ "ec2:RequestSpotInstances",
+ ]
+ resources = ["*"]
+ effect = "Allow"
+ }
+ # Allow passing any role under the /databricks/ path in the current AWS account.
+ statement {
+ effect = "Allow"
+ actions = ["iam:PassRole"]
+ resources = ["arn:aws:iam::${data.aws_caller_identity.current.account_id}:role/databricks/*"]
+ }
+ # Emitted only when var.passable_role_arn is a non-empty string: additionally allow passing that one role.
+ dynamic "statement" {
+ for_each = length(var.passable_role_arn) > 0 ? [1] : []
+ content {
+ actions = [
+ "iam:PassRole"
+ ]
+ resources = [
+ var.passable_role_arn
+ ]
+ }
+ }
+ # Instance-profile association changes, limited to instances tagged Vendor=Databricks.
+ statement {
+ sid = "InstancePoolsSupport"
+ actions = [
+ "ec2:AssociateIamInstanceProfile",
+ "ec2:DisassociateIamInstanceProfile",
+ "ec2:ReplaceIamInstanceProfileAssociation",
+ ]
+ resources = ["${local.ec2_arn_base}:instance/*"]
+ condition {
+ test = "StringEquals"
+ variable = "ec2:ResourceTag/Vendor"
+ values = ["Databricks"]
+ }
+ }
+ # RunInstances on instance and volume resources, only when the request itself carries the tag Vendor=Databricks.
+ statement {
+ sid = "AllowEc2RunInstancePerTag"
+ actions = [
+ "ec2:RunInstances",
+ ]
+ resources = [
+ "${local.ec2_arn_base}:instance/*",
+ "${local.ec2_arn_base}:volume/*",
+ ]
+ condition {
+ test = "StringEquals"
+ variable = "aws:RequestTag/Vendor"
+ values = ["Databricks"]
+ }
+ }
+ # RunInstances on image resources that are already tagged Vendor=Databricks.
+ statement {
+ sid = "AllowEc2RunInstanceImagePerTag"
+ actions = [
+ "ec2:RunInstances",
+ ]
+ resources = [
+ "${local.ec2_arn_base}:image/*",
+ ]
+ condition {
+ test = "StringEquals"
+ variable = "aws:ResourceTag/Vendor"
+ values = ["Databricks"]
+ }
+ }
+ # RunInstances on network interfaces, subnets and security groups, only inside var.vpc_id.
+ statement {
+ sid = "AllowEc2RunInstancePerVPCid"
+ actions = [
+ "ec2:RunInstances",
+ ]
+ resources = [
+ "${local.ec2_arn_base}:network-interface/*",
+ "${local.ec2_arn_base}:subnet/*",
+ "${local.ec2_arn_base}:security-group/*",
+ ]
+ condition {
+ test = "StringEquals"
+ variable = "ec2:vpc"
+ values = ["${local.ec2_arn_base}:vpc/${var.vpc_id}"]
+ }
+ }
+ # RunInstances on every resource NOT matched by the listed ARN patterns (not_resources);
+ # the listed resource types are the ones covered by the conditional statements above.
+ statement {
+ sid = "AllowEc2RunInstanceOtherResources"
+ actions = [
+ "ec2:RunInstances",
+ ]
+ not_resources = [
+ "${local.ec2_arn_base}:image/*",
+ "${local.ec2_arn_base}:network-interface/*",
+ "${local.ec2_arn_base}:subnet/*",
+ "${local.ec2_arn_base}:security-group/*",
+ "${local.ec2_arn_base}:volume/*",
+ "${local.ec2_arn_base}:instance/*"
+ ]
+ }
+ # TerminateInstances, limited to instances tagged Vendor=Databricks.
+ statement {
+ sid = "EC2TerminateInstancesTag"
+ actions = [
+ "ec2:TerminateInstances",
+ ]
+ resources = [
+ "${local.ec2_arn_base}:instance/*",
+ ]
+ condition {
+ test = "StringEquals"
+ variable = "ec2:ResourceTag/Vendor"
+ values = ["Databricks"]
+ }
+ }
+ # Attach/detach volumes, limited to instances and volumes tagged Vendor=Databricks.
+ statement {
+ sid = "EC2AttachDetachVolumeTag"
+ actions = [
+ "ec2:AttachVolume",
+ "ec2:DetachVolume",
+ ]
+ resources = [
+ "${local.ec2_arn_base}:instance/*",
+ "${local.ec2_arn_base}:volume/*",
+ ]
+ condition {
+ test = "StringEquals"
+ variable = "ec2:ResourceTag/Vendor"
+ values = ["Databricks"]
+ }
+ }
+ # CreateVolume, only when the request carries the tag Vendor=Databricks.
+ statement {
+ sid = "EC2CreateVolumeByTag"
+ actions = [
+ "ec2:CreateVolume",
+ ]
+ resources = [
+ "${local.ec2_arn_base}:volume/*",
+ ]
+ condition {
+ test = "StringEquals"
+ variable = "aws:RequestTag/Vendor"
+ values = ["Databricks"]
+ }
+ }
+ # DeleteVolume, limited to volumes tagged Vendor=Databricks.
+ statement {
+ sid = "EC2DeleteVolumeByTag"
+ actions = [
+ "ec2:DeleteVolume",
+ ]
+ resources = [
+ "${local.ec2_arn_base}:volume/*",
+ ]
+ condition {
+ test = "StringEquals"
+ variable = "ec2:ResourceTag/Vendor"
+ values = ["Databricks"]
+ }
+ }
+ # Create and update the EC2 Spot service-linked role (any account), only for the spot.amazonaws.com service.
+ statement {
+ actions = [
+ "iam:CreateServiceLinkedRole",
+ "iam:PutRolePolicy",
+ ]
+ resources = [
+ "arn:aws:iam::*:role/aws-service-role/spot.amazonaws.com/AWSServiceRoleForEC2Spot",
+ ]
+ condition {
+ test = "StringLike"
+ variable = "iam:AWSServiceName"
+ values = ["spot.amazonaws.com"]
+ }
+ effect = "Allow"
+ }
+ # Security-group rule changes, limited to this module's own security group inside var.vpc_id.
+ statement {
+ sid = "VpcNonresourceSpecificActions"
+ actions = [
+ "ec2:AuthorizeSecurityGroupEgress",
+ "ec2:AuthorizeSecurityGroupIngress",
+ "ec2:RevokeSecurityGroupEgress",
+ "ec2:RevokeSecurityGroupIngress",
+ ]
+ resources = [
+ "${local.ec2_arn_base}:security-group/${aws_security_group.databricks.id}",
+ ]
+ condition {
+ test = "StringEquals"
+ variable = "ec2:vpc"
+ values = ["${local.ec2_arn_base}:vpc/${var.vpc_id}"]
+ }
+ }
+# Attaches the rendered "policy" document to the cross-account role as an inline policy named "extras".
+resource "aws_iam_role_policy" "policy" {
+ name = "extras"
+ role = aws_iam_role.databricks.id
+ policy = data.aws_iam_policy_document.policy.json
+# Bucket policy handed to the databricks_bucket module: grants the Databricks AWS
+# account object read/write/delete plus list and location access on the bucket
+# named local.name.
+data "aws_iam_policy_document" "databricks-s3" {
+ statement {
+ sid = "grant databricks access"
+ effect = "Allow"
+ principals {
+ type = "AWS"
+ identifiers = ["arn:aws:iam::${local.databricks_aws_account}:root"]
+ }
+ actions = [
+ "s3:GetObject",
+ "s3:GetObjectVersion",
+ "s3:PutObject",
+ "s3:DeleteObject",
+ "s3:ListBucket",
+ "s3:GetBucketLocation",
+ ]
+ # Both forms are listed: the "/*" ARN covers objects, the bare ARN covers the bucket itself.
+ resources = [
+ "arn:aws:s3:::${local.name}/*",
+ "arn:aws:s3:::${local.name}",
+ ]
+ }
+# S3 bucket for the workspace, created by the cztack aws-s3-private-bucket module
+# pinned at v0.60.1. The bucket is named local.name and carries the databricks-s3
+# bucket policy; its `id` output feeds databricks_mws_storage_configurations.
+module "databricks_bucket" {
+ source = "github.com/chanzuckerberg/cztack//aws-s3-private-bucket?ref=v0.60.1"
+ bucket_name = local.name
+ bucket_policy = data.aws_iam_policy_document.databricks-s3.json
+ project = var.project
+ env = var.env
+ service = var.service
+ owner = var.owner
+ object_ownership = var.object_ownership
+// https://docs.databricks.com/administration-guide/multiworkspace/iam-role.html#language-Your%C2%A0VPC,%C2%A0custom
+# Shared values used throughout the module.
+locals {
+ databricks_aws_account = "414351767826" # Databricks' own AWS account, not CZI's. See https://docs.databricks.com/en/administration-guide/account-settings-e2/credentials.html#step-1-create-a-cross-account-iam-role
+ # ARN prefix for EC2 resources in the current region and account; callers append ":<resource-type>/<id>".
+ ec2_arn_base = "arn:aws:ec2:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}"
+ # coalesce() returns the first argument that is neither null nor an empty string, so the
+ # "<project>-<env>-<service>" name is used whenever no override is supplied.
+ name = coalesce(var.workspace_name_override, "${var.project}-${var.env}-${var.service}")
+ security_group_ids = [aws_security_group.databricks.id]
+ # Tag set applied to the IAM role and the security group.
+ tags = {
+ project = var.project
+ env = var.env
+ service = var.service
+ owner = var.owner
+ managedBy = "terraform"
+ }
+# Supplies the current AWS account ID used when building IAM and EC2 ARNs.
+data "aws_caller_identity" "current" {}
+# Supplies the current AWS region name, used in local.ec2_arn_base and as the workspace's aws_region.
+data "aws_region" "current" {}
+# Registers the VPC, the private subnets and this module's security group with the
+# Databricks account as a network configuration named local.name.
+resource "databricks_mws_networks" "networking" {
+ account_id = var.databricks_external_id
+ network_name = local.name
+ vpc_id = var.vpc_id
+ subnet_ids = var.private_subnets
+ security_group_ids = local.security_group_ids
+# Registers the module-managed S3 bucket with the Databricks account as a storage configuration.
+# NOTE(review): assumes the bucket module's `id` output is the bucket name — confirm against the module.
+resource "databricks_mws_storage_configurations" "databricks" {
+ account_id = var.databricks_external_id
+ storage_configuration_name = local.name
+ bucket_name = module.databricks_bucket.id
+# Registers the cross-account IAM role's ARN with the Databricks account as a credentials configuration.
+resource "databricks_mws_credentials" "databricks" {
+ account_id = var.databricks_external_id
+ credentials_name = local.name
+ role_arn = aws_iam_role.databricks.arn
+# The workspace itself, tying together the credentials, storage and network
+# configurations created in this module. local.name is used for both the
+# workspace name and the deployment name.
+resource "databricks_mws_workspaces" "databricks" {
+ account_id = var.databricks_external_id
+ workspace_name = local.name
+ deployment_name = local.name
+ aws_region = data.aws_region.current.name
+ credentials_id = databricks_mws_credentials.databricks.credentials_id
+ storage_configuration_id = databricks_mws_storage_configurations.databricks.storage_configuration_id
+ network_id = databricks_mws_networks.networking.network_id
+# Exposes the workspace_id attribute of the databricks_mws_workspaces resource.
+output "workspace_id" {
+ description = "ID of the workspace."
+ value = databricks_mws_workspaces.databricks.workspace_id
+# Exposes the workspace_url attribute of the databricks_mws_workspaces resource.
+output "workspace_url" {
+ description = "Url of the deployed workspace."
+ value = databricks_mws_workspaces.databricks.workspace_url
+# Exposes the ARN of the cross-account IAM role (aws_iam_role.databricks).
+output "role_arn" {
+ description = "ARN of the AWS IAM role."
+ value = aws_iam_role.databricks.arn
+# Security group registered with the workspace network: allows all TCP and UDP
+# traffic between members of the group itself, and all outbound traffic.
+resource "aws_security_group" "databricks" {
+ name = local.name
+ description = "self tcp and udp all ports and all outbound"
+ vpc_id = var.vpc_id
+ ingress {
+ description = "self tcp all ports"
+ from_port = 0
+ to_port = 65535
+ protocol = "tcp"
+ self = true
+ }
+ ingress {
+ description = "self udp all ports"
+ from_port = 0
+ to_port = 65535
+ protocol = "udp"
+ self = true
+ }
+ egress {
+ # protocol "-1" with ports 0/0 selects every protocol and port.
+ from_port = 0
+ to_port = 0
+ protocol = "-1"
+ # An empty string is not a valid CIDR block; "all outbound" requires the IPv4 any-address range.
+ cidr_blocks = ["0.0.0.0/0"]
+ }
+ tags = local.tags
+# Used by the security group, the MWS network registration, and the ec2:vpc conditions in the role policy.
+variable "vpc_id" {
+ description = "ID of the VPC."
+ type = string
+# Passed as subnet_ids to databricks_mws_networks.
+variable "private_subnets" {
+ description = "List of private subnets."
+ type = list(string)
+# Serves two purposes in this module: the sts:ExternalId required by the role's
+# trust policy, and the account_id of every databricks_mws_* resource.
+variable "databricks_external_id" {
+ description = "The ID of a Databricks root account."
+ type = string
+# First component of the default resource name (local.name); also applied as a tag and passed to the bucket module.
+variable "project" {
+ description = "A high level name, typically the name of the site."
+ type = string
+# Second component of the default resource name (local.name); also applied as a tag and passed to the bucket module.
+variable "env" {
+ description = "The environment / stage. Aka staging, dev, prod."
+ type = string
+# Third component of the default resource name (local.name); also applied as a tag and passed to the bucket module.
+variable "service" {
+ description = "The service. Aka databricks-workspace."
+ type = string
+# Applied as the `owner` tag (local.tags) and passed to the bucket module. No description attribute is declared.
+variable "owner" {
+ type = string
+# Optional. The default empty string disables the extra iam:PassRole statement in the role policy,
+# which is generated only when this value has non-zero length.
+variable "passable_role_arn" {
+ description = "A role to allow the cross-account role to pass to other accounts"
+ type = string
+ default = ""
+# Optional object-ownership setting, forwarded to the databricks_bucket module.
+# The validation accepts either null or one of the three listed values. The null check comes first
+# because the 2nd parameter of contains() cannot be null.
+variable "object_ownership" {
+ type = string
+ default = null
+ description = "Set default owner of all objects within bucket (e.g., bucket vs. object owner)"
+ validation {
+ condition = var.object_ownership == null ? true : contains(["BucketOwnerEnforced", "BucketOwnerPreferred", "ObjectWriter"], var.object_ownership)
+ error_message = "Valid values for var.object_ownership are ('BucketOwnerEnforced', 'BucketOwnerPreferred', 'ObjectWriter')."
+ }
+# No default is declared, so callers must supply a value.
+# NOTE(review): nothing in the visible portion of the module references this variable — confirm where it is consumed.
+variable "audit_log_bucket_name" {
+ type = string
+ description = "Name of bucket to write cluster logs to - also where the audit logs go, too"
+# Optional. When set, replaces the "<project>-<env>-<service>" value of local.name, which names the
+# workspace, IAM role, bucket, security group and the MWS configurations.
+variable "workspace_name_override" {
+ type = string
+ default = null
+ description = "Override the workspace name. If not set, the workspace name will be set to the project, env, and service."
\ No newline at end of file
+# Provider sources and the minimum Terraform version for this module.
+# Neither provider declares a version constraint here.
+terraform {
+ required_providers {
+ aws = {
+ source = "hashicorp/aws"
+ }
+ databricks = {
+ source = "databricks/databricks"
+ }
+ }
+ required_version = ">= 1.3.0"