-
Notifications
You must be signed in to change notification settings - Fork 25
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: CDI-3103 - New databricks volume module (#593)
* feat: new databricks_volume module * fix: grantees and grant structure
- Loading branch information
Showing
11 changed files
with
565 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
# Auto-generated by fogg. Do not edit | ||
# Make improvements in fogg, so that everyone can benefit. | ||
|
||
export TERRAFORM_VERSION := 1.3.0 | ||
export TF_PLUGIN_CACHE_DIR := ../../..//.terraform.d/plugin-cache | ||
|
||
include ../../..//scripts/module.mk | ||
|
||
|
||
help: ## display help for this makefile | ||
@fgrep -h "##" $(MAKEFILE_LIST) | fgrep -v fgrep | sed -e 's/\\$$//' | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' | ||
.PHONY: help |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
<!-- START --> | ||
## Requirements | ||
|
||
| Name | Version | | ||
|------|---------| | ||
| <a name="requirement_terraform"></a> [terraform](#requirement\_terraform) | >= 1.3.0 | | ||
|
||
## Providers | ||
|
||
| Name | Version | | ||
|------|---------| | ||
| <a name="provider_aws"></a> [aws](#provider\_aws) | n/a | | ||
| <a name="provider_databricks"></a> [databricks](#provider\_databricks) | n/a | | ||
|
||
## Modules | ||
|
||
| Name | Source | Version | | ||
|------|--------|---------| | ||
| <a name="module_databricks_bucket"></a> [databricks\_bucket](#module\_databricks\_bucket) | github.com/chanzuckerberg/cztack//aws-s3-private-bucket | v0.71.0 | | ||
|
||
## Resources | ||
|
||
| Name | Type | | ||
|------|------| | ||
| [aws_iam_policy.dbx_unity_access_policy](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy) | resource | | ||
| [aws_iam_role.dbx_unity_aws_role](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role) | resource | | ||
| [aws_iam_role_policy_attachment.dbx_unity_aws_access](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource | | ||
| [databricks_catalog.volume](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/catalog) | resource | | ||
| [databricks_external_location.volume](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/external_location) | resource | | ||
| [databricks_grant.catalog_r](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/grant) | resource | | ||
| [databricks_grant.catalog_rw](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/grant) | resource | | ||
| [databricks_grant.schema_r](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/grant) | resource | | ||
| [databricks_grant.schema_rw](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/grant) | resource | | ||
| [databricks_grant.volume_r](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/grant) | resource | | ||
| [databricks_grant.volume_rw](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/grant) | resource | | ||
| [databricks_schema.volume](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/schema) | resource | | ||
| [databricks_storage_credential.volume](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/storage_credential) | resource | | ||
| [databricks_volume.volume](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/volume) | resource | | ||
| [aws_caller_identity.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/caller_identity) | data source | | ||
| [aws_iam_policy_document.databricks-s3](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | | ||
| [aws_iam_policy_document.dbx_unity_aws_role_assume_role](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | | ||
| [aws_iam_policy_document.volume_bucket_dbx_unity_access](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | | ||
|
||
## Inputs | ||
|
||
| Name | Description | Type | Default | Required | | ||
|------|-------------|------|---------|:--------:| | ||
| <a name="input_additional_rw_bucket_grant_arns"></a> [additional\_rw\_bucket\_grant\_arns](#input\_additional\_rw\_bucket\_grant\_arns) | (Optional) Additional AWS ARNs to grant read/write permissions to on the bucket (may be necessary for service principals, instance profiles, or users) | `list(string)` | `[]` | no | | ||
| <a name="input_bucket_object_ownership"></a> [bucket\_object\_ownership](#input\_bucket\_object\_ownership) | Set default owner of all objects within bucket (e.g., bucket vs. object owner) | `string` | `null` | no | | ||
| <a name="input_catalog_name"></a> [catalog\_name](#input\_catalog\_name) | Name of the existing Databricks catalog to add the volume to | `string` | n/a | yes | | ||
| <a name="input_catalog_owner"></a> [catalog\_owner](#input\_catalog\_owner) | User or group name of the catalog owner | `string` | n/a | yes | | ||
| <a name="input_catalog_r_grant_principals"></a> [catalog\_r\_grant\_principals](#input\_catalog\_r\_grant\_principals) | (Optional) Databricks groups to grant read-only permissions to on the catalog | `list(string)` | `[]` | no | | ||
| <a name="input_catalog_rw_grant_principals"></a> [catalog\_rw\_grant\_principals](#input\_catalog\_rw\_grant\_principals) | (Optional) Databricks groups to grant read/write permissions to on the catalog | `list(string)` | `[]` | no | | ||
| <a name="input_metastore_id"></a> [metastore\_id](#input\_metastore\_id) | ID of metastore to create catalog in | `string` | n/a | yes | | ||
| <a name="input_schema_r_grant_principals"></a> [schema\_r\_grant\_principals](#input\_schema\_r\_grant\_principals) | (Optional) Databricks groups to grant read-only permissions to on the schema | `list(string)` | `[]` | no | | ||
| <a name="input_schema_rw_grant_principals"></a> [schema\_rw\_grant\_principals](#input\_schema\_rw\_grant\_principals) | (Optional) Databricks groups to grant read/write permissions to on the schema | `list(string)` | `[]` | no | | ||
| <a name="input_tags"></a> [tags](#input\_tags) | REQUIRED: Tags to include for this environment. | <pre>object({<br> project : string<br> env : string<br> service : string<br> owner : string<br> managedBy : string<br> })</pre> | n/a | yes | | ||
| <a name="input_volume_bucket"></a> [volume\_bucket](#input\_volume\_bucket) | (Optional) Name of an existing S3 bucket to use for Databricks volume. NOTE: if provided, you will need to update the bucket policy wherever it is defined to allow Databricks access | `string` | `null` | no | | ||
| <a name="input_volume_comment"></a> [volume\_comment](#input\_volume\_comment) | (Optional) Comment to add to the Databricks volume | `string` | `"Managed by Terraform - this is a default volume for the Databricks workspace"` | no | | ||
| <a name="input_volume_name"></a> [volume\_name](#input\_volume\_name) | Name of the Databricks volume to create | `string` | n/a | yes | | ||
| <a name="input_volume_r_grant_principals"></a> [volume\_r\_grant\_principals](#input\_volume\_r\_grant\_principals) | (Optional) Databricks groups to grant read-only permissions to on the volume | `list(string)` | `[]` | no | | ||
| <a name="input_volume_rw_grant_principals"></a> [volume\_rw\_grant\_principals](#input\_volume\_rw\_grant\_principals) | (Optional) Databricks groups to grant read/write permissions to on the volume | `list(string)` | `[]` | no | | ||
| <a name="input_volume_schema_properties"></a> [volume\_schema\_properties](#input\_volume\_schema\_properties) | Properties of the Databricks schema to add the volume to | `map(string)` | `{}` | no | | ||
| <a name="input_workspace_name"></a> [workspace\_name](#input\_workspace\_name) | Name of the Databricks catalog to add the volume to | `string` | n/a | yes | | ||
|
||
## Outputs | ||
|
||
| Name | Description | | ||
|------|-------------| | ||
| <a name="output_dbx_unity_aws_role_arn"></a> [dbx\_unity\_aws\_role\_arn](#output\_dbx\_unity\_aws\_role\_arn) | n/a | | ||
| <a name="output_volume_path"></a> [volume\_path](#output\_volume\_path) | n/a | | ||
| <a name="output_volume_specific_bucket_name"></a> [volume\_specific\_bucket\_name](#output\_volume\_specific\_bucket\_name) | n/a | | ||
<!-- END --> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
# Principals used by the "standard UC access" bucket-policy statements: the root of the | ||
# AWS account in local.databricks_aws_account, followed by any caller-supplied ARNs from | ||
# var.additional_rw_bucket_grant_arns. | ||
locals { | ||
standard_grant_principals = concat(["arn:aws:iam::${local.databricks_aws_account}:root"], var.additional_rw_bucket_grant_arns) | ||
} | ||
|
||
# Bucket policy document for the module-managed volume bucket. | ||
# It is only rendered when no existing bucket is supplied (var.volume_bucket is null); | ||
# the count expression mirrors the one on module.databricks_bucket, which consumes it. | ||
data "aws_iam_policy_document" "databricks-s3" { | ||
count = var.volume_bucket != null ? 0 : 1 | ||
|
||
# standard UC access | ||
# Bucket-level actions for every principal in local.standard_grant_principals. | ||
statement { | ||
sid = "dbxBucketAccess" | ||
effect = "Allow" | ||
principals { | ||
type = "AWS" | ||
identifiers = local.standard_grant_principals | ||
} | ||
actions = [ | ||
"s3:ListBucket", | ||
"s3:GetBucketLocation", | ||
"s3:GetLifecycleConfiguration", | ||
"s3:PutLifecycleConfiguration", | ||
] | ||
resources = [ | ||
"arn:aws:s3:::${local.bucket_name}", | ||
] | ||
} | ||
# Object-level actions for the same principals, on every key in the bucket. | ||
statement { | ||
sid = "dbxObjAccess" | ||
effect = "Allow" | ||
principals { | ||
type = "AWS" | ||
identifiers = local.standard_grant_principals | ||
} | ||
actions = [ | ||
"s3:GetObject", | ||
"s3:GetObjectVersion", | ||
"s3:PutObject", | ||
"s3:DeleteObject", | ||
] | ||
resources = [ | ||
"arn:aws:s3:::${local.bucket_name}/*" # root access | ||
] | ||
} | ||
# storage credential access - uses string to avoid race condition of role v. bucket creation | ||
# The principal is this account's root, but the ArnEquals condition narrows the grant to | ||
# the single role ARN assembled from local.path and local.unity_aws_role_name. | ||
statement { | ||
sid = "dbxSCBucketAccess" | ||
effect = "Allow" | ||
principals { | ||
type = "AWS" | ||
identifiers = [ | ||
"arn:aws:iam::${data.aws_caller_identity.current.account_id}:root", | ||
] | ||
} | ||
condition { | ||
test = "ArnEquals" | ||
variable = "aws:PrincipalArn" | ||
values = [ | ||
"arn:aws:iam::${data.aws_caller_identity.current.account_id}:role${local.path}${local.unity_aws_role_name}", | ||
] | ||
} | ||
actions = [ | ||
"s3:ListBucket", | ||
"s3:GetBucketLocation", | ||
"s3:GetLifecycleConfiguration", | ||
"s3:PutLifecycleConfiguration", | ||
] | ||
resources = [ | ||
"arn:aws:s3:::${local.bucket_name}", | ||
] | ||
} | ||
# Object-level counterpart of dbxSCBucketAccess, restricted by the same role-ARN condition. | ||
statement { | ||
sid = "dbxSCObjAccess" | ||
effect = "Allow" | ||
principals { | ||
type = "AWS" | ||
identifiers = [ | ||
"arn:aws:iam::${data.aws_caller_identity.current.account_id}:root", | ||
] | ||
} | ||
condition { | ||
test = "ArnEquals" | ||
variable = "aws:PrincipalArn" | ||
values = [ | ||
"arn:aws:iam::${data.aws_caller_identity.current.account_id}:role${local.path}${local.unity_aws_role_name}", | ||
] | ||
} | ||
actions = [ | ||
"s3:GetObject", | ||
"s3:GetObjectVersion", | ||
"s3:PutObject", | ||
"s3:DeleteObject", | ||
] | ||
resources = [ | ||
"arn:aws:s3:::${local.bucket_name}/*" | ||
] | ||
} | ||
|
||
} | ||
|
||
# Private S3 bucket backing the volume, created through the cztack aws-s3-private-bucket | ||
# module pinned at v0.71.0. Skipped entirely when the caller passes an existing bucket | ||
# name in var.volume_bucket. | ||
module "databricks_bucket" { | ||
count = var.volume_bucket != null ? 0 : 1 | ||
# Explicit ordering: the Unity Catalog IAM role is created before the bucket. | ||
depends_on = [ | ||
aws_iam_role.dbx_unity_aws_role | ||
] | ||
|
||
source = "github.com/chanzuckerberg/cztack//aws-s3-private-bucket?ref=v0.71.0" | ||
bucket_name = local.bucket_name | ||
# Index [0] is safe because the policy document uses the same count expression as this module. | ||
bucket_policy = data.aws_iam_policy_document.databricks-s3[0].json | ||
project = var.tags["project"] | ||
env = var.tags["env"] | ||
service = var.tags["service"] | ||
owner = var.tags["owner"] | ||
object_ownership = var.bucket_object_ownership | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
# Auto-generated by fogg. Do not edit | ||
# Make improvements in fogg, so that everyone can benefit. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
# catalog | ||
# Read-only catalog grant: one databricks_grant per entry in var.catalog_r_grant_principals. | ||
resource "databricks_grant" "catalog_r" { | ||
for_each = toset(var.catalog_r_grant_principals) | ||
catalog = databricks_catalog.volume.name | ||
principal = each.value | ||
privileges = ["USE_CATALOG", "USE_SCHEMA", "SELECT"] | ||
} | ||
|
||
# Read/write catalog grant: one databricks_grant per entry in var.catalog_rw_grant_principals. | ||
resource "databricks_grant" "catalog_rw" { | ||
for_each = toset(var.catalog_rw_grant_principals) | ||
catalog = databricks_catalog.volume.name | ||
# Grant to the iterated principal, as every other grant resource in this file does. | ||
# A fixed group name here would make each for_each instance target the same principal | ||
# and leave the principals listed in the variable without the grant. | ||
principal = each.value | ||
privileges = [ | ||
"APPLY_TAG", | ||
"CREATE_CONNECTION", | ||
"CREATE_SCHEMA", | ||
"USE_CATALOG", | ||
"CREATE_FUNCTION", | ||
"CREATE_TABLE", | ||
"EXECUTE", | ||
"MODIFY", | ||
"REFRESH", | ||
"SELECT", | ||
"READ_VOLUME", | ||
"WRITE_VOLUME", | ||
"USE_SCHEMA", | ||
] | ||
} | ||
|
||
# schema | ||
# Read-only schema grant: one databricks_grant per entry in var.schema_r_grant_principals. | ||
resource "databricks_grant" "schema_r" { | ||
for_each = toset(var.schema_r_grant_principals) | ||
schema = databricks_schema.volume.id | ||
principal = each.value | ||
privileges = ["USE_SCHEMA", "SELECT", "READ_VOLUME"] | ||
} | ||
|
||
# Read/write schema grant: one databricks_grant per entry in var.schema_rw_grant_principals. | ||
resource "databricks_grant" "schema_rw" { | ||
for_each = toset(var.schema_rw_grant_principals) | ||
schema = databricks_schema.volume.id | ||
principal = each.value | ||
privileges = [ | ||
"APPLY_TAG", | ||
"CREATE_FUNCTION", | ||
"CREATE_TABLE", | ||
"CREATE_VOLUME", | ||
"USE_SCHEMA", | ||
"EXECUTE", | ||
"MODIFY", | ||
"REFRESH", | ||
"SELECT", | ||
"READ_VOLUME", | ||
"WRITE_VOLUME" | ||
] | ||
} | ||
|
||
# volume | ||
# Read-only volume grant: one databricks_grant per entry in var.volume_r_grant_principals. | ||
resource "databricks_grant" "volume_r" { | ||
for_each = toset(var.volume_r_grant_principals) | ||
volume = databricks_volume.volume.id | ||
principal = each.value | ||
privileges = ["READ_VOLUME"] | ||
} | ||
|
||
# Read/write volume grant: one databricks_grant per entry in var.volume_rw_grant_principals. | ||
resource "databricks_grant" "volume_rw" { | ||
for_each = toset(var.volume_rw_grant_principals) | ||
volume = databricks_volume.volume.id | ||
principal = each.value | ||
privileges = ["READ_VOLUME", "WRITE_VOLUME"] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
## Databricks external location and IAM | ||
|
||
# Identity of the caller behind the default aws provider; its account_id is interpolated | ||
# into the role and root ARNs built in this module. | ||
data "aws_caller_identity" "current" { | ||
provider = aws | ||
} | ||
|
||
# Trust policy for aws_iam_role.dbx_unity_aws_role. It has two statements: | ||
#   1. an external role may assume it when sts:ExternalId matches; | ||
#   2. the role may assume itself (account root principal narrowed by aws:PrincipalArn). | ||
data "aws_iam_policy_document" "dbx_unity_aws_role_assume_role" { | ||
statement { | ||
principals { | ||
type = "AWS" | ||
# NOTE(review): hard-coded ARN, presumably the Databricks-owned Unity Catalog master role - confirm. | ||
identifiers = ["arn:aws:iam::414351767826:role/unity-catalog-prod-UCMasterRole-14S5ZJVKOTYTL"] | ||
} | ||
|
||
actions = ["sts:AssumeRole"] | ||
condition { | ||
test = "StringEquals" | ||
variable = "sts:ExternalId" | ||
|
||
# NOTE(review): hard-coded external ID, presumably tied to one Databricks account - confirm | ||
# before reusing this module elsewhere. | ||
values = ["4a2f419c-ae7a-49f1-b774-8f3113d9834d"] | ||
} | ||
} | ||
statement { | ||
principals { | ||
type = "AWS" | ||
identifiers = ["arn:aws:iam::${data.aws_caller_identity.current.account_id}:root"] | ||
} | ||
|
||
actions = ["sts:AssumeRole"] | ||
condition { | ||
test = "ArnEquals" | ||
variable = "aws:PrincipalArn" | ||
# The ARN is assembled as a string from locals rather than read from the role resource, | ||
# which lets this document be rendered before the role exists. | ||
values = ["arn:aws:iam::${data.aws_caller_identity.current.account_id}:role${local.path}${local.unity_aws_role_name}"] | ||
} | ||
} | ||
} | ||
|
||
# IAM role for Unity Catalog access; name and path come from locals, and the trust policy | ||
# is the dbx_unity_aws_role_assume_role document. | ||
resource "aws_iam_role" "dbx_unity_aws_role" { | ||
name = local.unity_aws_role_name | ||
path = local.path | ||
assume_role_policy = data.aws_iam_policy_document.dbx_unity_aws_role_assume_role.json | ||
} | ||
|
||
### Policy document to access default volume bucket and assume role | ||
# Identity policy for the Unity Catalog role: bucket-level and object-level S3 access on | ||
# local.bucket_name, plus permission for the role to assume itself. | ||
data "aws_iam_policy_document" "volume_bucket_dbx_unity_access" { | ||
# Explicit ordering after the bucket module (which has zero instances when | ||
# var.volume_bucket is set). | ||
depends_on = [ | ||
module.databricks_bucket | ||
] | ||
|
||
statement { | ||
sid = "dbxSCBucketAccess" | ||
effect = "Allow" | ||
actions = [ | ||
"s3:ListBucket", | ||
"s3:GetBucketLocation", | ||
"s3:GetLifecycleConfiguration", | ||
"s3:PutLifecycleConfiguration" | ||
] | ||
resources = [ | ||
"arn:aws:s3:::${local.bucket_name}", | ||
] | ||
} | ||
# NOTE(review): s3:GetObjectVersion is not listed here, while the bucket policy statement | ||
# with the same sid (data.aws_iam_policy_document.databricks-s3) allows it - confirm | ||
# whether the omission is intentional, especially for caller-supplied buckets. | ||
statement { | ||
sid = "dbxSCObjAccess" | ||
effect = "Allow" | ||
actions = [ | ||
"s3:GetObject", | ||
"s3:PutObject", | ||
"s3:DeleteObject", | ||
] | ||
resources = [ | ||
"arn:aws:s3:::${local.bucket_name}/*", | ||
] | ||
} | ||
# Self-assume: the resource is the same role ARN string used in the trust policy condition. | ||
statement { | ||
sid = "databricksAssumeRole" | ||
effect = "Allow" | ||
actions = [ | ||
"sts:AssumeRole" | ||
] | ||
resources = [ | ||
"arn:aws:iam::${data.aws_caller_identity.current.account_id}:role${local.path}${local.unity_aws_role_name}" | ||
] | ||
} | ||
} | ||
|
||
# Managed IAM policy holding the volume_bucket_dbx_unity_access document. | ||
# No name or name_prefix is set, so the policy name is left to the provider to generate. | ||
resource "aws_iam_policy" "dbx_unity_access_policy" { | ||
policy = data.aws_iam_policy_document.volume_bucket_dbx_unity_access.json | ||
} | ||
|
||
# Attaches the managed access policy to the Unity Catalog role. | ||
resource "aws_iam_role_policy_attachment" "dbx_unity_aws_access" { | ||
policy_arn = aws_iam_policy.dbx_unity_access_policy.arn | ||
role = aws_iam_role.dbx_unity_aws_role.name | ||
} |
Oops, something went wrong.