Skip to content

Commit

Permalink
feat: Add modules for Azure Monitor Metric Alert Rules and Action Gro…
Browse files Browse the repository at this point in the history
…ups (#8)

This PR introduces new modules for Azure Monitor Alert Rules and Action
Groups.

Furthermore, these modules are now incorporated into existing modules.

The remaining changes are cleanups identified while integrating the
modules above.

---------

Co-authored-by: Jón Orri @ Skývafnir <[email protected]>
  • Loading branch information
gzur and jonorri authored Apr 8, 2024
1 parent b759099 commit 2912262
Show file tree
Hide file tree
Showing 36 changed files with 851 additions and 240 deletions.
8 changes: 4 additions & 4 deletions examples/azure-databricks/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
## Inputs

| Name | Description | Type | Default | Required |
|--------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------|--------------|:--------:|
| ------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------- | ------------ | :------: |
| <a name="input_budget_contact_emails"></a> [budget_contact_emails](#input_budget_contact_emails) | Emails to notify when the budget is forecasted to be broken | `list(string)` | n/a | yes |
| <a name="input_databricks_users"></a> [databricks_users](#input_databricks_users) | List of users to add to the databricks workspace | <pre>list(object({<br> user_name = string<br> display_name = string<br> }))</pre> | n/a | yes |
| <a name="input_instance"></a> [instance](#input_instance) | Identifier for the application, workload or service | `string` | n/a | yes |
Expand All @@ -17,13 +17,13 @@
## Outputs

| Name | Description |
|-------------------------------------------------------------------------------------------------------------|-------------------------------------|
| ----------------------------------------------------------------------------------------------------------- | ----------------------------------- |
| <a name="output_databricks_workspace_url"></a> [databricks_workspace_url](#output_databricks_workspace_url) | The URL of the Databricks workspace |

## Modules

| Name | Source | Version |
|-------------------------------------------------------------------------------------------------|-----------------------------------------|---------|
| ----------------------------------------------------------------------------------------------- | --------------------------------------- | ------- |
| <a name="module_base_setup"></a> [base_setup](#module_base_setup) | ../../modules/azure/base-setup | n/a |
| <a name="module_databricks_config"></a> [databricks_config](#module_databricks_config) | ../../modules/databricks/initial_config | n/a |
| <a name="module_databricks_workspace"></a> [databricks_workspace](#module_databricks_workspace) | ../../modules/azure/databricks | n/a |
Expand All @@ -32,7 +32,7 @@
## Requirements

| Name | Version |
|--------------------------------------------------------------------------|---------|
| ------------------------------------------------------------------------ | ------- |
| <a name="requirement_terraform"></a> [terraform](#requirement_terraform) | >= 1.1 |

<!-- /TERRAFORM_DOCS_BLOCK -->
Expand Down
7 changes: 0 additions & 7 deletions examples/azure-databricks/providers.tf
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,4 @@

terraform {
required_version = ">= 1.1"

required_providers {
azurerm = {
source = "hashicorp/azurerm"
version = ">=3.0.0"
}
}
}
7 changes: 5 additions & 2 deletions examples/azure-datalakehouse/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,9 @@ module "datafactory" {
owners = var.datalakehouse_admins
}

alert_on_pipeline_failure = var.alert_on_pipeline_failure
pipeline_failure_alert_emails = var.alert_contact_emails

tags = local.tags
}

Expand All @@ -144,8 +147,8 @@ module "datawarehouse" {
sql_server_name_override = try(var.name_overrides.sql_server, null)
keyvault_key_name_override = try(var.name_overrides.warehouse_audit_keyvault_key, null)
audit_storage_account_name_override = try(var.name_overrides.warehouse_audit_storage_account, null)
monitor_alert_emails = ["[email protected]"]
enable_monitor_alerts = true
monitor_alert_emails = var.alert_contact_emails
enable_db_monitor_alerts = true

datawarehouse_contributor_principal_ids = {
"Data Engineer Group" = module.data_engineer_user_group[0].group_id
Expand Down
36 changes: 36 additions & 0 deletions examples/azure-datalakehouse/monitor.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@

# TODO: All of this needs to move to a module
# Naming/tagging helper. Its outputs (resource_name, resource_name_template,
# tags) are consumed by the Log Analytics workspace and the diagnostic setting
# defined in this file.
module "defaults" {
  source = "../../modules/skyvafnir/defaults"

  # Components the generated names are built from.
  org_code              = var.org_code
  tier                  = var.tier
  instance              = var.instance
  resource_abbreviation = "logs"

  caller = basename(path.module)
  tags   = var.tags
}

# Log Analytics workspace used as the destination for the Data Factory
# diagnostic setting. SKU and retention are not set here, so whatever the
# provider defaults to applies.
resource "azurerm_log_analytics_workspace" "this" {
  name = module.defaults.resource_name
  tags = module.defaults.tags

  # Placed alongside everything else in the base-setup resource group.
  location            = module.base_setup.resource_group_location
  resource_group_name = module.base_setup.resource_group_name
}

# Routes every log category group and all metrics of the Data Factory to the
# Log Analytics workspace. Created only when the Data Factory is enabled, since
# it indexes module.datafactory[0].
resource "azurerm_monitor_diagnostic_setting" "this" {
  count = local.data_factory_enabled ? 1 : 0

  name               = format(module.defaults.resource_name_template, "diag")
  target_resource_id = module.datafactory[0].id

  # "Dedicated" asks for resource-specific tables in the workspace instead of
  # the shared AzureDiagnostics table.
  log_analytics_destination_type = "Dedicated"
  log_analytics_workspace_id     = azurerm_log_analytics_workspace.this.id

  metric {
    category = "AllMetrics"
  }

  enabled_log {
    category_group = "allLogs"
  }
}
11 changes: 11 additions & 0 deletions examples/azure-datalakehouse/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -189,3 +189,14 @@ variable "name_overrides" {
DESC
default = {}
}

# Opt-in switch; forwarded unchanged to the datafactory module's
# alert_on_pipeline_failure input.
variable "alert_on_pipeline_failure" {
  description = "Whether to alert on pipeline failure"
  type        = bool
  default     = false
}
# Shared alert recipients. The value is passed both to the datafactory module
# (pipeline_failure_alert_emails) and to the datawarehouse module
# (monitor_alert_emails), so the description must not be pipeline-specific.
variable "alert_contact_emails" {
  type        = list(string)
  description = "A list of emails to send alerts to. Used for Data Factory pipeline failure alerts and for data warehouse monitor alerts."
  default     = []
}
4 changes: 0 additions & 4 deletions modules/azure/base-setup/defaults.tf
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,3 @@ module "defaults" {
tier = var.tier
instance = var.instance
}

locals {
tags = module.defaults.tags
}
4 changes: 2 additions & 2 deletions modules/azure/base-setup/lock.tf
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
resource "azurerm_management_lock" "this" {
count = var.lock == true ? 1 : 0
name = format("lock-${local.resource_group_name}")
name = "lock-${module.defaults.resource_name}"
scope = local.resource_group_info.id
lock_level = "CanNotDelete"
notes = "Protect ${local.resource_group_name} from deletion."
notes = "Protect ${module.defaults.resource_name} from deletion."
}
37 changes: 16 additions & 21 deletions modules/azure/base-setup/main.tf
Original file line number Diff line number Diff line change
@@ -1,24 +1,11 @@
locals {
resource_abbreviation = "rg"
resource_name_template = module.defaults.resource_name_template
resource_group_name = module.defaults.resource_name
}

resource "time_offset" "now" {
offset_seconds = 1
}

data "external" "git_sha" {
# We tag our resource groups with the git sha of the provisioning commit.
program = [
"git",
"log",
"--pretty=format:{ \"git_sha\": \"%H\" }",
"-1",
"HEAD"
]
}

# Unify resource group access patterns, since sometimes we are creating the resource group and sometimes we are not.
locals {
provision_resource_group = var.existing_resource_group_info == null ? true : false
Expand All @@ -33,26 +20,34 @@ locals {

resource "azurerm_resource_group" "this" {
count = local.provision_resource_group ? 1 : 0
name = local.resource_group_name
name = module.defaults.resource_name
location = var.location

tags = merge(local.tags, data.external.git_sha.result)
tags = module.defaults.tags
}

resource "azurerm_monitor_action_group" "this" {
module "cost_alarm_action_group" {
count = local.provision_action_group ? 1 : 0

name = format(local.resource_name_template, "ag")
short_name = substr(format(local.resource_name_template, "ag"), 0, 12)
resource_group_name = local.resource_group_info.name
source = "../monitor-action-group"

tier = var.tier
instance = var.instance
org_code = var.org_code

resource_group_info = local.resource_group_info

group_purpose = "cost-alert"
short_name = "cost"
email_receivers = var.budget_contact_emails

tags = local.tags
tags = module.defaults.tags
}

resource "azurerm_consumption_budget_resource_group" "this" {
count = local.provision_resource_group && var.budget_for_resource_group > 0.0 ? 1 : 0

name = format(local.resource_name_template, "budget")
name = format(module.defaults.resource_name_template, "budget")
resource_group_id = local.resource_group_info.id

amount = var.budget_for_resource_group
Expand Down
7 changes: 6 additions & 1 deletion modules/azure/base-setup/moved.tf
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,9 @@ moved { # 2023-11-09 - Add support for using an existing resource group
moved {
from = data.external.git
to = data.external.git_sha
}
}

moved { # 2024-04-08 - Budget action group is now created by the monitor-action-group module (#8)
from = azurerm_monitor_action_group.this[0]
to = module.cost_alarm_action_group[0].azurerm_monitor_action_group.this
}
4 changes: 2 additions & 2 deletions modules/azure/base-setup/outputs.tf
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,5 @@ output "resource_group_info" {

output "budget_alert_action_group_id" {
description = "The Action Group ID"
value = local.provision_action_group ? azurerm_monitor_action_group.this[0].id : null
}
value = local.provision_action_group ? module.cost_alarm_action_group[0].action_group_id : null
}
4 changes: 0 additions & 4 deletions modules/azure/base-setup/providers.tf
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,5 @@ terraform {
source = "hashicorp/time"
version = ">=0.9"
}
external = {
source = "hashicorp/external"
version = ">=2.3"
}
}
}
4 changes: 3 additions & 1 deletion modules/azure/base-setup/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,9 @@ variable "location" {

variable "budget_for_resource_group" {
type = number
description = "Budget for the whole resource group"
description = <<DESC
Budget for the whole resource group. The currency is determined by the subscription's billing currency.
DESC
default = 0
}

Expand Down
12 changes: 6 additions & 6 deletions modules/azure/databricks/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
## Inputs

| Name | Description | Type | Default | Required |
|--------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------|---------|:--------:|
| ------------------------------------------------------------------------------------------ | --------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------- | ------- | :------: |
| <a name="input_instance"></a> [instance](#input_instance) | Identifier for the application, workload or service | `string` | n/a | yes |
| <a name="input_org_code"></a> [org_code](#input_org_code) | Org code | `string` | n/a | yes |
| <a name="input_resource_group_info"></a> [resource_group_info](#input_resource_group_info) | Name and Location of the Resource Group | <pre>object({<br> name = string<br> location = string<br> })</pre> | n/a | yes |
Expand All @@ -16,32 +16,32 @@
## Outputs

| Name | Description |
|----------------------------------------------------------------------------|-------------------------------------|
| -------------------------------------------------------------------------- | ----------------------------------- |
| <a name="output_workspace_url"></a> [workspace_url](#output_workspace_url) | The URL of the Databricks workspace |

## Resources

| Name | Type |
|-------------------------------------------------------------------------------------------------------------------------------------------|----------|
| ----------------------------------------------------------------------------------------------------------------------------------------- | -------- |
| [azurerm_databricks_workspace.this](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/databricks_workspace) | resource |

## Modules

| Name | Source | Version |
|-------------------------------------------------------------|--------------------------|---------|
| ----------------------------------------------------------- | ------------------------ | ------- |
| <a name="module_defaults"></a> [defaults](#module_defaults) | ../../skyvafnir/defaults | n/a |

## Requirements

| Name | Version |
|--------------------------------------------------------------------------|---------|
| ------------------------------------------------------------------------ | ------- |
| <a name="requirement_terraform"></a> [terraform](#requirement_terraform) | >= 1.1 |
| <a name="requirement_azurerm"></a> [azurerm](#requirement_azurerm) | >=3.0.0 |

## Providers

| Name | Version |
|--------------------------------------------------------------|---------|
| ------------------------------------------------------------ | ------- |
| <a name="provider_azurerm"></a> [azurerm](#provider_azurerm) | 3.90.0 |

<!-- /TERRAFORM_DOCS_BLOCK -->
Expand Down
64 changes: 64 additions & 0 deletions modules/azure/datafactory/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -94,3 +94,67 @@ resource "azurerm_data_factory_linked_service_data_lake_storage_gen2" "this" {
url = data.azurerm_storage_account.datalake.primary_dfs_endpoint
use_managed_identity = true
}

# Action group that the pipeline-failure metric alert notifies.
# Instantiated only when var.alert_on_pipeline_failure is true.
module "pipeline_failure_action_group" {
  count  = var.alert_on_pipeline_failure == true ? 1 : 0
  source = "../monitor-action-group"

  # Naming components
  org_code = var.org_code
  tier     = var.tier
  instance = var.instance

  resource_group_info = var.resource_group_info
  tags                = module.defaults.tags

  short_name    = "pipefail"
  group_purpose = "pipeline-alert"

  # Who gets notified
  email_receivers = var.pipeline_failure_alert_emails
  arm_receiver_role_ids = {
    # NOTE(review): key looks like the role's display name and the value its
    # role definition GUID — confirm against the monitor-action-group module.
    "Monitoring Reader" = "43d0d8ad-25c7-4714-9337-8ba259a9fe05"
  }
}

# Metric alert on the Data Factory's PipelineFailedRuns metric.
#
# Fires (severity 2) when the total number of failed pipeline runs in a
# 5-minute window is greater than zero, and notifies the pipeline-failure
# action group. Only created when var.alert_on_pipeline_failure is true.
resource "azurerm_monitor_metric_alert" "this" {
  count = var.alert_on_pipeline_failure == true ? 1 : 0

  name = format(module.defaults.resource_name_template, "pipeline_fail_alert")

  # Indentation-stripping heredoc so the rendered description is flush-left
  # regardless of how this block is indented.
  description = <<-DESC
    This alert is triggered when a pipeline fails in ${azurerm_data_factory.this.name}
    ${azurerm_data_factory.this.id}
  DESC

  resource_group_name      = var.resource_group_info.name
  target_resource_type     = "Microsoft.DataFactory/factories"
  target_resource_location = var.resource_group_info.location
  scopes                   = [azurerm_data_factory.this.id]

  severity    = 2
  window_size = "PT5M"

  criteria {
    metric_namespace = "Microsoft.DataFactory/factories"
    metric_name      = "PipelineFailedRuns"
    aggregation      = "Total"
    operator         = "GreaterThan"
    threshold        = 0

    # Wildcard on the "Name" dimension: include every value rather than a
    # fixed list. NOTE(review): presumably "Name" is the pipeline name — confirm.
    dimension {
      name     = "Name"
      operator = "Include"
      values   = ["*"]
    }
  }

  action {
    action_group_id = module.pipeline_failure_action_group[0].action_group_id
  }

  tags = module.defaults.tags

  lifecycle {
    # An alert with nobody to notify is a misconfiguration; fail the plan
    # with an actionable message instead of creating a silent alert.
    precondition {
      condition     = var.alert_on_pipeline_failure == true && length(var.pipeline_failure_alert_emails) > 0
      error_message = "var.pipeline_failure_alert_emails can't be empty if var.alert_on_pipeline_failure is true."
    }
  }
}
12 changes: 12 additions & 0 deletions modules/azure/datafactory/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -136,3 +136,15 @@ variable "name_override" {
description = "Override the name of the Data Factory. If not provided, the name will be generated."
default = null
}

# Toggles the pipeline-failure action group and metric alert.
# NOTE(review): this defaults to true while pipeline_failure_alert_emails
# defaults to []; the precondition on azurerm_monitor_metric_alert.this rejects
# that combination, so callers relying on defaults must either supply emails or
# set this to false. Default kept as-is for interface compatibility.
variable "alert_on_pipeline_failure" {
  type        = bool
  description = "If true, an alert will be created to notify when a pipeline fails. Requires pipeline_failure_alert_emails to be non-empty."
  default     = true
}

# Recipients handed to the pipeline-failure action group as email receivers.
variable "pipeline_failure_alert_emails" {
  description = "A list of email addresses to notify when a pipeline fails."
  type        = list(string)
  default     = []
}
Loading

0 comments on commit 2912262

Please sign in to comment.