Initial refactor of the vector configuration management code to suppo… #1506

Merged (2 commits, Feb 24, 2022). Changes from all commits are shown below.
8 changes: 4 additions & 4 deletions pillar/top.sls
@@ -15,7 +15,7 @@ base:
     - elastic_stack.version_production
   'roles:auth_server':
     - match: grain
-    - fluentd.cas
+    - vector.cas
   'G@roles:elasticsearch and not P@environment:operations*':
     - match: compound
     - fluentd.elasticsearch
@@ -116,7 +116,7 @@ base:
     - nginx
     - nginx.reddit
     - reddit
-    - fluentd.reddit
+    - vector.reddit
   'G@environment:operations and G@roles:redash':
     - match: compound
     - nginx
@@ -183,7 +183,7 @@ base:
   'roles:xqwatcher':
     - match: grain
     - edx.xqwatcher
-    - fluentd.xqwatcher
+    - vector.xqwatcher
   'lightsail-xqwatcher-686':
     - match: glob
     - edx.xqwatcher
@@ -220,7 +220,7 @@ base:
   'roles:rabbitmq':
     - match: grain
     - rabbitmq
-    - fluentd.rabbitmq
+    - vector.rabbitmq
     - consul.rabbitmq
   'roles:tika':
     - match: grain
145 changes: 145 additions & 0 deletions pillar/vector/cas.sls
@@ -0,0 +1,145 @@
vector:
  extra_configurations:
    - name: cas_logs
      content:
        log_schema:
          timestamp_key: vector_timestamp
          host_key: log_host
        sources:
          collect_cas_nginx_access_logs:
            type: file
            read_from: end
            file_key: log_file
            include:
              - /var/log/nginx/access.log
          collect_cas_nginx_error_logs:
            type: file
            read_from: end
            file_key: log_file
            include:
              - /var/log/nginx/error.log
          collect_cas_application_logs:
            type: file
            read_from: end
            file_key: log_file
            include:
              - /opt/log/django.log
            multiline:
              start_pattern: '^\['
              condition_pattern: '^\['
              mode: 'halt_before'
              timeout_ms: 5000
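            # multiline stitches Django tracebacks into a single event: a record
            # starts at a line matching '^\[' and, with halt_before, continuation
            # lines are appended until the next line matching '^\[' begins a new
            # record; timeout_ms flushes a partial record after 5s of silence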
          collect_auth_logs:
            {{ salt.pillar.get('vector:base_auth_log_collection')|yaml(False)|indent(8) }}
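          # (the body of collect_auth_logs is spliced in from the shared
          # vector:base_auth_log_collection pillar defined in pillar/vector/init.sls)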
        transforms:
          # Transforms for NGINX logs
          parse_cas_nginx_access_logs:
            type: remap
            inputs:
              - 'collect_cas_nginx_access_logs'
            source: |
              parsed, err = parse_regex(.message, r'^time=(?P<time>\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\+\d{2}:\d{2})\sclient=(?P<client>[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3})\smethod=(?P<method>\S*)\srequest="(?P<request>.*)"\srequest_length=(?P<request_length>\d+)\sstatus=(?P<status>\d+)\sbytes_sent=(?P<bytes_sent>\d+)\sbody_bytes_sent=(?P<body_bytes_sent>\d+)\sreferer=(?P<referer>.*)\suser_agent="(?P<user_agent>.+)"\supstream_addr=(?P<upstream_addr>.+)\supstream_status=(?P<upstream_status>.+)\srequest_time=(?P<request_time>.+)\srequest_id=(?P<request_id>\w+)\supstream_response_time=(?P<upstream_response_time>.+)\supstream_connect_time=(?P<upstream_connect_time>.+)\supstream_header_time=(?P<upstream_header_time>.*)$')
              if err != null {
                .parse_error = err
              }
              err = null
              . = merge(., parsed)
              .log_process = "nginx"
              .log_type = "cas.nginx.access"
              .environment = "${ENVIRONMENT}"
              parsed_bs, err = to_int(.bytes_sent)
              if err == null {
                .bytes_sent = parsed_bs
              }
              err = null
              parsed_bbs, err = to_int(.body_bytes_sent)
              if err == null {
                .body_bytes_sent = parsed_bbs
              }
              err = null
              parsed_rl, err = to_int(.request_length)
              if err == null {
                .request_length = parsed_rl
              }
              err = null
              parsed_rt, err = to_float(.request_time)
              if err == null {
                .request_time = parsed_rt
              }
              err = null
              parsed_status, err = to_int(.status)
              if err == null {
                .status = parsed_status
              }
              err = null
              parsed_usct, err = to_float(.upstream_connect_time)
              if err == null {
                .upstream_connect_time = parsed_usct
              }
              err = null
              parsed_usht, err = to_float(.upstream_header_time)
              if err == null {
                .upstream_header_time = parsed_usht
              }
              err = null
              parsed_uprt, err = to_float(.upstream_response_time)
              if err == null {
                .upstream_response_time = parsed_uprt
              }
              err = null
              parsed_ups, err = to_int(.upstream_status)
              if err == null {
                .upstream_status = parsed_ups
              }
              err = null
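          # ELB health checks poll NGINX constantly; drop them here so they do
          # not inflate the access-log volume shipped to Loki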
          filter_healthchecks_cas_nginx_access_logs:
            inputs:
              - 'parse_cas_nginx_access_logs'
            type: filter
            condition: '! contains!(.user_agent, "ELB-HealthChecker")'
          parse_cas_nginx_error_logs:
            type: remap
            inputs:
              - 'collect_cas_nginx_error_logs'
            source: |
              parsed, err = parse_regex(.message, r'^(?P<time>\d{4}/\d{2}/\d{2}\s\d{2}:\d{2}:\d{2})\s\[(?P<severity>.*)\]\s(?P<pid>\d*)#(?P<tid>\d*):\s\*(?P<cid>\d*)\s(?P<message>.*),\sclient:\s(?P<client>.*),\sserver:(?P<server>.*)(?P<additional_content>.*)$')
              if err != null {
                .parse_error = err
              }
              . = merge(., parsed)
              .log_process = "nginx"
              .log_type = "cas.nginx.error"
              .environment = "${ENVIRONMENT}"
          parse_cas_application_logs:
            type: remap
            inputs:
              - 'collect_cas_application_logs'
            source: |
              parsed, err = parse_regex(.message, r'^\[(?P<time>\d{4}-\d{2}-\d{2}\w+:\d{2}:\d{2})\] (?P<log_level>\w+) \[(?P<module_name>[a-zA-Z0-9-_.]+):(?P<line_number>\d+)\] (?P<message>.*)')
              if err != null {
                .parse_error = err
              }
              . = merge(., parsed)
              .log_process = "cas"
              .log_type = "cas.application"
              .environment = "${ENVIRONMENT}"
          enrich_cas_application_logs:
            type: aws_ec2_metadata
            inputs:
              - 'parse_cas_application_logs'
            namespace: ec2
          parse_auth_logs:
            {{ salt.pillar.get('vector:base_auth_log_parse_source')|yaml(False)|indent(10) }}
        sinks:
          ship_cas_logs_to_grafana_cloud:
            inputs:
              - 'filter_healthchecks_cas_nginx_access_logs'
              - 'parse_cas_nginx_error_logs'
              - 'enrich_cas_application_logs'
              - 'parse_auth_logs'
            type: loki
            labels:
              application: cas
              environment: ${ENVIRONMENT}
              service: cas
            {{ salt.pillar.get('vector:base_loki_configuration')|yaml(False)|indent(10) }}
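        # The Jinja include above splices the shared Loki settings (endpoint,
        # auth, encoding, out_of_order_action) from vector:base_loki_configuration
        # in pillar/vector/init.sls, so this sink only declares its type, inputs,
        # and labels.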
80 changes: 59 additions & 21 deletions pillar/vector/init.sls
@@ -1,42 +1,80 @@
-{% set ENVIRONMENT = salt.grains.get('environment', 'dev') %}
-
 vector:
-  configuration:
+  base_auth_log_collection:
+    type: file
+    file_key: log_file
+    read_from: end
+    include:
+      - /var/log/auth.log
+  base_auth_log_parse_source:
+    type: remap
+    inputs:
+      - 'collect_auth_logs'
+    source: |
+      parsed, err = parse_syslog(.message)
+      if err != null {
+        .parse_error = err
+      }
+      . = merge(., parsed)
+      .log_process = "authlog"
+      .environment = "${ENVIRONMENT}"

+  # These two are intentionally incomplete sink configurations. The type, inputs,
+  # and labels need to be provided on a configuration-by-configuration basis.
+  base_loki_configuration:
+    auth:
+      strategy: basic
+      password: __vault__::secret-operations/global/grafana-cloud-credentials>data>api_key
+      user: __vault__::secret-operations/global/grafana-cloud-credentials>data>loki_user
+    endpoint: https://logs-prod-us-central1.grafana.net
+    encoding:
+      codec: json
+    out_of_order_action: rewrite_timestamp
+  base_cortex_configuration:
+    endpoint: https://prometheus-prod-10-prod-us-central-0.grafana.net/api/prom/push
+    healthcheck: false
+    auth:
+      strategy: basic
+      user: __vault__::secret-operations/global/grafana-cloud-credentials>data>prometheus_user
+      password: __vault__::secret-operations/global/grafana-cloud-credentials>data>api_key
+
+  # By default, there are no extra vector configurations to add
+  extra_configurations: []
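+  # (role pillars override this list to attach app-specific pipelines; see
+  # pillar/vector/cas.sls for an example that appends a cas_logs entry)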

+  # Call out host metrics in their own area because they will be enabled globally
+  host_metrics_configuration:
     sources:
-      host_metrics:
+      collect_host_metrics:
         type: host_metrics
         scrape_interval_secs: 60
         collectors:
           - cpu
           - disk
           - filesystem
           - load
           - host
           - memory
           - network

     transforms:
-      host_metrics_relabel:
+      cleanup_host_metrics:
         type: remap
         inputs:
-          - host_metrics
+          - 'collect_host_metrics'
         source: |
-          .tags.job = "integrations/linux_host"
-
-      add_labels_to_metrics:
+          # Drop all the not-real filesystems metrics
+          abort_match_filesystem, err = !(match_any(.tags.filesystem, [r'ext.', r'btrfs', r'xfs']))
+          if abort_match_filesystem {
+            abort
+          }
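+          # (with vector's default drop_on_abort behavior, abort discards the
+          # event, so pseudo-filesystems such as tmpfs never emit metrics)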
+      add_labels_to_host_metrics:
         type: remap
         inputs:
-          - '*metrics_relabel'
+          - 'cleanup_host_metrics'
         source: |
-          .tags.environment = "{{ ENVIRONMENT }}"
-
+          .tags.environment = "${ENVIRONMENT}"
+          .tags.job = "integrations/linux_host"
     sinks:
-      grafana_cortex_metrics:
+      ship_host_metrics_to_grafana_cloud:
         inputs:
-          - add_labels_to_metrics
+          - 'add_labels_to_host_metrics'
         type: prometheus_remote_write
-        endpoint: https://prometheus-prod-10-prod-us-central-0.grafana.net/api/prom/push
-        healthcheck: false
-        auth:
-          strategy: basic
-          user: __vault__::secret-operations/global/grafana-cloud-credentials>data>prometheus_user
-          password: __vault__::secret-operations/global/grafana-cloud-credentials>data>api_key
+        {{ salt.pillar.get('vector:base_cortex_configuration')|yaml(False)|indent(8) }}
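+        # For reference, once Jinja renders the include above, this sink is
+        # expected to come out roughly as follows (the vault references are
+        # resolved separately at render time):
+        #
+        #   ship_host_metrics_to_grafana_cloud:
+        #     inputs:
+        #       - 'add_labels_to_host_metrics'
+        #     type: prometheus_remote_write
+        #     endpoint: https://prometheus-prod-10-prod-us-central-0.grafana.net/api/prom/push
+        #     healthcheck: false
+        #     auth:
+        #       strategy: basic
+        #       user: __vault__::secret-operations/global/grafana-cloud-credentials>data>prometheus_user
+        #       password: __vault__::secret-operations/global/grafana-cloud-credentials>data>api_key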