Installing and configuring vector #1511

Merged: 3 commits, Mar 3, 2022
pillar/top.sls (11 changes: 5 additions & 6 deletions)
@@ -3,7 +3,7 @@ base:
- match: compound
- common
- environment_settings
# - vector
- vector
# '* and not proxy-* and not restore-* and not G@roles:devstack and not P@environment:mitxonline and not G@context:packer and not P@roles:(edx|edx-worker)$':
# - match: compound
# - fluentd
@@ -15,7 +15,6 @@ base:
- elastic_stack.version_production
'roles:auth_server':
- match: grain
- vector.cas
'G@roles:elasticsearch and not P@environment:operations*':
- match: compound
- fluentd.elasticsearch
@@ -83,8 +82,8 @@ base:
- consul
- shibboleth
- shibboleth.odlvideo
- fluentd.odlvideo
- logrotate.odlvideo
- vector.odlvideo
proxy-bootcamps-*:
- heroku.bootcamps
proxy-mitxpro-*:
@@ -103,7 +102,7 @@ base:
- consul
- shibboleth
- shibboleth.mitx_cas
- fluentd.cas
- vector.cas
'G@roles:rabbitmq and P@environment:mitx.*':
- match: compound
- rabbitmq.mitx
@@ -115,8 +114,8 @@
- match: grain
- nginx
- nginx.reddit
- reddit
- vector.reddit
- reddit
'G@environment:operations and G@roles:redash':
- match: compound
- nginx
@@ -220,8 +219,8 @@ base:
'roles:rabbitmq':
- match: grain
- rabbitmq
- vector.rabbitmq
- consul.rabbitmq
- vector.rabbitmq
'roles:tika':
- match: grain
- nginx
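For context, the targets in pillar/top.sls use Salt's standard matching: 'match: grain' matches a single grain value, while 'match: compound' combines matchers such as G@ (grain glob) and P@ (grain regular expression). A minimal sketch of the pattern, using a hypothetical role and pillar name rather than values from this repository:

base:
  # plain grain match: every minion whose 'roles' grain equals 'some_app'
  'roles:some_app':
    - match: grain
    - vector
    - vector.some_app
  # compound match: grain glob (G@) combined with a grain regex (P@)
  'G@roles:some_app and P@environment:production.*':
    - match: compound
    - vector
    - vector.some_app

Each listed entry maps to a pillar file, so an entry like '- vector.cas' above pulls in pillar/vector/cas.sls for the matching minions.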
pillar/vector/cas.sls (153 changes: 9 additions & 144 deletions)
@@ -1,145 +1,10 @@
vector:
extra_configurations:
- name: cas_logs
content:
log_schema:
timestamp_key: vector_timestamp
host_key: log_host
sources:
collect_cas_nginx_access_logs:
type: file
read_from: end
file_key: log_file
include:
- /var/log/nginx/access.log
collect_cas_nginx_error_logs:
type: file
read_from: end
file_key: log_file
include:
- /var/log/nginx/error.log
collect_cas_application_logs:
type: file
read_from: end
file_key: log_file
include:
- /opt/log/django.log
multiline:
start_pattern: '^\['
condition_pattern: '^\['
mode: 'halt_before'
timeout_ms: 5000
collect_auth_logs:
{{ salt.pillar.get('vector:base_auth_log_collection')|yaml(False)|indent(8) }}
transforms:
# Transforms for NGINX logs
parse_cas_nginx_access_logs:
type: remap
inputs:
- 'collect_cas_nginx_access_logs'
source: |
parsed, err = parse_regex(.message, r'^time=(?P<time>\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\+\d{2}:\d{2})\sclient=(?P<client>[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3})\smethod=(?P<method>\S*)\srequest="(?P<request>.*)"\srequest_length=(?P<request_length>\d+)\sstatus=(?P<status>\d+)\sbytes_sent=(?P<bytes_sent>\d+)\sbody_bytes_sent=(?P<body_bytes_sent>\d+)\sreferer=(?P<referer>.*)\suser_agent="(?P<user_agent>.+)"\supstream_addr=(?P<upstream_addr>.+)\supstream_status=(?P<upstream_status>.+)\srequest_time=(?P<request_time>.+)\srequest_id=(?P<request_id>\w+)\supstream_response_time=(?P<upstream_response_time>.+)\supstream_connect_time=(?P<upstream_connect_time>.+)\supstream_header_time=(?P<upstream_header_time>.*)$')
if err != null {
.parse_error = err
}
err = null
. = merge(., parsed)
.log_process = "nginx"
.log_type = "cas.nginx.access"
.environment = "${ENVIRONMENT}"
parsed_bs, err = to_int(.bytes_sent)
if err == null {
.bytes_sent = parsed_bs
}
err = null
parsed_bbs, err = to_int(.body_bytes_sent)
if err == null {
.body_bytes_sent = parsed_bbs
}
err = null
parsed_rl, err = to_int(.request_length)
if err == null {
.request_length = parsed_rl
}
err = null
parsed_rt, err = to_float(.request_time)
if err == null {
.request_time = parsed_rt
}
err = null
parsed_status, err = to_int(.status)
if err == null {
.status = parsed_status
}
err = null
parsed_usct, err = to_float(.upstream_connect_time)
if err == null {
.upstream_connect_time = parsed_usct
}
err = null
parsed_usht, err = to_float(.upstream_header_time)
if err == null {
.upstream_header_time = parsed_usht
}
err = null
parsed_uprt, err = to_float(.upstream_response_time)
if err == null {
.upstream_response_time = parsed_uprt
}
err = null
parsed_ups, err = to_int(.upstream_response)
if err == null {
.upstream_status = parsed_ups
}
err = null
filter_healthchecks_cas_nginx_access_logs:
inputs:
- 'parse_cas_nginx_access_logs'
type: filter
condition: '! contains!(.http_user_agent, "ELB-HealthChecker")'
parse_cas_nginx_error_logs:
type: remap
inputs:
- 'collect_cas_nginx_error_logs'
source: |
parsed, err = parse_regex(.message, r'^(?P<time>\d{4}/\d{2}/\d{2}\s\d{2}:\d{2}:\d{2})\s\[(?P<severity>.*)\]\s(?P<pid>\d*)#(?P<tid>\d*):\s\*(?P<cid>\d*)\s(?P<message>.*),\sclient:\s(?P<client>.*),\sserver:(?P<server>.*)(?P<additional_content>.*)$')
. = merge(., parsed)
if err != null {
.parse_error = err
}
.log_process = "nginx"
.log_type = "cas.nginx.error"
.environment = "${ENVIRONMENT}"
parse_cas_application_logs:
type: remap
inputs:
- 'collect_cas_application_logs'
source: |
parsed = parse_regex!(.message, r'^\[(?P<time>\d{4}-\d{2}-\d{2}\w+:\d{2}:\d{2})\] (?P<log_level>\w+) \[(?P<module_name>[a-zA-Z0-9-_.]+):(?P<line_number>\d+)\] (?P<message>.*)')
if err != null {
.parse_error = err
}
. = merge(., parsed)
.log_process = "cas"
.log_type = "cas.application"
.environment = "${ENVIRONMENT}"
enrich_cas_application_logs:
type: aws_ec2_metadata
inputs:
- 'parse_cas_application_logs'
namespace: ec2
parse_auth_logs:
{{ salt.pillar.get('vector:base_auth_log_parse_source')|yaml(False)|indent(10) }}
sinks:
ship_cas_logs_to_grafana_cloud:
inputs:
- 'filter_healthchecks_cas_nginx_access_logs'
- 'parse_cas_nginx_error_logs'
- 'enrich_cas_application_logs'
- 'parse_auth_logs'
type: loki
labels:
application: cas
environment: ${ENVIRONMENT}
service: cas
{{ salt.pillar.get('vector:base_loki_configuration')|yaml(False)|indent(10) }}
configurations:
- host_metrics
- auth_logs
- nginx_logs
- cas_logs

config_elements:
application_name: 'cas'
service_name: 'cas'
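The effect of this change is that the CAS-specific Vector pipeline (file sources, VRL transforms, and the Loki sink) no longer lives inline in the pillar: cas.sls now only names which shared configurations to enable and supplies per-application values. The rendering side is not part of this diff, but one plausible way for the vector Salt state to consume these keys would be a loop like the following (the state ID, file paths, and template locations here are hypothetical, not taken from this repository):

{% for config in salt.pillar.get('vector:configurations', []) %}
vector_configuration_{{ config }}:
  file.managed:
    - name: /etc/vector/{{ config }}.yaml
    - source: salt://vector/templates/{{ config }}.yaml.j2
    - template: jinja
    - context:
        application_name: {{ salt.pillar.get('vector:config_elements:application_name') }}
        service_name: {{ salt.pillar.get('vector:config_elements:service_name') }}
{% endfor %}

With the pillar above, such a loop would render one Vector config file each for host_metrics, auth_logs, nginx_logs, and cas_logs.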
pillar/vector/init.sls (93 changes: 15 additions & 78 deletions)
@@ -1,80 +1,17 @@
{% set ENVIRONMENT = salt.grains.get('environment', 'dev') %}

vector:
base_auth_log_collection:
type: file
file_key: log_file
read_from: end
include:
- /var/log/auth.log
base_auth_log_parse_source:
type: remap
inputs:
- 'collect_auth_logs'
source: |
parsed, err = parse_syslog(.message)
if err != null {
.parse_error = err
}
. = merge(., parsed)
.log_process = "authlog"
.environment = "${ENVIRONMENT}"

# These two are intentionally incomplete sink configurations. The type, inputs, and labels
# need to be provided on a configuration-by-configuration basis.
base_loki_configuration:
auth:
strategy: basic
password: __vault__::secret-operations/global/grafana-cloud-credentials>data>api_key
user: __vault__::secret-operations/global/grafana-cloud-credentials>data>loki_user
endpoint: https://logs-prod-us-central1.grafana.net
encoding:
codec: json
out_of_order_action: rewrite_timestamp
base_cortex_configuration:
endpoint: https://prometheus-prod-10-prod-us-central-0.grafana.net/api/prom/push
healthcheck: false
auth:
strategy: basic
user: __vault__::secret-operations/global/grafana-cloud-credentials>data>prometheus_user
password: __vault__::secret-operations/global/grafana-cloud-credentials>data>api_key

# By default, there are no extra vector configurations to add
extra_configurations: []
# This list only applies if there is not a more specific vector:configurations
# defined elsewhere. If there is, and you would like to include these elements as well,
# you will need to explicitly state them again.
configurations:
- host_metrics
- auth_logs

# Call out host metrics in their own area because they will be enabled globally
host_metrics_configuration:
sources:
collect_host_metrics:
type: host_metrics
scrape_interval_secs: 60
collectors:
- cpu
- filesystem
- load
- host
- memory
- network
transforms:
cleanup_host_metrics:
type: remap
inputs:
- 'collect_host_metrics'
source: |
# Drop all the not-real filesystems metrics
abort_match_filesystem, err = !(match_any(.tags.filesystem, [r'ext.', r'btrfs', r'xfs']))
if abort_match_filesystem {
abort
}
add_labels_to_host_metrics:
type: remap
inputs:
- 'cleanup_host_metrics'
source: |
.tags.environment = "${ENVIRONMENT}"
.tags.job = "integrations/linux_host"
sinks:
ship_host_metrics_to_grafana_cloud:
inputs:
- 'add_labels_to_host_metrics'
{{ salt.pillar.get('vector:base_cortex_configuration')|yaml(False)|indent(8) }}
config_elements:
application_name: 'configuration_error_application_name'
service_name: 'configuration-error_service_name'
environment: {{ salt.grains.get('environment', 'configuration_error_environment') }}
grafana_cloud_loki_endpoint: 'https://logs-prod-us-central1.grafana.net'
grafana_cloud_prometheus_endpoint: 'https://prometheus-prod-10-prod-us-central-0.grafana.net/api/prom/push'
grafana_cloud_loki_user: __vault__::secret-operations/global/grafana-cloud-credentials>data>loki_user
grafana_cloud_cortex_user: __vault__::secret-operations/global/grafana-cloud-credentials>data>prometheus_user
grafana_cloud_password: __vault__::secret-operations/global/grafana-cloud-credentials>data>api_key
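Before this change, init.sls carried reusable building blocks (base_auth_log_collection, base_loki_configuration, base_cortex_configuration, host_metrics_configuration) that application pillars such as cas.sls stitched together with salt.pillar.get plus the yaml() and indent() filters. After it, the pillar exposes only flat config_elements: the Grafana Cloud endpoints, the Loki and Prometheus users, and a Vault-resolved API key. The sink wiring itself presumably moves into shared templates; a hypothetical fragment of such a template, assembled only from the config_elements keys defined above and the Loki settings that were removed (the template and the input name are assumptions, not part of this diff):

sinks:
  ship_logs_to_grafana_cloud:
    type: loki
    inputs:
      - 'parse_auth_logs'    # hypothetical upstream transform
    endpoint: {{ salt.pillar.get('vector:config_elements:grafana_cloud_loki_endpoint') }}
    auth:
      strategy: basic
      user: {{ salt.pillar.get('vector:config_elements:grafana_cloud_loki_user') }}
      password: {{ salt.pillar.get('vector:config_elements:grafana_cloud_password') }}
    encoding:
      codec: json
    out_of_order_action: rewrite_timestamp
    labels:
      application: {{ salt.pillar.get('vector:config_elements:application_name') }}
      environment: {{ salt.pillar.get('vector:config_elements:environment') }}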
pillar/vector/ocw_build.sls (55 changes: 8 additions & 47 deletions)
@@ -1,48 +1,9 @@
vector:
configuration:
api:
enabled: true

log_schema:
timestamp_key: vector_timestamp
host_key: log_host

sources:
webhook_publish_log:
type: file
include:
- /opt/ocw/logs/webhook-publish.log

transforms:
webhook_publish_log_parser:
inputs:
- webhook_publish_log
type: remap
source: |
matches, err = parse_regex(
.message,
r'^(?P<time>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{9}) (?P<message>.*)'
)
if matches != null {
.message = matches.message
.@timestamp = parse_timestamp!(matches.time, "%F %T%.9f")
.labels = ["ocw_build"]
.environment = "{{ salt.grains.get('environment') }}"
} else {
log(err, level: "error")
.malformed = true
}
webhook_publish_malformed_message_filter:
inputs:
- webhook_publish_log_parser
type: filter
condition: .malformed != true

sinks:
es_cluster:
inputs:
- webhook_publish_malformed_message_filter
type: elasticsearch
endpoint: 'http://operations-elasticsearch.query.consul:9200'
index: logstash-ocw-build-%Y.%W
healthcheck: false
configurations:
- host_metrics
- auth_logs
- ocw_build_logs

config_elements:
application_name: 'ocw_build'
service_name: 'ocw_builds'
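Unlike cas, ocw_build ships its webhook-publish logs to the operations Elasticsearch cluster rather than to Grafana Cloud, so the ocw_build_logs configuration named above presumably carries the pipeline that was removed from this file. If that pipeline is reproduced unchanged in the shared configuration, its sink would still look like this (the values are taken from the removed lines; their new location is an assumption):

sinks:
  es_cluster:
    type: elasticsearch
    inputs:
      - webhook_publish_malformed_message_filter
    endpoint: 'http://operations-elasticsearch.query.consul:9200'
    index: logstash-ocw-build-%Y.%W
    healthcheck: false

As the comment in pillar/vector/init.sls notes, the configurations list here has to restate host_metrics and auth_logs explicitly, because a more specific vector:configurations key replaces the default list instead of merging with it.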