# jenkins.yml
# https://plugins.jenkins.io/prometheus/
# Prometheus alerting rules for Jenkins.
#
# Every rule except JenkinsOutdatedPlugins uses `for: 0m`, i.e. it fires on
# the first evaluation in which its expression returns a result, with no
# pending period.
#
# NOTE(review): the descriptions reference the `realm`, `env` and `region`
# labels. This file does not show where those labels are attached —
# presumably by the scrape/relabel configuration; confirm they exist on
# the scraped series.
groups:
  - name: MetricPlugin
    rules:
      # Fires when `jenkins_node_offline_value` is greater than 1.
      # NOTE(review): presumably this gauge counts offline nodes, in which
      # case `> 1` stays silent while exactly one node is offline — confirm
      # the threshold is intentional.
      - alert: JenkinsOffline
        expr: 'jenkins_node_offline_value > 1'
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: Jenkins offline (instance {{ $labels.instance }})
          description: "Jenkins offline: `{{$labels.instance}}` in realm {{$labels.realm}}/{{$labels.env}} ({{$labels.region}})\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Fires when `jenkins_health_check_score` drops below 1.
      - alert: JenkinsHealthcheck
        expr: 'jenkins_health_check_score < 1'
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: Jenkins healthcheck (instance {{ $labels.instance }})
          description: "Jenkins healthcheck score: {{$value}}. Healthcheck failure for `{{$labels.instance}}` in realm {{$labels.realm}}/{{$labels.env}} ({{$labels.region}})\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Fires when the per-instance sum of `jenkins_plugins_withUpdate` has
      # stayed above 3 for a full day. The aggregation keeps only the
      # `instance` label, so that is the only label available to the
      # annotations.
      - alert: JenkinsOutdatedPlugins
        expr: 'sum(jenkins_plugins_withUpdate) by (instance) > 3'
        for: 1d
        labels:
          severity: warning
        annotations:
          summary: Jenkins outdated plugins (instance {{ $labels.instance }})
          description: "{{ $value }} plugins need update\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Fires when `default_jenkins_builds_health_score` drops below 1.
      - alert: JenkinsBuildsHealthScore
        expr: 'default_jenkins_builds_health_score < 1'
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: Jenkins builds health score (instance {{ $labels.instance }})
          description: "Healthcheck failure for `{{$labels.instance}}` in realm {{$labels.realm}}/{{$labels.env}} ({{$labels.region}})\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Fires when `jenkins_runs_failure_total` grew by more than 100 over
      # the last hour. The `_total` suffix marks the metric as a counter, so
      # `increase()` is used: unlike `delta()` (intended for gauges) it
      # compensates for counter resets, e.g. after a Jenkins restart.
      # NOTE(review): the description says "Healthcheck failure", which looks
      # copied from the health-check rules, and it references a `jenkins_job`
      # label — confirm the wording and that this series carries that label.
      - alert: JenkinsRunFailureTotal
        expr: 'increase(jenkins_runs_failure_total[1h]) > 100'
        for: 0m
        labels:
          severity: warning
        annotations:
          summary: Jenkins run failure total (instance {{ $labels.instance }})
          description: "Job run failures: ({{$value}}) {{$labels.jenkins_job}}. Healthcheck failure for `{{$labels.instance}}` in realm {{$labels.realm}}/{{$labels.env}} ({{$labels.region}})\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Fires when `default_jenkins_builds_last_build_tests_failing` is
      # greater than 0.
      - alert: JenkinsBuildTestsFailing
        expr: 'default_jenkins_builds_last_build_tests_failing > 0'
        for: 0m
        labels:
          severity: warning
        annotations:
          summary: Jenkins build tests failing (instance {{ $labels.instance }})
          description: "Last build tests failed: {{$labels.jenkins_job}}. Failed build Tests for job `{{$labels.jenkins_job}}` on {{$labels.instance}}/{{$labels.env}} ({{$labels.region}})\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Fires when `default_jenkins_builds_last_build_result_ordinal`
      # equals 2.
      # NOTE(review): presumably ordinal 2 is Jenkins' FAILURE result —
      # verify against the plugin's metric documentation.
      - alert: JenkinsLastBuildFailed
        expr: 'default_jenkins_builds_last_build_result_ordinal == 2'
        for: 0m
        labels:
          severity: warning
        annotations:
          summary: Jenkins last build failed (instance {{ $labels.instance }})
          description: "Last build failed: {{$labels.jenkins_job}}. Failed build for job `{{$labels.jenkins_job}}` on {{$labels.instance}}/{{$labels.env}} ({{$labels.region}})\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"