Skip to content

Commit

Permalink
chore: monitoring 연동
Browse files Browse the repository at this point in the history
  • Loading branch information
non-cpu committed May 16, 2024
1 parent 6caa65b commit fa128a0
Show file tree
Hide file tree
Showing 4 changed files with 127 additions and 0 deletions.
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,8 @@ out/

.DS_Store
.env

# Local data generated under the directories that docker-compose.monitoring.yml
# bind-mounts into the Prometheus and Grafana containers; not meant to be versioned.
monitor/prometheus/volume/*
monitor/grafana/alerting/*
monitor/grafana/volume/*
monitor/grafana/*.db
69 changes: 69 additions & 0 deletions docker-compose.monitoring.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# Monitoring stack: the Spring application plus node_exporter, Prometheus and
# Grafana, all attached to a single user-defined bridge network so the
# containers can reach each other by service name.
version: '3'
services:
  spring-app:
    container_name: spring-app
    build: .
    restart: unless-stopped
    ports:
      - "8080:8080"
    environment:
      # DB_URL, DB_USERNAME and DB_PASSWORD are substituted by Compose
      # (from the shell environment or an .env file next to this file).
      - SPRING_DATASOURCE_URL=jdbc:mysql://${DB_URL}
      - SPRING_DATASOURCE_USERNAME=${DB_USERNAME}
      - SPRING_DATASOURCE_PASSWORD=${DB_PASSWORD}
    networks:
      - network

  node_exporter:
    container_name: node_exporter
    image: prom/node-exporter:latest
    command:
      # Must match the container path where the host root is mounted below
      # ("/:/rootfs:ro"); otherwise filesystem stats are looked up under a
      # directory that does not contain the host's mount points.
      - '--path.rootfs=/rootfs'
      - '--path.procfs=/host/proc'
      - '--path.sysfs=/host/sys'
      # Replaces the deprecated --collector.filesystem.ignored-mount-points.
      # Mount points are read from the host procfs, so they are host paths
      # (no "/rootfs" prefix). "$$" is Compose's escape for a literal "$".
      - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc|var/lib/docker/containers|var/lib/docker/overlay2|run/docker/netns|var/lib/docker/aufs)($$|/)'
    ports:
      - "9100:9100"
    networks:
      - network
    restart: unless-stopped
    volumes:
      # Read-only views of the host that the exporter collects from.
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro

  prometheus:
    container_name: prometheus
    image: prom/prometheus:latest
    # NOTE(review): presumably runs as root so the bind-mounted data directory
    # is writable - confirm before tightening.
    user: root
    ports:
      - "9090:9090"
    volumes:
      # Configuration (prometheus.yml, rule.yml) and the data directory.
      - ./monitor/prometheus/config/:/etc/prometheus/
      - ./monitor/prometheus/volume:/prometheus
    command:
      # Enables the HTTP lifecycle endpoints (e.g. POST /-/reload).
      - '--web.enable-lifecycle'
      - '--config.file=/etc/prometheus/prometheus.yml'
    restart: always
    depends_on:
      - spring-app
      - node_exporter
    networks:
      - network

  grafana:
    container_name: grafana
    image: grafana/grafana:latest
    ports:
      - "3000:3000"
    depends_on:
      - prometheus
    volumes:
      # Grafana's data directory is kept on the host under ./monitor/grafana.
      - ./monitor/grafana:/var/lib/grafana
    restart: always
    networks:
      - network

networks:
  network:
    driver: bridge
33 changes: 33 additions & 0 deletions monitor/prometheus/config/prometheus.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Prometheus server configuration: global defaults, rule files and scrape jobs.
global:
  scrape_interval: 15s      # default interval between scrapes (Prometheus default: 1m)
  scrape_timeout: 15s       # how long a scrape request may take before timing out (Prometheus default: 10s)
  evaluation_interval: 2m   # how often rules are evaluated (Prometheus default: 1m)

  # Attach these labels to any time series or alerts when communicating with
  # external systems (federation, remote storage, Alertmanager).
  external_labels:
    monitor: 'codelab-monitor'
  # File that PromQL queries are logged to; query logging is off when unset.
  query_log_file: query_log_file.log

rule_files:
  - "rule.yml"  # located in the same directory as this prometheus.yml

scrape_configs:
  - job_name: 'node_exporter'   # job names must be unique across all scrape configs
    scrape_interval: 10s        # overrides the global 15s for this job
    scrape_timeout: 10s         # overrides the global 15s for this job
    metrics_path: '/metrics'    # optional - HTTP path the metrics are fetched from (default: /metrics)
    # optional - on a label conflict, false keeps the server-side label and
    # renames the scraped one to exported_<label> (default: false)
    honor_labels: false
    # optional - false ignores timestamps exposed by the target and uses the
    # scrape time instead (Prometheus default: true)
    honor_timestamps: false
    scheme: 'http'              # optional - protocol used for the scrape request (default: http)

    static_configs:
      # node_exporter is reached by its Compose service name on the shared
      # bridge network; host.docker.internal does not resolve on Linux Docker
      # engines without an explicit host-gateway entry.
      - targets: ['node_exporter:9100']
        labels:                 # optional - labels added to every series scraped from these targets
          service: 'server-1'

  # Prometheus scraping its own metrics endpoint.
  - job_name: 'prometheus'
    static_configs:
      - targets: ['localhost:9090']
        labels:
          service: 'prometheus'
20 changes: 20 additions & 0 deletions monitor/prometheus/config/rule.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Alerting rules loaded by Prometheus through the rule_files entry of prometheus.yml.
groups:
  - name: example
    rules:
      # Fires when a scrape target has reported up == 0 continuously for 5 minutes.
      - alert: InstanceDown
        expr: up == 0
        for: 5m
        labels:
          severity: page
        annotations:
          summary: 'Instance {{ $labels.instance }} down'
          description: >-
            {{ $labels.instance }} of job {{ $labels.job }} has been
            down for more than 5 minutes.

      # Fires when the median (0.5 quantile) request latency has stayed above
      # one second for 10 minutes.
      - alert: APIHighRequestLatency
        expr: api_http_request_latencies_second{quantile="0.5"} > 1
        for: 10m
        annotations:
          summary: 'High request latency on {{ $labels.instance }}'
          description: >-
            {{ $labels.instance }} has a median request latency above 1s
            (current value: {{ $value }}s)

0 comments on commit fa128a0

Please sign in to comment.