diff --git a/.werf/bundle.yaml b/.werf/bundle.yaml
index eb37c5b7..799f8152 100644
--- a/.werf/bundle.yaml
+++ b/.werf/bundle.yaml
@@ -2,7 +2,7 @@
 ---
 image: bundle
 from: registry.deckhouse.io/base_images/scratch@sha256:b054705fcc9f2205777d80a558d920c0b4209efdc3163c22b5bfcb5dda1db5fc
-fromCacheVersion: "2023-11-27.1"
+fromCacheVersion: "20240802074306"
 import:
 # Rendering .werf/images-digests.yaml is required!
 - image: images-digests
diff --git a/monitoring/grafana-dashboards/storage/extender-scheduler-metrtics.json b/monitoring/grafana-dashboards/storage/extender-scheduler-metrtics.json
new file mode 100644
index 00000000..a13cbdfe
--- /dev/null
+++ b/monitoring/grafana-dashboards/storage/extender-scheduler-metrtics.json
@@ -0,0 +1,727 @@
+{
+  "annotations": {
+    "list": [
+      {
+        "builtIn": 1,
+        "datasource": {
+          "type": "datasource",
+          "uid": "grafana"
+        },
+        "enable": true,
+        "hide": true,
+        "iconColor": "rgba(0, 211, 255, 1)",
+        "name": "Annotations & Alerts",
+        "target": {
+          "limit": 100,
+          "matchAny": false,
+          "tags": [],
+          "type": "dashboard"
+        },
+        "type": "dashboard"
+      }
+    ]
+  },
+  "description": "Process status published by Go Prometheus client library, e.g. memory used, fds open, GC details",
+  "editable": true,
+  "fiscalYearStartMonth": 0,
+  "graphTooltip": 0,
+  "id": null,
+  "iteration": 1721721409311,
+  "links": [],
+  "liveNow": false,
+  "panels": [
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${ds_prometheus}"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          },
+          "unit": "binBps"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 0
+      },
+      "id": 10,
+      "options": {
+        "legend": {
+          "calcs": [
+            "min",
+            "mean",
+            "max"
+          ],
+          "displayMode": "table",
+          "placement": "bottom"
+        },
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${ds_prometheus}"
+          },
+          "editorMode": "code",
+          "expr": "deriv(go_memstats_sys_bytes{job=~\"sds.*\"}[$__rate_interval])",
+          "legendFormat": "{{job}} ({{node}}) - bytes sys",
+          "range": true,
+          "refId": "A"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${ds_prometheus}"
+          },
+          "editorMode": "code",
+          "expr": "rate(go_memstats_alloc_bytes_total{job=~\"sds.*\"}[$__rate_interval])",
+          "hide": false,
+          "legendFormat": "{{job}} ({{node}}) - bytes_total",
+          "range": true,
+          "refId": "B"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${ds_prometheus}"
+          },
+          "editorMode": "code",
+          "expr": "deriv(go_memstats_heap_alloc_bytes{job=~\"sds.*\"}[$__rate_interval])",
+          "hide": false,
+          "legendFormat": "{{job}} ({{node}}) - heap alloc",
+          "range": true,
+          "refId": "C"
+        }
+      ],
+      "title": "Go memstats alloc rate",
+      "type": "timeseries"
+    },
+    {
+      "aliasColors": {},
+      "bars": false,
+      "dashLength": 10,
+      "dashes": false,
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${ds_prometheus}"
+      },
+      "editable": true,
+      "error": false,
+      "fieldConfig": {
+        "defaults": {
+          "unit": "none"
+        },
+        "overrides": []
+      },
+      "fill": 1,
+      "fillGradient": 0,
+      "grid": {},
+      "gridPos": {
+        "h": 6,
+        "w": 12,
+        "x": 12,
+        "y": 0
+      },
+      "hiddenSeries": false,
+      "id": 4,
+      "isNew": true,
+      "legend": {
+        "alignAsTable": true,
+        "avg": true,
+        "current": true,
+        "max": true,
+        "min": false,
+        "show": true,
+        "total": false,
+        "values": true
+      },
+      "lines": true,
+      "linewidth": 2,
+      "links": [],
+      "nullPointMode": "connected",
+      "options": {
+        "alertThreshold": true
+      },
+      "percentage": false,
+      "pluginVersion": "8.5.13",
+      "pointradius": 5,
+      "points": false,
+      "renderer": "flot",
+      "seriesOverrides": [
+        {
+          "alias": "resident",
+          "yaxis": 1
+        }
+      ],
+      "spaceLength": 10,
+      "stack": false,
+      "steppedLine": true,
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${ds_prometheus}"
+          },
+          "editorMode": "code",
+          "exemplar": false,
+          "expr": "go_threads{job=~\"sds.*\"}",
+          "format": "time_series",
+          "instant": false,
+          "intervalFactor": 1,
+          "legendFormat": "{{app}} ({{node}}) ",
+          "metric": "go_threads",
+          "range": true,
+          "refId": "A",
+          "step": 4
+        }
+      ],
+      "thresholds": [],
+      "timeRegions": [],
+      "title": "go threads",
+      "tooltip": {
+        "msResolution": false,
+        "shared": true,
+        "sort": 0,
+        "value_type": "cumulative"
+      },
+      "type": "graph",
+      "xaxis": {
+        "mode": "time",
+        "show": true,
+        "values": []
+      },
+      "yaxes": [
+        {
+          "format": "none",
+          "logBase": 1,
+          "show": true
+        },
+        {
+          "format": "short",
+          "logBase": 1,
+          "show": true
+        }
+      ],
+      "yaxis": {
+        "align": false
+      }
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${ds_prometheus}"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 6,
+        "w": 12,
+        "x": 12,
+        "y": 6
+      },
+      "id": 12,
+      "options": {
+        "legend": {
+          "calcs": [
+            "min",
+            "mean",
+            "max"
+          ],
+          "displayMode": "table",
+          "placement": "bottom"
+        },
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${ds_prometheus}"
+          },
+          "editorMode": "code",
+          "expr": "process_open_fds{job=~\"sds.*\"}",
+          "legendFormat": "{{job}} ({{node}})",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "Process open fds",
+      "type": "timeseries"
+    },
+    {
+      "aliasColors": {},
+      "bars": false,
+      "dashLength": 10,
+      "dashes": false,
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${ds_prometheus}"
+      },
+      "editable": true,
+      "error": false,
+      "fill": 1,
+      "fillGradient": 0,
+      "grid": {},
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 8
+      },
+      "hiddenSeries": false,
+      "id": 1,
+      "isNew": true,
+      "legend": {
+        "alignAsTable": true,
+        "avg": true,
+        "current": true,
+        "max": true,
+        "min": false,
+        "rightSide": false,
+        "show": true,
+        "total": false,
+        "values": true
+      },
+      "lines": true,
+      "linewidth": 2,
+      "links": [],
+      "nullPointMode": "connected",
+      "options": {
+        "alertThreshold": true
+      },
+      "percentage": false,
+      "pluginVersion": "8.5.13",
+      "pointradius": 5,
+      "points": false,
+      "renderer": "flot",
+      "seriesOverrides": [
+        {
+          "alias": "resident",
+          "yaxis": 2
+        }
+      ],
+      "spaceLength": 10,
+      "stack": false,
+      "steppedLine": true,
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${ds_prometheus}"
+          },
+          "editorMode": "code",
+          "expr": "sum(process_resident_memory_bytes{job=~\"sds.*\"}) by (app, node)",
+          "format": "time_series",
+          "intervalFactor": 1,
+          "legendFormat": "{{app}} ({{node}}) - resident",
+          "metric": "process_resident_memory_bytes",
+          "range": true,
+          "refId": "A",
+          "step": 4
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${ds_prometheus}"
+          },
+          "editorMode": "code",
+          "expr": "sum(process_virtual_memory_bytes{job=~\"sds.*\"}) by (app, node)",
+          "format": "time_series",
+          "intervalFactor": 1,
+          "legendFormat": "{{app}} ({{node}}) - virtual",
+          "metric": "process_virtual_memory_bytes",
+          "range": true,
+          "refId": "B",
+          "step": 4
+        }
+      ],
+      "thresholds": [],
+      "timeRegions": [],
+      "title": "Process memory bytes",
+      "tooltip": {
+        "msResolution": false,
+        "shared": true,
+        "sort": 0,
+        "value_type": "cumulative"
+      },
+      "type": "graph",
+      "xaxis": {
+        "mode": "time",
+        "show": true,
+        "values": []
+      },
+      "yaxes": [
+        {
+          "format": "bytes",
+          "logBase": 1,
+          "show": true
+        },
+        {
+          "format": "short",
+          "logBase": 1,
+          "show": true
+        }
+      ],
+      "yaxis": {
+        "align": false
+      }
+    },
+    {
+      "aliasColors": {},
+      "bars": false,
+      "dashLength": 10,
+      "dashes": false,
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${ds_prometheus}"
+      },
+      "editable": true,
+      "error": false,
+      "fill": 1,
+      "fillGradient": 0,
+      "grid": {},
+      "gridPos": {
+        "h": 7,
+        "w": 12,
+        "x": 12,
+        "y": 12
+      },
+      "hiddenSeries": false,
+      "id": 7,
+      "isNew": true,
+      "legend": {
+        "alignAsTable": true,
+        "avg": true,
+        "current": true,
+        "max": true,
+        "min": false,
+        "show": true,
+        "total": false,
+        "values": true
+      },
+      "lines": true,
+      "linewidth": 2,
+      "links": [],
+      "nullPointMode": "connected",
+      "options": {
+        "alertThreshold": true
+      },
+      "percentage": false,
+      "pluginVersion": "8.5.13",
+      "pointradius": 5,
+      "points": false,
+      "renderer": "flot",
+      "seriesOverrides": [],
+      "spaceLength": 10,
+      "stack": false,
+      "steppedLine": true,
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${ds_prometheus}"
+          },
+          "editorMode": "code",
+          "expr": "go_goroutines{job=~\"sds.*\"}",
+          "format": "time_series",
+          "intervalFactor": 1,
+          "legendFormat": "{{app}} ({{node}})",
+          "metric": "go_goroutines",
+          "range": true,
+          "refId": "A",
+          "step": 4
+        }
+      ],
+      "thresholds": [],
+      "timeRegions": [],
+      "title": "Goroutines",
+      "tooltip": {
+        "msResolution": false,
+        "shared": true,
+        "sort": 0,
+        "value_type": "cumulative"
+      },
+      "type": "graph",
+      "xaxis": {
+        "mode": "time",
+        "show": true,
+        "values": []
+      },
+      "yaxes": [
+        {
+          "format": "short",
+          "logBase": 1,
+          "show": true
+        },
+        {
+          "format": "short",
+          "logBase": 1,
+          "show": true
+        }
+      ],
+      "yaxis": {
+        "align": false
+      }
+    },
+    {
+      "aliasColors": {},
+      "bars": false,
+      "dashLength": 10,
+      "dashes": false,
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${ds_prometheus}"
+      },
+      "editable": true,
+      "error": false,
+      "fill": 1,
+      "fillGradient": 0,
+      "grid": {},
+      "gridPos": {
+        "h": 6,
+        "w": 12,
+        "x": 0,
+        "y": 16
+      },
+      "hiddenSeries": false,
+      "id": 8,
+      "isNew": true,
+      "legend": {
+        "alignAsTable": true,
+        "avg": true,
+        "current": true,
+        "max": true,
+        "min": false,
+        "show": true,
+        "total": false,
+        "values": true
+      },
+      "lines": true,
+      "linewidth": 2,
+      "links": [],
+      "nullPointMode": "connected",
+      "options": {
+        "alertThreshold": true
+      },
+      "percentage": false,
+      "pluginVersion": "8.5.13",
+      "pointradius": 5,
+      "points": false,
+      "renderer": "flot",
+      "seriesOverrides": [],
+      "spaceLength": 10,
+      "stack": false,
+      "steppedLine": true,
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${ds_prometheus}"
+          },
+          "editorMode": "code",
+          "expr": "go_gc_duration_seconds{job=~\"sds.*\"}",
+          "format": "time_series",
+          "intervalFactor": 1,
+          "legendFormat": "{{app}}: {{node}} q={{quantile}}",
+          "metric": "go_gc_duration_seconds",
+          "range": true,
+          "refId": "A",
+          "step": 4
+        }
+      ],
+      "thresholds": [],
+      "timeRegions": [],
+      "title": "GC duration quantiles",
+      "tooltip": {
+        "msResolution": false,
+        "shared": true,
+        "sort": 0,
+        "value_type": "cumulative"
+      },
+      "type": "graph",
+      "xaxis": {
+        "mode": "time",
+        "show": true,
+        "values": []
+      },
+      "yaxes": [
+        {
+          "format": "s",
+          "logBase": 1,
+          "show": true
+        },
+        {
+          "format": "short",
+          "logBase": 1,
+          "show": true
+        }
+      ],
+      "yaxis": {
+        "align": false
+      }
+    }
+  ],
+  "refresh": false,
+  "schemaVersion": 36,
+  "style": "dark",
+  "tags": [],
+  "templating": {
+    "list": [
+      {
+        "current": {
+          "selected": false,
+          "text": "default",
+          "value": "default"
+        },
+        "hide": 0,
+        "includeAll": false,
+        "label": "Prometheus",
+        "multi": false,
+        "name": "ds_prometheus",
+        "options": [],
+        "query": "prometheus",
+        "queryValue": "",
+        "refresh": 1,
+        "regex": "",
+        "skipUrlSync": false,
+        "type": "datasource"
+      }
+    ]
+  },
+  "time": {
+    "from": "now-6h",
+    "to": "now"
+  },
+  "timepicker": {
+    "refresh_intervals": [
+      "5s",
+      "10s",
+      "30s",
+      "1m",
+      "5m",
+      "15m",
+      "30m",
+      "1h",
+      "2h",
+      "1d"
+    ],
+    "time_options": [
+      "5m",
+      "15m",
+      "1h",
+      "6h",
+      "12h",
+      "24h",
+      "2d",
+      "7d",
+      "30d"
+    ]
+  },
+  "timezone": "",
+  "title": "Go Processes",
+  "uid": "ypF2Fgvmz",
+  "version": 1
+}
diff --git a/templates/monitoring.yaml b/templates/monitoring.yaml
new file mode 100644
index 00000000..e9be5d00
--- /dev/null
+++ b/templates/monitoring.yaml
@@ -0,0 +1,2 @@
+{{- include "helm_lib_grafana_dashboard_definitions" . }}
+{{- include "helm_lib_prometheus_rules" (list . "d8-sds-local-volume") }}
diff --git a/templates/sds-local-volume-scheduler-extender/podmonitor.yaml b/templates/sds-local-volume-scheduler-extender/podmonitor.yaml
new file mode 100644
index 00000000..48e62ff6
--- /dev/null
+++ b/templates/sds-local-volume-scheduler-extender/podmonitor.yaml
@@ -0,0 +1,34 @@
+{{- if (.Values.global.enabledModules | has "operator-prometheus-crd") }}
+---
+apiVersion: monitoring.coreos.com/v1
+kind: PodMonitor
+metadata:
+  name: sds-local-volume
+  namespace: d8-monitoring
+  {{- include "helm_lib_module_labels" (list $ (dict "prometheus" "main")) | nindent 2 }}
+spec:
+  podMetricsEndpoints:
+  - targetPort: 8080
+    scheme: http
+    path: /metrics
+    relabelings:
+    - regex: endpoint|namespace|pod|container
+      action: labeldrop
+    - targetLabel: job
+      replacement: sds-local-volume
+    - sourceLabels: [__meta_kubernetes_pod_node_name]
+      targetLabel: node
+    - sourceLabels: [__meta_kubernetes_pod_label_app]
+      targetLabel: app
+    - targetLabel: tier
+      replacement: cluster
+    - sourceLabels: [__meta_kubernetes_pod_ready]
+      regex: "true"
+      action: keep
+  selector:
+    matchLabels:
+      app: sds-local-volume-scheduler
+  namespaceSelector:
+    matchNames:
+    - d8-{{ .Chart.Name }}
+{{- end }}