forked from matusnovak/prometheus-smartctl
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsmartprom.py
executable file
·188 lines (151 loc) · 5.01 KB
/
smartprom.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
#!/usr/bin/env python3
import glob
import re
import subprocess
import time
import json
from typing import List
from prometheus_client import start_http_server, Gauge
def isDrive(s: str) -> bool:
    """
    Check whether a device path matches an expected disk device name.

    Accepted names are ``/dev/sd`` followed by one or more lowercase letters
    (e.g. ``/dev/sda``, ``/dev/sdab``) and ``/dev/nvme`` followed by one or
    more digits (e.g. ``/dev/nvme0``). Anything with a further suffix, such
    as ``/dev/sda1`` or ``/dev/nvme0n1``, is rejected because the pattern is
    anchored at both ends.

    :param s: device path to test
    :return: True if the path matches, False otherwise
    """
    # re.match yields a Match object or None; convert it so the function
    # actually returns the bool promised by its annotation.
    return re.match(r'^/dev/(sd[a-z]+|nvme[0-9]+)$', s) is not None
def run(args: List[str]) -> str:
    """
    Run a command (smartctl in this file) and return its output as text.

    stderr is merged into stdout, so the returned text contains both streams
    in the order the command produced them.

    :param args: program and arguments, passed without a shell
    :return: the combined stdout/stderr decoded as UTF-8
    :raises RuntimeError: if the command exits with a non-zero status; the
        captured output is printed first so the failure can be diagnosed
    """
    proc = subprocess.run(args, stdout=subprocess.PIPE,
                          stderr=subprocess.STDOUT)
    # Undecodable bytes are replaced rather than raising, so odd device
    # strings cannot hide the real result of the command.
    output = proc.stdout.decode("utf-8", errors="replace")

    if proc.returncode != 0:
        # Because stderr is redirected into stdout, the diagnostics live in
        # `output`; a separate stderr stream is never populated.
        if output:
            print(output)
        # NOTE(review): smartctl may use non-zero exit bits to report disk
        # health while still printing valid data - confirm that rejecting
        # every non-zero status is intended.
        raise RuntimeError('Command returned code {}'.format(proc.returncode))

    return output
def get_types():
    """
    Build a mapping of device name to smartctl device type.

    Runs ``smartctl --scan-open`` and, for every non-empty output line that
    has more than three whitespace-separated fields, maps the first field to
    the third one. A later line with the same first field replaces an
    earlier one.
    """
    scan_output = run(['smartctl', '--scan-open'])
    rows = (line.split() for line in scan_output.split('\n') if line)
    return {fields[0]: fields[2] for fields in rows if len(fields) > 3}
# Entries under /dev whose path is accepted by isDrive(), gathered at import.
DRIVES = [path for path in glob.glob("/dev/*") if isDrive(path)]
# Result of get_types(): first field -> third field of each scan line.
TYPES = get_types()
# Line that smart_sat() treats as the start of the attribute table.
HEADER = 'ID# ATTRIBUTE_NAME FLAG VALUE WORST THRESH TYPE UPDATED WHEN_FAILED RAW_VALUE'
# Gauges created so far, keyed by attribute name.
METRICS = {}
# Label names passed to every Gauge.
LABELS = ['drive']
def smart_sat(dev: str) -> dict:
    """
    Runs the smartctl command on a "sat" device
    and processes its attributes

    Rows are read from the output that follows the HEADER line. For each
    row the result holds::

        <ATTRIBUTE_NAME>      -> (ID#, VALUE)
        <ATTRIBUTE_NAME>_raw  -> (ID#, RAW_VALUE)   # only if RAW_VALUE is an int

    :param dev: device path handed to smartctl
    :return: dict of attribute name to ``(id, value)`` tuples; empty if the
        header line never appears
    :raises Exception: whatever ``run`` raises when smartctl fails
    """
    results = run(['smartctl', '-A', '-d', 'sat', dev])
    # Compare the header field by field so the amount of padding between
    # columns in smartctl's output does not matter.
    header_tokens = HEADER.split()

    attributes = {}
    got_header = False
    for line in results.split('\n'):
        tokens = line.split()
        if not tokens:
            continue
        if not got_header:
            got_header = tokens == header_tokens
            continue
        if len(tokens) <= 3:
            continue

        try:
            attr_id = int(tokens[0])
            value = int(tokens[3])
        except ValueError:
            # Not an attribute row (repeated header, trailing text, ...);
            # skip it instead of losing every attribute of this drive.
            continue

        name = tokens[1]
        attributes[name] = (attr_id, value)

        try:
            raw = int(tokens[9])
        except (IndexError, ValueError):
            # RAW_VALUE column missing or not a plain integer.
            continue
        # A raw value of 0 is a real reading and must be exported too.
        attributes[f'{name}_raw'] = (attr_id, raw)

    return attributes
def smart_nvme(dev: str) -> dict:
    """
    Runs the smartctl command on a "nvme" device
    and processes its attributes

    Reads the ``nvme_smart_health_information_log`` object from smartctl's
    JSON output. The ``temperature_sensors`` list is flattened into
    ``temperature_sensor1``, ``temperature_sensor2``, ... (1-based position
    in the list); every other entry keeps its key.

    :param dev: device path handed to smartctl
    :return: dict of attribute name to integer value
    :raises KeyError: if the health information log is absent from the output
    :raises Exception: whatever ``run`` raises when smartctl fails
    """
    results = run(['smartctl', '-A', '-d', 'nvme', '--json=c', dev])
    health_info = json.loads(results)['nvme_smart_health_information_log']

    attributes = {}
    for k, v in health_info.items():
        if k == 'temperature_sensors':
            for i, value in enumerate(v, start=1):
                if isinstance(value, int):
                    attributes[f'temperature_sensor{i}'] = value
            continue
        # collect() calls hex() on these values, which only accepts
        # integers; dropping anything else keeps one odd field from
        # aborting the export of the whole drive.
        if isinstance(v, int):
            attributes[k] = v
    return attributes
def smart_scsi(dev: str) -> dict:
    """
    Runs the smartctl command on a "scsi" device
    and processes its attributes

    Walks the top level of smartctl's JSON output. An integer value is kept
    under its own key; for a nested object, each integer member is kept
    under ``<key>_<member>``. Booleans and all other value types are
    ignored, and nesting deeper than one level is not followed.

    :param dev: device path handed to smartctl
    :return: dict of attribute name to integer value
    :raises Exception: whatever ``run`` raises when smartctl fails
    """
    def is_plain_int(candidate) -> bool:
        # bool is a subclass of int; exclude it so JSON true/false are
        # not exported as 1/0.
        return isinstance(candidate, int) and not isinstance(candidate, bool)

    results = run(['smartctl', '-A', '-d', 'scsi', '--json=c', dev])
    data = json.loads(results)

    attributes = {}
    for key, value in data.items():
        if isinstance(value, dict):
            for label, inner in value.items():
                if is_plain_int(inner):
                    attributes[f"{key}_{label}"] = inner
        elif is_plain_int(value):
            attributes[key] = value
    return attributes
def collect():
    """
    Read the attributes of every known drive and publish them as gauges.

    Drives absent from TYPES, or with a type other than sat/nvme/scsi, are
    skipped. A gauge is created the first time an attribute name is seen and
    is updated on every call, labelled with the drive name without its
    ``/dev/`` prefix. Any exception raised while handling a drive is printed
    and the loop moves on to the next drive.
    """
    readers = {'sat': smart_sat, 'nvme': smart_nvme, 'scsi': smart_scsi}

    for drive in DRIVES:
        try:
            if drive not in TYPES:
                continue
            typ = TYPES[drive]
            reader = readers.get(typ)
            if reader is None:
                continue

            # Grab all of the attributes that SMART gave us
            attrs = reader(drive)
            # "sat" readings are (id, value) tuples; the others are plain
            # numbers.
            is_sat = typ == 'sat'

            for key, values in attrs.items():
                # Create metric if does not exist
                if key not in METRICS:
                    name = (key.replace('-', '_')
                               .replace(' ', '_')
                               .replace('.', '')
                               .replace('/', '_'))
                    desc = key.replace('_', ' ')
                    num = hex(values[0]) if is_sat else hex(values)
                    skey = f'smartprom_{name}'
                    print(f'Adding new gauge {skey} ({num})')
                    METRICS[key] = Gauge(skey, f'({num}) {desc}', LABELS)

                # Update metric
                gauge = METRICS[key].labels(drive.replace('/dev/', ''))
                gauge.set(values[1] if is_sat else values)
        except Exception as e:
            print('Exception:', e)
def main(port: int = 9902, interval: float = 20.0):
    """
    Start the metrics HTTP server and refresh the metrics forever.

    :param port: TCP port for the metrics server (9902 by default)
    :param interval: seconds between the start of two consecutive
        collections (20 by default); if a collection takes longer than
        this, the next one starts immediately

    This function does not return.
    """
    start_http_server(port)

    while True:
        # Use the monotonic clock so wall-clock adjustments cannot stall or
        # bunch up collections.
        started = time.monotonic()
        collect()
        # Sleep once for the remainder of the interval instead of waking up
        # ten times a second just to check the clock.
        remaining = interval - (time.monotonic() - started)
        if remaining > 0:
            time.sleep(remaining)


if __name__ == '__main__':
    main()