Skip to content

Commit

Permalink
Merge pull request plambe#3 from ixcat/master
Browse files Browse the repository at this point in the history
split/version agent configuration file sections, install makefile & more plots
  • Loading branch information
plambe authored Jun 6, 2018
2 parents 3c0f521 + 881e21d commit 3c2f29a
Show file tree
Hide file tree
Showing 5 changed files with 114 additions and 27 deletions.
16 changes: 16 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# installation makefile -
# tested against zabbix.com 3.x packages on ubuntu 16.04

all:
@echo "usage: make install"

install: install-agent-config install-script

install-agent-config:
install -o root -g root -m 644 userparameter_nvidia-smi.conf.linux \
/etc/zabbix/zabbix_agentd.d/userparameter_nvidia-smi.conf

install-script:
install -d -o root -g root -m 755 /etc/zabbix/scripts
install -o root -g root -m 755 get_gpus_info.sh /etc/zabbix/scripts

29 changes: 3 additions & 26 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,18 +25,7 @@ This is essentially a multi-GPU rewrite of RichardKav's template, as found here:

## On Windows:

```
UserParameter=gpu.number,"C:\Program Files\NVIDIA Corporation\NVSMI\nvidia-smi.exe" -L | find /c /v ""
UserParameter=gpu.discovery,C:\scripts\get_gpus_info.bat
UserParameter=gpu.fanspeed[*],"C:\Program Files\NVIDIA Corporation\NVSMI\nvidia-smi.exe" --query-gpu=fan.speed --format=csv,noheader,nounits -i $1
UserParameter=gpu.power[*],"C:\Program Files\NVIDIA Corporation\NVSMI\nvidia-smi.exe" --query-gpu=power.draw --format=csv,noheader,nounits -i $1
UserParameter=gpu.temp[*],"C:\Program Files\NVIDIA Corporation\NVSMI\nvidia-smi.exe" --query-gpu=temperature.gpu --format=csv,noheader,nounits -i $1
UserParameter=gpu.utilization[*],"C:\Program Files\NVIDIA Corporation\NVSMI\nvidia-smi.exe" --query-gpu=utilization.gpu --format=csv,noheader,nounits -i $1
UserParameter=gpu.memfree[*],"C:\Program Files\NVIDIA Corporation\NVSMI\nvidia-smi.exe" --query-gpu=memory.free --format=csv,noheader,nounits -i $1
UserParameter=gpu.memused[*],"C:\Program Files\NVIDIA Corporation\NVSMI\nvidia-smi.exe" --query-gpu=memory.used --format=csv,noheader,nounits -i $1
UserParameter=gpu.memtotal[*],"C:\Program Files\NVIDIA Corporation\NVSMI\nvidia-smi.exe" --query-gpu=memory.total --format=csv,noheader,nounits -i $1
```

* Add the contents of the file `userparameter_nvidia-smi.conf.windows` to your zabbix_agentd.conf file.
* The Windows script get_gpus_info.bat file should be put in C:\scripts\
* It doesn't have any dependencies, other than having nvidia-smi.exe
* Of course, it is possible to use other paths, but:
Expand All @@ -46,20 +35,8 @@ UserParameter=gpu.memtotal[*],"C:\Program Files\NVIDIA Corporation\NVSMI\nvidia-
## On Linux:

The following UserParameters need to be added to the zabbix-agent configuration:

```
UserParameter=gpu.number,/usr/bin/nvidia-smi -L | /bin/grep GeForce | /usr/bin/wc -l
UserParameter=gpu.discovery,/etc/zabbix/scripts/get_gpus_info.sh
UserParameter=gpu.fanspeed[*],nvidia-smi --query-gpu=fan.speed --format=csv,noheader,nounits -i $1 | tr -d "\n"
UserParameter=gpu.power[*],nvidia-smi --query-gpu=power.draw --format=csv,noheader,nounits -i $1 | tr -d "\n"
UserParameter=gpu.temp[*],nvidia-smi --query-gpu=temperature.gpu --format=csv,noheader,nounits -i $1 | tr -d "\n"
UserParameter=gpu.utilization[*],nvidia-smi --query-gpu=utilization.gpu --format=csv,noheader,nounits -i $1 | tr -d "\n"
UserParameter=gpu.memfree[*],nvidia-smi --query-gpu=memory.free --format=csv,noheader,nounits -i $1 | tr -d "\n"
UserParameter=gpu.memused[*],nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits -i $1 | tr -d "\n"
UserParameter=gpu.memtotal[*],nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits -i $1 | tr -d "\n"
```

The get_gpus_info.sh file should be put in /etc/zabbix/scripts/ and made executable by running ```chmod +x get_gpus_info.sh```
* Add the contents of the file `userparameter_nvidia-smi.conf.linux` to your zabbix_agentd.conf file.
* The get_gpus_info.sh file should be put in /etc/zabbix/scripts/ and made executable by running ```chmod +x get_gpus_info.sh```

## If you feel like donating:

Expand Down
9 changes: 9 additions & 0 deletions userparameter_nvidia-smi.conf.linux
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
UserParameter=gpu.number,/usr/bin/nvidia-smi -L | /bin/grep GeForce | /usr/bin/wc -l
UserParameter=gpu.discovery,/etc/zabbix/scripts/get_gpus_info.sh
UserParameter=gpu.fanspeed[*],nvidia-smi --query-gpu=fan.speed --format=csv,noheader,nounits -i $1 | tr -d "\n"
UserParameter=gpu.power[*],nvidia-smi --query-gpu=power.draw --format=csv,noheader,nounits -i $1 | tr -d "\n"
UserParameter=gpu.temp[*],nvidia-smi --query-gpu=temperature.gpu --format=csv,noheader,nounits -i $1 | tr -d "\n"
UserParameter=gpu.utilization[*],nvidia-smi --query-gpu=utilization.gpu --format=csv,noheader,nounits -i $1 | tr -d "\n"
UserParameter=gpu.memfree[*],nvidia-smi --query-gpu=memory.free --format=csv,noheader,nounits -i $1 | tr -d "\n"
UserParameter=gpu.memused[*],nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits -i $1 | tr -d "\n"
UserParameter=gpu.memtotal[*],nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits -i $1 | tr -d "\n"
9 changes: 9 additions & 0 deletions userparameter_nvidia-smi.conf.windows
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
UserParameter=gpu.number,"C:\Program Files\NVIDIA Corporation\NVSMI\nvidia-smi.exe" -L | find /c /v ""
UserParameter=gpu.discovery,C:\scripts\get_gpus_info.bat
UserParameter=gpu.fanspeed[*],"C:\Program Files\NVIDIA Corporation\NVSMI\nvidia-smi.exe" --query-gpu=fan.speed --format=csv,noheader,nounits -i $1
UserParameter=gpu.power[*],"C:\Program Files\NVIDIA Corporation\NVSMI\nvidia-smi.exe" --query-gpu=power.draw --format=csv,noheader,nounits -i $1
UserParameter=gpu.temp[*],"C:\Program Files\NVIDIA Corporation\NVSMI\nvidia-smi.exe" --query-gpu=temperature.gpu --format=csv,noheader,nounits -i $1
UserParameter=gpu.utilization[*],"C:\Program Files\NVIDIA Corporation\NVSMI\nvidia-smi.exe" --query-gpu=utilization.gpu --format=csv,noheader,nounits -i $1
UserParameter=gpu.memfree[*],"C:\Program Files\NVIDIA Corporation\NVSMI\nvidia-smi.exe" --query-gpu=memory.free --format=csv,noheader,nounits -i $1
UserParameter=gpu.memused[*],"C:\Program Files\NVIDIA Corporation\NVSMI\nvidia-smi.exe" --query-gpu=memory.used --format=csv,noheader,nounits -i $1
UserParameter=gpu.memtotal[*],"C:\Program Files\NVIDIA Corporation\NVSMI\nvidia-smi.exe" --query-gpu=memory.total --format=csv,noheader,nounits -i $1
78 changes: 77 additions & 1 deletion zbx_nvidia-smi-multi-gpu.xml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<zabbix_export>
<version>3.0</version>
<date>2018-01-31T17:55:26Z</date>
<date>2018-06-05T20:56:12Z</date>
<groups>
<group>
<name>Templates</name>
Expand Down Expand Up @@ -443,6 +443,50 @@
</trigger_prototype>
</trigger_prototypes>
<graph_prototypes>
<graph_prototype>
<name>GPU {#GPUINDEX} Memory</name>
<width>900</width>
<height>200</height>
<yaxismin>0.0000</yaxismin>
<yaxismax>100.0000</yaxismax>
<show_work_period>1</show_work_period>
<show_triggers>1</show_triggers>
<type>0</type>
<show_legend>1</show_legend>
<show_3d>0</show_3d>
<percent_left>0.0000</percent_left>
<percent_right>0.0000</percent_right>
<ymin_type_1>0</ymin_type_1>
<ymax_type_1>0</ymax_type_1>
<ymin_item_1>0</ymin_item_1>
<ymax_item_1>0</ymax_item_1>
<graph_items>
<graph_item>
<sortorder>0</sortorder>
<drawtype>0</drawtype>
<color>00AA00</color>
<yaxisside>0</yaxisside>
<calc_fnc>2</calc_fnc>
<type>0</type>
<item>
<host>Template Nvidia GPUs Performance</host>
<key>gpu.memfree[{#GPUINDEX}]</key>
</item>
</graph_item>
<graph_item>
<sortorder>1</sortorder>
<drawtype>0</drawtype>
<color>0000DD</color>
<yaxisside>0</yaxisside>
<calc_fnc>2</calc_fnc>
<type>0</type>
<item>
<host>Template Nvidia GPUs Performance</host>
<key>gpu.memused[{#GPUINDEX}]</key>
</item>
</graph_item>
</graph_items>
</graph_prototype>
<graph_prototype>
<name>GPU {#GPUINDEX} Temperature, Fan Speed and Power</name>
<width>900</width>
Expand Down Expand Up @@ -499,6 +543,38 @@
</graph_item>
</graph_items>
</graph_prototype>
<graph_prototype>
<name>GPU {#GPUINDEX} Utilization</name>
<width>900</width>
<height>200</height>
<yaxismin>0.0000</yaxismin>
<yaxismax>100.0000</yaxismax>
<show_work_period>1</show_work_period>
<show_triggers>1</show_triggers>
<type>0</type>
<show_legend>1</show_legend>
<show_3d>0</show_3d>
<percent_left>0.0000</percent_left>
<percent_right>0.0000</percent_right>
<ymin_type_1>0</ymin_type_1>
<ymax_type_1>0</ymax_type_1>
<ymin_item_1>0</ymin_item_1>
<ymax_item_1>0</ymax_item_1>
<graph_items>
<graph_item>
<sortorder>0</sortorder>
<drawtype>0</drawtype>
<color>2774A4</color>
<yaxisside>0</yaxisside>
<calc_fnc>2</calc_fnc>
<type>0</type>
<item>
<host>Template Nvidia GPUs Performance</host>
<key>gpu.utilization[{#GPUINDEX}]</key>
</item>
</graph_item>
</graph_items>
</graph_prototype>
</graph_prototypes>
<host_prototypes/>
</discovery_rule>
Expand Down

0 comments on commit 3c2f29a

Please sign in to comment.