ncc.zabbix_templates
19:34ed7fe33c29 Browse Files
+ Мониторинг MD RAID
mdadm/README.md mdadm/config/aw_mdadm.conf mdadm/config/zbx_mdadm_sudoers mdadm/zbx_export_templates.yaml
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/mdadm/README.md Sat Feb 24 18:19:09 2024 +0300 1.3 @@ -0,0 +1,15 @@ 1.4 +Описание 1.5 +======== 1.6 + 1.7 +Zabbix-шаблон для мониторинга программных RAID-массивов Linux. 1.8 + 1.9 +Шаблону требуется только Zabbix-агент и дополнение его конфигурации и конфигурации утилиты `sudo`. 1.10 + 1.11 + 1.12 +Перечень компонентов 1.13 +-------------------- 1.14 + 1.15 +* `zbx_export_templates.yaml` - шаблон, который требуется импортировать в Zabbix 1.16 +* `config/aw_mdadm.conf` - конфигурация zabbix-агента, которой нужно дополнить 1.17 + установленный на узле клиент 1.18 +* `config/zbx_mdadm_sudoers` - конфигурация `sudo` для работы мониторинга. 1.19 \ No newline at end of file
2.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 2.2 +++ b/mdadm/config/aw_mdadm.conf Sat Feb 24 18:19:09 2024 +0300 2.3 @@ -0,0 +1,4 @@ 2.4 +# devel.a0fs.ru -- mon.linux.mdadm:config/aw_mdadm.conf -- v0.r202402.1 2.5 +Timeout=30 2.6 +UserParameter=aw.linux.mdadm.list,cat /proc/mdstat | awk -F: '$1 ~ "^md[0-9]+ " {gsub(/ +$/, "", $1); print $1 }' 2.7 +UserParameter=aw.linux.mdadm.get-stat[*],sudo mdadm --detail /dev/$1 2.8 \ No newline at end of file
3.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 3.2 +++ b/mdadm/config/zbx_mdadm_sudoers Sat Feb 24 18:19:09 2024 +0300 3.3 @@ -0,0 +1,2 @@ 3.4 +Defaults:zabbix !requiretty 3.5 +zabbix ALL=(ALL) NOPASSWD: /usr/bin/mdadm --detail *, /usr/sbin/mdadm --detail * 3.6 \ No newline at end of file
# mdadm/zbx_export_templates.yaml
# (added in changeset 19:34ed7fe33c29, Sat Feb 24 18:19:09 2024 +0300)
#
# Zabbix 6.0 template for Linux software RAID (md) arrays.
#
# Data flow:
#   * The discovery rule `aw.linux.mdadm.list` receives one md device name per
#     line and turns it into LLD rows with the {#DEVICE} macro.
#   * For every device the master item `aw.linux.mdadm.get-stat[{#DEVICE}]`
#     receives the text of `mdadm --detail /dev/{#DEVICE}` and converts it into
#     a JSON object (lower-cased field name -> value, plus "raw state" holding
#     the member-device table).
#   * All other item prototypes are DEPENDENT items that pick one field out of
#     that JSON with JSONPath.
#
# Most dependent items use DISCARD_UNCHANGED_HEARTBEAT (86400 s), so a value is
# stored only when it changes or once per day.  The `change(...)`-based
# triggers use `nodata(...,1h)=1` as their recovery expression.
zabbix_export:
  version: '6.0'
  date: '2024-02-24T15:10:26Z'
  groups:
    - uuid: 22941f4e01294888a9bed3aae15f6ef9
      name: Templates/AWNET
  templates:
    - uuid: d60632a7a4b94709b0455e1bec6ce54a
      template: 'Linux software RAID monitoring'
      name: 'Linux software RAID monitoring'
      description: 'version: v0.r202402.1'
      groups:
        - name: Templates/AWNET
      discovery_rules:
        - uuid: 533db42d1dc340e593a8d4761eda1347
          name: 'Данные устройств'
          key: aw.linux.mdadm.list
          delay: 5m
          lifetime: 1d
          item_prototypes:
            - uuid: 2fd74bd606d241d1bf34759ed1449ad5
              name: 'MD RAID {#DEVICE}: Количество активных устройства'
              type: DEPENDENT
              key: 'aw.linux.mdadm.d.active-devices[{#DEVICE}]'
              delay: '0'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - '$["active devices"]'
                - type: DISCARD_UNCHANGED_HEARTBEAT
                  parameters:
                    - '86400'
              master_item:
                key: 'aw.linux.mdadm.get-stat[{#DEVICE}]'
              tags:
                - tag: Application
                  value: 'MD RAID'
                - tag: Устройство
                  value: '{#DEVICE}'
            - uuid: 9706ad0fa9814755a5fec3aeb074785c
              name: 'MD RAID {#DEVICE}: Общий полезный объём'
              type: DEPENDENT
              key: 'aw.linux.mdadm.d.array-size[{#DEVICE}]'
              delay: '0'
              units: B
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - '$["array size"]'
                # Keep only the leading integer of e.g. "976630464 (931.39 GiB 1000.07 GB)".
                - type: REGEX
                  parameters:
                    - '^([0-9]+)'
                    - \1
                # mdadm reports "Array Size" in KiB; convert to bytes to match `units: B`.
                - type: MULTIPLIER
                  parameters:
                    - '1024'
                - type: DISCARD_UNCHANGED_HEARTBEAT
                  parameters:
                    - '86400'
              master_item:
                key: 'aw.linux.mdadm.get-stat[{#DEVICE}]'
              tags:
                - tag: Application
                  value: 'MD RAID'
                - tag: Устройство
                  value: '{#DEVICE}'
            - uuid: ecbe2d984e0a4cd2a23bdefed863b3e7
              name: 'MD RAID {#DEVICE}: Время создания массива'
              type: DEPENDENT
              key: 'aw.linux.mdadm.d.creation-time[{#DEVICE}]'
              delay: '0'
              trends: '0'
              value_type: TEXT
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - '$["creation time"]'
                - type: DISCARD_UNCHANGED_HEARTBEAT
                  parameters:
                    - '86400'
              master_item:
                key: 'aw.linux.mdadm.get-stat[{#DEVICE}]'
              tags:
                - tag: Application
                  value: 'MD RAID'
                - tag: Устройство
                  value: '{#DEVICE}'
            - uuid: 1542cad0f9934df78e884eaee0220061
              name: 'MD RAID {#DEVICE}: Количество событий, произошедших с массивом'
              type: DEPENDENT
              key: 'aw.linux.mdadm.d.events[{#DEVICE}]'
              delay: '0'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - '$["events"]'
                # Store the difference from the previous value, not the absolute counter.
                - type: SIMPLE_CHANGE
                  parameters:
                    - ''
              master_item:
                key: 'aw.linux.mdadm.get-stat[{#DEVICE}]'
              tags:
                - tag: Application
                  value: 'MD RAID'
                - tag: Устройство
                  value: '{#DEVICE}'
            - uuid: 7fbc5180754444c79702eabcd19213cf
              name: 'MD RAID {#DEVICE}: Количество отказавших устройства'
              type: DEPENDENT
              key: 'aw.linux.mdadm.d.failed-devices[{#DEVICE}]'
              delay: '0'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - '$["failed devices"]'
                - type: DISCARD_UNCHANGED_HEARTBEAT
                  parameters:
                    - '86400'
              master_item:
                key: 'aw.linux.mdadm.get-stat[{#DEVICE}]'
              tags:
                - tag: Application
                  value: 'MD RAID'
                - tag: Устройство
                  value: '{#DEVICE}'
              trigger_prototypes:
                - uuid: 617dd8c5ae984a759a9f4a55e2e6edfe
                  expression: 'last(/Linux software RAID monitoring/aw.linux.mdadm.d.failed-devices[{#DEVICE}])<>0'
                  name: 'Наличие отказавших устройств в массиве {#DEVICE}'
                  priority: HIGH
            - uuid: db60d1b47ebd479386aace38ad0b503f
              name: 'MD RAID {#DEVICE}: Устройств в массиве'
              type: DEPENDENT
              key: 'aw.linux.mdadm.d.raid-devices[{#DEVICE}]'
              delay: '0'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - '$["raid devices"]'
                - type: DISCARD_UNCHANGED_HEARTBEAT
                  parameters:
                    - '86400'
              master_item:
                key: 'aw.linux.mdadm.get-stat[{#DEVICE}]'
              tags:
                - tag: Application
                  value: 'MD RAID'
                - tag: Устройство
                  value: '{#DEVICE}'
            - uuid: 3ac9863ea63948bbbeb0d45f6816d926
              name: 'MD RAID {#DEVICE}: Тип массива'
              type: DEPENDENT
              key: 'aw.linux.mdadm.d.raid-level[{#DEVICE}]'
              delay: '0'
              trends: '0'
              value_type: CHAR
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - '$["raid level"]'
                - type: DISCARD_UNCHANGED_HEARTBEAT
                  parameters:
                    - '86400'
              master_item:
                key: 'aw.linux.mdadm.get-stat[{#DEVICE}]'
              tags:
                - tag: Application
                  value: 'MD RAID'
                - tag: Устройство
                  value: '{#DEVICE}'
              trigger_prototypes:
                - uuid: 7ea6e8b61acc4eb6a068e4a289725fd7
                  expression: 'change(/Linux software RAID monitoring/aw.linux.mdadm.d.raid-level[{#DEVICE}])<>0'
                  recovery_mode: RECOVERY_EXPRESSION
                  recovery_expression: 'nodata(/Linux software RAID monitoring/aw.linux.mdadm.d.raid-level[{#DEVICE}],1h)=1'
                  name: 'Изменился тип массива {#DEVICE}'
                  priority: INFO
            - uuid: f04b15b7f32a4e62b4e6a302c60a8fe4
              name: 'MD RAID {#DEVICE}: Состояние составляющих блочных устройств'
              type: DEPENDENT
              key: 'aw.linux.mdadm.d.raw-state[{#DEVICE}]'
              delay: '0'
              trends: '0'
              value_type: TEXT
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - '$["raw state"]'
                - type: DISCARD_UNCHANGED_HEARTBEAT
                  parameters:
                    - '86400'
              master_item:
                key: 'aw.linux.mdadm.get-stat[{#DEVICE}]'
              tags:
                - tag: Application
                  value: 'MD RAID'
                - tag: Устройство
                  value: '{#DEVICE}'
            - uuid: 8cc07323b6084b00be54e9f5f2adcd6c
              name: 'MD RAID {#DEVICE}: Количество запасных устройств'
              type: DEPENDENT
              key: 'aw.linux.mdadm.d.spare-devices[{#DEVICE}]'
              delay: '0'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - '$["spare devices"]'
                - type: DISCARD_UNCHANGED_HEARTBEAT
                  parameters:
                    - '86400'
              master_item:
                key: 'aw.linux.mdadm.get-stat[{#DEVICE}]'
              tags:
                - tag: Application
                  value: 'MD RAID'
                - tag: Устройство
                  value: '{#DEVICE}'
              trigger_prototypes:
                - uuid: 77a85a17674b4b5aa7e023b56290ed61
                  expression: 'change(/Linux software RAID monitoring/aw.linux.mdadm.d.spare-devices[{#DEVICE}])<0'
                  recovery_mode: RECOVERY_EXPRESSION
                  recovery_expression: 'nodata(/Linux software RAID monitoring/aw.linux.mdadm.d.spare-devices[{#DEVICE}],1h)=1'
                  name: 'Уменьшилось количество запасных устройств в {#DEVICE}'
                  priority: HIGH
            - uuid: 7bf13d288746481da7aa1ba863e24dcf
              name: 'MD RAID {#DEVICE}: Состояние массива'
              type: DEPENDENT
              key: 'aw.linux.mdadm.d.state[{#DEVICE}]'
              delay: '0'
              trends: '0'
              value_type: CHAR
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - '$["state"]'
                  # If the field is missing, store a sentinel value instead of failing;
                  # the "no data" trigger below matches on this sentinel.
                  error_handler: CUSTOM_VALUE
                  error_handler_params: '__NO STATUS__'
                - type: DISCARD_UNCHANGED_HEARTBEAT
                  parameters:
                    - '86400'
              master_item:
                key: 'aw.linux.mdadm.get-stat[{#DEVICE}]'
              tags:
                - tag: Application
                  value: 'MD RAID'
                - tag: Устройство
                  value: '{#DEVICE}'
              trigger_prototypes:
                - uuid: 6a4bcf95112c4dfd91b1108b1a4bb8f8
                  expression: 'last(/Linux software RAID monitoring/aw.linux.mdadm.d.state[{#DEVICE}])<>"clean" and last(/Linux software RAID monitoring/aw.linux.mdadm.d.state[{#DEVICE}])<>"active"'
                  name: 'Неожиданное состояние массива {#DEVICE}'
                  priority: AVERAGE
                  # Suppressed while the "no state data" trigger is in problem state.
                  dependencies:
                    - name: 'Отсутствуют данные о состоянии массива {#DEVICE}'
                      expression: 'last(/Linux software RAID monitoring/aw.linux.mdadm.d.state[{#DEVICE}])="__NO STATUS__"'
                - uuid: fadccd30aba941cf97d00224117492d2
                  expression: 'last(/Linux software RAID monitoring/aw.linux.mdadm.d.state[{#DEVICE}])="__NO STATUS__"'
                  name: 'Отсутствуют данные о состоянии массива {#DEVICE}'
                  priority: HIGH
            - uuid: 2e82148caae345bf99c201497d278edd
              name: 'MD RAID {#DEVICE}: Общее количество устройств в массиве'
              type: DEPENDENT
              key: 'aw.linux.mdadm.d.total-devices[{#DEVICE}]'
              delay: '0'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - '$["total devices"]'
                - type: DISCARD_UNCHANGED_HEARTBEAT
                  parameters:
                    - '86400'
              master_item:
                key: 'aw.linux.mdadm.get-stat[{#DEVICE}]'
              tags:
                - tag: Application
                  value: 'MD RAID'
                - tag: Устройство
                  value: '{#DEVICE}'
            - uuid: 913441f9f2a142329869258c2e20bcf5
              name: 'MD RAID {#DEVICE}: Время последнего события в связанного со структурой массива'
              type: DEPENDENT
              key: 'aw.linux.mdadm.d.update-time[{#DEVICE}]'
              delay: '0'
              trends: '0'
              value_type: CHAR
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - '$["update time"]'
                - type: DISCARD_UNCHANGED_HEARTBEAT
                  parameters:
                    - '86400'
              master_item:
                key: 'aw.linux.mdadm.get-stat[{#DEVICE}]'
              tags:
                - tag: Application
                  value: 'MD RAID'
                - tag: Устройство
                  value: '{#DEVICE}'
            - uuid: d19120bc1e374ba69d140a60011a7389
              name: 'MD RAID {#DEVICE}: Используемое на каждом устройстве пространство под массив'
              type: DEPENDENT
              key: 'aw.linux.mdadm.d.used-dev-size[{#DEVICE}]'
              delay: '0'
              units: B
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - '$["used dev size"]'
                # Keep only the leading integer; the human-readable suffix is dropped.
                - type: REGEX
                  parameters:
                    - '^([0-9]+)'
                    - \1
                # mdadm reports "Used Dev Size" in KiB; convert to bytes to match `units: B`.
                - type: MULTIPLIER
                  parameters:
                    - '1024'
                - type: DISCARD_UNCHANGED_HEARTBEAT
                  parameters:
                    - '86400'
              master_item:
                key: 'aw.linux.mdadm.get-stat[{#DEVICE}]'
              tags:
                - tag: Application
                  value: 'MD RAID'
                - tag: Устройство
                  value: '{#DEVICE}'
            - uuid: 2bf72ad31e7d4a9d99e9b1fc27eaace0
              name: 'MD RAID {#DEVICE}: Количество устройств в работе'
              type: DEPENDENT
              key: 'aw.linux.mdadm.d.working-devices[{#DEVICE}]'
              delay: '0'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - '$["working devices"]'
                - type: DISCARD_UNCHANGED_HEARTBEAT
                  parameters:
                    - '86400'
              master_item:
                key: 'aw.linux.mdadm.get-stat[{#DEVICE}]'
              tags:
                - tag: Application
                  value: 'MD RAID'
                - tag: Устройство
                  value: '{#DEVICE}'
              trigger_prototypes:
                - uuid: 6f76dffea3164025a379f012606f29b4
                  expression: 'change(/Linux software RAID monitoring/aw.linux.mdadm.d.working-devices[{#DEVICE}])<>0'
                  recovery_mode: RECOVERY_EXPRESSION
                  recovery_expression: 'nodata(/Linux software RAID monitoring/aw.linux.mdadm.d.working-devices[{#DEVICE}],1h)=1'
                  name: 'Изменилос количество работающих устройств в массиве {#DEVICE}'
                  priority: AVERAGE
            # Master item: its value is not kept (history/trends 0); it only feeds
            # the dependent items above with a JSON object.
            - uuid: 48d73b212abe49f797703669391eb431
              name: '_ MD RAID {#DEVICE}: Сырые данные'
              key: 'aw.linux.mdadm.get-stat[{#DEVICE}]'
              history: '0'
              trends: '0'
              value_type: TEXT
              preprocessing:
                - type: JAVASCRIPT
                  parameters:
                    - |
                      // Convert `mdadm --detail` text into a JSON object.
                      //  * Lines before the "Number Major Minor ..." table header are
                      //    parsed as "Name : value" pairs; the name is trimmed and
                      //    lower-cased and becomes the key.
                      //  * The header line and everything after it are collected
                      //    verbatim under the "raw state" key.
                      var res = {};
                      var inDeviceTable = false;
                      var lines = value.split('\n');
                      for (var i = 0; i < lines.length; i++) {
                          var line = lines[i];
                          if (!inDeviceTable && /^ +Number +Major +Minor/.test(line)) {
                              inDeviceTable = true;
                              res["raw state"] = "";
                          }

                          if (inDeviceTable) {
                              res["raw state"] += line + '\n';
                              continue;
                          }

                          // Split on the FIRST " : " only, so a value that itself
                          // contains " : " is not truncated.
                          var sep = line.indexOf(' : ');
                          if (sep !== -1) {
                              res[line.substring(0, sep).trim().toLowerCase()] = line.substring(sep + 3).trim();
                          }
                      }

                      return JSON.stringify(res);
              tags:
                - tag: Application
                  value: 'MD RAID'
                - tag: Устройство
                  value: '{#DEVICE}'
          trigger_prototypes:
            - uuid: 7b81377396ad4602a684da37ea8fdb35
              expression: 'change(/Linux software RAID monitoring/aw.linux.mdadm.d.array-size[{#DEVICE}])<>0 or change(/Linux software RAID monitoring/aw.linux.mdadm.d.used-dev-size[{#DEVICE}])<>0'
              recovery_mode: RECOVERY_EXPRESSION
              recovery_expression: 'nodata(/Linux software RAID monitoring/aw.linux.mdadm.d.used-dev-size[{#DEVICE}],1h)=1 and nodata(/Linux software RAID monitoring/aw.linux.mdadm.d.array-size[{#DEVICE}],1h)=1'
              name: 'Изменились размеры массива {#DEVICE}'
              opdata: 'Полезный размер: {ITEM.LASTVALUE1} -> {ITEM.VALUE1}; занимаемое на устройствах пространство: {ITEM.LASTVALUE2} -> {ITEM.VALUE2}'
              priority: INFO
          preprocessing:
            - type: JAVASCRIPT
              parameters:
                - |
                  // Build LLD rows from a newline-separated list of md device names.
                  // Lines of two characters or fewer (after trimming) are skipped.
                  var res = [];
                  var lines = value.split('\n');
                  for (var i = 0; i < lines.length; i++) {
                      var name = lines[i].trim();
                      if (name.length > 2) {
                          res.push({
                              '{#DEVICE}': name
                          });
                      }
                  }

                  return JSON.stringify(res);