ncc.zabbix_templates

Yohn Y. 2024-05-10 Parent:c6fdaa108d9e Child:cccbd4830294

23:f1258954911b Go to Latest

ncc.zabbix_templates/mdadm/zbx_export_templates.yaml

* Ложные срабатывания триггеров при частых изменениях состояния массива + Отдельное наблюдение за состоянием деградации массива.

History
     1.1 --- a/mdadm/zbx_export_templates.yaml	Thu Apr 25 20:49:48 2024 +0300
     1.2 +++ b/mdadm/zbx_export_templates.yaml	Fri May 10 21:49:47 2024 +0300
     1.3 @@ -1,6 +1,6 @@
     1.4  zabbix_export:
     1.5    version: '6.0'
     1.6 -  date: '2024-02-25T06:30:53Z'
     1.7 +  date: '2024-05-10T18:47:47Z'
     1.8    groups:
     1.9      - uuid: 22941f4e01294888a9bed3aae15f6ef9
    1.10        name: Templates/AWNET
    1.11 @@ -8,7 +8,7 @@
    1.12      - uuid: d60632a7a4b94709b0455e1bec6ce54a
    1.13        template: 'Linux software RAID monitoring'
    1.14        name: 'Linux software RAID monitoring'
    1.15 -      description: 'version: v0.r202402.2'
    1.16 +      description: 'version: v0.r202405.1'
    1.17        groups:
    1.18          - name: Templates/AWNET
    1.19        discovery_rules:
    1.20 @@ -18,6 +18,31 @@
    1.21            delay: 5m
    1.22            lifetime: 1d
    1.23            item_prototypes:
    1.24 +            - uuid: d86159d183f14b7490d5ba69d35c7766
    1.25 +              name: 'MD RAID {#DEVICE}: Состояние деградации'
    1.26 +              type: DEPENDENT
    1.27 +              key: 'aw.linux.mdadm.c.is-degrade[{#DEVICE}]'
    1.28 +              delay: '0'
    1.29 +              description: |
    1.30 +                Имеет 2 значения:
    1.31 +                0 - массив в норме
    1.32 +                1 - массив деградировал
    1.33 +              preprocessing:
    1.34 +                - type: JAVASCRIPT
    1.35 +                  parameters:
    1.36 +                    - 'return /degraded/.test(value.toLowerCase()) ? 1 : 0'
    1.37 +              master_item:
    1.38 +                key: 'aw.linux.mdadm.d.state[{#DEVICE}]'
    1.39 +              tags:
    1.40 +                - tag: Application
    1.41 +                  value: 'MD RAID'
    1.42 +                - tag: Устройство
    1.43 +                  value: '{#DEVICE}'
    1.44 +              trigger_prototypes:
    1.45 +                - uuid: 69befe4603314ec4b6bc41d0f549a6a6
    1.46 +                  expression: 'last(/Linux software RAID monitoring/aw.linux.mdadm.c.is-degrade[{#DEVICE}])=1'
    1.47 +                  name: 'Массив {#DEVICE} деградировал.'
    1.48 +                  priority: HIGH
    1.49              - uuid: 2fd74bd606d241d1bf34759ed1449ad5
    1.50                name: 'MD RAID {#DEVICE}: Количество активных устройства'
    1.51                type: DEPENDENT
    1.52 @@ -167,9 +192,15 @@
    1.53                    value: '{#DEVICE}'
    1.54                trigger_prototypes:
    1.55                  - uuid: 7ea6e8b61acc4eb6a068e4a289725fd7
    1.56 -                  expression: 'change(/Linux software RAID monitoring/aw.linux.mdadm.d.raid-level[{#DEVICE}])<>0'
    1.57 +                  expression: |
    1.58 +                    change(/Linux software RAID monitoring/aw.linux.mdadm.d.raid-level[{#DEVICE}])<>0
    1.59 +                    and last(/Linux software RAID monitoring/aw.linux.mdadm.d.raid-level[{#DEVICE}],#1) 
    1.60 +                          <> last(/Linux software RAID monitoring/aw.linux.mdadm.d.raid-level[{#DEVICE}],#2)
    1.61                    recovery_mode: RECOVERY_EXPRESSION
    1.62 -                  recovery_expression: 'nodata(/Linux software RAID monitoring/aw.linux.mdadm.d.raid-level[{#DEVICE}],1h)=1'
    1.63 +                  recovery_expression: |
    1.64 +                    nodata(/Linux software RAID monitoring/aw.linux.mdadm.d.raid-level[{#DEVICE}],1h)=1
    1.65 +                    or last(/Linux software RAID monitoring/aw.linux.mdadm.d.raid-level[{#DEVICE}],#1) 
    1.66 +                          = last(/Linux software RAID monitoring/aw.linux.mdadm.d.raid-level[{#DEVICE}],#2)
    1.67                    name: 'Изменился тип массива {#DEVICE}'
    1.68                    priority: INFO
    1.69              - uuid: f04b15b7f32a4e62b4e6a302c60a8fe4
    1.70 @@ -216,7 +247,7 @@
    1.71                  - uuid: 77a85a17674b4b5aa7e023b56290ed61
    1.72                    expression: 'change(/Linux software RAID monitoring/aw.linux.mdadm.d.spare-devices[{#DEVICE}])<0'
    1.73                    recovery_mode: RECOVERY_EXPRESSION
    1.74 -                  recovery_expression: 'nodata(/Linux software RAID monitoring/aw.linux.mdadm.d.spare-devices[{#DEVICE}],1h)=1'
    1.75 +                  recovery_expression: 'nodata(/Linux software RAID monitoring/aw.linux.mdadm.d.spare-devices[{#DEVICE}],1h)=1 or change(/Linux software RAID monitoring/aw.linux.mdadm.d.spare-devices[{#DEVICE}]) = 0'
    1.76                    name: 'Уменьшилось количество запасных устройств в {#DEVICE}'
    1.77                    priority: HIGH
    1.78              - uuid: 7bf13d288746481da7aa1ba863e24dcf
    1.79 @@ -318,9 +349,9 @@
    1.80                    value: '{#DEVICE}'
    1.81                trigger_prototypes:
    1.82                  - uuid: 6f76dffea3164025a379f012606f29b4
    1.83 -                  expression: 'change(/Linux software RAID monitoring/aw.linux.mdadm.d.working-devices[{#DEVICE}])<>0'
    1.84 +                  expression: 'change(/Linux software RAID monitoring/aw.linux.mdadm.d.working-devices[{#DEVICE}])<>0 and last(/Linux software RAID monitoring/aw.linux.mdadm.d.working-devices[{#DEVICE}],#2) <> last(/Linux software RAID monitoring/aw.linux.mdadm.d.working-devices[{#DEVICE}],#1)'
    1.85                    recovery_mode: RECOVERY_EXPRESSION
    1.86 -                  recovery_expression: 'nodata(/Linux software RAID monitoring/aw.linux.mdadm.d.working-devices[{#DEVICE}],1h)=1'
    1.87 +                  recovery_expression: 'nodata(/Linux software RAID monitoring/aw.linux.mdadm.d.working-devices[{#DEVICE}],1h)=1 or last(/Linux software RAID monitoring/aw.linux.mdadm.d.working-devices[{#DEVICE}],#2) = last(/Linux software RAID monitoring/aw.linux.mdadm.d.working-devices[{#DEVICE}],#1)'
    1.88                    name: 'Изменилос количество работающих устройств в массиве {#DEVICE}'
    1.89                    priority: AVERAGE
    1.90              - uuid: 48d73b212abe49f797703669391eb431
    1.91 @@ -363,9 +394,28 @@
    1.92                    value: '{#DEVICE}'
    1.93            trigger_prototypes:
    1.94              - uuid: 7b81377396ad4602a684da37ea8fdb35
    1.95 -              expression: 'change(/Linux software RAID monitoring/aw.linux.mdadm.d.array-size[{#DEVICE}])<>0 or change(/Linux software RAID monitoring/aw.linux.mdadm.d.used-dev-size[{#DEVICE}])<>0'
    1.96 +              expression: |
    1.97 +                (
    1.98 +                  change(/Linux software RAID monitoring/aw.linux.mdadm.d.array-size[{#DEVICE}])<>0
    1.99 +                  and last(/Linux software RAID monitoring/aw.linux.mdadm.d.array-size[{#DEVICE}],#1) 
   1.100 +                      <> last(/Linux software RAID monitoring/aw.linux.mdadm.d.array-size[{#DEVICE}],#2)
   1.101 +                ) or (
   1.102 +                  change(/Linux software RAID monitoring/aw.linux.mdadm.d.used-dev-size[{#DEVICE}])<>0
   1.103 +                  and last(/Linux software RAID monitoring/aw.linux.mdadm.d.used-dev-size[{#DEVICE}],#1) 
   1.104 +                      <> last(/Linux software RAID monitoring/aw.linux.mdadm.d.used-dev-size[{#DEVICE}],#2)
   1.105 +                )
   1.106                recovery_mode: RECOVERY_EXPRESSION
   1.107 -              recovery_expression: 'nodata(/Linux software RAID monitoring/aw.linux.mdadm.d.used-dev-size[{#DEVICE}],1h)=1 and nodata(/Linux software RAID monitoring/aw.linux.mdadm.d.array-size[{#DEVICE}],1h)=1'
   1.108 +              recovery_expression: |
   1.109 +                (
   1.110 +                  nodata(/Linux software RAID monitoring/aw.linux.mdadm.d.used-dev-size[{#DEVICE}],1h)=1
   1.111 +                  or last(/Linux software RAID monitoring/aw.linux.mdadm.d.used-dev-size[{#DEVICE}],#1) 
   1.112 +                      = last(/Linux software RAID monitoring/aw.linux.mdadm.d.used-dev-size[{#DEVICE}],#2)
   1.113 +                ) 
   1.114 +                or (
   1.115 +                  nodata(/Linux software RAID monitoring/aw.linux.mdadm.d.array-size[{#DEVICE}],1h)=1
   1.116 +                  or last(/Linux software RAID monitoring/aw.linux.mdadm.d.array-size[{#DEVICE}],#1) 
   1.117 +                      = last(/Linux software RAID monitoring/aw.linux.mdadm.d.array-size[{#DEVICE}],#2)
   1.118 +                )
   1.119                name: 'Изменились размеры массива {#DEVICE}'
   1.120                opdata: 'Полезный размер: {ITEM.LASTVALUE1} -> {ITEM.VALUE1}; занимаемое на устройствах пространство: {ITEM.LASTVALUE2} -> {ITEM.VALUE2}'
   1.121                priority: INFO