Alerts


/etc/prometheus/alertmanager/alert.rules.yaml > nodes
High_Cpu_Load (0 active)
# Critical alert: fires when an instance's average non-idle CPU time stays
# above 95% for 5 minutes.
alert: High_Cpu_Load
# rate() rather than irate(): irate() only looks at the last two samples in the
# window, so a single low sample could reset the `for` timer below.
expr: 100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 95
for: 5m
labels:
  severity: critical
annotations:
  # Literal block scalar; the extra indentation on the VALUE/LABELS lines is
  # part of the rendered text.
  description: |-
    CPU load is >95%
      VALUE = {{ $value }}
      LABELS: {{ $labels }}
  summary: High CPU load (instance {{ $labels.instance }})
High_Memory_Load (0 active)
# Warning alert: memory in use, computed as MemTotal minus
# (MemFree + Buffers + Cached), above 95% of MemTotal for 5 minutes.
alert: High_Memory_Load
# Folded scalar: the line break is joined with a single space, so the value is
# the same one-line expression.
expr: >-
  ((node_memory_MemTotal_bytes) - (node_memory_MemFree_bytes + node_memory_Buffers_bytes + node_memory_Cached_bytes))
  / (node_memory_MemTotal_bytes) * 100 > 95
for: 5m
annotations:
  summary: 'Server memory is almost full (instance {{ $labels.instance }})'
  # Folded scalar; renders as a single line.
  description: >-
    Server usage is {{ humanize $value}}%.
    Reported by instance {{ $labels.instance }} of job {{ $labels.job }}.
labels:
  severity: 'warning'
High_Storage_Load (0 active)
# Warning alert: fires per filesystem when used space (size - avail) stays
# above 95% of the filesystem size for 5 minutes.
alert: High_Storage_Load
# The same selector is applied to every metric: pseudo/virtual filesystem
# types are excluded, and so is the mountpoint /boot. PromQL regex matchers
# are fully anchored, so only the exact path "/boot" is excluded.
# Folded scalar: line breaks are joined with single spaces into one expression.
expr: >-
  (node_filesystem_size_bytes{fstype!~"rootfs|selinuxfs|autofs|rpc_pipefs|tmpfs",mountpoint!~"/boot"}
  - node_filesystem_avail_bytes{fstype!~"rootfs|selinuxfs|autofs|rpc_pipefs|tmpfs",mountpoint!~"/boot"})
  / node_filesystem_size_bytes{fstype!~"rootfs|selinuxfs|autofs|rpc_pipefs|tmpfs",mountpoint!~"/boot"}
  * 100 > 95
for: 5m
labels:
  severity: warning
annotations:
  # The rule produces one alert per filesystem, so name the mountpoint;
  # otherwise the notification does not say which filesystem is full.
  description: Server storage usage on {{ $labels.mountpoint }} is {{ humanize $value}}%. Reported by instance {{ $labels.instance }} of job {{ $labels.job }}.
  summary: Server storage is almost full (instance {{ $labels.instance }})
Swap_Is_Filling_Up (0 active)
# Warning alert: fires when used swap, (1 - SwapFree / SwapTotal) * 100,
# stays above 90% for 5 minutes.
alert: Swap_Is_Filling_Up
expr: (1 - (node_memory_SwapFree_bytes / node_memory_SwapTotal_bytes)) * 100 > 90
for: 5m
labels:
  severity: warning
annotations:
  # Keep the percentage in the text in sync with the threshold in `expr`.
  # Literal block scalar; the extra indentation on the VALUE/LABELS lines is
  # part of the rendered text.
  description: |-
    Swap is filling up (>90%)
      VALUE = {{ $value }}
      LABELS: {{ $labels }}
  summary: Swap is filling up (instance {{ $labels.instance }})
Warning_Cpu_Load (0 active)
# Warning alert: fires when an instance's average non-idle CPU time stays
# above 80% for 10 minutes.
alert: Warning_Cpu_Load
# rate() rather than irate(): irate() only looks at the last two samples in the
# window, so a single low sample could reset the `for` timer below.
expr: 100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 80
for: 10m
labels:
  severity: warning
annotations:
  # Literal block scalar; the extra indentation on the VALUE/LABELS lines is
  # part of the rendered text.
  description: |-
    CPU load is > 80%
      VALUE = {{ $value }}
      LABELS: {{ $labels }}
  summary: High CPU load (instance {{ $labels.instance }})