34 lines
1.4 KiB
YAML
34 lines
1.4 KiB
YAML
# Alerting rules in the Prometheus rule-file layout: a single rule group
# ("alert.rules") holding four alerts, all evaluated against metrics from
# the "node-exporter" job.
groups:
  - name: alert.rules
    rules:
      # Fires when the `up` series of a node-exporter target has been 0
      # continuously for 5 minutes.
      - alert: InstanceDown
        expr: up{job="node-exporter"} == 0
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "Endpoint {{ $labels.instance }} down"
          # Duration in this text is kept in sync with `for:` above.
          description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes."

      # Fires when the time since boot is at least 7776000 seconds
      # (90 days * 86400 s). No `for:` clause, so there is no pending period.
      - alert: InstanceUptime
        expr: time() - node_boot_time_seconds{job="node-exporter"} >= 7776000
        labels:
          severity: warning
        annotations:
          summary: "Uptime of {{ $labels.instance }} more than 90 days"
          description: "The last reboot of {{ $labels.instance }} was more than 90 days ago"

      # Fires when the available/size ratio of the /mnt/clerie-backup
      # filesystem on clerie-backup.mon.clerie.de:9100 has stayed below 5 %
      # for 30 minutes.
      - alert: BackupStorageFull
        # Folded scalar (`>-`): the lines below are joined with single spaces,
        # yielding the same one-line expression as before. Keep every line at
        # the same indentation so no newline is preserved.
        expr: >-
          ((node_filesystem_avail_bytes{instance="clerie-backup.mon.clerie.de:9100",
          job="node-exporter", mountpoint="/mnt/clerie-backup"}
          / node_filesystem_size_bytes{instance="clerie-backup.mon.clerie.de:9100",
          job="node-exporter", mountpoint="/mnt/clerie-backup"})
          * 100) < 5
        for: 30m
        labels:
          severity: critical
        annotations:
          summary: "Storage for backup is nearly full"
          description: "Storage for backups is nearly full"

      # Fires while the time since boot is at most 300 seconds (5 minutes).
      # No `for:` clause, so there is no pending period.
      - alert: InstanceJustBooted
        expr: time() - node_boot_time_seconds{job="node-exporter"} <= 300
        labels:
          severity: warning
        annotations:
          summary: "Uptime of {{ $labels.instance }} less than 5 min"
          description: "{{ $labels.instance }} just booted"