1
0

hosts/monitoring-3: alert for all storage drives when they are full

This commit is contained in:
clerie 2023-04-22 18:30:51 +02:00
parent 7d13fb29d1
commit 882df0098f

View File

@@ -8,7 +8,7 @@ groups:
severity: critical severity: critical
annotations: annotations:
summary: "Endpoint {{ $labels.instance }} down" summary: "Endpoint {{ $labels.instance }} down"
description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minutes." description: "{{ $labels.instance }} is unreachable"
- alert: InstanceConfigNotSync - alert: InstanceConfigNotSync
expr: nixos_current_system_is_sync{} == 0 expr: nixos_current_system_is_sync{} == 0
for: 2h for: 2h
@@ -18,13 +18,13 @@ groups:
summary: "Current system of {{ $labels.instance }} not in sync with config" summary: "Current system of {{ $labels.instance }} not in sync with config"
description: "The current system hash of {{ $labels.instance }} does not match the one generated by hydra based on the current config" description: "The current system hash of {{ $labels.instance }} does not match the one generated by hydra based on the current config"
- alert: BackupStorageFull - alert: BackupStorageFull
expr: ((node_filesystem_avail_bytes{instance="clerie-backup.net.clerie.de", job="node-exporter", mountpoint="/mnt/clerie-backup"} / node_filesystem_size_bytes{instance="clerie-backup.net.clerie.de", job="node-exporter", mountpoint="/mnt/clerie-backup"}) * 100) < 5 expr: ((node_filesystem_avail_bytes{job="node-exporter"} / node_filesystem_size_bytes{job="node-exporter"}) * 100) < 5
for: 30m for: 30m
labels: labels:
severity: critical severity: critical
annotations: annotations:
summary: "Storage for backup is nearly full" summary: "Storage of {{ $labels.instance }} is full"
description: "Storage for backups is nearly full" description: "Storage of {{ $labels.instance }} for {{ $labels.mountpoint }} on {{ $labels.device }} is full"
- alert: InstanceJustBooted - alert: InstanceJustBooted
expr: time() - node_boot_time_seconds{job="node-exporter"} <= 300 expr: time() - node_boot_time_seconds{job="node-exporter"} <= 300
labels: labels: