diff --git a/hosts/monitoring-3/rules.yml b/hosts/monitoring-3/rules.yml index 1382016..7763004 100644 --- a/hosts/monitoring-3/rules.yml +++ b/hosts/monitoring-3/rules.yml @@ -8,7 +8,7 @@ groups: severity: critical annotations: summary: "Endpoint {{ $labels.instance }} down" - description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minutes." + description: "{{ $labels.instance }} is unreachable" - alert: InstanceConfigNotSync expr: nixos_current_system_is_sync{} == 0 for: 2h @@ -18,13 +18,13 @@ groups: summary: "Current system of {{ $labels.instance }} not in sync with config" description: "The current system hash of {{ $labels.instance }} does not match the one generated by hydra based on the current config" - alert: BackupStorageFull - expr: ((node_filesystem_avail_bytes{instance="clerie-backup.net.clerie.de", job="node-exporter", mountpoint="/mnt/clerie-backup"} / node_filesystem_size_bytes{instance="clerie-backup.net.clerie.de", job="node-exporter", mountpoint="/mnt/clerie-backup"}) * 100) < 5 + expr: ((node_filesystem_avail_bytes{job="node-exporter"} / node_filesystem_size_bytes{job="node-exporter"}) * 100) < 5 for: 30m labels: severity: critical annotations: - summary: "Storage for backup is nearly full" - description: "Storage for backups is nearly full" + summary: "Storage of {{ $labels.instance }} is full" + description: "Storage of {{ $labels.instance }} for {{ $labels.mountpoint }} on {{ $labels.device }} is full" - alert: InstanceJustBooted expr: time() - node_boot_time_seconds{job="node-exporter"} <= 300 labels: