From 398067f533ef45b88da7cb88f633eb5cff2e2711 Mon Sep 17 00:00:00 2001 From: clerie Date: Thu, 4 May 2023 14:43:14 +0200 Subject: [PATCH] hosts/monitoring-3: alert on averaged metrics --- hosts/monitoring-3/rules.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hosts/monitoring-3/rules.yml b/hosts/monitoring-3/rules.yml index 7763004..49ea8b5 100644 --- a/hosts/monitoring-3/rules.yml +++ b/hosts/monitoring-3/rules.yml @@ -18,7 +18,7 @@ groups: summary: "Current system of {{ $labels.instance }} not in sync with config" description: "The current system hash of {{ $labels.instance }} does not match the one generated by hydra based on the current config" - alert: BackupStorageFull - expr: ((node_filesystem_avail_bytes{job="node-exporter"} / node_filesystem_size_bytes{job="node-exporter"}) * 100) < 5 + expr: ((avg_over_time(node_filesystem_avail_bytes{job="node-exporter"}[5m]) / avg_over_time(node_filesystem_size_bytes{job="node-exporter"}[5m])) * 100) < 5 for: 30m labels: severity: critical @@ -42,7 +42,7 @@ groups: summary: "Event instance {{ $labels.instance }} down" description: "{{ $labels.instance }} has been down for more than 2 hours." - alert: KernelChanged - expr: nixos_current_system_kernel_is_booted_system_kernel{job="nixos-exporter"} == 0 + expr: avg_over_time(nixos_current_system_kernel_is_booted_system_kernel{job="nixos-exporter"}[5m]) == 0 for: 2h labels: severity: warning