From 3b7f59a66e85624ef5a8405d88a5f72a606b823d Mon Sep 17 00:00:00 2001
From: clerie <git@clerie.de>
Date: Tue, 21 Jan 2025 17:18:41 +0100
Subject: [PATCH] hosts/monitoring-3: Warn if storages are almost full

---
Reviewer notes (placed after the three-dash separator, so they are not
part of the commit message and are skipped when the patch is applied):

rules.yml
  * Renames the alert BackupStorageFull to StorageFull. Its expression
    (available/size of node-exporter filesystems below 5 percent, held
    for 30m) is unchanged by this patch.
  * Adds the alert StorageAlmostFull: the same expression with a
    threshold of 10 percent, held for 30m, labelled severity "warning".
  * Every series below 5 percent is also below 10 percent, so both
    alerts fire together for a nearly exhausted filesystem.
  * NOTE(review): references to the old name BackupStorageFull outside
    the lines shown here (if any) are not updated by this patch; verify.

alertmanager.nix
  * Adds a matcher block with StorageFull as source and
    StorageAlmostFull as target, compared on the "instance" and
    "mountpoint" labels. This presumably sits in the Alertmanager
    inhibit rules list (the enclosing key is outside the hunk; confirm),
    in which case a firing StorageFull mutes the StorageAlmostFull
    warning for the same filesystem.

 hosts/monitoring-3/alertmanager.nix | 12 ++++++++++++
 hosts/monitoring-3/rules.yml        | 10 +++++++++-
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/hosts/monitoring-3/alertmanager.nix b/hosts/monitoring-3/alertmanager.nix
index 7913392..86472a8 100644
--- a/hosts/monitoring-3/alertmanager.nix
+++ b/hosts/monitoring-3/alertmanager.nix
@@ -63,6 +63,18 @@
             "instance"
           ];
         }
+        {
+          target_matchers = [
+            ''alertname = "StorageAlmostFull"''
+          ];
+          source_matchers = [
+            ''alertname = "StorageFull"''
+          ];
+          equal = [
+            "instance"
+            "mountpoint"
+          ];
+        }
       ];
     };
   };
diff --git a/hosts/monitoring-3/rules.yml b/hosts/monitoring-3/rules.yml
index a895ddb..9a4a160 100644
--- a/hosts/monitoring-3/rules.yml
+++ b/hosts/monitoring-3/rules.yml
@@ -17,7 +17,7 @@ groups:
     annotations:
       summary: "Current system of {{ $labels.instance }} not in sync with config"
       description: "The current system hash of {{ $labels.instance }} does not match the one generated by hydra based on the current config"
-  - alert: BackupStorageFull
+  - alert: StorageFull
     expr: ((last_over_time(node_filesystem_avail_bytes{job="node-exporter"}[5m]) / last_over_time(node_filesystem_size_bytes{job="node-exporter"}[5m])) * 100) < 5
     for: 30m
     labels:
@@ -25,6 +25,14 @@ groups:
     annotations:
       summary: "Storage of {{ $labels.instance }} is full"
       description: "Storage of {{ $labels.instance }} for {{ $labels.mountpoint }} on {{ $labels.device }} is full"
+  - alert: StorageAlmostFull
+    expr: ((last_over_time(node_filesystem_avail_bytes{job="node-exporter"}[5m]) / last_over_time(node_filesystem_size_bytes{job="node-exporter"}[5m])) * 100) < 10
+    for: 30m
+    labels:
+      severity: warning
+    annotations:
+      summary: "Storage of {{ $labels.instance }} is almost full"
+      description: "Storage of {{ $labels.instance }} for {{ $labels.mountpoint }} on {{ $labels.device }} is almost full"
   - alert: ClerieBackupJobLastSuccessfulRunBehind
     expr: time() - last_over_time(clerie_backup_last_successful_run_time{}[5m]) >= 9000
     for: 5m