# Patch summary (explanatory preamble placed before the first diff header):
#
# hosts/monitoring-3/configuration.nix  (hunk -101,7 +101,17: 10 lines added)
#   * Adds repeat_interval = "4h" to the top-level `route`.
#   * Adds a `routes` list with a single child route that uses the same
#     "xmpp-receiver", matches on severity = "warning", and sets its own
#     repeat_interval of "70h".
#   NOTE(review): the keys (route / receiver / group_by / matchers /
#   repeat_interval) look like Alertmanager routing configuration - confirm
#   against the enclosing Nix option, which is outside this hunk.
#
# hosts/monitoring-3/rules.yml  (hunk -9,3 +9,10: 7 lines added)
#   * Adds the alert `InstanceUptime`, whose expression compares
#     time() - node_boot_time_seconds{job="node-exporter"} against
#     7776000 seconds (= 90 * 86400, i.e. the 90 days named in the annotations).
#   * The alert is labelled severity: warning, which is the value the new child
#     route's matcher selects - presumably so this alert gets the 70h
#     repeat interval rather than the 4h one; confirm that these rules feed the
#     route patched above.
#   NOTE(review): the unchanged context line reads "more than 1 minutes"; that
#   is existing text in rules.yml and is not modified by this patch.
#
# NOTE(review): this copy of the diff is collapsed onto a single line, so the
# original line breaks and indentation are not recoverable from it. It will not
# apply as-is; regenerate it from the repository rather than re-indenting by
# hand, since context lines must match the target files exactly.
diff --git a/hosts/monitoring-3/configuration.nix b/hosts/monitoring-3/configuration.nix index de3e543..826536b 100644 --- a/hosts/monitoring-3/configuration.nix +++ b/hosts/monitoring-3/configuration.nix @@ -101,7 +101,17 @@ in { configuration = { route = { receiver = "xmpp-receiver"; + repeat_interval = "4h"; group_by = [ "instance" ]; + routes = [ + { + receiver = "xmpp-receiver"; + matchers = [ + ''severity = "warning"'' + ]; + repeat_interval = "70h"; + } + ]; }; receivers = [ { diff --git a/hosts/monitoring-3/rules.yml b/hosts/monitoring-3/rules.yml index de0a8de..71a4d26 100644 --- a/hosts/monitoring-3/rules.yml +++ b/hosts/monitoring-3/rules.yml @@ -9,3 +9,10 @@ groups: annotations: summary: "Endpoint {{ $labels.instance }} down" description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minutes." + - alert: InstanceUptime + expr: time() - node_boot_time_seconds{job="node-exporter"} >= 7776000 + labels: + severity: warning + annotations: + summary: "Uptime of {{ $labels.instance }} more than 90 days" + description: "The last reboot of {{ $labels.instance }} was more than 90 days ago"