From 40338d9b8569ce68eb110fcf1d86b15b97577ded Mon Sep 17 00:00:00 2001
From: clerie
Date: Sat, 9 Aug 2025 11:41:34 +0200
Subject: [PATCH] hosts/monitoring-3: Monitor alertmanager

---
Notes (this area between "---" and the first "diff --git" is not part of
the commit message):

  * prometheus.nix, first hunk: adds a scrape job named "alertmanager"
    (20s interval, http) with the single target
    monitoring-3.mon.clerie.de:9093, using relabelAddressToInstance.
  * prometheus.nix, second hunk: replaces the target "[::1]:9093" with
    "monitoring-3.mon.clerie.de:9093".
  * rules.yml: adds the alert AlertmanagerNotificationRequestsFailed, which
    triggers when rate(alertmanager_notification_requests_failed_total[5m])
    is above 0; it is labelled severity "critical".

  NOTE(review): this patch was restored from a copy in which all line
  breaks and leading whitespace had been collapsed. Line breaks follow the
  hunk line counts; indentation (and the "[ {" / "} ];" line splits in the
  second prometheus.nix hunk) is reconstructed by convention and must be
  confirmed against the target files before applying -- TODO confirm,
  especially for rules.yml where indentation is significant.

 hosts/monitoring-3/prometheus.nix | 17 ++++++++++++++++-
 hosts/monitoring-3/rules.yml      |  7 +++++++
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/hosts/monitoring-3/prometheus.nix b/hosts/monitoring-3/prometheus.nix
index e305e88..72392f7 100644
--- a/hosts/monitoring-3/prometheus.nix
+++ b/hosts/monitoring-3/prometheus.nix
@@ -104,6 +104,21 @@ in {
           relabelAddressToInstance
         ];
       }
+      {
+        job_name = "alertmanager";
+        scrape_interval = "20s";
+        scheme = "http";
+        static_configs = [
+          {
+            targets = [
+              "monitoring-3.mon.clerie.de:9093"
+            ];
+          }
+        ];
+        relabel_configs = [
+          relabelAddressToInstance
+        ];
+      }
       {
         job_name = "node-exporter";
         scrape_interval = "20s";
@@ -526,7 +541,7 @@ in {
       {
         static_configs = [ {
           targets = [
-            "[::1]:9093"
+            "monitoring-3.mon.clerie.de:9093"
           ];
         } ];
       }
diff --git a/hosts/monitoring-3/rules.yml b/hosts/monitoring-3/rules.yml
index bb0f5c9..7cb9d07 100644
--- a/hosts/monitoring-3/rules.yml
+++ b/hosts/monitoring-3/rules.yml
@@ -95,3 +95,10 @@ groups:
         annotations:
           summary: "blog.nadja.top unreachable via IPv4"
           description: "blog.nadja.top unreachable IPv4, but reachable via IPv6"
+      - alert: AlertmanagerNotificationRequestsFailed
+        expr: rate(alertmanager_notification_requests_failed_total[5m]) > 0
+        labels:
+          severity: critical
+        annotations:
+          summary: "Too many notification requests failed"
+          description: "Too many notification requests to Alertmanager integration {{ $labels.integration }} failed"