Introduce service levels and route alerts based on them
This commit is contained in:
@@ -20,7 +20,7 @@ let
|
||||
monitoringHosts;
|
||||
|
||||
monitoringTargets = mapAttrsToList (name: host:
|
||||
"${host.config.networking.hostName}.mon.clerie.de:9100")
|
||||
"${host.config.networking.hostName}.mon.clerie.de:9100;${attrByPath ["clerie" "monitoring" "serviceLevel"] "infra" host.config}")
|
||||
monitoringHosts;
|
||||
|
||||
nixosMonitoringTargets = mapAttrsToList (name: host:
|
||||
@@ -106,6 +106,12 @@ in {
|
||||
repeat_interval = "4h";
|
||||
group_by = [ "instance" ];
|
||||
routes = [
|
||||
{
|
||||
matchers = [
|
||||
''severity = "muted"''
|
||||
];
|
||||
receiver = "muted";
|
||||
}
|
||||
{
|
||||
receiver = "xmpp-receiver";
|
||||
matchers = [
|
||||
@@ -116,6 +122,9 @@ in {
|
||||
];
|
||||
};
|
||||
receivers = [
|
||||
{
|
||||
name = "muted";
|
||||
}
|
||||
{
|
||||
name = "xmpp-receiver";
|
||||
webhook_configs = [
|
||||
@@ -125,6 +134,20 @@ in {
|
||||
];
|
||||
}
|
||||
];
|
||||
inhibit_rules = [
|
||||
{
|
||||
# Mute all alerts for an instance that also has a firing alert labelled mute="instance" (presumably the alerts whose names start with MuteInstanceOn; confirm against the alert rules)
|
||||
target_matchers = [
|
||||
''alertname =~ ".+"''
|
||||
];
|
||||
source_matchers = [
|
||||
''mute = "instance"''
|
||||
];
|
||||
equal = [
|
||||
"instance"
|
||||
];
|
||||
}
|
||||
];
|
||||
};
|
||||
};
|
||||
|
||||
@@ -160,12 +183,24 @@ in {
|
||||
static_configs = [
|
||||
{
|
||||
targets = [
|
||||
"monitoring-3.mon.clerie.de:9100"
|
||||
"monitoring-3.mon.clerie.de:9100;infra"
|
||||
]
|
||||
++ monitoringTargets;
|
||||
}
|
||||
];
|
||||
relabel_configs = [
|
||||
{
|
||||
source_labels = [ "__address__" ];
|
||||
regex = "(.+);(.+)";
|
||||
target_label = "service_level";
|
||||
replacement = "\${2}";
|
||||
}
|
||||
{
|
||||
source_labels = [ "__address__" ];
|
||||
regex = "(.+);(.+)";
|
||||
target_label = "__address__";
|
||||
replacement = "\${1}";
|
||||
}
|
||||
relabelAddressToInstance
|
||||
];
|
||||
}
|
||||
|
Reference in New Issue
Block a user