1
0

Alert for hosts that are up for too long

This commit is contained in:
clerie 2022-09-11 17:01:24 +02:00
parent 6d1e848564
commit cdbe62e788
2 changed files with 17 additions and 0 deletions

View File

@@ -101,7 +101,17 @@ in {
configuration = { configuration = {
route = { route = {
receiver = "xmpp-receiver"; receiver = "xmpp-receiver";
repeat_interval = "4h";
group_by = [ "instance" ]; group_by = [ "instance" ];
routes = [
{
receiver = "xmpp-receiver";
matchers = [
''severity = "warning"''
];
repeat_interval = "70h";
}
];
}; };
receivers = [ receivers = [
{ {

View File

@@ -9,3 +9,10 @@ groups:
annotations: annotations:
summary: "Endpoint {{ $labels.instance }} down" summary: "Endpoint {{ $labels.instance }} down"
description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minutes." description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minutes."
- alert: InstanceUptime
expr: time() - node_boot_time_seconds{job="node-exporter"} >= 7776000
labels:
severity: warning
annotations:
summary: "Uptime of {{ $labels.instance }} more than 90 days"
description: "The last reboot of {{ $labels.instance }} was more than 90 days ago"