Alert for hosts that are up for too long
This commit is contained in:
parent
6d1e848564
commit
cdbe62e788
@ -101,7 +101,17 @@ in {
|
||||
configuration = {
|
||||
# Top-level notification route. NOTE(review): the receiver/matchers/repeat_interval
# keys look like Alertmanager's routing tree — confirm against the enclosing module.
route = {
|
||||
# Receiver used by this route.
receiver = "xmpp-receiver";
|
||||
# Repeat interval for this route.
repeat_interval = "4h";
|
||||
# Alerts are grouped by their "instance" label.
group_by = [ "instance" ];
|
||||
# Child routes; the single entry below overrides settings for warnings.
routes = [
|
||||
{
|
||||
# Same receiver as the parent route; only the repeat interval differs.
receiver = "xmpp-receiver";
|
||||
matchers = [
|
||||
# Nix ''...'' string so the inner double quotes need no escaping.
''severity = "warning"''
|
||||
];
|
||||
# Alerts matching severity = "warning" use a 70h repeat interval instead of 4h.
repeat_interval = "70h";
|
||||
}
|
||||
];
|
||||
};
|
||||
receivers = [
|
||||
{
|
||||
|
@ -9,3 +9,10 @@ groups:
|
||||
annotations:
|
||||
summary: "Endpoint {{ $labels.instance }} down"
|
||||
description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minutes."
|
||||
# InstanceUptime: flags hosts whose last boot, as reported by the
# "node-exporter" job, is at least 90 days in the past.
- alert: InstanceUptime
|
||||
# time() minus the boot timestamp is the uptime in seconds;
# 7776000 s = 90 * 24 * 3600 = 90 days.
expr: time() - node_boot_time_seconds{job="node-exporter"} >= 7776000
|
||||
labels:
|
||||
# Labelled as a warning.
severity: warning
|
||||
annotations:
|
||||
# The "90 days" wording in both annotations must be kept in sync with the expr threshold.
summary: "Uptime of {{ $labels.instance }} more than 90 days"
|
||||
description: "The last reboot of {{ $labels.instance }} was more than 90 days ago"
|
||||
|
Loading…
Reference in New Issue
Block a user