Alert for hosts that are up for too long
This commit is contained in:
parent
6d1e848564
commit
cdbe62e788
@ -101,7 +101,17 @@ in {
|
|||||||
configuration = {
|
configuration = {
|
||||||
route = {
|
route = {
|
||||||
receiver = "xmpp-receiver";
|
receiver = "xmpp-receiver";
|
||||||
|
repeat_interval = "4h";
|
||||||
group_by = [ "instance" ];
|
group_by = [ "instance" ];
|
||||||
|
routes = [
|
||||||
|
{
|
||||||
|
receiver = "xmpp-receiver";
|
||||||
|
matchers = [
|
||||||
|
''severity = "warning"''
|
||||||
|
];
|
||||||
|
repeat_interval = "70h";
|
||||||
|
}
|
||||||
|
];
|
||||||
};
|
};
|
||||||
receivers = [
|
receivers = [
|
||||||
{
|
{
|
||||||
|
@ -9,3 +9,10 @@ groups:
|
|||||||
annotations:
|
annotations:
|
||||||
summary: "Endpoint {{ $labels.instance }} down"
|
summary: "Endpoint {{ $labels.instance }} down"
|
||||||
description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minutes."
|
description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minutes."
|
||||||
|
- alert: InstanceUptime
|
||||||
|
expr: time() - node_boot_time_seconds{job="node-exporter"} >= 7776000
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
annotations:
|
||||||
|
summary: "Uptime of {{ $labels.instance }} more than 90 days"
|
||||||
|
description: "The last reboot of {{ $labels.instance }} was more than 90 days ago"
|
||||||
|
Loading…
Reference in New Issue
Block a user