Introduce service levels and change alert routing based on this
This commit is contained in:
@@ -31,3 +31,12 @@ groups:
|
||||
annotations:
|
||||
summary: "Uptime of {{ $labels.instance }} less than 5 min"
|
||||
description: "{{ $labels.instance }} just booted"
|
||||
# Alert for node-exporter targets labelled service_level="event" that have
# been unreachable for a sustained period.
- alert: EventInstanceDown
  # `up` is 0 when the most recent scrape of the target failed.
  expr: 'up{job="node-exporter", service_level="event"} == 0'
  # The expression must stay true for two hours before the alert fires.
  for: 2h
  labels:
    # NOTE(review): these labels are presumably matched by the Alertmanager
    # routing/inhibition config, which is not visible here - confirm before
    # renaming or changing their values.
    mute: instance
    severity: muted
  annotations:
    summary: 'Event instance {{ $labels.instance }} down'
    description: '{{ $labels.instance }} has been down for more than 2 hours.'
|
||||
|
Reference in New Issue
Block a user