1
0
Fork 0
nixfiles/hosts/monitoring-3/rules.yml

19 lines
638 B
YAML

# Prometheus alerting rules for hosts scraped under the "node-exporter" job.
groups:
  - name: alert.rules
    rules:
      # Fires when the `up` series of a node-exporter target has been 0
      # continuously for the `for:` duration below. Keep the duration quoted
      # in `description` in sync with `for:`.
      - alert: InstanceDown
        expr: up{job="node-exporter"} == 0
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "Endpoint {{ $labels.instance }} down"
          description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes."

      # Fires when the time elapsed since node_boot_time_seconds reaches
      # 7776000 seconds (90 days * 86400 s/day). No `for:` clause is set on
      # this rule.
      - alert: InstanceUptime
        expr: time() - node_boot_time_seconds{job="node-exporter"} >= 7776000
        labels:
          severity: warning
        annotations:
          summary: "Uptime of {{ $labels.instance }} more than 90 days"
          description: "The last reboot of {{ $labels.instance }} was more than 90 days ago"