hosts/monitoring-3: Replace InstanceUp alert with KernelChanged
This commit is contained in:
parent
77d10e4835
commit
41cd4792a6
@ -9,13 +9,6 @@ groups:
|
|||||||
annotations:
|
annotations:
|
||||||
summary: "Endpoint {{ $labels.instance }} down"
|
summary: "Endpoint {{ $labels.instance }} down"
|
||||||
description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minute."
|
description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minute."
|
||||||
- alert: InstanceUptime
|
|
||||||
expr: time() - node_boot_time_seconds{job="node-exporter"} >= 7776000
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
annotations:
|
|
||||||
summary: "Uptime of {{ $labels.instance }} more than 90 days"
|
|
||||||
description: "The last reboot of {{ $labels.instance }} was more than 90 days ago"
|
|
||||||
- alert: InstanceConfigNotSync
|
- alert: InstanceConfigNotSync
|
||||||
expr: nixos_current_system_is_sync{} == 0
|
expr: nixos_current_system_is_sync{} == 0
|
||||||
for: 2h
|
for: 2h
|
||||||
@ -48,3 +41,11 @@ groups:
|
|||||||
annotations:
|
annotations:
|
||||||
summary: "Event instance {{ $labels.instance }} down"
|
summary: "Event instance {{ $labels.instance }} down"
|
||||||
description: "{{ $labels.instance }} has been down for more than 2 hours."
|
description: "{{ $labels.instance }} has been down for more than 2 hours."
|
||||||
|
- alert: KernelChanged
|
||||||
|
expr: nixos_current_system_kernel_is_booted_system_kernel{job="nixos-exporter"} == 0
|
||||||
|
for: 2h
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
annotations:
|
||||||
|
summary: "Kernel of {{ $labels.instance }} changed"
|
||||||
|
description: "The kernel that {{ $labels.instance }} booted with isn't the target kernel. A reboot may be required."
|
||||||
|
Loading…
Reference in New Issue
Block a user