1
0

hosts/monitoring-3: split host config to multiple files

This commit is contained in:
clerie 2023-02-03 22:28:50 +01:00
parent 0a1311252b
commit 6082fb0744
5 changed files with 433 additions and 405 deletions

View File

@ -0,0 +1,70 @@
{ ... }:

let
  # Names of the Alertmanager receivers referenced by the routing tree.
  mutedReceiver = "muted";
  xmppReceiver = "xmpp-receiver";

  # Local port on which the XMPP alert bridge accepts webhooks.
  xmppBridgePort = 9199;
in
{
  # Bridge that turns Alertmanager webhooks into XMPP messages.
  services.prometheus.xmpp-alerts.enable = true;
  services.prometheus.xmpp-alerts.settings = {
    jid = "feuer@fem-net.de";
    # The password is read from a file at runtime via this command.
    password_command = "cat /var/src/secrets/xmpp-alert/password";
    listen_address = "::1";
    listen_port = xmppBridgePort;
    to_jid = "clerie@fem-net.de";
    format = "short";
  };

  # Alertmanager itself, listening on loopback only.
  services.prometheus.alertmanager.enable = true;
  services.prometheus.alertmanager.listenAddress = "[::1]";
  services.prometheus.alertmanager.port = 9093;

  services.prometheus.alertmanager.configuration = {
    # Two receivers: "muted" has no notification integration at all, the
    # XMPP receiver forwards to the local webhook bridge.
    receivers = [
      { name = mutedReceiver; }
      {
        name = xmppReceiver;
        webhook_configs = [
          { url = "http://[::1]:${toString xmppBridgePort}/alert"; }
        ];
      }
    ];

    route = {
      # Default: notify via XMPP, grouped per instance, repeated every 4h.
      receiver = xmppReceiver;
      group_by = [ "instance" ];
      repeat_interval = "4h";

      routes = [
        # Alerts with severity "muted" go to the receiver without integrations.
        {
          receiver = mutedReceiver;
          matchers = [ ''severity = "muted"'' ];
        }
        # Warnings still go out via XMPP but are repeated far less often.
        {
          receiver = xmppReceiver;
          matchers = [ ''severity = "warning"'' ];
          repeat_interval = "70h";
        }
      ];
    };

    inhibit_rules = [
      {
        # While an instance has a firing alert labelled mute="instance",
        # suppress every other alert that shares the same "instance" label.
        # NOTE(review): presumably these are the MuteInstanceOn* alerts from
        # the Prometheus rules file - confirm there.
        source_matchers = [ ''mute = "instance"'' ];
        target_matchers = [ ''alertname =~ ".+"'' ];
        equal = [ "instance" ];
      }
    ];
  };
}

View File

@ -1,55 +1,15 @@
{ config, pkgs, lib, ... }: { config, pkgs, lib, ... }:
with lib; {
let
hosts = (import ../../lib/hosts.nix { inherit pkgs; }).hosts;
monitoringHosts = filterAttrs (name: host:
attrByPath ["clerie" "monitoring" "enable"] false host.config)
hosts;
monitoringHostsNames = mapAttrs' (name: host:
nameValuePair "fd00:327:327:327::${host.config.clerie.monitoring.id}" ["${host.config.networking.hostName}.mon.clerie.de"])
monitoringHosts;
monitoringPeers = mapAttrsToList (name: host: {
allowedIPs = [ "fd00:327:327:327::${host.config.clerie.monitoring.id}/128" ];
publicKey = host.config.clerie.monitoring.pubkey;
})
monitoringHosts;
monitoringTargets = mapAttrsToList (name: host:
"${host.config.networking.hostName}.mon.clerie.de:9100;${attrByPath ["clerie" "monitoring" "serviceLevel"] "infra" host.config}")
monitoringHosts;
nixosMonitoringTargets = mapAttrsToList (name: host:
"${host.config.networking.hostName}.mon.clerie.de:9152")
(filterAttrs (name: host:
# assume this is a NixOS system if not specified
attrByPath ["clerie" "monitoring" "nixos"] true host.config)
monitoringHosts);
birdMonitoringTargets = mapAttrsToList (name: host:
"${host.config.networking.hostName}.mon.clerie.de:9324")
(filterAttrs (name: host:
attrByPath ["clerie" "monitoring" "bird"] false host.config)
monitoringHosts);
blackboxMonitoringTargets = mapAttrsToList (name: host:
"${host.config.networking.hostName}.mon.clerie.de:9115")
(filterAttrs (name: host:
attrByPath ["clerie" "monitoring" "blackbox"] false host.config)
monitoringHosts);
eachWithEachOther = (f: x: y: lib.lists.flatten (lib.lists.forEach x (a: lib.lists.forEach y (b: f a b))));
in {
imports = imports =
[ [
./hardware-configuration.nix ./hardware-configuration.nix
../../configuration/common ../../configuration/common
../../configuration/proxmox-vm ../../configuration/proxmox-vm
./alertmanager.nix
./grafana.nix
./prometheus.nix
./uptimestatus.nix
]; ];
boot.loader.grub.enable = true; boot.loader.grub.enable = true;
@ -65,370 +25,10 @@ in {
networking.defaultGateway6 = { address = "2001:638:904:ffca::1"; interface = "ens19"; }; networking.defaultGateway6 = { address = "2001:638:904:ffca::1"; interface = "ens19"; };
networking.nameservers = [ "2001:638:904:ffcc::3" "2001:638:904:ffcc::4" "141.24.40.3" "141.24.40.4" ]; networking.nameservers = [ "2001:638:904:ffcc::3" "2001:638:904:ffcc::4" "141.24.40.3" "141.24.40.4" ];
networking.hosts = {
"::1" = [ "monitoring-3.mon.clerie.de" ]; # fd00:327:327:327::1
}
// monitoringHostsNames;
networking.wireguard.enable = true;
networking.wireguard.interfaces = {
wg-monitoring = {
ips = [ "fd00:327:327:327::1/64" ];
listenPort = 54523;
peers = monitoringPeers;
privateKeyFile = "/var/src/secrets/wireguard/wg-monitoring";
};
};
networking.firewall.allowedUDPPorts = [ 54523 ];
services.prometheus.exporters.node.enable = true; services.prometheus.exporters.node.enable = true;
services.prometheus.xmpp-alerts = {
enable = true;
settings = {
jid = "feuer@fem-net.de";
password_command = "cat /var/src/secrets/xmpp-alert/password";
listen_address = "::1";
listen_port = 9199;
to_jid = "clerie@fem-net.de";
format = "short";
};
};
services.prometheus.alertmanager = {
enable = true;
listenAddress = "[::1]";
port = 9093;
configuration = {
route = {
receiver = "xmpp-receiver";
repeat_interval = "4h";
group_by = [ "instance" ];
routes = [
{
matchers = [
''severity = "muted"''
];
receiver = "muted";
}
{
receiver = "xmpp-receiver";
matchers = [
''severity = "warning"''
];
repeat_interval = "70h";
}
];
};
receivers = [
{
name = "muted";
}
{
name = "xmpp-receiver";
webhook_configs = [
{
url = "http://[::1]:9199/alert";
}
];
}
];
inhibit_rules = [
{
# Mute all alerts for an instance which also has an alert starting with MuteInstanceOn
target_matchers = [
''alertname =~ ".+"''
];
source_matchers = [
''mute = "instance"''
];
equal = [
"instance"
];
}
];
};
};
services.prometheus = {
enable = true;
listenAddress = "[::1]";
scrapeConfigs = let
relabelAddressToInstance = {
source_labels = [ "__address__" ];
target_label = "instance";
regex = ''([\w-]+)\.mon\.clerie\.de\:\d+'';
replacement = "\${1}.net.clerie.de";
};
in [
{
job_name = "prometheus";
scrape_interval = "20s";
scheme = "http";
static_configs = [
{
targets = [
"monitoring-3.mon.clerie.de:9090"
];
}
];
relabel_configs = [
relabelAddressToInstance
];
}
{
job_name = "node-exporter";
scrape_interval = "20s";
static_configs = [
{
targets = [
"monitoring-3.mon.clerie.de:9100;infra"
]
++ monitoringTargets;
}
];
relabel_configs = [
{
source_labels = [ "__address__" ];
regex = "(.+);(.+)";
target_label = "service_level";
replacement = "\${2}";
}
{
source_labels = [ "__address__" ];
regex = "(.+);(.+)";
target_label = "__address__";
replacement = "\${1}";
}
relabelAddressToInstance
];
}
{
job_name = "nixos-exporter";
scrape_interval = "1m";
static_configs = [
{
targets = nixosMonitoringTargets;
}
];
relabel_configs = [
relabelAddressToInstance
];
}
{
job_name = "bird-exporter";
scrape_interval = "20s";
static_configs = [
{
targets = birdMonitoringTargets;
}
];
relabel_configs = [
relabelAddressToInstance
];
}
{
job_name = "blackbox_icmp6";
scrape_interval = "20s";
metrics_path = "/probe";
params = {
module = [ "icmp6" ];
};
static_configs = [
{
targets = eachWithEachOther (instance: target: "${instance};${target}") blackboxMonitoringTargets [
"clerie.de"
"tagesschau.de"
"google.com"
"achtbaan.nikhef.nl"
"fluorine.net.clerie.de"
"www.fem.tu-ilmenau.de"
"www.heise.de"
];
}
];
relabel_configs = [
{
source_labels = [ "__address__" ];
regex = "(.+);(.+)";
target_label = "__param_target";
replacement = "\${2}";
}
{
source_labels = [ "__param_target" ];
target_label = "target";
}
{
source_labels = [ "__address__" ];
regex = "(.+);(.+)";
target_label = "__address__";
replacement = "\${1}";
}
relabelAddressToInstance
];
}
{
job_name = "blackbox_icmp4";
scrape_interval = "20s";
metrics_path = "/probe";
params = {
module = [ "icmp4" ];
};
static_configs = [
{
targets = eachWithEachOther (instance: target: "${instance};${target}") blackboxMonitoringTargets [
"clerie.de"
"tagesschau.de"
"google.com"
"achtbaan.nikhef.nl"
"www.fem.tu-ilmenau.de"
"www.heise.de"
"ie10-ffm2.nodes.nethinks.com"
"matrix.bau-ha.us"
];
}
];
relabel_configs = [
{
source_labels = [ "__address__" ];
regex = "(.+);(.+)";
target_label = "__param_target";
replacement = "\${2}";
}
{
source_labels = [ "__param_target" ];
target_label = "target";
}
{
source_labels = [ "__address__" ];
regex = "(.+);(.+)";
target_label = "__address__";
replacement = "\${1}";
}
relabelAddressToInstance
];
}
{
job_name = "zimmer-temp";
scrape_interval = "20s";
scheme = "https";
metrics_path = "/data/zimmer-temp/";
static_configs = [
{
targets = [
"iot-data.clerie.de"
];
}
];
}
{
job_name = "outdoor-temp";
scrape_interval = "20s";
scheme = "https";
metrics_path = "/data/outdoor-temp/";
static_configs = [
{
targets = [
"iot-data.clerie.de"
];
}
];
}
{
job_name = "xmpp-alerts";
scrape_interval = "20s";
static_configs = [
{
targets = [
"monitoring-3.mon.clerie.de:9199"
];
}
];
relabel_configs = [
relabelAddressToInstance
];
}
];
alertmanagers = [
{
static_configs = [ {
targets = [
"[::1]:9093"
];
} ];
}
];
rules = [ (readFile ./rules.yml) ];
};
services.grafana = {
enable = true;
domain = "grafana.monitoring.clerie.de";
rootUrl = "https://grafana.monitoring.clerie.de";
port = 3001;
addr = "::1";
auth.anonymous.enable = true;
provision = {
enable = true;
datasources.settings.datasources = [
{
type = "prometheus";
name = "Prometheus";
url = "http://[::1]:9090";
isDefault = true;
}
];
dashboards.settings.providers = [
{
options.path = ./dashboards;
}
];
};
};
users.users.uptimestatus = {
description = "Uptime Status Service";
group = "uptimestatus";
home = "/var/lib/uptimestatus/";
useDefaultShell = true;
isSystemUser = true;
};
users.groups.uptimestatus = {};
systemd.services.uptimestatus = {
wantedBy = [ "multi-user.target" ];
serviceConfig = {
RuntimeDirectory = "uptimestatus";
StateDirectory = "uptimestatus";
User = "uptimestatus";
Group = "uptimestatus";
};
script = "gunicorn -w 4 -b [::1]:8235 uptimestatus:app";
path = with pkgs; [ (python3.withPackages (ps: [ ps.gunicorn uptimestatus ])) ];
};
services.nginx = { services.nginx = {
enable = true; enable = true;
virtualHosts = {
"prometheus.monitoring.clerie.de" = {
enableACME = true;
forceSSL = true;
locations."/".proxyPass = "http://[::1]:9090/";
};
"grafana.monitoring.clerie.de" = {
enableACME = true;
forceSSL = true;
locations."/".proxyPass = "http://[::1]:3001/";
};
"status.monitoring.clerie.de" = {
enableACME = true;
forceSSL = true;
locations."/".proxyPass = "http://[::1]:8235/";
};
};
}; };
networking.firewall.allowedTCPPorts = [ 80 443 ]; networking.firewall.allowedTCPPorts = [ 80 443 ];

View File

@ -0,0 +1,39 @@
{ ... }:

{
  # Grafana instance, reachable only through the nginx reverse proxy below.
  services.grafana = {
    enable = true;

    # RFC 42 style settings (rendered into grafana.ini). These replace the
    # legacy top-level options `domain`, `rootUrl`, `port`, `addr` and
    # `auth.anonymous.enable`, which are only kept as rename aliases and
    # produce evaluation warnings.
    settings = {
      server = {
        domain = "grafana.monitoring.clerie.de";
        root_url = "https://grafana.monitoring.clerie.de";
        http_port = 3001;
        # Bind to loopback only; external access goes through nginx.
        http_addr = "::1";
      };
      # Allow viewing without logging in.
      "auth.anonymous".enabled = true;
    };

    provision = {
      enable = true;
      # The local Prometheus is the default (and only) data source.
      datasources.settings.datasources = [
        {
          type = "prometheus";
          name = "Prometheus";
          url = "http://[::1]:9090";
          isDefault = true;
        }
      ];
      # Dashboards are provisioned from the ./dashboards directory next to
      # this file.
      dashboards.settings.providers = [
        {
          options.path = ./dashboards;
        }
      ];
    };
  };

  # TLS-terminating reverse proxy in front of Grafana.
  services.nginx = {
    virtualHosts = {
      "grafana.monitoring.clerie.de" = {
        enableACME = true;
        forceSSL = true;
        locations."/".proxyPass = "http://[::1]:3001/";
      };
    };
  };
}

View File

@ -0,0 +1,286 @@
{ config, pkgs, lib, ... }:

with lib;

let
  # Host definitions shared across the repository.
  hosts = (import ../../lib/hosts.nix { inherit pkgs; }).hosts;

  # Reads clerie.monitoring.<option> from a host's configuration, falling
  # back to `fallback` when the option is not set.
  monitoringOption = option: fallback: host:
    attrByPath [ "clerie" "monitoring" option ] fallback host.config;

  # Hosts that have monitoring enabled.
  monitoringHosts = filterAttrs (_: monitoringOption "enable" false) hosts;

  # Address of a host inside the monitoring network, derived from its id.
  monitoringAddress = host:
    "fd00:327:327:327::${host.config.clerie.monitoring.id}";

  # Name under which a host is reachable inside the monitoring network.
  monitoringFqdn = host:
    "${host.config.networking.hostName}.mon.clerie.de";

  # /etc/hosts entries: monitoring address -> [ monitoring name ].
  monitoringHostsNames = mapAttrs'
    (_: host: nameValuePair (monitoringAddress host) [ (monitoringFqdn host) ])
    monitoringHosts;

  # One WireGuard peer per monitored host.
  monitoringPeers = mapAttrsToList
    (_: host: {
      allowedIPs = [ "${monitoringAddress host}/128" ];
      publicKey = host.config.clerie.monitoring.pubkey;
    })
    monitoringHosts;

  # node-exporter targets, encoded as "<address>;<service level>"; the two
  # halves are split again by the relabel rules of the node-exporter job.
  monitoringTargets = mapAttrsToList
    (_: host:
      "${monitoringFqdn host}:9100;${monitoringOption "serviceLevel" "infra" host}")
    monitoringHosts;

  # "<name>:<port>" targets for every monitored host whose
  # clerie.monitoring.<option> (default `fallback`) is true.
  exporterTargets = port: option: fallback:
    mapAttrsToList
      (_: host: "${monitoringFqdn host}:${port}")
      (filterAttrs (_: monitoringOption option fallback) monitoringHosts);

  # Hosts are assumed to be NixOS systems unless they say otherwise.
  nixosMonitoringTargets = exporterTargets "9152" "nixos" true;
  birdMonitoringTargets = exporterTargets "9324" "bird" false;
  blackboxMonitoringTargets = exporterTargets "9115" "blackbox" false;

  # Applies f to every pair (a, b) with a from xs and b from ys and returns
  # the results as one flat list.
  eachWithEachOther = f: xs: ys:
    lib.lists.flatten (map (a: map (b: f a b) ys) xs);

  # Rewrites "<host>.mon.clerie.de:<port>" into the instance label
  # "<host>.net.clerie.de".
  relabelAddressToInstance = {
    source_labels = [ "__address__" ];
    target_label = "instance";
    regex = ''([\w-]+)\.mon\.clerie\.de\:\d+'';
    replacement = "\${1}.net.clerie.de";
  };

  # Relabel rule that splits a "<first>;<second>" target address and writes
  # the part selected by `replacement` into `target_label`.
  splitAddressInto = target_label: replacement: {
    source_labels = [ "__address__" ];
    regex = "(.+);(.+)";
    inherit target_label replacement;
  };

  # Plain job scraping a fixed target list, with the instance label rewritten.
  exporterJob = job_name: scrape_interval: targets: {
    inherit job_name scrape_interval;
    static_configs = [ { inherit targets; } ];
    relabel_configs = [ relabelAddressToInstance ];
  };

  # Blackbox probe job: every blackbox exporter probes every destination
  # using the given module. Targets are encoded as "<exporter>;<destination>".
  blackboxJob = module: destinations: {
    job_name = "blackbox_${module}";
    scrape_interval = "20s";
    metrics_path = "/probe";
    params.module = [ module ];
    static_configs = [
      {
        targets = eachWithEachOther
          (instance: target: "${instance};${target}")
          blackboxMonitoringTargets
          destinations;
      }
    ];
    relabel_configs = [
      # The destination becomes the ?target= parameter of the probe ...
      (splitAddressInto "__param_target" "\${2}")
      # ... and is also exposed as the "target" label.
      {
        source_labels = [ "__param_target" ];
        target_label = "target";
      }
      # The exporter part becomes the address that is actually scraped.
      (splitAddressInto "__address__" "\${1}")
      relabelAddressToInstance
    ];
  };

  # Job scraping https://iot-data.clerie.de/data/<name>/.
  iotDataJob = name: {
    job_name = name;
    scrape_interval = "20s";
    scheme = "https";
    metrics_path = "/data/${name}/";
    static_configs = [ { targets = [ "iot-data.clerie.de" ]; } ];
  };
in
{
  # Make the monitoring names resolvable locally; this host's own monitoring
  # name points at loopback.
  networking.hosts = {
    "::1" = [ "monitoring-3.mon.clerie.de" ]; # fd00:327:327:327::1
  } // monitoringHostsNames;

  # WireGuard network over which the monitored hosts are scraped.
  networking.wireguard.enable = true;
  networking.wireguard.interfaces.wg-monitoring = {
    ips = [ "fd00:327:327:327::1/64" ];
    listenPort = 54523;
    peers = monitoringPeers;
    privateKeyFile = "/var/src/secrets/wireguard/wg-monitoring";
  };
  networking.firewall.allowedUDPPorts = [ 54523 ];

  services.prometheus = {
    enable = true;
    listenAddress = "[::1]";

    scrapeConfigs = [
      # Prometheus scraping itself.
      (exporterJob "prometheus" "20s" [ "monitoring-3.mon.clerie.de:9090" ] // {
        scheme = "http";
      })

      {
        job_name = "node-exporter";
        scrape_interval = "20s";
        static_configs = [
          {
            targets =
              [ "monitoring-3.mon.clerie.de:9100;infra" ] ++ monitoringTargets;
          }
        ];
        relabel_configs = [
          # The part after ";" is the service level, the part before it is
          # the real scrape address.
          (splitAddressInto "service_level" "\${2}")
          (splitAddressInto "__address__" "\${1}")
          relabelAddressToInstance
        ];
      }

      (exporterJob "nixos-exporter" "1m" nixosMonitoringTargets)
      (exporterJob "bird-exporter" "20s" birdMonitoringTargets)

      (blackboxJob "icmp6" [
        "clerie.de"
        "tagesschau.de"
        "google.com"
        "achtbaan.nikhef.nl"
        "fluorine.net.clerie.de"
        "www.fem.tu-ilmenau.de"
        "www.heise.de"
      ])

      (blackboxJob "icmp4" [
        "clerie.de"
        "tagesschau.de"
        "google.com"
        "achtbaan.nikhef.nl"
        "www.fem.tu-ilmenau.de"
        "www.heise.de"
        "ie10-ffm2.nodes.nethinks.com"
        "matrix.bau-ha.us"
      ])

      (iotDataJob "zimmer-temp")
      (iotDataJob "outdoor-temp")

      # The XMPP alert bridge.
      (exporterJob "xmpp-alerts" "20s" [ "monitoring-3.mon.clerie.de:9199" ])
    ];

    # Alerts are delivered to an Alertmanager on loopback.
    alertmanagers = [
      {
        static_configs = [ { targets = [ "[::1]:9093" ]; } ];
      }
    ];

    rules = [ (readFile ./rules.yml) ];
  };

  # TLS-terminating reverse proxy in front of Prometheus.
  services.nginx.virtualHosts."prometheus.monitoring.clerie.de" = {
    enableACME = true;
    forceSSL = true;
    locations."/".proxyPass = "http://[::1]:9090/";
  };
}

View File

@ -0,0 +1,33 @@
{ pkgs, ... }:

let
  # Python interpreter bundled with gunicorn and the uptimestatus application.
  uptimestatusPython = pkgs.python3.withPackages (ps: [
    ps.gunicorn
    pkgs.uptimestatus
  ]);
in
{
  # Dedicated system user and group the service runs as.
  users.groups.uptimestatus = {};
  users.users.uptimestatus = {
    isSystemUser = true;
    description = "Uptime Status Service";
    group = "uptimestatus";
    home = "/var/lib/uptimestatus/";
    useDefaultShell = true;
  };

  # Runs the uptimestatus app under gunicorn on loopback port 8235.
  systemd.services.uptimestatus = {
    wantedBy = [ "multi-user.target" ];
    path = [ uptimestatusPython ];
    script = "gunicorn -w 4 -b [::1]:8235 uptimestatus:app";
    serviceConfig.User = "uptimestatus";
    serviceConfig.Group = "uptimestatus";
    serviceConfig.RuntimeDirectory = "uptimestatus";
    serviceConfig.StateDirectory = "uptimestatus";
  };

  # TLS-terminating reverse proxy in front of the application.
  services.nginx.virtualHosts."status.monitoring.clerie.de" = {
    enableACME = true;
    forceSSL = true;
    locations."/".proxyPass = "http://[::1]:8235/";
  };
}