diff --git a/hosts/monitoring-3/alertmanager.nix b/hosts/monitoring-3/alertmanager.nix new file mode 100644 index 0000000..656e683 --- /dev/null +++ b/hosts/monitoring-3/alertmanager.nix @@ -0,0 +1,70 @@ +{ ... }: +{ + services.prometheus.xmpp-alerts = { + enable = true; + settings = { + jid = "feuer@fem-net.de"; + password_command = "cat /var/src/secrets/xmpp-alert/password"; + listen_address = "::1"; + listen_port = 9199; + to_jid = "clerie@fem-net.de"; + format = "short"; + }; + }; + + services.prometheus.alertmanager = { + enable = true; + listenAddress = "[::1]"; + port = 9093; + configuration = { + route = { + receiver = "xmpp-receiver"; + repeat_interval = "4h"; + group_by = [ "instance" ]; + routes = [ + { + matchers = [ + ''severity = "muted"'' + ]; + receiver = "muted"; + } + { + receiver = "xmpp-receiver"; + matchers = [ + ''severity = "warning"'' + ]; + repeat_interval = "70h"; + } + ]; + }; + receivers = [ + { + name = "muted"; + } + { + name = "xmpp-receiver"; + webhook_configs = [ + { + url = "http://[::1]:9199/alert"; + } + ]; + } + ]; + inhibit_rules = [ + { + # Mute all alerts for an instance which also has an alert starting with MuteInstanceOn + target_matchers = [ + ''alertname =~ ".+"'' + ]; + source_matchers = [ + ''mute = "instance"'' + ]; + equal = [ + "instance" + ]; + } + ]; + }; + }; + +} diff --git a/hosts/monitoring-3/configuration.nix b/hosts/monitoring-3/configuration.nix index ea0eb5e..1d5bacb 100644 --- a/hosts/monitoring-3/configuration.nix +++ b/hosts/monitoring-3/configuration.nix @@ -1,55 +1,15 @@ { config, pkgs, lib, ... }: -with lib; - -let - hosts = (import ../../lib/hosts.nix { inherit pkgs; }).hosts; - - monitoringHosts = filterAttrs (name: host: - attrByPath ["clerie" "monitoring" "enable"] false host.config) - hosts; - - monitoringHostsNames = mapAttrs' (name: host: - nameValuePair "fd00:327:327:327::${host.config.clerie.monitoring.id}" ["${host.config.networking.hostName}.mon.clerie.de"]) - monitoringHosts; - - monitoringPeers = mapAttrsToList (name: host: { - allowedIPs = [ "fd00:327:327:327::${host.config.clerie.monitoring.id}/128" ]; - publicKey = host.config.clerie.monitoring.pubkey; - }) - monitoringHosts; - - monitoringTargets = mapAttrsToList (name: host: - "${host.config.networking.hostName}.mon.clerie.de:9100;${attrByPath ["clerie" "monitoring" "serviceLevel"] "infra" host.config}") - monitoringHosts; - - nixosMonitoringTargets = mapAttrsToList (name: host: - "${host.config.networking.hostName}.mon.clerie.de:9152") - (filterAttrs (name: host: - # assume this is a NixOS system if not specified - attrByPath ["clerie" "monitoring" "nixos"] true host.config) - monitoringHosts); - - birdMonitoringTargets = mapAttrsToList (name: host: - "${host.config.networking.hostName}.mon.clerie.de:9324") - (filterAttrs (name: host: - attrByPath ["clerie" "monitoring" "bird"] false host.config) - monitoringHosts); - - blackboxMonitoringTargets = mapAttrsToList (name: host: - "${host.config.networking.hostName}.mon.clerie.de:9115") - (filterAttrs (name: host: - attrByPath ["clerie" "monitoring" "blackbox"] false host.config) - monitoringHosts); - - eachWithEachOther = (f: x: y: lib.lists.flatten (lib.lists.forEach x (a: lib.lists.forEach y (b: f a b)))); - -in { +{ imports = [ ./hardware-configuration.nix ../../configuration/common ../../configuration/proxmox-vm + ./alertmanager.nix + ./grafana.nix + ./prometheus.nix + ./uptimestatus.nix ]; boot.loader.grub.enable = true; @@ -65,370 +25,10 @@ in { networking.defaultGateway6 = { address = "2001:638:904:ffca::1"; interface = "ens19"; }; networking.nameservers = [ "2001:638:904:ffcc::3" "2001:638:904:ffcc::4" "141.24.40.3" "141.24.40.4" ]; - networking.hosts = { - "::1" = [ "monitoring-3.mon.clerie.de" ]; # fd00:327:327:327::1 - } - // monitoringHostsNames; - - networking.wireguard.enable = true; - networking.wireguard.interfaces = { - wg-monitoring = { - ips = [ "fd00:327:327:327::1/64" ]; - listenPort = 54523; - peers = monitoringPeers; - privateKeyFile = "/var/src/secrets/wireguard/wg-monitoring"; - }; - }; - - networking.firewall.allowedUDPPorts = [ 54523 ]; - services.prometheus.exporters.node.enable = true; - services.prometheus.xmpp-alerts = { - enable = true; - settings = { - jid = "feuer@fem-net.de"; - password_command = "cat /var/src/secrets/xmpp-alert/password"; - listen_address = "::1"; - listen_port = 9199; - to_jid = "clerie@fem-net.de"; - format = "short"; - }; - }; - - services.prometheus.alertmanager = { - enable = true; - listenAddress = "[::1]"; - port = 9093; - configuration = { - route = { - receiver = "xmpp-receiver"; - repeat_interval = "4h"; - group_by = [ "instance" ]; - routes = [ - { - matchers = [ - ''severity = "muted"'' - ]; - receiver = "muted"; - } - { - receiver = "xmpp-receiver"; - matchers = [ - ''severity = "warning"'' - ]; - repeat_interval = "70h"; - } - ]; - }; - receivers = [ - { - name = "muted"; - } - { - name = "xmpp-receiver"; - webhook_configs = [ - { - url = "http://[::1]:9199/alert"; - } - ]; - } - ]; - inhibit_rules = [ - { - # Mute all alerts for an instance which also has an alert starting with MuteInstanceOn - target_matchers = [ - ''alertname =~ ".+"'' - ]; - source_matchers = [ - ''mute = "instance"'' - ]; - equal = [ - "instance" - ]; - } - ]; - }; - }; - - services.prometheus = { - enable = true; - listenAddress = "[::1]"; - scrapeConfigs = let - relabelAddressToInstance = { - source_labels = [ "__address__" ]; - target_label = "instance"; - regex = ''([\w-]+)\.mon\.clerie\.de\:\d+''; - replacement = "\${1}.net.clerie.de"; - }; - in [ - { - job_name = "prometheus"; - scrape_interval = "20s"; - scheme = "http"; - static_configs = [ - { - targets = [ - "monitoring-3.mon.clerie.de:9090" - ]; - } - ]; - relabel_configs = [ - relabelAddressToInstance - ]; - } - { - job_name = "node-exporter"; - scrape_interval = "20s"; - static_configs = [ - { - targets = [ - "monitoring-3.mon.clerie.de:9100;infra" - ] - ++ monitoringTargets; - } - ]; - relabel_configs = [ - { - source_labels = [ "__address__" ]; - regex = "(.+);(.+)"; - target_label = "service_level"; - replacement = "\${2}"; - } - { - source_labels = [ "__address__" ]; - regex = "(.+);(.+)"; - target_label = "__address__"; - replacement = "\${1}"; - } - relabelAddressToInstance - ]; - } - { - job_name = "nixos-exporter"; - scrape_interval = "1m"; - static_configs = [ - { - targets = nixosMonitoringTargets; - } - ]; - relabel_configs = [ - relabelAddressToInstance - ]; - } - { - job_name = "bird-exporter"; - scrape_interval = "20s"; - static_configs = [ - { - targets = birdMonitoringTargets; - } - ]; - relabel_configs = [ - relabelAddressToInstance - ]; - } - { - job_name = "blackbox_icmp6"; - scrape_interval = "20s"; - metrics_path = "/probe"; - params = { - module = [ "icmp6" ]; - }; - static_configs = [ - { - targets = eachWithEachOther (instance: target: "${instance};${target}") blackboxMonitoringTargets [ - "clerie.de" - "tagesschau.de" - "google.com" - "achtbaan.nikhef.nl" - "fluorine.net.clerie.de" - "www.fem.tu-ilmenau.de" - "www.heise.de" - ]; - } - ]; - relabel_configs = [ - { - source_labels = [ "__address__" ]; - regex = "(.+);(.+)"; - target_label = "__param_target"; - replacement = "\${2}"; - } - { - source_labels = [ "__param_target" ]; - target_label = "target"; - } - { - source_labels = [ "__address__" ]; - regex = "(.+);(.+)"; - target_label = "__address__"; - replacement = "\${1}"; - } - relabelAddressToInstance - ]; - } - { - job_name = "blackbox_icmp4"; - scrape_interval = "20s"; - metrics_path = "/probe"; - params = { - module = [ "icmp4" ]; - }; - static_configs = [ - { - targets = eachWithEachOther (instance: target: "${instance};${target}") blackboxMonitoringTargets [ - "clerie.de" - "tagesschau.de" - "google.com" - "achtbaan.nikhef.nl" - "www.fem.tu-ilmenau.de" - "www.heise.de" - "ie10-ffm2.nodes.nethinks.com" - "matrix.bau-ha.us" - ]; - } - ]; - relabel_configs = [ - { - source_labels = [ "__address__" ]; - regex = "(.+);(.+)"; - target_label = "__param_target"; - replacement = "\${2}"; - } - { - source_labels = [ "__param_target" ]; - target_label = "target"; - } - { - source_labels = [ "__address__" ]; - regex = "(.+);(.+)"; - target_label = "__address__"; - replacement = "\${1}"; - } - relabelAddressToInstance - ]; - } - { - job_name = "zimmer-temp"; - scrape_interval = "20s"; - scheme = "https"; - metrics_path = "/data/zimmer-temp/"; - static_configs = [ - { - targets = [ - "iot-data.clerie.de" - ]; - } - ]; - } - { - job_name = "outdoor-temp"; - scrape_interval = "20s"; - scheme = "https"; - metrics_path = "/data/outdoor-temp/"; - static_configs = [ - { - targets = [ - "iot-data.clerie.de" - ]; - } - ]; - } - { - job_name = "xmpp-alerts"; - scrape_interval = "20s"; - static_configs = [ - { - targets = [ - "monitoring-3.mon.clerie.de:9199" - ]; - } - ]; - relabel_configs = [ - relabelAddressToInstance - ]; - } - ]; - alertmanagers = [ - { - static_configs = [ { - targets = [ - "[::1]:9093" - ]; - } ]; - } - ]; - rules = [ (readFile ./rules.yml) ]; - }; - - services.grafana = { - enable = true; - domain = "grafana.monitoring.clerie.de"; - rootUrl = "https://grafana.monitoring.clerie.de"; - port = 3001; - addr = "::1"; - auth.anonymous.enable = true; - - provision = { - enable = true; - datasources.settings.datasources = [ - { - type = "prometheus"; - name = "Prometheus"; - url = "http://[::1]:9090"; - isDefault = true; - } - ]; - dashboards.settings.providers = [ - { - options.path = ./dashboards; - } - ]; - }; - }; - - users.users.uptimestatus = { - description = "Uptime Status Service"; - group = "uptimestatus"; - home = "/var/lib/uptimestatus/"; - useDefaultShell = true; - isSystemUser = true; - }; - users.groups.uptimestatus = {}; - - systemd.services.uptimestatus = { - wantedBy = [ "multi-user.target" ]; - serviceConfig = { - RuntimeDirectory = "uptimestatus"; - StateDirectory = "uptimestatus"; - User = "uptimestatus"; - Group = "uptimestatus"; - }; - script = "gunicorn -w 4 -b [::1]:8235 uptimestatus:app"; - path = with pkgs; [ (python3.withPackages (ps: [ ps.gunicorn uptimestatus ])) ]; - }; - services.nginx = { enable = true; - - virtualHosts = { - "prometheus.monitoring.clerie.de" = { - enableACME = true; - forceSSL = true; - locations."/".proxyPass = "http://[::1]:9090/"; - }; - "grafana.monitoring.clerie.de" = { - enableACME = true; - forceSSL = true; - locations."/".proxyPass = "http://[::1]:3001/"; - }; - "status.monitoring.clerie.de" = { - enableACME = true; - forceSSL = true; - locations."/".proxyPass = "http://[::1]:8235/"; - }; - }; }; networking.firewall.allowedTCPPorts = [ 80 443 ]; diff --git a/hosts/monitoring-3/grafana.nix b/hosts/monitoring-3/grafana.nix new file mode 100644 index 0000000..e57eb63 --- /dev/null +++ b/hosts/monitoring-3/grafana.nix @@ -0,0 +1,39 @@ +{ ... }: +{ + services.grafana = { + enable = true; + domain = "grafana.monitoring.clerie.de"; + rootUrl = "https://grafana.monitoring.clerie.de"; + port = 3001; + addr = "::1"; + auth.anonymous.enable = true; + + provision = { + enable = true; + datasources.settings.datasources = [ + { + type = "prometheus"; + name = "Prometheus"; + url = "http://[::1]:9090"; + isDefault = true; + } + ]; + dashboards.settings.providers = [ + { + options.path = ./dashboards; + } + ]; + }; + }; + + services.nginx = { + virtualHosts = { + "grafana.monitoring.clerie.de" = { + enableACME = true; + forceSSL = true; + locations."/".proxyPass = "http://[::1]:3001/"; + }; + }; + }; + +} diff --git a/hosts/monitoring-3/prometheus.nix b/hosts/monitoring-3/prometheus.nix new file mode 100644 index 0000000..38df034 --- /dev/null +++ b/hosts/monitoring-3/prometheus.nix @@ -0,0 +1,286 @@ +{ config, pkgs, lib, ... }: + +with lib; + +let + hosts = (import ../../lib/hosts.nix { inherit pkgs; }).hosts; + + monitoringHosts = filterAttrs (name: host: + attrByPath ["clerie" "monitoring" "enable"] false host.config) + hosts; + + monitoringHostsNames = mapAttrs' (name: host: + nameValuePair "fd00:327:327:327::${host.config.clerie.monitoring.id}" ["${host.config.networking.hostName}.mon.clerie.de"]) + monitoringHosts; + + monitoringPeers = mapAttrsToList (name: host: { + allowedIPs = [ "fd00:327:327:327::${host.config.clerie.monitoring.id}/128" ]; + publicKey = host.config.clerie.monitoring.pubkey; + }) + monitoringHosts; + + monitoringTargets = mapAttrsToList (name: host: + "${host.config.networking.hostName}.mon.clerie.de:9100;${attrByPath ["clerie" "monitoring" "serviceLevel"] "infra" host.config}") + monitoringHosts; + + nixosMonitoringTargets = mapAttrsToList (name: host: + "${host.config.networking.hostName}.mon.clerie.de:9152") + (filterAttrs (name: host: + # assume this is a NixOS system if not specified + attrByPath ["clerie" "monitoring" "nixos"] true host.config) + monitoringHosts); + + birdMonitoringTargets = mapAttrsToList (name: host: + "${host.config.networking.hostName}.mon.clerie.de:9324") + (filterAttrs (name: host: + attrByPath ["clerie" "monitoring" "bird"] false host.config) + monitoringHosts); + + blackboxMonitoringTargets = mapAttrsToList (name: host: + "${host.config.networking.hostName}.mon.clerie.de:9115") + (filterAttrs (name: host: + attrByPath ["clerie" "monitoring" "blackbox"] false host.config) + monitoringHosts); + + eachWithEachOther = (f: x: y: lib.lists.flatten (lib.lists.forEach x (a: lib.lists.forEach y (b: f a b)))); + +in { + networking.hosts = { + "::1" = [ "monitoring-3.mon.clerie.de" ]; # fd00:327:327:327::1 + } + // monitoringHostsNames; + + networking.wireguard.enable = true; + networking.wireguard.interfaces = { + wg-monitoring = { + ips = [ "fd00:327:327:327::1/64" ]; + listenPort = 54523; + peers = monitoringPeers; + privateKeyFile = "/var/src/secrets/wireguard/wg-monitoring"; + }; + }; + + networking.firewall.allowedUDPPorts = [ 54523 ]; + + services.prometheus = { + enable = true; + listenAddress = "[::1]"; + scrapeConfigs = let + relabelAddressToInstance = { + source_labels = [ "__address__" ]; + target_label = "instance"; + regex = ''([\w-]+)\.mon\.clerie\.de\:\d+''; + replacement = "\${1}.net.clerie.de"; + }; + in [ + { + job_name = "prometheus"; + scrape_interval = "20s"; + scheme = "http"; + static_configs = [ + { + targets = [ + "monitoring-3.mon.clerie.de:9090" + ]; + } + ]; + relabel_configs = [ + relabelAddressToInstance + ]; + } + { + job_name = "node-exporter"; + scrape_interval = "20s"; + static_configs = [ + { + targets = [ + "monitoring-3.mon.clerie.de:9100;infra" + ] + ++ monitoringTargets; + } + ]; + relabel_configs = [ + { + source_labels = [ "__address__" ]; + regex = "(.+);(.+)"; + target_label = "service_level"; + replacement = "\${2}"; + } + { + source_labels = [ "__address__" ]; + regex = "(.+);(.+)"; + target_label = "__address__"; + replacement = "\${1}"; + } + relabelAddressToInstance + ]; + } + { + job_name = "nixos-exporter"; + scrape_interval = "1m"; + static_configs = [ + { + targets = nixosMonitoringTargets; + } + ]; + relabel_configs = [ + relabelAddressToInstance + ]; + } + { + job_name = "bird-exporter"; + scrape_interval = "20s"; + static_configs = [ + { + targets = birdMonitoringTargets; + } + ]; + relabel_configs = [ + relabelAddressToInstance + ]; + } + { + job_name = "blackbox_icmp6"; + scrape_interval = "20s"; + metrics_path = "/probe"; + params = { + module = [ "icmp6" ]; + }; + static_configs = [ + { + targets = eachWithEachOther (instance: target: "${instance};${target}") blackboxMonitoringTargets [ + "clerie.de" + "tagesschau.de" + "google.com" + "achtbaan.nikhef.nl" + "fluorine.net.clerie.de" + "www.fem.tu-ilmenau.de" + "www.heise.de" + ]; + } + ]; + relabel_configs = [ + { + source_labels = [ "__address__" ]; + regex = "(.+);(.+)"; + target_label = "__param_target"; + replacement = "\${2}"; + } + { + source_labels = [ "__param_target" ]; + target_label = "target"; + } + { + source_labels = [ "__address__" ]; + regex = "(.+);(.+)"; + target_label = "__address__"; + replacement = "\${1}"; + } + relabelAddressToInstance + ]; + } + { + job_name = "blackbox_icmp4"; + scrape_interval = "20s"; + metrics_path = "/probe"; + params = { + module = [ "icmp4" ]; + }; + static_configs = [ + { + targets = eachWithEachOther (instance: target: "${instance};${target}") blackboxMonitoringTargets [ + "clerie.de" + "tagesschau.de" + "google.com" + "achtbaan.nikhef.nl" + "www.fem.tu-ilmenau.de" + "www.heise.de" + "ie10-ffm2.nodes.nethinks.com" + "matrix.bau-ha.us" + ]; + } + ]; + relabel_configs = [ + { + source_labels = [ "__address__" ]; + regex = "(.+);(.+)"; + target_label = "__param_target"; + replacement = "\${2}"; + } + { + source_labels = [ "__param_target" ]; + target_label = "target"; + } + { + source_labels = [ "__address__" ]; + regex = "(.+);(.+)"; + target_label = "__address__"; + replacement = "\${1}"; + } + relabelAddressToInstance + ]; + } + { + job_name = "zimmer-temp"; + scrape_interval = "20s"; + scheme = "https"; + metrics_path = "/data/zimmer-temp/"; + static_configs = [ + { + targets = [ + "iot-data.clerie.de" + ]; + } + ]; + } + { + job_name = "outdoor-temp"; + scrape_interval = "20s"; + scheme = "https"; + metrics_path = "/data/outdoor-temp/"; + static_configs = [ + { + targets = [ + "iot-data.clerie.de" + ]; + } + ]; + } + { + job_name = "xmpp-alerts"; + scrape_interval = "20s"; + static_configs = [ + { + targets = [ + "monitoring-3.mon.clerie.de:9199" + ]; + } + ]; + relabel_configs = [ + relabelAddressToInstance + ]; + } + ]; + alertmanagers = [ + { + static_configs = [ { + targets = [ + "[::1]:9093" + ]; + } ]; + } + ]; + rules = [ (readFile ./rules.yml) ]; + }; + + services.nginx = { + virtualHosts = { + "prometheus.monitoring.clerie.de" = { + enableACME = true; + forceSSL = true; + locations."/".proxyPass = "http://[::1]:9090/"; + }; + }; + }; + +} diff --git a/hosts/monitoring-3/uptimestatus.nix b/hosts/monitoring-3/uptimestatus.nix new file mode 100644 index 0000000..ddefbcd --- /dev/null +++ b/hosts/monitoring-3/uptimestatus.nix @@ -0,0 +1,33 @@ +{ pkgs, ... }: +{ + users.users.uptimestatus = { + description = "Uptime Status Service"; + group = "uptimestatus"; + home = "/var/lib/uptimestatus/"; + useDefaultShell = true; + isSystemUser = true; + }; + users.groups.uptimestatus = {}; + + systemd.services.uptimestatus = { + wantedBy = [ "multi-user.target" ]; + serviceConfig = { + RuntimeDirectory = "uptimestatus"; + StateDirectory = "uptimestatus"; + User = "uptimestatus"; + Group = "uptimestatus"; + }; + script = "gunicorn -w 4 -b [::1]:8235 uptimestatus:app"; + path = with pkgs; [ (python3.withPackages (ps: [ ps.gunicorn uptimestatus ])) ]; + }; + + services.nginx = { + virtualHosts = { + "status.monitoring.clerie.de" = { + enableACME = true; + forceSSL = true; + locations."/".proxyPass = "http://[::1]:8235/"; + }; + }; + }; +}