diff --git a/clanServices/monitoring/README.md b/clanServices/monitoring/README.md
new file mode 100644
index 000000000..9f8c0bf7c
--- /dev/null
+++ b/clanServices/monitoring/README.md
@@ -0,0 +1,17 @@
+## Usage
+
+```nix
+inventory.instances = {
+  monitoring = {
+    module.name = "monitoring";
+    roles.telegraf.tags.all = {
+      settings.interfaces = [ "wg-clan" ];
+    };
+  };
+};
+```
+
+This service will eventually set up a monitoring stack for your clan. For now,
+only a telegraf role is implemented, which exposes the currently deployed
+version of your configuration, so it can be used to check for required updates.
+
diff --git a/clanServices/monitoring/default.nix b/clanServices/monitoring/default.nix
new file mode 100644
index 000000000..1369d8ca2
--- /dev/null
+++ b/clanServices/monitoring/default.nix
@@ -0,0 +1,32 @@
+# Clan service definition for the monitoring stack; `packages` is supplied via
+# `lib.modules.importApply` in ./flake-module.nix.
+{ packages }:
+{ ... }:
+{
+  _class = "clan.service";
+  manifest.name = "clan-core/monitoring";
+  manifest.description = "Monitoring service for the nodes in your clan";
+  manifest.readme = builtins.readFile ./README.md;
+
+  roles.telegraf = {
+    interface =
+      { lib, ... }:
+      {
+        # Telegraf is configured with a host-less listen address in
+        # ./telegraf.nix; this option only controls which firewall rule is set.
+        options.allowAllInterfaces = lib.mkOption {
+          type = lib.types.bool;
+          default = false;
+          description = "If true, the Telegraf metrics port is opened in the firewall on all interfaces. Otherwise, it is only opened on the interfaces specified in `interfaces`";
+        };
+
+        options.interfaces = lib.mkOption {
+          type = lib.types.listOf lib.types.str;
+          default = [ "zt+" ];
+          description = "List of interfaces to expose the metrics to";
+        };
+      };
+  };
+
+  imports = [ ./telegraf.nix ];
+}
diff --git a/clanServices/monitoring/flake-module.nix b/clanServices/monitoring/flake-module.nix
new file mode 100644
index 000000000..21ad0371f
--- /dev/null
+++ b/clanServices/monitoring/flake-module.nix
@@ -0,0 +1,25 @@
+{
+  self,
+  lib,
+  ...
+}:
+let
+  # Apply ./default.nix to its static arguments once so the same module value
+  # is used both for the exported module and for the VM test below.
+  module = lib.modules.importApply ./default.nix {
+    inherit (self) packages;
+  };
+in
+{
+  clan.modules.monitoring = module;
+
+  perSystem =
+    { ... }:
+    {
+      clan.nixosTests.monitoring = {
+        imports = [ ./tests/vm/default.nix ];
+
+        clan.modules.monitoring = module;
+      };
+    };
+}
diff --git a/clanServices/monitoring/telegraf.nix b/clanServices/monitoring/telegraf.nix
new file mode 100644
index 000000000..66e1d066e
--- /dev/null
+++ b/clanServices/monitoring/telegraf.nix
@@ -0,0 +1,66 @@
+let
+  # TCP port of the Prometheus endpoint; shared by the firewall rules and the
+  # telegraf output so the two cannot drift apart.
+  metricsPort = 9273;
+in
+{
+  roles.telegraf.perInstance =
+    { settings, ... }:
+    {
+
+      nixosModule =
+        { pkgs, lib, ... }:
+        {
+
+          # Default: open the metrics port only on the configured interfaces.
+          networking.firewall.interfaces = lib.mkIf (!settings.allowAllInterfaces) (
+            builtins.listToAttrs (
+              map (name: {
+                inherit name;
+                value.allowedTCPPorts = [ metricsPort ];
+              }) settings.interfaces
+            )
+          );
+
+          # Opt-in: open the metrics port globally.
+          networking.firewall.allowedTCPPorts = lib.mkIf settings.allowAllInterfaces [ metricsPort ];
+
+          services.telegraf = {
+            enable = true;
+            extraConfig = {
+              agent.interval = "60s";
+              inputs = {
+
+                diskio = { };
+                kernel_vmstat = { };
+                system = { };
+                mem = { };
+                systemd_units = { };
+                swap = { };
+
+                exec =
+                  let
+                    # Quote the substitution so the path is always passed to
+                    # printf as exactly one argument.
+                    currentSystemScript = pkgs.writeShellScript "current-system" ''
+                      printf "current_system,path=%s present=0\n" "$(readlink /run/current-system)"
+                    '';
+                  in
+                  [
+                    {
+                      # Expose the path to current-system as metric. We use
+                      # this to check if the machine is up-to-date.
+                      commands = [ currentSystemScript ];
+                      data_format = "influx";
+                    }
+                  ];
+              };
+              outputs.prometheus_client = {
+                listen = ":${toString metricsPort}";
+                metric_version = 2;
+              };
+            };
+          };
+        };
+    };
+}
diff --git a/clanServices/monitoring/tests/vm/default.nix b/clanServices/monitoring/tests/vm/default.nix
new file mode 100644
index 000000000..5d2836e71
--- /dev/null
+++ b/clanServices/monitoring/tests/vm/default.nix
@@ -0,0 +1,26 @@
+{
+  name = "monitoring";
+
+  clan = {
+    directory = ./.;
+    inventory = {
+      machines.peer1 = { };
+
+      instances."test" = {
+        module.name = "monitoring";
+        module.input = "self";
+
+        roles.telegraf.machines.peer1 = { };
+
+      };
+    };
+  };
+
+  # Smoke test: the script only starts the machines and makes no assertions
+  # about telegraf yet.
+  testScript =
+    { ... }:
+    ''
+      start_all()
+    '';
+}
diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml
index ba3271184..fb372c48b 100644
--- a/docs/mkdocs.yml
+++ b/docs/mkdocs.yml
@@ -103,6 +103,7 @@ nav:
           - reference/clanServices/localbackup.md
           - reference/clanServices/matrix-synapse.md
           - reference/clanServices/mycelium.md
+          - reference/clanServices/monitoring.md
           - reference/clanServices/packages.md
           - reference/clanServices/sshd.md
           - reference/clanServices/state-version.md