diff --git a/clanServices/monitoring/telegraf.nix b/clanServices/monitoring/telegraf.nix index 93abe67b2..5a9d580c5 100644 --- a/clanServices/monitoring/telegraf.nix +++ b/clanServices/monitoring/telegraf.nix @@ -56,7 +56,7 @@ enable = true; wantedBy = [ "multi-user.target" ]; after = [ "telegraf.service" ]; - wants = [ "telegraf.service" ]; + requires = [ "telegraf.service" ]; serviceConfig = { LoadCredential = [ "auth_file_path:${config.clan.core.vars.generators.telegraf.files.miniserve-auth.path}" @@ -67,13 +67,9 @@ Restart = "on-failure"; User = "telegraf"; Group = "telegraf"; + RuntimeDirectory = "telegraf-www"; }; - script = "${pkgs.miniserve}/bin/miniserve -p 9990 /var/lib/telegraf/telegraf.json --auth-file \"$AUTH_FILE_PATH\""; - }; - - users.users.telegraf = { - home = "/var/lib/telegraf"; - createHome = true; + script = "${pkgs.miniserve}/bin/miniserve -p 9990 /run/telegraf-www --auth-file \"$AUTH_FILE_PATH\""; }; services.telegraf = { @@ -119,7 +115,7 @@ }; outputs.file = { - files = [ "/var/lib/telegraf/telegraf.json" ]; + files = [ "/run/telegraf-www/telegraf.json" ]; data_format = "json"; json_timestamp_units = "1s"; }; diff --git a/clanServices/monitoring/tests/vm/default.nix b/clanServices/monitoring/tests/vm/default.nix index 7afdd53db..27d2dfffa 100644 --- a/clanServices/monitoring/tests/vm/default.nix +++ b/clanServices/monitoring/tests/vm/default.nix @@ -56,12 +56,12 @@ peer1.wait_for_unit("telegraf.service") peer1.wait_for_unit("telegraf-json.service") - peer1.succeed("curl http://localhost:9990") + peer1.succeed("curl http://localhost:9990/telegraf.json") peer1.succeed("curl http://localhost:9273/metrics") # Fetch the basic auth password from the secret file password = peer1.succeed("cat /var/run/secrets/vars/telegraf/password") - url = f"http://192.168.1.1:9990" + url = f"http://192.168.1.1:9990/telegraf.json" credentials = f"prometheus:{password}" print("Using credentials:", credentials) time.sleep(10) # wait a bit for telegraf to collect some data @@ -75,13 +75,12 @@ # Look for the nixos_systems metric in the json output found_system = False for line in response: - line_str = line.decode("utf-8").strip() - line = json.loads(line_str) - if line["name"] == "nixos_systems": - found_system = True - print("Found nixos_systems metric in json output") - break - print(line) + line_str = line.decode("utf-8").strip() + line = json.loads(line_str) + if line["name"] == "nixos_systems": + found_system = True + print("Found nixos_systems metric in json output") + break assert found_system, "nixos_systems metric not found in json output" # TODO: I would like to test the python code here but it's not working yet diff --git a/pkgs/clan-cli/clan_lib/metrics/telegraf.py b/pkgs/clan-cli/clan_lib/metrics/telegraf.py index 2dbfb4315..13afd9588 100644 --- a/pkgs/clan-cli/clan_lib/metrics/telegraf.py +++ b/pkgs/clan-cli/clan_lib/metrics/telegraf.py @@ -36,7 +36,7 @@ def get_metrics( """ # Example: fetch Prometheus metrics with basic auth - url = f"http://{target_host.address}:9990" + url = f"http://{target_host.address}:9990/telegraf.json" username = "prometheus" var_name = "telegraf/password" password_var = get_machine_var(machine, var_name)