clanServices: add ca certs for monitoring/telegraf
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
{ packages, pkgs, ... }:
|
||||
{ ... }:
|
||||
{
|
||||
name = "monitoring";
|
||||
|
||||
@@ -28,6 +28,8 @@
|
||||
services.telegraf.extraConfig = {
|
||||
agent.interval = lib.mkForce "1s";
|
||||
outputs.prometheus_client = {
|
||||
# BUG: We have to disable basic auth here because the prometheus_client
|
||||
# output plugin will otherwise deadlock Telegraf on startup.
|
||||
basic_password = lib.mkForce "";
|
||||
basic_username = lib.mkForce "";
|
||||
};
|
||||
@@ -35,17 +37,16 @@
|
||||
};
|
||||
};
|
||||
|
||||
extraPythonPackages = _p: [
|
||||
(pkgs.python3.pkgs.toPythonModule packages.${pkgs.system}.clan-cli)
|
||||
];
|
||||
|
||||
# !!! ANY CHANGES HERE MUST BE REFLECTED IN:
|
||||
# clan_lib/metrics/telegraf.py::get_metrics
|
||||
testScript =
|
||||
{ ... }:
|
||||
{ nodes, ... }:
|
||||
''
|
||||
import time
|
||||
import os
|
||||
import sys
|
||||
import subprocess
|
||||
import ssl
|
||||
import json
|
||||
import shlex
|
||||
import urllib.request
|
||||
@@ -54,45 +55,44 @@
|
||||
|
||||
peer1.wait_for_unit("network-online.target")
|
||||
peer1.wait_for_unit("telegraf.service")
|
||||
|
||||
peer1.wait_for_unit("telegraf-json.service")
|
||||
peer1.succeed("curl http://localhost:9990/telegraf.json")
|
||||
peer1.succeed("curl http://localhost:9273/metrics")
|
||||
|
||||
# Fetch the basic auth password from the secret file
|
||||
password = peer1.succeed("cat /var/run/secrets/vars/telegraf/password")
|
||||
url = f"http://192.168.1.1:9990/telegraf.json"
|
||||
password = peer1.succeed("cat ${nodes.peer1.clan.core.vars.generators.telegraf.files.password.path}").strip()
|
||||
credentials = f"prometheus:{password}"
|
||||
print("Using credentials:", credentials)
|
||||
time.sleep(10) # wait a bit for telegraf to collect some data
|
||||
|
||||
# Fetch the json output from miniserve
|
||||
print("Using credentials:", credentials)
|
||||
peer1.succeed(f"curl -k -u {credentials} https://localhost:9990/telegraf.json")
|
||||
peer1.succeed(f"curl -k -u {credentials} https://localhost:9273/metrics")
|
||||
|
||||
cert_path = "${nodes.peer1.clan.core.vars.generators.telegraf-certs.files.crt.path}"
|
||||
url = "https://192.168.1.1:9990/telegraf.json" # HTTPS required
|
||||
|
||||
print("Waiting for /var/run/telegraf-www/telegraf.json to be bigger then 200 bytes")
|
||||
peer1.wait_until_succeeds(f"test \"$(stat -c%s /var/run/telegraf-www/telegraf.json)\" -ge 200", timeout=30)
|
||||
|
||||
encoded_credentials = b64encode(credentials.encode("utf-8")).decode("utf-8")
|
||||
headers = {"Authorization": f"Basic {encoded_credentials}"}
|
||||
req = urllib.request.Request(url, headers=headers) # noqa: S310
|
||||
response = urllib.request.urlopen(req)
|
||||
|
||||
# Look for the nixos_systems metric in the json output
|
||||
# Trust the provided CA/server certificate
|
||||
context = ssl.create_default_context(cafile=cert_path)
|
||||
context.check_hostname = False
|
||||
context.verify_mode = ssl.CERT_REQUIRED
|
||||
|
||||
found_system = False
|
||||
for line in response:
|
||||
line_str = line.decode("utf-8").strip()
|
||||
line = json.loads(line_str)
|
||||
if line["name"] == "nixos_systems":
|
||||
found_system = True
|
||||
print("Found nixos_systems metric in json output")
|
||||
break
|
||||
assert found_system, "nixos_systems metric not found in json output"
|
||||
with urllib.request.urlopen(req, context=context, timeout=5) as response:
|
||||
for raw_line in response:
|
||||
line_str = raw_line.decode("utf-8").strip()
|
||||
if not line_str:
|
||||
continue
|
||||
obj = json.loads(line_str)
|
||||
if obj.get("name") == "nixos_systems":
|
||||
found_system = True
|
||||
print("Found nixos_systems metric in json output")
|
||||
break
|
||||
|
||||
# TODO: Also test the Python code (clan_lib) here; the commented-out draft below does not work yet.
|
||||
# Missing: I need a way to get the encrypted var from the clan
|
||||
#from clan_lib.metrics.version import get_nixos_systems
|
||||
#from clan_lib.machines.machines import Machine as ClanMachine
|
||||
#from clan_lib.flake import Flake
|
||||
#from clan_lib.ssh.remote import Remote
|
||||
#target_host = Remote("peer1", "192.168.1.1")
|
||||
#machine = ClanMachine("peer1", flake=Flake("${./.}"))
|
||||
# data = get_nixos_systems(machine, target_host)
|
||||
# assert data["current_system"] is not None
|
||||
assert found_system, "nixos_systems metric not found in json output"
|
||||
|
||||
'';
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user