clanServices: add ca certs for monitoring/telegraf
This commit is contained in:
@@ -153,7 +153,7 @@ def generations_command(args: argparse.Namespace) -> None:
|
||||
).override(host_key_check=host_key_check)
|
||||
else:
|
||||
try:
|
||||
with get_best_remote(machine, only_vpns=True) as remote:
|
||||
with get_best_remote(machine) as _remote:
|
||||
target_host = machine.target_host().override(
|
||||
host_key_check=host_key_check
|
||||
)
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import json
|
||||
import logging
|
||||
import ssl
|
||||
import urllib.request
|
||||
from base64 import b64encode
|
||||
from collections.abc import Iterator
|
||||
@@ -25,7 +26,7 @@ class MonitoringNotEnabledError(ClanError):
|
||||
pass
|
||||
|
||||
|
||||
# Tests for this function are in the monitoring clanService tests
|
||||
# Tests for this function are in the 'monitoring' clanService tests
|
||||
def get_metrics(
|
||||
machine: Machine,
|
||||
target_host: Host,
|
||||
@@ -41,17 +42,19 @@ def get_metrics(
|
||||
|
||||
"""
|
||||
# Example: fetch Prometheus metrics with basic auth
|
||||
url = f"http://{target_host.address}:9990/telegraf.json"
|
||||
url = f"https://{target_host.address}:9990/telegraf.json"
|
||||
username = "prometheus"
|
||||
var_name = "telegraf/password"
|
||||
|
||||
try:
|
||||
password_var = get_machine_var(machine, var_name)
|
||||
password_var = get_machine_var(machine, "telegraf/password")
|
||||
cert_var = get_machine_var(machine, "telegraf-certs/crt")
|
||||
except VarNotFoundError as e:
|
||||
msg = f"Module 'monitoring' is required to fetch metrics from machine '{machine.name}'."
|
||||
msg = "Module 'monitoring' is required to fetch metrics from machine."
|
||||
raise MonitoringNotEnabledError(msg) from e
|
||||
if not password_var.exists:
|
||||
|
||||
if not password_var.exists or not cert_var.exists:
|
||||
msg = (
|
||||
f"Missing required var '{var_name}' for machine '{machine.name}'.\n"
|
||||
f"Missing required var.\n"
|
||||
f"Ensure the 'monitoring' clanService is enabled and run `clan machines update {machine.name}`."
|
||||
"For more information, see: https://docs.clan.lol/reference/clanServices/monitoring/"
|
||||
)
|
||||
@@ -62,22 +65,30 @@ def get_metrics(
|
||||
|
||||
encoded_credentials = b64encode(credentials.encode("utf-8")).decode("utf-8")
|
||||
headers = {"Authorization": f"Basic {encoded_credentials}"}
|
||||
|
||||
cert_path = machine.select(
|
||||
"config.clan.core.vars.generators.telegraf-certs.files.crt.path"
|
||||
)
|
||||
context = ssl.create_default_context(cafile=cert_path)
|
||||
context.check_hostname = False
|
||||
context.verify_mode = ssl.CERT_REQUIRED
|
||||
|
||||
req = urllib.request.Request(url, headers=headers) # noqa: S310
|
||||
|
||||
try:
|
||||
machine.info(f"Fetching Prometheus metrics from {url}")
|
||||
response = urllib.request.urlopen(req, timeout=6) # noqa: S310
|
||||
for line in response:
|
||||
line_str = line.decode("utf-8").strip()
|
||||
if line_str:
|
||||
try:
|
||||
yield cast("MetricSample", json.loads(line_str))
|
||||
except json.JSONDecodeError:
|
||||
machine.warn(f"Skipping invalid JSON line: {line_str}")
|
||||
continue
|
||||
with urllib.request.urlopen(req, context=context, timeout=10) as response: # noqa: S310
|
||||
for line in response:
|
||||
line_str = line.decode("utf-8").strip()
|
||||
if line_str:
|
||||
try:
|
||||
yield cast("MetricSample", json.loads(line_str))
|
||||
except json.JSONDecodeError:
|
||||
machine.warn(f"Skipping invalid JSON line: {line_str}")
|
||||
continue
|
||||
except Exception as e:
|
||||
msg = (
|
||||
f"Failed to fetch Prometheus metrics from {url} for machine '{machine.name}': {e}\n"
|
||||
f"Failed to fetch Prometheus metrics from {url}: {e}\n"
|
||||
"Ensure the telegraf.service is running and accessible."
|
||||
)
|
||||
raise ClanError(msg) from e
|
||||
|
||||
Reference in New Issue
Block a user