Compare commits

..

33 Commits

Author SHA1 Message Date
pinpox
5f05e36c9d Remove outdated facts message
One every executino of `clan machines update` a message referring to
facts is still being printed. While there is more code to be cleaned up
around facts, this simple change removes the prominent message out of
user's attention.
2025-10-28 15:32:14 +01:00
hsjobeki
11372d35e1 Merge pull request 'clan/checks: fix clanLib not checking' (#5685) from check into main
Reviewed-on: https://git.clan.lol/clan/clan-core/pulls/5685
2025-10-28 10:51:17 +00:00
Johannes Kirschbauer
b7508b2b43 clan/checks: fix clanLib not checking 2025-10-28 11:46:49 +01:00
clan-bot
183817b769 Merge pull request 'Update nixpkgs-dev in devFlake' (#5684) from update-devFlake-nixpkgs-dev into main 2025-10-28 10:08:10 +00:00
clan-bot
591e53e9be Update nixpkgs-dev in devFlake 2025-10-28 10:01:54 +00:00
hsjobeki
a6a6415e31 Merge pull request 'clan/checks: move into lib function; add tests' (#5683) from role-settings into main
Reviewed-on: https://git.clan.lol/clan/clan-core/pulls/5683
2025-10-28 08:46:41 +00:00
Johannes Kirschbauer
0060ead876 clan/checks: move into lib function; add tests 2025-10-28 09:40:31 +01:00
hsjobeki
224e41d3ad Merge pull request 'modules: clean up clan module' (#5679) from role-settings into main
Reviewed-on: https://git.clan.lol/clan/clan-core/pulls/5679
2025-10-28 08:22:46 +00:00
Johannes Kirschbauer
b3323007b2 test: update test filesets 2025-10-28 09:04:50 +01:00
Johannes Kirschbauer
3e950bc66f docs: add doc-comment for template submodule 2025-10-28 09:04:50 +01:00
Johannes Kirschbauer
9503b46b21 modules: rename arbitrary interface.nix to 'top-level-interface' 2025-10-28 09:04:50 +01:00
Johannes Kirschbauer
a2cec323a2 modules: move nixos modules into nixosModules folder 2025-10-28 09:04:50 +01:00
Johannes Kirschbauer
4239f4d27f clan/module: explain throw 2025-10-28 09:04:50 +01:00
clan-bot
8ac8264997 Merge pull request 'Update nixpkgs-dev in devFlake' (#5681) from update-devFlake-nixpkgs-dev into main 2025-10-27 20:06:57 +00:00
clan-bot
544a53ae9c Update nixpkgs-dev in devFlake 2025-10-27 20:01:46 +00:00
Luis Hebendanz
89e18482ed Merge pull request 'checks: Fix flakey llm test, improve performance' (#5678) from Qubasa/clan-core:fix_slow_llm into main
Reviewed-on: https://git.clan.lol/clan/clan-core/pulls/5678
2025-10-27 16:34:21 +00:00
Qubasa
a8217b5a32 llm checks: Skip parts of the test on aarch64 for performance 2025-10-27 17:25:06 +01:00
Qubasa
bdd5de5628 checks: Fix flakey llm test, improve performance 2025-10-27 17:12:12 +01:00
Mic92
61d8bfd0d1 Merge pull request 'fix: respect directory parameter in machines_dir' (#5677) from fix-custom-directory into main
Reviewed-on: https://git.clan.lol/clan/clan-core/pulls/5677
Reviewed-by: hsjobeki <hsjobeki@gmail.com>
2025-10-27 15:42:44 +00:00
Jörg Thalheim
b8d79c7fc2 fix: respect directory parameter in machines_dir
The machines_dir() function was hardcoding "machines" without considering
the directory parameter from buildClan/clan configuration. This caused
update-hardware-config and other commands to write files to the wrong
location when a custom directory was specified (e.g., directory = ./clan).

Solution:
1. Added relativeDirectory to inventoryClass in Nix, computed where both
   self and directory have consistent store paths during evaluation
2. Updated machines_dir() to use this pre-computed relative path from Nix
   via flake.select("clanInternals.inventoryClass.relativeDirectory")

Fixes: https://git.clan.lol/clan/clan-core/issues/2906
2025-10-27 16:37:07 +01:00
hsjobeki
fb25ab028b Merge pull request 'services: add role settings with explicit warning' (#5676) from role-settings into main
Reviewed-on: https://git.clan.lol/clan/clan-core/pulls/5676
2025-10-27 15:33:39 +00:00
clan-bot
5b136ecaf0 Merge pull request 'Update nixpkgs-dev in devFlake' (#5675) from update-devFlake-nixpkgs-dev into main 2025-10-27 15:08:23 +00:00
clan-bot
d4733dbb0a Update nixpkgs-dev in devFlake 2025-10-27 15:01:55 +00:00
Johannes Kirschbauer
bfb30251e6 lib: replace uniqueStrings after upstreamed 2025-10-27 14:00:46 +01:00
Johannes Kirschbauer
33115f76b7 services: add role settings with explicit warning 2025-10-27 13:31:44 +01:00
pinpox
9e9208e699 Merge pull request 'yggdrasil: read peers from exports' (#5657) from yggdrasil-export-peers into main
Reviewed-on: https://git.clan.lol/clan/clan-core/pulls/5657
2025-10-27 12:13:59 +00:00
hsjobeki
6b3fd57174 Merge pull request 'extraModules: soft deprecation for string extraModules' (#5656) from inline into main
Reviewed-on: https://git.clan.lol/clan/clan-core/pulls/5656
2025-10-27 12:12:17 +00:00
clan-bot
3be5237cf6 Merge pull request 'Update nixpkgs-dev in devFlake' (#5674) from update-devFlake-nixpkgs-dev into main 2025-10-26 20:06:09 +00:00
clan-bot
368f80eaae Merge pull request 'Update nix-darwin' (#5665) from update-nix-darwin into main 2025-10-26 20:04:12 +00:00
clan-bot
4d7079534c Update nixpkgs-dev in devFlake 2025-10-26 20:01:51 +00:00
clan-bot
7d4cf1c551 Update nix-darwin 2025-10-26 20:00:59 +00:00
Johannes Kirschbauer
1aba0577dc schemas: filter 'extraModules' from python classes and derived schemas 2025-10-24 16:57:26 +02:00
Johannes Kirschbauer
6ee4657da3 extraModules: soft deprecation for string extraModules 2025-10-24 12:54:17 +02:00
44 changed files with 539 additions and 779 deletions

View File

@@ -1,24 +0,0 @@
{ lib }:
lib.mapAttrsToList
(name: opts: {
alert = name;
expr = opts.condition;
for = opts.time or "2m";
labels = { };
annotations.description = opts.description;
})
{
# TODO Remove this alert, just for testing
"Filesystem > = 10%" = {
condition = ''disk_used_percent{fstype!~"tmpfs|vfat|devtmpfs|efivarfs"} > 10'';
time = "1m";
description = "{{$labels.instance}} device {{$labels.device}} on {{$labels.path}} got less than 90% space left on its filesystem.";
};
filesystem_full_80percent = {
condition = ''disk_used_percent{fstype!~"tmpfs|vfat|devtmpfs|efivarfs"} > 80'';
time = "1m";
description = "{{$labels.instance}} device {{$labels.device}} on {{$labels.path}} got less than 20% space left on its filesystem.";
};
}

View File

@@ -24,48 +24,5 @@
};
};
roles.prometheus = {
description = "Prometheus monitoring daemon. Will collect metrics from all hosts with the telegraf role";
interface =
{ lib, ... }:
{
options.webExternalUrl = lib.mkOption {
type = lib.types.nullOr lib.types.str;
default = null;
example = "https://prometheus.tld";
description = "The URL under which Prometheus is externally reachable";
};
};
};
imports = [
./telegraf.nix
./prometheus.nix
];
perMachine.nixosModule =
{ pkgs, ... }:
{
clan.core.vars.generators."prometheus" = {
share = true;
files.password.restartUnits = [
"telegraf.service"
"prometheus.service"
];
files.password-env.restartUnits = [ "telegraf.service" ];
runtimeInputs = [
pkgs.coreutils
pkgs.xkcdpass
];
script = ''
xkcdpass --numwords 6 --delimiter - --count 1 | tr -d "\n" > $out/password
printf 'BASIC_AUTH_PWD=%s\n' "$(cat $out/password)" > $out/password-env
'';
};
};
imports = [ ./telegraf.nix ];
}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

View File

@@ -1,11 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" version="1.1" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:svgjs="http://svgjs.dev/svgjs" width="1000" height="1000"><g clip-path="url(#SvgjsClipPath1007)"><rect width="1000" height="1000" fill="#ffffff"></rect><g transform="matrix(5.132341080724394,0,0,5.132341080724394,217.38764012391061,149.97935090550055)"><svg xmlns="http://www.w3.org/2000/svg" version="1.1" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:svgjs="http://svgjs.dev/svgjs" width="110.13" height="136.39"><svg id="Layer_1" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 110.13 136.39">
<defs>
<style>
.cls-1 {
fill: #231f20;
}
</style>
<clipPath id="SvgjsClipPath1007"><rect width="1000" height="1000" x="0" y="0" rx="350" ry="350"></rect></clipPath></defs>
<path class="cls-1" d="M88.27,30.81h16.69c1.77,0,3.21-1.44,3.21-3.21v-12.84c0-1.77-1.44-3.21-3.21-3.21h-5.26c-1.7,0-3.08-1.38-3.08-3.08V3.21c0-1.77-1.44-3.21-3.21-3.21h-47.49c-1.77,0-3.21,1.44-3.21,3.21v5.26c0,1.7-1.38,3.08-3.08,3.08h-5.26c-1.77,0-3.21,1.44-3.21,3.21v5.26c0,1.7-1.38,3.08-3.08,3.08h-5.26c-1.77,0-3.21,1.44-3.21,3.21,0,0-.77-1.95-.77,34.47,0,32.56.77,29.7.77,29.7,0,1.77,1.44,3.21,3.21,3.21h5.26c1.7,0,3.08,1.38,3.08,3.08v5.39c0,1.7,1.38,3.08,3.08,3.08h5.39c1.7,0,3.08,1.38,3.08,3.08v5.26c0,1.77,1.44,3.21,3.21,3.21h46.21c1.77,0,3.21-1.44,3.21-3.21v-5.26c0-1.7,1.38-3.08,3.08-3.08h8.5c1.77,0,3.21-1.44,3.21-3.21v-15.3c0-1.77-1.44-3.21-3.21-3.21h-19.93c-1.77,0-3.21,1.44-3.21,3.21v7.73c0,1.7-1.38,3.08-3.08,3.08h-23.36c-1.7,0-3.08-1.38-3.08-3.08v-7.83c0-1.77-1.44-3.21-3.21-3.21h-7.83c-1.7,0-2.66.25-3.08-3.08-.13-1.07-.2-2.38-.3-4.13-.25-4.41-.47-2.64-.47-15.89,0-18.52.48-23.85.77-26.42s1.38-3.08,3.08-3.08h7.83c1.77,0,3.21-1.44,3.21-3.21v-5.26c0-1.7,1.38-3.08,3.08-3.08h24.65c1.7,0,3.08,1.38,3.08,3.08v5.26c0,1.77,1.44,3.21,3.21,3.21Z"></path>
<path class="cls-1" d="M28.49,113.03h-3.79c-.74,0-1.33-.6-1.33-1.33v-3.79c0-1.47-1.19-2.67-2.67-2.67h-10.24c-1.47,0-2.67,1.19-2.67,2.67v3.79c0,.74-.6,1.33-1.33,1.33h-3.79c-1.47,0-2.67,1.19-2.67,2.67v10.24c0,1.47,1.19,2.67,2.67,2.67h3.79c.74,0,1.33.6,1.33,1.33v3.79c0,1.47,1.19,2.67,2.67,2.67h10.24c1.47,0,2.67-1.19,2.67-2.67v-3.79c0-.74.6-1.33,1.33-1.33h3.79c1.47,0,2.67-1.19,2.67-2.67v-10.24c0-1.47-1.19-2.67-2.67-2.67Z"></path>
</svg></svg></g></g></svg>

Before

Width:  |  Height:  |  Size: 2.3 KiB

View File

@@ -1,249 +0,0 @@
<!DOCTYPE html> <html>
<head>
<meta charset="utf-8">
<title>Clan status</title>
<link rel="icon" type="image/png" href="favicon-48x48.png" sizes="48x48" />
<link rel="icon" type="image/svg+xml" href="favicon.svg" />
<link rel="shortcut icon" href="favicon.ico" />
<link rel="apple-touch-icon" sizes="180x180" href="apple-touch-icon.png" />
<meta name="viewport" content="width=device-width, initial-scale=1">
<style>
:root {
--dark: rgb(22, 35, 36);
--light: rgb(229, 231, 235);
}
body {
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
margin: 0;
padding: 20px;
background: var(--dark);
}
.container {
max-width: 1400px;
margin: 0 auto;
background: var(--light);
padding: 30px;
border-radius: 8px;
box-shadow: 0 1px 3px rgba(0,0,0,0.1);
}
h1 {
margin-top: 0;
color: #333;
border-bottom: 2px solid var(--dark);
padding-bottom: 10px;
}
h2 {
color: #555;
margin-top: 30px;
}
table {
width: 100%;
border-collapse: collapse;
margin: 20px 0;
}
th {
background: var(--dark);
color: var(--light);
padding: 12px;
text-align: left;
font-weight: 600;
}
td {
padding: 10px 12px;
border-bottom: 1px solid #ddd;
}
tr:hover {
background: var(--light);
}
.status-up {
color: #28a745;
font-weight: bold;
}
.status-down {
color: #dc3545;
font-weight: bold;
}
.alert-success {
background: #d4edda;
color: #155724;
padding: 12px;
border-radius: 4px;
border: 1px solid #c3e6cb;
}
.grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(400px, 1fr));
gap: 20px;
margin: 20px 0;
}
.card {
border: 1px solid #ddd;
border-radius: 4px;
padding: 15px;
}
.metric-value {
font-size: 1.2em;
font-weight: bold;
color: var(--dark);
}
</style>
</head>
<body>
<div class="container">
<h1>Clan Status</h1>
<h2>Instances</h2>
<table>
<thead>
<tr>
<th>Host</th>
<th>Status</th>
<th>CPU Usage</th>
<th>Memory Usage</th>
<th>Disk Usage</th>
</tr>
</thead>
<tbody>
{{ range query "up" | sortByLabel "instance" }}
{{ $hostname := reReplaceAll "\\..*" "" .Labels.instance }}
<tr>
<td>{{ $hostname }}</td>
<td>
{{ if eq .Value 1.0 }}
<span class="status-up">UP</span>
{{ else }}
<span class="status-down">DOWN</span>
{{ end }}
</td>
<td>
{{ $cpuQuery := query (printf "100 - cpu_usage_idle{cpu=\"cpu-total\",host=\"%s\"}" $hostname) }}
{{ if $cpuQuery }}
{{ with $cpuQuery | first }}
<span class="metric-value">{{ . | value | printf "%.1f" }}%</span>
{{ end }}
{{ else }}
N/A
{{ end }}
</td>
<td>
{{ $memQuery := query (printf "(1 - (mem_available{host=\"%s\"} / mem_total{host=\"%s\"})) * 100" $hostname $hostname) }}
{{ if $memQuery }}
{{ with $memQuery | first }}
<span class="metric-value">{{ . | value | printf "%.1f" }}%</span>
{{ end }}
{{ else }}
N/A
{{ end }}
</td>
<td>
{{ $diskQuery := query (printf "(1 - (disk_free{host=\"%s\",path=\"/\"} / disk_total{host=\"%s\",path=\"/\"})) * 100" $hostname $hostname) }}
{{ if $diskQuery }}
{{ with $diskQuery | first }}
<span class="metric-value">{{ . | value | printf "%.1f" }}%</span>
{{ end }}
{{ else }}
N/A
{{ end }}
</td>
</tr>
{{ end }}
</tbody>
</table>
<h2>Services</h2>
<table>
<thead>
<tr>
<th>Service</th>
<th>Host</th>
<th>State</th>
</tr>
</thead>
<tbody>
<!-- <tr> -->
<!-- <td>Vaultwarden</td> -->
<!-- <td>kiwi</td> -->
<!-- <td> -->
<!-- <span class="status-up">UP</span> -->
<!-- </td> -->
<!-- </tr> -->
</tbody>
</table>
<!-- <h2>NixOS Systems</h2> -->
<!-- <table> -->
<!-- <thead> -->
<!-- <tr> -->
<!-- <th>Host</th> -->
<!-- <th>Booted System</th> -->
<!-- <th>Current System</th> -->
<!-- <th>Booted Kernel</th> -->
<!-- <th>Current Kernel</th> -->
<!-- </tr> -->
<!-- </thead> -->
<!-- <tbody> -->
<!-- {{ range query "nixos_systems_present" | sortByLabel "host" }} -->
<!-- <tr> -->
<!-- <td>{{ .Labels.host }}</td> -->
<!-- <td style="font-family: monospace; font-size: 0.85em;">{{ .Labels.booted_system }}</td> -->
<!-- <td style="font-family: monospace; font-size: 0.85em;">{{ .Labels.current_system }}</td> -->
<!-- <td>{{ .Labels.booted_kernel }}</td> -->
<!-- <td>{{ .Labels.current_kernel }}</td> -->
<!-- </tr> -->
<!-- {{ end }} -->
<!-- </tbody> -->
<!-- </table> -->
<h2>Failed Systemd Units</h2>
{{ $failedUnits := query "systemd_units_sub_code{sub=\"failed\"}" }}
{{ if $failedUnits }}
<table>
<thead>
<tr>
<th>Host</th>
<th>Unit</th>
</tr>
</thead>
<tbody>
{{ range $failedUnits | sortByLabel "host" }}
<tr>
<td>{{ .Labels.host }}</td>
<td style="color: #dc3545;">{{ .Labels.name }}</td>
</tr>
{{ end }}
</tbody>
</table>
{{ else }}
<div class="alert-success">No failed systemd units</div>
{{ end }}
<h2>Active Alerts</h2>
{{ with query "ALERTS{alertstate=\"firing\"}" }}
<table>
<thead>
<tr>
<th>Host</th>
<th>Alert</th>
<th>Value</th>
</tr>
</thead>
<tbody>
{{ range . }}
<tr>
<td>{{ or .Labels.host .Labels.instance }}</td>
<td>{{ .Labels.alertname }}</td>
<td>{{ .Value }}</td>
</tr>
{{ end }}
</tbody>
</table>
{{ else }}
<div class="alert-success">No active alerts</div>
{{ end }}
</div>
</body>
</html>

View File

@@ -1,80 +0,0 @@
:root {
--dark: rgb(22, 35, 36);
--light: rgb(229, 231, 235);
}
body {
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
margin: 0;
padding: 20px;
background: var(--dark);
}
.container {
max-width: 1400px;
margin: 0 auto;
background: var(--light);
padding: 30px;
border-radius: 8px;
box-shadow: 0 1px 3px rgba(0,0,0,0.1);
}
h1 {
margin-top: 0;
color: #333;
border-bottom: 2px solid var(--dark);
padding-bottom: 10px;
}
h2 {
color: #555;
margin-top: 30px;
}
table {
width: 100%;
border-collapse: collapse;
margin: 20px 0;
}
th {
background: var(--dark);
color: var(--light);
padding: 12px;
text-align: left;
font-weight: 600;
}
td {
padding: 10px 12px;
border-bottom: 1px solid #ddd;
}
tr:hover {
background: var(--light);
}
.status-up {
color: #28a745;
font-weight: bold;
}
.status-down {
color: #dc3545;
font-weight: bold;
}
.alert-success {
background: #d4edda;
color: #155724;
padding: 12px;
border-radius: 4px;
border: 1px solid #c3e6cb;
}
.grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(400px, 1fr));
gap: 20px;
margin: 20px 0;
}
.card {
border: 1px solid #ddd;
border-radius: 4px;
padding: 15px;
}
.metric-value {
font-size: 1.2em;
font-weight: bold;
color: var(--dark);
}

View File

@@ -1,83 +0,0 @@
{
roles.prometheus.perInstance =
{
settings,
instanceName,
roles,
...
}:
{
nixosModule =
{
config,
lib,
pkgs,
...
}:
{
systemd.services.prometheus = {
serviceConfig = {
LoadCredential = "password:${config.clan.core.vars.generators.prometheus.files.password.path}";
BindReadOnlyPaths = "%d/password:/etc/prometheus/password";
};
};
services.prometheus = {
enable = true;
# TODO what do we set here? do we even need something?
# TODO this should be a export
# "https://prometheus.${config.clan.core.settings.tld}";
webExternalUrl = settings.webExternalUrl;
# Configure console templates and libraries paths
extraFlags = [
"--storage.tsdb.retention.time=30d"
"--web.console.templates=${./prometheus-consoles}"
"--web.console.libraries=${./prometheus-consoles}"
];
ruleFiles = [
(pkgs.writeText "prometheus-rules.yml" (
builtins.toJSON {
groups = [
{
name = "alerting-rules";
rules = import ./alert-rules.nix { inherit lib; };
}
];
}
))
];
scrapeConfigs = [
{
job_name = "telegraf";
scrape_interval = "60s";
metrics_path = "/metrics";
basic_auth.username = "prometheus";
basic_auth.password_file = "/etc/prometheus/password";
static_configs = [
{
# Scrape all machines with the `telegraf` role
# https://prometheus:<password>@<host>.<tld>:9273/metrics
# scheme = "https";
# scheme = "http";
targets = map (m: "${m}.${config.clan.core.settings.tld}:9273") (
lib.attrNames roles.telegraf.machines
);
labels.type = instanceName;
}
];
}
];
};
};
};
}

View File

@@ -1,32 +1,128 @@
{
roles.telegraf.perInstance =
{ ... }:
{ settings, ... }:
{
nixosModule =
{
config,
pkgs,
lib,
...
}:
let
auth_user = "prometheus";
in
{
warnings =
lib.optionals (settings.allowAllInterfaces != null) [
"monitoring.settings.allowAllInterfaces is deprecated and and has no effect. Please remove it from your inventory."
"The monitoring service will now always listen on all interfaces over https."
]
++ (lib.optionals (settings.interfaces != null) [
"monitoring.settings.interfaces is deprecated and and has no effect. Please remove it from your inventory."
"The monitoring service will now always listen on all interfaces over https."
]);
networking.firewall.allowedTCPPorts = [ 9273 ];
networking.firewall.allowedTCPPorts = [
9273
9990
];
clan.core.vars.generators."telegraf-certs" = {
files.crt = {
restartUnits = [ "telegraf.service" ];
deploy = true;
secret = false;
};
files.key = {
mode = "0600";
restartUnits = [ "telegraf.service" ];
};
runtimeInputs = [
pkgs.openssl
];
# TODO: Implement automated certificate rotation instead of using a 100-year expiration
script = ''
openssl req -x509 -nodes -newkey rsa:4096 \
-days 36500 \
-keyout "$out"/key \
-out "$out"/crt \
-subj "/C=US/ST=CA/L=San Francisco/O=Example Corp/OU=IT/CN=example.com"
'';
};
clan.core.vars.generators."telegraf" = {
files.password.restartUnits = [ "telegraf.service" ];
files.password-env.restartUnits = [ "telegraf.service" ];
files.miniserve-auth.restartUnits = [ "telegraf.service" ];
dependencies = [ "telegraf-certs" ];
runtimeInputs = [
pkgs.coreutils
pkgs.xkcdpass
pkgs.mkpasswd
];
script = ''
PASSWORD=$(xkcdpass --numwords 4 --delimiter - --count 1 | tr -d "\n")
echo "BASIC_AUTH_PWD=$PASSWORD" > "$out"/password-env
echo "${auth_user}:$PASSWORD" > "$out"/miniserve-auth
echo "$PASSWORD" | tr -d "\n" > "$out"/password
'';
};
systemd.services.telegraf-json = {
enable = true;
wantedBy = [ "multi-user.target" ];
after = [ "telegraf.service" ];
requires = [ "telegraf.service" ];
serviceConfig = {
LoadCredential = [
"auth_file_path:${config.clan.core.vars.generators.telegraf.files.miniserve-auth.path}"
"telegraf_crt_path:${config.clan.core.vars.generators.telegraf-certs.files.crt.path}"
"telegraf_key_path:${config.clan.core.vars.generators.telegraf-certs.files.key.path}"
];
Environment = [
"AUTH_FILE_PATH=%d/auth_file_path"
"CRT_PATH=%d/telegraf_crt_path"
"KEY_PATH=%d/telegraf_key_path"
];
Restart = "on-failure";
User = "telegraf";
Group = "telegraf";
RuntimeDirectory = "telegraf-www";
};
script = "${pkgs.miniserve}/bin/miniserve -p 9990 /run/telegraf-www --auth-file \"$AUTH_FILE_PATH\" --tls-cert \"$CRT_PATH\" --tls-key \"$KEY_PATH\"";
};
systemd.services.telegraf = {
serviceConfig = {
LoadCredential = [
"telegraf_crt_path:${config.clan.core.vars.generators.telegraf-certs.files.crt.path}"
"telegraf_key_path:${config.clan.core.vars.generators.telegraf-certs.files.key.path}"
];
Environment = [
"CRT_PATH=%d/telegraf_crt_path"
"KEY_PATH=%d/telegraf_key_path"
];
};
};
services.telegraf = {
enable = true;
environmentFiles = [ config.clan.core.vars.generators.prometheus.files.password-env.path ];
environmentFiles = [
(builtins.toString config.clan.core.vars.generators.telegraf.files.password-env.path)
];
extraConfig = {
agent.interval = "60s";
inputs = {
# More input plugins available at:
# https://github.com/influxdata/telegraf/tree/master/plugins/inputs
diskio = { };
disk = { };
cpu = { };
processes = { };
kernel_vmstat = { };
system = { };
mem = { };
@@ -51,12 +147,20 @@
}
];
};
# sadly there doesn't seem to exist a telegraf http_client output plugin
# sadly there doesn'T seem to exist a telegraf http_client output plugin
outputs.prometheus_client = {
listen = ":9273";
metric_version = 2;
basic_username = "prometheus";
basic_username = "${auth_user}";
basic_password = "$${BASIC_AUTH_PWD}";
tls_cert = "$${CRT_PATH}";
tls_key = "$${KEY_PATH}";
};
outputs.file = {
files = [ "/run/telegraf-www/telegraf.json" ];
data_format = "json";
json_timestamp_units = "1s";
};
};
};

View File

@@ -39,7 +39,6 @@
...
}:
let
uniqueStrings = list: builtins.attrNames (builtins.groupBy lib.id list);
# Collect searchDomains from all servers in this instance
allServerSearchDomains = lib.flatten (
lib.mapAttrsToList (_name: machineConfig: machineConfig.settings.certificate.searchDomains or [ ]) (
@@ -47,7 +46,7 @@
)
);
# Merge client's searchDomains with all servers' searchDomains
searchDomains = uniqueStrings (settings.certificate.searchDomains ++ allServerSearchDomains);
searchDomains = lib.uniqueStrings (settings.certificate.searchDomains ++ allServerSearchDomains);
in
{
clan.core.vars.generators.openssh-ca = lib.mkIf (searchDomains != [ ]) {

View File

@@ -22,6 +22,7 @@ in
../../clanServices/syncthing
# Required modules
../../nixosModules/clanCore
../../nixosModules/machineModules
# Dependencies like clan-cli
../../pkgs/clan-cli
];

View File

@@ -140,9 +140,6 @@
pkgs,
...
}:
let
uniqueStrings = list: builtins.attrNames (builtins.groupBy lib.id list);
in
{
imports = [
(import ./shared.nix {
@@ -159,7 +156,7 @@
config = {
systemd.services.zerotier-inventory-autoaccept =
let
machines = uniqueStrings (
machines = lib.uniqueStrings (
(lib.optionals (roles ? moon) (lib.attrNames roles.moon.machines))
++ (lib.optionals (roles ? controller) (lib.attrNames roles.controller.machines))
++ (lib.optionals (roles ? peer) (lib.attrNames roles.peer.machines))

View File

@@ -21,6 +21,7 @@ in
../../clanServices/zerotier
# Required modules
../../nixosModules/clanCore
../../nixosModules/machineModules
# Dependencies like clan-cli
../../pkgs/clan-cli
];

6
devFlake/flake.lock generated
View File

@@ -105,11 +105,11 @@
},
"nixpkgs-dev": {
"locked": {
"lastModified": 1761458099,
"narHash": "sha256-XeAdn1NidGKXSwlepyjH+n58hsCDqbpx1M8sdDM2Ggc=",
"lastModified": 1761631514,
"narHash": "sha256-VsXz+2W4DFBozzppbF9SXD9pNcv17Z+c/lYXvPJi/eI=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "d8cc1036c65d3c9468a91443a75b51276279ac61",
"rev": "a0b0d4b52b5f375658ca8371dc49bff171dbda91",
"type": "github"
},
"original": {

6
flake.lock generated
View File

@@ -71,11 +71,11 @@
]
},
"locked": {
"lastModified": 1760721282,
"narHash": "sha256-aAHphQbU9t/b2RRy2Eb8oMv+I08isXv2KUGFAFn7nCo=",
"lastModified": 1761339987,
"narHash": "sha256-IUaawVwItZKi64IA6kF6wQCLCzpXbk2R46dHn8sHkig=",
"owner": "nix-darwin",
"repo": "nix-darwin",
"rev": "c3211fcd0c56c11ff110d346d4487b18f7365168",
"rev": "7cd9aac79ee2924a85c211d21fafd394b06a38de",
"type": "github"
},
"original": {

View File

@@ -39,32 +39,10 @@ in
};
modules = [
clan-core.modules.clan.default
{
checks.minNixpkgsVersion = {
assertion = lib.versionAtLeast nixpkgs.lib.version "25.11";
message = ''
Nixpkgs version: ${nixpkgs.lib.version} is incompatible with clan-core. (>= 25.11 is recommended)
---
Your version of 'nixpkgs' seems too old for clan-core.
Please read: https://docs.clan.lol/guides/nixpkgs-flake-input
You can ignore this check by setting:
clan.checks.minNixpkgsVersion.ignore = true;
---
'';
};
}
];
};
apply =
config:
lib.deepSeq (lib.mapAttrs (
id: check:
if check.ignore || check.assertion then
null
else
throw "clan.checks.${id} failed with message\n${check.message}"
) config.checks) config;
# Important: !This logic needs to be kept in sync with lib.clan function!
apply = config: clan-core.lib.checkConfig config.checks config;
};
# Mapped flake toplevel outputs

19
lib/clan/checkConfig.nix Normal file
View File

@@ -0,0 +1,19 @@
{ lib, ... }:
/**
Function to assert clan configuration checks.
Arguments:
- 'checks' attribute of clan configuration
- Any: the returned configuration (can be anything, is just passed through)
*/
checks:
lib.deepSeq (
lib.mapAttrs (
id: check:
if check.ignore || check.assertion then
null
else
throw "clan.checks.${id} failed with message\n${check.message}"
) checks
)

View File

@@ -33,20 +33,23 @@
let
nixpkgs = self.inputs.nixpkgs or clan-core.inputs.nixpkgs;
nix-darwin = self.inputs.nix-darwin or clan-core.inputs.nix-darwin;
configuration = (
lib.evalModules {
class = "clan";
specialArgs = {
inherit
self
;
inherit
nixpkgs
nix-darwin
;
};
modules = [
clan-core.modules.clan.default
m
];
}
);
in
lib.evalModules {
class = "clan";
specialArgs = {
inherit
self
;
inherit
nixpkgs
nix-darwin
;
};
modules = [
clan-core.modules.clan.default
m
];
}
clan-core.clanLib.checkConfig configuration.config.checks configuration

View File

@@ -16,6 +16,8 @@ lib.fix (
*/
callLib = file: args: import file ({ inherit lib clanLib; } // args);
checkConfig = clanLib.callLib ./clan/checkConfig.nix { };
evalService = clanLib.callLib ./evalService.nix { };
# ------------------------------------
# ClanLib functions

View File

@@ -53,7 +53,12 @@ in
};
};
}).clan
{ config.directory = rootPath; };
{
directory = rootPath;
self = {
inputs.nixpkgs.lib.version = "25.11";
};
};
in
{
inherit vclan;
@@ -94,7 +99,12 @@ in
};
};
}).clan
{ config.directory = rootPath; };
{
directory = rootPath;
self = {
inputs.nixpkgs.lib.version = "25.11";
};
};
in
{
inherit vclan;

View File

@@ -21,6 +21,7 @@ in
../../../flakeModules
../../../lib
../../../nixosModules/clanCore
../../../nixosModules/machineModules
../../../machines
../../../inventory.json
../../../modules

View File

@@ -7,14 +7,10 @@
...
}:
let
inherit (lib) mkOption types;
inherit (lib) mkOption types uniqueStrings;
inherit (types) attrsWith submoduleWith;
errorContext = "Error context: ${lib.concatStringsSep "." _ctx}";
# TODO:
# Remove once this gets merged upstream; performs in O(n*log(n) instead of O(n^2))
# https://github.com/NixOS/nixpkgs/pull/355616/files
uniqueStrings = list: builtins.attrNames (builtins.groupBy lib.id list);
/**
Merges the role- and machine-settings using the role interface
@@ -81,6 +77,7 @@ let
applySettings =
instanceName: instance:
lib.mapAttrs (roleName: role: {
settings = config.instances.${instanceName}.roles.${roleName}.finalSettings.config;
machines = lib.mapAttrs (machineName: _v: {
settings =
config.instances.${instanceName}.roles.${roleName}.machines.${machineName}.finalSettings.config;
@@ -158,6 +155,29 @@ in
(
{ name, ... }@role:
{
options.finalSettings = mkOption {
default = evalMachineSettings instance.name role.name null role.config.settings { };
type = types.raw;
description = ''
Final evaluated settings of the curent-machine
This contains the merged and evaluated settings of the role interface,
the role settings and the machine settings.
Type: 'configuration' as returned by 'lib.evalModules'
'';
apply = lib.warn ''
=== WANRING ===
'roles.<roleName>.settings' do not contain machine specific settings.
Prefer `machines.<machineName>.settings` instead. (i.e `perInstance: roles.<roleName>.machines.<machineName>.settings`)
If you have a use-case that requires access to the original role settings without machine overrides.
Contact us via matrix (https://matrix.to/#/#clan:clan.lol) or file an issue: https://git.clan.lol
This feature will be removed in the next release
'';
};
# instances.{instanceName}.roles.{roleName}.machines
options.machines = mkOption {
description = ''
@@ -859,7 +879,11 @@ in
instanceRes.nixosModule
]
++ (map (
s: if builtins.typeOf s == "string" then "${directory}/${s}" else s
s:
if builtins.typeOf s == "string" then
lib.warn "String types for 'extraModules' will be deprecated - ${s}" "${directory}/${s}"
else
lib.setDefaultModuleLocation "via inventory.instances.${instanceName}.roles.${roleName}" s
) instanceCfg.roles.${roleName}.extraModules);
};
}

View File

@@ -137,6 +137,7 @@ in
settings = { };
};
};
settings = { };
};
peer = {
machines = {
@@ -146,6 +147,9 @@ in
};
};
};
settings = {
timeout = "foo-peer";
};
};
};
settings = {

View File

@@ -102,18 +102,23 @@ in
specificRoleSettings =
res.importedModulesEvaluated.self-A.result.allMachines.jon.passthru.instances.instance_foo.roles.peer;
};
expected = rec {
expected = {
hasMachineSettings = true;
hasRoleSettings = false;
hasRoleSettings = true;
specificMachineSettings = {
timeout = "foo-peer-jon";
};
specificRoleSettings = {
machines = {
jon = {
settings = specificMachineSettings;
settings = {
timeout = "foo-peer-jon";
};
};
};
settings = {
timeout = "foo-peer";
};
};
};
};

View File

@@ -212,6 +212,36 @@ in
};
};
test_clan_check_simple_fail =
let
eval = clan {
checks.constFail = {
assertion = false;
message = "This is a constant failure";
};
};
in
{
result = eval;
expr = eval.config;
expectedError.type = "ThrownError";
expectedError.msg = "This is a constant failure";
};
test_clan_check_simple_pass =
let
eval = clan {
checks.constFail = {
assertion = true;
message = "This is a constant success";
};
};
in
{
result = eval;
expr = lib.seq eval.config 42;
expected = 42;
};
test_get_var_machine =
let
varsLib = import ./vars.nix { };

16
modules/clan/checks.nix Normal file
View File

@@ -0,0 +1,16 @@
{ lib, nixpkgs, ... }:
{
checks.minNixpkgsVersion = {
assertion = lib.versionAtLeast nixpkgs.lib.version "25.11";
message = ''
Nixpkgs version: ${nixpkgs.lib.version} is incompatible with clan-core. (>= 25.11 is recommended)
---
Your version of 'nixpkgs' seems too old for clan-core.
Please read: https://docs.clan.lol/guides/nixpkgs-flake-input
You can ignore this check by setting:
clan.checks.minNixpkgsVersion.ignore = true;
---
'';
};
}

View File

@@ -1,3 +1,14 @@
/**
Root 'clan' Module
Defines lib.clan and flake-parts.clan options
and all common logic for the 'clan' module.
- has Class _class = "clan"
- _module.args.clan-core: reference to clan-core flake
- _module.args.clanLib: reference to lib.clan function
*/
{ clan-core }:
{
_class = "clan";
@@ -6,7 +17,8 @@
inherit (clan-core) clanLib;
};
imports = [
./top-level-interface.nix
./module.nix
./interface.nix
./checks.nix
];
}

View File

@@ -100,7 +100,7 @@ let
_: machine:
machine.extendModules {
modules = [
(lib.modules.importApply ../machineModules/overridePkgs.nix {
(lib.modules.importApply ../../nixosModules/machineModules/overridePkgs.nix {
pkgs = pkgsFor.${system};
})
];
@@ -167,6 +167,9 @@ in
{ ... }@args:
let
_class =
# _class was added in https://github.com/NixOS/nixpkgs/pull/395141
# Clan relies on it to determine which modules to load
# people need to use at least that version of nixpkgs
args._class or (throw ''
Your version of nixpkgs is incompatible with the latest clan.
Please update nixpkgs input to the latest nixos-unstable or nixpkgs-unstable.
@@ -176,7 +179,7 @@ in
in
{
imports = [
(lib.modules.importApply ../machineModules/forName.nix {
(lib.modules.importApply ../../nixosModules/machineModules/forName.nix {
inherit (config.inventory) meta;
inherit
name
@@ -222,6 +225,18 @@ in
inventoryClass =
let
flakeInputs = config.self.inputs;
# Compute the relative directory path
selfStr = toString config.self;
dirStr = toString directory;
relativeDirectory =
if selfStr == dirStr then
""
else if lib.hasPrefix selfStr dirStr then
lib.removePrefix (selfStr + "/") dirStr
else
# This shouldn't happen in normal usage, but can occur when
# the flake is copied (e.g., in tests). Fall back to empty string.
"";
in
{
_module.args = {
@@ -230,7 +245,12 @@ in
imports = [
../inventoryClass/default.nix
{
inherit inventory directory flakeInputs;
inherit
inventory
directory
flakeInputs
relativeDirectory
;
exportsModule = config.exportsModule;
}
(

View File

@@ -1,3 +1,28 @@
/**
The templates submodule
'clan.templates'
Different kinds supported:
- clan templates: 'clan.templates.clan'
- disko templates: 'clan.templates.disko'
- machine templates: 'clan.templates.machine'
A template has the form:
```nix
{
description: string; # short summary what the template contains
path: path; # path to the template
}
```
The clan API copies the template from the given 'path'
into a target folder. For example,
`./machines/<machine-name>` for 'machine' templates.
*/
{
lib,
...

View File

@@ -81,6 +81,14 @@ in
directory = mkOption {
type = types.path;
};
relativeDirectory = mkOption {
type = types.str;
internal = true;
description = ''
The relative directory path from the flake root to the clan directory.
Empty string if directory equals the flake root.
'';
};
machines = mkOption {
type = types.attrsOf (submodule ({
options = {

View File

@@ -44,12 +44,6 @@ in
description = ''
List of additionally imported `.nix` expressions.
Supported types:
- **Strings**: Interpreted relative to the 'directory' passed to `lib.clan`.
- **Paths**: should be relative to the current file.
- **Any**: Nix expression must be serializable to JSON.
!!! Note
**The import only happens if the machine is part of the service or role.**
@@ -74,7 +68,7 @@ in
```
'';
default = [ ];
type = types.listOf types.deferredModule;
type = types.listOf types.raw;
};
};
}

View File

@@ -3,6 +3,7 @@
directory,
meta,
}:
# The following is a nixos/darwin module
{
_class,
lib,

View File

@@ -225,7 +225,9 @@ def generate_facts(
raise ClanError(msg)
if not was_regenerated and len(machines) > 0:
log.info("All secrets and facts are already up to date")
pass
# Remove message until facts has been propertly deleted
# log.info("All secrets and facts are already up to date")
return was_regenerated

View File

@@ -243,7 +243,11 @@ API.register(get_system_file)
if "oneOf" not in return_type:
msg = (
f"Return type of function '{name}' is not a union type. Expected a union of Success and Error types."
# @DavHau: no idea wy exactly this leads to the "oneOf" ot being present, but this should help
# If the SuccessData type is unsupported it was dropped by Union narrowing.
# This is probably an antifeature
# Introduced because run_generator wanted to use:
# Callable[[Generator], dict[str, str]]
# In its function signature.
"Hint: When using dataclasses as return types, ensure they don't contain public fields with non-serializable types"
)
raise JSchemaTypeError(msg)

View File

@@ -156,14 +156,28 @@ def vm_state_dir(flake_url: str, vm_name: str) -> Path:
def machines_dir(flake: "Flake") -> Path:
# Determine the base path
if flake.is_local:
return flake.path / "machines"
base_path = flake.path
else:
store_path = flake.store_path
if store_path is None:
msg = "Invalid flake object. Doesn't have a store path"
raise ClanError(msg)
base_path = Path(store_path)
store_path = flake.store_path
if store_path is None:
msg = "Invalid flake object. Doesn't have a store path"
raise ClanError(msg)
return Path(store_path) / "machines"
# Get the clan directory configuration from Nix
# This is computed in Nix where store paths are consistent
# Returns "" if no custom directory is set
# Fall back to "" if the option doesn't exist (backwards compatibility)
try:
clan_dir = flake.select("clanInternals.inventoryClass.relativeDirectory")
except ClanError:
# Option doesn't exist in older clan-core versions
# Assume no custom directory
clan_dir = ""
return base_path / clan_dir / "machines"
def specific_machine_dir(machine: "MachineSpecProtocol") -> Path:

View File

@@ -5,15 +5,18 @@ from pathlib import Path
from typing import TYPE_CHECKING
from unittest.mock import MagicMock, patch
import clan_lib.llm.llm_types
import pytest
from clan_lib.flake.flake import Flake
from clan_lib.llm.llm_types import ModelConfig
from clan_lib.llm.orchestrator import get_llm_turn
from clan_lib.llm.service import create_llm_model, run_llm_service
from clan_lib.service_runner import create_service_manager
if TYPE_CHECKING:
from clan_lib.llm.llm_types import ChatResult
from clan_lib.llm.schemas import ChatMessage, SessionState
from clan_lib.llm.schemas import SessionState
import platform
def get_current_mode(session_state: "SessionState") -> str:
@@ -168,28 +171,80 @@ def llm_service() -> Iterator[None]:
service_manager.stop_service("ollama")
def execute_multi_turn_workflow(
user_request: str,
flake: Flake | MagicMock,
conversation_history: list["ChatMessage"] | None = None,
provider: str = "ollama",
session_state: "SessionState | None" = None,
) -> "ChatResult":
"""Execute the multi-turn workflow, auto-executing all pending operations.
@pytest.mark.service_runner
@pytest.mark.usefixtures("mock_nix_shell", "llm_service")
def test_full_conversation_flow(mock_flake: MagicMock) -> None:
"""Test the complete conversation flow by manually calling get_llm_turn at each step.
This simulates the behavior of the CLI auto-execute loop in workflow.py.
This test verifies:
- State transitions through discovery -> readme_fetch -> service_selection -> final_decision
- Each step returns the correct next_action
- Conversation history is preserved across turns
- Session state is correctly maintained
"""
flake = mock_flake
trace_file = Path("~/.ollama/container_test_llm_trace.json").expanduser()
trace_file.unlink(missing_ok=True) # Start fresh
provider = "ollama"
# Override DEFAULT_MODELS with 4-minute timeouts for container tests
clan_lib.llm.llm_types.DEFAULT_MODELS = {
"ollama": ModelConfig(
name="qwen3:4b-instruct",
provider="ollama",
timeout=300, # set inference timeout to 5 minutes as CI may be slow
temperature=0, # set randomness to 0 for consistent test results
),
}
# ========== STEP 1: Initial request (should return next_action for discovery) ==========
print_separator("STEP 1: Initial Request", char="=", width=80)
result = get_llm_turn(
user_request=user_request,
user_request="What VPN options do I have?",
flake=flake,
conversation_history=conversation_history,
provider=provider, # type: ignore[arg-type]
session_state=session_state,
execute_next_action=False,
trace_file=trace_file,
)
# Auto-execute any pending operations
while result.next_action:
# Should have next_action for discovery phase
assert result.next_action is not None, "Should have next_action for discovery"
assert result.next_action["type"] == "discovery"
assert result.requires_user_response is False
assert len(result.proposed_instances) == 0
assert "pending_discovery" in result.session_state
print(f" Next Action: {result.next_action['type']}")
print(f" Description: {result.next_action['description']}")
print_meta_info(result, turn=1, phase="Initial Request")
# ========== STEP 2: Execute discovery (should return next_action for readme_fetch) ==========
print_separator("STEP 2: Execute Discovery", char="=", width=80)
result = get_llm_turn(
user_request="",
flake=flake,
conversation_history=list(result.conversation_history),
provider=provider, # type: ignore[arg-type]
session_state=result.session_state,
execute_next_action=True,
trace_file=trace_file,
)
# Should have next_action for readme fetch OR a clarifying question
if result.next_action:
assert result.next_action["type"] == "fetch_readmes"
assert "pending_readme_fetch" in result.session_state
print(f" Next Action: {result.next_action['type']}")
print(f" Description: {result.next_action['description']}")
else:
# LLM asked a clarifying question
assert result.requires_user_response is True
assert len(result.assistant_message) > 0
print(f" Assistant Message: {result.assistant_message[:100]}...")
print_meta_info(result, turn=2, phase="Discovery Executed")
# ========== STEP 3: Execute readme fetch (if applicable) ==========
if result.next_action and result.next_action["type"] == "fetch_readmes":
print_separator("STEP 3: Execute Readme Fetch", char="=", width=80)
result = get_llm_turn(
user_request="",
flake=flake,
@@ -197,187 +252,74 @@ def execute_multi_turn_workflow(
provider=provider, # type: ignore[arg-type]
session_state=result.session_state,
execute_next_action=True,
trace_file=trace_file,
)
return result
# Should have next_action for service selection
assert result.next_action is not None
assert result.next_action["type"] == "service_selection"
assert "pending_service_selection" in result.session_state
print(f" Next Action: {result.next_action['type']}")
print(f" Description: {result.next_action['description']}")
print_meta_info(result, turn=3, phase="Readme Fetch Executed")
if platform.machine() == "aarch64":
pytest.skip(
"aarch64 detected: skipping readme/service-selection and final step for performance reasons"
)
@pytest.mark.service_runner
@pytest.mark.usefixtures("mock_nix_shell", "llm_service")
def test_full_conversation_flow(mock_flake: MagicMock) -> None:
"""Comprehensive test that exercises the complete conversation flow with the actual LLM service.
This test simulates a realistic multi-turn conversation that covers:
- Discovery phase: Initial request and LLM gathering information
- Service selection phase: User choosing from available options
- Final decision phase: Configuring the selected service with specific parameters
- State transitions: pending_service_selection -> pending_final_decision -> completion
- Conversation history preservation across all turns
- Error handling and edge cases
"""
flake = mock_flake
# ========== TURN 1: Discovery Phase - Initial vague request ==========
print_separator("TURN 1: Discovery Phase", char="=", width=80)
result = execute_multi_turn_workflow(
user_request="What VPN options do I have?",
flake=flake,
provider="ollama",
)
# Verify discovery phase behavior
assert result.requires_user_response is True, (
"Should require user response in discovery"
)
assert len(result.conversation_history) >= 2, (
"Should have user + assistant messages"
)
assert result.conversation_history[0]["role"] == "user"
assert result.conversation_history[0]["content"] == "What VPN options do I have?"
assert result.conversation_history[-1]["role"] == "assistant"
assert len(result.assistant_message) > 0, "Assistant should provide a response"
# After multi-turn execution, we may have either:
# - pending_service_selection (if LLM provided options and is waiting for choice)
# - pending_final_decision (if LLM directly selected a service)
# - no pending state (if LLM asked a clarifying question)
# No instances yet
assert len(result.proposed_instances) == 0
assert result.error is None
print_chat_exchange(
"What VPN options do I have?", result.assistant_message, result.session_state
)
print_meta_info(result, turn=1, phase="Discovery")
# ========== TURN 2: Service Selection Phase - User makes a choice ==========
print_separator("TURN 2: Service Selection", char="=", width=80)
user_msg_2 = "I'll use ZeroTier please"
result = execute_multi_turn_workflow(
user_request=user_msg_2,
flake=flake,
conversation_history=list(result.conversation_history),
provider="ollama",
session_state=result.session_state,
)
# Verify conversation history growth and preservation
assert len(result.conversation_history) > 2, "History should grow"
assert result.conversation_history[0]["content"] == "What VPN options do I have?"
assert result.conversation_history[2]["content"] == "I'll use ZeroTier please"
# Should either ask for configuration details or provide direct config
# Most likely will ask for more details (pending_final_decision)
if result.requires_user_response:
# LLM is asking for configuration details
assert len(result.assistant_message) > 0
# Should transition to final decision phase
if "pending_final_decision" not in result.session_state:
# Might still be in service selection asking clarifications
assert "pending_service_selection" in result.session_state
else:
# LLM provided configuration immediately (less likely)
assert len(result.proposed_instances) > 0
assert result.proposed_instances[0]["module"]["name"] == "zerotier"
print_chat_exchange(user_msg_2, result.assistant_message, result.session_state)
print_meta_info(result, turn=2, phase="Service Selection")
# ========== Continue conversation until we reach final decision or completion ==========
max_turns = 10
turn_count = 2
while result.requires_user_response and turn_count < max_turns:
turn_count += 1
# Determine appropriate response based on current state
if "pending_service_selection" in result.session_state:
# Still selecting service
user_request = "Yes, ZeroTier"
phase = "Service Selection (continued)"
elif "pending_final_decision" in result.session_state:
# Configuring the service
user_request = "Set up gchq-local as controller, qube-email as moon, and wintux as peer"
phase = "Final Configuration"
else:
# Generic continuation
user_request = "Yes, that sounds good. Use gchq-local as controller."
phase = "Continuing Conversation"
print_separator(f"TURN {turn_count}: {phase}", char="=", width=80)
result = execute_multi_turn_workflow(
user_request=user_request,
# ========== STEP 4: Execute service selection ==========
print_separator("STEP 4: Execute Service Selection", char="=", width=80)
result = get_llm_turn(
user_request="I want ZeroTier.",
flake=flake,
conversation_history=list(result.conversation_history),
provider="ollama",
provider=provider, # type: ignore[arg-type]
session_state=result.session_state,
execute_next_action=True,
trace_file=trace_file,
)
# Verify conversation history continues to grow
assert len(result.conversation_history) == (turn_count * 2), (
f"History should have {turn_count * 2} messages (turn {turn_count})"
)
# Should either have next_action for final_decision OR a clarifying question
if result.next_action:
assert result.next_action["type"] == "final_decision"
assert "pending_final_decision" in result.session_state
print(f" Next Action: {result.next_action['type']}")
print(f" Description: {result.next_action['description']}")
else:
# LLM asked a clarifying question during service selection
assert result.requires_user_response is True
assert len(result.assistant_message) > 0
print(f" Assistant Message: {result.assistant_message[:100]}...")
print_meta_info(result, turn=4, phase="Service Selection Executed")
# Verify history preservation
assert (
result.conversation_history[0]["content"] == "What VPN options do I have?"
)
# ========== STEP 5: Execute final decision (if applicable) ==========
if result.next_action and result.next_action["type"] == "final_decision":
print_separator("STEP 5: Execute Final Decision", char="=", width=80)
result = get_llm_turn(
user_request="",
flake=flake,
conversation_history=list(result.conversation_history),
provider=provider, # type: ignore[arg-type]
session_state=result.session_state,
execute_next_action=True,
trace_file=trace_file,
)
print_chat_exchange(
user_request, result.assistant_message, result.session_state
)
print_meta_info(result, turn=turn_count, phase=phase)
# Should either have proposed_instances OR ask a clarifying question
if result.proposed_instances:
assert len(result.proposed_instances) > 0
assert result.next_action is None
print(f" Proposed Instances: {len(result.proposed_instances)}")
for inst in result.proposed_instances:
print(f" - {inst['module']['name']}")
else:
# LLM asked a clarifying question
assert result.requires_user_response is True
assert len(result.assistant_message) > 0
print(f" Assistant Message: {result.assistant_message[:100]}...")
print_meta_info(result, turn=5, phase="Final Decision Executed")
# Check for completion
if not result.requires_user_response:
print_separator("CONVERSATION COMPLETED", char="=", width=80)
break
# ========== Final Verification ==========
print_separator("FINAL VERIFICATION", char="=", width=80)
assert turn_count < max_turns, f"Conversation took too many turns ({turn_count})"
# If conversation completed, verify we have valid configuration
if not result.requires_user_response:
assert len(result.proposed_instances) > 0, (
"Should have at least one proposed instance"
)
instance = result.proposed_instances[0]
# Verify instance structure
assert "module" in instance
assert "name" in instance["module"]
assert instance["module"]["name"] in [
"zerotier",
"wireguard",
"yggdrasil",
"mycelium",
]
# Should not be in pending state anymore
assert "pending_service_selection" not in result.session_state
assert "pending_final_decision" not in result.session_state
assert result.error is None, f"Should not have error: {result.error}"
print_separator("FINAL SUMMARY", char="-", width=80, double=False)
print(" Status: SUCCESS")
print(f" Module Name: {instance['module']['name']}")
print(f" Total Turns: {turn_count}")
print(f" Final History Length: {len(result.conversation_history)} messages")
if "roles" in instance:
roles_list = ", ".join(instance["roles"].keys())
print(f" Configuration Roles: {roles_list}")
print(" Errors: None")
print("-" * 80)
else:
# Conversation didn't complete but should have made progress
assert len(result.conversation_history) > 2
assert result.error is None
print_separator("FINAL SUMMARY", char="-", width=80, double=False)
print(" Status: IN PROGRESS")
print(f" Total Turns: {turn_count}")
print(f" Current State: {list(result.session_state.keys())}")
print(f" History Length: {len(result.conversation_history)} messages")
print("-" * 80)
# Verify conversation history has grown
assert len(result.conversation_history) > 0
assert result.conversation_history[0]["content"] == "What VPN options do I have?"

View File

@@ -149,6 +149,7 @@ def call_openai_api(
trace_file: Path | None = None,
stage: str = "unknown",
trace_metadata: dict[str, Any] | None = None,
temperature: float | None = None,
) -> OpenAIChatCompletionResponse:
"""Call the OpenAI API for chat completion.
@@ -160,6 +161,7 @@ def call_openai_api(
trace_file: Optional path to write trace entries for debugging
stage: Stage name for trace entries (default: "unknown")
trace_metadata: Optional metadata to include in trace entries
temperature: Sampling temperature (default: None = use API default)
Returns:
The parsed JSON response from the API
@@ -178,6 +180,8 @@ def call_openai_api(
"messages": messages,
"tools": list(tools),
}
if temperature is not None:
payload["temperature"] = temperature
_debug_log_request("openai", messages, tools)
url = "https://api.openai.com/v1/chat/completions"
headers = {
@@ -256,6 +260,7 @@ def call_claude_api(
trace_file: Path | None = None,
stage: str = "unknown",
trace_metadata: dict[str, Any] | None = None,
temperature: float | None = None,
) -> OpenAIChatCompletionResponse:
"""Call the Claude API (via OpenAI-compatible endpoint) for chat completion.
@@ -268,6 +273,7 @@ def call_claude_api(
trace_file: Optional path to write trace entries for debugging
stage: Stage name for trace entries (default: "unknown")
trace_metadata: Optional metadata to include in trace entries
temperature: Sampling temperature (default: None = use API default)
Returns:
The parsed JSON response from the API
@@ -293,6 +299,8 @@ def call_claude_api(
"messages": messages,
"tools": list(tools),
}
if temperature is not None:
payload["temperature"] = temperature
_debug_log_request("claude", messages, tools)
url = f"{base_url}chat/completions"
@@ -372,6 +380,7 @@ def call_ollama_api(
stage: str = "unknown",
max_tokens: int | None = None,
trace_metadata: dict[str, Any] | None = None,
temperature: float | None = None,
) -> OllamaChatResponse:
"""Call the Ollama API for chat completion.
@@ -384,6 +393,7 @@ def call_ollama_api(
stage: Stage name for trace entries (default: "unknown")
max_tokens: Maximum number of tokens to generate (default: None = unlimited)
trace_metadata: Optional metadata to include in trace entries
temperature: Sampling temperature (default: None = use API default)
Returns:
The parsed JSON response from the API
@@ -399,9 +409,14 @@ def call_ollama_api(
"tools": list(tools),
}
# Add max_tokens limit if specified
# Add options for max_tokens and temperature if specified
options: dict[str, int | float] = {}
if max_tokens is not None:
payload["options"] = {"num_predict": max_tokens} # type: ignore[typeddict-item]
options["num_predict"] = max_tokens
if temperature is not None:
options["temperature"] = temperature
if options:
payload["options"] = options # type: ignore[typeddict-item]
_debug_log_request("ollama", messages, tools)
url = "http://localhost:11434/api/chat"

View File

@@ -73,19 +73,21 @@ class ModelConfig:
name: The model identifier/name
provider: The LLM provider
timeout: Request timeout in seconds (default: 120)
temperature: Sampling temperature for the model (default: None = use API default)
"""
name: str
provider: Literal["openai", "ollama", "claude"]
timeout: int = 120
temperature: float | None = None
# Default model configurations for each provider
DEFAULT_MODELS: dict[Literal["openai", "ollama", "claude"], ModelConfig] = {
"openai": ModelConfig(name="gpt-4o", provider="openai", timeout=60),
"claude": ModelConfig(name="claude-sonnet-4-5", provider="claude", timeout=60),
"ollama": ModelConfig(name="qwen3:4b-instruct", provider="ollama", timeout=120),
"ollama": ModelConfig(name="qwen3:4b-instruct", provider="ollama", timeout=180),
}

View File

@@ -100,6 +100,7 @@ def get_llm_discovery_phase(
trace_file=trace_file,
stage="discovery",
trace_metadata=trace_metadata,
temperature=model_config.temperature,
)
function_calls, message_content = parse_openai_response(
openai_response, provider="openai"
@@ -113,6 +114,7 @@ def get_llm_discovery_phase(
trace_file=trace_file,
stage="discovery",
trace_metadata=trace_metadata,
temperature=model_config.temperature,
)
function_calls, message_content = parse_openai_response(
claude_response, provider="claude"
@@ -127,6 +129,7 @@ def get_llm_discovery_phase(
stage="discovery",
max_tokens=300, # Limit output for discovery phase (get_readme calls or short question)
trace_metadata=trace_metadata,
temperature=model_config.temperature,
)
function_calls, message_content = parse_ollama_response(
ollama_response, provider="ollama"
@@ -249,6 +252,7 @@ def get_llm_service_selection(
trace_file=trace_file,
stage="select_service",
trace_metadata=trace_metadata,
temperature=model_config.temperature,
)
function_calls, message_content = parse_openai_response(
openai_response, provider="openai"
@@ -262,6 +266,7 @@ def get_llm_service_selection(
trace_file=trace_file,
stage="select_service",
trace_metadata=trace_metadata,
temperature=model_config.temperature,
)
function_calls, message_content = parse_openai_response(
claude_response, provider="claude"
@@ -276,6 +281,7 @@ def get_llm_service_selection(
stage="select_service",
max_tokens=600, # Allow space for summary
trace_metadata=trace_metadata,
temperature=model_config.temperature,
)
function_calls, message_content = parse_ollama_response(
ollama_response, provider="ollama"
@@ -447,6 +453,7 @@ def get_llm_final_decision(
trace_file=trace_file,
stage="final_decision",
trace_metadata=trace_metadata,
temperature=model_config.temperature,
)
function_calls, message_content = parse_openai_response(
openai_response, provider="openai"
@@ -462,6 +469,7 @@ def get_llm_final_decision(
trace_file=trace_file,
stage="final_decision",
trace_metadata=trace_metadata,
temperature=model_config.temperature,
)
function_calls, message_content = parse_openai_response(
claude_response, provider="claude"
@@ -477,6 +485,7 @@ def get_llm_final_decision(
stage="final_decision",
max_tokens=500, # Limit output to prevent excessive verbosity
trace_metadata=trace_metadata,
temperature=model_config.temperature,
)
function_calls, message_content = parse_ollama_response(
ollama_response, provider="ollama"

View File

@@ -231,6 +231,7 @@ class ChatCompletionRequestPayload(TypedDict, total=False):
messages: list[ChatMessage]
tools: list[ToolDefinition]
stream: NotRequired[bool]
temperature: NotRequired[float]
@dataclass(frozen=True)

View File

@@ -28,13 +28,11 @@ class InventoryInstanceRoleMachine(TypedDict):
InventoryInstanceRoleExtramodulesType = list[Unknown]
InventoryInstanceRoleMachinesType = dict[str, InventoryInstanceRoleMachine]
InventoryInstanceRoleSettingsType = Unknown
InventoryInstanceRoleTagsType = dict[str, Any] | list[str]
class InventoryInstanceRole(TypedDict):
extraModules: NotRequired[InventoryInstanceRoleExtramodulesType]
machines: NotRequired[InventoryInstanceRoleMachinesType]
settings: NotRequired[InventoryInstanceRoleSettingsType]
tags: NotRequired[InventoryInstanceRoleTagsType]

View File

@@ -5,7 +5,7 @@ from dataclasses import dataclass
from pathlib import Path
from typing import TYPE_CHECKING, Literal, TypedDict
from clan_lib.cmd import RunOpts, run
from clan_lib.cmd import Log, RunOpts, run
from clan_lib.errors import ClanError
if TYPE_CHECKING:
@@ -70,7 +70,7 @@ class SystemdUserService:
"""Run systemctl command with --user flag."""
return run(
["systemctl", "--user", action, f"{service_name}.service"],
RunOpts(check=False),
RunOpts(check=False, log=Log.NONE),
)
def _get_property(self, service_name: str, prop: str) -> str:
@@ -240,11 +240,15 @@ class SystemdUserService:
service_name = self._service_name(name)
result = self._systemctl("stop", service_name)
if result.returncode != 0 and "not loaded" not in result.stderr.lower():
if (
result.returncode != 0
and "not loaded" not in result.stderr.lower()
and "does not exist" not in result.stderr.lower()
):
msg = f"Failed to stop service: {result.stderr}"
raise ClanError(msg)
self._systemctl("disable", service_name) # Ignore errors for transient units
result = self._systemctl("disable", service_name)
unit_file = self._unit_file_path(name)
if unit_file.exists():

View File

@@ -241,6 +241,11 @@ def generate_dataclass(
# If we are at the top level, and the attribute name is not explicitly included we only do shallow
field_name = prop.replace("-", "_")
# Skip "extraModules"
# TODO: Introduce seperate model that is tied to the serialization format
if "extraModules" in field_name:
continue
# if len(attr_path) == 0 and prop in shallow_attrs:
# field_def = field_name, "dict[str, Any]"
# fields_with_default.append(field_def)