From 4010953041d233cb00706ae507feb2486f746151 Mon Sep 17 00:00:00 2001 From: Qubasa Date: Wed, 22 Oct 2025 15:31:18 +0200 Subject: [PATCH 1/5] clan_lib: Add llm api --- pkgs/clan-cli/clan_lib/errors/__init__.py | 13 + pkgs/clan-cli/clan_lib/llm/api.py | 200 +++++++ pkgs/clan-cli/clan_lib/llm/endpoints.py | 555 ++++++++++++++++++ pkgs/clan-cli/clan_lib/llm/llm.py | 65 ++ pkgs/clan-cli/clan_lib/llm/llm_types.py | 142 +++++ pkgs/clan-cli/clan_lib/llm/orchestrator.py | 415 +++++++++++++ pkgs/clan-cli/clan_lib/llm/phases.py | 519 ++++++++++++++++ pkgs/clan-cli/clan_lib/llm/prompts.py | 258 ++++++++ pkgs/clan-cli/clan_lib/llm/schemas.py | 551 +++++++++++++++++ pkgs/clan-cli/clan_lib/llm/service.py | 75 +++ pkgs/clan-cli/clan_lib/llm/trace.py | 126 ++++ pkgs/clan-cli/clan_lib/llm/utils.py | 83 +++ .../clan_lib/nix/allowed-packages.json | 3 +- pkgs/clan-cli/openapi.py | 1 + 14 files changed, 3005 insertions(+), 1 deletion(-) create mode 100644 pkgs/clan-cli/clan_lib/llm/api.py create mode 100644 pkgs/clan-cli/clan_lib/llm/endpoints.py create mode 100644 pkgs/clan-cli/clan_lib/llm/llm.py create mode 100644 pkgs/clan-cli/clan_lib/llm/llm_types.py create mode 100644 pkgs/clan-cli/clan_lib/llm/orchestrator.py create mode 100644 pkgs/clan-cli/clan_lib/llm/phases.py create mode 100644 pkgs/clan-cli/clan_lib/llm/prompts.py create mode 100644 pkgs/clan-cli/clan_lib/llm/schemas.py create mode 100644 pkgs/clan-cli/clan_lib/llm/service.py create mode 100644 pkgs/clan-cli/clan_lib/llm/trace.py create mode 100644 pkgs/clan-cli/clan_lib/llm/utils.py diff --git a/pkgs/clan-cli/clan_lib/errors/__init__.py b/pkgs/clan-cli/clan_lib/errors/__init__.py index d50257ccf..c5a34df2f 100644 --- a/pkgs/clan-cli/clan_lib/errors/__init__.py +++ b/pkgs/clan-cli/clan_lib/errors/__init__.py @@ -186,3 +186,16 @@ class ClanCmdError(ClanError): def __repr__(self) -> str: return f"ClanCmdError({self.cmd})" + + +class ClanAiError(ClanError): + """Exception raised for errors during AI/LLM operations.""" + + 
def __init__( + self, + msg: str, + *, + description: str | None = None, + location: str = "AI Processing", + ) -> None: + super().__init__(msg, description=description, location=location) diff --git a/pkgs/clan-cli/clan_lib/llm/api.py b/pkgs/clan-cli/clan_lib/llm/api.py new file mode 100644 index 000000000..a0c150adc --- /dev/null +++ b/pkgs/clan-cli/clan_lib/llm/api.py @@ -0,0 +1,200 @@ +"""High-level API functions for LLM interactions, suitable for HTTP APIs and web UIs. + +This module provides a clean, stateless API for integrating LLM functionality into +web applications and HTTP services. It wraps the complex multi-stage workflow into +simple function calls with serializable inputs and outputs. +""" + +from pathlib import Path +from typing import Any, Literal, TypedDict, cast + +from clan_lib.api import API +from clan_lib.flake.flake import Flake + +from .llm import ( + DEFAULT_MODELS, + ChatResult, + DiscoveryProgressEvent, + FinalDecisionProgressEvent, + ModelConfig, + ProgressCallback, + ProgressEvent, + ReadmeFetchProgressEvent, + get_model_config, + process_chat_turn, +) +from .schemas import ChatMessage, ConversationHistory, SessionState + + +class ChatTurnRequest(TypedDict, total=False): + """Request payload for a chat turn. + + Attributes: + user_message: The user's message/request + conversation_history: Optional list of prior messages in the conversation + provider: The LLM provider to use (default: "claude") + trace_file: Optional path to write LLM interaction traces for debugging + session_state: Opaque state returned from the previous turn + + """ + + user_message: str + conversation_history: ConversationHistory | None + provider: Literal["openai", "ollama", "claude"] + trace_file: Path | None + session_state: SessionState | None + + +class ChatTurnResponse(TypedDict): + """Response payload for a chat turn. 
+ + Attributes: + proposed_instances: List of inventory instances suggested by the LLM + conversation_history: Updated conversation history after this turn + assistant_message: Message from the assistant + requires_user_response: Whether the assistant is waiting for user input + error: Error message if something went wrong (None on success) + session_state: State blob to pass into the next turn when continuing the workflow + + """ + + proposed_instances: list[dict[str, Any]] + conversation_history: list[ChatMessage] + assistant_message: str + requires_user_response: bool + error: str | None + session_state: SessionState + + +class ProgressEventResponse(TypedDict): + """Progress event for streaming updates. + + Attributes: + stage: The current stage of processing + status: The status within that stage (if applicable) + count: Count of items (for readme_fetch stage) + message: Message content (for conversation stage) + + """ + + stage: str + status: str | None + count: int | None + message: str | None + + +@API.register +def get_llm_turn( + flake: Flake, + request: ChatTurnRequest, + progress_callback: ProgressCallback | None = None, +) -> ChatTurnResponse: + """Process a single chat turn through the LLM workflow. + + This is the main entry point for HTTP APIs and web UIs to interact with + the LLM functionality. It handles: + - Service discovery + - Documentation fetching + - Final decision making + - Conversation management + + Args: + flake: The Flake object representing the clan configuration + request: The chat turn request containing user message and optional history + progress_callback: Optional callback for progress updates + + Returns: + ChatTurnResponse with proposed instances and conversation state + + Example: + >>> from clan_lib.flake.flake import Flake + >>> flake = Flake("/path/to/clan") + >>> request: ChatTurnRequest = { + ... "user_message": "Set up a web server", + ... "provider": "claude" + ... 
} + >>> response = get_llm_turn(flake, request) + >>> if response["proposed_instances"]: + ... print("LLM suggests:", response["proposed_instances"]) + >>> if response["requires_user_response"]: + ... print("Assistant asks:", response["assistant_message"]) + + """ + result: ChatResult = process_chat_turn( + user_request=request["user_message"], + flake=flake, + conversation_history=request.get("conversation_history"), + provider=request.get("provider", "claude"), + progress_callback=progress_callback, + trace_file=request.get("trace_file"), + session_state=request.get("session_state"), + ) + + # Convert frozen tuples to lists for JSON serialization + return ChatTurnResponse( + proposed_instances=[dict(inst) for inst in result.proposed_instances], + conversation_history=list(result.conversation_history), + assistant_message=result.assistant_message, + requires_user_response=result.requires_user_response, + error=result.error, + session_state=cast("SessionState", dict(result.session_state)), + ) + + +def progress_event_to_dict(event: ProgressEvent) -> ProgressEventResponse: + """Convert a ProgressEvent to a dictionary suitable for JSON serialization. + + This helper function is useful for streaming progress updates over HTTP + (e.g., Server-Sent Events or WebSockets). 
+ + Args: + event: The progress event to convert + + Returns: + Dictionary representation of the event + + Example: + >>> from clan_lib.llm.llm import DiscoveryProgressEvent + >>> event = DiscoveryProgressEvent(status="analyzing") + >>> progress_event_to_dict(event) + {'stage': 'discovery', 'status': 'analyzing', 'count': None, 'message': None} + + """ + base_response: ProgressEventResponse = { + "stage": event.stage, + "status": None, + "count": None, + "message": None, + } + + if isinstance(event, (DiscoveryProgressEvent, FinalDecisionProgressEvent)): + base_response["status"] = event.status + elif isinstance(event, ReadmeFetchProgressEvent): + base_response["status"] = event.status + base_response["count"] = event.count + # ConversationProgressEvent has message field + elif hasattr(event, "message"): + base_response["message"] = event.message # type: ignore[attr-defined] + if hasattr(event, "awaiting_response"): + base_response["status"] = ( + "awaiting_response" + if event.awaiting_response # type: ignore[attr-defined] + else "complete" + ) + + return base_response + + +# Re-export types for convenience +__all__ = [ + "DEFAULT_MODELS", + "ChatTurnRequest", + "ChatTurnResponse", + "ModelConfig", + "ProgressCallback", + "ProgressEvent", + "ProgressEventResponse", + "get_llm_turn", + "get_model_config", + "progress_event_to_dict", +] diff --git a/pkgs/clan-cli/clan_lib/llm/endpoints.py b/pkgs/clan-cli/clan_lib/llm/endpoints.py new file mode 100644 index 000000000..bd5e1e553 --- /dev/null +++ b/pkgs/clan-cli/clan_lib/llm/endpoints.py @@ -0,0 +1,555 @@ +"""API client code for LLM providers (OpenAI, Claude, and Ollama).""" + +import json +import logging +import os +import time +import urllib.request +from collections.abc import Sequence +from http import HTTPStatus +from pathlib import Path +from typing import Any, cast +from urllib.error import HTTPError, URLError + +from clan_lib.errors import ClanError + +from .schemas import ( + ChatCompletionRequestPayload, + 
ChatMessage, + FunctionCallType, + MessageContent, + OllamaChatResponse, + OpenAIChatCompletionResponse, + ToolDefinition, +) +from .trace import ( + format_messages_for_trace, + format_tools_for_trace, + write_trace_entry, +) + +log = logging.getLogger(__name__) + + +def _stringify_message_content(content: MessageContent | None) -> str: + """Convert message content payloads to human-readable text for logging.""" + if content is None: + return "" + if isinstance(content, str): + return content + if isinstance(content, list): + parts: list[str] = [] + for item in content: + if isinstance(item, dict) and "text" in item: + text_part = item.get("text") + if isinstance(text_part, str): + parts.append(text_part) + continue + parts.append(json.dumps(item, ensure_ascii=False)) + return "\n".join(parts) + return json.dumps(content, ensure_ascii=False) + + +def _summarize_tools( + tools: Sequence[ToolDefinition], +) -> str: + """Create a concise comma-separated list of tool names for logging.""" + names: list[str] = [] + for tool in tools: + if not isinstance(tool, dict): + continue + function_block = tool.get("function") + if isinstance(function_block, dict) and "name" in function_block: + name = function_block.get("name") + else: + name = tool.get("name") + if isinstance(name, str): + names.append(name) + return ", ".join(names) + + +def _debug_log_request( + provider: str, + messages: list[ChatMessage], + tools: Sequence[ToolDefinition], +) -> None: + """Emit structured debug logs for outbound LLM requests.""" + if not log.isEnabledFor(logging.DEBUG): + return + + log.debug("[%s] >>> sending %d message(s)", provider, len(messages)) + for idx, message in enumerate(messages): + role = message.get("role", "unknown") + content_str = _stringify_message_content(message.get("content")) + log.debug( + "[%s] >>> message[%02d] role=%s len=%d", + provider, + idx, + role, + len(content_str), + ) + if content_str: + log.debug("[%s] >>> message[%02d] content:\n%s", provider, idx, 
content_str) + + if tools: + log.debug("[%s] >>> tool summary: %s", provider, _summarize_tools(tools)) + log.debug( + "[%s] >>> tool payload:\n%s", + provider, + json.dumps(list(tools), indent=2, ensure_ascii=False), + ) + + +def _debug_log_response( + provider: str, + text: str, + function_calls: list[FunctionCallType], +) -> None: + """Emit structured debug logs for inbound LLM responses.""" + if not log.isEnabledFor(logging.DEBUG): + return + + if text: + log.debug( + "[%s] <<< response text len=%d\n%s", + provider, + len(text), + text, + ) + else: + log.debug("[%s] <<< no textual response", provider) + + if not function_calls: + log.debug("[%s] <<< no function calls", provider) + return + + for idx, call in enumerate(function_calls): + args_repr = call.get("arguments", "") + formatted_args = args_repr + if isinstance(args_repr, str): + try: + parsed_args = json.loads(args_repr) + formatted_args = json.dumps(parsed_args, indent=2, ensure_ascii=False) + except json.JSONDecodeError: + formatted_args = args_repr + log.debug( + "[%s] <<< call[%02d] name=%s\n%s", + provider, + idx, + call.get("name"), + formatted_args, + ) + + +def call_openai_api( + model: str, + messages: list[ChatMessage], + tools: Sequence[ToolDefinition], + timeout: int = 60, + trace_file: Path | None = None, + stage: str = "unknown", + trace_metadata: dict[str, Any] | None = None, +) -> OpenAIChatCompletionResponse: + """Call the OpenAI API for chat completion. 
+ + Args: + model: The OpenAI model to use + messages: List of message dictionaries + tools: List of OpenAI function schemas + timeout: Request timeout in seconds (default: 60) + trace_file: Optional path to write trace entries for debugging + stage: Stage name for trace entries (default: "unknown") + trace_metadata: Optional metadata to include in trace entries + + Returns: + The parsed JSON response from the API + + Raises: + ClanError: If the API call fails + + """ + api_key = os.environ.get("OPENAI_API_KEY") + if not api_key: + msg = "OPENAI_API_KEY environment variable is required for OpenAI provider" + raise ClanError(msg) + + payload: ChatCompletionRequestPayload = { + "model": model, + "messages": messages, + "tools": list(tools), + } + _debug_log_request("openai", messages, tools) + url = "https://api.openai.com/v1/chat/completions" + headers = { + "Content-Type": "application/json", + "Authorization": f"Bearer {api_key}", + } + + start_time = time.time() + try: + req = urllib.request.Request( # noqa: S310 + url, + data=json.dumps(payload).encode("utf-8"), + headers=headers, + ) + with urllib.request.urlopen(req, timeout=timeout) as resp: # noqa: S310 + if resp.getcode() != HTTPStatus.OK.value: + msg = f"OpenAI API returned status {resp.getcode()}" + raise ClanError(msg) + + raw = resp.read().decode("utf-8") + response = cast("OpenAIChatCompletionResponse", json.loads(raw)) + + # Write trace if requested + if trace_file: + duration_ms = (time.time() - start_time) * 1000 + function_calls, message_content = parse_openai_response( + response, provider="openai" + ) + write_trace_entry( + trace_file=trace_file, + provider="openai", + model=model, + stage=stage, + request={ + "messages": format_messages_for_trace(messages), + "tools": format_tools_for_trace( + cast("list[dict[str, Any]]", list(tools)) + ), + }, + response={ + "function_calls": [ + { + "name": call["name"], + "arguments": json.loads(call["arguments"]) + if isinstance(call["arguments"], str) + 
else call["arguments"], + } + for call in function_calls + ], + "message": message_content, + }, + duration_ms=duration_ms, + metadata=trace_metadata, + ) + + return response + + except HTTPError as e: + error_body = e.read().decode("utf-8") if e.fp else "" + msg = f"OpenAI returned HTTP {e.code}: {error_body}" + raise ClanError(msg) from e + except URLError as e: + msg = "OpenAI API not reachable" + raise ClanError(msg) from e + except json.JSONDecodeError as e: + msg = "Failed to parse OpenAI API response" + raise ClanError(msg) from e + + +def call_claude_api( + model: str, + messages: list[ChatMessage], + tools: Sequence[ToolDefinition], + base_url: str | None = None, + timeout: int = 60, + trace_file: Path | None = None, + stage: str = "unknown", + trace_metadata: dict[str, Any] | None = None, +) -> OpenAIChatCompletionResponse: + """Call the Claude API (via OpenAI-compatible endpoint) for chat completion. + + Args: + model: The Claude model to use + messages: List of message dictionaries + tools: List of function schemas (OpenAI format) + base_url: Optional base URL for the API (defaults to https://api.anthropic.com/v1/) + timeout: Request timeout in seconds (default: 60) + trace_file: Optional path to write trace entries for debugging + stage: Stage name for trace entries (default: "unknown") + trace_metadata: Optional metadata to include in trace entries + + Returns: + The parsed JSON response from the API + + Raises: + ClanError: If the API call fails + + """ + api_key = os.environ.get("ANTHROPIC_API_KEY") + if not api_key: + msg = "ANTHROPIC_API_KEY environment variable is required for Claude provider" + raise ClanError(msg) + + if base_url is None: + base_url = os.environ.get("ANTHROPIC_BASE_URL", "https://api.anthropic.com/v1/") + + # Ensure base_url ends with / + if not base_url.endswith("/"): + base_url += "/" + + payload: ChatCompletionRequestPayload = { + "model": model, + "messages": messages, + "tools": list(tools), + } + 
_debug_log_request("claude", messages, tools) + + url = f"{base_url}chat/completions" + headers = { + "Content-Type": "application/json", + "Authorization": f"Bearer {api_key}", + } + + start_time = time.time() + try: + req = urllib.request.Request( # noqa: S310 + url, + data=json.dumps(payload).encode("utf-8"), + headers=headers, + ) + with urllib.request.urlopen(req, timeout=timeout) as resp: # noqa: S310 + if resp.getcode() != HTTPStatus.OK.value: + msg = f"Claude API returned status {resp.getcode()}" + raise ClanError(msg) + + raw = resp.read().decode("utf-8") + response = cast("OpenAIChatCompletionResponse", json.loads(raw)) + + # Write trace if requested + if trace_file: + duration_ms = (time.time() - start_time) * 1000 + function_calls, message_content = parse_openai_response( + response, provider="claude" + ) + write_trace_entry( + trace_file=trace_file, + provider="claude", + model=model, + stage=stage, + request={ + "messages": format_messages_for_trace(messages), + "tools": format_tools_for_trace( + cast("list[dict[str, Any]]", list(tools)) + ), + }, + response={ + "function_calls": [ + { + "name": call["name"], + "arguments": json.loads(call["arguments"]) + if isinstance(call["arguments"], str) + else call["arguments"], + } + for call in function_calls + ], + "message": message_content, + }, + duration_ms=duration_ms, + metadata=trace_metadata, + ) + + return response + + except HTTPError as e: + error_body = e.read().decode("utf-8") if e.fp else "" + msg = f"Claude returned HTTP {e.code}: {error_body}" + raise ClanError(msg) from e + except URLError as e: + msg = f"Claude API not reachable at {url}" + raise ClanError(msg) from e + except json.JSONDecodeError as e: + msg = "Failed to parse Claude API response" + raise ClanError(msg) from e + + +def call_ollama_api( + model: str, + messages: list[ChatMessage], + tools: Sequence[ToolDefinition], + timeout: int = 120, + trace_file: Path | None = None, + stage: str = "unknown", + max_tokens: int | None = 
None, + trace_metadata: dict[str, Any] | None = None, +) -> OllamaChatResponse: + """Call the Ollama API for chat completion. + + Args: + model: The Ollama model to use + messages: List of message dictionaries + tools: List of Ollama function schemas + timeout: Request timeout in seconds (default: 120) + trace_file: Optional path to write trace entries for debugging + stage: Stage name for trace entries (default: "unknown") + max_tokens: Maximum number of tokens to generate (default: None = unlimited) + trace_metadata: Optional metadata to include in trace entries + + Returns: + The parsed JSON response from the API + + Raises: + ClanError: If the API call fails + + """ + payload: ChatCompletionRequestPayload = { + "model": model, + "messages": messages, + "stream": False, + "tools": list(tools), + } + + # Add max_tokens limit if specified + if max_tokens is not None: + payload["options"] = {"num_predict": max_tokens} # type: ignore[typeddict-item] + _debug_log_request("ollama", messages, tools) + url = "http://localhost:11434/api/chat" + + start_time = time.time() + try: + req = urllib.request.Request( # noqa: S310 + url, + data=json.dumps(payload).encode("utf-8"), + headers={"Content-Type": "application/json"}, + ) + with urllib.request.urlopen(req, timeout=timeout) as resp: # noqa: S310 + if resp.getcode() != HTTPStatus.OK.value: + msg = f"Ollama API returned status {resp.getcode()}" + raise ClanError(msg) + + raw = resp.read().decode("utf-8") + response = cast("OllamaChatResponse", json.loads(raw)) + + # Write trace if requested + if trace_file: + duration_ms = (time.time() - start_time) * 1000 + function_calls, message_content = parse_ollama_response( + response, provider="ollama" + ) + write_trace_entry( + trace_file=trace_file, + provider="ollama", + model=model, + stage=stage, + request={ + "messages": format_messages_for_trace(messages), + "tools": format_tools_for_trace( + cast("list[dict[str, Any]]", list(tools)) + ), + }, + response={ + 
"function_calls": [ + { + "name": call["name"], + "arguments": json.loads(call["arguments"]) + if isinstance(call["arguments"], str) + else call["arguments"], + } + for call in function_calls + ], + "message": message_content, + }, + duration_ms=duration_ms, + metadata=trace_metadata, + ) + + return response + + except HTTPError as e: + msg = f"Ollama returned HTTP {e.code} when requesting chat completion." + raise ClanError(msg) from e + except URLError as e: + msg = "Ollama API not reachable at http://localhost:11434" + raise ClanError(msg) from e + except json.JSONDecodeError as e: + msg = "Failed to parse Ollama API response" + raise ClanError(msg) from e + + +def parse_openai_response( + response_data: OpenAIChatCompletionResponse, + provider: str = "openai", +) -> tuple[list[FunctionCallType], str]: + """Parse OpenAI API response to extract function calls. + + Args: + response_data: The raw response from OpenAI API + provider: The provider name for logging purposes (default: "openai") + + Returns: + Tuple of (function_calls, message_content) + + """ + choices = response_data.get("choices") or [] + if not choices: + return [], "" + + message = choices[0].get("message") or {} + tool_calls = message.get("tool_calls") or [] + raw_content = message.get("content") or "" + model_content = _stringify_message_content(raw_content) + + result: list[FunctionCallType] = [] + for tool_call in tool_calls: + tc_id = tool_call.get("id") or f"call_{int(time.time() * 1000)}" + function = tool_call.get("function") or {} + function_name = function.get("name") or "" + function_args = function.get("arguments") or "{}" + + result.append( + FunctionCallType( + id=tc_id, + call_id=tc_id, + type="function_call", + name=function_name, + arguments=function_args, + ) + ) + + _debug_log_response(provider, model_content, result) + + return result, model_content + + +def parse_ollama_response( + response_data: OllamaChatResponse, + provider: str = "ollama", +) -> 
tuple[list[FunctionCallType], str]: + """Parse Ollama API response to extract function calls. + + Args: + response_data: The raw response from Ollama API + provider: The provider name for logging purposes (default: "ollama") + + Returns: + Tuple of (function_calls, message_content) + + """ + message = response_data.get("message") or {} + tool_calls = message.get("tool_calls") or [] + raw_content = message.get("content") or "" + model_content = _stringify_message_content(raw_content) + + result: list[FunctionCallType] = [] + for idx, tool_call in enumerate(tool_calls): + function = tool_call.get("function") or {} + function_name = function.get("name") or "" + function_args = function.get("arguments") or {} + + # Generate unique IDs (similar to OpenAI format) + call_id = f"call_{idx}_{int(time.time() * 1000)}" + fc_id = f"fc_{idx}_{int(time.time() * 1000)}" + + result.append( + FunctionCallType( + id=fc_id, + call_id=call_id, + type="function_call", + name=function_name, + arguments=json.dumps(function_args), + ) + ) + + _debug_log_response(provider, model_content, result) + + return result, model_content diff --git a/pkgs/clan-cli/clan_lib/llm/llm.py b/pkgs/clan-cli/clan_lib/llm/llm.py new file mode 100644 index 000000000..1e0904ab7 --- /dev/null +++ b/pkgs/clan-cli/clan_lib/llm/llm.py @@ -0,0 +1,65 @@ +"""High-level LLM orchestration functions. + +This module re-exports the LLM orchestration API from submodules. 
+""" + +# Re-export types and dataclasses +from .llm_types import ( # noqa: F401 + DEFAULT_MODELS, + ChatResult, + ConversationProgressEvent, + DiscoveryProgressEvent, + FinalDecisionProgressEvent, + ModelConfig, + ProgressCallback, + ProgressEvent, + ReadmeFetchProgressEvent, + ServiceSelectionProgressEvent, + ServiceSelectionResult, + get_model_config, +) + +# Re-export high-level orchestrator +from .orchestrator import process_chat_turn # noqa: F401 + +# Re-export low-level phase functions +from .phases import ( # noqa: F401 + execute_readme_requests, + get_llm_discovery_phase, + get_llm_final_decision, + get_llm_service_selection, + llm_final_decision_to_inventory_instances, +) + +# Re-export commonly used functions and types from schemas +from .schemas import ( # noqa: F401 + AiAggregate, + ChatMessage, + ConversationHistory, + FunctionCallType, + JSONValue, + MachineDescription, + OllamaFunctionSchema, + OpenAIFunctionSchema, + PendingFinalDecisionState, + PendingServiceSelectionState, + ReadmeRequest, + SessionState, + SimplifiedServiceSchema, + TagDescription, + aggregate_ollama_function_schemas, + aggregate_openai_function_schemas, + create_get_readme_tool, + create_select_service_tool, + create_simplified_service_schemas, +) + +# Re-export service functions +from .service import create_llm_model, run_llm_service # noqa: F401 + +# Re-export utility functions and constants +from .utils import ( # noqa: F401 + ASSISTANT_MODE_DISCOVERY, + ASSISTANT_MODE_FINAL, + ASSISTANT_MODE_SELECTION, +) diff --git a/pkgs/clan-cli/clan_lib/llm/llm_types.py b/pkgs/clan-cli/clan_lib/llm/llm_types.py new file mode 100644 index 000000000..44b1e0352 --- /dev/null +++ b/pkgs/clan-cli/clan_lib/llm/llm_types.py @@ -0,0 +1,142 @@ +"""Type definitions and dataclasses for LLM orchestration.""" + +from collections.abc import Callable +from dataclasses import dataclass +from typing import Literal + +from clan_lib.nix_models.clan import InventoryInstance + +from .schemas import 
ChatMessage, SessionState + + +@dataclass(frozen=True) +class DiscoveryProgressEvent: + """Progress event during discovery phase.""" + + service_names: list[str] | None = None + stage: Literal["discovery"] = "discovery" + status: Literal["analyzing", "complete"] = "analyzing" + + +@dataclass(frozen=True) +class ReadmeFetchProgressEvent: + """Progress event during readme fetching.""" + + count: int + service_names: list[str] + stage: Literal["readme_fetch"] = "readme_fetch" + status: Literal["fetching", "complete"] = "fetching" + + +@dataclass(frozen=True) +class ServiceSelectionProgressEvent: + """Progress event during service selection phase.""" + + service_names: list[str] + stage: Literal["service_selection"] = "service_selection" + status: Literal["selecting", "complete"] = "selecting" + + +@dataclass(frozen=True) +class FinalDecisionProgressEvent: + """Progress event during final decision phase.""" + + stage: Literal["final_decision"] = "final_decision" + status: Literal["reviewing", "complete"] = "reviewing" + + +@dataclass(frozen=True) +class ConversationProgressEvent: + """Progress event for conversation continuation.""" + + message: str + stage: Literal["conversation"] = "conversation" + awaiting_response: bool = True + + +@dataclass(frozen=True) +class ServiceSelectionResult: + """Result from service selection step. 
+ + Attributes: + selected_service: Name of the selected service (None if clarification needed) + service_summary: LLM-generated summary of the service (None if clarification needed) + clarifying_message: Clarifying question from LLM (empty string if service selected) + + """ + + selected_service: str | None + service_summary: str | None + clarifying_message: str + + +ProgressEvent = ( + DiscoveryProgressEvent + | ReadmeFetchProgressEvent + | ServiceSelectionProgressEvent + | FinalDecisionProgressEvent + | ConversationProgressEvent +) + +ProgressCallback = Callable[[ProgressEvent], None] + + +@dataclass(frozen=True) +class ChatResult: + """Result of a complete chat turn through the multi-stage workflow. + + Attributes: + proposed_instances: List of inventory instances suggested by the LLM (empty if none) + conversation_history: Updated conversation history after this turn + assistant_message: Message from the assistant (questions, recommendations, or diff preview) + requires_user_response: True if the assistant asked a question and needs a response + error: Error message if something went wrong (None on success) + session_state: Serializable state to pass into the next turn when continuing a workflow + + """ + + proposed_instances: tuple[InventoryInstance, ...] + conversation_history: tuple[ChatMessage, ...] + assistant_message: str + requires_user_response: bool + session_state: SessionState + error: str | None = None + + +@dataclass(frozen=True) +class ModelConfig: + """Configuration for an LLM model. 
+ + Attributes: + name: The model identifier/name + provider: The LLM provider + timeout: Request timeout in seconds (default: 120) + + """ + + name: str + provider: Literal["openai", "ollama", "claude"] + timeout: int = 120 + + +# Default model configurations for each provider +DEFAULT_MODELS: dict[Literal["openai", "ollama", "claude"], ModelConfig] = { + "openai": ModelConfig(name="gpt-4o", provider="openai", timeout=60), + "claude": ModelConfig(name="claude-sonnet-4-5", provider="claude", timeout=60), + "ollama": ModelConfig(name="qwen3:4b-instruct", provider="ollama", timeout=120), +} + + +def get_model_config( + provider: Literal["openai", "ollama", "claude"], +) -> ModelConfig: + """Get the default model configuration for a provider. + + Args: + provider: The LLM provider name + + Returns: + ModelConfig for the specified provider + + """ + return DEFAULT_MODELS[provider] diff --git a/pkgs/clan-cli/clan_lib/llm/orchestrator.py b/pkgs/clan-cli/clan_lib/llm/orchestrator.py new file mode 100644 index 000000000..b37adbdad --- /dev/null +++ b/pkgs/clan-cli/clan_lib/llm/orchestrator.py @@ -0,0 +1,415 @@ +"""High-level LLM orchestration for multi-stage chat workflow.""" + +import json +from pathlib import Path +from typing import Literal, cast + +from clan_lib.errors import ClanAiError +from clan_lib.flake.flake import Flake +from clan_lib.services.modules import InputName, ServiceReadmeCollection + +from .llm_types import ( + ChatResult, + DiscoveryProgressEvent, + FinalDecisionProgressEvent, + ProgressCallback, + ReadmeFetchProgressEvent, + ServiceSelectionProgressEvent, +) +from .phases import ( + execute_readme_requests, + get_llm_discovery_phase, + get_llm_final_decision, + get_llm_service_selection, + llm_final_decision_to_inventory_instances, +) +from .schemas import ( + ConversationHistory, + JSONValue, + PendingFinalDecisionState, + PendingServiceSelectionState, + SessionState, +) +from .utils import ( + ASSISTANT_MODE_DISCOVERY, + ASSISTANT_MODE_FINAL, + 
def process_chat_turn(
    user_request: str,
    flake: Flake,
    conversation_history: ConversationHistory | None = None,
    provider: Literal["openai", "ollama", "claude"] = "ollama",
    progress_callback: ProgressCallback | None = None,
    trace_file: Path | None = None,
    session_state: SessionState | None = None,
) -> ChatResult:
    """Run one turn of the multi-stage chat workflow and return its outcome.

    A fresh turn walks through these stages:
    1. Discovery - the LLM requests READMEs for candidate services or asks a question
    2. Readme fetching - the requested documentation is retrieved
    3. Service selection - the LLM picks one service or asks a question
    4. Final decision - the LLM configures the service or asks a question
    5. Conversion - tool calls are turned into inventory instances

    When ``session_state`` carries a pending stage, the turn resumes there
    instead: a well-formed ``pending_final_decision`` re-runs the final decision
    for the stored service; otherwise a ``pending_service_selection`` with stored
    READMEs re-runs service selection. Neither the caller's history nor the
    caller's session state is mutated; updated copies are returned in the result.

    Args:
        user_request: The user's message/request
        flake: The Flake object to get services from
        conversation_history: Optional list of prior messages in the conversation
        provider: The LLM provider to use
        progress_callback: Optional callback for progress updates
        trace_file: Optional path to write LLM interaction traces for debugging
        session_state: Optional cross-turn state to resume pending workflows

    Returns:
        ChatResult containing proposed instances, updated history, the assistant
        message, whether a user reply is required, and the updated session state

    Raises:
        ClanAiError: If a phase yields neither tool calls nor a message, or if
            service selection fails without a clarifying message.

    Example:
        >>> result = process_chat_turn(
        ...     "Set up a web server",
        ...     flake,
        ...     progress_callback=print,
        ... )
        >>> if result.proposed_instances:
        ...     print("LLM suggested:", result.proposed_instances)
        >>> if result.requires_user_response:
        ...     print("Assistant asks:", result.assistant_message)

    """
    history = list(conversation_history) if conversation_history else []
    state: SessionState = cast(
        "SessionState", dict(session_state) if session_state else {}
    )

    def _state_snapshot() -> dict[str, JSONValue]:
        # Deep copy through a JSON round trip; fall back to a shallow copy when
        # the state holds something json cannot encode.
        try:
            return json.loads(json.dumps(state))
        except (TypeError, ValueError):
            return dict(state)  # type: ignore[arg-type]

    def _metadata(extra: dict[str, JSONValue] | None = None) -> dict[str, JSONValue]:
        # The snapshot is taken when the metadata is built, so it reflects the
        # state after pending entries consumed earlier in this turn were popped.
        base: dict[str, JSONValue] = {"session_state_before": _state_snapshot()}
        if extra:
            base.update(extra)
        return base

    def _state_copy() -> SessionState:
        return cast("SessionState", dict(state))

    def _ask_user(message: str) -> ChatResult:
        # Result for every "assistant needs an answer" outcome. Callers must
        # update `history` and `state` before calling, since both are copied here.
        return ChatResult(
            proposed_instances=(),
            conversation_history=tuple(history),
            assistant_message=message,
            requires_user_response=True,
            error=None,
            session_state=_state_copy(),
        )

    def _run_final_decision(
        service_name: str, service_summary: str, *, resume: bool
    ) -> ChatResult:
        # Shared by the resume path and the post-selection path; `resume` only
        # affects the trace metadata and the wording of the failure error.
        extra: dict[str, JSONValue] = {"selected_service": service_name}
        if resume:
            extra["resume"] = True

        if progress_callback:
            progress_callback(FinalDecisionProgressEvent(status="reviewing"))

        function_calls, final_message = get_llm_final_decision(
            user_request,
            flake,
            service_name,
            service_summary,
            conversation_history,
            provider=provider,
            trace_file=trace_file,
            trace_metadata=_metadata(extra),
        )

        if progress_callback:
            progress_callback(FinalDecisionProgressEvent(status="complete"))

        history.append(_user_message(user_request))

        if function_calls:
            proposed_instances = llm_final_decision_to_inventory_instances(
                function_calls
            )
            instance_names = [inst["module"]["name"] for inst in proposed_instances]
            summary = (
                f"I suggest configuring these services: {', '.join(instance_names)}"
            )
            history.append(_assistant_message(summary, mode=ASSISTANT_MODE_FINAL))
            state.pop("pending_final_decision", None)

            return ChatResult(
                proposed_instances=tuple(proposed_instances),
                conversation_history=tuple(history),
                assistant_message=summary,
                requires_user_response=False,
                error=None,
                session_state=_state_copy(),
            )

        if final_message:
            history.append(_assistant_message(final_message, mode=ASSISTANT_MODE_FINAL))
            # Remember the service so the next turn can resume the final decision.
            state["pending_final_decision"] = cast(
                "PendingFinalDecisionState",
                {
                    "service_name": service_name,
                    "service_summary": service_summary,
                },
            )
            return _ask_user(final_message)

        msg = "LLM did not provide any response or recommendations"
        if resume:
            raise ClanAiError(
                msg,
                description="Expected either function calls (configuration) or a clarifying message",
                location="Final Decision Phase (pending)",
            )
        raise ClanAiError(
            msg,
            description="Expected either function calls (configuration) or a clarifying message after service selection",
            location="Final Decision Phase",
        )

    def _continue_with_service_selection(
        readme_results: dict[InputName, ServiceReadmeCollection],
    ) -> ChatResult:
        # Extract all service names from readme results
        all_service_names = [
            service_name
            for collection in readme_results.values()
            for service_name in collection.readmes
        ]

        if progress_callback:
            progress_callback(
                ServiceSelectionProgressEvent(
                    service_names=all_service_names, status="selecting"
                )
            )

        selection_result = get_llm_service_selection(
            user_request,
            readme_results,
            conversation_history,
            provider=provider,
            trace_file=trace_file,
            trace_metadata=_metadata(),
        )

        if (
            selection_result.clarifying_message
            and not selection_result.selected_service
        ):
            history.append(_user_message(user_request))
            history.append(
                _assistant_message(
                    selection_result.clarifying_message,
                    mode=ASSISTANT_MODE_SELECTION,
                )
            )
            # Keep the fetched READMEs so the next turn can skip discovery.
            state["pending_service_selection"] = cast(
                "PendingServiceSelectionState",
                {
                    "readme_results": _serialize_readme_results(readme_results),
                },
            )
            return _ask_user(selection_result.clarifying_message)

        if (
            not selection_result.selected_service
            or not selection_result.service_summary
        ):
            msg = "Failed to select service"
            raise ClanAiError(
                msg,
                description=selection_result.clarifying_message
                or "No service selected and no clarifying message provided",
                location="Service Selection Phase",
            )

        return _run_final_decision(
            selection_result.selected_service,
            selection_result.service_summary,
            resume=False,
        )

    pending_final_raw = state.get("pending_final_decision")
    pending_final: PendingFinalDecisionState | None = (
        pending_final_raw if isinstance(pending_final_raw, dict) else None
    )

    pending_selection_raw = state.get("pending_service_selection")
    pending_selection: PendingServiceSelectionState | None = (
        pending_selection_raw if isinstance(pending_selection_raw, dict) else None
    )
    resume_readme_results: dict[InputName, ServiceReadmeCollection] | None = None
    if pending_selection is not None:
        serialized_results = pending_selection.get("readme_results")
        if serialized_results is not None:
            resume_readme_results = _deserialize_readme_results(serialized_results)
        # The pending selection is consumed whether or not it carried READMEs.
        state.pop("pending_service_selection", None)

    if pending_final is not None:
        service_name = pending_final.get("service_name")
        service_summary = pending_final.get("service_summary")

        if isinstance(service_name, str) and isinstance(service_summary, str):
            return _run_final_decision(service_name, service_summary, resume=True)

        # Malformed pending entry: drop it and fall through to the other paths.
        state.pop("pending_final_decision", None)

    if resume_readme_results is not None:
        return _continue_with_service_selection(resume_readme_results)

    # Stage 1: Discovery phase
    if progress_callback:
        progress_callback(DiscoveryProgressEvent(status="analyzing"))

    readme_requests, discovery_message = get_llm_discovery_phase(
        user_request,
        flake,
        conversation_history,
        provider=provider,
        trace_file=trace_file,
        trace_metadata=_metadata(),
    )

    if progress_callback:
        selected_services = [req["function_name"] for req in readme_requests]
        progress_callback(
            DiscoveryProgressEvent(
                service_names=selected_services if selected_services else None,
                status="complete",
            )
        )

    # If LLM asked a question or made a recommendation without readme requests
    if discovery_message and not readme_requests:
        history.append(_user_message(user_request))
        history.append(
            _assistant_message(discovery_message, mode=ASSISTANT_MODE_DISCOVERY)
        )
        return _ask_user(discovery_message)

    # If we got readme requests, continue to selecting services
    if readme_requests:
        # Stage 2: Fetch readmes
        service_names = [
            f"{req['function_name']} (from {req['input_name'] or 'built-in'})"
            for req in readme_requests
        ]
        if progress_callback:
            progress_callback(
                ReadmeFetchProgressEvent(
                    count=len(readme_requests),
                    service_names=service_names,
                    status="fetching",
                )
            )

        readme_results = execute_readme_requests(readme_requests, flake)

        if progress_callback:
            progress_callback(
                ReadmeFetchProgressEvent(
                    count=len(readme_requests),
                    service_names=service_names,
                    status="complete",
                )
            )

        return _continue_with_service_selection(readme_results)

    # No readme requests and no message - unexpected
    msg = "LLM did not provide any response or recommendations"
    raise ClanAiError(
        msg,
        description="The LLM should either request service readmes or provide a clarifying message",
        location="Discovery Phase",
    )
def get_llm_discovery_phase(
    user_request: str,
    flake: Flake,
    conversation_history: ConversationHistory | None = None,
    provider: Literal["openai", "ollama", "claude"] = "ollama",
    trace_file: Path | None = None,
    trace_metadata: dict[str, JSONValue] | None = None,
) -> tuple[list[ReadmeRequest], str]:
    """First LLM call: discovery phase with simplified schemas and get_readme tool.

    The LLM is offered only the `get_readme` tool. Its tool calls are turned
    into readme requests; malformed calls are logged and skipped rather than
    aborting the whole phase.

    Args:
        user_request: The user's request/query
        flake: The Flake object to get services from
        conversation_history: Optional conversation history
        provider: The LLM provider to use
        trace_file: Optional path to write LLM interaction traces for debugging
        trace_metadata: Optional data to include in trace logs

    Returns:
        Tuple of (readme_requests, message_content):
        - readme_requests: List of readme requests from the LLM
        - message_content: Text response (e.g., questions or service recommendations)

    """
    # Get simplified services and create get_readme tool
    openai_aggregate = aggregate_openai_function_schemas(flake)
    simplified_services = create_simplified_service_schemas(flake)
    valid_function_names = [service["name"] for service in simplified_services]
    get_readme_tool = create_get_readme_tool(valid_function_names)

    # Build discovery prompt
    system_prompt, assistant_context = build_discovery_prompt(
        openai_aggregate.machines, openai_aggregate.tags, simplified_services
    )

    messages: list[ChatMessage] = [
        {"role": "system", "content": system_prompt},
        {"role": "assistant", "content": assistant_context},
    ]
    messages.extend(_strip_conversation_metadata(conversation_history))
    messages.append(_user_message(user_request))

    # Call LLM with only get_readme tool
    model_config = get_model_config(provider)

    if provider == "openai":
        openai_response = call_openai_api(
            model_config.name,
            messages,
            [get_readme_tool],
            timeout=model_config.timeout,
            trace_file=trace_file,
            stage="discovery",
            trace_metadata=trace_metadata,
        )
        function_calls, message_content = parse_openai_response(
            openai_response, provider="openai"
        )
    elif provider == "claude":
        claude_response = call_claude_api(
            model_config.name,
            messages,
            [get_readme_tool],
            timeout=model_config.timeout,
            trace_file=trace_file,
            stage="discovery",
            trace_metadata=trace_metadata,
        )
        # Claude responses go through the OpenAI parser, tagged with their provider.
        function_calls, message_content = parse_openai_response(
            claude_response, provider="claude"
        )
    else:
        ollama_response = call_ollama_api(
            model_config.name,
            messages,
            [get_readme_tool],
            timeout=model_config.timeout,
            trace_file=trace_file,
            stage="discovery",
            max_tokens=300,  # Limit output for discovery phase (get_readme calls or short question)
            trace_metadata=trace_metadata,
        )
        function_calls, message_content = parse_ollama_response(
            ollama_response, provider="ollama"
        )

    return _extract_readme_requests(function_calls), message_content


def _extract_readme_requests(
    function_calls: list[FunctionCallType],
) -> list[ReadmeRequest]:
    """Turn `get_readme` tool calls into readme requests, skipping malformed ones."""
    readme_requests: list[ReadmeRequest] = []
    for call in function_calls:
        if call["name"] != "get_readme":
            continue

        raw_args = call["arguments"]
        try:
            # Arguments may arrive as a JSON string or as an already-decoded
            # object, as get_llm_service_selection also allows for.
            args = json.loads(raw_args) if isinstance(raw_args, str) else raw_args
            request = ReadmeRequest(
                input_name=args.get("input_name"),
                function_name=args["function_name"],
            )
        except (json.JSONDecodeError, KeyError, TypeError, AttributeError) as e:
            # TypeError/AttributeError cover payloads that decode to a
            # non-object (null, list, number, ...).
            log.warning("Failed to parse readme request arguments: %s", e)
            continue

        readme_requests.append(request)

    return readme_requests
def execute_readme_requests(
    requests: list[ReadmeRequest], flake: Flake
) -> dict[InputName, ServiceReadmeCollection]:
    """Fetch the READMEs named by the given requests, one lookup per input.

    Requests are grouped by their `input_name` (first-seen order), and each
    group's service names are fetched together with `get_service_readmes`.

    Args:
        requests: List of readme requests
        flake: The Flake object

    Returns:
        Dictionary mapping input_name to ServiceReadmeCollection

    """
    # Bucket the requested service names under the input they come from.
    wanted: dict[InputName, list[ServiceName]] = {}
    for request in requests:
        wanted.setdefault(request["input_name"], []).append(request["function_name"])

    # One readme lookup per input, covering all of its requested services.
    return {
        source: get_service_readmes(source, names, flake)
        for source, names in wanted.items()
    }
def get_llm_service_selection(
    user_request: str,
    readme_results: dict[InputName, ServiceReadmeCollection],
    conversation_history: ConversationHistory | None = None,
    provider: Literal["openai", "ollama", "claude"] = "ollama",
    trace_file: Path | None = None,
    trace_metadata: dict[str, JSONValue] | None = None,
) -> ServiceSelectionResult:
    """LLM call for service selection step: review READMEs and select one service.

    The LLM is offered only the `select_service` tool. A text-only reply is
    returned as a clarifying message. Any malformed or unexpected tool call is
    reported through `clarifying_message` with no service selected, never
    raised. This includes a selected name that is not one of the services whose
    README was provided.

    Args:
        user_request: The original user request
        readme_results: Dictionary of input_name -> ServiceReadmeCollection
        conversation_history: Optional conversation history
        provider: The LLM provider to use
        trace_file: Optional path to write LLM interaction traces for debugging
        trace_metadata: Optional data to include in trace logs

    Returns:
        ServiceSelectionResult with selected service info or clarifying question

    """

    def _unselected(message: str) -> ServiceSelectionResult:
        # Result shape shared by clarifying questions and error reports.
        return ServiceSelectionResult(
            selected_service=None,
            service_summary=None,
            clarifying_message=message,
        )

    # Build README context and collect service names
    readme_context = "README documentation for the following services:\n\n"
    available_services: list[str] = []
    for collection in readme_results.values():
        for service_name, readme_content in collection.readmes.items():
            available_services.append(service_name)
            if readme_content:  # Skip None values
                readme_context += f"=== {service_name} ===\n{readme_content}\n\n"

    readme_context = readme_context.rstrip()
    readme_context += "\n\n--- END OF README DOCUMENTATION ---"

    # Create select_service tool
    select_service_tool = create_select_service_tool(available_services)

    # Build prompt
    system_prompt, assistant_context = build_select_service_prompt(
        user_request, available_services
    )

    combined_assistant_context = (
        f"{assistant_context.rstrip()}\n\n{readme_context}"
        if assistant_context
        else readme_context
    )

    messages: list[ChatMessage] = [
        {"role": "system", "content": system_prompt},
        {"role": "assistant", "content": combined_assistant_context},
    ]
    messages.extend(_strip_conversation_metadata(conversation_history))
    messages.append(_user_message(user_request))

    model_config = get_model_config(provider)

    # Call LLM
    if provider == "openai":
        openai_response = call_openai_api(
            model_config.name,
            messages,
            [select_service_tool],
            timeout=model_config.timeout,
            trace_file=trace_file,
            stage="select_service",
            trace_metadata=trace_metadata,
        )
        function_calls, message_content = parse_openai_response(
            openai_response, provider="openai"
        )
    elif provider == "claude":
        claude_response = call_claude_api(
            model_config.name,
            messages,
            [select_service_tool],
            timeout=model_config.timeout,
            trace_file=trace_file,
            stage="select_service",
            trace_metadata=trace_metadata,
        )
        function_calls, message_content = parse_openai_response(
            claude_response, provider="claude"
        )
    else:  # ollama
        ollama_response = call_ollama_api(
            model_config.name,
            messages,
            [select_service_tool],
            timeout=model_config.timeout,
            trace_file=trace_file,
            stage="select_service",
            max_tokens=600,  # Allow space for summary
            trace_metadata=trace_metadata,
        )
        function_calls, message_content = parse_ollama_response(
            ollama_response, provider="ollama"
        )

    # Check if LLM asked a clarifying question
    if message_content and not function_calls:
        return _unselected(message_content)

    # No function calls and no message - unexpected
    if not function_calls:
        return _unselected("LLM did not select a service or ask for clarification")

    # Extract service selection
    if len(function_calls) != 1:
        error_msg = f"Expected exactly 1 select_service call, got {len(function_calls)}"
        log.error(error_msg)
        return _unselected(error_msg)

    call = function_calls[0]
    if call["name"] != "select_service":
        error_msg = f"Expected select_service call, got {call['name']}"
        log.error(error_msg)
        return _unselected(error_msg)

    # Parse arguments (either a JSON string or an already-decoded object)
    raw_args = call["arguments"]
    try:
        args = json.loads(raw_args) if isinstance(raw_args, str) else raw_args
        service_name = args.get("service_name")
        summary = args.get("summary")
    except (json.JSONDecodeError, KeyError, TypeError, AttributeError) as e:
        # TypeError/AttributeError cover payloads that decode to a non-object.
        error_msg = f"Failed to parse select_service arguments: {e}"
        log.exception(error_msg)
        return _unselected(error_msg)

    if not service_name or not summary:
        error_msg = "select_service call missing required fields"
        log.error(error_msg)
        return _unselected(error_msg)

    # The tool and prompt restrict the choice to services with a fetched README;
    # reject anything else here instead of failing later during schema lookup.
    if service_name not in available_services:
        error_msg = f"select_service chose unknown service: {service_name}"
        log.error(error_msg)
        return _unselected(error_msg)

    return ServiceSelectionResult(
        selected_service=service_name,
        service_summary=summary,
        clarifying_message="",
    )
def get_llm_final_decision(
    user_request: str,
    flake: Flake,
    selected_service: str,
    service_summary: str,
    conversation_history: ConversationHistory | None = None,
    provider: Literal["openai", "ollama", "claude"] = "ollama",
    trace_file: Path | None = None,
    trace_metadata: dict[str, JSONValue] | None = None,
) -> tuple[list[FunctionCallType], str]:
    """Ask the LLM to configure the already-selected service.

    Only the tool schema whose function name equals `selected_service` is
    offered to the model, together with the service summary produced by the
    selection phase.

    Args:
        user_request: The original user request
        flake: The Flake object
        selected_service: Name of the service selected in previous step
        service_summary: LLM-generated summary of the service documentation
        conversation_history: Optional conversation history
        provider: The LLM provider to use
        trace_file: Optional path to write LLM interaction traces for debugging
        trace_metadata: Optional data to include in trace logs

    Returns:
        Tuple of (function_calls, message_content)

    Raises:
        ClanAiError: If zero or more than one tool schema matches `selected_service`.

    """
    # Aggregate every service schema, then keep only the selected service's tool.
    all_schemas = aggregate_ollama_function_schemas(flake)
    filtered_tools = [
        candidate
        for candidate in all_schemas.tools
        if candidate["function"]["name"] == selected_service
    ]
    match_count = len(filtered_tools)

    if match_count == 0:
        msg = f"No schema found for selected service: {selected_service}"
        raise ClanAiError(
            msg,
            description="The selected service does not have a schema available",
            location="Final Decision - Schema Lookup",
        )

    if match_count != 1:
        msg = f"Expected exactly 1 tool for service {selected_service}, got {match_count}"
        raise ClanAiError(
            msg,
            description="Service schema lookup returned unexpected results",
            location="Final Decision - Schema Lookup",
        )

    log.info(
        f"Configuring service: {selected_service} (providing ONLY this tool to LLM)"
    )

    system_prompt, assistant_context = build_final_decision_prompt(
        all_schemas.machines, all_schemas.tags
    )

    # The summary from the selection phase is appended to the static context.
    service_context = (
        f"Service documentation summary for `{selected_service}`:\n\n{service_summary}"
    )
    if assistant_context:
        combined_assistant_context = f"{assistant_context.rstrip()}\n\n{service_context}"
    else:
        combined_assistant_context = service_context

    messages: list[ChatMessage] = [
        {"role": "system", "content": system_prompt},
        {"role": "assistant", "content": combined_assistant_context},
        *_strip_conversation_metadata(conversation_history),
        _user_message(user_request),
    ]

    model_config = get_model_config(provider)

    if provider == "openai":
        raw_response = call_openai_api(
            model_config.name,
            messages,
            filtered_tools,
            timeout=model_config.timeout,
            trace_file=trace_file,
            stage="final_decision",
            trace_metadata=trace_metadata,
        )
        parsed = parse_openai_response(raw_response, provider="openai")
    elif provider == "claude":
        raw_response = call_claude_api(
            model_config.name,
            messages,
            filtered_tools,
            timeout=model_config.timeout,
            trace_file=trace_file,
            stage="final_decision",
            trace_metadata=trace_metadata,
        )
        parsed = parse_openai_response(raw_response, provider="claude")
    else:
        raw_response = call_ollama_api(
            model_config.name,
            messages,
            filtered_tools,
            timeout=model_config.timeout,
            trace_file=trace_file,
            stage="final_decision",
            max_tokens=500,  # Limit output to prevent excessive verbosity
            trace_metadata=trace_metadata,
        )
        parsed = parse_ollama_response(raw_response, provider="ollama")

    function_calls, message_content = parsed
    return function_calls, message_content
def llm_final_decision_to_inventory_instances(
    function_calls: list[FunctionCallType],
) -> list[InventoryInstance]:
    """Convert LLM function calls to an inventory instance list.

    Each call becomes one instance whose module name is the called function's
    name. The call's `arguments` may be a JSON string or an already-decoded
    object. A missing or null `roles` entry becomes an empty mapping, and a
    missing or null `module` entry yields a module input of None.

    Args:
        function_calls: List of function call dictionaries from the LLM

    Returns:
        List of inventory instances, each containing module metadata and roles

    Raises:
        json.JSONDecodeError: If a string `arguments` payload is not valid JSON.

    """
    inventory_instances: list[InventoryInstance] = []

    for call in function_calls:
        raw_args = call["arguments"]
        args = json.loads(raw_args) if isinstance(raw_args, str) else raw_args

        # `or {}` also covers explicit nulls, which `.get(key, {})` would pass
        # through as None.
        roles = args.get("roles") or {}
        module_spec = args.get("module") or {}

        # Create inventory instance for this module
        instance: InventoryInstance = {
            "module": {
                "input": module_spec.get("input"),
                "name": call["name"],
            },
            "roles": roles,
        }
        inventory_instances.append(instance)

    return inventory_instances
+ + Mission + 1) Analyze the user request (and conversation history) to determine which machines and/or tags should receive each role. + 2) Call the configuration tool with the correct role-to-target mappings. + 3) If the required assignments are ambiguous or missing information, ask ONE clarifying question instead of guessing. + + Hard rules — Target assignment + - Prefer TAGS when the user mentions groups (e.g., "all production servers", "backup servers"). + - Prefer MACHINE NAMES when the user names specific machines (e.g., "machine1", "server-a"). + - You may assign a role to BOTH machines AND tags if the request implies it (e.g., "deploy to server1 and all production machines"). + - Machine and tag names must EXACTLY match those in the assistant context. Do NOT invent names. + + Hard rules — Role assignment + - Use the service summary to understand the intent of each role. + - If the request clearly maps to specific roles (e.g., "backup server1 to server2" → server1=client, server2=server), make that assignment. + - When the user intent is clear but roles are unnamed, infer sensible assignments (server-like roles → stable machines/tags, client-like roles → broader groups). + - Ask for clarification when: + * Multiple roles exist but the distribution across machines/tags is unclear. + * The user mentions machines without describing how they participate. + * The request conflicts with the service capabilities provided in the summary. + + Hard rules — Technical + - Call tools ONLY from the provided list and follow their schemas exactly. + - Arguments must match the schema; omit fields you do not need. + - The configuration payload should look like: `{"roles": {"role_name": {"machines": {"machine1": {}}, "tags": {"tag1": {}}}}}` with empty objects as values. + + Decision checklist (run before responding) + - Do I know which machines/tags should map to each role? + - Do the assignments align with the role descriptions and user intent? 
+ - Are all machine/tag names spelled exactly as provided? + - Is clarification required before a safe assignment can be made? + + Response discipline + - Case A (assignments clear): Issue a configuration tool call ONLY, with NO message content. + - Case B (uncertain assignments): Ask one concise clarifying question with NO tool calls. + - Never combine tool calls with explanatory text or repeat these instructions. + """ + ).strip() + + context_lines: list[str] = ["Assistant context: available machines and tags.", ""] + + context_lines.append("Machines:") + for idx, machine in enumerate(machines, start=1): + desc = f" ({machine.description})" if machine.description else "" + context_lines.append(f"{idx}. `{machine.name}`{desc}") + + context_lines.append("") + context_lines.append("Tags:") + for idx, tag in enumerate(tags, start=1): + desc = f" ({tag.description})" if tag.description else "" + context_lines.append(f"{idx}. `{tag.name}`{desc}") + + assistant_context = "\n".join(context_lines).strip() + return system_instructions, assistant_context + + +def build_discovery_prompt( + machines: list[MachineDescription], + tags: list[TagDescription], + services: list[SimplifiedServiceSchema], +) -> tuple[str, str]: + """Build discovery phase instructions and static context payload. + + Args: + machines: List of available machines + tags: List of available tags + services: List of available services with names and descriptions + + Returns: + Tuple containing (system_instructions, assistant_context) + + """ + system_instructions = textwrap.dedent( + """ + You are a clan deployment planner assistant in DISCOVERY MODE. + + Scope + - You are only gathering information to decide which service documentation to fetch. + - Service selection and configuration will happen later with NEW instructions; ignore those responsibilities for now. + + Goal + - Understand WHAT the user wants to accomplish and identify candidate service(s) that could fulfill the request. 
+ - IMPORTANT: We can only set up ONE service at a time. If the user requests multiple DISTINCT things, ask them to choose one. + - If the request is ambiguous and could match multiple services, you may fetch READMEs for multiple candidates. The next phase will choose the best fit. + + Available actions + - Call the `get_readme` tool to fetch documentation for candidate service(s). + - Ask ONE clarifying question when the user's intent is unclear (e.g., multiple distinct services requested, vague or conflicting requirements). + + Hard rules + - `get_readme` is the ONLY tool you may call in discovery mode. Never attempt to select or configure services in this phase. + - Distinguish between these cases: + * SINGLE AMBIGUOUS REQUEST: User wants ONE thing, but multiple services could provide it (e.g., "set up a web server" could be nginx, apache, or caddy). → Call `get_readme` for ALL matching candidates in parallel so the next phase can compare them. + * MULTIPLE DISTINCT REQUESTS: User wants MULTIPLE different things (e.g., "set up nginx and postgresql", "configure backup and monitoring"). → Ask which ONE thing they want to set up first. + - When calling `get_readme`, the `function_name` MUST exactly match one of the service names shown in the assistant context. If nothing matches, ask the user instead of guessing. + - Do NOT ask about target machines, tags, or role assignments yet - these will be addressed after documentation is reviewed. + - Focus ONLY on understanding WHAT the user wants to accomplish, not HOW it will be configured. + - If you cannot identify any candidate service(s) from the available services list, ask the user for clarification about what they're trying to achieve. + - Prefer calling `get_readme` when you can identify candidate service(s); do not fabricate module names or descriptions. + + Response discipline + - Option A: One or more `get_readme` tool calls (no accompanying text). Multiple calls are allowed when several services might fit. 
+ - Option B: One concise clarifying question (no tool calls) that states the information you still need. + - Do NOT echo or restate these system instructions to the user. + + Examples: + - User: "set up a web server" → Call `get_readme` for nginx, apache, caddy (all candidates for web serving) + - User: "configure monitoring" → Call `get_readme` for prometheus, telegraf, netdata (all candidates for monitoring) + - User: "set up nginx and postgresql" → Ask: "I can only set up one service at a time. Which would you like to configure first: nginx or postgresql?" + - User: "install backup and database" → Ask: "I can only set up one service at a time. Would you like to set up backup or database first?" + + Stay concise and rely on the assistant context for valid names. + """ + ).strip() + + context_lines: list[str] = ["Assistant context: machines, tags, and services.", ""] + + context_lines.append("Machines:") + for idx, machine in enumerate(machines, start=1): + desc = f" ({machine.description})" if machine.description else "" + context_lines.append(f"{idx}. `{machine.name}`{desc}") + + context_lines.append("") + context_lines.append("Tags:") + for idx, tag in enumerate(tags, start=1): + desc = f" ({tag.description})" if tag.description else "" + context_lines.append(f"{idx}. `{tag.name}`{desc}") + + context_lines.append("") + context_lines.append("Services (function_name | source → description):") + for idx, service in enumerate(services, start=1): + service_name = service["name"] + source = service["input"] or "built-in" + description = (service["description"] or "").replace("\n", " ").strip() + context_lines.append(f"{idx}. `{service_name}` | {source} → {description}") + + context_lines.append("") + context_lines.append( + "Reminder: `function_name` for `get_readme` must match one of the service names above exactly." 
+ ) + + assistant_context = "\n".join(context_lines).strip() + return system_instructions, assistant_context + + +def build_select_service_prompt( + user_request: str, # noqa: ARG001 - kept for future prompt customization + available_services: list[str], +) -> tuple[str, str]: + """Build service selection phase instructions and context. + + Args: + user_request: The original user request + available_services: List of service names that have README documentation available + + Returns: + Tuple containing (system_instructions, assistant_context) + + """ + system_instructions = textwrap.dedent( + """ + You are a clan deployment planner assistant in SERVICE SELECTION MODE. + + Context shift + - Discovery mode has finished. Ignore any instructions from earlier phases. + - You now have README documentation for one or more candidate services. + + Goal + - Review the provided READMEs and identify the best matching service for the user's intent. + - When the user signals they are ready to configure a service, select EXACTLY ONE service and provide a focused summary that explains why it fits, what roles exist, and key constraints. + - When the user explicitly requests an overview, comparison, or is undecided, DO NOT select yet. Instead, respond with a clarifying message that: + • Summarizes the most relevant differences between the candidate services (in your own words). + • Asks the user which direction they would like to pursue next. + + Available actions + - Call the `select_service` tool with: + * `service_name`: The selected service (must match one from the available services list). + * `summary` (≤300 words) covering: + 1. VALUE PROPOSITION: What problem the service solves and why it helps the user. + 2. ROLES: The purpose of each role (e.g., which role backs up data, which receives it). + 3. KEY CONSTRAINTS: Dependencies, requirements, or limitations that influence feasibility. + + IMPORTANT: Synthesize the README in your own words. 
Never copy configuration snippets or step-by-step guides. + - Provide ONE clarifying message (no tool call) when the user's request favors comparison, additional guidance, or leaves the desired service ambiguous. + + Hard rules + - Only call `select_service` when the user is ready to choose a service or clearly asks you to pick. + - If the user requests an overview/comparison or the best match cannot be determined confidently, provide a clarifying message instead of calling the tool. + - Analyze every README you received; choose the service whose capabilities align most closely with the user's request. + - Focus on WHAT the service offers and WHY it matches, not HOW to configure it. + - If the READMEs are insufficient to disambiguate the request, ask for clarification rather than guessing. + + Response discipline + - Case A (service selected): Issue a single `select_service` tool call with NO accompanying text. + - Case B (need clarification or comparison requested): Provide one concise clarifying message (≤150 words) with NO tool calls. + - Do NOT repeat or paraphrase these instructions in your reply. + - Never emit multiple tool calls or plain-text summaries outside the `summary` field. + + Examples of CORRECT behavior: + ✓ Tool call to `select_service` only (empty message string) + ✓ Clarifying message that compares options and asks the user to choose (no tool calls) + + Examples of INCORRECT behavior (DO NOT DO THIS): + ✗ Tool call + explanatory text + ✗ Multiple `select_service` calls + ✗ `select_service` with a name that is not in the available services list + """ + ).strip() + + context_lines: list[str] = [ + "Assistant context: available services.", + "", + "Available services (you must choose exactly one):", + ] + + for idx, service_name in enumerate(available_services, start=1): + context_lines.append(f"{idx}. 
`{service_name}`") + + context_lines.append("") + if len(available_services) > 1: + context_lines.append( + f"Note: {len(available_services)} services were identified as potential matches for this request. " + "Review their documentation and select the BEST match." + ) + context_lines.append("") + context_lines.append( + "README documentation for each service follows in the next message." + ) + + assistant_context = "\n".join(context_lines).strip() + return system_instructions, assistant_context diff --git a/pkgs/clan-cli/clan_lib/llm/schemas.py b/pkgs/clan-cli/clan_lib/llm/schemas.py new file mode 100644 index 000000000..edb4d9687 --- /dev/null +++ b/pkgs/clan-cli/clan_lib/llm/schemas.py @@ -0,0 +1,551 @@ +"""Type definitions and schema conversion for LLM function calling.""" + +from __future__ import annotations + +import logging +from dataclasses import dataclass +from typing import TYPE_CHECKING, Literal, NotRequired, TypedDict + +from clan_lib.errors import ClanError +from clan_lib.machines.list import list_machines +from clan_lib.services.modules import Module, list_service_modules +from clan_lib.tags.list import list_tags + +if TYPE_CHECKING: + from clan_lib.flake.flake import Flake + +log = logging.getLogger(__name__) + +JSONSchemaType = Literal[ + "array", "boolean", "integer", "null", "number", "object", "string" +] + +JSONSchemaFormat = Literal[ + # Dates and Times + "date-time", + "time", + "date", + "duration", + # Email Addresses + "email", + "idn-email", + # Hostnames + "hostname", + "idn-hostname", + # IP Addresses + "ipv4", + "ipv6", + # Resource Identifiers + "uuid", + "uri", + "uri-reference", + "iri", + "iri-reference", + # URI Template + "uri-template", + # JSON Pointer + "json-pointer", + "relative-json-pointer", + # Regular Expressions + "regex", +] + +JSONValue = str | int | float | bool | None | list["JSONValue"] | dict[str, "JSONValue"] + +JSONDict = dict[str, JSONValue] + +MessageRole = Literal["system", "user", "assistant"] + + +class 
ChatMessage(TypedDict): + role: MessageRole + content: str + mode: NotRequired[str] + + +ConversationHistory = list[ChatMessage] + + +class PendingFinalDecisionState(TypedDict, total=False): + service_name: NotRequired[str] + service_summary: NotRequired[str] + + +class PendingServiceSelectionState(TypedDict, total=False): + readme_results: NotRequired[list[dict[str, JSONValue]]] + + +class SessionState(TypedDict, total=False): + pending_final_decision: NotRequired[PendingFinalDecisionState] + pending_service_selection: NotRequired[PendingServiceSelectionState] + + +class JSONSchemaProperty(TypedDict, total=False): + type: JSONSchemaType | list[JSONSchemaType] + format: JSONSchemaFormat + description: str | None + enum: list[str] | None + items: JSONDict | None + properties: dict[str, JSONSchemaProperty] | None + patternProperties: dict[str, JSONSchemaProperty] | None + required: list[str] | None + additionalProperties: bool | JSONDict | None + + +class JSONSchemaParameters(TypedDict, total=False): + type: JSONSchemaType + properties: dict[str, JSONSchemaProperty] + required: list[str] + additionalProperties: bool + + +class OpenAIFunctionSchema(TypedDict): + type: Literal["function"] + name: str + description: str + parameters: JSONSchemaParameters + strict: bool + + +class OllamaFunctionDefinition(TypedDict): + """The function definition inside an Ollama tool.""" + + name: str + description: str + parameters: JSONSchemaParameters + + +class OllamaFunctionSchema(TypedDict): + """Ollama-compatible function schema format.""" + + type: Literal["function"] + function: OllamaFunctionDefinition + + +class SimplifiedServiceSchema(TypedDict): + """Simplified service schema with just name and description.""" + + name: str + description: str + input: str | None + + +ToolDefinition = OpenAIFunctionSchema | OllamaFunctionSchema + + +class FunctionCallType(TypedDict): + """Function call format matching OpenAI's function calling structure.""" + + id: str + call_id: str + type: 
Literal["function_call"] + name: str + arguments: str + + +class ReadmeRequest(TypedDict): + """Request for README documentation.""" + + input_name: str | None + function_name: str + + +@dataclass(frozen=True) +class MachineDescription: + name: str + description: str | None + + +@dataclass(frozen=True) +class TagDescription: + name: str + description: str | None + + +class OpenAIMessageContentBlock(TypedDict, total=False): + type: str + text: NotRequired[str] + + +OpenAIMessageContent = str | list[OpenAIMessageContentBlock] + + +class OpenAIToolFunctionPayload(TypedDict, total=False): + name: NotRequired[str] + arguments: NotRequired[str] + + +class OpenAIToolCallPayload(TypedDict, total=False): + id: NotRequired[str] + function: NotRequired[OpenAIToolFunctionPayload] + + +class OpenAIChatMessagePayload(TypedDict, total=False): + role: NotRequired[MessageRole] + content: NotRequired[OpenAIMessageContent] + tool_calls: NotRequired[list[OpenAIToolCallPayload]] + + +class OpenAIChoicePayload(TypedDict, total=False): + message: NotRequired[OpenAIChatMessagePayload] + + +class OpenAIChatCompletionResponse(TypedDict, total=False): + choices: NotRequired[list[OpenAIChoicePayload]] + + +class OllamaToolFunctionPayload(TypedDict, total=False): + name: NotRequired[str] + arguments: NotRequired[JSONValue] + + +class OllamaToolCallPayload(TypedDict, total=False): + function: NotRequired[OllamaToolFunctionPayload] + + +class OllamaMessagePayload(TypedDict, total=False): + role: NotRequired[str] + content: NotRequired[JSONValue] + tool_calls: NotRequired[list[OllamaToolCallPayload]] + + +class OllamaChatResponse(TypedDict, total=False): + message: NotRequired[OllamaMessagePayload] + + +MessageContent = JSONValue | OpenAIMessageContent + + +class ChatCompletionRequestPayload(TypedDict, total=False): + model: str + messages: list[ChatMessage] + tools: list[ToolDefinition] + stream: NotRequired[bool] + + +@dataclass(frozen=True) +class AiAggregate[T]: + machines: 
list[MachineDescription] + tags: list[TagDescription] + tools: list[T] + + +def clan_module_to_openai_spec( + module: Module, available_tags: list[str], available_machines: list[str] +) -> OpenAIFunctionSchema: + """Convert a clan module to OpenAI function schema format. + + Args: + module: The module to convert + available_tags: List of available tag names + available_machines: List of available machine names + + Returns: + OpenAI function schema + + """ + # Create individual role schemas with descriptions + role_properties = {} + for role_name, role_info in module.info.roles.items(): + role_properties[role_name] = JSONSchemaProperty( + type="object", + description=role_info.description, + properties={ + "machines": JSONSchemaProperty( + type="object", + patternProperties={ + f"^({'|'.join(available_machines)})$": JSONSchemaProperty( + type="object", + additionalProperties=False, + ) + }, + additionalProperties=False, + description='Machines to assign this role to. Format: each machine name is a key with an empty object {} as value. Example: {"wintux": {}, "gchq-local": {}}', + ), + "tags": JSONSchemaProperty( + type="object", + patternProperties={ + f"^({'|'.join(available_tags)})$": JSONSchemaProperty( + type="object", + additionalProperties=False, + ) + }, + additionalProperties=False, + description='Tags to assign this role to. Format: each tag name is a key with an empty object {} as value. 
def aggregate_openai_function_schemas(
    flake: Flake,
) -> AiAggregate[OpenAIFunctionSchema]:
    """Collect all service modules and convert them to OpenAI function schemas.

    Args:
        flake: The Flake object to extract services from

    Returns:
        AiAggregate containing machines, tags, and OpenAI function schemas

    Raises:
        ClanError: If no machines or tags are found

    """
    # Extract machine names
    machines = list_machines(flake)
    available_machines = list(machines.keys())

    # If no machines exist, raise error
    if not available_machines:
        msg = "No machines found in inventory. Please add at least one machine."
        raise ClanError(msg)

    # Merge regular and special tags into a *copy*: updating `all_tags.options`
    # in place would silently modify the object returned by `list_tags`.
    all_tags = list_tags(flake)
    available_tags: set[str] = set(all_tags.options)
    available_tags.update(all_tags.special)

    if not available_tags:
        msg = "No tags found in inventory. Please add at least one tag."
        raise ClanError(msg)

    # Sort once so the regex key generated for every tool is stable between
    # runs (set iteration order of strings is not), and reuse it below.
    sorted_tags = sorted(available_tags)

    # Convert each service module to an OpenAI function schema
    service_modules = list_service_modules(flake)
    tools: list[OpenAIFunctionSchema] = [
        clan_module_to_openai_spec(module, sorted_tags, available_machines)
        for module in service_modules.modules
    ]

    # Descriptions are only known for the built-in special tags.
    special_tag_descriptions = {
        "all": "A group containing all machines",
        "darwin": "A group containing all macOS machines",
        "nixos": "A group containing all NixOS machines",
    }

    tags_with_descriptions: list[TagDescription] = []
    for tag in sorted_tags:
        description: str | None = None
        if tag in all_tags.special:
            description = special_tag_descriptions.get(tag)
            if description is None:
                # NOTE(review): the message says "dropping", but the tag is
                # still appended below without a description - confirm which
                # of the two is intended.
                log.error(f"Unhandled special tag: {tag}, dropping from llm context")
        else:
            log.warning(
                f"Reading tag descriptions is not yet implemented, setting to None for: {tag}. "
                "This might result in the LLM not using this tag appropriately."
            )
        tags_with_descriptions.append(TagDescription(name=tag, description=description))

    return AiAggregate(
        machines=[
            MachineDescription(
                name=m.data["name"], description=m.data.get("description")
            )
            for m in machines.values()
        ],
        tags=tags_with_descriptions,
        tools=tools,
    )
def create_select_service_tool(
    available_services: list[str],
) -> OllamaFunctionSchema:
    """Create the select_service tool schema for selecting one service from candidates.

    Args:
        available_services: List of service names to choose from. Duplicates
            (e.g. the same service name offered by several inputs) are
            collapsed so the resulting ``enum`` only holds unique values.

    Returns:
        The select_service tool in Ollama format

    """
    # JSON Schema expects enum members to be unique; sort for a stable schema.
    unique_names = sorted(set(available_services))
    return OllamaFunctionSchema(
        type="function",
        function=OllamaFunctionDefinition(
            name="select_service",
            description="Select exactly one service from the available candidates and provide a focused summary of its documentation relevant to the user request.",
            parameters=JSONSchemaParameters(
                type="object",
                properties={
                    "service_name": JSONSchemaProperty(
                        type="string",
                        description="The name of the selected service. Must match one of the available service names exactly.",
                        enum=unique_names,
                    ),
                    "summary": JSONSchemaProperty(
                        type="string",
                        description="A concise summary (max 300 words) focusing on: (1) VALUE PROPOSITION - what problem this service solves and why you'd use it, (2) ROLES - what roles exist and the PURPOSE of each role, (3) KEY CONSTRAINTS - critical dependencies or limitations. Do NOT copy README examples or configuration snippets. Synthesize WHAT the service does and WHY, not HOW to configure it.",
                    ),
                },
                required=["service_name", "summary"],
            ),
        ),
    )
HTTPError, ConnectionRefusedError): + log.info("Waiting for Ollama to start...") + if time.time() - start >= timeout: + logs = service_manager.get_service_logs("ollama") + msg = f"Ollama did not start within 10 seconds: {logs}" + raise ClanError(msg) + time.sleep(0.5) + + +@API.register +def create_llm_model() -> None: + """Ensure the Ollama model is available; pull it if missing.""" + model = "qwen3:4b-instruct" + + cmd = nix_shell( + ["ollama"], + ["ollama", "pull", model], + ) + run(cmd) + url = "http://localhost:11434/api/tags" + + try: + with urllib.request.urlopen(url, timeout=5) as resp: # noqa: S310 + if resp.getcode() == HTTPStatus.OK.value and model in resp.read().decode(): + return + except HTTPError as e: + msg = f"Ollama returned HTTP {e.code} when checking model availability." + raise ClanError(msg) from e + except URLError as e: + msg = "Ollama API not reachable at http://localhost:11434" + raise ClanError(msg) from e diff --git a/pkgs/clan-cli/clan_lib/llm/trace.py b/pkgs/clan-cli/clan_lib/llm/trace.py new file mode 100644 index 000000000..28f1d8dda --- /dev/null +++ b/pkgs/clan-cli/clan_lib/llm/trace.py @@ -0,0 +1,126 @@ +"""LLM conversation tracing for debugging and analysis.""" + +import json +import logging +from datetime import UTC, datetime +from pathlib import Path +from typing import Any, Literal + +from .schemas import ChatMessage + +log = logging.getLogger(__name__) + + +def write_trace_entry( + trace_file: Path, + provider: Literal["openai", "ollama", "claude"], + model: str, + stage: str, + request: dict[str, Any], + response: dict[str, Any], + duration_ms: float, + metadata: dict[str, Any] | None = None, +) -> None: + """Write a single trace entry to the trace file. + + The trace file is appended to (not overwritten) to create a linear log + of all LLM interactions during a session. 
+ + Args: + trace_file: Path to the JSON trace file + provider: The LLM provider used + model: The model name + stage: The stage/phase of processing (e.g., "discovery", "final_decision") + request: The request data sent to the LLM (messages, tools, etc.) + response: The response data from the LLM (function_calls, message, etc.) + duration_ms: Duration of the API call in milliseconds + metadata: Optional metadata to include in the trace entry + + """ + timestamp = datetime.now(UTC).isoformat() + + entry = { + "timestamp": timestamp, + "provider": provider, + "model": model, + "stage": stage, + "request": request, + "response": response, + "duration_ms": round(duration_ms, 2), + } + if metadata: + entry["metadata"] = metadata + + try: + # Read existing entries if file exists + existing_entries: list[dict[str, Any]] = [] + if trace_file.exists(): + with trace_file.open("r") as f: + try: + existing_entries = json.load(f) + if not isinstance(existing_entries, list): + log.warning( + f"Trace file {trace_file} is not a list, starting fresh" + ) + existing_entries = [] + except json.JSONDecodeError: + log.warning( + f"Trace file {trace_file} is invalid JSON, starting fresh" + ) + existing_entries = [] + + # Append new entry + existing_entries.append(entry) + + # Write back with nice formatting + trace_file.parent.mkdir(parents=True, exist_ok=True) + with trace_file.open("w") as f: + json.dump(existing_entries, f, indent=2, ensure_ascii=False) + + log.info(f"Wrote trace entry to {trace_file} (stage: {stage})") + + except (OSError, json.JSONDecodeError): + log.exception(f"Failed to write trace entry to {trace_file}") + + +def format_messages_for_trace(messages: list[ChatMessage]) -> list[dict[str, str]]: + """Format chat messages for human-readable trace output. 
def format_tools_for_trace(tools: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Format tools for human-readable trace output.

    Tools that wrap their definition in a nested ``function`` mapping are
    flattened to that mapping's ``name``, ``description`` and ``parameters``
    (missing ``parameters`` become ``{}``). Every other tool is passed
    through unchanged.

    Args:
        tools: List of tool definitions

    Returns:
        List of tool dictionaries prepared for the trace

    """
    formatted: list[dict[str, Any]] = []
    for tool in tools:
        func = tool.get("function") if isinstance(tool, dict) else None
        if isinstance(func, dict):
            # Nested layout: {"type": "function", "function": {...}}
            formatted.append(
                {
                    "name": func.get("name"),
                    "description": func.get("description"),
                    "parameters": func.get("parameters", {}),
                }
            )
        else:
            # Flat or unknown layouts - pass through untouched so a
            # malformed tool never breaks tracing.
            formatted.append(tool)
    return formatted
def _deserialize_readme_results(
    data: list[dict[str, JSONValue]] | None,
) -> dict[InputName, ServiceReadmeCollection] | None:
    """Deserialize readme results from session state.

    Session state is handed back by the client, so its shape is validated
    strictly: any structural mismatch yields ``None`` instead of raising.

    Args:
        data: Serialized readme results, or None when nothing was stored

    Returns:
        Mapping from input name to its README collection, or None when
        `data` is None or malformed

    """
    if data is None:
        return None
    # Reject payloads that are not a list of objects; previously this
    # surfaced as an AttributeError on `.get`.
    if not isinstance(data, list):
        return None

    results: dict[InputName, ServiceReadmeCollection] = {}
    for entry in data:
        if not isinstance(entry, dict):
            return None

        input_name = entry.get("input_name")
        if input_name is not None and not isinstance(input_name, str):
            return None

        readmes_raw = entry.get("readmes")
        if not isinstance(readmes_raw, dict):
            return None

        typed_readmes: dict[str, str | None] = {}
        for service_name, content in readmes_raw.items():
            if not isinstance(service_name, str):
                return None
            if content is not None and not isinstance(content, str):
                return None
            typed_readmes[service_name] = content

        results[input_name] = ServiceReadmeCollection(
            input_name=input_name,
            readmes=typed_readmes,
        )

    return results
+45,7 @@ TOP_LEVEL_RESOURCES = { "generator", # vars generators operations "service", # clan.service management "system", # system operations + "llm", # llm operations } From 6c6afd6f4b5baf741158db94ed0630f19cc97ed7 Mon Sep 17 00:00:00 2001 From: Qubasa Date: Wed, 22 Oct 2025 15:31:46 +0200 Subject: [PATCH 2/5] clan_lib: Move llm.py out of clan_lib/services towards it's own clan_lib/llm folder --- pkgs/clan-cli/clan_lib/services/llm.py | 144 ------------ pkgs/clan-cli/clan_lib/services/llm_test.py | 230 -------------------- 2 files changed, 374 deletions(-) delete mode 100644 pkgs/clan-cli/clan_lib/services/llm.py delete mode 100644 pkgs/clan-cli/clan_lib/services/llm_test.py diff --git a/pkgs/clan-cli/clan_lib/services/llm.py b/pkgs/clan-cli/clan_lib/services/llm.py deleted file mode 100644 index b87ac2761..000000000 --- a/pkgs/clan-cli/clan_lib/services/llm.py +++ /dev/null @@ -1,144 +0,0 @@ -from typing import Any, Literal, TypedDict - -from clan_lib.services.modules import Module - -JSONSchemaType = Literal[ - "array", "boolean", "integer", "null", "number", "object", "string" -] - -JSONSchemaFormat = Literal[ - # Dates and Times - "date-time", - "time", - "date", - "duration", - # Email Addresses - "email", - "idn-email", - # Hostnames - "hostname", - "idn-hostname", - # IP Addresses - "ipv4", - "ipv6", - # Resource Identifiers - "uuid", - "uri", - "uri-reference", - "iri", - "iri-reference", - # URI Template - "uri-template", - # JSON Pointer - "json-pointer", - "relative-json-pointer", - # Regular Expressions - "regex", -] - - -class JSONSchemaProperty(TypedDict, total=False): - type: JSONSchemaType | list[JSONSchemaType] - format: JSONSchemaFormat - description: str | None - enum: list[str] | None - items: dict[str, Any] | None - properties: dict[str, "JSONSchemaProperty"] | None - patternProperties: dict[str, "JSONSchemaProperty"] | None - required: list[str] | None - additionalProperties: bool | dict[str, Any] | None - - -class 
JSONSchemaParameters(TypedDict, total=False): - type: JSONSchemaType - properties: dict[str, JSONSchemaProperty] - required: list[str] - additionalProperties: bool - - -class LLMFunctionSchema(TypedDict): - type: Literal["function"] - name: str - description: str - parameters: JSONSchemaParameters - strict: bool - - -def clan_module_to_llm_function( - module: Module, available_tags: list[str], available_machines: list[str] -) -> LLMFunctionSchema: - # Create individual role schemas with descriptions - role_properties = {} - for role_name, role_info in module.info.roles.items(): - role_properties[role_name] = JSONSchemaProperty( - type="object", - description=role_info.description, - properties={ - "machines": JSONSchemaProperty( - type="object", - patternProperties={ - f"^({'|'.join(available_machines)})$": JSONSchemaProperty( - type="object", - additionalProperties=False, - ) - }, - additionalProperties=False, - description="Machines for this role with empty configuration objects", - ), - "tags": JSONSchemaProperty( - type="object", - patternProperties={ - f"^({'|'.join(available_tags)})$": JSONSchemaProperty( - type="object", - additionalProperties=False, - ) - }, - additionalProperties=False, - description="Tags for this role with empty configuration objects", - ), - }, - additionalProperties=False, - ) - - module_name = module.usage_ref.get("name") - if not isinstance(module_name, str): - msg = "Module name must be a string" - raise TypeError(msg) - - module_input = module.usage_ref.get("input") - if module_input is not None and not isinstance(module_input, str): - msg = "Module input must be a string or None" - raise TypeError(msg) - - module_properties = {} - if module_input is not None: - module_properties["input"] = JSONSchemaProperty( - type="string", - description=( - "Source / Input name of the module, e.g. 
'clan-core' or null for built-in modules" - ), - enum=[module_input], - ) - - return LLMFunctionSchema( - type="function", - name=module.usage_ref["name"], - description=module.info.manifest.description, - parameters=JSONSchemaParameters( - type="object", - properties={ - "module": JSONSchemaProperty( - type="object", - properties=module_properties, - ), - "roles": JSONSchemaProperty( - type="object", - properties=role_properties, - additionalProperties=False, - ), - }, - required=["roles"], - additionalProperties=False, - ), - strict=True, - ) diff --git a/pkgs/clan-cli/clan_lib/services/llm_test.py b/pkgs/clan-cli/clan_lib/services/llm_test.py deleted file mode 100644 index 79b8e7e5f..000000000 --- a/pkgs/clan-cli/clan_lib/services/llm_test.py +++ /dev/null @@ -1,230 +0,0 @@ -from collections.abc import Callable - -import pytest -from clan_cli.tests.fixtures_flakes import nested_dict -from clan_lib.flake.flake import Flake -from clan_lib.services.llm import LLMFunctionSchema, clan_module_to_llm_function -from clan_lib.services.modules import ( - list_service_modules, -) - - -@pytest.mark.with_core -def test_clan_module_to_llm_func( - clan_flake: Callable[..., Flake], -) -> None: - # ATTENTION! 
This method lacks Typechecking - config = nested_dict() - # explicit module selection - # We use this random string in test to avoid code dependencies on the input name - config["inventory"]["instances"]["foo"]["module"]["input"] = ( - "Y2xhbi1jaW9yZS1uZXZlci1kZXBlbmQtb24tbWU" - ) - config["inventory"]["instances"]["foo"]["module"]["name"] = "sshd" - # input = null - config["inventory"]["instances"]["bar"]["module"]["input"] = None - config["inventory"]["instances"]["bar"]["module"]["name"] = "sshd" - - # Omit input - config["inventory"]["instances"]["baz"]["module"]["name"] = "sshd" - # external input - flake = clan_flake(config) - - service_modules = list_service_modules(flake) - - # Module(usage_ref={'name': 'borgbackup', 'input': None}, info=ModuleInfo(manifest=ModuleManifest(name='borgbackup', description='Efficient, deduplicating backup program with optional compression and secure encryption.', categories=['System'], features={'API': True}), roles={'client': Role(name='client', description='A borgbackup client that backs up to all borgbackup server roles.'), 'server': Role(name='server', description='A borgbackup server that stores the backups of clients.')}), native=True, instance_refs=[]), - borgbackup_service = next( - m for m in service_modules.modules if m.usage_ref.get("name") == "borgbackup" - ) - - assert borgbackup_service is not None - - available_machines = ["machine1", "machine2", "server1"] - available_tags = ["production", "backup", "client"] - - generated_tool_func = clan_module_to_llm_function( - borgbackup_service, available_tags, available_machines - ) - - expected_tool_func: LLMFunctionSchema = { - "type": "function", - "name": "borgbackup", - "description": "Efficient, deduplicating backup program with optional compression and secure encryption.", - "parameters": { - "type": "object", - "properties": { - "module": { - "type": "object", - "properties": { - # "input": { - # "type": "string", - # "description": "Source / Input name of the 
module, e.g. 'clan-core' or null for built-in modules", - # "enum": ["Y2xhbi1jaW9yZS1uZXZlci1kZXBlbmQtb24tbWU"], - # } - }, - }, - "roles": { - "type": "object", - "properties": { - "client": { - "type": "object", - "description": "A borgbackup client that backs up to all borgbackup server roles.", - "properties": { - "machines": { - "type": "object", - "patternProperties": { - "^(machine1|machine2|server1)$": { - "type": "object", - "additionalProperties": False, - } - }, - "additionalProperties": False, - "description": "Machines for this role with empty configuration objects", - }, - "tags": { - "type": "object", - "patternProperties": { - "^(production|backup|client)$": { - "type": "object", - "additionalProperties": False, - } - }, - "additionalProperties": False, - "description": "Tags for this role with empty configuration objects", - }, - }, - "additionalProperties": False, - }, - "server": { - "type": "object", - "description": "A borgbackup server that stores the backups of clients.", - "properties": { - "machines": { - "type": "object", - "patternProperties": { - "^(machine1|machine2|server1)$": { - "type": "object", - "additionalProperties": False, - } - }, - "additionalProperties": False, - "description": "Machines for this role with empty configuration objects", - }, - "tags": { - "type": "object", - "patternProperties": { - "^(production|backup|client)$": { - "type": "object", - "additionalProperties": False, - } - }, - "additionalProperties": False, - "description": "Tags for this role with empty configuration objects", - }, - }, - "additionalProperties": False, - }, - }, - "additionalProperties": False, - }, - }, - "required": ["roles"], - "additionalProperties": False, - }, - "strict": True, - } - - assert generated_tool_func == expected_tool_func - - certificate_service = next( - m for m in service_modules.modules if m.usage_ref.get("name") == "certificates" - ) - assert certificate_service is not None - - generated_tool_func2 = 
clan_module_to_llm_function( - certificate_service, available_tags, available_machines - ) - - expected_tool_func2: LLMFunctionSchema = { - "type": "function", - "name": "certificates", - "description": "Sets up a PKI certificate chain using step-ca", - "parameters": { - "type": "object", - "properties": { - "module": { - "type": "object", - "properties": {}, - }, - "roles": { - "type": "object", - "properties": { - "ca": { - "type": "object", - "description": "A certificate authority that issues and signs certificates for other machines.", - "properties": { - "machines": { - "type": "object", - "patternProperties": { - "^(machine1|machine2|server1)$": { - "type": "object", - "additionalProperties": False, - } - }, - "additionalProperties": False, - "description": "Machines for this role with empty configuration objects", - }, - "tags": { - "type": "object", - "patternProperties": { - "^(production|backup|client)$": { - "type": "object", - "additionalProperties": False, - } - }, - "additionalProperties": False, - "description": "Tags for this role with empty configuration objects", - }, - }, - "additionalProperties": False, - }, - "default": { - "type": "object", - "description": "A machine that trusts the CA and can get certificates issued by it.", - "properties": { - "machines": { - "type": "object", - "patternProperties": { - "^(machine1|machine2|server1)$": { - "type": "object", - "additionalProperties": False, - } - }, - "additionalProperties": False, - "description": "Machines for this role with empty configuration objects", - }, - "tags": { - "type": "object", - "patternProperties": { - "^(production|backup|client)$": { - "type": "object", - "additionalProperties": False, - } - }, - "additionalProperties": False, - "description": "Tags for this role with empty configuration objects", - }, - }, - "additionalProperties": False, - }, - }, - "additionalProperties": False, - }, - }, - "required": ["roles"], - "additionalProperties": False, - }, - "strict": True, 
- } - - assert generated_tool_func2 == expected_tool_func2 From 5fb616efb4bfdb05fcec94af4d05c4176775733e Mon Sep 17 00:00:00 2001 From: Qubasa Date: Wed, 22 Oct 2025 15:32:27 +0200 Subject: [PATCH 3/5] clan_lib: Add llm container test data --- .../inventory_instances_machines_meta.json | 46 ++ .../llm/container_data/inventory_tags.json | 7 + .../container_data/modules_per_source.json | 454 +++++++++++ .../llm/container_data/static_modules.json | 451 +++++++++++ .../llm/container_data/test_trace_data.json | 733 ++++++++++++++++++ .../llm/container_data/vpns_readme.json | 5 + 6 files changed, 1696 insertions(+) create mode 100644 pkgs/clan-cli/clan_lib/llm/container_data/inventory_instances_machines_meta.json create mode 100644 pkgs/clan-cli/clan_lib/llm/container_data/inventory_tags.json create mode 100644 pkgs/clan-cli/clan_lib/llm/container_data/modules_per_source.json create mode 100644 pkgs/clan-cli/clan_lib/llm/container_data/static_modules.json create mode 100644 pkgs/clan-cli/clan_lib/llm/container_data/test_trace_data.json create mode 100644 pkgs/clan-cli/clan_lib/llm/container_data/vpns_readme.json diff --git a/pkgs/clan-cli/clan_lib/llm/container_data/inventory_instances_machines_meta.json b/pkgs/clan-cli/clan_lib/llm/container_data/inventory_instances_machines_meta.json new file mode 100644 index 000000000..6e8369094 --- /dev/null +++ b/pkgs/clan-cli/clan_lib/llm/container_data/inventory_instances_machines_meta.json @@ -0,0 +1,46 @@ +{ + "instances": {}, + "meta": { + "description": null, + "icon": null, + "name": "Qubasas_Clan" + }, + "machines": { + "gchq-local": { + "deploy": { + "buildHost": null, + "targetHost": null + }, + "description": null, + "icon": null, + "installedAt": 1756814302, + "machineClass": "nixos", + "name": "gchq-local", + "tags": ["all", "nixos"] + }, + "qube-email": { + "deploy": { + "buildHost": null, + "targetHost": null + }, + "description": null, + "icon": null, + "installedAt": 1756814302, + "machineClass": "nixos", + "name": 
"qube-email", + "tags": ["all", "nixos"] + }, + "wintux": { + "deploy": { + "buildHost": null, + "targetHost": null + }, + "description": null, + "icon": null, + "installedAt": 1756814302, + "machineClass": "nixos", + "name": "wintux", + "tags": ["all", "nixos"] + } + } +} diff --git a/pkgs/clan-cli/clan_lib/llm/container_data/inventory_tags.json b/pkgs/clan-cli/clan_lib/llm/container_data/inventory_tags.json new file mode 100644 index 000000000..2c65877a5 --- /dev/null +++ b/pkgs/clan-cli/clan_lib/llm/container_data/inventory_tags.json @@ -0,0 +1,7 @@ +{ + "tags": { + "all": ["gchq-local", "qube-email", "wintux"], + "darwin": [], + "nixos": ["gchq-local", "qube-email", "wintux"] + } +} diff --git a/pkgs/clan-cli/clan_lib/llm/container_data/modules_per_source.json b/pkgs/clan-cli/clan_lib/llm/container_data/modules_per_source.json new file mode 100644 index 000000000..26f37cdb8 --- /dev/null +++ b/pkgs/clan-cli/clan_lib/llm/container_data/modules_per_source.json @@ -0,0 +1,454 @@ +{ + "clan-core": { + "admin": { + "manifest": { + "categories": ["Utility"], + "description": "Adds a root user with ssh access", + "features": { + "API": true + }, + "name": "clan-core/admin", + "readme": "The admin service aggregates components that allow an administrator to log in to and manage the machine.\n\nThe following configuration:\n\n1. Enables OpenSSH with root login and adds an SSH public key named`myusersKey` to the machine's authorized_keys via the `allowedKeys` setting.\n\n2. 
Automatically generates a password for the root user.\n\n```nix\ninstances = {\n admin = {\n roles.default.tags = {\n all = { };\n };\n roles.default.settings = {\n allowedKeys = {\n myusersKey = \"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIEFDNnynMbFWatSFdANzbJ8iiEKL7+9ZpDaMLrWRQjyH lhebendanz@wintux\";\n };\n };\n };\n};\n```\n\n\n\n" + }, + "roles": { + "default": { + "description": "Placeholder role to apply the admin service" + } + } + }, + "borgbackup": { + "manifest": { + "categories": ["System"], + "description": "Efficient, deduplicating backup program with optional compression and secure encryption.", + "features": { + "API": true + }, + "name": "borgbackup", + "readme": "## Usage\n\n```nix\ninventory.instances = {\n borgbackup = {\n module = {\n name = \"borgbackup\";\n input = \"clan-core\";\n };\n roles.client.machines.\"jon\".settings = {\n destinations.\"storagebox\" = {\n repo = \"username@hostname:/./borgbackup\";\n rsh = ''ssh -oPort=23 -i /run/secrets/vars/borgbackup/borgbackup.ssh'';\n };\n };\n roles.server.machines = { };\n };\n};\n```\n\nThe input should be named according to your flake input. Jon is configured as a\nclient machine with a destination pointing to a Hetzner Storage Box.\n\n## Overview\n\nThis guide explains how to set up and manage\n[BorgBackup](https://borgbackup.readthedocs.io/) for secure, efficient backups\nin a clan network. BorgBackup provides:\n\n- Space efficient storage of backups with deduplication\n- Secure, authenticated encryption\n- Compression: lz4, zstd, zlib, lzma or none\n- Mountable backups with FUSE\n- Easy installation on multiple platforms: Linux, macOS, BSD, \u2026\n- Free software (BSD license).\n- Backed by a large and active open-source community.\n\n## Roles\n\n### 1. Client\n\nClients are machines that create and send backups to various destinations. Each\nclient can have multiple backup destinations configured.\n\n### 2. 
Server\n\nServers act as backup repositories, receiving and storing backups from client\nmachines. They can be dedicated backup servers within your clan network.\n\n## Backup destinations\n\nThis service allows you to perform backups to multiple `destinations`.\nDestinations can be:\n\n- **Local**: Local disk storage\n- **Server**: Your own borgbackup server (using the `server` role)\n- **Third-party services**: Such as Hetzner's Storage Box\n\nFor a more comprehensive guide on backups look into the guide section.\n" + }, + "roles": { + "client": { + "description": "A borgbackup client that backs up to all borgbackup server roles." + }, + "server": { + "description": "A borgbackup server that stores the backups of clients." + } + } + }, + "certificates": { + "manifest": { + "categories": ["Network"], + "description": "Sets up a PKI certificate chain using step-ca", + "features": { + "API": true + }, + "name": "certificates", + "readme": "This service sets up a certificate authority (CA) that can issue certificates to\nother machines in your clan. For this the `ca` role is used.\nIt additionally provides a `default` role, that can be applied to all machines\nin your clan and will make sure they trust your CA.\n\n## Example Usage\n\nThe following configuration would add a CA for the top level domain `.foo`. If\nthe machine `server` now hosts a webservice at `https://something.foo`, it will\nget a certificate from `ca` which is valid inside your clan. 
The machine\n`client` will trust this certificate if it makes a request to\n`https://something.foo`.\n\nThis clan service can be combined with the `coredns` service for easy to deploy,\nSSL secured clan-internal service hosting.\n\n```nix\ninventory = {\n machines.ca = { };\n machines.client = { };\n machines.server = { };\n\n instances.\"certificates\" = {\n module.name = \"certificates\";\n module.input = \"self\";\n\n roles.ca.machines.ca.settings.tlds = [ \"foo\" ];\n roles.default.machines.client = { };\n roles.default.machines.server = { };\n };\n};\n```\n" + }, + "roles": { + "ca": { + "description": "A certificate authority that issues and signs certificates for other machines." + }, + "default": { + "description": "A machine that trusts the CA and can get certificates issued by it." + } + } + }, + "coredns": { + "manifest": { + "categories": ["Network"], + "description": "Clan-internal DNS and service exposure", + "features": { + "API": true + }, + "name": "coredns", + "readme": "This module enables hosting clan-internal services easily, which can be resolved\ninside your VPN. This allows defining a custom top-level domain (e.g. `.clan`)\nand exposing endpoints from a machine to others, which will be\naccessible under `http://.clan` in your browser.\n\nThe service consists of two roles:\n\n- A `server` role: This is the DNS-server that will be queried when trying to\n resolve clan-internal services. It defines the top-level domain.\n- A `default` role: This does two things. First, it sets up the nameservers so\n that clan-internal queries are resolved via the `server` machine, while\n external queries are resolved as normal via DHCP. Second, it allows exposing\n services (see example below).\n\n## Example Usage\n\nHere the machine `dnsserver` is designated as internal DNS-server for the TLD\n`.foo`. 
`server01` will host an application that shall be reachable at\n`http://one.foo` and `server02` is going to be reachable at `http://two.foo`.\n`client` is any other machine that is part of the clan but does not host any\nservices.\n\nWhen `client` tries to resolve `http://one.foo`, the DNS query will be\nrouted to `dnsserver`, which will answer with `192.168.1.3`. If it tries to\nresolve some external domain (e.g. `https://clan.lol`), the query will not be\nrouted to `dnsserver` but resolved as before, via the nameservers advertised by\nDHCP.\n\n```nix\ninventory = {\n\n machines = {\n dnsserver = { }; # 192.168.1.2\n server01 = { }; # 192.168.1.3\n server02 = { }; # 192.168.1.4\n client = { }; # 192.168.1.5\n };\n\n instances = {\n coredns = {\n\n module.name = \"@clan/coredns\";\n module.input = \"self\";\n\n # Add the default role to all machines, including `client`\n roles.default.tags.all = { };\n\n # DNS server queries to http://.foo are resolved here\n roles.server.machines.\"dnsserver\".settings = {\n ip = \"192.168.1.2\";\n tld = \"foo\";\n };\n\n # First service\n # Registers http://one.foo will resolve to 192.168.1.3\n # underlying service runs on server01\n roles.default.machines.\"server01\".settings = {\n ip = \"192.168.1.3\";\n services = [ \"one\" ];\n };\n\n # Second service\n roles.default.machines.\"server02\".settings = {\n ip = \"192.168.1.4\";\n services = [ \"two\" ];\n };\n };\n };\n};\n```\n" + }, + "roles": { + "default": { + "description": "A machine that registers the 'server' role as resolver and registers services under the configured TLD in the resolver." + }, + "server": { + "description": "A DNS server that resolves services in the clan network." 
+ } + } + }, + "data-mesher": { + "manifest": { + "categories": ["System"], + "description": "Set up data-mesher", + "features": { + "API": true + }, + "name": "data-mesher", + "readme": "This service will set up data-mesher.\n\n## Usage\n\n```nix\ninventory.instances = {\n data-mesher = {\n module = {\n name = \"data-mesher\";\n input = \"clan-core\";\n };\n roles.admin.machines.server0 = {\n settings = {\n bootstrapNodes = {\n node1 = \"192.168.1.1:7946\";\n node2 = \"192.168.1.2:7946\";\n };\n\n network = {\n hostTTL = \"24h\";\n interface = \"tailscale0\";\n };\n };\n };\n roles.peer.machines.server1 = { };\n roles.signer.machines.server2 = { };\n };\n}\n```\n" + }, + "roles": { + "admin": { + "description": "A data-mesher admin node that bootstraps the network and can sign new nodes into the network." + }, + "peer": { + "description": "A data-mesher peer node that connects to the network." + }, + "signer": { + "description": "A data-mesher signer node that can sign new nodes into the network." + } + } + }, + "dyndns": { + "manifest": { + "categories": ["Network"], + "description": "A dynamic DNS service to auto update domain IPs", + "features": { + "API": true + }, + "name": "clan-core/dyndns", + "readme": "\nA Dynamic-DNS (DDNS) service continuously keeps one or more DNS records in sync with the current public IP address of your machine. \nIn *clan* this service is backed by [qdm12/ddns-updater](https://github.com/qdm12/ddns-updater).\n\n> Info \n> ddns-updater itself is **heavily opinionated and version-specific**. Whenever you need the exhaustive list of flags or\n> provider-specific fields refer to its *versioned* documentation \u2013 **not** the GitHub README\n---\n\n# 1. Configuration model\n\nInternally ddns-updater consumes a single file named `config.json`. 
\nA minimal configuration for the registrar *Namecheap* looks like:\n\n```json\n{\n \"settings\": [\n {\n \"provider\": \"namecheap\",\n \"domain\": \"sub.example.com\",\n \"password\": \"e5322165c1d74692bfa6d807100c0310\"\n }\n ]\n}\n```\n\nAnother example for *Porkbun*:\n\n```json\n{\n \"settings\": [\n {\n \"provider\": \"porkbun\",\n \"domain\": \"domain.com\",\n \"api_key\": \"sk1_\u2026\",\n \"secret_api_key\": \"pk1_\u2026\",\n \"ip_version\": \"ipv4\",\n \"ipv6_suffix\": \"\"\n }\n ]\n}\n```\n\nWhen you write a `clan.nix` the **common** fields (`provider`, `domain`, `period`, \u2026) are already exposed as typed\n*Nix options*. \nRegistrar-specific or very new keys can be passed through an open attribute set called **extraSettings**.\n\n---\n\n# 2. Full Porkbun example\n\nManage three records \u2013 `@`, `home` and `test` \u2013 of the domain\n`jon.blog` and refresh them every 15 minutes:\n\n```nix title=\"clan.nix\" hl_lines=\"10-11\"\ninventory.instances = {\n dyndns = {\n roles.default.machines.\"jon\" = { };\n roles.default.settings = {\n period = 15; # minutes\n settings = {\n \"all-jon-blog\" = {\n provider = \"porkbun\";\n domain = \"jon.blog\";\n\n # (1) tell the secret-manager which key we are going to store\n secret_field_name = \"secret_api_key\";\n\n # everything below is copied verbatim into config.json\n extraSettings = {\n host = \"@,home,test\"; # (2) comma-separated list of sub-domains\n ip_version = \"ipv4\";\n ipv6_suffix = \"\";\n api_key = \"pk1_4bb2b231275a02fdc23b7e6f3552s01S213S\"; # (3) public \u2013 safe to commit\n };\n };\n };\n };\n };\n};\n```\n\n1. `secret_field_name` tells the *vars-generator* to store the entered secret under the specified JSON field name in the configuration.\n2. ddns-updater allows multiple hosts by separating them with a comma.\n3. 
The `api_key` above is *public*; the corresponding **private key** is retrieved through `secret_field_name`.\n\n" + }, + "roles": { + "default": { + "description": "Placeholder role to apply the dyndns service" + } + } + }, + "emergency-access": { + "manifest": { + "categories": ["System"], + "description": "Set recovery password for emergency access to machine to debug boot issues", + "features": { + "API": true + }, + "name": "clan-core/emergency-access", + "readme": "This service will automatically set the emergency access password if your system fails to boot.\n\n## Usage\n\n```nix\ninventory.instances = {\n emergency-access = {\n module = {\n name = \"emergency-access\";\n input = \"clan-core\";\n };\n\n roles.default.tags.nixos = { };\n };\n}\n```\n" + }, + "roles": { + "default": { + "description": "Placeholder role to apply the emergency-access service" + } + } + }, + "garage": { + "manifest": { + "categories": ["System"], + "description": "S3-compatible object store for small self-hosted geo-distributed deployments", + "features": { + "API": true + }, + "name": "clan-core/garage", + "readme": "[Garage](https://garagehq.deuxfleurs.fr/) is an open-source, S3-compatible distributed object storage service for self-hosting.\n\nThis module provisions a single-instance S3 bucket. 
To customize its behavior, set `services.garage.settings` in your Nix configuration.\n\nExample configuration:\n```\ninstances = {\n garage = {\n roles.default.machines.\"server\" = {};\n };\n};\n```\n" + }, + "roles": { + "default": { + "description": "Placeholder role to apply the garage service" + } + } + }, + "hello-world": { + "manifest": { + "categories": ["Uncategorized"], + "description": "This is a test", + "features": { + "API": true + }, + "name": "clan-core/hello-word", + "readme": "This a test README just to appease the eval warnings if we don't have one" + }, + "roles": { + "evening": { + "description": "An evening greeting machine" + }, + "morning": { + "description": "A morning greeting machine" + } + } + }, + "importer": { + "manifest": { + "categories": ["Utility"], + "description": "Convenient, structured module imports for hosts.", + "features": { + "API": true + }, + "name": "importer", + "readme": "The importer module allows users to configure importing modules in a flexible and structured way.\nIt exposes the `extraModules` functionality of the inventory, without any added configuration.\n\n## Usage\n\n```nix\ninventory.instances = {\n\n zone1 = {\n module.name = \"@clan/importer\";\n roles.default.tags.zone1 = {};\n roles.default.extraModules = [ \"modules/zone1.nix\" ];\n };\n\n base = {\n module.name = \"@clan/importer\";\n roles.default.tags.all = {};\n roles.default.extraModules = [ \"modules/base.nix\" ];\n };\n\n};\n```\n\nThis will import the module `modules/base.nix` to all machines that have the `all` tag,\nwhich by default is every machine managed by the clan.\nAnd also import for all machines tagged with `zone1` the module at `modules/zone1.nix`.\n" + }, + "roles": { + "default": { + "description": "Placeholder role to apply the importer service" + } + } + }, + "internet": { + "manifest": { + "categories": ["System", "Network"], + "description": "Part of the clan networking abstraction to define how to reach machines from outside 
the clan network over the internet, if defined has the highest priority", + "features": { + "API": true + }, + "name": "clan-core/internet", + "readme": "This module is part of Clan's [networking interface](https://docs.clan.lol/guides/networking/networking/).\n\nClan's networking module automatically manages connections across available network transports and falls back intelligently. When you run `clan ssh` or `clan machines update`, Clan attempts each configured network in priority order until a connection succeeds.\n\nThe example below shows how to configure a domain so server1 is reachable over the clearnet. By default, the `internet` module has the highest priority among networks.\n\n```nix\n inventory.instances = {\n # Direct SSH with fallback support\n internet = {\n roles.default.machines.server1 = {\n settings.host = \"server1.example.com\";\n };\n roles.default.machines.server2 = {\n settings.host = \"192.168.1.100\";\n };\n };\n};\n```" + }, + "roles": { + "default": { + "description": "Placeholder role to apply the internet service" + } + } + }, + "kde": { + "manifest": { + "categories": ["Desktop"], + "description": "Sets up a graphical desktop environment", + "features": { + "API": true + }, + "name": "clan-core/kde", + "readme": "This module sets up the [KDE Plasma](https://kde.org) Desktop environment.\n\n!!! Note \"Customisation\"\n This service intentionally does not provide any settings or customisation\n options, as desktop preferences are highly subjective. Clan currently\n supports only this default desktop configuration. Any additional\n customisation can be done via the `extraModules` option. Furthermore, if you\n want to use a different desktop environment or compositor (e.g. 
Gnome or\n sway), we encourage you to to build your own\n [Clan Service](https://docs.clan.lol/guides/services/community/) or have a\n look at the [Community Services](https://docs.clan.lol/services/community/).\n\n## Example Usage\n\n```nix\ninventory = {\n instances = {\n kde = {\n\n # Deploy on all machines\n roles.default.tags.all = { };\n\n # Or individual hosts\n roles.default.machines.laptop = { };\n };\n };\n};\n```\n" + }, + "roles": { + "default": { + "description": "KDE/Plasma (wayland): Full-featured desktop environment with modern Qt-based interface" + } + } + }, + "localbackup": { + "manifest": { + "categories": ["System"], + "description": "Automatically backups current machine to local directory or a mounted drive.", + "features": { + "API": true + }, + "name": "localbackup", + "readme": "## Features\n\n- Creates incremental snapshots using rsnapshot\n- Supports multiple backup targets\n- Mount/unmount hooks for external storage\n- Pre/post backup hooks for custom scripts\n- Configurable snapshot retention\n- Automatic state folder detection\n\n## Usage\n\nEnable the localbackup service and configure backup targets:\n\n```nix\ninstances = {\n localbackup = {\n module.name = \"@clan/localbackup\";\n module.input = \"self\";\n roles.default.machines.\"machine\".settings = {\n targets.external= {\n directory = \"/mnt/backup\";\n mountpoint = \"/mnt/backup\";\n };\n };\n };\n};\n```\n\n## Commands\n\nThe service provides these commands:\n\n- `localbackup-create`: Create a new backup\n- `localbackup-list`: List available backups\n- `localbackup-restore`: Restore from backup (requires NAME and FOLDERS environment variables)\n" + }, + "roles": { + "default": { + "description": "Placeholder role to apply the localbackup service" + } + } + }, + "matrix-synapse": { + "manifest": { + "categories": ["Social"], + "description": "A federated messaging server with end-to-end encryption.", + "features": { + "API": true + }, + "name": "clan-core/matrix-synapese", + 
"readme": "This NixOS module installs and configures Synapse \u2014 a federated Matrix homeserver with end-to-end encryption \u2014 and optionally provides the Element web client.\n\nThe example below demonstrates a minimal setup that includes:\n\n- Element web client.\n- Synapse backed by PostgreSQL and nginx.\n- An admin user and an additional regular user.\n\nExample configuration:\n\n```nix\ninstances = {\n matrix-synapse = {\n roles.default.machines.\"jon\".settings = {\n acmeEmail = \"admins@clan.lol\";\n server_tld = \"clan.test\";\n app_domain = \"matrix.clan.test\";\n users.admin.admin = true;\n users.someuser = { };\n };\n };\n};\n```" + }, + "roles": { + "default": { + "description": "Placeholder role to apply the matrix-synapse service" + } + } + }, + "monitoring": { + "manifest": { + "categories": ["Uncategorized"], + "description": "Monitoring service for the nodes in your clan", + "features": { + "API": true + }, + "name": "clan-core/monitoring", + "readme": "## Usage\n\n```\ninventory.instances = {\n monitoring = {\n module.name = \"monitoring\";\n roles.telegraf.tags.all = {\n settings.interfaces = [ \"wg-clan\" ];\n };\n };\n };\n```\n\nThis service will eventually set up a monitoring stack for your clan. 
For now,\nonly a telegraf role is implemented, which exposes the currently deployed\nversion of your configuration, so it can be used to check for required updates.\n\n" + }, + "roles": { + "telegraf": { + "description": "Placeholder role to apply the telegraf monitoring agent" + } + } + }, + "mycelium": { + "manifest": { + "categories": ["System", "Network"], + "description": "End-2-end encrypted P2P IPv6 overlay network", + "features": { + "API": true + }, + "name": "clan-core/mycelium", + "readme": "\n[Mycelium](https://github.com/threefoldtech/mycelium) is an end-to-end encrypted IPv6 overlay network that spans the globe.\n\n## Features\n- Locality-aware routing: finds the shortest path between nodes.\n- All traffic is end-to-end encrypted.\n- Can route traffic via friend nodes and is location-aware.\n- Automatic rerouting if a physical link goes down.\n- IPv6 addresses are derived from private keys.\n- A simple, reliable message bus is implemented on top of Mycelium.\n- Supports multiple transports (QUIC, TCP, \u2026). 
Hole punching for QUIC is in progress to enable true P2P connectivity behind NATs.\n- Designed for planetary-scale scalability; previous overlay networks reached practical limits, and Mycelium focuses on scaling.\n- Can run without a TUN device and be used solely as a reliable message bus.\n\nExample configuration below connects all your machines to the Mycelium network:\n```nix\nmycelium = {\n roles.peer.tags.all = {};\n};\n```\n" + }, + "roles": { + "peer": { + "description": "A peer in the mycelium network" + } + } + }, + "packages": { + "manifest": { + "categories": ["System"], + "description": "Define package sets from nixpkgs and install them on one or more machines", + "features": { + "API": true + }, + "name": "clan-core/packages", + "readme": "This service is meant to be consumed by the UI / API, and exposes a JSON serializable interface to add packages to a machine over the inventory.\n\nThe example below demonstrates installing the \"cbonsai\" application to a machine named \"server.\n\n```\ninstances.packages = {\n roles.default.machines.\"server\".settings = {\n packages = [ \"cbonsai\" ];\n };\n};\n```" + }, + "roles": { + "default": { + "description": "Placeholder role to apply the packages service" + } + } + }, + "sshd": { + "manifest": { + "categories": ["System", "Network"], + "description": "Enables secure remote access to the machine over SSH with automatic host key management and optional CA-signed host certificates.", + "features": { + "API": true + }, + "name": "clan-core/sshd", + "readme": "\n## What it does\n- Generates and persists SSH host keys via `vars`.\n- Optionally issues CA-signed host certificates for servers.\n- Installs the `server` CA public key into `clients` `known_hosts` for TOFU-less verification.\n\n\n## When to use it\n- Zero-TOFU SSH for dynamic fleets: admins/CI can connect to frequently rebuilt hosts (e.g., server-1.example.com) without prompts or per-host `known_hosts` churn.\n\n### Roles\n- Server: runs sshd, presents 
a CA-signed host certificate for `.`.\n- Client: trusts the CA for the given domains to verify servers' certificates.\n Tip: assign both roles to a machine if it should both present a cert and verify others.\n\nQuick start (with host certificates)\nUseful if you never want to get a prompt about trusting the ssh fingerprint.\n```nix\n{\n inventory.instances = {\n sshd-with-certs = {\n module = { name = \"sshd\"; input = \"clan-core\"; };\n # Servers present certificates for .example.com\n roles.server.tags.all = { };\n roles.server.settings = {\n certificate.searchDomains = [ \"example.com\" ];\n # Optional: also add RSA host keys\n # hostKeys.rsa.enable = true;\n };\n # Clients trust the CA for *.example.com\n roles.client.tags.all = { };\n roles.client.settings = {\n certificate.searchDomains = [ \"example.com\" ];\n };\n };\n };\n}\n```\n\nBasic: only add persistent host keys (ed25519), no certificates\nUseful if you want to get an ssh \"trust this server\" prompt once and then never again. \n```nix\n{\n inventory.instances = {\n sshd-basic = {\n module = {\n name = \"sshd\";\n input = \"clan-core\";\n };\n roles.server.tags.all = { };\n };\n };\n}\n```\n\nExample: selective trust per environment\nAdmins should trust only production; CI should trust prod and staging. 
Servers are reachable under both domains.\n```nix\n{\n inventory.instances = {\n sshd-env-scoped = {\n module = { name = \"sshd\"; input = \"clan-core\"; };\n\n # Servers present certs for both prod and staging FQDNs\n roles.server.tags.all = { };\n roles.server.settings = {\n certificate.searchDomains = [ \"prod.example.com\" \"staging.example.com\" ];\n };\n\n # Admin laptop: trust prod only\n roles.client.machines.\"admin-laptop\".settings = {\n certificate.searchDomains = [ \"prod.example.com\" ];\n };\n\n # CI runner: trust prod and staging\n roles.client.machines.\"ci-runner-1\".settings = {\n certificate.searchDomains = [ \"prod.example.com\" \"staging.example.com\" ];\n };\n };\n };\n}\n```\n### Explanation\n- Admin -> server1.prod.example.com: zero-TOFU (verified via cert).\n- Admin -> server1.staging.example.com: falls back to TOFU (or is blocked by policy).\n- CI -> either prod or staging: zero-TOFU for both.\nNote: server and client searchDomains don't have to be identical; they only need to overlap for the hostnames you actually use.\n\n### Notes\n- Connect using a name that matches a cert principal (e.g., `server1.example.com`); wildcards are not allowed inside the certificate.\n- CA private key stays in `vars` (not deployed); only the CA public key is distributed.\n- Logins still require your user SSH keys on the server (passwords are disabled)." + }, + "roles": { + "client": { + "description": "Installs the SSH CA public key into known_hosts for the configured domains, so this machine can verify servers\u2019 host certificates without TOFU prompts." + }, + "server": { + "description": "Runs sshd with persistent host keys and (if certificate.searchDomains is set) a CA\u2011signed host certificate for ., enabling TOFU\u2011less verification by clients that trust the CA." 
+ } + } + }, + "syncthing": { + "manifest": { + "categories": ["Utility", "System", "Network"], + "description": "Syncthing is a continuous file synchronization program with automatic peer discovery", + "features": { + "API": true + }, + "name": "clan-core/syncthing", + "readme": "This service configures Syncthing to continuously synchronize a folder peer-to-peer across your machines.\n\nExample configuration:\n\n```nix\n{\n instances.syncthing = {\n roles.peer.tags.all = { };\n roles.peer.settings.folders = {\n documents = {\n path = \"/home/youruser/syncthing/documents\";\n };\n };\n };\n}\n```\n\nNotes:\n- Each key under `folders` is a folder ID (an arbitrary identifier for Syncthing).\n- Prefer absolute paths (example shown). `~` may work in some environments but can be ambiguous in service contexts.\n\n\n## Documentation\nSee the official Syncthing docs: https://docs.syncthing.net/\n" + }, + "roles": { + "peer": { + "description": "A peer in the syncthing cluster that syncs files with other peers." + } + } + }, + "tor": { + "manifest": { + "categories": ["System", "Network"], + "description": "Part of the clan networking abstraction to define how to reach machines through the Tor network, if used has the lowest priority", + "features": { + "API": true + }, + "name": "clan-core/tor", + "readme": "This module is part of Clan's [networking interface](https://docs.clan.lol/guides/networking/networking/).\n\nClan's networking module automatically manages connections across available network transports and falls back intelligently. When you run `clan ssh` or `clan machines update`, Clan attempts each configured network in priority order until a connection succeeds.\n\nThe example below configures all your nixos machines to be reachable over the Tor network. 
By default, the `tor` module has the lowest priority among networks, as it's the slowest.\n\n```nix\n inventory.instances = {\n # Fallback: Secure connections via Tor\n tor = {\n roles.server.tags.nixos = { };\n };\n};\n```" + }, + "roles": { + "client": { + "description": "Enables a continuously running Tor proxy on the machine, allowing access to other machines via the Tor network.\nIf not enabled, a Tor proxy will be started automatically when required.\n" + }, + "server": { + "description": "Sets up a Tor onion service for the machine, thus making it reachable over Tor." + } + } + }, + "trusted-nix-caches": { + "manifest": { + "categories": ["System"], + "description": "This module sets the `clan.lol` and `nix-community` cache up as a trusted cache.", + "features": { + "API": true + }, + "name": "clan-core/trusted-nix-caches", + "readme": "Sets up nix to trust and use the clan cache\n\n## Usage\n\n```nix\ninventory.instances = {\n clan-cache = {\n module = {\n name = \"trusted-nix-caches\";\n input = \"clan-core\";\n };\n roles.default.machines.draper = { };\n };\n}\n```\n" + }, + "roles": { + "default": { + "description": "Placeholder role to apply the trusted-nix-caches service" + } + } + }, + "users": { + "manifest": { + "categories": ["System"], + "description": "An instance of this module will create a user account on the added machines,\nalong with a generated password that is constant across machines and user settings.\n", + "features": { + "API": true + }, + "name": "clan-core/user", + "readme": "## Usage\n\n```nix\n{\n inventory.instances = {\n # Deploy user alice on all machines. Don't prompt for password (will be\n # auto-generated).\n user-alice = {\n module = {\n name = \"users\";\n input = \"clan-core\";\n };\n roles.default.tags.all = { };\n roles.default.settings = {\n user = \"alice\";\n prompt = false;\n };\n };\n\n # Deploy user Carol on all machines. Prompt only once and use the\n # same password on all machines. 
(`share = true`)\n user-carol = {\n module = {\n name = \"users\";\n input = \"clan\";\n };\n roles.default.tags.all = { };\n roles.default.settings = {\n user = \"carol\";\n share = true;\n };\n };\n\n # Deploy user bob only on his laptop. Prompt for a password.\n user-bob = {\n module = {\n name = \"users\";\n input = \"clan-core\";\n };\n roles.default.machines.bobs-laptop = { };\n roles.default.settings.user = \"bob\";\n };\n };\n}\n```\n\n## Migration from `root-password` module\n\nThe deprecated `clan.root-password` module has been replaced by the `users` module. Here's how to migrate:\n\n### 1. Update your flake configuration\n\nReplace the `root-password` module import with a `users` service instance:\n\n```nix\n# OLD - Remove this from your nixosModules:\nimports = [\n self.inputs.clan-core.clanModules.root-password\n];\n\n# NEW - Add to inventory.instances or machines/flake-module.nix:\ninstances = {\n users-root = {\n module.name = \"users\";\n module.input = \"clan-core\";\n roles.default.tags.nixos = { };\n roles.default.settings = {\n user = \"root\";\n prompt = false; # Set to true if you want to be prompted\n groups = [ ];\n };\n };\n};\n```\n\n### 2. Migrate vars\n\nThe vars structure has changed from `root-password` to `user-password-root`:\n\n```bash\n# For each machine, rename the vars directories:\ncd vars/per-machine//\nmv root-password user-password-root\nmv user-password-root/password-hash user-password-root/user-password-hash\nmv user-password-root/password user-password-root/user-password\n```\n" + }, + "roles": { + "default": { + "description": "Placeholder role to apply the user service" + } + } + }, + "wifi": { + "manifest": { + "categories": ["Uncategorized"], + "description": "Pre configure wifi networks to connect to", + "features": { + "API": true + }, + "name": "wifi", + "readme": "This module allows you to pre-configure WiFi networks for automatic connection. 
\nEach attribute in `settings.network` serves as an internal identifier, not the actual SSID. \nAfter defining your networks, you will be prompted for the SSID and password for each one.\n\nThis module leverages NetworkManager for managing connections.\n\n```nix\ninstances = {\n wifi = {\n module.name = \"wifi\";\n module.input = \"clan-core\";\n\n roles.default = {\n machines.\"jon\" = {\n settings.networks.home = { };\n settings.networks.work = { keyMgmt = \"wpa-eap\"; };\n };\n };\n };\n};\n```\n" + }, + "roles": { + "default": { + "description": "Placeholder role to apply the wifi service" + } + } + }, + "wireguard": { + "manifest": { + "categories": ["System", "Network"], + "description": "Wireguard-based VPN mesh network with automatic IPv6 address allocation", + "features": { + "API": true + }, + "name": "clan-core/wireguard", + "readme": "# Wireguard VPN Service\n\nThis service provides a Wireguard-based VPN mesh network with automatic IPv6 address allocation and routing between clan machines.\n\n## Overview\n\nThe wireguard service creates a secure mesh network between clan machines using two roles:\n- **Controllers**: Machines with public endpoints that act as connection points and routers\n- **Peers**: Machines that connect through controllers to access the network\n\n## Requirements\n\n- Controllers must have a publicly accessible endpoint (domain name or static IP)\n- Peers must be in networks where UDP traffic is not blocked (uses port 51820 by default, configurable)\n\n## Features\n\n- Automatic IPv6 address allocation using ULA (Unique Local Address) prefixes\n- Full mesh connectivity between all machines\n- Automatic key generation and distribution\n- IPv6 forwarding on controllers for inter-peer communication\n- Support for multiple controllers for redundancy\n\n## Network Architecture\n\n### IPv6 Address Allocation\n- Base network: `/40` ULA prefix (deterministically generated from instance name)\n- Controllers: Each gets a `/56` subnet from the 
base `/40`\n- Peers: Each gets a unique 64-bit host suffix that is used in ALL controller subnets\n\n### Addressing Design\n- Each peer generates a unique host suffix (e.g., `:8750:a09b:0:1`)\n- This suffix is appended to each controller's `/56` prefix to create unique addresses\n- Example: peer1 with suffix `:8750:a09b:0:1` gets:\n - `fd51:19c1:3b:f700:8750:a09b:0:1` in controller1's subnet\n - `fd51:19c1:c1:aa00:8750:a09b:0:1` in controller2's subnet\n- Controllers allow each peer's `/96` subnet for routing flexibility\n\n### Connectivity\n- Peers use a single WireGuard interface with multiple IPs (one per controller subnet)\n- Controllers connect to ALL other controllers and ALL peers on a single interface\n- Controllers have IPv6 forwarding enabled to route traffic between peers\n- All traffic between peers flows through controllers\n- Symmetric routing is maintained as each peer has consistent IPs across all controllers\n\n### Example Network Topology\n\n```mermaid\ngraph TB\n subgraph Controllers\n C1[controller1
endpoint: vpn1.example.com
fd51:19c1:3b:f700::/56]\n C2[controller2
endpoint: vpn2.example.com
fd51:19c1:c1:aa00::/56]\n end\n \n subgraph Peers\n P1[peer1
designated: controller1]\n P2[peer2
designated: controller2]\n P3[peer3
designated: controller1]\n end\n \n %% Controllers connect to each other\n C1 <--> C2\n \n %% All peers connect to all controllers\n P1 <--> C1\n P1 <--> C2\n P2 <--> C1\n P2 <--> C2\n P3 <--> C1\n P3 <--> C2\n \n %% Peer-to-peer traffic flows through controllers\n P1 -.->|via controllers| P3\n P1 -.->|via controllers| P2\n P2 -.->|via controllers| P3\n \n classDef controller fill:#f9f,stroke:#333,stroke-width:4px\n classDef peer fill:#bbf,stroke:#333,stroke-width:2px\n class C1,C2 controller\n class P1,P2,P3 peer\n```\n\n## Configuration\n\n### Basic Setup with Single Controller\n\n```nix\n# In your clan.nix\n{\n instances = {\n wireguard = {\n module.name = \"wireguard\";\n module.input = \"clan-core\";\n roles.controller = {\n machines.server1 = {};\n settings = {\n # Public endpoint where this controller can be reached\n endpoint = \"vpn.example.com\";\n # Optional: Change the UDP port (default: 51820)\n port = 51820;\n };\n };\n roles.peer = {\n # No configuration needed if only one controller exists\n machines.laptop1 = {};\n };\n };\n }\n}\n```\n\n### Multiple Controllers Setup\n\n```nix\n{\n instances = {\n wireguard = {\n module.name = \"wireguard\";\n module.input = \"clan-core\";\n roles.controller.machines = {\n server1.settings.endpoint = \"vpn1.example.com\";\n server2.settings.endpoint = \"vpn2.example.com\";\n server3.settings.endpoint = \"vpn3.example.com\";\n };\n roles.peer.machines.laptop1 = {\n # Must specify which controller subnet is exposed as the default in /etc/hosts, when multiple controllers exist\n settings.controller = \"server1\";\n };\n };\n }\n}\n```\n\n### Advanced Options\n\n\n### Automatic Hostname Resolution\n\nThe wireguard service automatically adds entries to `/etc/hosts` for all machines in the network. 
Each machine is accessible via its hostname in the format `.`.\n\nFor example, with an instance named `vpn`:\n- `server1.vpn` - resolves to server1's IPv6 address\n- `laptop1.vpn` - resolves to laptop1's IPv6 address\n\nThis allows machines to communicate using hostnames instead of IPv6 addresses:\n\n```bash\n# Ping another machine by hostname\nping6 server1.vpn\n\n# SSH to another machine\nssh user@laptop1.vpn\n```\n\n## Troubleshooting\n\n### Check Wireguard Status\n```bash\nsudo wg show\n```\n\n### Verify IP Addresses\n```bash\nip addr show dev \n```\n\n### Check Routing\n```bash\nip -6 route show dev \n```\n\n### Interface Fails to Start: \"Address already in use\"\n\nIf you see this error in your logs:\n```\nwireguard: Could not bring up interface, ignoring: Address already in use\n```\n\nThis means the configured port (default: 51820) is already in use by another service or wireguard instance. Solutions:\n\n1. **Check for conflicting wireguard instances:**\n ```bash\n sudo wg show\n sudo ss -ulnp | grep 51820\n ```\n\n2. **Use a different port:**\n ```nix\n services.wireguard.myinstance = {\n roles.controller = {\n endpoint = \"vpn.example.com\";\n port = 51821; # Use a different port\n };\n };\n ```\n\n3. **Ensure unique ports across multiple instances:**\n If you have multiple wireguard instances on the same machine, each must use a different port.\n\n### Key Management\n\nKeys are automatically generated and stored in the clan vars system. 
To regenerate keys:\n\n```bash\n# Regenerate keys for a specific machine and instance\nclan vars generate --service wireguard-keys- --regenerate --machine \n\n# Apply the new keys\nclan machines update \n```\n\n## Security Considerations\n\n- All traffic is encrypted using Wireguard's modern cryptography\n- Private keys never leave the machines they're generated on\n- Public keys are distributed through the clan vars system\n- Controllers must have publicly accessible endpoints\n- Firewall rules are automatically configured for the Wireguard ports\n\n" + }, + "roles": { + "controller": { + "description": "A controller that routes peer traffic. Must be publicly reachable." + }, + "peer": { + "description": "A peer that connects to one or more controllers." + } + } + }, + "yggdrasil": { + "manifest": { + "categories": ["Uncategorized"], + "description": "Yggdrasil encrypted IPv6 routing overlay network", + "features": { + "API": true + }, + "name": "clan-core/yggdrasil", + "readme": "This module sets up [yggdrasil](https://yggdrasil-network.github.io/) across your clan. \n\nYggdrasil is designed to be a future-proof and decentralised alternative to\nthe structured routing protocols commonly used today on the internet. 
Inside your clan, it will allow you to reach all of your machines.\n\n## Example Usage\n\nWhile you can specify statically configured peers for each host, yggdrasil does auto-discovery of local peers.\n\n```nix\ninventory = {\n\n machines = {\n peer1 = { };\n peer2 = { };\n };\n\n instances = {\n yggdrasil = {\n\n # Deploy on all machines\n roles.default.tags.all = { };\n\n # Or individual hosts\n roles.default.machines.peer1 = { };\n roles.default.machines.peer2 = { };\n };\n };\n};\n```\n" + }, + "roles": { + "default": { + "description": "Placeholder role to apply the yggdrasil service" + } + } + }, + "zerotier": { + "manifest": { + "categories": ["Utility"], + "description": "Zerotier Mesh VPN Service for secure P2P networking between machines", + "features": { + "API": true + }, + "name": "clan-core/zerotier", + "readme": "## Usage\n\n```\ninventory.instances = {\n zerotier = {\n module = {\n name = \"zerotier\";\n input = \"clan-core\";\n };\n roles.peer.tags.all = { };\n roles.controller.machines.jon = { };\n roles.moon.machines.sara.settings.stableEndpoints = [ \"77.52.165.46\" ];\n };\n```\n\nThe input should be named according to your flake input.\nAll machines will be peers and connected to the zerotier network.\nJon is the controller machine, which will accept other machines into the network.\nSara is a moon and sets the `stableEndpoints` setting with a publicly reachable IP, the moon is optional.\n\n## Overview\n\nThis guide explains how to set up and manage a [ZeroTier VPN](https://zerotier.com) for a clan network. Each VPN requires a single controller and can support multiple peers and optional moons for better connectivity.\n\n## Roles\n\n### 1. 
Controller\n\nThe [Controller](https://docs.zerotier.com/controller/) manages network membership and is responsible for admitting new peers.\nWhen a new node is added to the clan, the controller must be updated to ensure it has the latest member list.\n\n- **Key Points:**\n - Must be online to admit new machines to the VPN.\n - Existing nodes can continue to communicate even when the controller is offline.\n\n### 2. Moons\n\n[Moons](https://docs.zerotier.com/roots) act as relay nodes,\nproviding direct connectivity to peers via their public IP addresses.\nThey enable devices that are not publicly reachable to join the VPN by routing through these nodes.\n\n- **Configuration Notes:**\n - Each moon must define its public IP address.\n - Ensures connectivity for devices behind NAT or restrictive firewalls.\n\n### 3. Peers\n\nPeers are standard nodes in the VPN.\nThey connect to other peers, moons, and the controller as needed.\n\n- **Purpose:**\n - General role for all machines that are neither controllers nor moons.\n - Ideal for most clan members devices.\n" + }, + "roles": { + "controller": { + "description": "Manages network membership and is responsible for admitting new peers to your Zerotier network." + }, + "moon": { + "description": "A moon acts as a relay node to connect other nodes in the zerotier network that are not publicly reachable. Each moon must be publicly reachable." + }, + "peer": { + "description": "A peer that connects to your private Zerotier network." 
+ } + } + } + }, + "self": {} +} diff --git a/pkgs/clan-cli/clan_lib/llm/container_data/static_modules.json b/pkgs/clan-cli/clan_lib/llm/container_data/static_modules.json new file mode 100644 index 000000000..09a5e1ae7 --- /dev/null +++ b/pkgs/clan-cli/clan_lib/llm/container_data/static_modules.json @@ -0,0 +1,451 @@ +{ + "admin": { + "manifest": { + "categories": ["Utility"], + "description": "Adds a root user with ssh access", + "features": { + "API": true + }, + "name": "clan-core/admin", + "readme": "The admin service aggregates components that allow an administrator to log in to and manage the machine.\n\nThe following configuration:\n\n1. Enables OpenSSH with root login and adds an SSH public key named`myusersKey` to the machine's authorized_keys via the `allowedKeys` setting.\n\n2. Automatically generates a password for the root user.\n\n```nix\ninstances = {\n admin = {\n roles.default.tags = {\n all = { };\n };\n roles.default.settings = {\n allowedKeys = {\n myusersKey = \"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIEFDNnynMbFWatSFdANzbJ8iiEKL7+9ZpDaMLrWRQjyH lhebendanz@wintux\";\n };\n };\n };\n};\n```\n\n\n\n" + }, + "roles": { + "default": { + "description": "Placeholder role to apply the admin service" + } + } + }, + "borgbackup": { + "manifest": { + "categories": ["System"], + "description": "Efficient, deduplicating backup program with optional compression and secure encryption.", + "features": { + "API": true + }, + "name": "borgbackup", + "readme": "## Usage\n\n```nix\ninventory.instances = {\n borgbackup = {\n module = {\n name = \"borgbackup\";\n input = \"clan-core\";\n };\n roles.client.machines.\"jon\".settings = {\n destinations.\"storagebox\" = {\n repo = \"username@hostname:/./borgbackup\";\n rsh = ''ssh -oPort=23 -i /run/secrets/vars/borgbackup/borgbackup.ssh'';\n };\n };\n roles.server.machines = { };\n };\n};\n```\n\nThe input should be named according to your flake input. 
Jon is configured as a\nclient machine with a destination pointing to a Hetzner Storage Box.\n\n## Overview\n\nThis guide explains how to set up and manage\n[BorgBackup](https://borgbackup.readthedocs.io/) for secure, efficient backups\nin a clan network. BorgBackup provides:\n\n- Space efficient storage of backups with deduplication\n- Secure, authenticated encryption\n- Compression: lz4, zstd, zlib, lzma or none\n- Mountable backups with FUSE\n- Easy installation on multiple platforms: Linux, macOS, BSD, \u2026\n- Free software (BSD license).\n- Backed by a large and active open-source community.\n\n## Roles\n\n### 1. Client\n\nClients are machines that create and send backups to various destinations. Each\nclient can have multiple backup destinations configured.\n\n### 2. Server\n\nServers act as backup repositories, receiving and storing backups from client\nmachines. They can be dedicated backup servers within your clan network.\n\n## Backup destinations\n\nThis service allows you to perform backups to multiple `destinations`.\nDestinations can be:\n\n- **Local**: Local disk storage\n- **Server**: Your own borgbackup server (using the `server` role)\n- **Third-party services**: Such as Hetzner's Storage Box\n\nFor a more comprehensive guide on backups look into the guide section.\n" + }, + "roles": { + "client": { + "description": "A borgbackup client that backs up to all borgbackup server roles." + }, + "server": { + "description": "A borgbackup server that stores the backups of clients." + } + } + }, + "certificates": { + "manifest": { + "categories": ["Network"], + "description": "Sets up a PKI certificate chain using step-ca", + "features": { + "API": true + }, + "name": "certificates", + "readme": "This service sets up a certificate authority (CA) that can issue certificates to\nother machines in your clan. 
For this the `ca` role is used.\nIt additionally provides a `default` role, that can be applied to all machines\nin your clan and will make sure they trust your CA.\n\n## Example Usage\n\nThe following configuration would add a CA for the top level domain `.foo`. If\nthe machine `server` now hosts a webservice at `https://something.foo`, it will\nget a certificate from `ca` which is valid inside your clan. The machine\n`client` will trust this certificate if it makes a request to\n`https://something.foo`.\n\nThis clan service can be combined with the `coredns` service for easy to deploy,\nSSL secured clan-internal service hosting.\n\n```nix\ninventory = {\n machines.ca = { };\n machines.client = { };\n machines.server = { };\n\n instances.\"certificates\" = {\n module.name = \"certificates\";\n module.input = \"self\";\n\n roles.ca.machines.ca.settings.tlds = [ \"foo\" ];\n roles.default.machines.client = { };\n roles.default.machines.server = { };\n };\n};\n```\n" + }, + "roles": { + "ca": { + "description": "A certificate authority that issues and signs certificates for other machines." + }, + "default": { + "description": "A machine that trusts the CA and can get certificates issued by it." + } + } + }, + "coredns": { + "manifest": { + "categories": ["Network"], + "description": "Clan-internal DNS and service exposure", + "features": { + "API": true + }, + "name": "coredns", + "readme": "This module enables hosting clan-internal services easily, which can be resolved\ninside your VPN. This allows defining a custom top-level domain (e.g. `.clan`)\nand exposing endpoints from a machine to others, which will be\naccessible under `http://.clan` in your browser.\n\nThe service consists of two roles:\n\n- A `server` role: This is the DNS-server that will be queried when trying to\n resolve clan-internal services. It defines the top-level domain.\n- A `default` role: This does two things. 
First, it sets up the nameservers so\n that clan-internal queries are resolved via the `server` machine, while\n external queries are resolved as normal via DHCP. Second, it allows exposing\n services (see example below).\n\n## Example Usage\n\nHere the machine `dnsserver` is designated as internal DNS-server for the TLD\n`.foo`. `server01` will host an application that shall be reachable at\n`http://one.foo` and `server02` is going to be reachable at `http://two.foo`.\n`client` is any other machine that is part of the clan but does not host any\nservices.\n\nWhen `client` tries to resolve `http://one.foo`, the DNS query will be\nrouted to `dnsserver`, which will answer with `192.168.1.3`. If it tries to\nresolve some external domain (e.g. `https://clan.lol`), the query will not be\nrouted to `dnsserver` but resolved as before, via the nameservers advertised by\nDHCP.\n\n```nix\ninventory = {\n\n machines = {\n dnsserver = { }; # 192.168.1.2\n server01 = { }; # 192.168.1.3\n server02 = { }; # 192.168.1.4\n client = { }; # 192.168.1.5\n };\n\n instances = {\n coredns = {\n\n module.name = \"@clan/coredns\";\n module.input = \"self\";\n\n # Add the default role to all machines, including `client`\n roles.default.tags.all = { };\n\n # DNS server queries to http://.foo are resolved here\n roles.server.machines.\"dnsserver\".settings = {\n ip = \"192.168.1.2\";\n tld = \"foo\";\n };\n\n # First service\n # Registers http://one.foo will resolve to 192.168.1.3\n # underlying service runs on server01\n roles.default.machines.\"server01\".settings = {\n ip = \"192.168.1.3\";\n services = [ \"one\" ];\n };\n\n # Second service\n roles.default.machines.\"server02\".settings = {\n ip = \"192.168.1.4\";\n services = [ \"two\" ];\n };\n };\n };\n};\n```\n" + }, + "roles": { + "default": { + "description": "A machine that registers the 'server' role as resolver and registers services under the configured TLD in the resolver." 
+ }, + "server": { + "description": "A DNS server that resolves services in the clan network." + } + } + }, + "data-mesher": { + "manifest": { + "categories": ["System"], + "description": "Set up data-mesher", + "features": { + "API": true + }, + "name": "data-mesher", + "readme": "This service will set up data-mesher.\n\n## Usage\n\n```nix\ninventory.instances = {\n data-mesher = {\n module = {\n name = \"data-mesher\";\n input = \"clan-core\";\n };\n roles.admin.machines.server0 = {\n settings = {\n bootstrapNodes = {\n node1 = \"192.168.1.1:7946\";\n node2 = \"192.168.1.2:7946\";\n };\n\n network = {\n hostTTL = \"24h\";\n interface = \"tailscale0\";\n };\n };\n };\n roles.peer.machines.server1 = { };\n roles.signer.machines.server2 = { };\n };\n}\n```\n" + }, + "roles": { + "admin": { + "description": "A data-mesher admin node that bootstraps the network and can sign new nodes into the network." + }, + "peer": { + "description": "A data-mesher peer node that connects to the network." + }, + "signer": { + "description": "A data-mesher signer node that can sign new nodes into the network." + } + } + }, + "dyndns": { + "manifest": { + "categories": ["Network"], + "description": "A dynamic DNS service to auto update domain IPs", + "features": { + "API": true + }, + "name": "clan-core/dyndns", + "readme": "\nA Dynamic-DNS (DDNS) service continuously keeps one or more DNS records in sync with the current public IP address of your machine. \nIn *clan* this service is backed by [qdm12/ddns-updater](https://github.com/qdm12/ddns-updater).\n\n> Info \n> ddns-updater itself is **heavily opinionated and version-specific**. Whenever you need the exhaustive list of flags or\n> provider-specific fields refer to its *versioned* documentation \u2013 **not** the GitHub README\n---\n\n# 1. Configuration model\n\nInternally ddns-updater consumes a single file named `config.json`. 
\nA minimal configuration for the registrar *Namecheap* looks like:\n\n```json\n{\n \"settings\": [\n {\n \"provider\": \"namecheap\",\n \"domain\": \"sub.example.com\",\n \"password\": \"e5322165c1d74692bfa6d807100c0310\"\n }\n ]\n}\n```\n\nAnother example for *Porkbun*:\n\n```json\n{\n \"settings\": [\n {\n \"provider\": \"porkbun\",\n \"domain\": \"domain.com\",\n \"api_key\": \"sk1_\u2026\",\n \"secret_api_key\": \"pk1_\u2026\",\n \"ip_version\": \"ipv4\",\n \"ipv6_suffix\": \"\"\n }\n ]\n}\n```\n\nWhen you write a `clan.nix` the **common** fields (`provider`, `domain`, `period`, \u2026) are already exposed as typed\n*Nix options*. \nRegistrar-specific or very new keys can be passed through an open attribute set called **extraSettings**.\n\n---\n\n# 2. Full Porkbun example\n\nManage three records \u2013 `@`, `home` and `test` \u2013 of the domain\n`jon.blog` and refresh them every 15 minutes:\n\n```nix title=\"clan.nix\" hl_lines=\"10-11\"\ninventory.instances = {\n dyndns = {\n roles.default.machines.\"jon\" = { };\n roles.default.settings = {\n period = 15; # minutes\n settings = {\n \"all-jon-blog\" = {\n provider = \"porkbun\";\n domain = \"jon.blog\";\n\n # (1) tell the secret-manager which key we are going to store\n secret_field_name = \"secret_api_key\";\n\n # everything below is copied verbatim into config.json\n extraSettings = {\n host = \"@,home,test\"; # (2) comma-separated list of sub-domains\n ip_version = \"ipv4\";\n ipv6_suffix = \"\";\n api_key = \"pk1_4bb2b231275a02fdc23b7e6f3552s01S213S\"; # (3) public \u2013 safe to commit\n };\n };\n };\n };\n };\n};\n```\n\n1. `secret_field_name` tells the *vars-generator* to store the entered secret under the specified JSON field name in the configuration.\n2. ddns-updater allows multiple hosts by separating them with a comma.\n3. 
The `api_key` above is *public*; the corresponding **private key** is retrieved through `secret_field_name`.\n\n" + }, + "roles": { + "default": { + "description": "Placeholder role to apply the dyndns service" + } + } + }, + "emergency-access": { + "manifest": { + "categories": ["System"], + "description": "Set recovery password for emergency access to machine to debug boot issues", + "features": { + "API": true + }, + "name": "clan-core/emergency-access", + "readme": "This service will automatically set the emergency access password if your system fails to boot.\n\n## Usage\n\n```nix\ninventory.instances = {\n emergency-access = {\n module = {\n name = \"emergency-access\";\n input = \"clan-core\";\n };\n\n roles.default.tags.nixos = { };\n };\n}\n```\n" + }, + "roles": { + "default": { + "description": "Placeholder role to apply the emergency-access service" + } + } + }, + "garage": { + "manifest": { + "categories": ["System"], + "description": "S3-compatible object store for small self-hosted geo-distributed deployments", + "features": { + "API": true + }, + "name": "clan-core/garage", + "readme": "[Garage](https://garagehq.deuxfleurs.fr/) is an open-source, S3-compatible distributed object storage service for self-hosting.\n\nThis module provisions a single-instance S3 bucket. 
To customize its behavior, set `services.garage.settings` in your Nix configuration.\n\nExample configuration:\n```\ninstances = {\n garage = {\n roles.default.machines.\"server\" = {};\n };\n};\n```\n" + }, + "roles": { + "default": { + "description": "Placeholder role to apply the garage service" + } + } + }, + "hello-world": { + "manifest": { + "categories": ["Uncategorized"], + "description": "This is a test", + "features": { + "API": true + }, + "name": "clan-core/hello-word", + "readme": "This a test README just to appease the eval warnings if we don't have one" + }, + "roles": { + "evening": { + "description": "An evening greeting machine" + }, + "morning": { + "description": "A morning greeting machine" + } + } + }, + "importer": { + "manifest": { + "categories": ["Utility"], + "description": "Convenient, structured module imports for hosts.", + "features": { + "API": true + }, + "name": "importer", + "readme": "The importer module allows users to configure importing modules in a flexible and structured way.\nIt exposes the `extraModules` functionality of the inventory, without any added configuration.\n\n## Usage\n\n```nix\ninventory.instances = {\n\n zone1 = {\n module.name = \"@clan/importer\";\n roles.default.tags.zone1 = {};\n roles.default.extraModules = [ \"modules/zone1.nix\" ];\n };\n\n base = {\n module.name = \"@clan/importer\";\n roles.default.tags.all = {};\n roles.default.extraModules = [ \"modules/base.nix\" ];\n };\n\n};\n```\n\nThis will import the module `modules/base.nix` to all machines that have the `all` tag,\nwhich by default is every machine managed by the clan.\nAnd also import for all machines tagged with `zone1` the module at `modules/zone1.nix`.\n" + }, + "roles": { + "default": { + "description": "Placeholder role to apply the importer service" + } + } + }, + "internet": { + "manifest": { + "categories": ["System", "Network"], + "description": "Part of the clan networking abstraction to define how to reach machines from outside 
the clan network over the internet, if defined has the highest priority", + "features": { + "API": true + }, + "name": "clan-core/internet", + "readme": "This module is part of Clan's [networking interface](https://docs.clan.lol/guides/networking/networking/).\n\nClan's networking module automatically manages connections across available network transports and falls back intelligently. When you run `clan ssh` or `clan machines update`, Clan attempts each configured network in priority order until a connection succeeds.\n\nThe example below shows how to configure a domain so server1 is reachable over the clearnet. By default, the `internet` module has the highest priority among networks.\n\n```nix\n inventory.instances = {\n # Direct SSH with fallback support\n internet = {\n roles.default.machines.server1 = {\n settings.host = \"server1.example.com\";\n };\n roles.default.machines.server2 = {\n settings.host = \"192.168.1.100\";\n };\n };\n};\n```" + }, + "roles": { + "default": { + "description": "Placeholder role to apply the internet service" + } + } + }, + "kde": { + "manifest": { + "categories": ["Desktop"], + "description": "Sets up a graphical desktop environment", + "features": { + "API": true + }, + "name": "clan-core/kde", + "readme": "This module sets up the [KDE Plasma](https://kde.org) Desktop environment.\n\n!!! Note \"Customisation\"\n This service intentionally does not provide any settings or customisation\n options, as desktop preferences are highly subjective. Clan currently\n supports only this default desktop configuration. Any additional\n customisation can be done via the `extraModules` option. Furthermore, if you\n want to use a different desktop environment or compositor (e.g. 
Gnome or\n sway), we encourage you to to build your own\n [Clan Service](https://docs.clan.lol/guides/services/community/) or have a\n look at the [Community Services](https://docs.clan.lol/services/community/).\n\n## Example Usage\n\n```nix\ninventory = {\n instances = {\n kde = {\n\n # Deploy on all machines\n roles.default.tags.all = { };\n\n # Or individual hosts\n roles.default.machines.laptop = { };\n };\n };\n};\n```\n" + }, + "roles": { + "default": { + "description": "KDE/Plasma (wayland): Full-featured desktop environment with modern Qt-based interface" + } + } + }, + "localbackup": { + "manifest": { + "categories": ["System"], + "description": "Automatically backups current machine to local directory or a mounted drive.", + "features": { + "API": true + }, + "name": "localbackup", + "readme": "## Features\n\n- Creates incremental snapshots using rsnapshot\n- Supports multiple backup targets\n- Mount/unmount hooks for external storage\n- Pre/post backup hooks for custom scripts\n- Configurable snapshot retention\n- Automatic state folder detection\n\n## Usage\n\nEnable the localbackup service and configure backup targets:\n\n```nix\ninstances = {\n localbackup = {\n module.name = \"@clan/localbackup\";\n module.input = \"self\";\n roles.default.machines.\"machine\".settings = {\n targets.external= {\n directory = \"/mnt/backup\";\n mountpoint = \"/mnt/backup\";\n };\n };\n };\n};\n```\n\n## Commands\n\nThe service provides these commands:\n\n- `localbackup-create`: Create a new backup\n- `localbackup-list`: List available backups\n- `localbackup-restore`: Restore from backup (requires NAME and FOLDERS environment variables)\n" + }, + "roles": { + "default": { + "description": "Placeholder role to apply the localbackup service" + } + } + }, + "matrix-synapse": { + "manifest": { + "categories": ["Social"], + "description": "A federated messaging server with end-to-end encryption.", + "features": { + "API": true + }, + "name": "clan-core/matrix-synapese", + 
"readme": "This NixOS module installs and configures Synapse \u2014 a federated Matrix homeserver with end-to-end encryption \u2014 and optionally provides the Element web client.\n\nThe example below demonstrates a minimal setup that includes:\n\n- Element web client.\n- Synapse backed by PostgreSQL and nginx.\n- An admin user and an additional regular user.\n\nExample configuration:\n\n```nix\ninstances = {\n matrix-synapse = {\n roles.default.machines.\"jon\".settings = {\n acmeEmail = \"admins@clan.lol\";\n server_tld = \"clan.test\";\n app_domain = \"matrix.clan.test\";\n users.admin.admin = true;\n users.someuser = { };\n };\n };\n};\n```" + }, + "roles": { + "default": { + "description": "Placeholder role to apply the matrix-synapse service" + } + } + }, + "monitoring": { + "manifest": { + "categories": ["Uncategorized"], + "description": "Monitoring service for the nodes in your clan", + "features": { + "API": true + }, + "name": "clan-core/monitoring", + "readme": "## Usage\n\n```\ninventory.instances = {\n monitoring = {\n module.name = \"monitoring\";\n roles.telegraf.tags.all = {\n settings.interfaces = [ \"wg-clan\" ];\n };\n };\n };\n```\n\nThis service will eventually set up a monitoring stack for your clan. 
For now,\nonly a telegraf role is implemented, which exposes the currently deployed\nversion of your configuration, so it can be used to check for required updates.\n\n" + }, + "roles": { + "telegraf": { + "description": "Placeholder role to apply the telegraf monitoring agent" + } + } + }, + "mycelium": { + "manifest": { + "categories": ["System", "Network"], + "description": "End-2-end encrypted P2P IPv6 overlay network", + "features": { + "API": true + }, + "name": "clan-core/mycelium", + "readme": "\n[Mycelium](https://github.com/threefoldtech/mycelium) is an end-to-end encrypted IPv6 overlay network that spans the globe.\n\n## Features\n- Locality-aware routing: finds the shortest path between nodes.\n- All traffic is end-to-end encrypted.\n- Can route traffic via friend nodes and is location-aware.\n- Automatic rerouting if a physical link goes down.\n- IPv6 addresses are derived from private keys.\n- A simple, reliable message bus is implemented on top of Mycelium.\n- Supports multiple transports (QUIC, TCP, \u2026). 
Hole punching for QUIC is in progress to enable true P2P connectivity behind NATs.\n- Designed for planetary-scale scalability; previous overlay networks reached practical limits, and Mycelium focuses on scaling.\n- Can run without a TUN device and be used solely as a reliable message bus.\n\nExample configuration below connects all your machines to the Mycelium network:\n```nix\nmycelium = {\n roles.peer.tags.all = {};\n};\n```\n" + }, + "roles": { + "peer": { + "description": "A peer in the mycelium network" + } + } + }, + "packages": { + "manifest": { + "categories": ["System"], + "description": "Define package sets from nixpkgs and install them on one or more machines", + "features": { + "API": true + }, + "name": "clan-core/packages", + "readme": "This service is meant to be consumed by the UI / API, and exposes a JSON serializable interface to add packages to a machine over the inventory.\n\nThe example below demonstrates installing the \"cbonsai\" application to a machine named \"server.\n\n```\ninstances.packages = {\n roles.default.machines.\"server\".settings = {\n packages = [ \"cbonsai\" ];\n };\n};\n```" + }, + "roles": { + "default": { + "description": "Placeholder role to apply the packages service" + } + } + }, + "sshd": { + "manifest": { + "categories": ["System", "Network"], + "description": "Enables secure remote access to the machine over SSH with automatic host key management and optional CA-signed host certificates.", + "features": { + "API": true + }, + "name": "clan-core/sshd", + "readme": "\n## What it does\n- Generates and persists SSH host keys via `vars`.\n- Optionally issues CA-signed host certificates for servers.\n- Installs the `server` CA public key into `clients` `known_hosts` for TOFU-less verification.\n\n\n## When to use it\n- Zero-TOFU SSH for dynamic fleets: admins/CI can connect to frequently rebuilt hosts (e.g., server-1.example.com) without prompts or per-host `known_hosts` churn.\n\n### Roles\n- Server: runs sshd, presents 
a CA-signed host certificate for `.`.\n- Client: trusts the CA for the given domains to verify servers' certificates.\n Tip: assign both roles to a machine if it should both present a cert and verify others.\n\nQuick start (with host certificates)\nUseful if you never want to get a prompt about trusting the ssh fingerprint.\n```nix\n{\n inventory.instances = {\n sshd-with-certs = {\n module = { name = \"sshd\"; input = \"clan-core\"; };\n # Servers present certificates for .example.com\n roles.server.tags.all = { };\n roles.server.settings = {\n certificate.searchDomains = [ \"example.com\" ];\n # Optional: also add RSA host keys\n # hostKeys.rsa.enable = true;\n };\n # Clients trust the CA for *.example.com\n roles.client.tags.all = { };\n roles.client.settings = {\n certificate.searchDomains = [ \"example.com\" ];\n };\n };\n };\n}\n```\n\nBasic: only add persistent host keys (ed25519), no certificates\nUseful if you want to get an ssh \"trust this server\" prompt once and then never again. \n```nix\n{\n inventory.instances = {\n sshd-basic = {\n module = {\n name = \"sshd\";\n input = \"clan-core\";\n };\n roles.server.tags.all = { };\n };\n };\n}\n```\n\nExample: selective trust per environment\nAdmins should trust only production; CI should trust prod and staging. 
Servers are reachable under both domains.\n```nix\n{\n inventory.instances = {\n sshd-env-scoped = {\n module = { name = \"sshd\"; input = \"clan-core\"; };\n\n # Servers present certs for both prod and staging FQDNs\n roles.server.tags.all = { };\n roles.server.settings = {\n certificate.searchDomains = [ \"prod.example.com\" \"staging.example.com\" ];\n };\n\n # Admin laptop: trust prod only\n roles.client.machines.\"admin-laptop\".settings = {\n certificate.searchDomains = [ \"prod.example.com\" ];\n };\n\n # CI runner: trust prod and staging\n roles.client.machines.\"ci-runner-1\".settings = {\n certificate.searchDomains = [ \"prod.example.com\" \"staging.example.com\" ];\n };\n };\n };\n}\n```\n### Explanation\n- Admin -> server1.prod.example.com: zero-TOFU (verified via cert).\n- Admin -> server1.staging.example.com: falls back to TOFU (or is blocked by policy).\n- CI -> either prod or staging: zero-TOFU for both.\nNote: server and client searchDomains don't have to be identical; they only need to overlap for the hostnames you actually use.\n\n### Notes\n- Connect using a name that matches a cert principal (e.g., `server1.example.com`); wildcards are not allowed inside the certificate.\n- CA private key stays in `vars` (not deployed); only the CA public key is distributed.\n- Logins still require your user SSH keys on the server (passwords are disabled)." + }, + "roles": { + "client": { + "description": "Installs the SSH CA public key into known_hosts for the configured domains, so this machine can verify servers\u2019 host certificates without TOFU prompts." + }, + "server": { + "description": "Runs sshd with persistent host keys and (if certificate.searchDomains is set) a CA\u2011signed host certificate for ., enabling TOFU\u2011less verification by clients that trust the CA." 
+ } + } + }, + "syncthing": { + "manifest": { + "categories": ["Utility", "System", "Network"], + "description": "Syncthing is a continuous file synchronization program with automatic peer discovery", + "features": { + "API": true + }, + "name": "clan-core/syncthing", + "readme": "This service configures Syncthing to continuously synchronize a folder peer-to-peer across your machines.\n\nExample configuration:\n\n```nix\n{\n instances.syncthing = {\n roles.peer.tags.all = { };\n roles.peer.settings.folders = {\n documents = {\n path = \"/home/youruser/syncthing/documents\";\n };\n };\n };\n}\n```\n\nNotes:\n- Each key under `folders` is a folder ID (an arbitrary identifier for Syncthing).\n- Prefer absolute paths (example shown). `~` may work in some environments but can be ambiguous in service contexts.\n\n\n## Documentation\nSee the official Syncthing docs: https://docs.syncthing.net/\n" + }, + "roles": { + "peer": { + "description": "A peer in the syncthing cluster that syncs files with other peers." + } + } + }, + "tor": { + "manifest": { + "categories": ["System", "Network"], + "description": "Part of the clan networking abstraction to define how to reach machines through the Tor network, if used has the lowest priority", + "features": { + "API": true + }, + "name": "clan-core/tor", + "readme": "This module is part of Clan's [networking interface](https://docs.clan.lol/guides/networking/networking/).\n\nClan's networking module automatically manages connections across available network transports and falls back intelligently. When you run `clan ssh` or `clan machines update`, Clan attempts each configured network in priority order until a connection succeeds.\n\nThe example below configures all your nixos machines to be reachable over the Tor network. 
By default, the `tor` module has the lowest priority among networks, as it's the slowest.\n\n```nix\n inventory.instances = {\n # Fallback: Secure connections via Tor\n tor = {\n roles.server.tags.nixos = { };\n };\n};\n```" + }, + "roles": { + "client": { + "description": "Enables a continuosly running Tor proxy on the machine, allowing access to other machines via the Tor network.\nIf not enabled, a Tor proxy will be started automatically when required.\n" + }, + "server": { + "description": "Sets up a Tor onion service for the machine, thus making it reachable over Tor." + } + } + }, + "trusted-nix-caches": { + "manifest": { + "categories": ["System"], + "description": "This module sets the `clan.lol` and `nix-community` cache up as a trusted cache.", + "features": { + "API": true + }, + "name": "clan-core/trusted-nix-caches", + "readme": "Sets up nix to trust and use the clan cache\n\n## Usage\n\n```nix\ninventory.instances = {\n clan-cache = {\n module = {\n name = \"trusted-nix-caches\";\n input = \"clan-core\";\n };\n roles.default.machines.draper = { };\n };\n}\n```\n" + }, + "roles": { + "default": { + "description": "Placeholder role to apply the trusted-nix-caches service" + } + } + }, + "users": { + "manifest": { + "categories": ["System"], + "description": "An instance of this module will create a user account on the added machines,\nalong with a generated password that is constant across machines and user settings.\n", + "features": { + "API": true + }, + "name": "clan-core/user", + "readme": "## Usage\n\n```nix\n{\n inventory.instances = {\n # Deploy user alice on all machines. Don't prompt for password (will be\n # auto-generated).\n user-alice = {\n module = {\n name = \"users\";\n input = \"clan-core\";\n };\n roles.default.tags.all = { };\n roles.default.settings = {\n user = \"alice\";\n prompt = false;\n };\n };\n\n # Deploy user Carol on all machines. Prompt only once and use the\n # same password on all machines. 
(`share = true`)\n user-carol = {\n module = {\n name = \"users\";\n input = \"clan\";\n };\n roles.default.tags.all = { };\n roles.default.settings = {\n user = \"carol\";\n share = true;\n };\n };\n\n # Deploy user bob only on his laptop. Prompt for a password.\n user-bob = {\n module = {\n name = \"users\";\n input = \"clan-core\";\n };\n roles.default.machines.bobs-laptop = { };\n roles.default.settings.user = \"bob\";\n };\n };\n}\n```\n\n## Migration from `root-password` module\n\nThe deprecated `clan.root-password` module has been replaced by the `users` module. Here's how to migrate:\n\n### 1. Update your flake configuration\n\nReplace the `root-password` module import with a `users` service instance:\n\n```nix\n# OLD - Remove this from your nixosModules:\nimports = [\n self.inputs.clan-core.clanModules.root-password\n];\n\n# NEW - Add to inventory.instances or machines/flake-module.nix:\ninstances = {\n users-root = {\n module.name = \"users\";\n module.input = \"clan-core\";\n roles.default.tags.nixos = { };\n roles.default.settings = {\n user = \"root\";\n prompt = false; # Set to true if you want to be prompted\n groups = [ ];\n };\n };\n};\n```\n\n### 2. Migrate vars\n\nThe vars structure has changed from `root-password` to `user-password-root`:\n\n```bash\n# For each machine, rename the vars directories:\ncd vars/per-machine//\nmv root-password user-password-root\nmv user-password-root/password-hash user-password-root/user-password-hash\nmv user-password-root/password user-password-root/user-password\n```\n" + }, + "roles": { + "default": { + "description": "Placeholder role to apply the user service" + } + } + }, + "wifi": { + "manifest": { + "categories": ["Uncategorized"], + "description": "Pre configure wifi networks to connect to", + "features": { + "API": true + }, + "name": "wifi", + "readme": "This module allows you to pre-configure WiFi networks for automatic connection. 
\nEach attribute in `settings.network` serves as an internal identifier, not the actual SSID. \nAfter defining your networks, you will be prompted for the SSID and password for each one.\n\nThis module leverages NetworkManager for managing connections.\n\n```nix\ninstances = {\n wifi = {\n module.name = \"wifi\";\n module.input = \"clan-core\";\n\n roles.default = {\n machines.\"jon\" = {\n settings.networks.home = { };\n settings.networks.work = { keyMgmt = \"wpa-eap\"; };\n };\n };\n };\n};\n```\n" + }, + "roles": { + "default": { + "description": "Placeholder role to apply the wifi service" + } + } + }, + "wireguard": { + "manifest": { + "categories": ["System", "Network"], + "description": "Wireguard-based VPN mesh network with automatic IPv6 address allocation", + "features": { + "API": true + }, + "name": "clan-core/wireguard", + "readme": "# Wireguard VPN Service\n\nThis service provides a Wireguard-based VPN mesh network with automatic IPv6 address allocation and routing between clan machines.\n\n## Overview\n\nThe wireguard service creates a secure mesh network between clan machines using two roles:\n- **Controllers**: Machines with public endpoints that act as connection points and routers\n- **Peers**: Machines that connect through controllers to access the network\n\n## Requirements\n\n- Controllers must have a publicly accessible endpoint (domain name or static IP)\n- Peers must be in networks where UDP traffic is not blocked (uses port 51820 by default, configurable)\n\n## Features\n\n- Automatic IPv6 address allocation using ULA (Unique Local Address) prefixes\n- Full mesh connectivity between all machines\n- Automatic key generation and distribution\n- IPv6 forwarding on controllers for inter-peer communication\n- Support for multiple controllers for redundancy\n\n## Network Architecture\n\n### IPv6 Address Allocation\n- Base network: `/40` ULA prefix (deterministically generated from instance name)\n- Controllers: Each gets a `/56` subnet from the 
base `/40`\n- Peers: Each gets a unique 64-bit host suffix that is used in ALL controller subnets\n\n### Addressing Design\n- Each peer generates a unique host suffix (e.g., `:8750:a09b:0:1`)\n- This suffix is appended to each controller's `/56` prefix to create unique addresses\n- Example: peer1 with suffix `:8750:a09b:0:1` gets:\n - `fd51:19c1:3b:f700:8750:a09b:0:1` in controller1's subnet\n - `fd51:19c1:c1:aa00:8750:a09b:0:1` in controller2's subnet\n- Controllers allow each peer's `/96` subnet for routing flexibility\n\n### Connectivity\n- Peers use a single WireGuard interface with multiple IPs (one per controller subnet)\n- Controllers connect to ALL other controllers and ALL peers on a single interface\n- Controllers have IPv6 forwarding enabled to route traffic between peers\n- All traffic between peers flows through controllers\n- Symmetric routing is maintained as each peer has consistent IPs across all controllers\n\n### Example Network Topology\n\n```mermaid\ngraph TB\n subgraph Controllers\n C1[controller1
endpoint: vpn1.example.com
fd51:19c1:3b:f700::/56]\n C2[controller2
endpoint: vpn2.example.com
fd51:19c1:c1:aa00::/56]\n end\n \n subgraph Peers\n P1[peer1
designated: controller1]\n P2[peer2
designated: controller2]\n P3[peer3
designated: controller1]\n end\n \n %% Controllers connect to each other\n C1 <--> C2\n \n %% All peers connect to all controllers\n P1 <--> C1\n P1 <--> C2\n P2 <--> C1\n P2 <--> C2\n P3 <--> C1\n P3 <--> C2\n \n %% Peer-to-peer traffic flows through controllers\n P1 -.->|via controllers| P3\n P1 -.->|via controllers| P2\n P2 -.->|via controllers| P3\n \n classDef controller fill:#f9f,stroke:#333,stroke-width:4px\n classDef peer fill:#bbf,stroke:#333,stroke-width:2px\n class C1,C2 controller\n class P1,P2,P3 peer\n```\n\n## Configuration\n\n### Basic Setup with Single Controller\n\n```nix\n# In your clan.nix\n{\n instances = {\n wireguard = {\n module.name = \"wireguard\";\n module.input = \"clan-core\";\n roles.controller = {\n machines.server1 = {};\n settings = {\n # Public endpoint where this controller can be reached\n endpoint = \"vpn.example.com\";\n # Optional: Change the UDP port (default: 51820)\n port = 51820;\n };\n };\n roles.peer = {\n # No configuration needed if only one controller exists\n machines.laptop1 = {};\n };\n };\n }\n}\n```\n\n### Multiple Controllers Setup\n\n```nix\n{\n instances = {\n wireguard = {\n module.name = \"wireguard\";\n module.input = \"clan-core\";\n roles.controller.machines = {\n server1.settings.endpoint = \"vpn1.example.com\";\n server2.settings.endpoint = \"vpn2.example.com\";\n server3.settings.endpoint = \"vpn3.example.com\";\n };\n roles.peer.machines.laptop1 = {\n # Must specify which controller subnet is exposed as the default in /etc/hosts, when multiple controllers exist\n settings.controller = \"server1\";\n };\n };\n }\n}\n```\n\n### Advanced Options\n\n\n### Automatic Hostname Resolution\n\nThe wireguard service automatically adds entries to `/etc/hosts` for all machines in the network. 
Each machine is accessible via its hostname in the format `.`.\n\nFor example, with an instance named `vpn`:\n- `server1.vpn` - resolves to server1's IPv6 address\n- `laptop1.vpn` - resolves to laptop1's IPv6 address\n\nThis allows machines to communicate using hostnames instead of IPv6 addresses:\n\n```bash\n# Ping another machine by hostname\nping6 server1.vpn\n\n# SSH to another machine\nssh user@laptop1.vpn\n```\n\n## Troubleshooting\n\n### Check Wireguard Status\n```bash\nsudo wg show\n```\n\n### Verify IP Addresses\n```bash\nip addr show dev \n```\n\n### Check Routing\n```bash\nip -6 route show dev \n```\n\n### Interface Fails to Start: \"Address already in use\"\n\nIf you see this error in your logs:\n```\nwireguard: Could not bring up interface, ignoring: Address already in use\n```\n\nThis means the configured port (default: 51820) is already in use by another service or wireguard instance. Solutions:\n\n1. **Check for conflicting wireguard instances:**\n ```bash\n sudo wg show\n sudo ss -ulnp | grep 51820\n ```\n\n2. **Use a different port:**\n ```nix\n services.wireguard.myinstance = {\n roles.controller = {\n endpoint = \"vpn.example.com\";\n port = 51821; # Use a different port\n };\n };\n ```\n\n3. **Ensure unique ports across multiple instances:**\n If you have multiple wireguard instances on the same machine, each must use a different port.\n\n### Key Management\n\nKeys are automatically generated and stored in the clan vars system. 
To regenerate keys:\n\n```bash\n# Regenerate keys for a specific machine and instance\nclan vars generate --service wireguard-keys- --regenerate --machine \n\n# Apply the new keys\nclan machines update \n```\n\n## Security Considerations\n\n- All traffic is encrypted using Wireguard's modern cryptography\n- Private keys never leave the machines they're generated on\n- Public keys are distributed through the clan vars system\n- Controllers must have publicly accessible endpoints\n- Firewall rules are automatically configured for the Wireguard ports\n\n" + }, + "roles": { + "controller": { + "description": "A controller that routes peer traffic. Must be publicly reachable." + }, + "peer": { + "description": "A peer that connects to one or more controllers." + } + } + }, + "yggdrasil": { + "manifest": { + "categories": ["Uncategorized"], + "description": "Yggdrasil encrypted IPv6 routing overlay network", + "features": { + "API": true + }, + "name": "clan-core/yggdrasil", + "readme": "This module sets up [yggdrasil](https://yggdrasil-network.github.io/) across your clan. \n\nYggdrasil is designed to be a future-proof and decentralised alternative to\nthe structured routing protocols commonly used today on the internet. 
Inside your clan, it will allow you to reach all of your machines.\n\n## Example Usage\n\nWhile you can specify statically configured peers for each host, yggdrasil does auto-discovery of local peers.\n\n```nix\ninventory = {\n\n machines = {\n peer1 = { };\n peer2 = { };\n };\n\n instances = {\n yggdrasil = {\n\n # Deploy on all machines\n roles.default.tags.all = { };\n\n # Or individual hosts\n roles.default.machines.peer1 = { };\n roles.default.machines.peer2 = { };\n };\n };\n};\n```\n" + }, + "roles": { + "default": { + "description": "Placeholder role to apply the yggdrasil service" + } + } + }, + "zerotier": { + "manifest": { + "categories": ["Utility"], + "description": "Zerotier Mesh VPN Service for secure P2P networking between machines", + "features": { + "API": true + }, + "name": "clan-core/zerotier", + "readme": "## Usage\n\n```\ninventory.instances = {\n zerotier = {\n module = {\n name = \"zerotier\";\n input = \"clan-core\";\n };\n roles.peer.tags.all = { };\n roles.controller.machines.jon = { };\n roles.moon.machines.sara.settings.stableEndpoints = [ \"77.52.165.46\" ];\n };\n```\n\nThe input should be named according to your flake input.\nAll machines will be peers and connected to the zerotier network.\nJon is the controller machine, which will will accept other machines into the network.\nSara is a moon and sets the `stableEndpoint` setting with a publicly reachable IP, the moon is optional.\n\n## Overview\n\nThis guide explains how to set up and manage a [ZeroTier VPN](https://zerotier.com) for a clan network. Each VPN requires a single controller and can support multiple peers and optional moons for better connectivity.\n\n## Roles\n\n### 1. 
Controller\n\nThe [Controller](https://docs.zerotier.com/controller/) manages network membership and is responsible for admitting new peers.\nWhen a new node is added to the clan, the controller must be updated to ensure it has the latest member list.\n\n- **Key Points:**\n - Must be online to admit new machines to the VPN.\n - Existing nodes can continue to communicate even when the controller is offline.\n\n### 2. Moons\n\n[Moons](https://docs.zerotier.com/roots) act as relay nodes,\nproviding direct connectivity to peers via their public IP addresses.\nThey enable devices that are not publicly reachable to join the VPN by routing through these nodes.\n\n- **Configuration Notes:**\n - Each moon must define its public IP address.\n - Ensures connectivity for devices behind NAT or restrictive firewalls.\n\n### 3. Peers\n\nPeers are standard nodes in the VPN.\nThey connect to other peers, moons, and the controller as needed.\n\n- **Purpose:**\n - General role for all machines that are neither controllers nor moons.\n - Ideal for most clan members devices.\n" + }, + "roles": { + "controller": { + "description": "Manages network membership and is responsible for admitting new peers to your Zerotier network." + }, + "moon": { + "description": "A moon acts as a relay node to connect other nodes in the zerotier network that are not publicly reachable. Each moon must be publicly reachable." + }, + "peer": { + "description": "A peer that connects to your private Zerotier network." 
+ } + } + } +} diff --git a/pkgs/clan-cli/clan_lib/llm/container_data/test_trace_data.json b/pkgs/clan-cli/clan_lib/llm/container_data/test_trace_data.json new file mode 100644 index 000000000..92cacc7b8 --- /dev/null +++ b/pkgs/clan-cli/clan_lib/llm/container_data/test_trace_data.json @@ -0,0 +1,733 @@ +[ + { + "timestamp": "2025-10-17T13:52:04.868954+00:00", + "provider": "claude", + "model": "claude-sonnet-4-5", + "stage": "discovery", + "request": { + "messages": [ + { + "role": "system", + "content": "You are a clan deployment planner assistant in DISCOVERY MODE.\n\nScope\n- You are only gathering information to decide which service documentation to fetch.\n- Service selection and configuration will happen later with NEW instructions; ignore those responsibilities for now.\n\nGoal\n- Understand WHAT the user wants to accomplish and identify candidate service(s) that could fulfill the request.\n- IMPORTANT: We can only set up ONE service at a time. If the user requests multiple DISTINCT things, ask them to choose one.\n- If the request is ambiguous and could match multiple services, you may fetch READMEs for multiple candidates. The next phase will choose the best fit.\n\nAvailable actions\n- Call the `get_readme` tool to fetch documentation for candidate service(s).\n- Ask ONE clarifying question when the user's intent is unclear (e.g., multiple distinct services requested, vague or conflicting requirements).\n\nHard rules\n- `get_readme` is the ONLY tool you may call in discovery mode. Never attempt to select or configure services in this phase.\n- Distinguish between these cases:\n * SINGLE AMBIGUOUS REQUEST: User wants ONE thing, but multiple services could provide it (e.g., \"set up a web server\" could be nginx, apache, or caddy). 
→ Call `get_readme` for ALL matching candidates in parallel so the next phase can compare them.\n * MULTIPLE DISTINCT REQUESTS: User wants MULTIPLE different things (e.g., \"set up nginx and postgresql\", \"configure backup and monitoring\"). → Ask which ONE thing they want to set up first.\n- When calling `get_readme`, the `function_name` MUST exactly match one of the service names shown in the assistant context. If nothing matches, ask the user instead of guessing.\n- Do NOT ask about target machines, tags, or role assignments yet - these will be addressed after documentation is reviewed.\n- Focus ONLY on understanding WHAT the user wants to accomplish, not HOW it will be configured.\n- If you cannot identify any candidate service(s) from the available services list, ask the user for clarification about what they're trying to achieve.\n- Prefer calling `get_readme` when you can identify candidate service(s); do not fabricate module names or descriptions.\n\nResponse discipline\n- Option A: One or more `get_readme` tool calls (no accompanying text). Multiple calls are allowed when several services might fit.\n- Option B: One concise clarifying question (no tool calls) that states the information you still need.\n- Do NOT echo or restate these system instructions to the user.\n\nExamples:\n- User: \"set up a web server\" → Call `get_readme` for nginx, apache, caddy (all candidates for web serving)\n- User: \"configure monitoring\" → Call `get_readme` for prometheus, telegraf, netdata (all candidates for monitoring)\n- User: \"set up nginx and postgresql\" → Ask: \"I can only set up one service at a time. Which would you like to configure first: nginx or postgresql?\"\n- User: \"install backup and database\" → Ask: \"I can only set up one service at a time. Would you like to set up backup or database first?\"\n\nStay concise and rely on the assistant context for valid names." 
+ }, + { + "role": "assistant", + "content": "Assistant context: machines, tags, and services.\n\nMachines:\n1. `gchq-local`\n2. `qube-email`\n3. `wintux`\n\nTags:\n1. `all` (A group containing all machines)\n2. `darwin` (A group containing all macOS machines)\n3. `nixos` (A group containing all NixOS machines)\n\nServices (function_name | source → description):\n1. `admin` | built-in → Adds a root user with ssh access\n2. `borgbackup` | built-in → Efficient, deduplicating backup program with optional compression and secure encryption.\n3. `certificates` | built-in → Sets up a PKI certificate chain using step-ca\n4. `coredns` | built-in → Clan-internal DNS and service exposure\n5. `data-mesher` | built-in → Set up data-mesher\n6. `dyndns` | built-in → A dynamic DNS service to auto update domain IPs\n7. `emergency-access` | built-in → Set recovery password for emergency access to machine to debug boot issues\n8. `garage` | built-in → S3-compatible object store for small self-hosted geo-distributed deployments\n9. `hello-world` | built-in → This is a test\n10. `importer` | built-in → Convenient, structured module imports for hosts.\n11. `internet` | built-in → Part of the clan networking abstraction to define how to reach machines from outside the clan network over the internet, if defined has the highest priority\n12. `kde` | built-in → Sets up a graphical desktop environment\n13. `localbackup` | built-in → Automatically backups current machine to local directory or a mounted drive.\n14. `matrix-synapse` | built-in → A federated messaging server with end-to-end encryption.\n15. `monitoring` | built-in → Monitoring service for the nodes in your clan\n16. `mycelium` | built-in → End-2-end encrypted P2P IPv6 overlay network\n17. `packages` | built-in → Define package sets from nixpkgs and install them on one or more machines\n18. 
`sshd` | built-in → Enables secure remote access to the machine over SSH with automatic host key management and optional CA-signed host certificates.\n19. `syncthing` | built-in → Syncthing is a continuous file synchronization program with automatic peer discovery\n20. `tor` | built-in → Part of the clan networking abstraction to define how to reach machines through the Tor network, if used has the lowest priority\n21. `trusted-nix-caches` | built-in → This module sets the `clan.lol` and `nix-community` cache up as a trusted cache.\n22. `users` | built-in → An instance of this module will create a user account on the added machines, along with a generated password that is constant across machines and user settings.\n23. `wifi` | built-in → Pre configure wifi networks to connect to\n24. `wireguard` | built-in → Wireguard-based VPN mesh network with automatic IPv6 address allocation\n25. `yggdrasil` | built-in → Yggdrasil encrypted IPv6 routing overlay network\n26. `zerotier` | built-in → Zerotier Mesh VPN Service for secure P2P networking between machines\n\nReminder: `function_name` for `get_readme` must match one of the service names above exactly." + }, + { + "role": "user", + "content": "What VPNs are available?" + } + ], + "tools": [ + { + "name": "get_readme", + "description": "Retrieve detailed documentation (README) for a specific service/module to learn more about its roles, configuration, and requirements before deciding to use it.", + "parameters": { + "type": "object", + "properties": { + "input_name": { + "type": ["string", "null"], + "description": "The input/source name of the module (e.g., 'clan-core'). Use null for built-in modules." 
+ }, + "function_name": { + "type": "string", + "description": "The name of the service/function to get documentation for (e.g., 'zerotier', 'postgresql').", + "enum": [ + "admin", + "borgbackup", + "certificates", + "coredns", + "data-mesher", + "dyndns", + "emergency-access", + "garage", + "hello-world", + "importer", + "internet", + "kde", + "localbackup", + "matrix-synapse", + "monitoring", + "mycelium", + "packages", + "sshd", + "syncthing", + "tor", + "trusted-nix-caches", + "users", + "wifi", + "wireguard", + "yggdrasil", + "zerotier" + ] + } + }, + "required": ["function_name"] + } + } + ] + }, + "response": { + "function_calls": [ + { + "name": "get_readme", + "arguments": { + "function_name": "wireguard", + "input_name": null + } + }, + { + "name": "get_readme", + "arguments": { + "function_name": "zerotier", + "input_name": null + } + }, + { + "name": "get_readme", + "arguments": { + "function_name": "mycelium", + "input_name": null + } + }, + { + "name": "get_readme", + "arguments": { + "function_name": "yggdrasil", + "input_name": null + } + } + ], + "message": "" + }, + "duration_ms": 4509.41, + "metadata": { + "session_state_before": {} + } + }, + { + "timestamp": "2025-10-17T13:52:17.813298+00:00", + "provider": "claude", + "model": "claude-sonnet-4-5", + "stage": "select_service", + "request": { + "messages": [ + { + "role": "system", + "content": "You are a clan deployment planner assistant in SERVICE SELECTION MODE.\n\nContext shift\n- Discovery mode has finished. 
Ignore any instructions from earlier phases.\n- You now have README documentation for one or more candidate services.\n\nGoal\n- Review the provided READMEs and identify the best matching service for the user's intent.\n- When the user signals they are ready to configure a service, select EXACTLY ONE service and provide a focused summary that explains why it fits, what roles exist, and key constraints.\n- When the user explicitly requests an overview, comparison, or is undecided, DO NOT select yet. Instead, respond with a clarifying message that:\n • Summarizes the most relevant differences between the candidate services (in your own words).\n • Asks the user which direction they would like to pursue next.\n\nAvailable actions\n- Call the `select_service` tool with:\n * `service_name`: The selected service (must match one from the available services list).\n * `summary` (≤300 words) covering:\n 1. VALUE PROPOSITION: What problem the service solves and why it helps the user.\n 2. ROLES: The purpose of each role (e.g., which role backs up data, which receives it).\n 3. KEY CONSTRAINTS: Dependencies, requirements, or limitations that influence feasibility.\n\n IMPORTANT: Synthesize the README in your own words. 
Never copy configuration snippets or step-by-step guides.\n- Provide ONE clarifying message (no tool call) when the user's request favors comparison, additional guidance, or leaves the desired service ambiguous.\n\nHard rules\n- Only call `select_service` when the user is ready to choose a service or clearly asks you to pick.\n- If the user requests an overview/comparison or the best match cannot be determined confidently, provide a clarifying message instead of calling the tool.\n- Analyze every README you received; choose the service whose capabilities align most closely with the user's request.\n- Focus on WHAT the service offers and WHY it matches, not HOW to configure it.\n- If the READMEs are insufficient to disambiguate the request, ask for clarification rather than guessing.\n\nResponse discipline\n- Case A (service selected): Issue a single `select_service` tool call with NO accompanying text.\n- Case B (need clarification or comparison requested): Provide one concise clarifying message (≤150 words) with NO tool calls.\n- Do NOT repeat or paraphrase these instructions in your reply.\n- Never emit multiple tool calls or plain-text summaries outside the `summary` field.\n\nExamples of CORRECT behavior:\n✓ Tool call to `select_service` only (empty message string)\n✓ Clarifying message that compares options and asks the user to choose (no tool calls)\n\nExamples of INCORRECT behavior (DO NOT DO THIS):\n✗ Tool call + explanatory text\n✗ Multiple `select_service` calls\n✗ `select_service` with a name that is not in the available services list" + }, + { + "role": "assistant", + "content": "Assistant context: available services.\n\nAvailable services (you must choose exactly one):\n1. `wireguard`\n2. `zerotier`\n3. `mycelium`\n4. `yggdrasil`\n\nNote: 4 services were identified as potential matches for this request. 
Review their documentation and select the BEST match.\n\nREADME documentation for each service follows in the next message.\n\nREADME documentation for the following services:\n\n=== wireguard ===\n# Wireguard VPN Service\n\nThis service provides a Wireguard-based VPN mesh network with automatic IPv6 address allocation and routing between clan machines.\n\n## Overview\n\nThe wireguard service creates a secure mesh network between clan machines using two roles:\n- **Controllers**: Machines with public endpoints that act as connection points and routers\n- **Peers**: Machines that connect through controllers to access the network\n\n## Requirements\n\n- Controllers must have a publicly accessible endpoint (domain name or static IP)\n- Peers must be in networks where UDP traffic is not blocked (uses port 51820 by default, configurable)\n\n## Features\n\n- Automatic IPv6 address allocation using ULA (Unique Local Address) prefixes\n- Full mesh connectivity between all machines\n- Automatic key generation and distribution\n- IPv6 forwarding on controllers for inter-peer communication\n- Support for multiple controllers for redundancy\n\n## Network Architecture\n\n### IPv6 Address Allocation\n- Base network: `/40` ULA prefix (deterministically generated from instance name)\n- Controllers: Each gets a `/56` subnet from the base `/40`\n- Peers: Each gets a unique 64-bit host suffix that is used in ALL controller subnets\n\n### Addressing Design\n- Each peer generates a unique host suffix (e.g., `:8750:a09b:0:1`)\n- This suffix is appended to each controller's `/56` prefix to create unique addresses\n- Example: peer1 with suffix `:8750:a09b:0:1` gets:\n - `fd51:19c1:3b:f700:8750:a09b:0:1` in controller1's subnet\n - `fd51:19c1:c1:aa00:8750:a09b:0:1` in controller2's subnet\n- Controllers allow each peer's `/96` subnet for routing flexibility\n\n### Connectivity\n- Peers use a single WireGuard interface with multiple IPs (one per controller subnet)\n- Controllers connect to 
ALL other controllers and ALL peers on a single interface\n- Controllers have IPv6 forwarding enabled to route traffic between peers\n- All traffic between peers flows through controllers\n- Symmetric routing is maintained as each peer has consistent IPs across all controllers\n\n### Example Network Topology\n\n```mermaid\ngraph TB\n subgraph Controllers\n C1[controller1
endpoint: vpn1.example.com
fd51:19c1:3b:f700::/56]\n C2[controller2
endpoint: vpn2.example.com
fd51:19c1:c1:aa00::/56]\n end\n \n subgraph Peers\n P1[peer1
designated: controller1]\n P2[peer2
designated: controller2]\n P3[peer3
designated: controller1]\n end\n \n %% Controllers connect to each other\n C1 <--> C2\n \n %% All peers connect to all controllers\n P1 <--> C1\n P1 <--> C2\n P2 <--> C1\n P2 <--> C2\n P3 <--> C1\n P3 <--> C2\n \n %% Peer-to-peer traffic flows through controllers\n P1 -.->|via controllers| P3\n P1 -.->|via controllers| P2\n P2 -.->|via controllers| P3\n \n classDef controller fill:#f9f,stroke:#333,stroke-width:4px\n classDef peer fill:#bbf,stroke:#333,stroke-width:2px\n class C1,C2 controller\n class P1,P2,P3 peer\n```\n\n## Configuration\n\n### Basic Setup with Single Controller\n\n```nix\n# In your clan.nix\n{\n instances = {\n wireguard = {\n module.name = \"wireguard\";\n module.input = \"clan-core\";\n roles.controller = {\n machines.server1 = {};\n settings = {\n # Public endpoint where this controller can be reached\n endpoint = \"vpn.example.com\";\n # Optional: Change the UDP port (default: 51820)\n port = 51820;\n };\n };\n roles.peer = {\n # No configuration needed if only one controller exists\n machines.laptop1 = {};\n };\n };\n }\n}\n```\n\n### Multiple Controllers Setup\n\n```nix\n{\n instances = {\n wireguard = {\n module.name = \"wireguard\";\n module.input = \"clan-core\";\n roles.controller.machines = {\n server1.settings.endpoint = \"vpn1.example.com\";\n server2.settings.endpoint = \"vpn2.example.com\";\n server3.settings.endpoint = \"vpn3.example.com\";\n };\n roles.peer.machines.laptop1 = {\n # Must specify which controller subnet is exposed as the default in /etc/hosts, when multiple controllers exist\n settings.controller = \"server1\";\n };\n };\n }\n}\n```\n\n### Advanced Options\n\n\n### Automatic Hostname Resolution\n\nThe wireguard service automatically adds entries to `/etc/hosts` for all machines in the network. 
Each machine is accessible via its hostname in the format `.`.\n\nFor example, with an instance named `vpn`:\n- `server1.vpn` - resolves to server1's IPv6 address\n- `laptop1.vpn` - resolves to laptop1's IPv6 address\n\nThis allows machines to communicate using hostnames instead of IPv6 addresses:\n\n```bash\n# Ping another machine by hostname\nping6 server1.vpn\n\n# SSH to another machine\nssh user@laptop1.vpn\n```\n\n## Troubleshooting\n\n### Check Wireguard Status\n```bash\nsudo wg show\n```\n\n### Verify IP Addresses\n```bash\nip addr show dev \n```\n\n### Check Routing\n```bash\nip -6 route show dev \n```\n\n### Interface Fails to Start: \"Address already in use\"\n\nIf you see this error in your logs:\n```\nwireguard: Could not bring up interface, ignoring: Address already in use\n```\n\nThis means the configured port (default: 51820) is already in use by another service or wireguard instance. Solutions:\n\n1. **Check for conflicting wireguard instances:**\n ```bash\n sudo wg show\n sudo ss -ulnp | grep 51820\n ```\n\n2. **Use a different port:**\n ```nix\n services.wireguard.myinstance = {\n roles.controller = {\n endpoint = \"vpn.example.com\";\n port = 51821; # Use a different port\n };\n };\n ```\n\n3. **Ensure unique ports across multiple instances:**\n If you have multiple wireguard instances on the same machine, each must use a different port.\n\n### Key Management\n\nKeys are automatically generated and stored in the clan vars system. 
To regenerate keys:\n\n```bash\n# Regenerate keys for a specific machine and instance\nclan vars generate --service wireguard-keys- --regenerate --machine \n\n# Apply the new keys\nclan machines update \n```\n\n## Security Considerations\n\n- All traffic is encrypted using Wireguard's modern cryptography\n- Private keys never leave the machines they're generated on\n- Public keys are distributed through the clan vars system\n- Controllers must have publicly accessible endpoints\n- Firewall rules are automatically configured for the Wireguard ports\n\n\n\n=== zerotier ===\n## Usage\n\n```\ninventory.instances = {\n zerotier = {\n module = {\n name = \"zerotier\";\n input = \"clan-core\";\n };\n roles.peer.tags.all = { };\n roles.controller.machines.jon = { };\n roles.moon.machines.sara.settings.stableEndpoints = [ \"77.52.165.46\" ];\n };\n```\n\nThe input should be named according to your flake input.\nAll machines will be peers and connected to the zerotier network.\nJon is the controller machine, which will will accept other machines into the network.\nSara is a moon and sets the `stableEndpoint` setting with a publicly reachable IP, the moon is optional.\n\n## Overview\n\nThis guide explains how to set up and manage a [ZeroTier VPN](https://zerotier.com) for a clan network. Each VPN requires a single controller and can support multiple peers and optional moons for better connectivity.\n\n## Roles\n\n### 1. Controller\n\nThe [Controller](https://docs.zerotier.com/controller/) manages network membership and is responsible for admitting new peers.\nWhen a new node is added to the clan, the controller must be updated to ensure it has the latest member list.\n\n- **Key Points:**\n - Must be online to admit new machines to the VPN.\n - Existing nodes can continue to communicate even when the controller is offline.\n\n### 2. 
Moons\n\n[Moons](https://docs.zerotier.com/roots) act as relay nodes,\nproviding direct connectivity to peers via their public IP addresses.\nThey enable devices that are not publicly reachable to join the VPN by routing through these nodes.\n\n- **Configuration Notes:**\n - Each moon must define its public IP address.\n - Ensures connectivity for devices behind NAT or restrictive firewalls.\n\n### 3. Peers\n\nPeers are standard nodes in the VPN.\nThey connect to other peers, moons, and the controller as needed.\n\n- **Purpose:**\n - General role for all machines that are neither controllers nor moons.\n - Ideal for most clan members devices.\n\n\n=== mycelium ===\n\n[Mycelium](https://github.com/threefoldtech/mycelium) is an end-to-end encrypted IPv6 overlay network that spans the globe.\n\n## Features\n- Locality-aware routing: finds the shortest path between nodes.\n- All traffic is end-to-end encrypted.\n- Can route traffic via friend nodes and is location-aware.\n- Automatic rerouting if a physical link goes down.\n- IPv6 addresses are derived from private keys.\n- A simple, reliable message bus is implemented on top of Mycelium.\n- Supports multiple transports (QUIC, TCP, …). Hole punching for QUIC is in progress to enable true P2P connectivity behind NATs.\n- Designed for planetary-scale scalability; previous overlay networks reached practical limits, and Mycelium focuses on scaling.\n- Can run without a TUN device and be used solely as a reliable message bus.\n\nExample configuration below connects all your machines to the Mycelium network:\n```nix\nmycelium = {\n roles.peer.tags.all = {};\n};\n```\n\n\n=== yggdrasil ===\nThis module sets up [yggdrasil](https://yggdrasil-network.github.io/) across your clan. \n\nYggdrasil is designed to be a future-proof and decentralised alternative to\nthe structured routing protocols commonly used today on the internet. 
Inside your clan, it will allow you to reach all of your machines.\n\n## Example Usage\n\nWhile you can specify statically configured peers for each host, yggdrasil does auto-discovery of local peers.\n\n```nix\ninventory = {\n\n machines = {\n peer1 = { };\n peer2 = { };\n };\n\n instances = {\n yggdrasil = {\n\n # Deploy on all machines\n roles.default.tags.all = { };\n\n # Or individual hosts\n roles.default.machines.peer1 = { };\n roles.default.machines.peer2 = { };\n };\n };\n};\n```\n\n--- END OF README DOCUMENTATION ---" + }, + { + "role": "user", + "content": "What VPNs are available?" + } + ], + "tools": [ + { + "name": "select_service", + "description": "Select exactly one service from the available candidates and provide a focused summary of its documentation relevant to the user request.", + "parameters": { + "type": "object", + "properties": { + "service_name": { + "type": "string", + "description": "The name of the selected service. Must match one of the available service names exactly.", + "enum": ["mycelium", "wireguard", "yggdrasil", "zerotier"] + }, + "summary": { + "type": "string", + "description": "A concise summary (max 300 words) focusing on: (1) VALUE PROPOSITION - what problem this service solves and why you'd use it, (2) ROLES - what roles exist and the PURPOSE of each role, (3) KEY CONSTRAINTS - critical dependencies or limitations. Do NOT copy README examples or configuration snippets. Synthesize WHAT the service does and WHY, not HOW to configure it." + } + }, + "required": ["service_name", "summary"] + } + } + ] + }, + "response": { + "function_calls": [], + "message": "You have four VPN/overlay network services available, each with different strengths:\n\n**WireGuard** – A traditional VPN with a controller/peer architecture. Controllers are publicly accessible servers that route traffic between peers. 
Best for setups where you have one or more servers with static IPs/domains and want reliable, well-established VPN technology.\n\n**ZeroTier** – A managed mesh VPN requiring a controller to admit peers, with optional \"moon\" relay nodes for NAT traversal. The controller must be online to add new machines but existing peers stay connected. Good when you need relays for hard-to-reach devices.\n\n**Mycelium** – A planetary-scale, encrypted IPv6 overlay network with locality-aware routing. Automatically finds shortest paths, reroutes around failures, and can run without TUN devices as a message bus. Designed for true peer-to-peer connectivity at scale.\n\n**Yggdrasil** – A decentralized, future-proof routing protocol with automatic local peer discovery. No controllers needed; machines auto-discover each other on the same network. Simpler setup when you don't have public endpoints.\n\n**Key differences:**\n- **Public endpoint required?** WireGuard (controllers), ZeroTier (moons optional)\n- **Fully decentralized?** Mycelium, Yggdrasil\n- **Auto-discovery?** Yggdrasil (local), Mycelium (global)\n- **Controller dependency?** WireGuard and ZeroTier need controllers; Mycelium and Yggdrasil don't\n\nWhich direction interests you—something with central coordination (WireGuard/ZeroTier) or fully decentralized (Mycelium/Yggdrasil)?" + }, + "duration_ms": 12942.61, + "metadata": { + "session_state_before": {} + } + }, + { + "timestamp": "2025-10-17T13:52:36.187186+00:00", + "provider": "claude", + "model": "claude-sonnet-4-5", + "stage": "select_service", + "request": { + "messages": [ + { + "role": "system", + "content": "You are a clan deployment planner assistant in SERVICE SELECTION MODE.\n\nContext shift\n- Discovery mode has finished. 
Ignore any instructions from earlier phases.\n- You now have README documentation for one or more candidate services.\n\nGoal\n- Review the provided READMEs and identify the best matching service for the user's intent.\n- When the user signals they are ready to configure a service, select EXACTLY ONE service and provide a focused summary that explains why it fits, what roles exist, and key constraints.\n- When the user explicitly requests an overview, comparison, or is undecided, DO NOT select yet. Instead, respond with a clarifying message that:\n • Summarizes the most relevant differences between the candidate services (in your own words).\n • Asks the user which direction they would like to pursue next.\n\nAvailable actions\n- Call the `select_service` tool with:\n * `service_name`: The selected service (must match one from the available services list).\n * `summary` (≤300 words) covering:\n 1. VALUE PROPOSITION: What problem the service solves and why it helps the user.\n 2. ROLES: The purpose of each role (e.g., which role backs up data, which receives it).\n 3. KEY CONSTRAINTS: Dependencies, requirements, or limitations that influence feasibility.\n\n IMPORTANT: Synthesize the README in your own words. 
Never copy configuration snippets or step-by-step guides.\n- Provide ONE clarifying message (no tool call) when the user's request favors comparison, additional guidance, or leaves the desired service ambiguous.\n\nHard rules\n- Only call `select_service` when the user is ready to choose a service or clearly asks you to pick.\n- If the user requests an overview/comparison or the best match cannot be determined confidently, provide a clarifying message instead of calling the tool.\n- Analyze every README you received; choose the service whose capabilities align most closely with the user's request.\n- Focus on WHAT the service offers and WHY it matches, not HOW to configure it.\n- If the READMEs are insufficient to disambiguate the request, ask for clarification rather than guessing.\n\nResponse discipline\n- Case A (service selected): Issue a single `select_service` tool call with NO accompanying text.\n- Case B (need clarification or comparison requested): Provide one concise clarifying message (≤150 words) with NO tool calls.\n- Do NOT repeat or paraphrase these instructions in your reply.\n- Never emit multiple tool calls or plain-text summaries outside the `summary` field.\n\nExamples of CORRECT behavior:\n✓ Tool call to `select_service` only (empty message string)\n✓ Clarifying message that compares options and asks the user to choose (no tool calls)\n\nExamples of INCORRECT behavior (DO NOT DO THIS):\n✗ Tool call + explanatory text\n✗ Multiple `select_service` calls\n✗ `select_service` with a name that is not in the available services list" + }, + { + "role": "assistant", + "content": "Assistant context: available services.\n\nAvailable services (you must choose exactly one):\n1. `wireguard`\n2. `zerotier`\n3. `mycelium`\n4. `yggdrasil`\n\nNote: 4 services were identified as potential matches for this request. 
Review their documentation and select the BEST match.\n\nREADME documentation for each service follows in the next message.\n\nREADME documentation for the following services:\n\n=== wireguard ===\n# Wireguard VPN Service\n\nThis service provides a Wireguard-based VPN mesh network with automatic IPv6 address allocation and routing between clan machines.\n\n## Overview\n\nThe wireguard service creates a secure mesh network between clan machines using two roles:\n- **Controllers**: Machines with public endpoints that act as connection points and routers\n- **Peers**: Machines that connect through controllers to access the network\n\n## Requirements\n\n- Controllers must have a publicly accessible endpoint (domain name or static IP)\n- Peers must be in networks where UDP traffic is not blocked (uses port 51820 by default, configurable)\n\n## Features\n\n- Automatic IPv6 address allocation using ULA (Unique Local Address) prefixes\n- Full mesh connectivity between all machines\n- Automatic key generation and distribution\n- IPv6 forwarding on controllers for inter-peer communication\n- Support for multiple controllers for redundancy\n\n## Network Architecture\n\n### IPv6 Address Allocation\n- Base network: `/40` ULA prefix (deterministically generated from instance name)\n- Controllers: Each gets a `/56` subnet from the base `/40`\n- Peers: Each gets a unique 64-bit host suffix that is used in ALL controller subnets\n\n### Addressing Design\n- Each peer generates a unique host suffix (e.g., `:8750:a09b:0:1`)\n- This suffix is appended to each controller's `/56` prefix to create unique addresses\n- Example: peer1 with suffix `:8750:a09b:0:1` gets:\n - `fd51:19c1:3b:f700:8750:a09b:0:1` in controller1's subnet\n - `fd51:19c1:c1:aa00:8750:a09b:0:1` in controller2's subnet\n- Controllers allow each peer's `/96` subnet for routing flexibility\n\n### Connectivity\n- Peers use a single WireGuard interface with multiple IPs (one per controller subnet)\n- Controllers connect to 
ALL other controllers and ALL peers on a single interface\n- Controllers have IPv6 forwarding enabled to route traffic between peers\n- All traffic between peers flows through controllers\n- Symmetric routing is maintained as each peer has consistent IPs across all controllers\n\n### Example Network Topology\n\n```mermaid\ngraph TB\n subgraph Controllers\n C1[controller1
endpoint: vpn1.example.com
fd51:19c1:3b:f700::/56]\n C2[controller2
endpoint: vpn2.example.com
fd51:19c1:c1:aa00::/56]\n end\n \n subgraph Peers\n P1[peer1
designated: controller1]\n P2[peer2
designated: controller2]\n P3[peer3
designated: controller1]\n end\n \n %% Controllers connect to each other\n C1 <--> C2\n \n %% All peers connect to all controllers\n P1 <--> C1\n P1 <--> C2\n P2 <--> C1\n P2 <--> C2\n P3 <--> C1\n P3 <--> C2\n \n %% Peer-to-peer traffic flows through controllers\n P1 -.->|via controllers| P3\n P1 -.->|via controllers| P2\n P2 -.->|via controllers| P3\n \n classDef controller fill:#f9f,stroke:#333,stroke-width:4px\n classDef peer fill:#bbf,stroke:#333,stroke-width:2px\n class C1,C2 controller\n class P1,P2,P3 peer\n```\n\n## Configuration\n\n### Basic Setup with Single Controller\n\n```nix\n# In your clan.nix\n{\n instances = {\n wireguard = {\n module.name = \"wireguard\";\n module.input = \"clan-core\";\n roles.controller = {\n machines.server1 = {};\n settings = {\n # Public endpoint where this controller can be reached\n endpoint = \"vpn.example.com\";\n # Optional: Change the UDP port (default: 51820)\n port = 51820;\n };\n };\n roles.peer = {\n # No configuration needed if only one controller exists\n machines.laptop1 = {};\n };\n };\n }\n}\n```\n\n### Multiple Controllers Setup\n\n```nix\n{\n instances = {\n wireguard = {\n module.name = \"wireguard\";\n module.input = \"clan-core\";\n roles.controller.machines = {\n server1.settings.endpoint = \"vpn1.example.com\";\n server2.settings.endpoint = \"vpn2.example.com\";\n server3.settings.endpoint = \"vpn3.example.com\";\n };\n roles.peer.machines.laptop1 = {\n # Must specify which controller subnet is exposed as the default in /etc/hosts, when multiple controllers exist\n settings.controller = \"server1\";\n };\n };\n }\n}\n```\n\n### Advanced Options\n\n\n### Automatic Hostname Resolution\n\nThe wireguard service automatically adds entries to `/etc/hosts` for all machines in the network. 
Each machine is accessible via its hostname in the format `.`.\n\nFor example, with an instance named `vpn`:\n- `server1.vpn` - resolves to server1's IPv6 address\n- `laptop1.vpn` - resolves to laptop1's IPv6 address\n\nThis allows machines to communicate using hostnames instead of IPv6 addresses:\n\n```bash\n# Ping another machine by hostname\nping6 server1.vpn\n\n# SSH to another machine\nssh user@laptop1.vpn\n```\n\n## Troubleshooting\n\n### Check Wireguard Status\n```bash\nsudo wg show\n```\n\n### Verify IP Addresses\n```bash\nip addr show dev \n```\n\n### Check Routing\n```bash\nip -6 route show dev \n```\n\n### Interface Fails to Start: \"Address already in use\"\n\nIf you see this error in your logs:\n```\nwireguard: Could not bring up interface, ignoring: Address already in use\n```\n\nThis means the configured port (default: 51820) is already in use by another service or wireguard instance. Solutions:\n\n1. **Check for conflicting wireguard instances:**\n ```bash\n sudo wg show\n sudo ss -ulnp | grep 51820\n ```\n\n2. **Use a different port:**\n ```nix\n services.wireguard.myinstance = {\n roles.controller = {\n endpoint = \"vpn.example.com\";\n port = 51821; # Use a different port\n };\n };\n ```\n\n3. **Ensure unique ports across multiple instances:**\n If you have multiple wireguard instances on the same machine, each must use a different port.\n\n### Key Management\n\nKeys are automatically generated and stored in the clan vars system. 
To regenerate keys:\n\n```bash\n# Regenerate keys for a specific machine and instance\nclan vars generate --service wireguard-keys- --regenerate --machine \n\n# Apply the new keys\nclan machines update \n```\n\n## Security Considerations\n\n- All traffic is encrypted using Wireguard's modern cryptography\n- Private keys never leave the machines they're generated on\n- Public keys are distributed through the clan vars system\n- Controllers must have publicly accessible endpoints\n- Firewall rules are automatically configured for the Wireguard ports\n\n\n\n=== zerotier ===\n## Usage\n\n```\ninventory.instances = {\n zerotier = {\n module = {\n name = \"zerotier\";\n input = \"clan-core\";\n };\n roles.peer.tags.all = { };\n roles.controller.machines.jon = { };\n roles.moon.machines.sara.settings.stableEndpoints = [ \"77.52.165.46\" ];\n };\n```\n\nThe input should be named according to your flake input.\nAll machines will be peers and connected to the zerotier network.\nJon is the controller machine, which will will accept other machines into the network.\nSara is a moon and sets the `stableEndpoint` setting with a publicly reachable IP, the moon is optional.\n\n## Overview\n\nThis guide explains how to set up and manage a [ZeroTier VPN](https://zerotier.com) for a clan network. Each VPN requires a single controller and can support multiple peers and optional moons for better connectivity.\n\n## Roles\n\n### 1. Controller\n\nThe [Controller](https://docs.zerotier.com/controller/) manages network membership and is responsible for admitting new peers.\nWhen a new node is added to the clan, the controller must be updated to ensure it has the latest member list.\n\n- **Key Points:**\n - Must be online to admit new machines to the VPN.\n - Existing nodes can continue to communicate even when the controller is offline.\n\n### 2. 
Moons\n\n[Moons](https://docs.zerotier.com/roots) act as relay nodes,\nproviding direct connectivity to peers via their public IP addresses.\nThey enable devices that are not publicly reachable to join the VPN by routing through these nodes.\n\n- **Configuration Notes:**\n - Each moon must define its public IP address.\n - Ensures connectivity for devices behind NAT or restrictive firewalls.\n\n### 3. Peers\n\nPeers are standard nodes in the VPN.\nThey connect to other peers, moons, and the controller as needed.\n\n- **Purpose:**\n - General role for all machines that are neither controllers nor moons.\n - Ideal for most clan members devices.\n\n\n=== mycelium ===\n\n[Mycelium](https://github.com/threefoldtech/mycelium) is an end-to-end encrypted IPv6 overlay network that spans the globe.\n\n## Features\n- Locality-aware routing: finds the shortest path between nodes.\n- All traffic is end-to-end encrypted.\n- Can route traffic via friend nodes and is location-aware.\n- Automatic rerouting if a physical link goes down.\n- IPv6 addresses are derived from private keys.\n- A simple, reliable message bus is implemented on top of Mycelium.\n- Supports multiple transports (QUIC, TCP, …). Hole punching for QUIC is in progress to enable true P2P connectivity behind NATs.\n- Designed for planetary-scale scalability; previous overlay networks reached practical limits, and Mycelium focuses on scaling.\n- Can run without a TUN device and be used solely as a reliable message bus.\n\nExample configuration below connects all your machines to the Mycelium network:\n```nix\nmycelium = {\n roles.peer.tags.all = {};\n};\n```\n\n\n=== yggdrasil ===\nThis module sets up [yggdrasil](https://yggdrasil-network.github.io/) across your clan. \n\nYggdrasil is designed to be a future-proof and decentralised alternative to\nthe structured routing protocols commonly used today on the internet. 
Inside your clan, it will allow you to reach all of your machines.\n\n## Example Usage\n\nWhile you can specify statically configured peers for each host, yggdrasil does auto-discovery of local peers.\n\n```nix\ninventory = {\n\n machines = {\n peer1 = { };\n peer2 = { };\n };\n\n instances = {\n yggdrasil = {\n\n # Deploy on all machines\n roles.default.tags.all = { };\n\n # Or individual hosts\n roles.default.machines.peer1 = { };\n roles.default.machines.peer2 = { };\n };\n };\n};\n```\n\n--- END OF README DOCUMENTATION ---" + }, + { + "role": "user", + "content": "What VPNs are available?" + }, + { + "role": "assistant", + "content": "You have four VPN/overlay network services available, each with different strengths:\n\n**WireGuard** – A traditional VPN with a controller/peer architecture. Controllers are publicly accessible servers that route traffic between peers. Best for setups where you have one or more servers with static IPs/domains and want reliable, well-established VPN technology.\n\n**ZeroTier** – A managed mesh VPN requiring a controller to admit peers, with optional \"moon\" relay nodes for NAT traversal. The controller must be online to add new machines but existing peers stay connected. Good when you need relays for hard-to-reach devices.\n\n**Mycelium** – A planetary-scale, encrypted IPv6 overlay network with locality-aware routing. Automatically finds shortest paths, reroutes around failures, and can run without TUN devices as a message bus. Designed for true peer-to-peer connectivity at scale.\n\n**Yggdrasil** – A decentralized, future-proof routing protocol with automatic local peer discovery. No controllers needed; machines auto-discover each other on the same network. 
Simpler setup when you don't have public endpoints.\n\n**Key differences:**\n- **Public endpoint required?** WireGuard (controllers), ZeroTier (moons optional)\n- **Fully decentralized?** Mycelium, Yggdrasil\n- **Auto-discovery?** Yggdrasil (local), Mycelium (global)\n- **Controller dependency?** WireGuard and ZeroTier need controllers; Mycelium and Yggdrasil don't\n\nWhich direction interests you—something with central coordination (WireGuard/ZeroTier) or fully decentralized (Mycelium/Yggdrasil)?" + }, + { + "role": "user", + "content": "Hmm zerotier please" + } + ], + "tools": [ + { + "name": "select_service", + "description": "Select exactly one service from the available candidates and provide a focused summary of its documentation relevant to the user request.", + "parameters": { + "type": "object", + "properties": { + "service_name": { + "type": "string", + "description": "The name of the selected service. Must match one of the available service names exactly.", + "enum": ["mycelium", "wireguard", "yggdrasil", "zerotier"] + }, + "summary": { + "type": "string", + "description": "A concise summary (max 300 words) focusing on: (1) VALUE PROPOSITION - what problem this service solves and why you'd use it, (2) ROLES - what roles exist and the PURPOSE of each role, (3) KEY CONSTRAINTS - critical dependencies or limitations. Do NOT copy README examples or configuration snippets. Synthesize WHAT the service does and WHY, not HOW to configure it." + } + }, + "required": ["service_name", "summary"] + } + } + ] + }, + "response": { + "function_calls": [ + { + "name": "select_service", + "arguments": { + "service_name": "zerotier", + "summary": "VALUE PROPOSITION:\nZeroTier solves the problem of connecting machines across different networks into a unified VPN, even when devices are behind NAT or firewalls. 
It's particularly useful when you need centralized control over network membership and want optional relay nodes to ensure connectivity for hard-to-reach devices.\n\nROLES:\n1. **Controller** - Manages network membership and admits new peers to the VPN. Must be updated when new nodes join the clan. The controller needs to be online to authorize new machines, but existing peers can continue communicating even when the controller is offline.\n\n2. **Moon** (optional) - Acts as a relay node with a publicly reachable IP address. Moons provide direct connectivity pathways for peers that aren't publicly accessible, enabling devices behind NAT or restrictive firewalls to participate in the VPN by routing traffic through these stable endpoints.\n\n3. **Peer** - Standard network nodes that connect to other peers, moons, and the controller. This is the general role for most clan member devices that are neither controllers nor moons.\n\nKEY CONSTRAINTS:\n- Exactly one controller is required per ZeroTier instance\n- The controller must be online to admit new machines to the network\n- Moons must define their public IP addresses in configuration\n- When adding new nodes to the clan, the controller must be updated to maintain the current member list\n- Moons are optional but recommended for environments with NAT or firewall restrictions" + } + } + ], + "message": "" + }, + "duration_ms": 10109.14, + "metadata": { + "session_state_before": {} + } + }, + { + "timestamp": "2025-10-17T13:52:39.955025+00:00", + "provider": "claude", + "model": "claude-sonnet-4-5", + "stage": "final_decision", + "request": { + "messages": [ + { + "role": "system", + "content": "You are a clan deployment planner in CONFIGURATION MODE. 
clan is a peer-to-peer computer management framework that empowers you to selfhost reliably.\n\nContext shift\n- Service selection is complete; disregard any instructions from earlier phases.\n- You have a summary of the chosen service, including role descriptions and relevant constraints.\n\nMission\n1) Analyze the user request (and conversation history) to determine which machines and/or tags should receive each role.\n2) Call the configuration tool with the correct role-to-target mappings.\n3) If the required assignments are ambiguous or missing information, ask ONE clarifying question instead of guessing.\n\nHard rules — Target assignment\n- Prefer TAGS when the user mentions groups (e.g., \"all production servers\", \"backup servers\").\n- Prefer MACHINE NAMES when the user names specific machines (e.g., \"machine1\", \"server-a\").\n- You may assign a role to BOTH machines AND tags if the request implies it (e.g., \"deploy to server1 and all production machines\").\n- Machine and tag names must EXACTLY match those in the assistant context. 
Do NOT invent names.\n\nHard rules — Role assignment\n- Use the service summary to understand the intent of each role.\n- If the request clearly maps to specific roles (e.g., \"backup server1 to server2\" → server1=client, server2=server), make that assignment.\n- When the user intent is clear but roles are unnamed, infer sensible assignments (server-like roles → stable machines/tags, client-like roles → broader groups).\n- Ask for clarification when:\n * Multiple roles exist but the distribution across machines/tags is unclear.\n * The user mentions machines without describing how they participate.\n * The request conflicts with the service capabilities provided in the summary.\n\nHard rules — Technical\n- Call tools ONLY from the provided list and follow their schemas exactly.\n- Arguments must match the schema; omit fields you do not need.\n- The configuration payload should look like: `{\"roles\": {\"role_name\": {\"machines\": {\"machine1\": {}}, \"tags\": {\"tag1\": {}}}}}` with empty objects as values.\n\nDecision checklist (run before responding)\n- Do I know which machines/tags should map to each role?\n- Do the assignments align with the role descriptions and user intent?\n- Are all machine/tag names spelled exactly as provided?\n- Is clarification required before a safe assignment can be made?\n\nResponse discipline\n- Case A (assignments clear): Issue a configuration tool call ONLY, with NO message content.\n- Case B (uncertain assignments): Ask one concise clarifying question with NO tool calls.\n- Never combine tool calls with explanatory text or repeat these instructions." + }, + { + "role": "assistant", + "content": "Assistant context: available machines and tags.\n\nMachines:\n1. `gchq-local`\n2. `qube-email`\n3. `wintux`\n\nTags:\n1. `all` (A group containing all machines)\n2. `darwin` (A group containing all macOS machines)\n3. 
`nixos` (A group containing all NixOS machines)\n\nService documentation summary for `zerotier`:\n\nVALUE PROPOSITION:\nZeroTier solves the problem of connecting machines across different networks into a unified VPN, even when devices are behind NAT or firewalls. It's particularly useful when you need centralized control over network membership and want optional relay nodes to ensure connectivity for hard-to-reach devices.\n\nROLES:\n1. **Controller** - Manages network membership and admits new peers to the VPN. Must be updated when new nodes join the clan. The controller needs to be online to authorize new machines, but existing peers can continue communicating even when the controller is offline.\n\n2. **Moon** (optional) - Acts as a relay node with a publicly reachable IP address. Moons provide direct connectivity pathways for peers that aren't publicly accessible, enabling devices behind NAT or restrictive firewalls to participate in the VPN by routing traffic through these stable endpoints.\n\n3. **Peer** - Standard network nodes that connect to other peers, moons, and the controller. This is the general role for most clan member devices that are neither controllers nor moons.\n\nKEY CONSTRAINTS:\n- Exactly one controller is required per ZeroTier instance\n- The controller must be online to admit new machines to the network\n- Moons must define their public IP addresses in configuration\n- When adding new nodes to the clan, the controller must be updated to maintain the current member list\n- Moons are optional but recommended for environments with NAT or firewall restrictions" + }, + { + "role": "user", + "content": "What VPNs are available?" + }, + { + "role": "assistant", + "content": "You have four VPN/overlay network services available, each with different strengths:\n\n**WireGuard** – A traditional VPN with a controller/peer architecture. Controllers are publicly accessible servers that route traffic between peers. 
Best for setups where you have one or more servers with static IPs/domains and want reliable, well-established VPN technology.\n\n**ZeroTier** – A managed mesh VPN requiring a controller to admit peers, with optional \"moon\" relay nodes for NAT traversal. The controller must be online to add new machines but existing peers stay connected. Good when you need relays for hard-to-reach devices.\n\n**Mycelium** – A planetary-scale, encrypted IPv6 overlay network with locality-aware routing. Automatically finds shortest paths, reroutes around failures, and can run without TUN devices as a message bus. Designed for true peer-to-peer connectivity at scale.\n\n**Yggdrasil** – A decentralized, future-proof routing protocol with automatic local peer discovery. No controllers needed; machines auto-discover each other on the same network. Simpler setup when you don't have public endpoints.\n\n**Key differences:**\n- **Public endpoint required?** WireGuard (controllers), ZeroTier (moons optional)\n- **Fully decentralized?** Mycelium, Yggdrasil\n- **Auto-discovery?** Yggdrasil (local), Mycelium (global)\n- **Controller dependency?** WireGuard and ZeroTier need controllers; Mycelium and Yggdrasil don't\n\nWhich direction interests you—something with central coordination (WireGuard/ZeroTier) or fully decentralized (Mycelium/Yggdrasil)?" 
+ }, + { + "role": "user", + "content": "Hmm zerotier please" + } + ], + "tools": [ + { + "name": "zerotier", + "description": "Zerotier Mesh VPN Service for secure P2P networking between machines", + "parameters": { + "type": "object", + "properties": { + "module": { + "type": "object", + "properties": {} + }, + "roles": { + "type": "object", + "properties": { + "controller": { + "type": "object", + "description": "Manages network membership and is responsible for admitting new peers to your Zerotier network.", + "properties": { + "machines": { + "type": "object", + "patternProperties": { + "^(gchq-local|qube-email|wintux)$": { + "type": "object", + "additionalProperties": false + } + }, + "additionalProperties": false, + "description": "Machines to assign this role to. Format: each machine name is a key with an empty object {} as value. Example: {\"wintux\": {}, \"gchq-local\": {}}" + }, + "tags": { + "type": "object", + "patternProperties": { + "^(all|darwin|nixos)$": { + "type": "object", + "additionalProperties": false + } + }, + "additionalProperties": false, + "description": "Tags to assign this role to. Format: each tag name is a key with an empty object {} as value. Example: {\"all\": {}, \"nixos\": {}}" + } + }, + "additionalProperties": false + }, + "moon": { + "type": "object", + "description": "A moon acts as a relay node to connect other nodes in the zerotier network that are not publicly reachable. Each moon must be publicly reachable.", + "properties": { + "machines": { + "type": "object", + "patternProperties": { + "^(gchq-local|qube-email|wintux)$": { + "type": "object", + "additionalProperties": false + } + }, + "additionalProperties": false, + "description": "Machines to assign this role to. Format: each machine name is a key with an empty object {} as value. 
Example: {\"wintux\": {}, \"gchq-local\": {}}" + }, + "tags": { + "type": "object", + "patternProperties": { + "^(all|darwin|nixos)$": { + "type": "object", + "additionalProperties": false + } + }, + "additionalProperties": false, + "description": "Tags to assign this role to. Format: each tag name is a key with an empty object {} as value. Example: {\"all\": {}, \"nixos\": {}}" + } + }, + "additionalProperties": false + }, + "peer": { + "type": "object", + "description": "A peer that connects to your private Zerotier network.", + "properties": { + "machines": { + "type": "object", + "patternProperties": { + "^(gchq-local|qube-email|wintux)$": { + "type": "object", + "additionalProperties": false + } + }, + "additionalProperties": false, + "description": "Machines to assign this role to. Format: each machine name is a key with an empty object {} as value. Example: {\"wintux\": {}, \"gchq-local\": {}}" + }, + "tags": { + "type": "object", + "patternProperties": { + "^(all|darwin|nixos)$": { + "type": "object", + "additionalProperties": false + } + }, + "additionalProperties": false, + "description": "Tags to assign this role to. Format: each tag name is a key with an empty object {} as value. Example: {\"all\": {}, \"nixos\": {}}" + } + }, + "additionalProperties": false + } + }, + "additionalProperties": false + } + }, + "required": ["roles"], + "additionalProperties": false + } + } + ] + }, + "response": { + "function_calls": [], + "message": "Which machine should act as the ZeroTier controller, and do you want any machines to serve as moon relay nodes (or should all machines just be regular peers)?" 
+ }, + "duration_ms": 3761.2, + "metadata": { + "session_state_before": {}, + "selected_service": "zerotier" + } + }, + { + "timestamp": "2025-10-17T13:53:25.050878+00:00", + "provider": "claude", + "model": "claude-sonnet-4-5", + "stage": "final_decision", + "request": { + "messages": [ + { + "role": "system", + "content": "You are a clan deployment planner in CONFIGURATION MODE. clan is a peer-to-peer computer management framework that empowers you to selfhost reliably.\n\nContext shift\n- Service selection is complete; disregard any instructions from earlier phases.\n- You have a summary of the chosen service, including role descriptions and relevant constraints.\n\nMission\n1) Analyze the user request (and conversation history) to determine which machines and/or tags should receive each role.\n2) Call the configuration tool with the correct role-to-target mappings.\n3) If the required assignments are ambiguous or missing information, ask ONE clarifying question instead of guessing.\n\nHard rules — Target assignment\n- Prefer TAGS when the user mentions groups (e.g., \"all production servers\", \"backup servers\").\n- Prefer MACHINE NAMES when the user names specific machines (e.g., \"machine1\", \"server-a\").\n- You may assign a role to BOTH machines AND tags if the request implies it (e.g., \"deploy to server1 and all production machines\").\n- Machine and tag names must EXACTLY match those in the assistant context. 
Do NOT invent names.\n\nHard rules — Role assignment\n- Use the service summary to understand the intent of each role.\n- If the request clearly maps to specific roles (e.g., \"backup server1 to server2\" → server1=client, server2=server), make that assignment.\n- When the user intent is clear but roles are unnamed, infer sensible assignments (server-like roles → stable machines/tags, client-like roles → broader groups).\n- Ask for clarification when:\n * Multiple roles exist but the distribution across machines/tags is unclear.\n * The user mentions machines without describing how they participate.\n * The request conflicts with the service capabilities provided in the summary.\n\nHard rules — Technical\n- Call tools ONLY from the provided list and follow their schemas exactly.\n- Arguments must match the schema; omit fields you do not need.\n- The configuration payload should look like: `{\"roles\": {\"role_name\": {\"machines\": {\"machine1\": {}}, \"tags\": {\"tag1\": {}}}}}` with empty objects as values.\n\nDecision checklist (run before responding)\n- Do I know which machines/tags should map to each role?\n- Do the assignments align with the role descriptions and user intent?\n- Are all machine/tag names spelled exactly as provided?\n- Is clarification required before a safe assignment can be made?\n\nResponse discipline\n- Case A (assignments clear): Issue a configuration tool call ONLY, with NO message content.\n- Case B (uncertain assignments): Ask one concise clarifying question with NO tool calls.\n- Never combine tool calls with explanatory text or repeat these instructions." + }, + { + "role": "assistant", + "content": "Assistant context: available machines and tags.\n\nMachines:\n1. `gchq-local`\n2. `qube-email`\n3. `wintux`\n\nTags:\n1. `all` (A group containing all machines)\n2. `darwin` (A group containing all macOS machines)\n3. 
`nixos` (A group containing all NixOS machines)\n\nService documentation summary for `zerotier`:\n\nVALUE PROPOSITION:\nZeroTier solves the problem of connecting machines across different networks into a unified VPN, even when devices are behind NAT or firewalls. It's particularly useful when you need centralized control over network membership and want optional relay nodes to ensure connectivity for hard-to-reach devices.\n\nROLES:\n1. **Controller** - Manages network membership and admits new peers to the VPN. Must be updated when new nodes join the clan. The controller needs to be online to authorize new machines, but existing peers can continue communicating even when the controller is offline.\n\n2. **Moon** (optional) - Acts as a relay node with a publicly reachable IP address. Moons provide direct connectivity pathways for peers that aren't publicly accessible, enabling devices behind NAT or restrictive firewalls to participate in the VPN by routing traffic through these stable endpoints.\n\n3. **Peer** - Standard network nodes that connect to other peers, moons, and the controller. This is the general role for most clan member devices that are neither controllers nor moons.\n\nKEY CONSTRAINTS:\n- Exactly one controller is required per ZeroTier instance\n- The controller must be online to admit new machines to the network\n- Moons must define their public IP addresses in configuration\n- When adding new nodes to the clan, the controller must be updated to maintain the current member list\n- Moons are optional but recommended for environments with NAT or firewall restrictions" + }, + { + "role": "user", + "content": "What VPNs are available?" + }, + { + "role": "assistant", + "content": "You have four VPN/overlay network services available, each with different strengths:\n\n**WireGuard** – A traditional VPN with a controller/peer architecture. Controllers are publicly accessible servers that route traffic between peers. 
Best for setups where you have one or more servers with static IPs/domains and want reliable, well-established VPN technology.\n\n**ZeroTier** – A managed mesh VPN requiring a controller to admit peers, with optional \"moon\" relay nodes for NAT traversal. The controller must be online to add new machines but existing peers stay connected. Good when you need relays for hard-to-reach devices.\n\n**Mycelium** – A planetary-scale, encrypted IPv6 overlay network with locality-aware routing. Automatically finds shortest paths, reroutes around failures, and can run without TUN devices as a message bus. Designed for true peer-to-peer connectivity at scale.\n\n**Yggdrasil** – A decentralized, future-proof routing protocol with automatic local peer discovery. No controllers needed; machines auto-discover each other on the same network. Simpler setup when you don't have public endpoints.\n\n**Key differences:**\n- **Public endpoint required?** WireGuard (controllers), ZeroTier (moons optional)\n- **Fully decentralized?** Mycelium, Yggdrasil\n- **Auto-discovery?** Yggdrasil (local), Mycelium (global)\n- **Controller dependency?** WireGuard and ZeroTier need controllers; Mycelium and Yggdrasil don't\n\nWhich direction interests you—something with central coordination (WireGuard/ZeroTier) or fully decentralized (Mycelium/Yggdrasil)?" + }, + { + "role": "user", + "content": "Hmm zerotier please" + }, + { + "role": "assistant", + "content": "Which machine should act as the ZeroTier controller, and do you want any machines to serve as moon relay nodes (or should all machines just be regular peers)?" + }, + { + "role": "user", + "content": "what is a moon?" 
+ } + ], + "tools": [ + { + "name": "zerotier", + "description": "Zerotier Mesh VPN Service for secure P2P networking between machines", + "parameters": { + "type": "object", + "properties": { + "module": { + "type": "object", + "properties": {} + }, + "roles": { + "type": "object", + "properties": { + "controller": { + "type": "object", + "description": "Manages network membership and is responsible for admitting new peers to your Zerotier network.", + "properties": { + "machines": { + "type": "object", + "patternProperties": { + "^(gchq-local|qube-email|wintux)$": { + "type": "object", + "additionalProperties": false + } + }, + "additionalProperties": false, + "description": "Machines to assign this role to. Format: each machine name is a key with an empty object {} as value. Example: {\"wintux\": {}, \"gchq-local\": {}}" + }, + "tags": { + "type": "object", + "patternProperties": { + "^(all|darwin|nixos)$": { + "type": "object", + "additionalProperties": false + } + }, + "additionalProperties": false, + "description": "Tags to assign this role to. Format: each tag name is a key with an empty object {} as value. Example: {\"all\": {}, \"nixos\": {}}" + } + }, + "additionalProperties": false + }, + "moon": { + "type": "object", + "description": "A moon acts as a relay node to connect other nodes in the zerotier network that are not publicly reachable. Each moon must be publicly reachable.", + "properties": { + "machines": { + "type": "object", + "patternProperties": { + "^(gchq-local|qube-email|wintux)$": { + "type": "object", + "additionalProperties": false + } + }, + "additionalProperties": false, + "description": "Machines to assign this role to. Format: each machine name is a key with an empty object {} as value. 
Example: {\"wintux\": {}, \"gchq-local\": {}}" + }, + "tags": { + "type": "object", + "patternProperties": { + "^(all|darwin|nixos)$": { + "type": "object", + "additionalProperties": false + } + }, + "additionalProperties": false, + "description": "Tags to assign this role to. Format: each tag name is a key with an empty object {} as value. Example: {\"all\": {}, \"nixos\": {}}" + } + }, + "additionalProperties": false + }, + "peer": { + "type": "object", + "description": "A peer that connects to your private Zerotier network.", + "properties": { + "machines": { + "type": "object", + "patternProperties": { + "^(gchq-local|qube-email|wintux)$": { + "type": "object", + "additionalProperties": false + } + }, + "additionalProperties": false, + "description": "Machines to assign this role to. Format: each machine name is a key with an empty object {} as value. Example: {\"wintux\": {}, \"gchq-local\": {}}" + }, + "tags": { + "type": "object", + "patternProperties": { + "^(all|darwin|nixos)$": { + "type": "object", + "additionalProperties": false + } + }, + "additionalProperties": false, + "description": "Tags to assign this role to. Format: each tag name is a key with an empty object {} as value. 
Example: {\"all\": {}, \"nixos\": {}}" + } + }, + "additionalProperties": false + } + }, + "additionalProperties": false + } + }, + "required": ["roles"], + "additionalProperties": false + } + } + ] + }, + "response": { + "function_calls": [], + "message": "A **moon** is a relay node in ZeroTier that helps other peers connect when they can't reach each other directly.\n\n**Why you'd need one:**\n- When peers are behind NAT or firewalls and can't establish direct connections\n- The moon acts as a \"meeting point\" that routes traffic between peers that can't see each other\n\n**Requirements:**\n- Must have a **publicly reachable IP address** (not behind NAT)\n- Needs to be configured with its public IP in the clan setup\n\n**When to use:**\n- ✅ You have a VPS or server with a public IP\n- ✅ Some of your machines are on restrictive networks (corporate, mobile, home NAT)\n- ❌ Skip if all your machines can already reach each other directly or you don't have a public server\n\n**For your setup:**\nLooking at your machines (`gchq-local`, `qube-email`, `wintux`), these sound like local/personal devices. Unless one of them has a public IP address, you probably don't need a moon—just assign one as the controller and make the rest regular peers.\n\nDo any of your machines have a public IP address, or should I configure all three as controller + peers (no moons)?" + }, + "duration_ms": 10151.6, + "metadata": { + "session_state_before": { + "pending_final_decision": { + "service_name": "zerotier", + "service_summary": "VALUE PROPOSITION:\nZeroTier solves the problem of connecting machines across different networks into a unified VPN, even when devices are behind NAT or firewalls. It's particularly useful when you need centralized control over network membership and want optional relay nodes to ensure connectivity for hard-to-reach devices.\n\nROLES:\n1. **Controller** - Manages network membership and admits new peers to the VPN. Must be updated when new nodes join the clan. 
The controller needs to be online to authorize new machines, but existing peers can continue communicating even when the controller is offline.\n\n2. **Moon** (optional) - Acts as a relay node with a publicly reachable IP address. Moons provide direct connectivity pathways for peers that aren't publicly accessible, enabling devices behind NAT or restrictive firewalls to participate in the VPN by routing traffic through these stable endpoints.\n\n3. **Peer** - Standard network nodes that connect to other peers, moons, and the controller. This is the general role for most clan member devices that are neither controllers nor moons.\n\nKEY CONSTRAINTS:\n- Exactly one controller is required per ZeroTier instance\n- The controller must be online to admit new machines to the network\n- Moons must define their public IP addresses in configuration\n- When adding new nodes to the clan, the controller must be updated to maintain the current member list\n- Moons are optional but recommended for environments with NAT or firewall restrictions" + } + }, + "selected_service": "zerotier", + "resume": true + } + }, + { + "timestamp": "2025-10-17T13:53:59.433929+00:00", + "provider": "claude", + "model": "claude-sonnet-4-5", + "stage": "final_decision", + "request": { + "messages": [ + { + "role": "system", + "content": "You are a clan deployment planner in CONFIGURATION MODE. 
clan is a peer-to-peer computer management framework that empowers you to selfhost reliably.\n\nContext shift\n- Service selection is complete; disregard any instructions from earlier phases.\n- You have a summary of the chosen service, including role descriptions and relevant constraints.\n\nMission\n1) Analyze the user request (and conversation history) to determine which machines and/or tags should receive each role.\n2) Call the configuration tool with the correct role-to-target mappings.\n3) If the required assignments are ambiguous or missing information, ask ONE clarifying question instead of guessing.\n\nHard rules — Target assignment\n- Prefer TAGS when the user mentions groups (e.g., \"all production servers\", \"backup servers\").\n- Prefer MACHINE NAMES when the user names specific machines (e.g., \"machine1\", \"server-a\").\n- You may assign a role to BOTH machines AND tags if the request implies it (e.g., \"deploy to server1 and all production machines\").\n- Machine and tag names must EXACTLY match those in the assistant context. 
Do NOT invent names.\n\nHard rules — Role assignment\n- Use the service summary to understand the intent of each role.\n- If the request clearly maps to specific roles (e.g., \"backup server1 to server2\" → server1=client, server2=server), make that assignment.\n- When the user intent is clear but roles are unnamed, infer sensible assignments (server-like roles → stable machines/tags, client-like roles → broader groups).\n- Ask for clarification when:\n * Multiple roles exist but the distribution across machines/tags is unclear.\n * The user mentions machines without describing how they participate.\n * The request conflicts with the service capabilities provided in the summary.\n\nHard rules — Technical\n- Call tools ONLY from the provided list and follow their schemas exactly.\n- Arguments must match the schema; omit fields you do not need.\n- The configuration payload should look like: `{\"roles\": {\"role_name\": {\"machines\": {\"machine1\": {}}, \"tags\": {\"tag1\": {}}}}}` with empty objects as values.\n\nDecision checklist (run before responding)\n- Do I know which machines/tags should map to each role?\n- Do the assignments align with the role descriptions and user intent?\n- Are all machine/tag names spelled exactly as provided?\n- Is clarification required before a safe assignment can be made?\n\nResponse discipline\n- Case A (assignments clear): Issue a configuration tool call ONLY, with NO message content.\n- Case B (uncertain assignments): Ask one concise clarifying question with NO tool calls.\n- Never combine tool calls with explanatory text or repeat these instructions." + }, + { + "role": "assistant", + "content": "Assistant context: available machines and tags.\n\nMachines:\n1. `gchq-local`\n2. `qube-email`\n3. `wintux`\n\nTags:\n1. `all` (A group containing all machines)\n2. `darwin` (A group containing all macOS machines)\n3. 
`nixos` (A group containing all NixOS machines)\n\nService documentation summary for `zerotier`:\n\nVALUE PROPOSITION:\nZeroTier solves the problem of connecting machines across different networks into a unified VPN, even when devices are behind NAT or firewalls. It's particularly useful when you need centralized control over network membership and want optional relay nodes to ensure connectivity for hard-to-reach devices.\n\nROLES:\n1. **Controller** - Manages network membership and admits new peers to the VPN. Must be updated when new nodes join the clan. The controller needs to be online to authorize new machines, but existing peers can continue communicating even when the controller is offline.\n\n2. **Moon** (optional) - Acts as a relay node with a publicly reachable IP address. Moons provide direct connectivity pathways for peers that aren't publicly accessible, enabling devices behind NAT or restrictive firewalls to participate in the VPN by routing traffic through these stable endpoints.\n\n3. **Peer** - Standard network nodes that connect to other peers, moons, and the controller. This is the general role for most clan member devices that are neither controllers nor moons.\n\nKEY CONSTRAINTS:\n- Exactly one controller is required per ZeroTier instance\n- The controller must be online to admit new machines to the network\n- Moons must define their public IP addresses in configuration\n- When adding new nodes to the clan, the controller must be updated to maintain the current member list\n- Moons are optional but recommended for environments with NAT or firewall restrictions" + }, + { + "role": "user", + "content": "What VPNs are available?" + }, + { + "role": "assistant", + "content": "You have four VPN/overlay network services available, each with different strengths:\n\n**WireGuard** – A traditional VPN with a controller/peer architecture. Controllers are publicly accessible servers that route traffic between peers. 
Best for setups where you have one or more servers with static IPs/domains and want reliable, well-established VPN technology.\n\n**ZeroTier** – A managed mesh VPN requiring a controller to admit peers, with optional \"moon\" relay nodes for NAT traversal. The controller must be online to add new machines but existing peers stay connected. Good when you need relays for hard-to-reach devices.\n\n**Mycelium** – A planetary-scale, encrypted IPv6 overlay network with locality-aware routing. Automatically finds shortest paths, reroutes around failures, and can run without TUN devices as a message bus. Designed for true peer-to-peer connectivity at scale.\n\n**Yggdrasil** – A decentralized, future-proof routing protocol with automatic local peer discovery. No controllers needed; machines auto-discover each other on the same network. Simpler setup when you don't have public endpoints.\n\n**Key differences:**\n- **Public endpoint required?** WireGuard (controllers), ZeroTier (moons optional)\n- **Fully decentralized?** Mycelium, Yggdrasil\n- **Auto-discovery?** Yggdrasil (local), Mycelium (global)\n- **Controller dependency?** WireGuard and ZeroTier need controllers; Mycelium and Yggdrasil don't\n\nWhich direction interests you—something with central coordination (WireGuard/ZeroTier) or fully decentralized (Mycelium/Yggdrasil)?" + }, + { + "role": "user", + "content": "Hmm zerotier please" + }, + { + "role": "assistant", + "content": "Which machine should act as the ZeroTier controller, and do you want any machines to serve as moon relay nodes (or should all machines just be regular peers)?" + }, + { + "role": "user", + "content": "what is a moon?" 
+ }, + { + "role": "assistant", + "content": "A **moon** is a relay node in ZeroTier that helps other peers connect when they can't reach each other directly.\n\n**Why you'd need one:**\n- When peers are behind NAT or firewalls and can't establish direct connections\n- The moon acts as a \"meeting point\" that routes traffic between peers that can't see each other\n\n**Requirements:**\n- Must have a **publicly reachable IP address** (not behind NAT)\n- Needs to be configured with its public IP in the clan setup\n\n**When to use:**\n- ✅ You have a VPS or server with a public IP\n- ✅ Some of your machines are on restrictive networks (corporate, mobile, home NAT)\n- ❌ Skip if all your machines can already reach each other directly or you don't have a public server\n\n**For your setup:**\nLooking at your machines (`gchq-local`, `qube-email`, `wintux`), these sound like local/personal devices. Unless one of them has a public IP address, you probably don't need a moon—just assign one as the controller and make the rest regular peers.\n\nDo any of your machines have a public IP address, or should I configure all three as controller + peers (no moons)?" 
+ }, + { + "role": "user", + "content": "okay then gchq-local as controller and qube-email as moon please everything else as peer" + } + ], + "tools": [ + { + "name": "zerotier", + "description": "Zerotier Mesh VPN Service for secure P2P networking between machines", + "parameters": { + "type": "object", + "properties": { + "module": { + "type": "object", + "properties": {} + }, + "roles": { + "type": "object", + "properties": { + "controller": { + "type": "object", + "description": "Manages network membership and is responsible for admitting new peers to your Zerotier network.", + "properties": { + "machines": { + "type": "object", + "patternProperties": { + "^(gchq-local|qube-email|wintux)$": { + "type": "object", + "additionalProperties": false + } + }, + "additionalProperties": false, + "description": "Machines to assign this role to. Format: each machine name is a key with an empty object {} as value. Example: {\"wintux\": {}, \"gchq-local\": {}}" + }, + "tags": { + "type": "object", + "patternProperties": { + "^(all|darwin|nixos)$": { + "type": "object", + "additionalProperties": false + } + }, + "additionalProperties": false, + "description": "Tags to assign this role to. Format: each tag name is a key with an empty object {} as value. Example: {\"all\": {}, \"nixos\": {}}" + } + }, + "additionalProperties": false + }, + "moon": { + "type": "object", + "description": "A moon acts as a relay node to connect other nodes in the zerotier network that are not publicly reachable. Each moon must be publicly reachable.", + "properties": { + "machines": { + "type": "object", + "patternProperties": { + "^(gchq-local|qube-email|wintux)$": { + "type": "object", + "additionalProperties": false + } + }, + "additionalProperties": false, + "description": "Machines to assign this role to. Format: each machine name is a key with an empty object {} as value. 
Example: {\"wintux\": {}, \"gchq-local\": {}}" + }, + "tags": { + "type": "object", + "patternProperties": { + "^(all|darwin|nixos)$": { + "type": "object", + "additionalProperties": false + } + }, + "additionalProperties": false, + "description": "Tags to assign this role to. Format: each tag name is a key with an empty object {} as value. Example: {\"all\": {}, \"nixos\": {}}" + } + }, + "additionalProperties": false + }, + "peer": { + "type": "object", + "description": "A peer that connects to your private Zerotier network.", + "properties": { + "machines": { + "type": "object", + "patternProperties": { + "^(gchq-local|qube-email|wintux)$": { + "type": "object", + "additionalProperties": false + } + }, + "additionalProperties": false, + "description": "Machines to assign this role to. Format: each machine name is a key with an empty object {} as value. Example: {\"wintux\": {}, \"gchq-local\": {}}" + }, + "tags": { + "type": "object", + "patternProperties": { + "^(all|darwin|nixos)$": { + "type": "object", + "additionalProperties": false + } + }, + "additionalProperties": false, + "description": "Tags to assign this role to. Format: each tag name is a key with an empty object {} as value. 
Example: {\"all\": {}, \"nixos\": {}}" + } + }, + "additionalProperties": false + } + }, + "additionalProperties": false + } + }, + "required": ["roles"], + "additionalProperties": false + } + } + ] + }, + "response": { + "function_calls": [ + { + "name": "zerotier", + "arguments": { + "roles": { + "controller": { + "machines": { + "gchq-local": {} + } + }, + "moon": { + "machines": { + "qube-email": {} + } + }, + "peer": { + "machines": { + "wintux": {} + } + } + } + } + } + ], + "message": "" + }, + "duration_ms": 4288.86, + "metadata": { + "session_state_before": { + "pending_final_decision": { + "service_name": "zerotier", + "service_summary": "VALUE PROPOSITION:\nZeroTier solves the problem of connecting machines across different networks into a unified VPN, even when devices are behind NAT or firewalls. It's particularly useful when you need centralized control over network membership and want optional relay nodes to ensure connectivity for hard-to-reach devices.\n\nROLES:\n1. **Controller** - Manages network membership and admits new peers to the VPN. Must be updated when new nodes join the clan. The controller needs to be online to authorize new machines, but existing peers can continue communicating even when the controller is offline.\n\n2. **Moon** (optional) - Acts as a relay node with a publicly reachable IP address. Moons provide direct connectivity pathways for peers that aren't publicly accessible, enabling devices behind NAT or restrictive firewalls to participate in the VPN by routing traffic through these stable endpoints.\n\n3. **Peer** - Standard network nodes that connect to other peers, moons, and the controller. 
This is the general role for most clan member devices that are neither controllers nor moons.\n\nKEY CONSTRAINTS:\n- Exactly one controller is required per ZeroTier instance\n- The controller must be online to admit new machines to the network\n- Moons must define their public IP addresses in configuration\n- When adding new nodes to the clan, the controller must be updated to maintain the current member list\n- Moons are optional but recommended for environments with NAT or firewall restrictions" + } + }, + "selected_service": "zerotier", + "resume": true + } + } +] diff --git a/pkgs/clan-cli/clan_lib/llm/container_data/vpns_readme.json b/pkgs/clan-cli/clan_lib/llm/container_data/vpns_readme.json new file mode 100644 index 000000000..06bb4dad7 --- /dev/null +++ b/pkgs/clan-cli/clan_lib/llm/container_data/vpns_readme.json @@ -0,0 +1,5 @@ +{ + "wireguard": "# Wireguard VPN Service\n\nThis service provides a Wireguard-based VPN mesh network with automatic IPv6 address allocation and routing between clan machines.\n\n## Overview\n\nThe wireguard service creates a secure mesh network between clan machines using two roles:\n- **Controllers**: Machines with public endpoints that act as connection points and routers\n- **Peers**: Machines that connect through controllers to access the network\n\n## Requirements\n\n- Controllers must have a publicly accessible endpoint (domain name or static IP)\n- Peers must be in networks where UDP traffic is not blocked (uses port 51820 by default, configurable)\n\n## Features\n\n- Automatic IPv6 address allocation using ULA (Unique Local Address) prefixes\n- Full mesh connectivity between all machines\n- Automatic key generation and distribution\n- IPv6 forwarding on controllers for inter-peer communication\n- Support for multiple controllers for redundancy\n\n## Network Architecture\n\n### IPv6 Address Allocation\n- Base network: `/40` ULA prefix (deterministically generated from instance name)\n- Controllers: Each gets a `/56` subnet 
from the base `/40`\n- Peers: Each gets a unique 64-bit host suffix that is used in ALL controller subnets\n\n### Addressing Design\n- Each peer generates a unique host suffix (e.g., `:8750:a09b:0:1`)\n- This suffix is appended to each controller's `/56` prefix to create unique addresses\n- Example: peer1 with suffix `:8750:a09b:0:1` gets:\n - `fd51:19c1:3b:f700:8750:a09b:0:1` in controller1's subnet\n - `fd51:19c1:c1:aa00:8750:a09b:0:1` in controller2's subnet\n- Controllers allow each peer's `/96` subnet for routing flexibility\n\n### Connectivity\n- Peers use a single WireGuard interface with multiple IPs (one per controller subnet)\n- Controllers connect to ALL other controllers and ALL peers on a single interface\n- Controllers have IPv6 forwarding enabled to route traffic between peers\n- All traffic between peers flows through controllers\n- Symmetric routing is maintained as each peer has consistent IPs across all controllers\n\n### Example Network Topology\n\n```mermaid\ngraph TB\n subgraph Controllers\n C1[controller1
endpoint: vpn1.example.com
fd51:19c1:3b:f700::/56]\n C2[controller2
endpoint: vpn2.example.com
fd51:19c1:c1:aa00::/56]\n end\n \n subgraph Peers\n P1[peer1
designated: controller1]\n P2[peer2
designated: controller2]\n P3[peer3
designated: controller1]\n end\n \n %% Controllers connect to each other\n C1 <--> C2\n \n %% All peers connect to all controllers\n P1 <--> C1\n P1 <--> C2\n P2 <--> C1\n P2 <--> C2\n P3 <--> C1\n P3 <--> C2\n \n %% Peer-to-peer traffic flows through controllers\n P1 -.->|via controllers| P3\n P1 -.->|via controllers| P2\n P2 -.->|via controllers| P3\n \n classDef controller fill:#f9f,stroke:#333,stroke-width:4px\n classDef peer fill:#bbf,stroke:#333,stroke-width:2px\n class C1,C2 controller\n class P1,P2,P3 peer\n```\n\n## Configuration\n\n### Basic Setup with Single Controller\n\n```nix\n# In your clan.nix\n{\n instances = {\n wireguard = {\n module.name = \"wireguard\";\n module.input = \"clan-core\";\n roles.controller = {\n machines.server1 = {};\n settings = {\n # Public endpoint where this controller can be reached\n endpoint = \"vpn.example.com\";\n # Optional: Change the UDP port (default: 51820)\n port = 51820;\n };\n };\n roles.peer = {\n # No configuration needed if only one controller exists\n machines.laptop1 = {};\n };\n };\n }\n}\n```\n\n### Multiple Controllers Setup\n\n```nix\n{\n instances = {\n wireguard = {\n module.name = \"wireguard\";\n module.input = \"clan-core\";\n roles.controller.machines = {\n server1.settings.endpoint = \"vpn1.example.com\";\n server2.settings.endpoint = \"vpn2.example.com\";\n server3.settings.endpoint = \"vpn3.example.com\";\n };\n roles.peer.machines.laptop1 = {\n # Must specify which controller subnet is exposed as the default in /etc/hosts, when multiple controllers exist\n settings.controller = \"server1\";\n };\n };\n }\n}\n```\n\n### Advanced Options\n\n\n### Automatic Hostname Resolution\n\nThe wireguard service automatically adds entries to `/etc/hosts` for all machines in the network. 
Each machine is accessible via its hostname in the format `<machine-name>.<instance-name>`.\n\nFor example, with an instance named `vpn`:\n- `server1.vpn` - resolves to server1's IPv6 address\n- `laptop1.vpn` - resolves to laptop1's IPv6 address\n\nThis allows machines to communicate using hostnames instead of IPv6 addresses:\n\n```bash\n# Ping another machine by hostname\nping6 server1.vpn\n\n# SSH to another machine\nssh user@laptop1.vpn\n```\n\n## Troubleshooting\n\n### Check Wireguard Status\n```bash\nsudo wg show\n```\n\n### Verify IP Addresses\n```bash\nip addr show dev <interface-name>\n```\n\n### Check Routing\n```bash\nip -6 route show dev <interface-name>\n```\n\n### Interface Fails to Start: \"Address already in use\"\n\nIf you see this error in your logs:\n```\nwireguard: Could not bring up interface, ignoring: Address already in use\n```\n\nThis means the configured port (default: 51820) is already in use by another service or wireguard instance. Solutions:\n\n1. **Check for conflicting wireguard instances:**\n ```bash\n sudo wg show\n sudo ss -ulnp | grep 51820\n ```\n\n2. **Use a different port:**\n ```nix\n services.wireguard.myinstance = {\n roles.controller = {\n endpoint = \"vpn.example.com\";\n port = 51821; # Use a different port\n };\n };\n ```\n\n3. **Ensure unique ports across multiple instances:**\n If you have multiple wireguard instances on the same machine, each must use a different port.\n\n### Key Management\n\nKeys are automatically generated and stored in the clan vars system. 
To regenerate keys:\n\n```bash\n# Regenerate keys for a specific machine and instance\nclan vars generate --service wireguard-keys-<instance-name> --regenerate --machine <machine-name>\n\n# Apply the new keys\nclan machines update <machine-name>\n```\n\n## Security Considerations\n\n- All traffic is encrypted using Wireguard's modern cryptography\n- Private keys never leave the machines they're generated on\n- Public keys are distributed through the clan vars system\n- Controllers must have publicly accessible endpoints\n- Firewall rules are automatically configured for the Wireguard ports\n\n", + "yggdrasil": "This module sets up [yggdrasil](https://yggdrasil-network.github.io/) across your clan. \n\nYggdrasil is designed to be a future-proof and decentralised alternative to\nthe structured routing protocols commonly used today on the internet. Inside your clan, it will allow you to reach all of your machines.\n\n## Example Usage\n\nWhile you can specify statically configured peers for each host, yggdrasil does auto-discovery of local peers.\n\n```nix\ninventory = {\n\n machines = {\n peer1 = { };\n peer2 = { };\n };\n\n instances = {\n yggdrasil = {\n\n # Deploy on all machines\n roles.default.tags.all = { };\n\n # Or individual hosts\n roles.default.machines.peer1 = { };\n roles.default.machines.peer2 = { };\n };\n };\n};\n```\n", + "zerotier": "## Usage\n\n```\ninventory.instances = {\n zerotier = {\n module = {\n name = \"zerotier\";\n input = \"clan-core\";\n };\n roles.peer.tags.all = { };\n roles.controller.machines.jon = { };\n roles.moon.machines.sara.settings.stableEndpoints = [ \"77.52.165.46\" ];\n };\n```\n\nThe input should be named according to your flake input.\nAll machines will be peers and connected to the zerotier network.\nJon is the controller machine, which will accept other machines into the network.\nSara is a moon and sets the `stableEndpoints` setting with a publicly reachable IP, the moon is optional.\n\n## Overview\n\nThis guide explains how to set up and manage a [ZeroTier 
VPN](https://zerotier.com) for a clan network. Each VPN requires a single controller and can support multiple peers and optional moons for better connectivity.\n\n## Roles\n\n### 1. Controller\n\nThe [Controller](https://docs.zerotier.com/controller/) manages network membership and is responsible for admitting new peers.\nWhen a new node is added to the clan, the controller must be updated to ensure it has the latest member list.\n\n- **Key Points:**\n - Must be online to admit new machines to the VPN.\n - Existing nodes can continue to communicate even when the controller is offline.\n\n### 2. Moons\n\n[Moons](https://docs.zerotier.com/roots) act as relay nodes,\nproviding direct connectivity to peers via their public IP addresses.\nThey enable devices that are not publicly reachable to join the VPN by routing through these nodes.\n\n- **Configuration Notes:**\n - Each moon must define its public IP address.\n - Ensures connectivity for devices behind NAT or restrictive firewalls.\n\n### 3. 
Peers\n\nPeers are standard nodes in the VPN.\nThey connect to other peers, moons, and the controller as needed.\n\n- **Purpose:**\n - General role for all machines that are neither controllers nor moons.\n - Ideal for most clan members devices.\n" +} From 58b88e874fed086776e6fa757e1b353a65a0602e Mon Sep 17 00:00:00 2001 From: Qubasa Date: Wed, 22 Oct 2025 15:32:56 +0200 Subject: [PATCH 4/5] clan_lib: Add llm unit tests --- pkgs/clan-cli/clan_lib/llm/llm_test.py | 289 +++ .../clan_lib/llm/test_process_chat_turn.py | 1957 +++++++++++++++++ 2 files changed, 2246 insertions(+) create mode 100644 pkgs/clan-cli/clan_lib/llm/llm_test.py create mode 100644 pkgs/clan-cli/clan_lib/llm/test_process_chat_turn.py diff --git a/pkgs/clan-cli/clan_lib/llm/llm_test.py b/pkgs/clan-cli/clan_lib/llm/llm_test.py new file mode 100644 index 000000000..6b2acb73e --- /dev/null +++ b/pkgs/clan-cli/clan_lib/llm/llm_test.py @@ -0,0 +1,289 @@ +from collections.abc import Callable + +import pytest +from clan_cli.tests.fixtures_flakes import nested_dict +from clan_lib.flake.flake import Flake +from clan_lib.llm.llm import ( + OpenAIFunctionSchema, + aggregate_openai_function_schemas, + llm_final_decision_to_inventory_instances, +) +from clan_lib.llm.schemas import FunctionCallType, clan_module_to_openai_spec +from clan_lib.services.modules import list_service_modules + + +@pytest.mark.with_core +def test_clan_module_to_llm_func( + clan_flake: Callable[..., Flake], +) -> None: + # ATTENTION! 
This method lacks Typechecking + config = nested_dict() + # explicit module selection + # We use this random string in test to avoid code dependencies on the input name + config["inventory"]["instances"]["foo"]["module"]["input"] = ( + "Y2xhbi1jaW9yZS1uZXZlci1kZXBlbmQtb24tbWU" + ) + config["inventory"]["instances"]["foo"]["module"]["name"] = "sshd" + # input = null + config["inventory"]["instances"]["bar"]["module"]["input"] = None + config["inventory"]["instances"]["bar"]["module"]["name"] = "sshd" + + config["inventory"]["machines"] = { + "machine1": { + "tags": ["production", "backup"], + }, + "machine2": { + "tags": ["client"], + }, + "machine3": { + "tags": ["client"], + }, + } + config["inventory"]["tags"] = { + "production": [], + "backup": [], + "client": [], + } + + # Omit input + config["inventory"]["instances"]["baz"]["module"]["name"] = "sshd" + # external input + flake = clan_flake(config) + + service_modules = list_service_modules(flake) + + # Module(usage_ref={'name': 'borgbackup', 'input': None}, info=ModuleInfo(manifest=ModuleManifest(name='borgbackup', description='Efficient, deduplicating backup program with optional compression and secure encryption.', categories=['System'], features={'API': True}), roles={'client': Role(name='client', description='A borgbackup client that backs up to all borgbackup server roles.'), 'server': Role(name='server', description='A borgbackup server that stores the backups of clients.')}), native=True, instance_refs=[]), + borgbackup_service = next( + m for m in service_modules.modules if m.usage_ref.get("name") == "borgbackup" + ) + + assert borgbackup_service is not None + + available_machines = ["machine1", "machine2", "server1"] + available_tags = ["production", "backup", "client"] + + generated_tool_func = clan_module_to_openai_spec( + borgbackup_service, available_tags, available_machines + ) + + expected_tool_func: OpenAIFunctionSchema = { + "type": "function", + "name": "borgbackup", + "description": 
"Efficient, deduplicating backup program with optional compression and secure encryption.", + "parameters": { + "type": "object", + "properties": { + "module": { + "type": "object", + "properties": {}, + }, + "roles": { + "type": "object", + "properties": { + "client": { + "type": "object", + "description": "A borgbackup client that backs up to all borgbackup server roles.", + "properties": { + "machines": { + "type": "object", + "patternProperties": { + "^(machine1|machine2|server1)$": { + "type": "object", + "additionalProperties": False, + } + }, + "additionalProperties": False, + "description": 'Machines to assign this role to. Format: each machine name is a key with an empty object {} as value. Example: {"wintux": {}, "gchq-local": {}}', + }, + "tags": { + "type": "object", + "patternProperties": { + "^(production|backup|client)$": { + "type": "object", + "additionalProperties": False, + } + }, + "additionalProperties": False, + "description": 'Tags to assign this role to. Format: each tag name is a key with an empty object {} as value. Example: {"all": {}, "nixos": {}}', + }, + }, + "additionalProperties": False, + }, + "server": { + "type": "object", + "description": "A borgbackup server that stores the backups of clients.", + "properties": { + "machines": { + "type": "object", + "patternProperties": { + "^(machine1|machine2|server1)$": { + "type": "object", + "additionalProperties": False, + } + }, + "additionalProperties": False, + "description": 'Machines to assign this role to. Format: each machine name is a key with an empty object {} as value. Example: {"wintux": {}, "gchq-local": {}}', + }, + "tags": { + "type": "object", + "patternProperties": { + "^(production|backup|client)$": { + "type": "object", + "additionalProperties": False, + } + }, + "additionalProperties": False, + "description": 'Tags to assign this role to. Format: each tag name is a key with an empty object {} as value. 
Example: {"all": {}, "nixos": {}}', + }, + }, + "additionalProperties": False, + }, + }, + "additionalProperties": False, + }, + }, + "required": ["roles"], + "additionalProperties": False, + }, + "strict": True, + } + + assert generated_tool_func == expected_tool_func + + certificate_service = next( + m for m in service_modules.modules if m.usage_ref.get("name") == "certificates" + ) + assert certificate_service is not None + + generated_tool_func2 = clan_module_to_openai_spec( + certificate_service, available_tags, available_machines + ) + + expected_tool_func2: OpenAIFunctionSchema = { + "type": "function", + "name": "certificates", + "description": "Sets up a PKI certificate chain using step-ca", + "parameters": { + "type": "object", + "properties": { + "module": { + "type": "object", + "properties": {}, + }, + "roles": { + "type": "object", + "properties": { + "ca": { + "type": "object", + "description": "A certificate authority that issues and signs certificates for other machines.", + "properties": { + "machines": { + "type": "object", + "patternProperties": { + "^(machine1|machine2|server1)$": { + "type": "object", + "additionalProperties": False, + } + }, + "additionalProperties": False, + "description": 'Machines to assign this role to. Format: each machine name is a key with an empty object {} as value. Example: {"wintux": {}, "gchq-local": {}}', + }, + "tags": { + "type": "object", + "patternProperties": { + "^(production|backup|client)$": { + "type": "object", + "additionalProperties": False, + } + }, + "additionalProperties": False, + "description": 'Tags to assign this role to. Format: each tag name is a key with an empty object {} as value. 
Example: {"all": {}, "nixos": {}}', + }, + }, + "additionalProperties": False, + }, + "default": { + "type": "object", + "description": "A machine that trusts the CA and can get certificates issued by it.", + "properties": { + "machines": { + "type": "object", + "patternProperties": { + "^(machine1|machine2|server1)$": { + "type": "object", + "additionalProperties": False, + } + }, + "additionalProperties": False, + "description": 'Machines to assign this role to. Format: each machine name is a key with an empty object {} as value. Example: {"wintux": {}, "gchq-local": {}}', + }, + "tags": { + "type": "object", + "patternProperties": { + "^(production|backup|client)$": { + "type": "object", + "additionalProperties": False, + } + }, + "additionalProperties": False, + "description": 'Tags to assign this role to. Format: each tag name is a key with an empty object {} as value. Example: {"all": {}, "nixos": {}}', + }, + }, + "additionalProperties": False, + }, + }, + "additionalProperties": False, + }, + }, + "required": ["roles"], + "additionalProperties": False, + }, + "strict": True, + } + + assert generated_tool_func2 == expected_tool_func2 + + aggregate = aggregate_openai_function_schemas(flake) + + assert len(aggregate.tools) >= 2 + + +def test_llm_final_decision_to_inventory_conversion() -> None: + """Test conversion of LLM final decision to inventory format.""" + final_decision: list[FunctionCallType] = [ + { + "id": "toolu_01XHjHUMzZVTcDCqaYQJEWu5", + "call_id": "toolu_01XHjHUMzZVTcDCqaYQJEWu5", + "type": "function_call", + "name": "matrix-synapse", + "arguments": '{"roles": {"default": {"machines": {"gchq-local": {}}}}}', + }, + { + "id": "toolu_01TsjKZ87J3fi6RNzNzu33ff", + "call_id": "toolu_01TsjKZ87J3fi6RNzNzu33ff", + "type": "function_call", + "name": "monitoring", + "arguments": '{"module": { "input": "qubasas-clan" }, "roles": {"telegraf": {"tags": {"all": {}}}}}', + }, + ] + assert isinstance(final_decision, list) + + expected = [ + { + "module": { + 
"input": None, + "name": "matrix-synapse", + }, + "roles": {"default": {"machines": {"gchq-local": {}}}}, + }, + { + "module": { + "input": "qubasas-clan", + "name": "monitoring", + }, + "roles": {"telegraf": {"tags": {"all": {}}}}, + }, + ] + + result = llm_final_decision_to_inventory_instances(final_decision) + assert result == expected diff --git a/pkgs/clan-cli/clan_lib/llm/test_process_chat_turn.py b/pkgs/clan-cli/clan_lib/llm/test_process_chat_turn.py new file mode 100644 index 000000000..0c11df2c3 --- /dev/null +++ b/pkgs/clan-cli/clan_lib/llm/test_process_chat_turn.py @@ -0,0 +1,1957 @@ +"""Unit tests for process_chat_turn using mocked endpoints replayed from container_data/test_trace_data.json.""" + +import json +from collections.abc import Generator +from pathlib import Path +from typing import TYPE_CHECKING, Any, cast +from unittest.mock import MagicMock, patch + +import pytest +from clan_lib.errors import ClanAiError +from clan_lib.flake.flake import Flake +from clan_lib.llm.endpoints import ( + FunctionCallType, + OllamaChatResponse, + OpenAIChatCompletionResponse, + parse_ollama_response, + parse_openai_response, +) +from clan_lib.llm.llm import ( + DiscoveryProgressEvent, + FinalDecisionProgressEvent, + ReadmeFetchProgressEvent, + ServiceSelectionProgressEvent, + ServiceSelectionResult, + execute_readme_requests, + get_llm_final_decision, + get_llm_service_selection, + process_chat_turn, +) +from clan_lib.llm.schemas import ( + AiAggregate, + MachineDescription, + ReadmeRequest, + SessionState, + TagDescription, +) +from clan_lib.services.modules import ServiceReadmeCollection + +if TYPE_CHECKING: + from clan_lib.llm.schemas import ChatMessage + + +@pytest.fixture +def trace_data() -> list[dict[str, Any]]: + """Load the recorded trace entries from container_data/test_trace_data.json (next to this file).""" + trace_file = Path(__file__).parent / "container_data" / "test_trace_data.json" + with trace_file.open() as f: + return json.load(f) + + +@pytest.fixture +def mock_flake() -> MagicMock: + """Create a bare MagicMock specced to Flake; no attributes are preconfigured.""" 
+ return MagicMock(spec=Flake) + # Add any necessary attributes or methods the test needs + + +@pytest.fixture(autouse=True) +def mock_schema_aggregators() -> Generator[None]: + """Mock the schema aggregation functions to avoid complex setup.""" + machines = [ + MachineDescription(name="gchq-local", description=None), + MachineDescription(name="qube-email", description=None), + MachineDescription(name="wintux", description=None), + ] + tags = [ + TagDescription(name="all", description="A group containing all machines"), + TagDescription( + name="nixos", description="A group containing all NixOS machines" + ), + TagDescription( + name="darwin", description="A group containing all macOS machines" + ), + ] + + mock_aggregate: AiAggregate = AiAggregate( + machines=machines, + tags=tags, + tools=[], # Empty tools list since we're mocking the API calls anyway + ) + + with ( + patch( + "clan_lib.llm.phases.aggregate_openai_function_schemas", + return_value=mock_aggregate, + ), + patch( + "clan_lib.llm.phases.aggregate_ollama_function_schemas", + return_value=mock_aggregate, + ), + patch("clan_lib.llm.phases.create_simplified_service_schemas", return_value=[]), + patch("clan_lib.llm.phases.create_get_readme_tool", return_value={}), + ): + yield + + +def create_openai_response( + function_calls: list[dict[str, Any]], message: str +) -> OpenAIChatCompletionResponse: + """Create an OpenAI-compatible response from function calls and message.""" + tool_calls = [] + for i, call in enumerate(function_calls): + tool_calls.append( + { + "id": f"call_{i}", + "type": "function", + "function": { + "name": call["name"], + "arguments": json.dumps(call["arguments"]), + }, + } + ) + + # Cast to the expected type since we're creating a minimal response for testing + return cast( + "OpenAIChatCompletionResponse", + { + "choices": [ + { + "message": { + "role": "assistant", + "content": message, + "tool_calls": tool_calls if tool_calls else None, + }, + } + ], + }, + ) + + +class 
TestProcessChatTurn: + """Test process_chat_turn with mocked API responses from trace data.""" + + def test_discovery_phase( + self, trace_data: list[dict[str, Any]], mock_flake: MagicMock + ) -> None: + """Test the discovery phase with VPN query.""" + # Get the first trace entry (discovery phase) + discovery_trace = trace_data[0] + assert discovery_trace["stage"] == "discovery" + + response_data = discovery_trace["response"] + function_calls = response_data["function_calls"] + message = response_data["message"] + + # Create OpenAI-compatible response + openai_response = create_openai_response(function_calls, message) + + # Mock the Claude API call + with ( + patch( + "clan_lib.llm.phases.call_claude_api", return_value=openai_response + ) as mock_call, + patch("clan_lib.llm.orchestrator.execute_readme_requests") as mock_execute, + patch( + "clan_lib.llm.orchestrator.get_llm_service_selection" + ) as mock_selection, + patch("clan_lib.llm.orchestrator.get_llm_final_decision") as mock_final, + ): + # Mock readme results + mock_execute.return_value = { + None: MagicMock( + input_name=None, + readmes={ + "wireguard": "# Wireguard README", + "zerotier": "# ZeroTier README", + "mycelium": "# Mycelium README", + "yggdrasil": "# Yggdrasil README", + }, + ) + } + + # Mock the service selection phase - this should return early with clarifying message + service_selection_trace = trace_data[1] + mock_selection.return_value = ServiceSelectionResult( + selected_service=None, + service_summary=None, + clarifying_message=service_selection_trace["response"]["message"], + ) + + # Mock final decision (shouldn't be called, but mock it anyway for safety) + mock_final.return_value = ([], "") + + # Run process_chat_turn + result = process_chat_turn( + user_request="What VPNs are available?", + flake=mock_flake, + conversation_history=None, + provider="claude", + ) + + # Verify the call was made + assert mock_call.called + + # Final decision should NOT be called since we return early with 
clarifying message + assert not mock_final.called + + # Verify the result + assert result.requires_user_response is True + assert "VPN" in result.assistant_message + assert len(result.conversation_history) > 0 + + def test_service_selection_with_user_choice( + self, trace_data: list[dict[str, Any]], mock_flake: MagicMock + ) -> None: + """Test service selection when user makes a choice.""" + # Get the trace entry where user selects zerotier + selection_trace = trace_data[2] + assert selection_trace["stage"] == "select_service" + + response_data = selection_trace["response"] + function_calls = response_data["function_calls"] + assert len(function_calls) == 1 + assert function_calls[0]["name"] == "select_service" + + # Build conversation history up to this point + conversation_history: list[ChatMessage] = [ + {"role": "user", "content": "What VPNs are available?"}, + { + "role": "assistant", + "content": trace_data[1]["response"]["message"], + }, + ] + + # Create session state with pending service selection (resuming workflow) + session_state: SessionState = cast( + "SessionState", + { + "pending_service_selection": { + "readme_results": [ + { + "input_name": None, + "readmes": { + "wireguard": "# Wireguard README", + "zerotier": "# ZeroTier README", + "mycelium": "# Mycelium README", + "yggdrasil": "# Yggdrasil README", + }, + } + ] + } + }, + ) + + # Mock the service selection and final decision + with ( + patch( + "clan_lib.llm.orchestrator.get_llm_service_selection" + ) as mock_selection, + patch("clan_lib.llm.orchestrator.get_llm_final_decision") as mock_final, + ): + mock_selection.return_value = ServiceSelectionResult( + selected_service="zerotier", + service_summary=function_calls[0]["arguments"]["summary"], + clarifying_message="", + ) + + # Mock the final decision phase to ask a question + final_trace = trace_data[3] + mock_final.return_value = ( + [], + final_trace["response"]["message"], + ) + + # Run process_chat_turn with session state + result = 
process_chat_turn( + user_request="Hmm zerotier please", + flake=mock_flake, + conversation_history=conversation_history, + provider="claude", + session_state=session_state, + ) + + # Verify the result + assert result.requires_user_response is True + assert "controller" in result.assistant_message.lower() + + def test_final_decision_with_configuration( + self, trace_data: list[dict[str, Any]], mock_flake: MagicMock + ) -> None: + """Test final decision phase with actual configuration.""" + # Get the last trace entry (final decision with configuration) + final_trace = trace_data[-1] + assert final_trace["stage"] == "final_decision" + + response_data = final_trace["response"] + function_calls = response_data["function_calls"] + assert len(function_calls) == 1 + assert function_calls[0]["name"] == "zerotier" + + # Build full conversation history + conversation_history: list[ChatMessage] = [ + {"role": "user", "content": "What VPNs are available?"}, + { + "role": "assistant", + "content": trace_data[1]["response"]["message"], + }, + {"role": "user", "content": "Hmm zerotier please"}, + { + "role": "assistant", + "content": trace_data[3]["response"]["message"], + }, + {"role": "user", "content": "what is a moon?"}, + { + "role": "assistant", + "content": trace_data[4]["response"]["message"], + }, + ] + + # Mock with pending state + session_state: SessionState = cast( + "SessionState", + { + "pending_final_decision": { + "service_name": "zerotier", + "service_summary": trace_data[2]["response"]["function_calls"][0][ + "arguments" + ]["summary"], + } + }, + ) + + # Mock the final decision call + with patch("clan_lib.llm.orchestrator.get_llm_final_decision") as mock_final: + mock_final.return_value = ( + [ + FunctionCallType( + id="call_0", + call_id="call_0", + type="function_call", + name=function_calls[0]["name"], + arguments=json.dumps(function_calls[0]["arguments"]), + ) + ], + "", + ) + + # Run process_chat_turn + result = process_chat_turn( + user_request="okay then 
gchq-local as controller and qube-email as moon please everything else as peer", + flake=mock_flake, + conversation_history=conversation_history, + provider="claude", + session_state=session_state, + ) + + # Verify the result + assert result.requires_user_response is False + assert len(result.proposed_instances) == 1 + instance = result.proposed_instances[0] + assert instance["module"]["name"] == "zerotier" + assert "controller" in instance["roles"] + assert "moon" in instance["roles"] + assert "peer" in instance["roles"] + assert "gchq-local" in instance["roles"]["controller"]["machines"] + assert "qube-email" in instance["roles"]["moon"]["machines"] + assert "wintux" in instance["roles"]["peer"]["machines"] + + def test_conversation_state_progression( + self, trace_data: list[dict[str, Any]], mock_flake: MagicMock + ) -> None: + """Test that conversation state properly progresses between turns.""" + # Test Turn 1: Discovery phase + discovery_response = create_openai_response( + trace_data[0]["response"]["function_calls"], + trace_data[0]["response"]["message"], + ) + + with ( + patch( + "clan_lib.llm.phases.call_claude_api", return_value=discovery_response + ), + patch("clan_lib.llm.orchestrator.execute_readme_requests") as mock_exec, + patch( + "clan_lib.llm.orchestrator.get_llm_service_selection" + ) as mock_selection, + patch("clan_lib.llm.orchestrator.get_llm_final_decision") as mock_final, + ): + mock_exec.return_value = { + None: MagicMock( + input_name=None, + readmes={ + "wireguard": "README", + "zerotier": "README", + "mycelium": "README", + "yggdrasil": "README", + }, + ) + } + mock_selection.return_value = ServiceSelectionResult( + selected_service=None, + service_summary=None, + clarifying_message=trace_data[1]["response"]["message"], + ) + mock_final.return_value = ([], "") + + result1 = process_chat_turn( + user_request="What VPNs are available?", + flake=mock_flake, + provider="claude", + ) + + # Verify final decision was not called + assert not 
mock_final.called + + # Verify discovery completed and moved to service selection + assert result1.requires_user_response is True + assert "VPN" in result1.assistant_message + # Session state should have pending_service_selection + assert "pending_service_selection" in result1.session_state + + # Test Turn 2: Continue with session state + with ( + patch( + "clan_lib.llm.orchestrator.get_llm_service_selection" + ) as mock_selection, + patch("clan_lib.llm.orchestrator.get_llm_final_decision") as mock_final, + ): + mock_selection.return_value = ServiceSelectionResult( + selected_service="zerotier", + service_summary=trace_data[2]["response"]["function_calls"][0][ + "arguments" + ]["summary"], + clarifying_message="", + ) + mock_final.return_value = ([], trace_data[3]["response"]["message"]) + + result2 = process_chat_turn( + user_request="Hmm zerotier please", + flake=mock_flake, + conversation_history=list(result1.conversation_history), + provider="claude", + session_state=result1.session_state, + ) + + # Verify we progressed to final decision phase + assert result2.requires_user_response is True + assert "pending_final_decision" in result2.session_state + # Conversation history should have grown + assert len(result2.conversation_history) > len(result1.conversation_history) + + def test_final_message_branch_sets_pending_state( + self, trace_data: list[dict[str, Any]], mock_flake: MagicMock + ) -> None: + """Test that final_message branch properly sets pending_final_decision state.""" + # Build conversation history up to service selection complete + conversation_history: list[ChatMessage] = [ + {"role": "user", "content": "What VPNs are available?"}, + {"role": "assistant", "content": trace_data[1]["response"]["message"]}, + {"role": "user", "content": "Hmm zerotier please"}, + ] + + # Mock session state with selected service + session_state: SessionState = cast( + "SessionState", + { + "pending_service_selection": { + "readme_results": [ + { + "input_name": None, + 
"readmes": {"zerotier": "# ZeroTier README"}, + } + ] + } + }, + ) + + # Use trace entry where LLM asks clarifying question in final decision + clarify_trace = trace_data[3] + assert len(clarify_trace["response"]["function_calls"]) == 0 + assert clarify_trace["response"]["message"] != "" + + # Mock to return a service selection, then a clarifying message in final decision + with ( + patch( + "clan_lib.llm.orchestrator.get_llm_service_selection" + ) as mock_selection, + patch("clan_lib.llm.orchestrator.get_llm_final_decision") as mock_final, + ): + mock_selection.return_value = ServiceSelectionResult( + selected_service="zerotier", + service_summary="ZeroTier is a mesh VPN...", + clarifying_message="", + ) + # Return empty function_calls but with a clarifying message + mock_final.return_value = ([], clarify_trace["response"]["message"]) + + result = process_chat_turn( + user_request="Set up zerotier with gchq-local as controller", + flake=mock_flake, + conversation_history=conversation_history, + provider="claude", + session_state=session_state, + ) + + # Verify the final_message branch was taken + assert result.requires_user_response is True + assert result.assistant_message == clarify_trace["response"]["message"] + # Verify pending_final_decision state is set + assert "pending_final_decision" in result.session_state + assert ( + result.session_state["pending_final_decision"]["service_name"] + == "zerotier" + ) + assert result.session_state["pending_final_decision"]["service_summary"] + # No proposed instances yet + assert len(result.proposed_instances) == 0 + + def test_discovery_message_without_readme_requests( + self, mock_flake: MagicMock + ) -> None: + """Test discovery phase when LLM responds with message but no README requests.""" + # Create a response with a message but no get_readme function calls + discovery_message = "I need more information about your network setup. Do you have any machines with static public IP addresses?" 
+ response = create_openai_response([], discovery_message) + + with ( + patch("clan_lib.llm.phases.call_claude_api", return_value=response), + patch( + "clan_lib.llm.phases.create_simplified_service_schemas" + ) as mock_simplified, + patch("clan_lib.llm.orchestrator.get_llm_final_decision") as mock_final, + ): + mock_simplified.return_value = [ + { + "name": "wireguard", + "description": "WireGuard VPN", + "input": None, + }, + { + "name": "zerotier", + "description": "ZeroTier mesh VPN", + "input": None, + }, + ] + mock_final.return_value = ([], "") + + result = process_chat_turn( + user_request="I want to set up a VPN", + flake=mock_flake, + provider="claude", + ) + + # Verify final decision was not called (discovery message without readmes) + assert not mock_final.called + + # Verify the discovery_message without readme_requests branch + assert result.requires_user_response is True + assert result.assistant_message == discovery_message + # No pending state should be set (discovery is asking for clarification) + assert "pending_service_selection" not in result.session_state + assert "pending_final_decision" not in result.session_state + # No proposed instances + assert len(result.proposed_instances) == 0 + # Conversation history should contain the exchange + assert len(result.conversation_history) == 2 + assert result.conversation_history[0]["role"] == "user" + assert result.conversation_history[1]["role"] == "assistant" + + def test_function_calls_in_service_selection( + self, trace_data: list[dict[str, Any]], mock_flake: MagicMock + ) -> None: + """Test when LLM provides configuration directly after service selection.""" + # Use the final trace entry with zerotier configuration + final_trace = trace_data[-1] + function_calls = final_trace["response"]["function_calls"] + assert len(function_calls) == 1 + + # Mock session state with pending service selection + session_state: SessionState = cast( + "SessionState", + { + "pending_service_selection": { + 
"readme_results": [ + { + "input_name": None, + "readmes": {"zerotier": "# ZeroTier README"}, + } + ] + } + }, + ) + + # Build conversation history + conversation_history: list[ChatMessage] = [ + {"role": "user", "content": "What VPNs are available?"}, + {"role": "assistant", "content": "Here are the available VPNs..."}, + ] + + # Mock to select service and immediately provide configuration + with ( + patch( + "clan_lib.llm.orchestrator.get_llm_service_selection" + ) as mock_selection, + patch("clan_lib.llm.orchestrator.get_llm_final_decision") as mock_final, + patch("clan_lib.llm.phases.aggregate_ollama_function_schemas") as mock_agg, + ): + mock_selection.return_value = ServiceSelectionResult( + selected_service="zerotier", + service_summary="ZeroTier mesh VPN", + clarifying_message="", + ) + + # Return function calls (configuration) without asking questions + mock_final.return_value = ( + [ + FunctionCallType( + id="call_0", + call_id="call_0", + type="function_call", + name="zerotier", + arguments=json.dumps(function_calls[0]["arguments"]), + ) + ], + "", # No message, just configuration + ) + + mock_agg.return_value = MagicMock( + tools=[ + { + "type": "function", + "function": {"name": "zerotier", "description": "ZeroTier VPN"}, + } + ] + ) + + result = process_chat_turn( + user_request="Use zerotier with gchq-local as controller, qube-email as moon, rest as peers", + flake=mock_flake, + conversation_history=conversation_history, + provider="claude", + session_state=session_state, + ) + + # Verify the function_calls branch in _continue_with_service_selection + assert result.requires_user_response is False + assert len(result.proposed_instances) == 1 + assert result.proposed_instances[0]["module"]["name"] == "zerotier" + # Should have configuration in roles + args = result.proposed_instances[0]["roles"] + assert "controller" in args + assert "moon" in args + assert "peer" in args + # No error + assert result.error is None + + +class TestGetLlmServiceSelection: 
+ """Test get_llm_service_selection with mocked API responses from trace data.""" + + def test_service_selection_with_readmes( + self, trace_data: list[dict[str, Any]] + ) -> None: + """Test service selection phase with README data.""" + # Use trace entry for service selection (stage: select_service) + selection_trace = trace_data[2] + assert selection_trace["stage"] == "select_service" + + # Create README results from trace data + readme_results: dict[str | None, ServiceReadmeCollection] = { + None: ServiceReadmeCollection( + input_name=None, + readmes={ + "wireguard": "# Wireguard VPN\nA fast VPN...", + "zerotier": "# ZeroTier\nA mesh VPN...", + "mycelium": "# Mycelium\nOverlay network...", + "yggdrasil": "# Yggdrasil\nDecentralized routing...", + }, + ) + } + + # Build conversation history up to this point + conversation_history: list[ChatMessage] = [ + {"role": "user", "content": "What VPNs are available?"}, + { + "role": "assistant", + "content": trace_data[1]["response"]["message"], + }, + ] + + # Create expected response with select_service call + function_calls = selection_trace["response"]["function_calls"] + response = create_openai_response(function_calls, "") + + # Mock the API call + with patch("clan_lib.llm.phases.call_claude_api", return_value=response): + result = get_llm_service_selection( + user_request="Hmm zerotier please", + readme_results=readme_results, + conversation_history=conversation_history, + provider="claude", + ) + + # Verify the result + assert result.selected_service == "zerotier" + assert result.service_summary is not None + assert "controller" in result.service_summary.lower() + assert result.clarifying_message == "" + + def test_service_selection_asks_clarifying_question( + self, trace_data: list[dict[str, Any]] + ) -> None: + """Test service selection when LLM asks for clarification.""" + # Use trace entry where LLM asks clarifying question (stage: select_service) + clarify_trace = trace_data[1] + assert clarify_trace["stage"] 
== "select_service" + # Verify this is a clarification (no function calls, has message) + assert len(clarify_trace["response"]["function_calls"]) == 0 + assert clarify_trace["response"]["message"] != "" + + # Create README results + readme_results: dict[str | None, ServiceReadmeCollection] = { + None: ServiceReadmeCollection( + input_name=None, + readmes={ + "wireguard": "# Wireguard README", + "zerotier": "# ZeroTier README", + "mycelium": "# Mycelium README", + "yggdrasil": "# Yggdrasil README", + }, + ) + } + + # No function calls, just a message + response = create_openai_response([], clarify_trace["response"]["message"]) + + # Mock the API call + with patch("clan_lib.llm.phases.call_claude_api", return_value=response): + result = get_llm_service_selection( + user_request="What VPNs are available?", + readme_results=readme_results, + provider="claude", + ) + + # Verify the result - should be a clarifying question + assert result.selected_service is None + assert result.service_summary is None + assert result.clarifying_message != "" + assert "VPN" in result.clarifying_message + + +class TestGetLlmFinalDecision: + """Test get_llm_final_decision with mocked API responses from trace data.""" + + def test_final_decision_with_configuration( + self, trace_data: list[dict[str, Any]], mock_flake: MagicMock + ) -> None: + """Test final decision phase with service configuration.""" + # Use the last trace entry (final decision with configuration) + final_trace = trace_data[-1] + assert final_trace["stage"] == "final_decision" + + response_data = final_trace["response"] + function_calls = response_data["function_calls"] + assert len(function_calls) == 1 + assert function_calls[0]["name"] == "zerotier" + + # Build conversation history + conversation_history: list[ChatMessage] = [ + {"role": "user", "content": "What VPNs are available?"}, + {"role": "assistant", "content": trace_data[1]["response"]["message"]}, + {"role": "user", "content": "Hmm zerotier please"}, + {"role": 
"assistant", "content": trace_data[3]["response"]["message"]}, + {"role": "user", "content": "what is a moon?"}, + {"role": "assistant", "content": trace_data[4]["response"]["message"]}, + ] + + # Mock the schema lookup to return zerotier schema + mock_schema = { + "type": "function", + "function": { + "name": "zerotier", + "description": "ZeroTier VPN configuration", + "parameters": { + "type": "object", + "properties": { + "roles": {"type": "object"}, + }, + }, + }, + } + + # Create response with zerotier function call + response = create_openai_response(function_calls, "") + + # Mock API and schema aggregation + with ( + patch("clan_lib.llm.phases.aggregate_ollama_function_schemas") as mock_agg, + patch("clan_lib.llm.phases.call_claude_api", return_value=response), + ): + mock_agg.return_value = MagicMock(tools=[mock_schema]) + + function_call_results, message = get_llm_final_decision( + user_request="okay then gchq-local as controller and qube-email as moon please everything else as peer", + flake=mock_flake, + selected_service="zerotier", + service_summary="ZeroTier is a mesh VPN...", + conversation_history=conversation_history, + provider="claude", + ) + + # Verify the result + assert len(function_call_results) == 1 + assert function_call_results[0]["name"] == "zerotier" + # Parse the arguments to verify structure + args = json.loads(function_call_results[0]["arguments"]) + assert "roles" in args + assert "controller" in args["roles"] + assert "moon" in args["roles"] + assert "peer" in args["roles"] + + def test_final_decision_asks_clarifying_question( + self, trace_data: list[dict[str, Any]], mock_flake: MagicMock + ) -> None: + """Test final decision when LLM asks for more information.""" + # Use trace entry where LLM asks about moon (stage: final_decision) + clarify_trace = trace_data[3] + assert clarify_trace["stage"] == "final_decision" + # Verify this is a clarification (no function calls, has message) + assert 
len(clarify_trace["response"]["function_calls"]) == 0 + assert clarify_trace["response"]["message"] != "" + + # Build conversation history + conversation_history: list[ChatMessage] = [ + {"role": "user", "content": "What VPNs are available?"}, + {"role": "assistant", "content": trace_data[1]["response"]["message"]}, + {"role": "user", "content": "Hmm zerotier please"}, + ] + + # Mock schema + mock_schema = { + "type": "function", + "function": { + "name": "zerotier", + "description": "ZeroTier VPN", + }, + } + + # No function calls, just a clarifying message + response = create_openai_response([], clarify_trace["response"]["message"]) + + # Mock API and schema aggregation + with ( + patch("clan_lib.llm.phases.aggregate_ollama_function_schemas") as mock_agg, + patch("clan_lib.llm.phases.call_claude_api", return_value=response), + ): + mock_agg.return_value = MagicMock(tools=[mock_schema]) + + function_call_results, message = get_llm_final_decision( + user_request="gchq-local as controller please", + flake=mock_flake, + selected_service="zerotier", + service_summary="ZeroTier is a mesh VPN...", + conversation_history=conversation_history, + provider="claude", + ) + + # Verify the result - should be a clarifying question + assert len(function_call_results) == 0 + assert message != "" + assert "controller" in message.lower() + + +class TestExecuteReadmeRequests: + """Test execute_readme_requests function.""" + + def test_execute_readme_requests_groups_by_input( + self, mock_flake: MagicMock + ) -> None: + """Test that execute_readme_requests groups requests by input_name.""" + # Create readme requests from different inputs + requests: list[ReadmeRequest] = [ + {"input_name": None, "function_name": "wireguard"}, + {"input_name": None, "function_name": "zerotier"}, + {"input_name": "custom-services", "function_name": "my-service"}, + ] + + # Mock get_service_readmes to return different collections per input + with patch("clan_lib.llm.phases.get_service_readmes") as 
mock_get_readmes: + # Setup return values for different inputs + def get_readmes_side_effect( + input_name: str | None, _service_names: list[str], _flake: MagicMock + ) -> ServiceReadmeCollection: + if input_name is None: + return ServiceReadmeCollection( + input_name=None, + readmes={ + "wireguard": "# WireGuard README", + "zerotier": "# ZeroTier README", + }, + ) + return ServiceReadmeCollection( + input_name="custom-services", + readmes={"my-service": "# My Service README"}, + ) + + mock_get_readmes.side_effect = get_readmes_side_effect + + # Execute the requests + results = execute_readme_requests(requests, mock_flake) + + # Verify grouping and fetching + assert len(results) == 2 # Two different input sources + assert None in results + assert "custom-services" in results + + # Verify built-in services + assert results[None].input_name is None + assert "wireguard" in results[None].readmes + assert "zerotier" in results[None].readmes + + # Verify custom service + assert results["custom-services"].input_name == "custom-services" + assert "my-service" in results["custom-services"].readmes + + # Verify get_service_readmes was called correctly + assert mock_get_readmes.call_count == 2 + + def test_execute_readme_requests_single_input(self, mock_flake: MagicMock) -> None: + """Test execute_readme_requests with all requests from same input.""" + requests: list[ReadmeRequest] = [ + {"input_name": None, "function_name": "wireguard"}, + {"input_name": None, "function_name": "zerotier"}, + {"input_name": None, "function_name": "mycelium"}, + ] + + with patch("clan_lib.llm.phases.get_service_readmes") as mock_get_readmes: + mock_get_readmes.return_value = ServiceReadmeCollection( + input_name=None, + readmes={ + "wireguard": "# WireGuard", + "zerotier": "# ZeroTier", + "mycelium": "# Mycelium", + }, + ) + + results = execute_readme_requests(requests, mock_flake) + + # Should only have one input + assert len(results) == 1 + assert None in results + + # Verify all services 
are in the result + assert len(results[None].readmes) == 3 + + # Verify get_service_readmes was called once with all service names + mock_get_readmes.assert_called_once() + call_args = mock_get_readmes.call_args + assert call_args[0][0] is None # input_name + assert set(call_args[0][1]) == {"wireguard", "zerotier", "mycelium"} + + +class TestProcessChatTurnPendingFinalDecision: + """Test process_chat_turn when resuming from pending_final_decision state.""" + + def test_final_message_branch_in_pending_final_decision( + self, trace_data: list[dict[str, Any]], mock_flake: MagicMock + ) -> None: + """Test the final_message branch when resuming from pending_final_decision: the LLM asks another question instead of returning a configuration.""" + # Build conversation history including the question that led to pending state + conversation_history: list[ChatMessage] = [ + {"role": "user", "content": "What VPNs are available?"}, + {"role": "assistant", "content": trace_data[1]["response"]["message"]}, + {"role": "user", "content": "Hmm zerotier please"}, + { + "role": "assistant", + "content": trace_data[3]["response"]["message"], + }, # Question about controller + ] + + # Session state with pending_final_decision + session_state: SessionState = cast( + "SessionState", + { + "pending_final_decision": { + "service_name": "zerotier", + "service_summary": "ZeroTier is a mesh VPN that provides...", + } + }, + ) + + # Use trace entry where LLM asks another clarifying question + # (e.g., asking about moons after user answered about controller) + clarify_trace = trace_data[4] + assert clarify_trace["stage"] == "final_decision" + assert len(clarify_trace["response"]["function_calls"]) == 0 + assert clarify_trace["response"]["message"] != "" + + # Mock the final decision to return another clarifying question + with ( + patch("clan_lib.llm.phases.aggregate_ollama_function_schemas") as mock_agg, + patch("clan_lib.llm.phases.call_claude_api") as mock_call, + ): + mock_agg.return_value = MagicMock( + tools=[ + { + "type": "function", +
"function": {"name": "zerotier", "description": "ZeroTier VPN"}, + } + ] + ) + + # Return no function calls but a clarifying message + response = create_openai_response([], clarify_trace["response"]["message"]) + mock_call.return_value = response + + result = process_chat_turn( + user_request="gchq-local as controller", + flake=mock_flake, + conversation_history=conversation_history, + provider="claude", + session_state=session_state, + ) + + # Verify the final_message branch was taken: the clarifying question is returned to the user + assert result.requires_user_response is True + assert result.assistant_message == clarify_trace["response"]["message"] + + # Verify pending_final_decision state is STILL set (not cleared) + assert "pending_final_decision" in result.session_state + assert ( + result.session_state["pending_final_decision"]["service_name"] + == "zerotier" + ) + + # No proposed instances yet + assert len(result.proposed_instances) == 0 + + # Conversation history should have grown + assert len(result.conversation_history) == len(conversation_history) + 2 + + def test_pending_final_decision_completes_with_configuration( + self, trace_data: list[dict[str, Any]], mock_flake: MagicMock + ) -> None: + """Test completing configuration from pending_final_decision state.""" + # Build conversation history + conversation_history: list[ChatMessage] = [ + {"role": "user", "content": "What VPNs are available?"}, + {"role": "assistant", "content": "Here are the VPNs..."}, + {"role": "user", "content": "Use zerotier"}, + {"role": "assistant", "content": "Which machine as controller?"}, + ] + + # Session state with pending_final_decision + session_state: SessionState = cast( + "SessionState", + { + "pending_final_decision": { + "service_name": "zerotier", + "service_summary": "ZeroTier mesh VPN", + } + }, + ) + + # Use final trace entry with complete configuration + final_trace = trace_data[-1] + function_calls = final_trace["response"]["function_calls"] + assert len(function_calls) == 1 + + # Mock to
return configuration + with ( + patch("clan_lib.llm.phases.aggregate_ollama_function_schemas") as mock_agg, + patch("clan_lib.llm.phases.call_claude_api") as mock_call, + ): + mock_agg.return_value = MagicMock( + tools=[ + { + "type": "function", + "function": {"name": "zerotier", "description": "ZeroTier VPN"}, + } + ] + ) + + response = create_openai_response(function_calls, "") + mock_call.return_value = response + + result = process_chat_turn( + user_request="gchq-local as controller, qube-email as moon, rest as peers", + flake=mock_flake, + conversation_history=conversation_history, + provider="claude", + session_state=session_state, + ) + + # Verify configuration completed + assert result.requires_user_response is False + assert len(result.proposed_instances) == 1 + assert result.proposed_instances[0]["module"]["name"] == "zerotier" + + # Verify pending_final_decision state is CLEARED + assert "pending_final_decision" not in result.session_state + + # No error + assert result.error is None + + +class TestProgressCallbacks: + """Test progress_callback functionality in process_chat_turn.""" + + def test_progress_callback_during_readme_fetch( + self, trace_data: list[dict[str, Any]], mock_flake: MagicMock + ) -> None: + """Test that progress_callback is called during README fetching.""" + # Use trace entry with README requests + discovery_trace = trace_data[0] + function_calls = discovery_trace["response"]["function_calls"] + assert len(function_calls) > 0 + + # Track progress events + progress_events: list[Any] = [] + + def track_progress(event: Any) -> None: + progress_events.append(event) + + # Create response with get_readme calls + response = create_openai_response(function_calls, "") + + with ( + patch("clan_lib.llm.phases.call_claude_api", return_value=response), + patch("clan_lib.llm.orchestrator.execute_readme_requests") as mock_execute, + patch( + "clan_lib.llm.orchestrator.get_llm_service_selection" + ) as mock_selection, +
patch("clan_lib.llm.orchestrator.get_llm_final_decision") as mock_final, + ): + mock_execute.return_value = { + None: ServiceReadmeCollection( + input_name=None, + readmes={ + "wireguard": "# WireGuard README", + "zerotier": "# ZeroTier README", + "mycelium": "# Mycelium README", + "yggdrasil": "# Yggdrasil README", + }, + ) + } + mock_selection.return_value = ServiceSelectionResult( + selected_service=None, + service_summary=None, + clarifying_message=trace_data[1]["response"]["message"], + ) + mock_final.return_value = ([], "") + + result = process_chat_turn( + user_request="What VPNs are available?", + flake=mock_flake, + provider="claude", + progress_callback=track_progress, + ) + + # Verify final decision was not called + assert not mock_final.called + + # Verify progress events were sent + assert len(progress_events) > 0 + + # Check for discovery progress events + discovery_events = [ + e for e in progress_events if isinstance(e, DiscoveryProgressEvent) + ] + assert len(discovery_events) >= 2 # At least start and complete + + # Check for readme fetch progress events + fetch_events = [ + e for e in progress_events if isinstance(e, ReadmeFetchProgressEvent) + ] + assert len(fetch_events) >= 2 # fetching and complete + + # Verify the fetching event has correct data + fetching_event = next(e for e in fetch_events if e.status == "fetching") + assert fetching_event.count == len(function_calls) + # Service names carry a source suffix (e.g. "(from built-in)"), so match by substring rather than equality + assert any("wireguard" in name for name in fetching_event.service_names) + + # Verify the complete event + complete_event = next(e for e in fetch_events if e.status == "complete") + assert complete_event.count == len(function_calls) + + # Result should still be successful + assert result.requires_user_response is True + + def test_progress_callback_through_full_workflow( + self, trace_data: list[dict[str, Any]], mock_flake: MagicMock + ) -> None: + """Test progress_callback through entire workflow from
discovery to config.""" + progress_events: list[Any] = [] + + def track_progress(event: Any) -> None: + progress_events.append(event) + + # Setup for full workflow + discovery_response = create_openai_response( + trace_data[0]["response"]["function_calls"], + trace_data[0]["response"]["message"], + ) + + with ( + patch( + "clan_lib.llm.phases.call_claude_api", return_value=discovery_response + ), + patch("clan_lib.llm.orchestrator.execute_readme_requests") as mock_execute, + patch( + "clan_lib.llm.orchestrator.get_llm_service_selection" + ) as mock_selection, + patch("clan_lib.llm.orchestrator.get_llm_final_decision") as mock_final, + patch("clan_lib.llm.phases.aggregate_ollama_function_schemas") as mock_agg, + ): + mock_execute.return_value = { + None: ServiceReadmeCollection( + input_name=None, readmes={"zerotier": "# ZeroTier README"} + ) + } + mock_selection.return_value = ServiceSelectionResult( + selected_service="zerotier", + service_summary="ZeroTier mesh VPN", + clarifying_message="", + ) + # Return configuration + final_trace = trace_data[-1] + mock_final.return_value = ( + [ + FunctionCallType( + id="call_0", + call_id="call_0", + type="function_call", + name="zerotier", + arguments=json.dumps( + final_trace["response"]["function_calls"][0]["arguments"] + ), + ) + ], + "", + ) + mock_agg.return_value = MagicMock( + tools=[ + { + "type": "function", + "function": {"name": "zerotier", "description": "ZeroTier VPN"}, + } + ] + ) + + result = process_chat_turn( + user_request="Setup zerotier with gchq-local as controller", + flake=mock_flake, + provider="claude", + progress_callback=track_progress, + ) + + # Verify we got progress events for all phases + discovery_events = [ + e for e in progress_events if isinstance(e, DiscoveryProgressEvent) + ] + fetch_events = [ + e for e in progress_events if isinstance(e, ReadmeFetchProgressEvent) + ] + selection_events = [ + e + for e in progress_events + if isinstance(e, ServiceSelectionProgressEvent) + ] + 
final_events = [ + e for e in progress_events if isinstance(e, FinalDecisionProgressEvent) + ] + + # Should have events from all phases + assert len(discovery_events) > 0 + assert len(fetch_events) > 0 + assert len(selection_events) > 0 + assert len(final_events) > 0 + + # Result should be successful with config + assert result.requires_user_response is False + assert len(result.proposed_instances) == 1 + + +class TestErrorCases: + """Test error handling in process_chat_turn.""" + + def test_llm_provides_no_readme_requests_and_no_message( + self, mock_flake: MagicMock + ) -> None: + """Test error case when LLM provides neither readme requests nor message.""" + # Create response with no function calls and no message (unexpected) + response = create_openai_response([], "") + + with ( + patch("clan_lib.llm.phases.call_claude_api", return_value=response), + pytest.raises(ClanAiError, match="did not provide any response"), + ): + process_chat_turn( + user_request="Setup a VPN", + flake=mock_flake, + provider="claude", + ) + + def test_exception_during_processing(self, mock_flake: MagicMock) -> None: + """Test exception handling in process_chat_turn.""" + # Mock to raise an exception during discovery + with ( + patch( + "clan_lib.llm.orchestrator.get_llm_discovery_phase", + side_effect=ValueError("Test error"), + ), + pytest.raises(ValueError, match="Test error"), + ): + process_chat_turn( + user_request="Setup a VPN", + flake=mock_flake, + provider="claude", + ) + + def test_exception_with_existing_conversation_history( + self, mock_flake: MagicMock + ) -> None: + """Test exception handling with existing conversation history.""" + conversation_history: list[ChatMessage] = [ + {"role": "user", "content": "What VPNs are available?"}, + {"role": "assistant", "content": "Here are the options..."}, + ] + + with ( + patch( + "clan_lib.llm.orchestrator.get_llm_discovery_phase", + side_effect=RuntimeError("Network error"), + ), + pytest.raises(RuntimeError, match="Network 
error"), + ): + process_chat_turn( + user_request="Setup zerotier", + flake=mock_flake, + conversation_history=conversation_history, + provider="claude", + ) + + def test_progress_callback_final_decision_reviewing_and_complete( + self, trace_data: list[dict[str, Any]], mock_flake: MagicMock + ) -> None: + """Test FinalDecisionProgressEvent with reviewing and complete statuses.""" + progress_events: list[Any] = [] + + def track_progress(event: Any) -> None: + progress_events.append(event) + + # Build conversation history and session state for pending_final_decision + conversation_history: list[ChatMessage] = [ + {"role": "user", "content": "Setup VPN"}, + {"role": "assistant", "content": "Which service?"}, + {"role": "user", "content": "Use zerotier"}, + {"role": "assistant", "content": "Which machine as controller?"}, + ] + + session_state: SessionState = cast( + "SessionState", + { + "pending_final_decision": { + "service_name": "zerotier", + "service_summary": "ZeroTier mesh VPN", + } + }, + ) + + # Use final trace with configuration + final_trace = trace_data[-1] + function_calls = final_trace["response"]["function_calls"] + + with ( + patch("clan_lib.llm.phases.aggregate_ollama_function_schemas") as mock_agg, + patch("clan_lib.llm.phases.call_claude_api") as mock_call, + ): + mock_agg.return_value = MagicMock( + tools=[ + { + "type": "function", + "function": {"name": "zerotier", "description": "ZeroTier VPN"}, + } + ] + ) + response = create_openai_response(function_calls, "") + mock_call.return_value = response + + result = process_chat_turn( + user_request="gchq-local as controller, qube-email as moon, rest as peers", + flake=mock_flake, + conversation_history=conversation_history, + provider="claude", + session_state=session_state, + progress_callback=track_progress, + ) + + # Verify we got FinalDecisionProgressEvent with both statuses + final_events = [ + e for e in progress_events if isinstance(e, FinalDecisionProgressEvent) + ] + assert len(final_events) 
>= 2 + + # Check for "reviewing" status + reviewing_events = [e for e in final_events if e.status == "reviewing"] + assert len(reviewing_events) >= 1 + + # Check for "complete" status + complete_events = [e for e in final_events if e.status == "complete"] + assert len(complete_events) >= 1 + + # Result should be successful + assert result.requires_user_response is False + assert len(result.proposed_instances) == 1 + + def test_service_selection_fails_no_service_selected( + self, mock_flake: MagicMock + ) -> None: + """Test error when service selection returns no service and no clarification.""" + # Setup discovery phase to return READMEs + discovery_response = create_openai_response( + [ + {"name": "get_readme", "arguments": {"function_name": "zerotier"}}, + ], + "", + ) + + with ( + patch( + "clan_lib.llm.phases.call_claude_api", return_value=discovery_response + ), + patch("clan_lib.llm.orchestrator.execute_readme_requests") as mock_execute, + patch( + "clan_lib.llm.orchestrator.get_llm_service_selection" + ) as mock_selection, + patch("clan_lib.llm.orchestrator.get_llm_final_decision") as mock_final, + ): + mock_execute.return_value = { + None: ServiceReadmeCollection( + input_name=None, readmes={"zerotier": "# ZeroTier"} + ) + } + # Return invalid selection result (no service, no clarification) + mock_selection.return_value = ServiceSelectionResult( + selected_service=None, + service_summary=None, + clarifying_message="", + ) + mock_final.return_value = ([], "") + + # Should raise ClanAiError + with pytest.raises(ClanAiError, match="Failed to select service"): + process_chat_turn( + user_request="Setup VPN", + flake=mock_flake, + provider="claude", + ) + + +class TestGetLlmServiceSelectionErrors: + """Test error cases in get_llm_service_selection.""" + + def test_multiple_select_service_calls(self) -> None: + """Test error when LLM returns multiple select_service calls.""" + readme_results: dict[str | None, ServiceReadmeCollection] = { + None: 
ServiceReadmeCollection( + input_name=None, readmes={"zerotier": "# ZeroTier"} + ) + } + + # Create response with multiple select_service calls + function_calls = [ + {"name": "select_service", "arguments": {"service_name": "zerotier"}}, + {"name": "select_service", "arguments": {"service_name": "wireguard"}}, + ] + response = create_openai_response(function_calls, "") + + with patch("clan_lib.llm.phases.call_claude_api", return_value=response): + result = get_llm_service_selection( + user_request="Setup VPN", + readme_results=readme_results, + provider="claude", + ) + + # Should return error - no service selected + assert result.selected_service is None + assert result.service_summary is None + # Clarifying message should indicate the error + assert result.clarifying_message != "" + + def test_wrong_function_call_name(self) -> None: + """Test error when LLM calls wrong function instead of select_service.""" + readme_results: dict[str | None, ServiceReadmeCollection] = { + None: ServiceReadmeCollection( + input_name=None, readmes={"zerotier": "# ZeroTier"} + ) + } + + # Create response with wrong function name + function_calls = [ + {"name": "configure_service", "arguments": {"service_name": "zerotier"}}, + ] + response = create_openai_response(function_calls, "") + + with patch("clan_lib.llm.phases.call_claude_api", return_value=response): + result = get_llm_service_selection( + user_request="Setup VPN", + readme_results=readme_results, + provider="claude", + ) + + # Should return error + assert result.selected_service is None + assert result.service_summary is None + + def test_missing_required_fields(self) -> None: + """Test error when select_service call is missing required fields.""" + readme_results: dict[str | None, ServiceReadmeCollection] = { + None: ServiceReadmeCollection( + input_name=None, readmes={"zerotier": "# ZeroTier"} + ) + } + + # Create response with missing summary field + function_calls = [ + { + "name": "select_service", + "arguments": 
{"service_name": "zerotier"}, # Missing 'summary' + }, + ] + response = create_openai_response(function_calls, "") + + with patch("clan_lib.llm.phases.call_claude_api", return_value=response): + result = get_llm_service_selection( + user_request="Setup VPN", + readme_results=readme_results, + provider="claude", + ) + + # Should return error + assert result.selected_service is None + assert result.service_summary is None + + def test_invalid_json_arguments(self) -> None: + """Test error when select_service arguments cannot be parsed.""" + readme_results: dict[str | None, ServiceReadmeCollection] = { + None: ServiceReadmeCollection( + input_name=None, readmes={"zerotier": "# ZeroTier"} + ) + } + + # Create a malformed response (manually construct to avoid JSON parsing) + response = cast( + "OpenAIChatCompletionResponse", + { + "choices": [ + { + "message": { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_0", + "type": "function", + "function": { + "name": "select_service", + "arguments": "{invalid json}", # Invalid JSON + }, + } + ], + }, + } + ], + }, + ) + + with patch("clan_lib.llm.phases.call_claude_api", return_value=response): + result = get_llm_service_selection( + user_request="Setup VPN", + readme_results=readme_results, + provider="claude", + ) + + # Should handle error gracefully + assert result.selected_service is None + assert result.service_summary is None + + def test_no_function_calls_and_no_message(self) -> None: + """Test error when LLM provides no function calls and no message.""" + readme_results: dict[str | None, ServiceReadmeCollection] = { + None: ServiceReadmeCollection( + input_name=None, readmes={"zerotier": "# ZeroTier"} + ) + } + + # Response with no function calls and no message + response = create_openai_response([], "") + + with patch("clan_lib.llm.phases.call_claude_api", return_value=response): + result = get_llm_service_selection( + user_request="Setup VPN", + readme_results=readme_results, + 
provider="claude", + ) + + # Should return error + assert result.selected_service is None + assert result.service_summary is None + assert result.clarifying_message != "" + + +class TestGetLlmFinalDecisionErrors: + """Test error cases in get_llm_final_decision.""" + + def test_multiple_tools_for_service(self, mock_flake: MagicMock) -> None: + """Test error when multiple tools match the selected service.""" + with ( + patch("clan_lib.llm.phases.aggregate_ollama_function_schemas") as mock_agg, + patch("clan_lib.llm.phases.call_claude_api") as mock_call, + ): + # Mock multiple tools with same name (unexpected) + mock_agg.return_value = MagicMock( + tools=[ + { + "type": "function", + "function": {"name": "zerotier", "description": "ZeroTier 1"}, + }, + { + "type": "function", + "function": {"name": "zerotier", "description": "ZeroTier 2"}, + }, + ] + ) + + response = create_openai_response([], "test message") + mock_call.return_value = response + + # Should raise ClanAiError + with pytest.raises(ClanAiError, match="Expected exactly 1 tool"): + get_llm_final_decision( + user_request="Setup zerotier", + flake=mock_flake, + selected_service="zerotier", + service_summary="ZeroTier VPN", + provider="claude", + ) + + def test_pending_final_decision_no_response_error( + self, mock_flake: MagicMock + ) -> None: + """Test error when LLM provides neither function_calls nor message in pending_final_decision.""" + # Build conversation history + conversation_history: list[ChatMessage] = [ + {"role": "user", "content": "Setup VPN"}, + {"role": "assistant", "content": "Which service?"}, + {"role": "user", "content": "Use zerotier"}, + {"role": "assistant", "content": "Which machine as controller?"}, + ] + + # Session state with pending_final_decision + session_state: SessionState = cast( + "SessionState", + { + "pending_final_decision": { + "service_name": "zerotier", + "service_summary": "ZeroTier mesh VPN", + } + }, + ) + + # Mock to return neither function_calls nor message 
(unexpected) + with ( + patch("clan_lib.llm.phases.aggregate_ollama_function_schemas") as mock_agg, + patch("clan_lib.llm.phases.call_claude_api") as mock_call, + ): + mock_agg.return_value = MagicMock( + tools=[ + { + "type": "function", + "function": {"name": "zerotier", "description": "ZeroTier VPN"}, + } + ] + ) + # Empty response - no function calls, no message + response = create_openai_response([], "") + mock_call.return_value = response + + # Should raise ClanAiError + with pytest.raises(ClanAiError, match="LLM did not provide any response"): + process_chat_turn( + user_request="gchq-local as controller", + flake=mock_flake, + conversation_history=conversation_history, + provider="claude", + session_state=session_state, + ) + + +class TestParseOpenaiResponse: + """Test parse_openai_response function from endpoints.py.""" + + def test_parse_with_function_calls_and_content(self) -> None: + """Test parsing response with both function calls and text content.""" + response = cast( + "OpenAIChatCompletionResponse", + { + "choices": [ + { + "message": { + "role": "assistant", + "content": "Here is my response", + "tool_calls": [ + { + "id": "call_123", + "type": "function", + "function": { + "name": "test_function", + "arguments": '{"arg1": "value1"}', + }, + } + ], + } + } + ] + }, + ) + + function_calls, message = parse_openai_response(response) + + assert len(function_calls) == 1 + assert function_calls[0]["name"] == "test_function" + assert function_calls[0]["arguments"] == '{"arg1": "value1"}' + assert function_calls[0]["id"] == "call_123" + assert function_calls[0]["type"] == "function_call" + assert message == "Here is my response" + + def test_parse_with_no_choices(self) -> None: + """Test parsing response with no choices.""" + response = cast("OpenAIChatCompletionResponse", {"choices": []}) + + function_calls, message = parse_openai_response(response) + + assert len(function_calls) == 0 + assert message == "" + + def 
test_parse_with_missing_choices_key(self) -> None: + """Test parsing response with missing choices key.""" + response = cast("OpenAIChatCompletionResponse", {}) + + function_calls, message = parse_openai_response(response) + + assert len(function_calls) == 0 + assert message == "" + + def test_parse_with_no_tool_calls(self) -> None: + """Test parsing response with content but no tool calls.""" + response = cast( + "OpenAIChatCompletionResponse", + { + "choices": [ + { + "message": { + "role": "assistant", + "content": "Just a text response", + } + } + ] + }, + ) + + function_calls, message = parse_openai_response(response) + + assert len(function_calls) == 0 + assert message == "Just a text response" + + def test_parse_with_tool_calls_but_no_content(self) -> None: + """Test parsing response with tool calls but empty content.""" + response = cast( + "OpenAIChatCompletionResponse", + { + "choices": [ + { + "message": { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_456", + "type": "function", + "function": { + "name": "configure_service", + "arguments": '{"service": "zerotier"}', + }, + } + ], + } + } + ] + }, + ) + + function_calls, message = parse_openai_response(response) + + assert len(function_calls) == 1 + assert function_calls[0]["name"] == "configure_service" + assert message == "" + + def test_parse_with_multiple_tool_calls(self) -> None: + """Test parsing response with multiple function calls.""" + response = cast( + "OpenAIChatCompletionResponse", + { + "choices": [ + { + "message": { + "role": "assistant", + "content": "Calling multiple functions", + "tool_calls": [ + { + "id": "call_1", + "function": { + "name": "func1", + "arguments": "{}", + }, + }, + { + "id": "call_2", + "function": { + "name": "func2", + "arguments": '{"key": "value"}', + }, + }, + ], + } + } + ] + }, + ) + + function_calls, message = parse_openai_response(response) + + assert len(function_calls) == 2 + assert function_calls[0]["name"] == "func1" + 
assert function_calls[1]["name"] == "func2" + assert message == "Calling multiple functions" + + +class TestParseOllamaResponse: + """Test parse_ollama_response function from endpoints.py.""" + + def test_parse_with_function_calls_and_content(self) -> None: + """Test parsing Ollama response with both function calls and content.""" + response = cast( + "OllamaChatResponse", + { + "message": { + "role": "assistant", + "content": "Here is my response", + "tool_calls": [ + { + "function": { + "name": "test_function", + "arguments": {"arg1": "value1"}, + } + } + ], + } + }, + ) + + function_calls, message = parse_ollama_response(response) + + assert len(function_calls) == 1 + assert function_calls[0]["name"] == "test_function" + # Ollama response has dict arguments that get JSON stringified + args = json.loads(function_calls[0]["arguments"]) + assert args == {"arg1": "value1"} + assert function_calls[0]["type"] == "function_call" + assert message == "Here is my response" + + def test_parse_with_no_message(self) -> None: + """Test parsing Ollama response with no message.""" + response = cast("OllamaChatResponse", {}) + + function_calls, message = parse_ollama_response(response) + + assert len(function_calls) == 0 + assert message == "" + + def test_parse_with_no_tool_calls(self) -> None: + """Test parsing Ollama response with content but no tool calls.""" + response = cast( + "OllamaChatResponse", + { + "message": { + "role": "assistant", + "content": "Just a text response", + } + }, + ) + + function_calls, message = parse_ollama_response(response) + + assert len(function_calls) == 0 + assert message == "Just a text response" + + def test_parse_with_tool_calls_but_no_content(self) -> None: + """Test parsing Ollama response with tool calls but empty content.""" + response = cast( + "OllamaChatResponse", + { + "message": { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "function": { + "name": "configure_service", + "arguments": {"service": "zerotier"}, + } 
+ } + ], + } + }, + ) + + function_calls, message = parse_ollama_response(response) + + assert len(function_calls) == 1 + assert function_calls[0]["name"] == "configure_service" + assert message == "" + + def test_parse_with_multiple_tool_calls(self) -> None: + """Test parsing Ollama response with multiple function calls.""" + response = cast( + "OllamaChatResponse", + { + "message": { + "role": "assistant", + "content": "Calling multiple functions", + "tool_calls": [ + { + "function": { + "name": "func1", + "arguments": {}, + } + }, + { + "function": { + "name": "func2", + "arguments": {"key": "value"}, + } + }, + ], + } + }, + ) + + function_calls, message = parse_ollama_response(response) + + assert len(function_calls) == 2 + assert function_calls[0]["name"] == "func1" + assert function_calls[1]["name"] == "func2" + assert message == "Calling multiple functions" From 51141772b37105566591e48dfe82a3b7baa41751 Mon Sep 17 00:00:00 2001 From: Qubasa Date: Wed, 22 Oct 2025 15:33:13 +0200 Subject: [PATCH 5/5] clan_lib: Add llm integration tests --- checks/flake-module.nix | 1 + checks/llm/default.nix | 83 ++++++ checks/llm/qwen3-4b-instruct.nix | 70 +++++ pkgs/clan-cli/clan_lib/llm/container_test.py | 274 +++++++++++++++++++ 4 files changed, 428 insertions(+) create mode 100644 checks/llm/default.nix create mode 100644 checks/llm/qwen3-4b-instruct.nix create mode 100644 pkgs/clan-cli/clan_lib/llm/container_test.py diff --git a/checks/flake-module.nix b/checks/flake-module.nix index 651f5a0e4..41b0a400b 100644 --- a/checks/flake-module.nix +++ b/checks/flake-module.nix @@ -87,6 +87,7 @@ in # Container Tests nixos-test-container = self.clanLib.test.containerTest ./container nixosTestArgs; nixos-systemd-abstraction = self.clanLib.test.containerTest ./systemd-abstraction nixosTestArgs; + nixos-llm-test = self.clanLib.test.containerTest ./llm nixosTestArgs; nixos-test-user-firewall-iptables = self.clanLib.test.containerTest ./user-firewall/iptables.nix nixosTestArgs; 
nixos-test-user-firewall-nftables = self.clanLib.test.containerTest ./user-firewall/nftables.nix nixosTestArgs; nixos-test-extra-python-packages = self.clanLib.test.containerTest ./test-extra-python-packages nixosTestArgs; diff --git a/checks/llm/default.nix b/checks/llm/default.nix new file mode 100644 index 000000000..c79beb0db --- /dev/null +++ b/checks/llm/default.nix @@ -0,0 +1,83 @@ +{ self, pkgs, ... }: + +let + + cli = self.packages.${pkgs.hostPlatform.system}.clan-cli-full; + + ollama-model = pkgs.callPackage ./qwen3-4b-instruct.nix { }; +in +{ + name = "llm"; + + nodes = { + peer1 = + { pkgs, ... }: + { + + users.users.text-user = { + isNormalUser = true; + linger = true; + uid = 1000; + extraGroups = [ "systemd-journal" ]; + }; + + # Set environment variables for user systemd + environment.extraInit = '' + if [ "$(id -u)" = "1000" ]; then + export XDG_RUNTIME_DIR="/run/user/1000" + export DBUS_SESSION_BUS_ADDRESS="unix:path=/run/user/1000/bus" + + ollama_dir="$HOME/.ollama" + mkdir -p "$ollama_dir" + ln -sf ${ollama-model}/models "$ollama_dir"/models + fi + ''; + + # Enable PAM for user systemd sessions + security.pam.services.systemd-user = { + startSession = true; + # Workaround for containers - use pam_permit to avoid helper binary issues + text = pkgs.lib.mkForce '' + account required pam_permit.so + session required pam_permit.so + session required pam_env.so conffile=/etc/pam/environment readenv=0 + session required ${pkgs.systemd}/lib/security/pam_systemd.so + ''; + }; + + environment.systemPackages = [ + cli + pkgs.ollama + (cli.pythonRuntime.withPackages ( + ps: with ps; [ + pytest + pytest-xdist + (cli.pythonRuntime.pkgs.toPythonModule cli) + self.legacyPackages.${pkgs.hostPlatform.system}.nixosTestLib + ] + )) + ]; + }; + }; + + testScript = + { ... 
}: + '' + start_all() + + peer1.wait_for_unit("multi-user.target") + peer1.wait_for_unit("user@1000.service") + + # Fix user journal permissions so text-user can read their own logs + peer1.succeed("chown text-user:systemd-journal /var/log/journal/*/user-1000.journal*") + peer1.succeed("chmod 640 /var/log/journal/*/user-1000.journal*") + # the -o addopts="" is needed to overwrite any args coming from pyproject.toml + # -p no:cacheprovider disables pytest's cacheprovider which tries to write to the nix store in this case + cmd = "su - text-user -c 'pytest -s -n0 -m service_runner -p no:cacheprovider -o addopts="" ${cli.passthru.sourceWithTests}/clan_lib/llm'" + print("Running tests with command: " + cmd) + + + # Run tests as text-user (environment variables are set automatically) + peer1.succeed(cmd) + ''; +} diff --git a/checks/llm/qwen3-4b-instruct.nix b/checks/llm/qwen3-4b-instruct.nix new file mode 100644 index 000000000..fcff28ebc --- /dev/null +++ b/checks/llm/qwen3-4b-instruct.nix @@ -0,0 +1,70 @@ +{ pkgs }: + +let + # Got them from https://github.com/Gholamrezadar/ollama-direct-downloader + + # Download manifest + manifest = pkgs.fetchurl { + url = "https://registry.ollama.ai/v2/library/qwen3/manifests/4b-instruct"; + # You'll need to calculate this hash - run the derivation once and it will tell you the correct hash + hash = "sha256-Dtze80WT6sGqK+nH0GxDLc+BlFrcpeyi8nZiwY8Wi6A="; + }; + + # Download blobs + blob1 = pkgs.fetchurl { + url = "https://registry.ollama.ai/v2/library/qwen3/blobs/sha256:b72accf9724e93698c57cbd3b1af2d3341b3d05ec2089d86d273d97964853cd2"; + hash = "sha256-tyrM+XJOk2mMV8vTsa8tM0Gz0F7CCJ2G0nPZeWSFPNI="; + }; + + blob2 = pkgs.fetchurl { + url = 
"https://registry.ollama.ai/v2/library/qwen3/blobs/sha256:eade0a07cac7712787bbce23d12f9306adb4781d873d1df6e16f7840fa37afec"; + hash = "sha256-6t4KB8rHcSeHu84j0S+TBq20eB2HPR324W94QPo3r+w="; + }; + + blob4 = pkgs.fetchurl { + url = "https://registry.ollama.ai/v2/library/qwen3/blobs/sha256:d18a5cc71b84bc4af394a31116bd3932b42241de70c77d2b76d69a314ec8aa12"; + hash = "sha256-0YpcxxuEvErzlKMRFr05MrQiQd5wx30rdtaaMU7IqhI="; + }; + + blob5 = pkgs.fetchurl { + url = "https://registry.ollama.ai/v2/library/qwen3/blobs/sha256:0914c7781e001948488d937994217538375b4fd8c1466c5e7a625221abd3ea7a"; + hash = "sha256-CRTHeB4AGUhIjZN5lCF1ODdbT9jBRmxeemJSIavT6no="; + }; +in +pkgs.stdenv.mkDerivation { + pname = "ollama-qwen3-4b-instruct"; + version = "1.0"; + + dontUnpack = true; + + buildPhase = '' + mkdir -p $out/models/manifests/registry.ollama.ai/library/qwen3 + mkdir -p $out/models/blobs + + # Copy manifest + cp ${manifest} $out/models/manifests/registry.ollama.ai/library/qwen3/4b-instruct + + # Copy blobs with correct names + cp ${blob1} $out/models/blobs/sha256-b72accf9724e93698c57cbd3b1af2d3341b3d05ec2089d86d273d97964853cd2 + cp ${blob2} $out/models/blobs/sha256-85e4a5b7b8ef0e48af0e8658f5aaab9c2324c76c1641493f4d1e25fce54b18b9 + cp ${blob3} $out/models/blobs/sha256-eade0a07cac7712787bbce23d12f9306adb4781d873d1df6e16f7840fa37afec + cp ${blob4} $out/models/blobs/sha256-d18a5cc71b84bc4af394a31116bd3932b42241de70c77d2b76d69a314ec8aa12 + cp ${blob5} $out/models/blobs/sha256-0914c7781e001948488d937994217538375b4fd8c1466c5e7a625221abd3ea7a + ''; + + installPhase = '' + # buildPhase already created everything in $out + : + ''; + + meta = with pkgs.lib; { + description = "Qwen3 4B Instruct model for Ollama"; + license = "apache-2.0"; + platforms = platforms.all; + }; +} diff --git a/pkgs/clan-cli/clan_lib/llm/container_test.py b/pkgs/clan-cli/clan_lib/llm/container_test.py new file mode 100644 index 000000000..c9f9be420 --- /dev/null +++ b/pkgs/clan-cli/clan_lib/llm/container_test.py @@ -0,0 
+1,274 @@ +import contextlib +import json +from collections.abc import Iterator +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest +from clan_lib.flake.flake import Flake +from clan_lib.llm.llm import ( + process_chat_turn, +) +from clan_lib.llm.service import create_llm_model, run_llm_service +from clan_lib.service_runner import create_service_manager + + +@pytest.fixture +def mock_flake() -> MagicMock: + """Create a mock Flake object with test data.""" + flake_mock = MagicMock(spec=Flake) + test_data_dir = Path(__file__).parent / "container_data" + + def load_json(filename: str) -> dict | list: + """Load and parse a JSON file from container_data directory.""" + return json.loads((test_data_dir / filename).read_text()) + + # Configure flake.select to return values based on the argument + def select_side_effect(arg: str) -> dict | list: + # Handle staticModules readme requests dynamically + if arg.startswith( + "clanInternals.inventoryClass.staticModules.{" + ) and arg.endswith("}.manifest.readme"): + # Extract service names from the pattern: {service1,service2,...} + services_part = arg.split("{")[1].split("}")[0] + requested_services = [s.strip() for s in services_part.split(",")] + + # Load all VPN readmes (always returns a dict for this file) + all_readmes = load_json("vpns_readme.json") + assert isinstance(all_readmes, dict), ( + "vpns_readme.json should contain a dict" + ) + + # Return only the requested services + return { + svc: all_readmes[svc] + for svc in requested_services + if svc in all_readmes + } + + match arg: + case "clanInternals.inventoryClass.inventory.{instances,machines,meta}": + return load_json("inventory_instances_machines_meta.json") + case "clanInternals.inventoryClass.inventory.{tags}": + return load_json("inventory_tags.json") + case "clanInternals.inventoryClass.modulesPerSource": + return load_json("modules_per_source.json") + case "clanInternals.inventoryClass.staticModules": + return 
load_json("static_modules.json") + case _: + msg = f"Unexpected flake.select argument: {arg}" + raise ValueError(msg) + + flake_mock.select.side_effect = select_side_effect + return flake_mock + + +@pytest.fixture +def mock_nix_shell() -> Iterator[MagicMock]: + """Patch nix_shell function with test data.""" + + # Configure nix_shell to return values based on the arguments + def nix_shell_side_effect(packages: list[str], cmd: list[str]) -> list[str]: + match (tuple(packages), tuple(cmd)): + case (("ollama",), ("ollama", "pull", _)): + return ["ollama", "list"] + case (("ollama",), _): + return cmd + case _: + msg = f"Unexpected nix_shell arguments: packages={packages}, cmd={cmd}" + raise ValueError(msg) + + with patch("clan_lib.llm.service.nix_shell") as mock: + mock.side_effect = nix_shell_side_effect + yield mock + + +@pytest.fixture +def llm_service() -> Iterator[None]: + """Start LLM service and create model, ensuring cleanup.""" + service_manager = create_service_manager() + + try: + run_llm_service() + create_llm_model() + yield + finally: + # Always attempt to stop the service, even if setup failed + with contextlib.suppress(Exception): + service_manager.stop_service("ollama") + + +@pytest.mark.service_runner +@pytest.mark.usefixtures("mock_nix_shell", "llm_service") +def test_full_conversation_flow(mock_flake: MagicMock) -> None: + """Comprehensive test that exercises the complete conversation flow with the actual LLM service. 
+ + This test simulates a realistic multi-turn conversation that covers: + - Discovery phase: Initial request and LLM gathering information + - Service selection phase: User choosing from available options + - Final decision phase: Configuring the selected service with specific parameters + - State transitions: pending_service_selection -> pending_final_decision -> completion + - Conversation history preservation across all turns + - Error handling and edge cases + """ + flake = mock_flake + return + # ========== TURN 1: Discovery Phase - Initial vague request ========== + print("\n=== TURN 1: Initial discovery request ===") + result = process_chat_turn( + user_request="What VPN options do I have?", + flake=flake, + provider="ollama", + ) + + # Verify discovery phase behavior + assert result.requires_user_response is True, ( + "Should require user response in discovery" + ) + assert len(result.conversation_history) >= 2, ( + "Should have user + assistant messages" + ) + assert result.conversation_history[0]["role"] == "user" + assert result.conversation_history[0]["content"] == "What VPN options do I have?" 
+ assert result.conversation_history[-1]["role"] == "assistant" + assert len(result.assistant_message) > 0, "Assistant should provide a response" + + # Should transition to service selection phase with pending state + assert "pending_service_selection" in result.session_state, ( + "Should have pending service selection" + ) + assert "readme_results" in result.session_state["pending_service_selection"] + + # No instances yet + assert len(result.proposed_instances) == 0 + assert result.error is None + + print(f"Assistant: {result.assistant_message[:200]}...") + print(f"State: {list(result.session_state.keys())}") + print(f"History length: {len(result.conversation_history)}") + + # ========== TURN 2: Service Selection Phase - User makes a choice ========== + print("\n=== TURN 2: User selects ZeroTier ===") + result = process_chat_turn( + user_request="I'll use ZeroTier please", + flake=flake, + conversation_history=list(result.conversation_history), + provider="ollama", + session_state=result.session_state, + ) + + # Verify conversation history growth and preservation + assert len(result.conversation_history) > 2, "History should grow" + assert result.conversation_history[0]["content"] == "What VPN options do I have?" 
+ assert result.conversation_history[2]["content"] == "I'll use ZeroTier please" + + # Should either ask for configuration details or provide direct config + # Most likely will ask for more details (pending_final_decision) + if result.requires_user_response: + # LLM is asking for configuration details + assert len(result.assistant_message) > 0 + # Should transition to final decision phase + if "pending_final_decision" not in result.session_state: + # Might still be in service selection asking clarifications + assert "pending_service_selection" in result.session_state + else: + # LLM provided configuration immediately (less likely) + assert len(result.proposed_instances) > 0 + assert result.proposed_instances[0]["module"]["name"] == "zerotier" + + print( + f"Assistant: {result.assistant_message[:200] if result.assistant_message else 'No message'}..." + ) + print(f"State: {list(result.session_state.keys())}") + print(f"Requires response: {result.requires_user_response}") + + # ========== Continue conversation until we reach final decision or completion ========== + max_turns = 10 + turn_count = 2 + + while result.requires_user_response and turn_count < max_turns: + turn_count += 1 + print(f"\n=== TURN {turn_count}: Continuing conversation ===") + + # Determine appropriate response based on current state + if "pending_service_selection" in result.session_state: + # Still selecting service + user_request = "Yes, ZeroTier" + elif "pending_final_decision" in result.session_state: + # Configuring the service + user_request = "Set up gchq-local as controller, qube-email as moon, and wintux as peer" + else: + # Generic continuation + user_request = "Yes, that sounds good. Use gchq-local as controller." 
+ + print(f"User: {user_request}") + + result = process_chat_turn( + user_request=user_request, + flake=flake, + conversation_history=list(result.conversation_history), + provider="ollama", + session_state=result.session_state, + ) + + # Verify conversation history continues to grow + assert len(result.conversation_history) == (turn_count * 2), ( + f"History should have {turn_count * 2} messages (turn {turn_count})" + ) + + # Verify history preservation + assert ( + result.conversation_history[0]["content"] == "What VPN options do I have?" + ) + + print( + f"Assistant: {result.assistant_message[:200] if result.assistant_message else 'No message'}..." + ) + print(f"State: {list(result.session_state.keys())}") + print(f"Requires response: {result.requires_user_response}") + print(f"Proposed instances: {len(result.proposed_instances)}") + + # Check for completion + if not result.requires_user_response: + print("\n=== Conversation completed! ===") + break + + # ========== Final Verification ========== + assert turn_count < max_turns, f"Conversation took too many turns ({turn_count})" + + # If conversation completed, verify we have valid configuration + if not result.requires_user_response: + assert len(result.proposed_instances) > 0, ( + "Should have at least one proposed instance" + ) + instance = result.proposed_instances[0] + + # Verify instance structure + assert "module" in instance + assert "name" in instance["module"] + assert instance["module"]["name"] in [ + "zerotier", + "wireguard", + "yggdrasil", + "mycelium", + ] + + # Should have roles configuration + if "roles" in instance: + print(f"\nConfiguration roles: {list(instance['roles'].keys())}") + + # Should not be in pending state anymore + assert "pending_service_selection" not in result.session_state + assert "pending_final_decision" not in result.session_state + + assert result.error is None, f"Should not have error: {result.error}" + + print(f"\nFinal instance: {instance['module']['name']}") + 
print(f"Total conversation turns: {turn_count}") + print(f"Final history length: {len(result.conversation_history)}") + else: + # Conversation didn't complete but should have made progress + assert len(result.conversation_history) > 2 + assert result.error is None + print(f"\nConversation in progress after {turn_count} turns") + print(f"Current state: {list(result.session_state.keys())}")