From b56230e3a7f1a50b63d08af83e8dcf4558ab17a5 Mon Sep 17 00:00:00 2001
From: Qubasa
Date: Fri, 3 Oct 2025 12:32:33 +0200
Subject: [PATCH] clan_lib: Add clan_module_to_llm_function for ai integration

---
 pkgs/clan-cli/clan_lib/services/llm.py      | 186 ++++++++++++++++
 pkgs/clan-cli/clan_lib/services/llm_test.py | 230 ++++++++++++++++++++
 2 files changed, 416 insertions(+)
 create mode 100644 pkgs/clan-cli/clan_lib/services/llm.py
 create mode 100644 pkgs/clan-cli/clan_lib/services/llm_test.py

diff --git a/pkgs/clan-cli/clan_lib/services/llm.py b/pkgs/clan-cli/clan_lib/services/llm.py
new file mode 100644
index 000000000..b87ac2761
--- /dev/null
+++ b/pkgs/clan-cli/clan_lib/services/llm.py
@@ -0,0 +1,186 @@
+import re
+from typing import Any, Literal, TypedDict
+
+from clan_lib.services.modules import Module
+
+JSONSchemaType = Literal[
+    "array", "boolean", "integer", "null", "number", "object", "string"
+]
+
+JSONSchemaFormat = Literal[
+    # Dates and Times
+    "date-time",
+    "time",
+    "date",
+    "duration",
+    # Email Addresses
+    "email",
+    "idn-email",
+    # Hostnames
+    "hostname",
+    "idn-hostname",
+    # IP Addresses
+    "ipv4",
+    "ipv6",
+    # Resource Identifiers
+    "uuid",
+    "uri",
+    "uri-reference",
+    "iri",
+    "iri-reference",
+    # URI Template
+    "uri-template",
+    # JSON Pointer
+    "json-pointer",
+    "relative-json-pointer",
+    # Regular Expressions
+    "regex",
+]
+
+# Characters with special meaning in a regular expression. Only these are
+# escaped (instead of calling re.escape) so that characters such as "-" stay
+# unescaped and the generated pattern is unchanged for plain names.
+_REGEX_SYNTAX_CHARS = re.compile(r"[\\^$.*+?()\[\]{}|]")
+
+
+class JSONSchemaProperty(TypedDict, total=False):
+    """JSON Schema keywords describing a single property; every key is optional."""
+
+    type: JSONSchemaType | list[JSONSchemaType]
+    format: JSONSchemaFormat
+    description: str | None
+    enum: list[str] | None
+    items: dict[str, Any] | None
+    properties: dict[str, "JSONSchemaProperty"] | None
+    patternProperties: dict[str, "JSONSchemaProperty"] | None
+    required: list[str] | None
+    additionalProperties: bool | dict[str, Any] | None
+
+
+class JSONSchemaParameters(TypedDict, total=False):
+    """Top-level ``parameters`` object of a function schema; keys are optional."""
+
+    type: JSONSchemaType
+    properties: dict[str, JSONSchemaProperty]
+    required: list[str]
+    additionalProperties: bool
+
+
+class LLMFunctionSchema(TypedDict):
+    """LLM function (tool) definition; all keys are required."""
+
+    type: Literal["function"]
+    name: str
+    description: str
+    parameters: JSONSchemaParameters
+    strict: bool
+
+
+def _exact_names_pattern(names: list[str]) -> str:
+    """Return an anchored regex alternation matching exactly one of *names*.
+
+    Regex metacharacters inside a name are escaped so that e.g. ``v1.0`` only
+    matches the literal key ``v1.0``.
+    """
+    escaped = (_REGEX_SYNTAX_CHARS.sub(r"\\\g<0>", name) for name in names)
+    return f"^({'|'.join(escaped)})$"
+
+
+def clan_module_to_llm_function(
+    module: Module, available_tags: list[str], available_machines: list[str]
+) -> LLMFunctionSchema:
+    """Build an LLM function (tool) schema for assigning a module's roles.
+
+    Args:
+        module: Service module to describe; each of its roles becomes a
+            property of the ``roles`` parameter.
+        available_tags: Tag names accepted as keys under a role's ``tags``.
+        available_machines: Machine names accepted as keys under a role's
+            ``machines``.
+
+    Returns:
+        The function schema, named after the module, with ``roles`` as its
+        only required parameter.
+
+    Raises:
+        TypeError: If the module's ``usage_ref`` name is not a string, or its
+            input is neither a string nor None.
+    """
+    machines_pattern = _exact_names_pattern(available_machines)
+    tags_pattern = _exact_names_pattern(available_tags)
+
+    # Create individual role schemas with descriptions
+    role_properties = {}
+    for role_name, role_info in module.info.roles.items():
+        role_properties[role_name] = JSONSchemaProperty(
+            type="object",
+            description=role_info.description,
+            properties={
+                "machines": JSONSchemaProperty(
+                    type="object",
+                    patternProperties={
+                        machines_pattern: JSONSchemaProperty(
+                            type="object",
+                            additionalProperties=False,
+                        )
+                    },
+                    additionalProperties=False,
+                    description="Machines for this role with empty configuration objects",
+                ),
+                "tags": JSONSchemaProperty(
+                    type="object",
+                    patternProperties={
+                        tags_pattern: JSONSchemaProperty(
+                            type="object",
+                            additionalProperties=False,
+                        )
+                    },
+                    additionalProperties=False,
+                    description="Tags for this role with empty configuration objects",
+                ),
+            },
+            additionalProperties=False,
+        )
+
+    module_name = module.usage_ref.get("name")
+    if not isinstance(module_name, str):
+        msg = "Module name must be a string"
+        raise TypeError(msg)
+
+    module_input = module.usage_ref.get("input")
+    if module_input is not None and not isinstance(module_input, str):
+        msg = "Module input must be a string or None"
+        raise TypeError(msg)
+
+    module_properties = {}
+    if module_input is not None:
+        module_properties["input"] = JSONSchemaProperty(
+            type="string",
+            description=(
+                "Source / Input name of the module, e.g. 'clan-core' or null for built-in modules"
+            ),
+            enum=[module_input],
+        )
+
+    return LLMFunctionSchema(
+        type="function",
+        name=module_name,
+        description=module.info.manifest.description,
+        parameters=JSONSchemaParameters(
+            type="object",
+            properties={
+                "module": JSONSchemaProperty(
+                    type="object",
+                    properties=module_properties,
+                ),
+                "roles": JSONSchemaProperty(
+                    type="object",
+                    properties=role_properties,
+                    additionalProperties=False,
+                ),
+            },
+            required=["roles"],
+            additionalProperties=False,
+        ),
+        strict=True,
+    )
diff --git a/pkgs/clan-cli/clan_lib/services/llm_test.py b/pkgs/clan-cli/clan_lib/services/llm_test.py
new file mode 100644
index 000000000..79b8e7e5f
--- /dev/null
+++ b/pkgs/clan-cli/clan_lib/services/llm_test.py
@@ -0,0 +1,230 @@
+from collections.abc import Callable
+
+import pytest
+from clan_cli.tests.fixtures_flakes import nested_dict
+from clan_lib.flake.flake import Flake
+from clan_lib.services.llm import LLMFunctionSchema, clan_module_to_llm_function
+from clan_lib.services.modules import (
+    list_service_modules,
+)
+
+
+@pytest.mark.with_core
+def test_clan_module_to_llm_func(
+    clan_flake: Callable[..., Flake],
+) -> None:
+    # ATTENTION! This method lacks Typechecking
+    config = nested_dict()
+    # explicit module selection
+    # We use this random string in test to avoid code dependencies on the input name
+    config["inventory"]["instances"]["foo"]["module"]["input"] = (
+        "Y2xhbi1jaW9yZS1uZXZlci1kZXBlbmQtb24tbWU"
+    )
+    config["inventory"]["instances"]["foo"]["module"]["name"] = "sshd"
+    # input = null
+    config["inventory"]["instances"]["bar"]["module"]["input"] = None
+    config["inventory"]["instances"]["bar"]["module"]["name"] = "sshd"
+
+    # Omit input
+    config["inventory"]["instances"]["baz"]["module"]["name"] = "sshd"
+    # external input
+    flake = clan_flake(config)
+
+    service_modules = list_service_modules(flake)
+
+    # Module(usage_ref={'name': 'borgbackup', 'input': None}, info=ModuleInfo(manifest=ModuleManifest(name='borgbackup', description='Efficient, deduplicating backup program with optional compression and secure encryption.', categories=['System'], features={'API': True}), roles={'client': Role(name='client', description='A borgbackup client that backs up to all borgbackup server roles.'), 'server': Role(name='server', description='A borgbackup server that stores the backups of clients.')}), native=True, instance_refs=[]),
+    borgbackup_service = next(
+        m for m in service_modules.modules if m.usage_ref.get("name") == "borgbackup"
+    )
+
+    assert borgbackup_service is not None
+
+    available_machines = ["machine1", "machine2", "server1"]
+    available_tags = ["production", "backup", "client"]
+
+    generated_tool_func = clan_module_to_llm_function(
+        borgbackup_service, available_tags, available_machines
+    )
+
+    expected_tool_func: LLMFunctionSchema = {
+        "type": "function",
+        "name": "borgbackup",
+        "description": "Efficient, deduplicating backup program with optional compression and secure encryption.",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "module": {
+                    "type": "object",
+                    "properties": {
+                        # "input": {
+                        #     "type": "string",
+                        #     "description": "Source / Input name of the module, e.g. 'clan-core' or null for built-in modules",
+                        #     "enum": ["Y2xhbi1jaW9yZS1uZXZlci1kZXBlbmQtb24tbWU"],
+                        # }
+                    },
+                },
+                "roles": {
+                    "type": "object",
+                    "properties": {
+                        "client": {
+                            "type": "object",
+                            "description": "A borgbackup client that backs up to all borgbackup server roles.",
+                            "properties": {
+                                "machines": {
+                                    "type": "object",
+                                    "patternProperties": {
+                                        "^(machine1|machine2|server1)$": {
+                                            "type": "object",
+                                            "additionalProperties": False,
+                                        }
+                                    },
+                                    "additionalProperties": False,
+                                    "description": "Machines for this role with empty configuration objects",
+                                },
+                                "tags": {
+                                    "type": "object",
+                                    "patternProperties": {
+                                        "^(production|backup|client)$": {
+                                            "type": "object",
+                                            "additionalProperties": False,
+                                        }
+                                    },
+                                    "additionalProperties": False,
+                                    "description": "Tags for this role with empty configuration objects",
+                                },
+                            },
+                            "additionalProperties": False,
+                        },
+                        "server": {
+                            "type": "object",
+                            "description": "A borgbackup server that stores the backups of clients.",
+                            "properties": {
+                                "machines": {
+                                    "type": "object",
+                                    "patternProperties": {
+                                        "^(machine1|machine2|server1)$": {
+                                            "type": "object",
+                                            "additionalProperties": False,
+                                        }
+                                    },
+                                    "additionalProperties": False,
+                                    "description": "Machines for this role with empty configuration objects",
+                                },
+                                "tags": {
+                                    "type": "object",
+                                    "patternProperties": {
+                                        "^(production|backup|client)$": {
+                                            "type": "object",
+                                            "additionalProperties": False,
+                                        }
+                                    },
+                                    "additionalProperties": False,
+                                    "description": "Tags for this role with empty configuration objects",
+                                },
+                            },
+                            "additionalProperties": False,
+                        },
+                    },
+                    "additionalProperties": False,
+                },
+            },
+            "required": ["roles"],
+            "additionalProperties": False,
+        },
+        "strict": True,
+    }
+
+    assert generated_tool_func == expected_tool_func
+
+    certificate_service = next(
+        m for m in service_modules.modules if m.usage_ref.get("name") == "certificates"
+    )
+    assert certificate_service is not None
+
+    generated_tool_func2 = clan_module_to_llm_function(
+        certificate_service, available_tags, available_machines
+    )
+
+    expected_tool_func2: LLMFunctionSchema = {
+        "type": "function",
+        "name": "certificates",
+        "description": "Sets up a PKI certificate chain using step-ca",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "module": {
+                    "type": "object",
+                    "properties": {},
+                },
+                "roles": {
+                    "type": "object",
+                    "properties": {
+                        "ca": {
+                            "type": "object",
+                            "description": "A certificate authority that issues and signs certificates for other machines.",
+                            "properties": {
+                                "machines": {
+                                    "type": "object",
+                                    "patternProperties": {
+                                        "^(machine1|machine2|server1)$": {
+                                            "type": "object",
+                                            "additionalProperties": False,
+                                        }
+                                    },
+                                    "additionalProperties": False,
+                                    "description": "Machines for this role with empty configuration objects",
+                                },
+                                "tags": {
+                                    "type": "object",
+                                    "patternProperties": {
+                                        "^(production|backup|client)$": {
+                                            "type": "object",
+                                            "additionalProperties": False,
+                                        }
+                                    },
+                                    "additionalProperties": False,
+                                    "description": "Tags for this role with empty configuration objects",
+                                },
+                            },
+                            "additionalProperties": False,
+                        },
+                        "default": {
+                            "type": "object",
+                            "description": "A machine that trusts the CA and can get certificates issued by it.",
+                            "properties": {
+                                "machines": {
+                                    "type": "object",
+                                    "patternProperties": {
+                                        "^(machine1|machine2|server1)$": {
+                                            "type": "object",
+                                            "additionalProperties": False,
+                                        }
+                                    },
+                                    "additionalProperties": False,
+                                    "description": "Machines for this role with empty configuration objects",
+                                },
+                                "tags": {
+                                    "type": "object",
+                                    "patternProperties": {
+                                        "^(production|backup|client)$": {
+                                            "type": "object",
+                                            "additionalProperties": False,
+                                        }
+                                    },
+                                    "additionalProperties": False,
+                                    "description": "Tags for this role with empty configuration objects",
+                                },
+                            },
+                            "additionalProperties": False,
+                        },
+                    },
+                    "additionalProperties": False,
+                },
+            },
+            "required": ["roles"],
+            "additionalProperties": False,
+        },
+        "strict": True,
+    }
+
+    assert generated_tool_func2 == expected_tool_func2