clan_lib: Add clan_module_to_llm_function for ai integration

This commit is contained in:
Qubasa
2025-10-03 12:32:33 +02:00
parent 4f00a22921
commit b56230e3a7
2 changed files with 374 additions and 0 deletions

View File

@@ -0,0 +1,144 @@
from typing import Any, Literal, TypedDict
from clan_lib.services.modules import Module
# Type names accepted for the "type" key of the schema TypedDicts in this module.
JSONSchemaType = Literal[
    "array", "boolean", "integer", "null", "number", "object", "string"
]
# String values accepted for the "format" key of JSONSchemaProperty,
# grouped by category.
JSONSchemaFormat = Literal[
    # Dates and Times
    "date-time",
    "time",
    "date",
    "duration",
    # Email Addresses
    "email",
    "idn-email",
    # Hostnames
    "hostname",
    "idn-hostname",
    # IP Addresses
    "ipv4",
    "ipv6",
    # Resource Identifiers
    "uuid",
    "uri",
    "uri-reference",
    "iri",
    "iri-reference",
    # URI Template
    "uri-template",
    # JSON Pointer
    "json-pointer",
    "relative-json-pointer",
    # Regular Expressions
    "regex",
]
class JSONSchemaProperty(TypedDict, total=False):
    """Typed dict describing one (possibly nested) schema node.

    ``total=False`` makes every key optional, so a node only carries the
    keys that were explicitly set. Key names follow the JSON Schema
    keywords of the same name.
    """

    # A single type name or a list of type names.
    type: JSONSchemaType | list[JSONSchemaType]
    format: JSONSchemaFormat
    description: str | None
    enum: list[str] | None
    items: dict[str, Any] | None
    # Nested nodes keyed by property name; the forward reference makes the
    # type recursive.
    properties: dict[str, "JSONSchemaProperty"] | None
    # Nested nodes keyed by a regular expression over property names.
    patternProperties: dict[str, "JSONSchemaProperty"] | None
    required: list[str] | None
    additionalProperties: bool | dict[str, Any] | None
class JSONSchemaParameters(TypedDict, total=False):
    """Typed dict for the top-level ``parameters`` entry of an LLMFunctionSchema.

    ``total=False`` makes every key optional.
    """

    type: JSONSchemaType
    # Parameter schemas keyed by parameter name.
    properties: dict[str, JSONSchemaProperty]
    required: list[str]
    additionalProperties: bool
class LLMFunctionSchema(TypedDict):
    """Typed dict for a function/tool definition handed to an LLM.

    Unlike the other schema dicts in this module this one is total: all
    five keys must be present.
    """

    # Always the literal string "function".
    type: Literal["function"]
    name: str
    description: str
    parameters: JSONSchemaParameters
    # NOTE(review): presumably the provider's strict-schema switch; confirm
    # against the LLM API this is sent to.
    strict: bool
# Characters with syntactic meaning in a regular expression. Only these are
# escaped, so ordinary names (including ones containing "-") are emitted
# unchanged.
_REGEX_SYNTAX_CHARS = frozenset(r"\^$.*+?()[]{}|")


def _escape_regex_literal(name: str) -> str:
    """Return *name* with every regex syntax character backslash-escaped."""
    return "".join("\\" + ch if ch in _REGEX_SYNTAX_CHARS else ch for ch in name)


def _exact_name_pattern(names: list[str]) -> str:
    """Build an anchored alternation matching exactly one of *names*."""
    alternation = "|".join(_escape_regex_literal(name) for name in names)
    return f"^({alternation})$"


def _name_keyed_object_schema(pattern: str, description: str) -> JSONSchemaProperty:
    """Return an object schema whose keys must match *pattern* and map to ``{}``."""
    return JSONSchemaProperty(
        type="object",
        patternProperties={
            pattern: JSONSchemaProperty(
                type="object",
                additionalProperties=False,
            )
        },
        additionalProperties=False,
        description=description,
    )


def clan_module_to_llm_function(
    module: Module, available_tags: list[str], available_machines: list[str]
) -> LLMFunctionSchema:
    """Describe a clan service module as an LLM function (tool) schema.

    The resulting schema has two parameters:

    * ``roles`` (required): one object per role of the module, each with a
      ``machines`` and a ``tags`` object whose keys are restricted to
      ``available_machines`` / ``available_tags`` and whose values must be
      empty objects.
    * ``module``: an object that pins ``input`` to the module's own input
      name when the module has one, and has no properties otherwise.

    Args:
        module: The service module to describe.
        available_tags: Tag names that may be used as keys under ``tags``.
        available_machines: Machine names that may be used as keys under
            ``machines``.

    Returns:
        The function schema, with ``strict`` set to ``True``.

    Raises:
        TypeError: If the module's ``usage_ref`` name is not a string, or its
            ``input`` is neither a string nor ``None``.

    """
    # Names are matched literally: regex syntax characters in a machine or
    # tag name are escaped so they cannot widen or break the pattern.
    machines_pattern = _exact_name_pattern(available_machines)
    tags_pattern = _exact_name_pattern(available_tags)

    # Each role gets its own freshly built sub-schemas so the returned
    # structure contains no shared (aliased) dicts.
    role_properties: dict[str, JSONSchemaProperty] = {
        role_name: JSONSchemaProperty(
            type="object",
            description=role_info.description,
            properties={
                "machines": _name_keyed_object_schema(
                    machines_pattern,
                    "Machines for this role with empty configuration objects",
                ),
                "tags": _name_keyed_object_schema(
                    tags_pattern,
                    "Tags for this role with empty configuration objects",
                ),
            },
            additionalProperties=False,
        )
        for role_name, role_info in module.info.roles.items()
    }

    module_name = module.usage_ref.get("name")
    if not isinstance(module_name, str):
        msg = "Module name must be a string"
        raise TypeError(msg)

    module_input = module.usage_ref.get("input")
    if module_input is not None and not isinstance(module_input, str):
        msg = "Module input must be a string or None"
        raise TypeError(msg)

    # Only modules with an explicit input expose an "input" property; it is
    # an enum with a single allowed value.
    module_properties: dict[str, JSONSchemaProperty] = {}
    if module_input is not None:
        module_properties["input"] = JSONSchemaProperty(
            type="string",
            description=(
                "Source / Input name of the module, e.g. 'clan-core' or null for built-in modules"
            ),
            enum=[module_input],
        )

    return LLMFunctionSchema(
        type="function",
        # Use the value validated above rather than re-reading usage_ref.
        name=module_name,
        description=module.info.manifest.description,
        parameters=JSONSchemaParameters(
            type="object",
            properties={
                "module": JSONSchemaProperty(
                    type="object",
                    properties=module_properties,
                ),
                "roles": JSONSchemaProperty(
                    type="object",
                    properties=role_properties,
                    additionalProperties=False,
                ),
            },
            required=["roles"],
            additionalProperties=False,
        ),
        strict=True,
    )

View File

@@ -0,0 +1,230 @@
from collections.abc import Callable
import pytest
from clan_cli.tests.fixtures_flakes import nested_dict
from clan_lib.flake.flake import Flake
from clan_lib.services.llm import LLMFunctionSchema, clan_module_to_llm_function
from clan_lib.services.modules import (
list_service_modules,
)
@pytest.mark.with_core
def test_clan_module_to_llm_func(
    clan_flake: Callable[..., Flake],
) -> None:
    """Check the LLM function schema generated for two service modules.

    Builds a flake from an inventory with three ``sshd`` instances, lists
    its service modules, and compares the schemas generated for the
    ``borgbackup`` and ``certificates`` modules against literal expected
    values.
    """
    # ATTENTION! This method lacks Typechecking
    config = nested_dict()

    # explicit module selection
    # We use this random string in test to avoid code dependencies on the input name
    config["inventory"]["instances"]["foo"]["module"]["input"] = (
        "Y2xhbi1jaW9yZS1uZXZlci1kZXBlbmQtb24tbWU"
    )
    config["inventory"]["instances"]["foo"]["module"]["name"] = "sshd"
    # input = null
    config["inventory"]["instances"]["bar"]["module"]["input"] = None
    config["inventory"]["instances"]["bar"]["module"]["name"] = "sshd"
    # Omit input
    config["inventory"]["instances"]["baz"]["module"]["name"] = "sshd"

    flake = clan_flake(config)
    service_modules = list_service_modules(flake)

    # Sample of the entry looked up here:
    # Module(usage_ref={'name': 'borgbackup', 'input': None}, info=ModuleInfo(...), native=True, instance_refs=[])
    # A default is passed to next() so a missing module fails the assertion
    # below instead of raising StopIteration.
    borgbackup_service = next(
        (m for m in service_modules.modules if m.usage_ref.get("name") == "borgbackup"),
        None,
    )
    assert borgbackup_service is not None

    available_machines = ["machine1", "machine2", "server1"]
    available_tags = ["production", "backup", "client"]

    generated_tool_func = clan_module_to_llm_function(
        borgbackup_service, available_tags, available_machines
    )

    expected_tool_func: LLMFunctionSchema = {
        "type": "function",
        "name": "borgbackup",
        "description": "Efficient, deduplicating backup program with optional compression and secure encryption.",
        "parameters": {
            "type": "object",
            "properties": {
                "module": {
                    "type": "object",
                    # The module's usage_ref input is None here, so no
                    # "input" property is generated.
                    "properties": {},
                },
                "roles": {
                    "type": "object",
                    "properties": {
                        "client": {
                            "type": "object",
                            "description": "A borgbackup client that backs up to all borgbackup server roles.",
                            "properties": {
                                "machines": {
                                    "type": "object",
                                    "patternProperties": {
                                        "^(machine1|machine2|server1)$": {
                                            "type": "object",
                                            "additionalProperties": False,
                                        }
                                    },
                                    "additionalProperties": False,
                                    "description": "Machines for this role with empty configuration objects",
                                },
                                "tags": {
                                    "type": "object",
                                    "patternProperties": {
                                        "^(production|backup|client)$": {
                                            "type": "object",
                                            "additionalProperties": False,
                                        }
                                    },
                                    "additionalProperties": False,
                                    "description": "Tags for this role with empty configuration objects",
                                },
                            },
                            "additionalProperties": False,
                        },
                        "server": {
                            "type": "object",
                            "description": "A borgbackup server that stores the backups of clients.",
                            "properties": {
                                "machines": {
                                    "type": "object",
                                    "patternProperties": {
                                        "^(machine1|machine2|server1)$": {
                                            "type": "object",
                                            "additionalProperties": False,
                                        }
                                    },
                                    "additionalProperties": False,
                                    "description": "Machines for this role with empty configuration objects",
                                },
                                "tags": {
                                    "type": "object",
                                    "patternProperties": {
                                        "^(production|backup|client)$": {
                                            "type": "object",
                                            "additionalProperties": False,
                                        }
                                    },
                                    "additionalProperties": False,
                                    "description": "Tags for this role with empty configuration objects",
                                },
                            },
                            "additionalProperties": False,
                        },
                    },
                    "additionalProperties": False,
                },
            },
            "required": ["roles"],
            "additionalProperties": False,
        },
        "strict": True,
    }

    assert generated_tool_func == expected_tool_func

    certificate_service = next(
        (m for m in service_modules.modules if m.usage_ref.get("name") == "certificates"),
        None,
    )
    assert certificate_service is not None

    generated_tool_func2 = clan_module_to_llm_function(
        certificate_service, available_tags, available_machines
    )

    expected_tool_func2: LLMFunctionSchema = {
        "type": "function",
        "name": "certificates",
        "description": "Sets up a PKI certificate chain using step-ca",
        "parameters": {
            "type": "object",
            "properties": {
                "module": {
                    "type": "object",
                    "properties": {},
                },
                "roles": {
                    "type": "object",
                    "properties": {
                        "ca": {
                            "type": "object",
                            "description": "A certificate authority that issues and signs certificates for other machines.",
                            "properties": {
                                "machines": {
                                    "type": "object",
                                    "patternProperties": {
                                        "^(machine1|machine2|server1)$": {
                                            "type": "object",
                                            "additionalProperties": False,
                                        }
                                    },
                                    "additionalProperties": False,
                                    "description": "Machines for this role with empty configuration objects",
                                },
                                "tags": {
                                    "type": "object",
                                    "patternProperties": {
                                        "^(production|backup|client)$": {
                                            "type": "object",
                                            "additionalProperties": False,
                                        }
                                    },
                                    "additionalProperties": False,
                                    "description": "Tags for this role with empty configuration objects",
                                },
                            },
                            "additionalProperties": False,
                        },
                        "default": {
                            "type": "object",
                            "description": "A machine that trusts the CA and can get certificates issued by it.",
                            "properties": {
                                "machines": {
                                    "type": "object",
                                    "patternProperties": {
                                        "^(machine1|machine2|server1)$": {
                                            "type": "object",
                                            "additionalProperties": False,
                                        }
                                    },
                                    "additionalProperties": False,
                                    "description": "Machines for this role with empty configuration objects",
                                },
                                "tags": {
                                    "type": "object",
                                    "patternProperties": {
                                        "^(production|backup|client)$": {
                                            "type": "object",
                                            "additionalProperties": False,
                                        }
                                    },
                                    "additionalProperties": False,
                                    "description": "Tags for this role with empty configuration objects",
                                },
                            },
                            "additionalProperties": False,
                        },
                    },
                    "additionalProperties": False,
                },
            },
            "required": ["roles"],
            "additionalProperties": False,
        },
        "strict": True,
    }

    assert generated_tool_func2 == expected_tool_func2