Add networking module

This adds a (for now hidden) `clan network` command that exposes the list,
ping and overview subcommands to get information about configured
networks.
ClanServices can now use the exports to define network specific
information.

This is not the complete feature yet, as we are lacking more tests and
documentation, but merging this now makes it easier to iterate.
This commit is contained in:
lassulus
2025-07-15 08:59:41 +02:00
parent 5022f6f26c
commit 60768cc537
16 changed files with 753 additions and 3 deletions

View File

@@ -0,0 +1,47 @@
# Clan service describing machines that are reachable directly over SSH,
# optionally through one or more SSH jump hosts.
{ ... }:
{
  _class = "clan.service";
  manifest.name = "clan-core/internet";
  manifest.description = "direct access (or via ssh jumphost) to machines";
  manifest.categories = [
    "System"
    "Network"
  ];
  roles.default = {
    interface =
      { lib, ... }:
      {
        options = {
          host = lib.mkOption {
            type = lib.types.str;
            description = ''
              ip address or hostname (domain) of the machine
            '';
          };
          jumphosts = lib.mkOption {
            type = lib.types.listOf lib.types.str;
            default = [ ];
            description = ''
              optional list of jumphosts to use to connect to the machine
            '';
          };
        };
      };
    perInstance =
      {
        roles,
        lib,
        settings,
        ...
      }:
      {
        exports.networking = {
          # TODO add user space network support to clan-cli
          peers = lib.mapAttrs (_name: machine: {
            host.plain = machine.settings.host;
            # Pass the configured jumphosts as one comma-separated `-J` chain.
            # The previous mapping ignored the jumphost name (it always emitted
            # the literal "x") and produced one `-J` flag per entry.
            # No option is emitted when no jumphost is configured.
            SSHOptions = lib.optionals (machine.settings.jumphosts != [ ]) [
              "-J"
              (lib.concatStringsSep "," machine.settings.jumphosts)
            ];
          }) roles.default.machines;
        };
      };
  };
}

View File

@@ -0,0 +1,9 @@
# Registers the `internet` clan service (./default.nix) under `clan.modules`.
{ lib, ... }:
{
  clan.modules.internet = lib.modules.importApply ./default.nix { };
}

View File

@@ -0,0 +1,88 @@
# Clan service that publishes each machine's SSH port as a Tor onion service
# and exports the resulting onion hostname as a networking peer address.
{ ... }:
{
  _class = "clan.service";
  manifest.name = "clan-core/tor";
  manifest.description = "Onion routing, use Hidden services to connect your machines";
  manifest.categories = [
    "System"
    "Network"
  ];
  # The default role currently defines no `interface`, i.e. it has no settings.
  roles.default.perInstance =
    {
      instanceName,
      roles,
      lib,
      ...
    }:
    let
      # One vars generator per service instance; it is referenced by the
      # exported peers, the onion service key and the generator definition.
      generatorName = "tor_${instanceName}";
    in
    {
      exports.networking = {
        priority = lib.mkDefault 10;
        # TODO add user space network support to clan-cli
        module = "clan_lib.network.tor";
        # Every machine of this role is addressed through the `hostname`
        # file produced by its generator.
        peers = lib.mapAttrs (name: _machine: {
          host.var = {
            machine = name;
            generator = generatorName;
            file = "hostname";
          };
        }) roles.default.machines;
      };
      nixosModule =
        { pkgs, config, ... }:
        {
          config = {
            services.tor = {
              enable = true;
              relay.onionServices."clan_${instanceName}" = {
                version = 3;
                # TODO get ports from instance machine config
                map = [
                  {
                    port = 22;
                    target.port = 22;
                  }
                ];
                secretKey = config.clan.core.vars.generators.${generatorName}.files.hs_ed25519_secret_key.path;
              };
            };
            clan.core.vars.generators.${generatorName} = {
              files.hs_ed25519_secret_key = { };
              files.hostname = { };
              runtimeInputs = with pkgs; [
                coreutils
                tor
              ];
              # Runs tor briefly against a throw-away config, then moves the
              # key and hostname files from ./hs into $out.
              script = ''
                mkdir -p data
                echo -e "DataDirectory ./data\nSocksPort 0\nHiddenServiceDir ./hs\nHiddenServicePort 80 127.0.0.1:80" > torrc
                timeout 2 tor -f torrc || :
                mv hs/hs_ed25519_secret_key $out/hs_ed25519_secret_key
                mv hs/hostname $out/hostname
              '';
            };
          };
        };
    };
}

View File

@@ -0,0 +1,9 @@
# Registers the `tor` clan service (./default.nix) under `clan.modules`.
{ lib, ... }:
{
  clan.modules.tor = lib.modules.importApply ./default.nix { };
}

View File

@@ -465,6 +465,10 @@ Learn how to use `clanServices` in practice in the [Using clanServices guide](..
service_links: dict[str, dict[str, dict[str, Any]]] = json.load(f3)
for module_name, module_info in service_links.items():
# Skip specific modules that are not ready for documentation
if module_name in ["internet", "tor"]:
continue
output = f"# {module_name}\n\n"
# output += f"`clan.modules.{module_name}`\n"
output += f"*{module_info['manifest']['description']}*\n"

View File

@@ -78,7 +78,87 @@ in
internal = true;
visible = false;
type = types.deferredModule;
default = { };
default = {
options.networking = lib.mkOption {
default = null;
type = lib.types.nullOr (
lib.types.submodule {
options = {
priority = lib.mkOption {
type = lib.types.int;
default = 1000;
description = ''
priority with which this network should be tried.
higher priority means it gets used earlier in the chain
'';
};
module = lib.mkOption {
# type = lib.types.enum [
# "clan_lib.network.direct"
# "clan_lib.network.tor"
# ];
type = lib.types.str;
default = "clan_lib.network.direct";
description = ''
the technology this network uses to connect to the target
This is used for userspace networking with socks proxies.
'';
};
# should we call this machines? hosts?
peers = lib.mkOption {
# <name>
type = lib.types.attrsOf (
lib.types.submodule (
{ name, ... }:
{
options = {
name = lib.mkOption {
type = lib.types.str;
default = name;
};
SSHOptions = lib.mkOption {
type = lib.types.listOf lib.types.str;
default = [ ];
};
host = lib.mkOption {
description = '''';
type = lib.types.attrTag {
plain = lib.mkOption {
type = lib.types.str;
description = ''
a plain value, which can be read directly from the config
'';
};
var = lib.mkOption {
type = lib.types.submodule {
options = {
machine = lib.mkOption {
type = lib.types.str;
example = "jon";
};
generator = lib.mkOption {
type = lib.types.str;
example = "tor-ssh";
};
file = lib.mkOption {
type = lib.types.str;
example = "hostname";
};
};
};
};
};
};
};
}
)
);
};
};
}
);
};
};
description = ''
A module that is used to define the module of flake level exports -

View File

@@ -25,6 +25,7 @@ from .facts import cli as facts
from .flash import cli as flash_cli
from .hyperlink import help_hyperlink
from .machines import cli as machines
from .network import cli as network_cli
from .profiler import profile
from .ssh import deploy_info as ssh_cli
from .vars import cli as vars_cli
@@ -428,6 +429,26 @@ Examples:
)
select.register_parser(parser_select)
parser_network = subparsers.add_parser(
"network",
aliases=["net"],
# TODO: Add help="Manage networks" when network code is ready
# help="Manage networks",
description="Manage networks",
epilog=(
"""
show information about configured networks
Examples:
$ clan network list
Will list networks
"""
),
formatter_class=argparse.RawTextHelpFormatter,
)
network_cli.register_parser(parser_network)
parser_state = subparsers.add_parser(
"state",
aliases=["st"],
@@ -462,7 +483,7 @@ For more detailed information, visit: {help_hyperlink("getting-started", "https:
state.register_parser(parser_state)
if argcomplete:
argcomplete.autocomplete(parser, exclude=["morph"])
argcomplete.autocomplete(parser, exclude=["morph", "network", "net"])
register_common_flags(parser)

View File

@@ -0,0 +1,72 @@
# !/usr/bin/env python3
import argparse
from .list import register_list_parser
from .overview import register_overview_parser
from .ping import register_ping_parser
# takes a (sub)parser and configures it
def register_parser(parser: argparse.ArgumentParser) -> None:
    """Attach the `list`, `ping` and `overview` subcommands to `parser`.

    Each subcommand parser is created here and then handed to its own
    `register_*_parser` function, which sets the arguments and handler.
    """
    subparser = parser.add_subparsers(
        title="command",
        description="the command to run",
        help="the command to run",
        required=True,
    )

    list_parser = subparser.add_parser(
        "list",
        help="list all networks",
        epilog=(
            """
This subcommand allows listing all networks
```
[NETWORK1] [PRIORITY] [MODULE] [PEER1, PEER2]
[NETWORK2] [PRIORITY] [MODULE] [PEER1, PEER2]
```
Examples:
$ clan network list
"""
        ),
        formatter_class=argparse.RawTextHelpFormatter,
    )
    register_list_parser(list_parser)

    ping_parser = subparser.add_parser(
        "ping",
        help="ping a machine to check if it's online",
        epilog=(
            """
This subcommand allows pinging a machine to check if it's online
Examples:
$ clan network ping machine1
Check machine1 on all networks (in priority order)
$ clan network ping machine1 --network tor
Check machine1 only on the tor network
"""
        ),
        formatter_class=argparse.RawTextHelpFormatter,
    )
    register_ping_parser(ping_parser)

    overview_parser = subparser.add_parser(
        "overview",
        help="show the overview of all network and hosts",
        epilog=(
            """
This command shows the complete state of all networks
Examples:
$ clan network overview
"""
        ),
        formatter_class=argparse.RawTextHelpFormatter,
    )
    register_overview_parser(overview_parser)

View File

@@ -0,0 +1,64 @@
import argparse
import logging
from clan_lib.flake import Flake
from clan_lib.network.network import networks_from_flake
log = logging.getLogger(__name__)
def list_command(args: argparse.Namespace) -> None:
    """Print a table of all networks found in the flake.

    Rows are ordered by descending priority and show the network name, its
    priority, the last component of its module path, whether the network
    technology reports itself as running, and up to three peer names.
    """
    flake: Flake = args.flake
    networks = networks_from_flake(flake)
    if not networks:
        print("No networks found")
        return

    # Column widths: fixed minimums, widened to the longest value.
    name_width = max(12, max(len(name) for name in networks))
    priority_width = 8
    module_width = max(
        10, max(len(net.module_name.split(".")[-1]) for net in networks.values())
    )
    running_width = 8

    def format_row(
        name: object, priority: object, module: object, running: object, peers: object
    ) -> str:
        return (
            f"{name:<{name_width}} {priority:<{priority_width}} "
            f"{module:<{module_width}} {running:<{running_width}} {peers}"
        )

    header = format_row("Network", "Priority", "Module", "Running", "Peers")
    print(header)
    print("-" * len(header))

    max_peers_shown = 3
    by_priority = sorted(networks.items(), key=lambda item: -item[1].priority)
    for network_name, network in by_priority:
        # Only the last component of the dotted module path is displayed.
        module_name = network.module_name.split(".")[-1]

        # Peer column: everything if short, otherwise the first few plus a count.
        peer_names = list(network.peers.keys())
        if len(peer_names) > max_peers_shown:
            shown_peers = peer_names[:max_peers_shown]
            remaining = len(peer_names) - max_peers_shown
            peers_str = f"{', '.join(shown_peers)} ...({remaining} more)"
        elif peer_names:
            peers_str = ", ".join(peer_names)
        else:
            peers_str = "No peers"

        # A failing status check is reported in the table instead of aborting.
        try:
            running_status = "Yes" if network.is_running() else "No"
        except Exception:
            running_status = "Error"

        print(
            format_row(
                network_name, network.priority, module_name, running_status, peers_str
            )
        )
def register_list_parser(parser: argparse.ArgumentParser) -> None:
    """Make `list_command` the handler (`func` default) of `parser`."""
    parser.set_defaults(func=list_command)

View File

@@ -0,0 +1,21 @@
import argparse
import logging
from clan_lib.flake import Flake
from clan_lib.network.network import get_network_overview, networks_from_flake
log = logging.getLogger(__name__)
def overview_command(args: argparse.Namespace) -> None:
    """Print every network with its status, followed by one line per peer.

    A peer is shown as `[OFFLINE]` when its overview value is falsy, otherwise
    the value itself is printed in brackets.
    """
    flake: Flake = args.flake
    overview = get_network_overview(networks_from_flake(flake))
    for network_name, network in overview.items():
        network_label = "[ONLINE]" if network["status"] else "[OFFLINE]"
        print(f"{network_name} {network_label}")
        for peer_name, peer in network["peers"].items():
            peer_label = f"[{peer}]" if peer else "[OFFLINE]"
            print(f"\t{peer_name}: {peer_label}")
def register_overview_parser(parser: argparse.ArgumentParser) -> None:
    """Make `overview_command` the handler (`func` default) of `parser`."""
    parser.set_defaults(func=overview_command)

View File

@@ -0,0 +1,67 @@
import argparse
import logging
from clan_lib.errors import ClanError
from clan_lib.flake import Flake
from clan_lib.network.network import networks_from_flake
log = logging.getLogger(__name__)
def ping_command(args: argparse.Namespace) -> None:
    """Ping a machine on one network, or on all networks in priority order.

    Reads `args.flake`, `args.machine` and `args.network`. One result line is
    printed per network that lists the machine as a peer.

    Raises:
        ClanError: if `--network` names a network that does not exist, or if
            the machine is not a peer of any checked network.
    """
    flake: Flake = args.flake
    machine = args.machine
    network_name = args.network

    networks = networks_from_flake(flake)
    if not networks:
        # Informational only: the checks below still raise a ClanError.
        print("No networks found in the flake")

    if network_name:
        # Report an unknown network as a ClanError instead of a raw KeyError.
        if network_name not in networks:
            available = ", ".join(sorted(networks)) or "none"
            msg = f"Network '{network_name}' not found (available: {available})"
            raise ClanError(msg)
        networks_to_check = [(network_name, networks[network_name])]
    else:
        # Sort networks by priority (highest first)
        networks_to_check = sorted(networks.items(), key=lambda x: -x[1].priority)

    found = False
    results = []
    for net_name, network in networks_to_check:
        if machine not in network.peers:
            continue
        found = True

        # Skip the ping when the network technology itself is not running.
        if not network.is_running():
            results.append(f"{machine} ({net_name}): network not running")
            continue

        ping = network.ping(machine)
        results.append(f"{machine} ({net_name}): {ping}")

    if not found:
        msg = f"Machine '{machine}' not found in any network"
        raise ClanError(msg)

    for result in results:
        print(result)
def register_ping_parser(parser: argparse.ArgumentParser) -> None:
    """Declare the arguments of `ping` and make `ping_command` its handler."""
    # Positional: the machine to ping.
    parser.add_argument("machine", type=str, help="Machine name to ping")
    # Optional: restrict the check to a single network.
    parser.add_argument(
        "--network",
        "-n",
        type=str,
        help="Specific network to use for ping (if not specified, checks all networks)",
    )
    parser.set_defaults(func=ping_command)

View File

@@ -0,0 +1,9 @@
from clan_lib.network.network import NetworkTechnologyBase
class NetworkTechnology(NetworkTechnologyBase):
    """Network technology for direct connections."""

    def is_running(self) -> bool:
        """Always report the network as running.

        A direct connection has no daemon whose state could be checked.
        """
        return True

View File

@@ -0,0 +1,133 @@
import importlib
import logging
import time
from abc import ABC, abstractmethod
from dataclasses import dataclass
from functools import cached_property
from typing import Any
from clan_cli.vars.get import get_machine_var
from clan_lib.errors import ClanError
from clan_lib.flake import Flake
from clan_lib.ssh.parse import parse_ssh_uri
from clan_lib.ssh.remote import Remote, check_machine_ssh_reachable
log = logging.getLogger(__name__)
@dataclass(frozen=True)
class Peer:
    """A machine inside a network, addressed by a plain value or a clan var."""

    # Either {"plain": "<address>"} or
    # {"var": {"machine": ..., "generator": ..., "file": ...}}.
    _host: dict[str, str | dict[str, str]]
    flake: Flake

    @cached_property
    def host(self) -> str:
        """Return the address of this peer.

        A `plain` host is returned as configured. A `var` host is read through
        `get_machine_var` and decoded; surrounding whitespace is stripped
        because a generated file may end with a newline, which must not become
        part of the address.

        Raises:
            ClanError: if `_host` contains neither a `plain` string nor a
                `var` dictionary.
        """
        plain = self._host.get("plain")
        if isinstance(plain, str):
            return plain

        var_ref = self._host.get("var")
        if isinstance(var_ref, dict):
            var = get_machine_var(
                str(self.flake),
                var_ref["machine"],
                f"{var_ref['generator']}/{var_ref['file']}",
            )
            return var.value.decode().strip()

        msg = f"Unknown Var Type {self._host}"
        raise ClanError(msg)
class NetworkTechnologyBase(ABC):
    """Base class of the `NetworkTechnology` classes of the network modules."""

    @abstractmethod
    def is_running(self) -> bool:
        """Return whether this network technology is currently usable."""

    # TODO this will depend on the network implementation if we do user networking at some point, so it should be abstractmethod
    def ping(self, peer: Peer) -> None | float:
        """Check SSH reachability of `peer`.

        Returns:
            The duration of the reachability check in milliseconds, or None if
            the network is not running, the check fails, or it raises.

        Errors raised while resolving `peer.host` propagate to the caller.
        """
        if not self.is_running():
            return None

        # Resolve the address outside the try block. The old handler formatted
        # `peer.host` again inside `except`, so a failing lookup raised a
        # second time from within the handler.
        host = peer.host
        try:
            # The machine name is not known here, so a placeholder is used.
            remote = parse_ssh_uri(machine_name="peer", address=host)
            # A monotonic clock is unaffected by wall-clock adjustments.
            started = time.monotonic()
            result = check_machine_ssh_reachable(remote)
            if result.ok:
                return (time.monotonic() - started) * 1000
        except Exception as e:
            log.debug(f"Error checking peer {host}: {e}")
        return None
@dataclass(frozen=True)
class Network:
    """A named set of peers together with the module used to reach them."""

    peers: dict[str, Peer]
    # Dotted import path of a module that provides a `NetworkTechnology` class.
    module_name: str
    priority: int = 1000

    @cached_property
    def module(self) -> NetworkTechnologyBase:
        """Import `module_name` and return an instance of its `NetworkTechnology`."""
        technology_cls = importlib.import_module(self.module_name).NetworkTechnology
        return technology_cls()

    def is_running(self) -> bool:
        """Delegate to the network technology's `is_running`."""
        technology = self.module
        return technology.is_running()

    def ping(self, peer: str) -> float | None:
        """Ping the peer registered under the name `peer`."""
        technology = self.module
        return technology.ping(self.peers[peer])
def networks_from_flake(flake: Flake) -> dict[str, Network]:
    """Build `Network` objects from the `networking` exports of all instances.

    Instances whose `networking` export is empty or null are skipped. Peers
    are keyed by their `name` field.
    """
    exported = flake.select("clan.exports.instances.*.networking")

    networks: dict[str, Network] = {}
    for instance_name, definition in exported.items():
        if not definition:
            continue
        networks[instance_name] = Network(
            peers={
                entry["name"]: Peer(_host=entry["host"], flake=flake)
                for entry in definition["peers"].values()
            },
            module_name=definition["module"],
            priority=definition["priority"],
        )
    return networks
def get_best_remote(machine_name: str, networks: dict[str, Network]) -> Remote | None:
    """Return a `Remote` for `machine_name` via the best reachable network.

    Networks are tried from highest to lowest priority. A network is used when
    it lists the machine as a peer, is running, and the ping succeeds.

    Returns:
        The remote built from the peer's host, or None if no network reaches
        the machine.
    """
    candidates = sorted(networks.items(), key=lambda item: -item[1].priority)
    for network_name, network in candidates:
        if machine_name not in network.peers:
            continue
        try:
            reachable = network.is_running() and network.ping(machine_name)
        except ClanError as e:
            # A network whose peer address cannot be resolved must not stop
            # lower-priority networks from being tried.
            log.debug(f"skipping network {network_name} for {machine_name}: {e}")
            continue
        if reachable:
            print(f"connecting via {network_name}")
            return Remote.from_ssh_uri(
                machine_name=machine_name,
                address=network.peers[machine_name].host,
            )
    return None
def get_network_overview(networks: dict[str, Network]) -> dict:
    """Collect the status and per-peer ping results of every network.

    Returns:
        A mapping from network name to `{"status": ..., "peers": {...}}`.
        `status` is True when the network is running and None otherwise.
        Peers of a network that is not running map to None. Peers of a running
        network map to their ping result; a peer whose ping raises ClanError
        is logged and left out.
    """
    result: dict[str, dict[str, Any]] = {}
    for network_name, network in networks.items():
        peer_results: dict[str, Any] = {}
        entry: dict[str, Any] = {"status": None, "peers": peer_results}
        result[network_name] = entry

        network_online = bool(network.module.is_running())
        if network_online:
            entry["status"] = True

        for peer_name in network.peers:
            if not network_online:
                peer_results[peer_name] = None
                continue
            try:
                peer_results[peer_name] = network.ping(peer_name)
            except ClanError:
                log.warning(
                    f"getting host for machine: {peer_name} in network: {network_name} failed"
                )
    return result

View File

@@ -0,0 +1,106 @@
from typing import Any
from unittest.mock import MagicMock, patch
from clan_lib.flake import Flake
from clan_lib.network.network import Network, Peer, networks_from_flake
@patch("clan_lib.network.network.get_machine_var")
def test_networks_from_flake(mock_get_machine_var: MagicMock) -> None:
    """networks_from_flake builds Network/Peer objects from the flake export."""
    flake = MagicMock(spec=Flake)

    # Stand-in for var decryption: known (machine, var path) pairs yield a
    # mock whose decoded value is the address; everything else yields None.
    known_addresses = {
        ("machine1", "wireguard/address"): "192.168.1.10",
        ("machine2", "wireguard/address"): "192.168.1.11",
    }

    def mock_var_side_effect(flake_path: str, machine: str, var_path: str) -> Any:
        address = known_addresses.get((machine, var_path))
        if address is None:
            return None
        mock_var = MagicMock()
        mock_var.value.decode.return_value = address
        return mock_var

    mock_get_machine_var.side_effect = mock_var_side_effect

    def var_peer(name: str) -> dict[str, Any]:
        return {
            "name": name,
            "host": {
                "var": {
                    "machine": name,
                    "generator": "wireguard",
                    "file": "address",
                }
            },
        }

    def plain_peer(name: str, address: str) -> dict[str, Any]:
        return {"name": name, "host": {"plain": address}}

    # What flake.select is expected to hand back.
    mock_networking_data = {
        "vpn-network": {
            "peers": {
                "machine1": var_peer("machine1"),
                "machine2": var_peer("machine2"),
            },
            "module": "clan_lib.network.tor",
            "priority": 1000,
        },
        "local-network": {
            "peers": {
                "machine1": plain_peer("machine1", "10.0.0.10"),
                "machine3": plain_peer("machine3", "10.0.0.12"),
            },
            "module": "clan_lib.network.direct",
            "priority": 500,
        },
    }
    flake.select.return_value = mock_networking_data

    networks = networks_from_flake(flake)

    # The selector must be queried exactly once with the networking pattern.
    flake.select.assert_called_once_with("clan.exports.instances.*.networking")

    assert len(networks) == 2
    assert "vpn-network" in networks
    assert "local-network" in networks

    # vpn-network: var-backed peers
    vpn_network = networks["vpn-network"]
    assert isinstance(vpn_network, Network)
    assert vpn_network.module_name == "clan_lib.network.tor"
    assert vpn_network.priority == 1000
    assert len(vpn_network.peers) == 2
    assert "machine1" in vpn_network.peers
    assert "machine2" in vpn_network.peers

    # Reading `host` goes through the patched get_machine_var.
    machine1_peer = vpn_network.peers["machine1"]
    assert isinstance(machine1_peer, Peer)
    assert machine1_peer.host == "192.168.1.10"
    assert machine1_peer.flake == flake

    # local-network: plain peers
    local_network = networks["local-network"]
    assert local_network.module_name == "clan_lib.network.direct"
    assert local_network.priority == 500
    assert len(local_network.peers) == 2
    assert "machine1" in local_network.peers
    assert "machine3" in local_network.peers

View File

@@ -0,0 +1,20 @@
from urllib.error import URLError
from urllib.request import urlopen
from .network import NetworkTechnologyBase
class NetworkTechnology(NetworkTechnologyBase):
    """Network technology for Tor, probed through the local SOCKS port."""

    # Port probed by `is_running`; 9050 is the value that was hard-coded before.
    socks_port: int = 9050
    # Declared but not used by this class.
    command_port: int

    def is_running(self) -> bool:
        """Check if Tor is running by sending HTTP request to SOCKS port.

        Returns True when the response body, or the text of the raised
        URLError, mentions "tor" (case-insensitive).
        NOTE(review): presumably relies on Tor answering plain HTTP on its
        SOCKS port with a message that names Tor — confirm.
        Any other failure yields False.
        """
        try:
            # The context manager closes the response, which was previously
            # left open.
            with urlopen(f"http://127.0.0.1:{self.socks_port}", timeout=5) as response:
                content = response.read().decode("utf-8", errors="ignore")
            return "tor" in content.lower()
        except URLError as e:
            return "tor" in str(e).lower()
        except Exception:
            return False

View File

@@ -6,7 +6,7 @@ from pathlib import Path
from clan_cli.cli import create_parser
hidden_subcommands = ["machine", "b", "f", "m", "se", "st", "va"]
hidden_subcommands = ["machine", "b", "f", "m", "se", "st", "va", "net", "network"]
@dataclass