Add networking module

This adds a (for now hidden) `clan network` command that exposes the
`list`, `ping`, and `overview` subcommands to get information about
configured networks.
ClanServices can now use the exports to define network-specific
information.

This is not the complete feature yet, as we are lacking more tests and
documentation, but merging this now makes it easier to iterate.
This commit is contained in:
lassulus
2025-07-15 08:59:41 +02:00
parent 5022f6f26c
commit 60768cc537
16 changed files with 753 additions and 3 deletions

View File

@@ -0,0 +1,9 @@
from clan_lib.network.network import NetworkTechnologyBase
class NetworkTechnology(NetworkTechnologyBase):
    """Network technology for peers that are reached directly.

    Reachability of a peer is determined by checking SSH connectivity.
    """

    def is_running(self) -> bool:
        """Report the technology as running.

        A direct connection does not depend on any daemon, so this is
        unconditionally ``True``.
        """
        return True

View File

@@ -0,0 +1,133 @@
import importlib
import logging
import time
from abc import ABC, abstractmethod
from dataclasses import dataclass
from functools import cached_property
from typing import Any
from clan_cli.vars.get import get_machine_var
from clan_lib.errors import ClanError
from clan_lib.flake import Flake
from clan_lib.ssh.parse import parse_ssh_uri
from clan_lib.ssh.remote import Remote, check_machine_ssh_reachable
log = logging.getLogger(__name__)
@dataclass(frozen=True)
class Peer:
    """A peer of a network together with the flake it was read from."""

    # Host specification. Two shapes are understood by ``host``:
    #   {"plain": "<address>"}
    #   {"var": {"machine": ..., "generator": ..., "file": ...}}
    _host: dict[str, str | dict[str, str]]
    flake: Flake

    @cached_property
    def host(self) -> str:
        """Resolve the address of this peer.

        A string ``plain`` entry is returned unchanged. A dict ``var`` entry is
        looked up through ``get_machine_var`` (var path
        ``<generator>/<file>``) and its value is decoded to ``str``.

        Raises:
            ClanError: if the specification has neither a string ``plain``
                entry nor a dict ``var`` entry.
        """
        plain_address = self._host.get("plain")
        if isinstance(plain_address, str):
            return plain_address

        var_spec = self._host.get("var")
        if isinstance(var_spec, dict):
            flake_ref = str(self.flake)
            machine = var_spec["machine"]
            var_path = f"{var_spec['generator']}/{var_spec['file']}"
            resolved = get_machine_var(flake_ref, machine, var_path)
            return resolved.value.decode()

        msg = f"Unknown Var Type {self._host}"
        raise ClanError(msg)
class NetworkTechnologyBase(ABC):
    """Base class that every network technology module derives from."""

    @abstractmethod
    def is_running(self) -> bool:
        """Return ``True`` if the network technology is running."""

    # TODO: this will depend on the network implementation if we do user
    # networking at some point, so it should become an abstractmethod.
    def ping(self, peer: Peer) -> None | float:
        """Measure how long the SSH reachability check for ``peer`` takes.

        Args:
            peer: The peer whose ``host`` address is checked.

        Returns:
            The duration of the reachability check in milliseconds when the
            check succeeds; ``None`` when the technology is not running, the
            check reports the peer as not ok, or the check raises.

        Raises:
            Any exception raised while resolving ``peer.host`` (for example
            ``ClanError``) propagates to the caller.
        """
        if not self.is_running():
            return None

        # Resolve the address once, outside the ``try``: a resolution failure
        # is a configuration problem the caller should see, and the error
        # handler below must not trigger the (possibly failing) lookup again.
        host = peer.host

        try:
            # We do not know the machine name here, so use "peer" as a
            # placeholder when building the Remote object.
            remote = parse_ssh_uri(machine_name="peer", address=host)
            # perf_counter() is monotonic; wall-clock time.time() may jump
            # (e.g. NTP adjustments) and produce bogus or negative latencies.
            started = time.perf_counter()
            result = check_machine_ssh_reachable(remote)
            if result.ok:
                return (time.perf_counter() - started) * 1000
            return None
        except Exception as e:
            log.debug("Error checking peer %s: %s", host, e)
            return None
@dataclass(frozen=True)
class Network:
    """A configured network: its peers and the module implementing it."""

    peers: dict[str, Peer]
    # Dotted import path of a module that provides a ``NetworkTechnology`` class.
    module_name: str
    priority: int = 1000

    @cached_property
    def module(self) -> NetworkTechnologyBase:
        """Import ``module_name`` and instantiate its ``NetworkTechnology``."""
        technology_cls = importlib.import_module(self.module_name).NetworkTechnology
        return technology_cls()

    def is_running(self) -> bool:
        """Delegate to the technology module's ``is_running``."""
        technology = self.module
        return technology.is_running()

    def ping(self, peer: str) -> float | None:
        """Ping the peer registered under the name ``peer``.

        Raises:
            KeyError: if no peer with that name exists in this network.
        """
        technology = self.module
        return technology.ping(self.peers[peer])
def networks_from_flake(flake: Flake) -> dict[str, Network]:
    """Build ``Network`` objects from the networking exports of ``flake``.

    Selects ``clan.exports.instances.*.networking`` and turns every non-empty
    entry into a ``Network``; empty entries are skipped. Peers are keyed by
    their ``name`` field.
    """
    exported = flake.select("clan.exports.instances.*.networking")

    found: dict[str, Network] = {}
    for name, spec in exported.items():
        if not spec:
            continue
        found[name] = Network(
            peers={
                entry["name"]: Peer(_host=entry["host"], flake=flake)
                for entry in spec["peers"].values()
            },
            module_name=spec["module"],
            priority=spec["priority"],
        )
    return found
def get_best_remote(machine_name: str, networks: dict[str, Network]) -> Remote | None:
    """Return a ``Remote`` for ``machine_name`` via the best reachable network.

    Networks are tried from highest to lowest ``priority``. The first network
    that lists the machine as a peer, is running, and answers a ping is used.

    Args:
        machine_name: Name of the machine (and of its peer entry).
        networks: Candidate networks keyed by network name.

    Returns:
        A ``Remote`` built from the peer's host address, or ``None`` when the
        machine cannot be reached through any of the networks.

    Exceptions raised by ``Network.ping`` are not handled here.
    """
    by_priority = sorted(
        networks.items(), key=lambda item: item[1].priority, reverse=True
    )
    for network_name, network in by_priority:
        if machine_name not in network.peers:
            continue
        if not network.is_running():
            continue
        # ping() returns the latency in ms or None. Compare against None
        # explicitly: a measured latency of 0.0 is falsy but still a success.
        if network.ping(machine_name) is None:
            continue
        print(f"connecting via {network_name}")
        return Remote.from_ssh_uri(
            machine_name=machine_name,
            address=network.peers[machine_name].host,
        )
    return None
def get_network_overview(networks: dict[str, Network]) -> dict:
    """Collect the running state and per-peer ping results of all networks.

    For each network the result holds ``"status"`` (``True`` when the
    technology is running, otherwise ``None``) and ``"peers"``, a mapping from
    peer name to the ping result. Peers of a network that is not running map
    to ``None``. A peer whose ping raises ``ClanError`` is logged and left out
    of the mapping.
    """
    overview: dict[str, dict[str, Any]] = {}
    for network_name, network in networks.items():
        peer_results: dict[str, Any] = {}
        entry: dict[str, Any] = {"status": None, "peers": peer_results}
        overview[network_name] = entry

        online = bool(network.module.is_running())
        if online:
            entry["status"] = True

        if not online:
            # Nothing to ping: every peer is reported as unreachable.
            for peer_name in network.peers:
                peer_results[peer_name] = None
            continue

        for peer_name in network.peers:
            try:
                peer_results[peer_name] = network.ping(peer_name)
            except ClanError:
                log.warning(
                    f"getting host for machine: {peer_name} in network: {network_name} failed"
                )
    return overview

View File

@@ -0,0 +1,106 @@
from typing import Any
from unittest.mock import MagicMock, patch
from clan_lib.flake import Flake
from clan_lib.network.network import Network, Peer, networks_from_flake
@patch("clan_lib.network.network.get_machine_var")
def test_networks_from_flake(mock_get_machine_var: MagicMock) -> None:
    """networks_from_flake builds Network/Peer objects from the flake exports."""
    flake = MagicMock(spec=Flake)

    # Decrypted var values, keyed by (machine, var path).
    decrypted_addresses = {
        ("machine1", "wireguard/address"): "192.168.1.10",
        ("machine2", "wireguard/address"): "192.168.1.11",
    }

    def mock_var_side_effect(flake_path: str, machine: str, var_path: str) -> Any:
        address = decrypted_addresses.get((machine, var_path))
        if address is None:
            return None
        mock_var = MagicMock()
        mock_var.value.decode.return_value = address
        return mock_var

    mock_get_machine_var.side_effect = mock_var_side_effect

    def wireguard_host(machine: str) -> dict[str, dict[str, str]]:
        # Host spec that points at the machine's wireguard address var.
        return {
            "var": {
                "machine": machine,
                "generator": "wireguard",
                "file": "address",
            }
        }

    # What flake.select is expected to hand back.
    flake.select.return_value = {
        "vpn-network": {
            "peers": {
                "machine1": {"name": "machine1", "host": wireguard_host("machine1")},
                "machine2": {"name": "machine2", "host": wireguard_host("machine2")},
            },
            "module": "clan_lib.network.tor",
            "priority": 1000,
        },
        "local-network": {
            "peers": {
                "machine1": {"name": "machine1", "host": {"plain": "10.0.0.10"}},
                "machine3": {"name": "machine3", "host": {"plain": "10.0.0.12"}},
            },
            "module": "clan_lib.network.direct",
            "priority": 500,
        },
    }

    networks = networks_from_flake(flake)

    # The exports must have been queried exactly once with this selector.
    flake.select.assert_called_once_with("clan.exports.instances.*.networking")

    assert len(networks) == 2
    for expected_name in ("vpn-network", "local-network"):
        assert expected_name in networks

    # vpn-network
    vpn_network = networks["vpn-network"]
    assert isinstance(vpn_network, Network)
    assert vpn_network.module_name == "clan_lib.network.tor"
    assert vpn_network.priority == 1000
    assert len(vpn_network.peers) == 2
    for expected_peer in ("machine1", "machine2"):
        assert expected_peer in vpn_network.peers

    # Accessing .host goes through get_machine_var to decrypt the var.
    machine1_peer = vpn_network.peers["machine1"]
    assert isinstance(machine1_peer, Peer)
    assert machine1_peer.host == "192.168.1.10"
    assert machine1_peer.flake == flake

    # local-network
    local_network = networks["local-network"]
    assert local_network.module_name == "clan_lib.network.direct"
    assert local_network.priority == 500
    assert len(local_network.peers) == 2
    for expected_peer in ("machine1", "machine3"):
        assert expected_peer in local_network.peers

View File

@@ -0,0 +1,20 @@
from urllib.error import URLError
from urllib.request import urlopen
from .network import NetworkTechnologyBase
class NetworkTechnology(NetworkTechnologyBase):
    """Tor network technology."""

    # Local port probed by is_running(). The default is the value that was
    # previously hard-coded in the probe URL.
    socks_port: int = 9050
    command_port: int

    def is_running(self) -> bool:
        """Check if Tor is running by sending an HTTP request to the SOCKS port.

        Returns:
            ``True`` if either the response body or the ``URLError`` raised by
            the request mentions "tor" (case-insensitive); ``False`` for any
            other outcome, including any other exception.
        """
        probe_url = f"http://127.0.0.1:{self.socks_port}"
        try:
            # Use the response as a context manager so the connection is
            # closed even when reading or decoding fails.
            with urlopen(probe_url, timeout=5) as response:
                content = response.read().decode("utf-8", errors="ignore")
            return "tor" in content.lower()
        except URLError as e:
            # The error text itself is inspected for a mention of Tor.
            return "tor" in str(e).lower()
        except Exception:
            return False