clan-cli network: refactor, use new networking in ssh and install commands

This commit is contained in:
lassulus
2025-08-02 11:01:52 +02:00
committed by DavHau
parent 017989841d
commit a50b25eea2
9 changed files with 313 additions and 371 deletions

View File

@@ -19,12 +19,10 @@ class NetworkTechnology(NetworkTechnologyBase):
"""Direct connections are always 'running' as they don't require a daemon"""
return True
def ping(self, peer: Peer) -> None | float:
def ping(self, remote: Remote) -> None | float:
if self.is_running():
try:
# Parse the peer's host address to create a Remote object, use peer here since we don't have the machine_name here
remote = Remote.from_ssh_uri(machine_name="peer", address=peer.host)
# Use the existing SSH reachability check
now = time.time()
@@ -33,7 +31,7 @@ class NetworkTechnology(NetworkTechnologyBase):
return (time.time() - now) * 1000
except ClanError as e:
log.debug(f"Error checking peer {peer.host}: {e}")
log.debug(f"Error checking peer {remote}: {e}")
return None
return None

View File

@@ -12,10 +12,10 @@ from clan_cli.vars.get import get_machine_var
from clan_lib.errors import ClanError
from clan_lib.flake import Flake
from clan_lib.import_utils import ClassSource, import_with_source
from clan_lib.ssh.remote import Remote
if TYPE_CHECKING:
from clan_lib.machines.machines import Machine
from clan_lib.ssh.remote import Remote
log = logging.getLogger(__name__)
@@ -52,7 +52,7 @@ class Peer:
.lstrip("\n")
)
raise ClanError(msg)
return var.value.decode()
return var.value.decode().strip()
msg = f"Unknown Var Type {self._host}"
raise ClanError(msg)
@@ -76,7 +76,7 @@ class Network:
return self.module.is_running()
def ping(self, peer: str) -> float | None:
return self.module.ping(self.peers[peer])
return self.module.ping(self.remote(peer))
def remote(self, peer: str) -> "Remote":
# TODO raise exception if peer is not in peers
@@ -96,7 +96,7 @@ class NetworkTechnologyBase(ABC):
pass
@abstractmethod
def ping(self, peer: Peer) -> None | float:
def ping(self, remote: "Remote") -> None | float:
pass
@contextmanager
@@ -109,12 +109,18 @@ def networks_from_flake(flake: Flake) -> dict[str, Network]:
# TODO more precaching, for example for vars
flake.precache(
[
"clan.exports.instances.*.networking",
"clan.?exports.instances.*.networking",
]
)
networks: dict[str, Network] = {}
networks_ = flake.select("clan.exports.instances.*.networking")
for network_name, network in networks_.items():
networks_ = flake.select("clan.?exports.instances.*.networking")
if "exports" not in networks_:
msg = """You are not exporting the clan exports through your flake.
Please add exports next to clanInternals and nixosConfiguration into the global flake.
"""
log.warning(msg)
return {}
for network_name, network in networks_["exports"].items():
if network:
peers: dict[str, Peer] = {}
for _peer in network["peers"].values():
@@ -129,19 +135,8 @@ def networks_from_flake(flake: Flake) -> dict[str, Network]:
return networks
def get_best_network(machine_name: str, networks: dict[str, Network]) -> Network | None:
    """Return the highest-priority running network through which a machine answers a ping.

    Networks are tried in descending ``priority`` order; networks that do not
    list ``machine_name`` among their peers are skipped.

    Args:
        machine_name: Name of the peer to look up and ping.
        networks: Candidate networks keyed by network name.

    Returns:
        The first network that is running and whose ping succeeds, or ``None``
        when no network qualifies.
    """
    by_priority = sorted(networks.items(), key=lambda item: -item[1].priority)
    for network_name, network in by_priority:
        if machine_name not in network.peers:
            continue
        # ping() reports failure as None and success as a latency value, so
        # compare against None: a measured latency of 0.0 is still reachable.
        if network.is_running() and network.ping(machine_name) is not None:
            print(f"connecting via {network_name}")
            return network
    return None
@contextmanager
def get_remote_for_machine(machine: "Machine") -> Iterator["Remote"]:
def get_best_remote(machine: "Machine") -> Iterator["Remote"]:
"""
Context manager that yields the best remote connection for a machine following this priority:
1. If machine has targetHost in inventory, return a direct connection
@@ -158,9 +153,6 @@ def get_remote_for_machine(machine: "Machine") -> Iterator["Remote"]:
ClanError: If no connection method works
"""
# Get networks from the flake
networks = networks_from_flake(machine.flake)
# Step 1: Check if targetHost is set in inventory
inv_machine = machine.get_inv_machine()
target_host = inv_machine.get("deploy", {}).get("targetHost")
@@ -176,39 +168,45 @@ def get_remote_for_machine(machine: "Machine") -> Iterator["Remote"]:
log.debug(f"Inventory targetHost not reachable for {machine.name}: {e}")
# Step 2: Try existing networks by priority
sorted_networks = sorted(networks.items(), key=lambda x: -x[1].priority)
try:
networks = networks_from_flake(machine.flake)
for network_name, network in sorted_networks:
if machine.name not in network.peers:
continue
sorted_networks = sorted(networks.items(), key=lambda x: -x[1].priority)
# Check if network is running and machine is reachable
if network.is_running():
try:
ping_time = network.ping(machine.name)
if ping_time is not None:
log.info(
f"Machine {machine.name} reachable via {network_name} network"
)
yield network.remote(machine.name)
return
except Exception as e:
log.debug(f"Failed to reach {machine.name} via {network_name}: {e}")
else:
try:
log.debug(f"Establishing connection for network {network_name}")
with network.module.connection(network) as connected_network:
ping_time = connected_network.ping(machine.name)
for network_name, network in sorted_networks:
if machine.name not in network.peers:
continue
# Check if network is running and machine is reachable
log.debug(f"trying to connect via {network_name}")
if network.is_running():
try:
ping_time = network.ping(machine.name)
if ping_time is not None:
log.info(
f"Machine {machine.name} reachable via {network_name} network after connection"
f"Machine {machine.name} reachable via {network_name} network"
)
yield connected_network.remote(machine.name)
yield network.remote(machine.name)
return
except Exception as e:
log.debug(
f"Failed to establish connection to {machine.name} via {network_name}: {e}"
)
except Exception as e:
log.debug(f"Failed to reach {machine.name} via {network_name}: {e}")
else:
try:
log.debug(f"Establishing connection for network {network_name}")
with network.module.connection(network) as connected_network:
ping_time = connected_network.ping(machine.name)
if ping_time is not None:
log.info(
f"Machine {machine.name} reachable via {network_name} network after connection"
)
yield connected_network.remote(machine.name)
return
except Exception as e:
log.debug(
f"Failed to establish connection to {machine.name} via {network_name}: {e}"
)
except Exception as e:
log.debug(f"Failed to use networking modules to determine machines remote: {e}")
# Step 3: Try targetHost from machine nixos config
try:

View File

@@ -26,46 +26,48 @@ def test_networks_from_flake(mock_get_machine_var: MagicMock) -> None:
# Define the expected return value from flake.select
mock_networking_data = {
"vpn-network": {
"peers": {
"machine1": {
"name": "machine1",
"host": {
"var": {
"machine": "machine1",
"generator": "wireguard",
"file": "address",
}
"exports": {
"vpn-network": {
"peers": {
"machine1": {
"name": "machine1",
"host": {
"var": {
"machine": "machine1",
"generator": "wireguard",
"file": "address",
}
},
},
"machine2": {
"name": "machine2",
"host": {
"var": {
"machine": "machine2",
"generator": "wireguard",
"file": "address",
}
},
},
},
"machine2": {
"name": "machine2",
"host": {
"var": {
"machine": "machine2",
"generator": "wireguard",
"file": "address",
}
"module": "clan_lib.network.tor",
"priority": 1000,
},
"local-network": {
"peers": {
"machine1": {
"name": "machine1",
"host": {"plain": "10.0.0.10"},
},
"machine3": {
"name": "machine3",
"host": {"plain": "10.0.0.12"},
},
},
"module": "clan_lib.network.direct",
"priority": 500,
},
"module": "clan_lib.network.tor",
"priority": 1000,
},
"local-network": {
"peers": {
"machine1": {
"name": "machine1",
"host": {"plain": "10.0.0.10"},
},
"machine3": {
"name": "machine3",
"host": {"plain": "10.0.0.12"},
},
},
"module": "clan_lib.network.direct",
"priority": 500,
},
}
}
# Mock the select method
@@ -75,7 +77,7 @@ def test_networks_from_flake(mock_get_machine_var: MagicMock) -> None:
networks = networks_from_flake(flake)
# Verify the flake.select was called with the correct pattern
flake.select.assert_called_once_with("clan.exports.instances.*.networking")
flake.select.assert_called_once_with("clan.?exports.instances.*.networking")
# Verify the returned networks
assert len(networks) == 2

View File

@@ -1,5 +1,8 @@
import json
import logging
from collections.abc import Iterator
from contextlib import contextmanager
from dataclasses import dataclass
from pathlib import Path
from typing import Any
@@ -13,9 +16,33 @@ from clan_lib.ssh.remote import Remote
log = logging.getLogger(__name__)
def parse_qr_json_to_networks(
qr_data: dict[str, Any], flake: Flake
) -> dict[str, dict[str, Any]]:
@dataclass(frozen=True)
class RemoteWithNetwork:
    """Immutable pairing of a remote with the network it is reached through."""

    # Network whose module is used to establish the connection and to ping.
    network: Network
    # Remote (SSH target) that is pinged and handed to the caller.
    remote: Remote
@dataclass(frozen=True)
class QRCodeData:
    """Connection candidates decoded from an installer QR code."""

    # Candidate remotes, each paired with its network; tried in list order.
    addresses: list[RemoteWithNetwork]

    @contextmanager
    def get_best_remote(self) -> Iterator[Remote]:
        """Yield the first remote that answers a ping over its network.

        For each address the network's connection is established, the remote
        is pinged through it, and on success the remote is yielded while the
        connection stays open for the duration of the ``with`` body.

        Yields:
            The first reachable remote.

        Raises:
            ClanError: If none of the addresses is reachable.
        """
        for address in self.addresses:
            # Tracks whether the caller's ``with`` body has already run, so that
            # failures are only treated as "try the next address" beforehand.
            handed_out = False
            try:
                log.debug(f"Establishing connection via {address}")
                with address.network.module.connection(
                    address.network
                ) as connected_network:
                    ping_time = connected_network.module.ping(address.remote)
                    if ping_time is not None:
                        log.info(f"reachable via {address} after connection")
                        handed_out = True
                        yield address.remote
            except Exception as e:
                if handed_out:
                    # The error came from the caller's body (or from cleanup
                    # after it); swallowing it would make the generator yield
                    # a second time, which contextmanager rejects.
                    raise
                log.debug(f"Failed to establish connection via {address}: {e}")
            if handed_out:
                # A context manager generator must yield exactly once.
                return

        msg = "Could not reach the installer via any address from the QR code"
        raise ClanError(msg)
def read_qr_json(qr_data: dict[str, Any], flake: Flake) -> QRCodeData:
"""
Parse QR code JSON contents and output a dict of networks with remotes.
@@ -45,32 +72,31 @@ def parse_qr_json_to_networks(
}
}
"""
networks: dict[str, dict[str, Any]] = {}
addresses: list[RemoteWithNetwork] = []
password = qr_data.get("pass")
# Process clearnet addresses
clearnet_addrs = qr_data.get("addrs", [])
if clearnet_addrs:
# For now, just use the first address
addr = clearnet_addrs[0]
if isinstance(addr, str):
peer = Peer(name="installer", _host={"plain": addr}, flake=flake)
network = Network(
peers={"installer": peer},
module_name="clan_lib.network.direct",
priority=1000,
)
# Create the remote with password
remote = Remote.from_ssh_uri(
machine_name="installer",
address=addr,
).override(password=password)
for addr in clearnet_addrs:
if isinstance(addr, str):
peer = Peer(name="installer", _host={"plain": addr}, flake=flake)
network = Network(
peers={"installer": peer},
module_name="clan_lib.network.direct",
priority=1000,
)
# Create the remote with password
remote = Remote.from_ssh_uri(
machine_name="installer",
address=addr,
).override(password=password)
networks["direct"] = {"network": network, "remote": remote}
else:
msg = f"Invalid address format: {addr}"
raise ClanError(msg)
addresses.append(RemoteWithNetwork(network=network, remote=remote))
else:
msg = f"Invalid address format: {addr}"
raise ClanError(msg)
# Process tor address
if tor_addr := qr_data.get("tor"):
@@ -86,12 +112,12 @@ def parse_qr_json_to_networks(
address=tor_addr,
).override(password=password, socks_port=9050, socks_wrapper=["torify"])
networks["tor"] = {"network": network, "remote": remote}
addresses.append(RemoteWithNetwork(network=network, remote=remote))
return networks
return QRCodeData(addresses=addresses)
def parse_qr_image_to_json(image_path: Path) -> dict[str, Any]:
def read_qr_image(image_path: Path) -> dict[str, Any]:
"""
Parse a QR code image and extract the JSON data.

View File

@@ -8,6 +8,7 @@ from typing import TYPE_CHECKING
from clan_lib.errors import ClanError
from clan_lib.network import Network, NetworkTechnologyBase, Peer
from clan_lib.network.tor.lib import is_tor_running, spawn_tor
from clan_lib.ssh.remote import Remote
if TYPE_CHECKING:
from clan_lib.ssh.remote import Remote
@@ -27,11 +28,9 @@ class NetworkTechnology(NetworkTechnologyBase):
"""Check if Tor is running by sending HTTP request to SOCKS port."""
return is_tor_running(self.proxy)
def ping(self, peer: Peer) -> None | float:
def ping(self, remote: Remote) -> None | float:
if self.is_running():
try:
remote = self.remote(peer)
# Use the existing SSH reachability check
now = time.time()
remote.check_machine_ssh_reachable()
@@ -39,7 +38,7 @@ class NetworkTechnology(NetworkTechnologyBase):
return (time.time() - now) * 1000
except ClanError as e:
log.debug(f"Error checking peer {peer.host}: {e}")
log.debug(f"Error checking peer {remote}: {e}")
return None
return None