clan-cli: Rename Host -> Remote move to clan_lib and mark as frozen

This commit is contained in:
Qubasa
2025-05-22 14:08:27 +02:00
parent 91994445ff
commit e14f30bdc0
31 changed files with 453 additions and 429 deletions

View File

@@ -30,8 +30,9 @@ def check_machine_online(
timeout = opts.timeout if opts and opts.timeout else 2
for _ in range(opts.retries if opts and opts.retries else 10):
with machine.target_host() as target:
res = target.run(
host = machine.target_host()
with host.ssh_control_master() as ssh:
res = ssh.run(
["true"],
RunOpts(timeout=timeout, check=False, needs_user_terminal=True),
)

View File

@@ -6,13 +6,14 @@ from clan_lib.errors import ClanError
def create_backup(machine: Machine, provider: str | None = None) -> None:
machine.info(f"creating backup for {machine.name}")
backup_scripts = machine.eval_nix("config.clan.core.backups")
host = machine.target_host()
if provider is None:
if not backup_scripts["providers"]:
msg = "No providers specified"
raise ClanError(msg)
with machine.target_host() as host:
with host.ssh_control_master() as ssh:
for provider in backup_scripts["providers"]:
proc = host.run(
proc = ssh.run(
[backup_scripts["providers"][provider]["create"]],
)
if proc.returncode != 0:
@@ -23,8 +24,8 @@ def create_backup(machine: Machine, provider: str | None = None) -> None:
if provider not in backup_scripts["providers"]:
msg = f"provider {provider} not found"
raise ClanError(msg)
with machine.target_host() as host:
proc = host.run(
with host.ssh_control_master() as ssh:
proc = ssh.run(
[backup_scripts["providers"][provider]["create"]],
)
if proc.returncode != 0:

View File

@@ -2,10 +2,10 @@ import json
from dataclasses import dataclass
from clan_cli.machines.machines import Machine
from clan_cli.ssh.host import Host
from clan_lib.cmd import Log, RunOpts
from clan_lib.errors import ClanError
from clan_lib.ssh.remote import Remote
@dataclass
@@ -14,14 +14,15 @@ class Backup:
job_name: str | None = None
def list_provider(machine: Machine, host: Host, provider: str) -> list[Backup]:
def list_provider(machine: Machine, host: Remote, provider: str) -> list[Backup]:
results = []
backup_metadata = machine.eval_nix("config.clan.core.backups")
list_command = backup_metadata["providers"][provider]["list"]
proc = host.run(
[list_command],
RunOpts(log=Log.NONE, check=False),
)
with host.ssh_control_master() as ssh:
proc = ssh.run(
[list_command],
RunOpts(log=Log.NONE, check=False),
)
if proc.returncode != 0:
# TODO this should be a warning, only raise exception if no providers succeed
msg = f"Failed to list backups for provider {provider}:"
@@ -44,12 +45,12 @@ def list_provider(machine: Machine, host: Host, provider: str) -> list[Backup]:
def list_backups(machine: Machine, provider: str | None = None) -> list[Backup]:
backup_metadata = machine.eval_nix("config.clan.core.backups")
results = []
with machine.target_host() as host:
if provider is None:
for _provider in backup_metadata["providers"]:
results += list_provider(machine, host, _provider)
host = machine.target_host()
if provider is None:
for _provider in backup_metadata["providers"]:
results += list_provider(machine, host, _provider)
else:
results += list_provider(machine, host, provider)
else:
results += list_provider(machine, host, provider)
return results

View File

@@ -1,12 +1,12 @@
from clan_cli.machines.machines import Machine
from clan_cli.ssh.host import Host
from clan_lib.cmd import Log, RunOpts
from clan_lib.errors import ClanError
from clan_lib.ssh.remote import Remote
def restore_service(
machine: Machine, host: Host, name: str, provider: str, service: str
machine: Machine, host: Remote, name: str, provider: str, service: str
) -> None:
backup_metadata = machine.eval_nix("config.clan.core.backups")
backup_folders = machine.eval_nix("config.clan.core.state")
@@ -21,34 +21,35 @@ def restore_service(
# FIXME: If we have too many folders, this might overflow the stack.
env["FOLDERS"] = ":".join(set(folders))
if pre_restore := backup_folders[service]["preRestoreCommand"]:
proc = host.run(
[pre_restore],
with host.ssh_control_master() as ssh:
if pre_restore := backup_folders[service]["preRestoreCommand"]:
proc = ssh.run(
[pre_restore],
RunOpts(log=Log.STDERR),
extra_env=env,
)
if proc.returncode != 0:
msg = f"failed to run preRestoreCommand: {pre_restore}, error was: {proc.stdout}"
raise ClanError(msg)
proc = ssh.run(
[backup_metadata["providers"][provider]["restore"]],
RunOpts(log=Log.STDERR),
extra_env=env,
)
if proc.returncode != 0:
msg = f"failed to run preRestoreCommand: {pre_restore}, error was: {proc.stdout}"
msg = f"failed to restore backup: {backup_metadata['providers'][provider]['restore']}"
raise ClanError(msg)
proc = host.run(
[backup_metadata["providers"][provider]["restore"]],
RunOpts(log=Log.STDERR),
extra_env=env,
)
if proc.returncode != 0:
msg = f"failed to restore backup: {backup_metadata['providers'][provider]['restore']}"
raise ClanError(msg)
if post_restore := backup_folders[service]["postRestoreCommand"]:
proc = host.run(
[post_restore],
RunOpts(log=Log.STDERR),
extra_env=env,
)
if proc.returncode != 0:
msg = f"failed to run postRestoreCommand: {post_restore}, error was: {proc.stdout}"
raise ClanError(msg)
if post_restore := backup_folders[service]["postRestoreCommand"]:
proc = ssh.run(
[post_restore],
RunOpts(log=Log.STDERR),
extra_env=env,
)
if proc.returncode != 0:
msg = f"failed to run postRestoreCommand: {post_restore}, error was: {proc.stdout}"
raise ClanError(msg)
def restore_backup(
@@ -58,7 +59,8 @@ def restore_backup(
service: str | None = None,
) -> None:
errors = []
with machine.target_host() as host:
host = machine.target_host()
with host.ssh_control_master():
if service is None:
backup_folders = machine.eval_nix("config.clan.core.state")
for _service in backup_folders:

View File

View File

@@ -0,0 +1,73 @@
import re
import urllib.parse
from pathlib import Path
from typing import TYPE_CHECKING, Any
from clan_cli.ssh.host_key import HostKeyCheck
from clan_lib.errors import ClanError
if TYPE_CHECKING:
from clan_lib.ssh.remote import Remote
def parse_deployment_address(
*,
machine_name: str,
address: str,
host_key_check: HostKeyCheck,
forward_agent: bool = True,
meta: dict[str, Any] | None = None,
private_key: Path | None = None,
) -> "Remote":
parts = address.split("?", maxsplit=1)
endpoint, maybe_options = parts if len(parts) == 2 else (parts[0], "")
parts = endpoint.split("@")
match len(parts):
case 2:
user, host_port = parts
case 1:
user, host_port = "root", parts[0]
case _:
msg = f"Invalid host, got `{address}` but expected something like `[user@]hostname[:port]`"
raise ClanError(msg)
# Make this check now rather than failing with a `ValueError`
# when looking up the port from the `urlsplit` result below:
if host_port.count(":") > 1 and not re.match(r".*\[.*]", host_port):
msg = f"Invalid hostname: {address}. IPv6 addresses must be enclosed in brackets , e.g. [::1]"
raise ClanError(msg)
options: dict[str, str] = {}
for o in maybe_options.split("&"):
if len(o) == 0:
continue
parts = o.split("=", maxsplit=1)
if len(parts) != 2:
msg = (
f"Invalid option in host `{address}`: option `{o}` does not have "
f"a value (i.e. expected something like `name=value`)"
)
raise ClanError(msg)
name, value = parts
options[name] = value
result = urllib.parse.urlsplit(f"//{host_port}")
if not result.hostname:
msg = f"Invalid host, got `{address}` but expected something like `[user@]hostname[:port]`"
raise ClanError(msg)
hostname = result.hostname
port = result.port
from clan_lib.ssh.remote import Remote
return Remote(
address=hostname,
user=user,
port=port,
private_key=private_key,
host_key_check=host_key_check,
command_prefix=machine_name,
forward_agent=forward_agent,
ssh_options=options,
)

View File

@@ -0,0 +1,300 @@
# ruff: noqa: SLF001
import logging
import os
import shlex
import socket
import subprocess
import sys
from collections.abc import Iterator
from contextlib import contextmanager
from dataclasses import dataclass, field
from pathlib import Path
from shlex import quote
from tempfile import TemporaryDirectory
from clan_cli.ssh.host_key import HostKeyCheck
from clan_lib.cmd import CmdOut, RunOpts, run
from clan_lib.colors import AnsiColor
from clan_lib.errors import ClanError # Assuming these are available
from clan_lib.nix import nix_shell
from clan_lib.ssh.parse import parse_deployment_address
# Logger used by Remote to log the command lines it executes.
cmdlog = logging.getLogger(__name__)
# Seconds until a message is printed when _run produces no output.
NO_OUTPUT_TIMEOUT = 20
@dataclass(frozen=True)
class Remote:
    """
    Immutable description of a machine reachable over ssh, plus helpers to
    build ssh command lines and run commands on it.

    Most ssh helpers default to `control_master=True` and therefore need an
    instance obtained from `ssh_control_master()`; see `ssh_cmd_opts`.
    """

    address: str
    user: str
    # Prefix attached to log output / RunOpts for commands of this remote.
    command_prefix: str
    port: int | None = None
    private_key: Path | None = None
    # If set, ssh is wrapped in `sshpass -p <password>`.
    password: str | None = None
    forward_agent: bool = True
    host_key_check: HostKeyCheck = HostKeyCheck.ASK
    verbose_ssh: bool = False
    # Extra `-o name=value` options passed to ssh.
    ssh_options: dict[str, str] = field(default_factory=dict)
    # If set, ssh connects through a SOCKS5 proxy on 127.0.0.1:9050.
    tor_socks: bool = False
    # Directory holding the ControlMaster socket. Only set on the instances
    # yielded by `ssh_control_master()`.
    _control_path_dir: Path | None = None

    def __str__(self) -> str:
        return self.target

    @property
    def target(self) -> str:
        """The `user@address` string passed to ssh."""
        return f"{self.user}@{self.address}"

    @classmethod
    def with_user(cls, host: "Remote", user: str) -> "Remote":
        """
        Return a new Remote object with the specified user.

        The control path directory of `host` is not carried over.
        """
        return cls(
            address=host.address,
            user=user,
            command_prefix=host.command_prefix,
            port=host.port,
            private_key=host.private_key,
            password=host.password,
            forward_agent=host.forward_agent,
            host_key_check=host.host_key_check,
            verbose_ssh=host.verbose_ssh,
            ssh_options=host.ssh_options,
            tor_socks=host.tor_socks,
        )

    @classmethod
    def from_deployment_address(
        cls,
        *,
        machine_name: str,
        address: str,
        host_key_check: HostKeyCheck,
        forward_agent: bool = True,
        private_key: Path | None = None,
    ) -> "Remote":
        """
        Parse a deployment address and return a Remote object.
        """
        return parse_deployment_address(
            machine_name=machine_name,
            address=address,
            host_key_check=host_key_check,
            forward_agent=forward_agent,
            private_key=private_key,
        )

    def run_local(
        self,
        cmd: list[str],
        opts: RunOpts | None = None,
        extra_env: dict[str, str] | None = None,
    ) -> CmdOut:
        """
        Run a command locally on behalf of this remote.

        The command is logged and run with this remote's `command_prefix`.
        `extra_env` is layered on top of `opts.env` (or the current process
        environment if `opts.env` is unset or empty).
        """
        if opts is None:
            opts = RunOpts()
        # Work on a copy so the caller's env mapping is never modified.
        env = dict(opts.env) if opts.env else os.environ.copy()
        if extra_env:
            env.update(extra_env)

        displayed_cmd = " ".join(cmd)
        cmdlog.info(
            f"$ {displayed_cmd}",
            extra={
                "command_prefix": self.command_prefix,
                "color": AnsiColor.GREEN.value,
            },
        )
        opts.env = env
        opts.prefix = self.command_prefix
        return run(cmd, opts)

    @contextmanager
    def ssh_control_master(self) -> Iterator["Remote"]:
        """
        Context manager to manage SSH ControlMaster connections.

        Creates a temporary directory for the control socket and yields a copy
        of this Remote that uses it. The directory is removed when the context
        exits, including when the body raises.
        """
        directory = None
        # NOTE(review): presumably the default macOS TMPDIR is avoided to keep
        # the control socket path short -- confirm.
        if sys.platform == "darwin" and os.environ.get("TMPDIR", "").startswith(
            "/var/folders/"
        ):
            directory = "/tmp/"
        # Use more specific prefix for the temp dir to avoid potential collisions if multiple hosts used
        prefix = f"clan-ssh-{self.address}-{self.port or 22}-{self.user}-"
        # `with` guarantees cleanup even if the caller's block raises.
        with TemporaryDirectory(prefix=prefix, dir=directory) as temp_dir:
            yield Remote(
                address=self.address,
                user=self.user,
                command_prefix=self.command_prefix,
                port=self.port,
                private_key=self.private_key,
                password=self.password,
                forward_agent=self.forward_agent,
                host_key_check=self.host_key_check,
                verbose_ssh=self.verbose_ssh,
                ssh_options=self.ssh_options,
                tor_socks=self.tor_socks,
                _control_path_dir=Path(temp_dir),
            )

    def run(
        self,
        cmd: list[str],
        opts: RunOpts | None = None,
        become_root: bool = False,
        extra_env: dict[str, str] | None = None,
        tty: bool = False,
        verbose_ssh: bool = False,
        quiet: bool = False,
        control_master: bool = True,
    ) -> CmdOut:
        """
        Run a command on the remote via ssh.

        - `become_root`: prefix the remote command with `sudo --` unless the
          ssh user already is `root`.
        - `extra_env`: variables exported in the remote shell before `cmd`.
        - `tty` / `verbose_ssh`: pass `-t` / `-v` to ssh.
        - `quiet`: do not log the command line.
        - `control_master`: see `ssh_cmd_opts`.

        `opts` is modified in place (prefix, needs_user_terminal, shell).
        Raises `ClanError` if `opts.cwd` is set, as it is not supported remotely.
        """
        if extra_env is None:
            extra_env = {}
        if opts is None:
            opts = RunOpts()

        sudo = "sudo -- " if become_root and self.user != "root" else ""
        env_vars = [f"{shlex.quote(k)}={shlex.quote(v)}" for k, v in extra_env.items()]

        if opts.prefix is None:
            opts.prefix = self.command_prefix
        opts.needs_user_terminal = True
        if opts.cwd is not None:
            msg = "cwd is not supported for remote commands"
            raise ClanError(msg)

        export_cmd = f"export {' '.join(env_vars)}; " if env_vars else ""
        displayed_cmd = export_cmd + " ".join(cmd)
        if not quiet:
            cmdlog.info(
                f"$ {displayed_cmd}",
                extra={
                    "command_prefix": self.command_prefix,
                    "color": AnsiColor.GREEN.value,
                },
            )

        bash_cmd = export_cmd
        if opts.shell:
            # The remote bash interprets the command; the local ssh
            # invocation itself must not go through a shell.
            bash_cmd += " ".join(cmd)
            opts.shell = False
        else:
            # `cmd` is passed as positional arguments after `--` below.
            bash_cmd += 'exec "$@"'

        ssh_cmd_list = self.ssh_cmd(
            verbose_ssh=verbose_ssh, tty=tty, control_master=control_master
        )
        ssh_cmd_list.extend(
            ["--", f"{sudo}bash -c {quote(bash_cmd)} -- {' '.join(map(quote, cmd))}"]
        )
        return run(ssh_cmd_list, opts)

    def nix_ssh_env(
        self,
        env: dict[str, str] | None = None,
        control_master: bool = True,
    ) -> dict[str, str]:
        """
        Set `NIX_SSHOPTS` in `env` to this remote's ssh options and return it.

        A passed-in `env` is modified in place; a new dict is created if
        `env` is None.
        """
        if env is None:
            env = {}
        env["NIX_SSHOPTS"] = " ".join(self.ssh_cmd_opts(control_master=control_master))
        return env

    def ssh_cmd_opts(
        self,
        control_master: bool = True,
    ) -> list[str]:
        """
        Build the list of ssh command line options for this remote.

        ControlMaster options are added whenever a control path directory is
        set. Raises `ClanError` if `control_master` is requested but this
        instance has no control path directory.
        """
        control_dir = self._control_path_dir
        if control_dir is None and control_master:
            msg = "Control path directory is not set. Please with Remote.ssh_control_master() as ctx to set it."
            raise ClanError(msg)

        ssh_opts = ["-A"] if self.forward_agent else []
        if self.port:
            ssh_opts.extend(["-p", str(self.port)])
        for k, v in self.ssh_options.items():
            ssh_opts.extend(["-o", f"{k}={shlex.quote(v)}"])
        ssh_opts.extend(self.host_key_check.to_ssh_opt())
        if self.private_key:
            ssh_opts.extend(["-i", str(self.private_key)])

        if control_dir:
            socket_path = (
                control_dir / f"clan-{self.address}-{self.port or 22}-{self.user}"
            )
            ssh_opts.extend(["-o", "ControlPersist=30m"])
            ssh_opts.extend(["-o", f"ControlPath={socket_path}"])
            ssh_opts.extend(["-o", "ControlMaster=auto"])

        return ssh_opts

    def ssh_cmd(
        self, verbose_ssh: bool = False, tty: bool = False, control_master: bool = True
    ) -> list[str]:
        """
        Build the ssh command line (without a remote command) for this remote,
        wrapped by `nix_shell` with the extra packages it needs.
        """
        packages = []
        password_args = []
        if self.password:
            # NOTE(review): the password is passed as a command line argument.
            packages.append("sshpass")
            password_args = ["sshpass", "-p", self.password]

        current_ssh_opts = self.ssh_cmd_opts(control_master=control_master)
        if verbose_ssh or self.verbose_ssh:
            current_ssh_opts.extend(["-v"])
        if tty:
            current_ssh_opts.extend(["-t"])

        if self.tor_socks:
            packages.append("netcat")
            current_ssh_opts.extend(
                ["-o", "ProxyCommand=nc -x 127.0.0.1:9050 -X 5 %h %p"]
            )

        cmd = [
            *password_args,
            "ssh",
            self.target,
            *current_ssh_opts,
        ]
        return nix_shell(packages, cmd)

    def interactive_ssh(self) -> None:
        """Run ssh to this remote with `-t`; the exit status is ignored."""
        cmd_list = self.ssh_cmd(tty=True)
        subprocess.run(cmd_list)
def is_ssh_reachable(host: "Remote") -> bool:
    """
    Return True if a TCP connection to the host's ssh port can be opened.

    Connects to `host.address` on `host.port` (22 if unset), with a timeout
    of 2 seconds per connection attempt. Any `OSError` (refused, timeout,
    name resolution failure, ...) is reported as not reachable.
    """
    try:
        # create_connection tries every address the name resolves to
        # (IPv4 and IPv6) instead of guessing the family from the string.
        with socket.create_connection((host.address, host.port or 22), timeout=2):
            return True
    except OSError:
        return False

View File

@@ -14,7 +14,6 @@ from clan_cli.machines.machines import Machine
from clan_cli.secrets.key import generate_key
from clan_cli.secrets.sops import maybe_get_admin_public_key
from clan_cli.secrets.users import add_user
from clan_cli.ssh.host import Host
from clan_cli.ssh.host_key import HostKeyCheck
from clan_cli.vars.generate import generate_vars_for_machine, get_generators_closure
@@ -28,6 +27,7 @@ from clan_lib.inventory import patch_inventory_with
from clan_lib.nix import nix_command
from clan_lib.nix_models.inventory import Machine as InventoryMachine
from clan_lib.nix_models.inventory import MachineDeploy
from clan_lib.ssh.remote import Remote
log = logging.getLogger(__name__)
@@ -118,9 +118,9 @@ def fix_flake_inputs(clan_dir: Path, clan_core_dir: Path) -> None:
@pytest.mark.with_core
@pytest.mark.skipif(sys.platform == "darwin", reason="sshd fails to start on darwin")
def test_clan_create_api(
temporary_home: Path, test_lib_root: Path, clan_core: Path, hosts: list[Host]
temporary_home: Path, test_lib_root: Path, clan_core: Path, hosts: list[Remote]
) -> None:
host_ip = hosts[0].host
host_ip = hosts[0].address
host_user = hosts[0].user
vm_name = "test-clan"
clan_core_dir_var = str(clan_core)
@@ -176,7 +176,9 @@ def test_clan_create_api(
clan_dir_flake = Flake(str(dest_clan_dir))
machines: list[Machine] = []
host = Host(user=host_user, host=host_ip, port=int(ssh_port_var))
host = Remote(
user=host_user, address=host_ip, port=int(ssh_port_var), command_prefix=vm_name
)
# TODO: We need to merge the Host and Machine classes; this duplicated targetHost stuff is a nightmare
inv_machine = InventoryMachine(
name=vm_name, deploy=MachineDeploy(targetHost=f"{host.target}:{ssh_port_var}")