Bind SSH ControlMaster to the lifetime of the CLI

This commit is contained in:
Jörg Thalheim
2025-05-04 16:11:26 +02:00
parent 44b237d9be
commit b01691cb64
25 changed files with 184 additions and 159 deletions

View File

@@ -139,26 +139,26 @@ def generate_machine_hardware_info(opts: HardwareGenerateOptions) -> HardwareCon
"--show-hardware-config",
]
host = machine.target_host
host.ssh_options["StrictHostKeyChecking"] = "accept-new"
host.ssh_options["UserKnownHostsFile"] = "/dev/null"
if opts.password:
host.password = opts.password
with machine.target_host() as host:
host.ssh_options["StrictHostKeyChecking"] = "accept-new"
host.ssh_options["UserKnownHostsFile"] = "/dev/null"
if opts.password:
host.password = opts.password
out = host.run(config_command, become_root=True, opts=RunOpts(check=False))
if out.returncode != 0:
if "nixos-facter" in out.stderr and "not found" in out.stderr:
machine.error(str(out.stderr))
msg = (
"Please use our custom nixos install images from https://github.com/nix-community/nixos-images/releases/tag/nixos-unstable. "
"nixos-factor only works on nixos / clan systems currently."
)
out = host.run(config_command, become_root=True, opts=RunOpts(check=False))
if out.returncode != 0:
if "nixos-facter" in out.stderr and "not found" in out.stderr:
machine.error(str(out.stderr))
msg = (
"Please use our custom nixos install images from https://github.com/nix-community/nixos-images/releases/tag/nixos-unstable. "
"nixos-factor only works on nixos / clan systems currently."
)
raise ClanError(msg)
machine.error(str(out))
msg = f"Failed to inspect {opts.machine}. Address: {host.target}"
raise ClanError(msg)
machine.error(str(out))
msg = f"Failed to inspect {opts.machine}. Address: {host.target}"
raise ClanError(msg)
backup_file = None
if hw_file.exists():
backup_file = hw_file.with_suffix(".bak")

View File

@@ -36,7 +36,6 @@ class BuildOn(Enum):
@dataclass
class InstallOptions:
machine: Machine
target_host: str
kexec: str | None = None
debug: bool = False
no_reboot: bool = False
@@ -52,17 +51,16 @@ class InstallOptions:
@API.register
def install_machine(opts: InstallOptions) -> None:
machine = opts.machine
machine.override_target_host = opts.target_host
machine.info(f"installing {machine.name}")
h = machine.target_host
machine.info(f"target host: {h.target}")
machine.debug(f"installing {machine.name}")
generate_facts([machine])
generate_vars([machine])
with TemporaryDirectory(prefix="nixos-install-") as _base_directory:
with (
TemporaryDirectory(prefix="nixos-install-") as _base_directory,
machine.target_host() as host,
):
base_directory = Path(_base_directory).resolve()
activation_secrets = base_directory / "activation_secrets"
upload_dir = activation_secrets / machine.secrets_upload_directory.lstrip("/")
@@ -134,14 +132,14 @@ def install_machine(opts: InstallOptions) -> None:
if opts.build_on:
cmd += ["--build-on", opts.build_on.value]
if h.port:
cmd += ["--ssh-port", str(h.port)]
if host.port:
cmd += ["--ssh-port", str(host.port)]
if opts.kexec:
cmd += ["--kexec", opts.kexec]
if opts.debug:
cmd.append("--debug")
cmd.append(h.target)
cmd.append(host.target)
if opts.use_tor:
# nix copy does not support tor socks proxy
# cmd.append("--ssh-option")
@@ -178,17 +176,15 @@ def install_command(args: argparse.Namespace) -> None:
deploy_info: DeployInfo | None = ssh_command_parse(args)
if args.target_host:
target_host = args.target_host
machine.override_target_host = args.target_host
elif deploy_info:
host = find_reachable_host(deploy_info, host_key_check)
if host is None:
use_tor = True
target_host = f"root@{deploy_info.tor}"
machine.override_target_host = f"root@{deploy_info.tor}"
else:
target_host = host.target
machine.override_target_host = host.target
password = deploy_info.pwd
else:
target_host = machine.target_host.target
if args.password:
password = args.password
@@ -197,19 +193,16 @@ def install_command(args: argparse.Namespace) -> None:
else:
password = None
if not target_host:
msg = "No target host provided, please provide a target host."
raise ClanError(msg)
if not args.yes:
ask = input(f"Install {args.machine} to {target_host}? [y/N] ")
ask = input(
f"Install {args.machine} to {machine.target_host_address}? [y/N] "
)
if ask != "y":
return None
return install_machine(
InstallOptions(
machine=machine,
target_host=target_host,
kexec=args.kexec,
phases=args.phases,
debug=args.debug,

View File

@@ -2,6 +2,8 @@ import importlib
import json
import logging
import re
from collections.abc import Iterator
from contextlib import contextmanager
from dataclasses import dataclass, field
from functools import cached_property
from pathlib import Path
@@ -145,9 +147,9 @@ class Machine:
def flake_dir(self) -> Path:
return self.flake.path
@property
def target_host(self) -> Host:
return parse_deployment_address(
@contextmanager
def target_host(self) -> Iterator[Host]:
yield parse_deployment_address(
self.name,
self.target_host_address,
self.host_key_check,
@@ -155,23 +157,25 @@ class Machine:
meta={"machine": self},
)
@property
def build_host(self) -> Host:
@contextmanager
def build_host(self) -> Iterator[Host | None]:
"""
The host where the machine is built and deployed from.
Can be the same as the target host.
"""
build_host = self.override_build_host or self.deployment.get("buildHost")
if build_host is None:
return self.target_host
with self.target_host() as target_host:
yield target_host
return
# enable ssh agent forwarding to allow the build host to access the target host
return parse_deployment_address(
yield parse_deployment_address(
self.name,
build_host,
self.host_key_check,
forward_agent=True,
private_key=self.private_key,
meta={"machine": self, "target_host": self.target_host},
meta={"machine": self},
)
@cached_property

View File

@@ -5,6 +5,7 @@ import os
import re
import shlex
import sys
from contextlib import ExitStack
from clan_lib.api import API
@@ -43,8 +44,7 @@ def is_local_input(node: dict[str, dict[str, str]]) -> bool:
)
def upload_sources(machine: Machine) -> str:
host = machine.build_host
def upload_sources(machine: Machine, host: Host) -> str:
env = host.nix_ssh_env(os.environ.copy())
flake_url = (
@@ -126,22 +126,25 @@ def update_machines(base_path: str, machines: list[InventoryMachine]) -> None:
deploy_machines(group_machines)
def deploy_machines(machines: list[Machine]) -> None:
"""
Deploy to all hosts in parallel
"""
def deploy_machine(machine: Machine) -> None:
with ExitStack() as stack:
target_host = stack.enter_context(machine.target_host())
build_host = stack.enter_context(machine.build_host())
if machine._class_ == "darwin":
if not machine.deploy_as_root and target_host.user == "root":
msg = f"'targetHost' should be set to a non-root user for deploying to nix-darwin on machine '{machine.name}'"
raise ClanError(msg)
host = build_host or target_host
def deploy(machine: Machine) -> None:
host = machine.build_host
generate_facts([machine], service=None, regenerate=False)
generate_vars([machine], generator_name=None, regenerate=False)
upload_secrets(machine)
upload_secret_vars(machine)
upload_secrets(machine, target_host)
upload_secret_vars(machine, target_host)
path = upload_sources(
machine=machine,
)
path = upload_sources(machine, host)
nix_options = [
"--show-trace",
@@ -166,10 +169,9 @@ def deploy_machines(machines: list[Machine]) -> None:
"",
]
target_host: Host | None = host.meta.get("target_host")
if target_host:
if build_host:
become_root = False
nix_options += ["--target-host", target_host.target]
nix_options += ["--target-host", build_host.target]
if target_host.user != "root":
nix_options += ["--use-remote-sudo"]
@@ -211,19 +213,19 @@ def deploy_machines(machines: list[Machine]) -> None:
become_root=become_root,
)
def deploy_machines(machines: list[Machine]) -> None:
"""
Deploy to all hosts in parallel
"""
with AsyncRuntime() as runtime:
for machine in machines:
if machine._class_ == "darwin":
if not machine.deploy_as_root and machine.target_host.user == "root":
msg = f"'targetHost' should be set to a non-root user for deploying to nix-darwin on machine '{machine.name}'"
raise ClanError(msg)
machine.info(f"Updating {machine.name}")
runtime.async_run(
AsyncOpts(
tid=machine.name, async_ctx=AsyncContext(prefix=machine.name)
),
deploy,
deploy_machine,
machine,
)
runtime.join_all()