Files
clan-core/pkgs/clan-cli/clan_cli/machines/update.py
Louis Opter 775be88a9c clan-cli: filter any sops recipients set in the environment for encryption
This forces sops to use our config file, otherwise if any of the
environment variables set to specify recipients is present then
`--config` will be ignored (see [env_check]).

That's simple enough, still I ended up refactoring how we call sops for
correctness, and to align with its behavior. The code now distinguishes
between public and private keys explicitly. `secrets.decrypt_secret`
no longer tries to look up public and private keys.

With this changeset, some people might have to adjust their environment
as public age and PGP keys will be discovered like sops would do. In
particular if multiple public keys are discovered, then the user will
have to specify which one to use for the clan.

This also makes the following changes:

- try to use `/dev/shm` when swapping a secret (it's what [pass] does
  fwiw);
- alias immediate values for readability;
- remove some float comparisons that could never succeed, and use sops'
  exit status instead;
- remove unused function `maybe_get_sops_key`.

[env_check]: 8c567aa8a7/cmd/sops/main.go (L2229)
[pass]: http://passwordstore.org/
2024-11-27 06:27:53 +00:00

241 lines
8.0 KiB
Python

import argparse
import json
import logging
import os
import shlex
import sys
from clan_cli.api import API
from clan_cli.clan_uri import FlakeId
from clan_cli.cmd import run
from clan_cli.completions import (
add_dynamic_completer,
complete_machines,
)
from clan_cli.errors import ClanError
from clan_cli.facts.generate import generate_facts
from clan_cli.facts.upload import upload_secrets
from clan_cli.inventory import Machine as InventoryMachine
from clan_cli.machines.machines import Machine
from clan_cli.nix import nix_command, nix_metadata
from clan_cli.ssh.host import Host, HostKeyCheck
from clan_cli.vars.generate import generate_vars
from clan_cli.vars.upload import upload_secret_vars
from .inventory import get_all_machines, get_selected_machines
from .machine_group import MachineGroup
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
def is_path_input(node: dict[str, dict[str, str]]) -> bool:
    """Report whether a flake lock node refers to the local filesystem.

    A node counts as a path input when its ``locked`` entry has the type
    ``"path"`` or a ``url`` that starts with ``file://``.

    Args:
        node: Mapping that may contain a ``locked`` sub-mapping.

    Returns:
        ``True`` for path / ``file://`` inputs, ``False`` otherwise
        (including when ``locked`` is missing or empty).
    """
    locked = node.get("locked")
    if not locked:
        return False
    # Read "type" defensively, like "url": an entry lacking it must not raise
    # KeyError, it simply is not a "path" typed input.
    return locked.get("type") == "path" or locked.get("url", "").startswith("file://")
def upload_sources(machine: Machine, always_upload_source: bool = False) -> str:
    """Make the machine's flake available to its build host.

    Three strategies are tried, cheapest first (the first two are skipped when
    ``always_upload_source`` is true):

    1. Neither the flake nor any of its locked inputs is a path input:
       nothing is uploaded and the resolved flake URL is returned.
    2. Only the flake itself is a path input: the flake's ``path`` from the
       metadata is copied to the build host with ``copy --to ssh://...``.
    3. Otherwise ``flake archive --to ssh://... --json`` is run and the
       ``path`` field of its JSON output is returned.

    Args:
        machine: Machine whose ``build_host`` receives the sources.
        always_upload_source: Skip the metadata shortcuts and always archive.

    Returns:
        The flake reference to build from: a resolved URL or a path.

    Raises:
        ClanError: If the output of the archive command cannot be parsed.
    """
    host = machine.build_host
    env = host.nix_ssh_env(os.environ.copy())
    # Resolve the flake URL before branching: the archive (slow) path needs it
    # even when the metadata shortcuts below are skipped. Previously it was
    # only bound inside the branch, so always_upload_source=True hit an
    # unbound local.
    flake_url = (
        str(machine.flake.path) if machine.flake.is_local() else machine.flake.url
    )

    if not always_upload_source:
        flake_data = nix_metadata(flake_url)
        url = flake_data["resolvedUrl"]
        has_path_inputs = any(
            is_path_input(node) for node in flake_data["locks"]["nodes"].values()
        )
        if not has_path_inputs and not is_path_input(flake_data):
            # No need to upload sources, we can just build the flake url directly
            # FIXME: this might fail for private repositories?
            return url
        if not has_path_inputs:
            # Just copy the flake to the remote machine, we can substitute other inputs there.
            path = flake_data["path"]
            cmd = nix_command(
                [
                    "copy",
                    "--to",
                    f"ssh://{host.target}",
                    "--no-check-sigs",
                    path,
                ]
            )
            run(cmd, env=env, error_msg="failed to upload sources", prefix=machine.name)
            return path

    # Slow path: we need to upload all sources to the remote machine
    cmd = nix_command(
        [
            "flake",
            "archive",
            "--to",
            f"ssh://{host.target}",
            "--json",
            flake_url,
        ]
    )
    proc = run(cmd, env=env, error_msg="failed to upload sources")

    try:
        return json.loads(proc.stdout)["path"]
    except (json.JSONDecodeError, KeyError, TypeError, OSError) as e:
        # KeyError/TypeError cover valid JSON that lacks a "path" entry or is
        # not an object; report it the same way as unparsable output.
        msg = f"failed to parse output of {shlex.join(cmd)}: {e}\nGot: {proc.stdout}"
        raise ClanError(msg) from e
@API.register
def update_machines(base_path: str, machines: list[InventoryMachine]) -> None:
    """Deploy the given inventory machines from the flake at ``base_path``.

    Each inventory entry is turned into a ``Machine`` whose target host is
    overridden with the entry's ``deploy.targetHost``; the resulting machines
    are then deployed together as one ``MachineGroup``.

    Args:
        base_path: Location of the flake, wrapped in a ``FlakeId`` per machine.
        machines: Inventory entries to deploy.

    Raises:
        ClanError: If an entry has no ``deploy.targetHost`` set.
    """
    deployable: list[Machine] = []

    for entry in machines:
        converted = Machine(name=entry.name, flake=FlakeId(base_path))

        target = entry.deploy.targetHost
        if not target:
            msg = f"'TargetHost' is not set for machine '{entry.name}'"
            raise ClanError(msg)

        # Carry the inventory's target host over to the deployable machine.
        converted.override_target_host = target
        deployable.append(converted)

    group = MachineGroup(deployable)
    deploy_machine(group)
def deploy_machine(machines: MachineGroup) -> None:
    """
    Deploy to all hosts in parallel

    For every machine: generate facts and vars, upload secrets and secret
    vars, upload the flake sources, then run ``nixos-rebuild switch`` through
    the machine's build host. A failed first attempt is retried once; machines
    with the ``nixosMobileWorkaround`` deployment flag retry with
    ``nixos-rebuild test`` instead.

    A group with more than one host is deployed via
    ``machines.run_function``; a single machine is deployed directly.
    An empty group is a no-op.
    """

    def deploy(machine: Machine) -> None:
        """Run the full deployment sequence for a single machine."""
        host = machine.build_host
        generate_facts([machine], service=None, regenerate=False)
        generate_vars([machine], generator_name=None, regenerate=False)

        upload_secrets(machine)
        upload_secret_vars(machine)

        path = upload_sources(
            machine,
        )

        # NOTE(review): "--build-host" is passed an empty value on purpose in
        # the original code; kept as-is — confirm the intent with nixos-rebuild.
        nix_options = [
            "--show-trace",
            "--fast",
            "--option",
            "keep-going",
            "true",
            "--option",
            "accept-flake-config",
            "true",
            "--build-host",
            "",
            *machine.nix_options,
            "--flake",
            f"{path}#{machine.name}",
        ]

        switch_cmd = ["nixos-rebuild", "switch", *nix_options]
        test_cmd = ["nixos-rebuild", "test", *nix_options]

        target_host: Host | None = host.meta.get("target_host")
        if target_host:
            switch_cmd.extend(["--target-host", target_host.target])
            test_cmd.extend(["--target-host", target_host.target])

        env = host.nix_ssh_env(None)
        # check=False: presumably keeps a non-zero exit from raising so the
        # return code can be inspected below — verify against Host.run.
        ret = host.run(switch_cmd, extra_env=env, check=False)

        # if the machine is mobile, we retry to deploy with the quirk method
        is_mobile = machine.deployment.get("nixosMobileWorkaround", False)
        if is_mobile and ret.returncode != 0:
            log.info("Mobile machine detected, applying quirk deployment method")
            host.run(test_cmd, extra_env=env)

        # retry nixos-rebuild switch if the first attempt failed
        elif ret.returncode != 0:
            host.run(switch_cmd, extra_env=env)

    # Nothing to deploy: indexing machines.machines[0] below would fail on an
    # empty group, so bail out early instead.
    if not machines.machines:
        log.warning("No machines to deploy")
        return

    if len(machines.group.hosts) > 1:
        machines.run_function(deploy)
    else:
        deploy(machines.machines[0])
def update(args: argparse.Namespace) -> None:
    """Select the machines named on the command line and deploy them.

    Selection rules:

    * ``--target-host`` with exactly one machine: deploy that machine to the
      given host.
    * ``--target-host`` with any other number of machines: print an error to
      stderr and exit with status 1.
    * No machine names: deploy every machine of the flake, skipping those
      flagged ``requireExplicitUpdate`` and those for which resolving the
      build host raises ``ClanError``.
    * Otherwise: deploy the explicitly selected machines.

    When nothing is left to deploy a warning is printed to stderr and the
    function returns without deploying.

    Args:
        args: Parsed arguments; reads ``flake``, ``machines``, ``target_host``,
            ``option`` and ``host_key_check``.

    Raises:
        ClanError: If no flake was found (``args.flake`` is ``None``).
        SystemExit: If ``--target-host`` is combined with zero or several
            machines.
    """
    if args.flake is None:
        msg = "Could not find clan flake toplevel directory"
        raise ClanError(msg)

    if args.target_host is not None and len(args.machines) != 1:
        # sys.exit instead of the site-provided exit() helper, which is meant
        # for interactive use and is not guaranteed to exist.
        print(
            "target host can only be specified for a single machine",
            file=sys.stderr,
        )
        sys.exit(1)

    # Same value for every machine, so parse it only once.
    host_key_check = HostKeyCheck.from_str(args.host_key_check)

    machines = []
    ignored_machines = []

    if args.target_host is not None:
        machine = Machine(
            name=args.machines[0], flake=args.flake, nix_options=args.option
        )
        machine.override_target_host = args.target_host
        machine.host_key_check = host_key_check
        machines.append(machine)
    elif not args.machines:
        for machine in get_all_machines(args.flake, args.option):
            if machine.deployment.get("requireExplicitUpdate", False):
                continue
            try:
                machine.build_host  # noqa: B018
            except ClanError:  # check if we have a build host set
                ignored_machines.append(machine)
                continue
            machine.host_key_check = host_key_check
            machines.append(machine)
    else:
        machines = get_selected_machines(args.flake, args.option, args.machines)
        for machine in machines:
            machine.host_key_check = host_key_check

    if not machines:
        # Deploying an empty group cannot do anything useful; warn and stop.
        if ignored_machines:
            print(
                "WARNING: No machines to update. "
                "The following defined machines were ignored because they "
                "do not have the `clan.core.networking.targetHost` nixos option set:",
                file=sys.stderr,
            )
            for machine in ignored_machines:
                print(machine, file=sys.stderr)
        else:
            print("WARNING: No machines to update.", file=sys.stderr)
        return

    host_group = MachineGroup(machines)
    deploy_machine(host_group)
def register_update_parser(parser: argparse.ArgumentParser) -> None:
    """Attach the update command's arguments and its handler to ``parser``.

    Adds the positional ``machines`` list (with dynamic completion), the
    ``--host-key-check`` and ``--target-host`` options, and sets ``update``
    as the ``func`` default.
    """
    # Zero or more machine names; an empty list is the default.
    machines_action = parser.add_argument(
        "machines",
        metavar="MACHINE",
        nargs="*",
        type=str,
        default=[],
        help="Machine to update. If no machine is specified, all machines will be updated.",
    )
    add_dynamic_completer(machines_action, complete_machines)

    host_key_modes = ["strict", "ask", "tofu", "none"]
    parser.add_argument(
        "--host-key-check",
        default="ask",
        choices=host_key_modes,
        help="Host key (.ssh/known_hosts) check mode.",
    )

    parser.add_argument(
        "--target-host",
        help="Address of the machine to update, in the format of user@host:1234.",
        type=str,
    )

    parser.set_defaults(func=update)