diff --git a/nixosModules/clanCore/outputs.nix b/nixosModules/clanCore/outputs.nix index 4369d4bfe..d38a7a094 100644 --- a/nixosModules/clanCore/outputs.nix +++ b/nixosModules/clanCore/outputs.nix @@ -44,6 +44,17 @@ ''; default = false; }; + deployment.nixosMobileWorkaround = lib.mkOption { + type = lib.types.bool; + description = '' + if true, the deployment will first do a nixos-rebuild switch + to register the boot profile the command will fail applying it to the running system + which is why afterwards we execute a nixos-rebuild test to apply + the new config without having to reboot. + This is a nixos-mobile deployment bug and will be removed in the future + ''; + default = false; + }; vm.create = lib.mkOption { type = lib.types.path; description = '' @@ -75,6 +86,7 @@ }; sops.defaultGroups = config.clan.core.sops.defaultGroups; inherit (config.clan.core.networking) targetHost buildHost; + inherit (config.system.clan.deployment) nixosMobileWorkaround; inherit (config.clan.deployment) requireExplicitUpdate; }; system.clan.deployment.file = pkgs.writeText "deployment.json" ( diff --git a/pkgs/clan-cli/clan_cli/cmd.py b/pkgs/clan-cli/clan_cli/cmd.py index f43487b6c..2943d313c 100644 --- a/pkgs/clan-cli/clan_cli/cmd.py +++ b/pkgs/clan-cli/clan_cli/cmd.py @@ -197,9 +197,11 @@ def run( logger.debug(f"{msg} \n{callers_str}") if input: - print_trace( - f"$: echo '{input.decode('utf-8', 'replace')}' | {indent_command(cmd)}" - ) + if any(not ch.isprintable() for ch in input.decode("ascii", "replace")): + filtered_input = "<>" + else: + filtered_input = input.decode("ascii", "replace") + print_trace(f"$: echo '{filtered_input}' | {indent_command(cmd)}") elif logger.isEnabledFor(logging.DEBUG): print_trace(f"$: {indent_command(cmd)}") @@ -229,7 +231,7 @@ def run( global TIME_TABLE if TIME_TABLE: - TIME_TABLE.add(shlex.join(cmd), start - timeit.default_timer()) + TIME_TABLE.add(shlex.join(cmd), timeit.default_timer() - start) # Wait for the subprocess to finish cmd_out = CmdOut( diff --git a/pkgs/clan-cli/clan_cli/facts/upload.py b/pkgs/clan-cli/clan_cli/facts/upload.py index e81b6c14c..49dcddb2b 100644 --- a/pkgs/clan-cli/clan_cli/facts/upload.py +++ b/pkgs/clan-cli/clan_cli/facts/upload.py @@ -4,10 +4,8 @@ import logging from pathlib import Path from tempfile import TemporaryDirectory -from clan_cli.cmd import Log, run from clan_cli.completions import add_dynamic_completer, complete_machines from clan_cli.machines.machines import Machine -from clan_cli.nix import nix_shell log = logging.getLogger(__name__) @@ -19,30 +17,13 @@ def upload_secrets(machine: Machine) -> None: if not secret_facts_store.needs_upload(): log.info("Secrets already uploaded") return - with TemporaryDirectory(prefix="facts-upload-") as tempdir: - secret_facts_store.upload(Path(tempdir)) - host = machine.target_host - run( - nix_shell( - ["nixpkgs#rsync"], - [ - "rsync", - "-e", - " ".join(["ssh", *host.ssh_cmd_opts()]), - "--recursive", - "--links", - "--times", - "--compress", - "--delete", - "--chmod=D700,F600", - f"{tempdir!s}/", - f"{host.target_for_rsync}:{machine.secrets_upload_directory}/", - ], - ), - log=Log.BOTH, - needs_user_terminal=True, - ) + with TemporaryDirectory(prefix="facts-upload-") as tempdir: + local_secret_dir = Path(tempdir) + secret_facts_store.upload(local_secret_dir) + remote_secret_dir = Path(machine.secrets_upload_directory) + + machine.target_host.upload(local_secret_dir, remote_secret_dir) def upload_command(args: argparse.Namespace) -> None: diff --git a/pkgs/clan-cli/clan_cli/machines/update.py b/pkgs/clan-cli/clan_cli/machines/update.py index 3994d37cd..500505fab 100644 --- a/pkgs/clan-cli/clan_cli/machines/update.py +++ b/pkgs/clan-cli/clan_cli/machines/update.py @@ -11,7 +11,6 @@ from clan_cli.cmd import run from clan_cli.completions import ( add_dynamic_completer, complete_machines, - complete_target_host, ) from clan_cli.errors import ClanError from clan_cli.facts.generate import generate_facts @@ -19,7 +18,7 @@ from clan_cli.facts.upload import upload_secrets from clan_cli.inventory import Machine as InventoryMachine from clan_cli.machines.machines import Machine from clan_cli.nix import nix_command, nix_metadata -from clan_cli.ssh import HostKeyCheck +from clan_cli.ssh import Host, HostKeyCheck from clan_cli.vars.generate import generate_vars from clan_cli.vars.upload import upload_secret_vars @@ -116,7 +115,6 @@ def deploy_machine(machines: MachineGroup) -> None: def deploy(machine: Machine) -> None: host = machine.build_host - generate_facts([machine], None, False) generate_vars([machine], None, False) @@ -127,9 +125,7 @@ def deploy_machine(machines: MachineGroup) -> None: machine, ) - cmd = [ - "nixos-rebuild", - "switch", + nix_options = [ "--show-trace", "--fast", "--option", @@ -144,15 +140,26 @@ def deploy_machine(machines: MachineGroup) -> None: "--flake", f"{path}#{machine.name}", ] - if target_host := host.meta.get("target_host"): - target_host = f"{target_host.user or 'root'}@{target_host.host}" - cmd.extend(["--target-host", target_host]) + switch_cmd = ["nixos-rebuild", "switch", *nix_options] + test_cmd = ["nixos-rebuild", "test", *nix_options] + + target_host: Host | None = host.meta.get("target_host") + if target_host: + switch_cmd.extend(["--target-host", target_host.target]) + test_cmd.extend(["--target-host", target_host.target]) env = host.nix_ssh_env(None) - ret = host.run(cmd, extra_env=env, check=False) - # re-retry switch if the first time fails - if ret.returncode != 0: - ret = host.run(cmd, extra_env=env) + ret = host.run(switch_cmd, extra_env=env, check=False) + + # if the machine is mobile, we retry to deploy with the quirk method + is_mobile = machine.deployment.get("nixosMobileWorkaround", False) + if is_mobile and ret.returncode != 0: + log.info("Mobile machine detected, applying quirk deployment method") + ret = host.run(test_cmd, extra_env=env) + + # retry nixos-rebuild switch if the first attempt failed + elif ret.returncode != 0: + ret = host.run(switch_cmd, extra_env=env) if len(machines.group.hosts) > 1: machines.run_function(deploy) @@ -226,17 +233,9 @@ def register_update_parser(parser: argparse.ArgumentParser) -> None: default="ask", help="Host key (.ssh/known_hosts) check mode.", ) - - target_host_parser = parser.add_argument( + parser.add_argument( "--target-host", type=str, help="Address of the machine to update, in the format of user@host:1234.", ) - add_dynamic_completer(target_host_parser, complete_target_host) - - parser.add_argument( - "--darwin", - type=str, - help="Hack to deploy darwin machines. This will be removed in the future when we have full darwin integration.", - ) parser.set_defaults(func=update) diff --git a/pkgs/clan-cli/clan_cli/profiler.py b/pkgs/clan-cli/clan_cli/profiler.py index c36b316d9..2f6b9203b 100644 --- a/pkgs/clan-cli/clan_cli/profiler.py +++ b/pkgs/clan-cli/clan_cli/profiler.py @@ -113,6 +113,6 @@ def profile(func: Callable) -> Callable: raise return res - if os.getenv("PERF", "0") == "1": + if os.getenv("CLAN_CLI_PERF", "0") == "1": return wrapper return func diff --git a/pkgs/clan-cli/clan_cli/ssh/__init__.py b/pkgs/clan-cli/clan_cli/ssh/__init__.py index 522f2aa78..8ce5f602a 100644 --- a/pkgs/clan-cli/clan_cli/ssh/__init__.py +++ b/pkgs/clan-cli/clan_cli/ssh/__init__.py @@ -8,6 +8,7 @@ import select import shlex import subprocess import sys +import tarfile import time import urllib.parse from collections.abc import Callable, Iterator @@ -15,10 +16,12 @@ from contextlib import ExitStack, contextmanager from enum import Enum from pathlib import Path from shlex import quote +from tempfile import TemporaryDirectory from threading import Thread from typing import IO, Any, Generic, TypeVar -from clan_cli.cmd import terminate_process_group +from clan_cli.cmd import Log, terminate_process_group +from clan_cli.cmd import run as local_run from clan_cli.errors import ClanError # https://no-color.org @@ -207,7 +210,7 @@ class Host: self.host_key_check = host_key_check self.meta = meta self.verbose_ssh = verbose_ssh - self.ssh_options = ssh_options + self._ssh_options = ssh_options def __repr__(self) -> str: return str(self) @@ -525,29 +528,89 @@ class Host: def nix_ssh_env(self, env: dict[str, str] | None) -> dict[str, str]: if env is None: env = {} - env["NIX_SSHOPTS"] = " ".join(self.ssh_cmd_opts()) + env["NIX_SSHOPTS"] = " ".join(self.ssh_cmd_opts) return env + def upload( + self, + local_src: Path, # must be a directory + remote_dest: Path, # must be a directory + file_user: str = "root", + file_group: str = "root", + dir_mode: int = 0o700, + file_mode: int = 0o400, + ) -> None: + # check if the remote destination is a directory (no suffix) + if remote_dest.suffix: + msg = "Only directories are allowed" + raise ClanError(msg) + + if not local_src.is_dir(): + msg = "Only directories are allowed" + raise ClanError(msg) + + # Create the tarball from the temporary directory + with TemporaryDirectory(prefix="facts-upload-") as tardir: + tar_path = Path(tardir) / "upload.tar.gz" + # We set the permissions of the files and directories in the tarball to read only and owned by root + # As first uploading the tarball and then changing the permissions can lead an attacker to + # do a race condition attack + with tarfile.open(str(tar_path), "w:gz") as tar: + for root, dirs, files in local_src.walk(): + for mdir in dirs: + dir_path = Path(root) / mdir + tarinfo = tar.gettarinfo( + dir_path, arcname=str(dir_path.relative_to(str(local_src))) + ) + tarinfo.mode = dir_mode + tarinfo.uname = file_user + tarinfo.gname = file_group + tar.addfile(tarinfo) + for file in files: + file_path = Path(root) / file + tarinfo = tar.gettarinfo( + file_path, + arcname=str(file_path.relative_to(str(local_src))), + ) + tarinfo.mode = file_mode + tarinfo.uname = file_user + tarinfo.gname = file_group + with file_path.open("rb") as f: + tar.addfile(tarinfo, f) + + cmd = [ + *self.ssh_cmd(), + "rm", + "-r", + str(remote_dest), + ";", + "mkdir", + f"--mode={dir_mode:o}", + "-p", + str(remote_dest), + "&&", + "tar", + "-C", + str(remote_dest), + "-xvzf", + "-", + ] + + # TODO accept `input` to be an IO object instead of bytes so that we don't have to read the tarfile into memory. + with tar_path.open("rb") as f: + local_run(cmd, input=f.read(), log=Log.BOTH, needs_user_terminal=True) + + @property def ssh_cmd_opts( self, - verbose_ssh: bool = False, - tty: bool = False, ) -> list[str]: ssh_opts = ["-A"] if self.forward_agent else [] - for k, v in self.ssh_options.items(): + for k, v in self._ssh_options.items(): ssh_opts.extend(["-o", f"{k}={shlex.quote(v)}"]) - if self.port: - ssh_opts.extend(["-p", str(self.port)]) - if self.key: - ssh_opts.extend(["-i", self.key]) - ssh_opts.extend(self.host_key_check.to_ssh_opt()) - if verbose_ssh or self.verbose_ssh: - ssh_opts.extend(["-v"]) - if tty: - ssh_opts.extend(["-t"]) + return ssh_opts def ssh_cmd( @@ -555,10 +618,21 @@ class Host: verbose_ssh: bool = False, tty: bool = False, ) -> list[str]: + ssh_opts = self.ssh_cmd_opts + if verbose_ssh or self.verbose_ssh: + ssh_opts.extend(["-v"]) + if tty: + ssh_opts.extend(["-t"]) + + if self.port: + ssh_opts.extend(["-p", str(self.port)]) + if self.key: + ssh_opts.extend(["-i", self.key]) + return [ "ssh", self.target, - *self.ssh_cmd_opts(verbose_ssh=verbose_ssh, tty=tty), + *ssh_opts, ] @@ -658,6 +732,9 @@ class HostGroup: timeout: float = math.inf, tty: bool = False, ) -> None: + if cwd is not None: + msg = "cwd is not supported for remote commands" + raise ClanError(msg) if extra_env is None: extra_env = {} try: diff --git a/pkgs/clan-cli/clan_cli/vars/upload.py b/pkgs/clan-cli/clan_cli/vars/upload.py index a75df063a..ce22cad67 100644 --- a/pkgs/clan-cli/clan_cli/vars/upload.py +++ b/pkgs/clan-cli/clan_cli/vars/upload.py @@ -4,10 +4,8 @@ import logging from pathlib import Path from tempfile import TemporaryDirectory -from clan_cli.cmd import Log, run from clan_cli.completions import add_dynamic_completer, complete_machines from clan_cli.machines.machines import Machine -from clan_cli.nix import nix_shell log = logging.getLogger(__name__) @@ -20,29 +18,10 @@ def upload_secret_vars(machine: Machine) -> None: log.info("Secrets already uploaded") return with TemporaryDirectory(prefix="vars-upload-") as tempdir: - secret_store.upload(Path(tempdir)) - host = machine.target_host - - ssh_cmd = host.ssh_cmd() - run( - nix_shell( - ["nixpkgs#rsync"], - [ - "rsync", - "-e", - " ".join(["ssh"] + ssh_cmd[2:]), - "--recursive", - "--links", - "--times", - "--compress", - "--delete", - "--chmod=D700,F600", - f"{tempdir!s}/", - f"{host.target_for_rsync}:{machine.secret_vars_upload_directory}/", - ], - ), - log=Log.BOTH, - needs_user_terminal=True, + secret_dir = Path(tempdir) + secret_store.upload(secret_dir) + machine.target_host.upload( + secret_dir, Path(machine.secret_vars_upload_directory) ) diff --git a/pkgs/clan-cli/tests/data/sshd_config b/pkgs/clan-cli/tests/data/sshd_config index c6e685a21..4212331f2 100644 --- a/pkgs/clan-cli/tests/data/sshd_config +++ b/pkgs/clan-cli/tests/data/sshd_config @@ -5,3 +5,4 @@ MaxStartups 64:30:256 AuthorizedKeysFile $host_key.pub AcceptEnv REALPATH PasswordAuthentication no +Subsystem sftp $sftp_server \ No newline at end of file diff --git a/pkgs/clan-cli/tests/fixtures_flakes.py b/pkgs/clan-cli/tests/fixtures_flakes.py index 6843fea24..9193f0d90 100644 --- a/pkgs/clan-cli/tests/fixtures_flakes.py +++ b/pkgs/clan-cli/tests/fixtures_flakes.py @@ -38,7 +38,7 @@ def substitute( str(clan_core_flake), ) line = line.replace("__CLAN_SOPS_KEY_PATH__", sops_key) - line = line.replace("__CLAN_SOPS_KEY_DIR__", str(flake)) + line = line.replace("__CLAN_SOPS_KEY_DIR__", str(flake / "facts")) buf += line print(f"file: {file}") print(f"clan_core: {clan_core_flake}") diff --git a/pkgs/clan-cli/tests/host_group.py b/pkgs/clan-cli/tests/host_group.py index 2021b09a0..7c71e7d24 100644 --- a/pkgs/clan-cli/tests/host_group.py +++ b/pkgs/clan-cli/tests/host_group.py @@ -9,7 +9,7 @@ from sshd import Sshd @pytest.fixture def host_group(sshd: Sshd) -> HostGroup: login = pwd.getpwuid(os.getuid()).pw_name - return HostGroup( + group = HostGroup( [ Host( "127.0.0.1", @@ -20,3 +20,4 @@ def host_group(sshd: Sshd) -> HostGroup: ) ] ) + return group diff --git a/pkgs/clan-cli/tests/sshd.py b/pkgs/clan-cli/tests/sshd.py index 8abdee422..24f55840c 100644 --- a/pkgs/clan-cli/tests/sshd.py +++ b/pkgs/clan-cli/tests/sshd.py @@ -26,12 +26,13 @@ class Sshd: class SshdConfig: def __init__( - self, path: Path, login_shell: Path, key: str, preload_lib: Path + self, path: Path, login_shell: Path, key: str, preload_lib: Path, log_file: Path ) -> None: self.path = path self.login_shell = login_shell self.key = key self.preload_lib = preload_lib + self.log_file = log_file @pytest.fixture(scope="session") @@ -43,7 +44,14 @@ def sshd_config(test_root: Path) -> Iterator[SshdConfig]: host_key = test_root / "data" / "ssh_host_ed25519_key" host_key.chmod(0o600) template = (test_root / "data" / "sshd_config").read_text() - content = string.Template(template).substitute({"host_key": host_key}) + sshd = shutil.which("sshd") + assert sshd is not None + sshdp = Path(sshd) + sftp_server = sshdp.parent.parent / "libexec" / "sftp-server" + assert sftp_server is not None + content = string.Template(template).substitute( + {"host_key": host_key, "sftp_server": sftp_server} + ) config = tmpdir / "sshd_config" config.write_text(content) login_shell = tmpdir / "shell" @@ -84,8 +92,8 @@ exec {bash} -l "${{@}}" ], check=True, ) - - yield SshdConfig(config, login_shell, str(host_key), lib_path) + log_file = tmpdir / "sshd.log" + yield SshdConfig(config, login_shell, str(host_key), lib_path, log_file) @pytest.fixture @@ -106,7 +114,17 @@ def sshd( "LOGIN_SHELL": str(sshd_config.login_shell), } proc = command.run( - [sshd, "-f", str(sshd_config.path), "-D", "-p", str(port)], extra_env=env + [ + sshd, + "-E", + str(sshd_config.log_file), + "-f", + str(sshd_config.path), + "-D", + "-p", + str(port), + ], + extra_env=env, ) monkeypatch.delenv("SSH_AUTH_SOCK", raising=False) while True: diff --git a/pkgs/clan-cli/tests/test_secrets_upload.py b/pkgs/clan-cli/tests/test_secrets_upload.py index 1b333397a..7189e1884 100644 --- a/pkgs/clan-cli/tests/test_secrets_upload.py +++ b/pkgs/clan-cli/tests/test_secrets_upload.py @@ -53,9 +53,11 @@ def test_secrets_upload( new_text = flake.read_text().replace("__CLAN_TARGET_ADDRESS__", addr) flake.write_text(new_text) + cli.run(["facts", "upload", "--flake", str(test_flake_with_core.path), "vm1"]) # the flake defines this path as the location where the sops key should be installed - sops_key = test_flake_with_core.path.joinpath("key.txt") + sops_key = test_flake_with_core.path / "facts" / "key.txt" + # breakpoint() assert sops_key.exists() assert sops_key.read_text() == age_keys[0].privkey