From 2b5cdd66f9bca5245eb04979ecc007b2eb7fdec5 Mon Sep 17 00:00:00 2001 From: Qubasa Date: Fri, 6 Sep 2024 18:53:02 +0200 Subject: [PATCH] clan-cli: Make run_vm_in_thread fail fast. Fix generate_flake to support machine_id --- pkgs/clan-cli/tests/fixtures_flakes.py | 20 ++++++- pkgs/clan-cli/tests/helpers/vms.py | 65 ++++++++++++++------- pkgs/clan-cli/tests/test_vars.py | 11 ++++ pkgs/clan-cli/tests/test_vars_deployment.py | 7 ++- pkgs/clan-cli/tests/test_vms_cli.py | 18 +++--- 5 files changed, 87 insertions(+), 34 deletions(-) diff --git a/pkgs/clan-cli/tests/fixtures_flakes.py b/pkgs/clan-cli/tests/fixtures_flakes.py index 071260b84..bc2b2cb9f 100644 --- a/pkgs/clan-cli/tests/fixtures_flakes.py +++ b/pkgs/clan-cli/tests/fixtures_flakes.py @@ -11,6 +11,7 @@ from typing import NamedTuple import pytest from clan_cli.dirs import nixpkgs_source from fixture_error import FixtureError +from helpers import cli from root import CLAN_CORE log = logging.getLogger(__name__) @@ -50,9 +51,14 @@ class FlakeForTest(NamedTuple): path: Path +from age_keys import KEYS, KeyPair + + def generate_flake( temporary_home: Path, flake_template: Path, + monkeypatch: pytest.MonkeyPatch, + sops_key: KeyPair = KEYS[0], substitutions: dict[str, str] | None = None, # define the machines directly including their config machine_configs: dict[str, dict] | None = None, @@ -75,7 +81,6 @@ def generate_flake( ) ) """ - # copy the template to a new temporary location if inventory is None: inventory = {} @@ -133,6 +138,19 @@ def generate_flake( sp.run(["git", "config", "user.name", "clan-tool"], cwd=flake, check=True) sp.run(["git", "config", "user.email", "clan@example.com"], cwd=flake, check=True) sp.run(["git", "commit", "-a", "-m", "Initial commit"], cwd=flake, check=True) + monkeypatch.setenv("SOPS_AGE_KEY", sops_key.privkey) + cli.run( + [ + "secrets", + "users", + "add", + "user1", + sops_key.pubkey, + "--flake", + str(flake), + "--debug", + ] + ) return FlakeForTest(flake) diff --git a/pkgs/clan-cli/tests/helpers/vms.py b/pkgs/clan-cli/tests/helpers/vms.py index 1cd7e5a00..35f065e8f 100644 --- a/pkgs/clan-cli/tests/helpers/vms.py +++ b/pkgs/clan-cli/tests/helpers/vms.py @@ -8,6 +8,7 @@ from pathlib import Path from time import sleep from clan_cli.dirs import vm_state_dir +from clan_cli.errors import ClanError from clan_cli.qemu.qga import QgaSession from clan_cli.qemu.qmp import QEMUMonitorProtocol @@ -21,33 +22,46 @@ def find_free_port() -> int: return sock.getsockname()[1] -def run_vm_in_thread(machine_name: str, ssh_port: int | None = None) -> int: +class VmThread(threading.Thread): + def __init__(self, machine_name: str, ssh_port: int | None = None) -> None: + super().__init__() + self.machine_name = machine_name + self.ssh_port = ssh_port + self.exception: Exception | None = None + self.daemon = True + + def run(self) -> None: + try: + cli.run( + ["vms", "run", self.machine_name, "--publish", f"{self.ssh_port}:22"] + ) + except Exception as ex: + # print exception details + print(traceback.format_exc(), file=sys.stderr) + print(sys.exc_info()[2], file=sys.stderr) + self.exception = ex + + +def run_vm_in_thread(machine_name: str, ssh_port: int | None = None) -> VmThread: # runs machine and prints exceptions if ssh_port is None: ssh_port = find_free_port() - def run() -> None: - try: - cli.run(["vms", "run", machine_name, "--publish", f"{ssh_port}:22"]) - except Exception: - # print exception details - print(traceback.format_exc(), file=sys.stderr) - print(sys.exc_info()[2], file=sys.stderr) - - # run the machine in a separate thread - t = threading.Thread(target=run, name="run") - t.daemon = True - t.start() - return ssh_port + vm_thread = VmThread(machine_name, ssh_port) + vm_thread.start() + return vm_thread # wait for qmp socket to exist -def wait_vm_up(machine_name: str, flake_url: str | None = None) -> None: +def wait_vm_up(machine_name: str, vm: VmThread, flake_url: str | None = None) -> None: if flake_url is None: flake_url = str(Path.cwd()) socket_file = vm_state_dir(flake_url, machine_name) / "qmp.sock" - timeout: float = 1200 # in seconds + timeout: float = 600 while True: + if vm.exception: + msg = "VM failed to start" + raise ClanError(msg) from vm.exception if timeout <= 0: msg = f"qmp socket {socket_file} not found. Is the VM running?" raise TimeoutError(msg) @@ -58,12 +72,15 @@ def wait_vm_up(machine_name: str, flake_url: str | None = None) -> None: # wait for vm to be down by checking if qmp socket is down -def wait_vm_down(machine_name: str, flake_url: str | None = None) -> None: +def wait_vm_down(machine_name: str, vm: VmThread, flake_url: str | None = None) -> None: if flake_url is None: flake_url = str(Path.cwd()) socket_file = vm_state_dir(flake_url, machine_name) / "qmp.sock" - timeout: float = 600 + timeout: float = 300 while socket_file.exists(): + if vm.exception: + msg = "VM failed to start" + raise ClanError(msg) from vm.exception if timeout <= 0: msg = f"qmp socket {socket_file} still exists. Is the VM down?" raise TimeoutError(msg) @@ -72,11 +89,13 @@ def wait_vm_down(machine_name: str, flake_url: str | None = None) -> None: # wait for vm to be up then connect and return qmp instance -def qmp_connect(machine_name: str, flake_url: str | None = None) -> QEMUMonitorProtocol: +def qmp_connect( + machine_name: str, vm: VmThread, flake_url: str | None = None +) -> QEMUMonitorProtocol: if flake_url is None: flake_url = str(Path.cwd()) state_dir = vm_state_dir(flake_url, machine_name) - wait_vm_up(machine_name, flake_url) + wait_vm_up(machine_name, vm, flake_url) qmp = QEMUMonitorProtocol( address=str(os.path.realpath(state_dir / "qmp.sock")), ) @@ -85,9 +104,11 @@ def qmp_connect(machine_name: str, flake_url: str | None = None) -> QEMUMonitorP # wait for vm to be up then connect and return qga instance -def qga_connect(machine_name: str, flake_url: str | None = None) -> QgaSession: +def qga_connect( + machine_name: str, vm: VmThread, flake_url: str | None = None +) -> QgaSession: if flake_url is None: flake_url = str(Path.cwd()) state_dir = vm_state_dir(flake_url, machine_name) - wait_vm_up(machine_name, flake_url) + wait_vm_up(machine_name, vm, flake_url) return QgaSession(os.path.realpath(state_dir / "qga.sock")) diff --git a/pkgs/clan-cli/tests/test_vars.py b/pkgs/clan-cli/tests/test_vars.py index b173eabcb..d045f8e55 100644 --- a/pkgs/clan-cli/tests/test_vars.py +++ b/pkgs/clan-cli/tests/test_vars.py @@ -76,6 +76,7 @@ def test_generate_public_var( temporary_home, flake_template=CLAN_CORE / "templates" / "minimal", machine_configs={"my_machine": config}, + monkeypatch=monkeypatch, ) monkeypatch.chdir(flake.path) machine = Machine(name="my_machine", flake=FlakeId(str(flake.path))) @@ -105,6 +106,7 @@ def test_generate_secret_var_sops( temporary_home, flake_template=CLAN_CORE / "templates" / "minimal", machine_configs={"my_machine": config}, + monkeypatch=monkeypatch, ) monkeypatch.chdir(flake.path) sops_setup.init() @@ -140,6 +142,7 @@ def test_generate_secret_var_sops_with_default_group( temporary_home, flake_template=CLAN_CORE / "templates" / "minimal", machine_configs={"my_machine": config}, + monkeypatch=monkeypatch, ) monkeypatch.chdir(flake.path) sops_setup.init() @@ -170,6 +173,7 @@ def test_generate_secret_var_password_store( temporary_home, flake_template=CLAN_CORE / "templates" / "minimal", machine_configs={"my_machine": config}, + monkeypatch=monkeypatch, ) monkeypatch.chdir(flake.path) gnupghome = temporary_home / "gpg" @@ -237,6 +241,7 @@ def test_generate_secret_for_multiple_machines( temporary_home, flake_template=CLAN_CORE / "templates" / "minimal", machine_configs={"machine1": machine1_config, "machine2": machine2_config}, + monkeypatch=monkeypatch, ) monkeypatch.chdir(flake.path) sops_setup.init() @@ -282,6 +287,7 @@ def test_dependant_generators( temporary_home, flake_template=CLAN_CORE / "templates" / "minimal", machine_configs={"my_machine": config}, + monkeypatch=monkeypatch, ) monkeypatch.chdir(flake.path) cli.run(["vars", "generate", "--flake", str(flake.path), "my_machine"]) @@ -321,6 +327,7 @@ def test_prompt( temporary_home, flake_template=CLAN_CORE / "templates" / "minimal", machine_configs={"my_machine": config}, + monkeypatch=monkeypatch, ) monkeypatch.chdir(flake.path) monkeypatch.setattr("sys.stdin", StringIO(input_value)) @@ -359,6 +366,7 @@ def test_share_flag( temporary_home, flake_template=CLAN_CORE / "templates" / "minimal", machine_configs={"my_machine": config}, + monkeypatch=monkeypatch, ) monkeypatch.chdir(flake.path) sops_setup.init() @@ -398,6 +406,7 @@ def test_prompt_create_file( temporary_home, flake_template=CLAN_CORE / "templates" / "minimal", machine_configs={"my_machine": config}, + monkeypatch=monkeypatch, ) monkeypatch.chdir(flake.path) sops_setup.init() @@ -426,6 +435,7 @@ def test_api_get_prompts( temporary_home, flake_template=CLAN_CORE / "templates" / "minimal", machine_configs={"my_machine": config}, + monkeypatch=monkeypatch, ) monkeypatch.chdir(flake.path) monkeypatch.setattr("sys.stdin", StringIO("input1")) @@ -454,6 +464,7 @@ def test_api_set_prompts( temporary_home, flake_template=CLAN_CORE / "templates" / "minimal", machine_configs={"my_machine": config}, + monkeypatch=monkeypatch, ) monkeypatch.chdir(flake.path) machine = Machine(name="my_machine", flake=FlakeId(str(flake.path))) diff --git a/pkgs/clan-cli/tests/test_vars_deployment.py b/pkgs/clan-cli/tests/test_vars_deployment.py index e7de870aa..5c844dd69 100644 --- a/pkgs/clan-cli/tests/test_vars_deployment.py +++ b/pkgs/clan-cli/tests/test_vars_deployment.py @@ -42,6 +42,7 @@ def test_vm_deployment( temporary_home, flake_template=CLAN_CORE / "templates" / "minimal", machine_configs={"my_machine": config}, + monkeypatch=monkeypatch, ) monkeypatch.chdir(flake.path) sops_setup.init() @@ -65,8 +66,8 @@ def test_vm_deployment( ) ).stdout.strip() assert "no-such-path" not in my_secret_path - run_vm_in_thread("my_machine") - qga = qga_connect("my_machine") + vm = run_vm_in_thread("my_machine") + qga = qga_connect("my_machine", vm) # check my_secret is deployed _, out, _ = qga.run("cat /run/secrets/vars/my_generator/my_secret", check=True) assert out == "hello\n" @@ -81,4 +82,4 @@ def test_vm_deployment( ) assert returncode != 0 qga.exec_cmd("poweroff") - wait_vm_down("my_machine") + wait_vm_down("my_machine", vm) diff --git a/pkgs/clan-cli/tests/test_vms_cli.py b/pkgs/clan-cli/tests/test_vms_cli.py index 5609b152c..756a89322 100644 --- a/pkgs/clan-cli/tests/test_vms_cli.py +++ b/pkgs/clan-cli/tests/test_vms_cli.py @@ -73,16 +73,17 @@ def test_vm_qmp( "services": {"getty": {"autologinUser": "root"}}, } }, + monkeypatch=monkeypatch, ) # 'clan vms run' must be executed from within the flake monkeypatch.chdir(flake.path) # start the VM - run_vm_in_thread("my_machine") + vm = run_vm_in_thread("my_machine") # connect with qmp - qmp = qmp_connect("my_machine") + qmp = qmp_connect("my_machine", vm) # verify that issuing a command works # result = qmp.cmd_obj({"execute": "query-status"}) @@ -121,14 +122,15 @@ def test_vm_persistence( temporary_home, flake_template=CLAN_CORE / "templates" / "minimal", machine_configs=config, + monkeypatch=monkeypatch, ) monkeypatch.chdir(flake.path) - run_vm_in_thread("my_machine") + vm = run_vm_in_thread("my_machine") # wait for the VM to start and connect qga - qga = qga_connect("my_machine") + qga = qga_connect("my_machine", vm) # create state via qmp command instead of systemd service qga.run("echo 'dream2nix' > /var/my-state/root", check=True) @@ -139,13 +141,13 @@ def test_vm_persistence( qga.exec_cmd("poweroff") # wait for socket to be down (systemd service 'poweroff' rebooting machine) - wait_vm_down("my_machine") + wait_vm_down("my_machine", vm) # start vm again - run_vm_in_thread("my_machine") + vm = run_vm_in_thread("my_machine") # connect second time - qga = qga_connect("my_machine") + qga = qga_connect("my_machine", vm) # check state exists qga.run("cat /var/my-state/test", check=True) # ensure root file is owned by root @@ -171,5 +173,5 @@ def test_vm_persistence( assert exitcode == 0, out # use qmp to shutdown the machine (prevent zombie qemu processes) - qmp = qmp_connect("my_machine") + qmp = qmp_connect("my_machine", vm) qmp.command("system_powerdown")