clan-cli: Make run_vm_in_thread fail fast. Fix generate_flake to support machine_id

This commit is contained in:
Qubasa
2024-09-06 18:53:02 +02:00
parent b16a53f510
commit 2b5cdd66f9
5 changed files with 87 additions and 34 deletions

View File

@@ -11,6 +11,7 @@ from typing import NamedTuple
import pytest import pytest
from clan_cli.dirs import nixpkgs_source from clan_cli.dirs import nixpkgs_source
from fixture_error import FixtureError from fixture_error import FixtureError
from helpers import cli
from root import CLAN_CORE from root import CLAN_CORE
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
@@ -50,9 +51,14 @@ class FlakeForTest(NamedTuple):
path: Path path: Path
from age_keys import KEYS, KeyPair
def generate_flake( def generate_flake(
temporary_home: Path, temporary_home: Path,
flake_template: Path, flake_template: Path,
monkeypatch: pytest.MonkeyPatch,
sops_key: KeyPair = KEYS[0],
substitutions: dict[str, str] | None = None, substitutions: dict[str, str] | None = None,
# define the machines directly including their config # define the machines directly including their config
machine_configs: dict[str, dict] | None = None, machine_configs: dict[str, dict] | None = None,
@@ -75,7 +81,6 @@ def generate_flake(
) )
) )
""" """
# copy the template to a new temporary location # copy the template to a new temporary location
if inventory is None: if inventory is None:
inventory = {} inventory = {}
@@ -133,6 +138,19 @@ def generate_flake(
sp.run(["git", "config", "user.name", "clan-tool"], cwd=flake, check=True) sp.run(["git", "config", "user.name", "clan-tool"], cwd=flake, check=True)
sp.run(["git", "config", "user.email", "clan@example.com"], cwd=flake, check=True) sp.run(["git", "config", "user.email", "clan@example.com"], cwd=flake, check=True)
sp.run(["git", "commit", "-a", "-m", "Initial commit"], cwd=flake, check=True) sp.run(["git", "commit", "-a", "-m", "Initial commit"], cwd=flake, check=True)
monkeypatch.setenv("SOPS_AGE_KEY", sops_key.privkey)
cli.run(
[
"secrets",
"users",
"add",
"user1",
sops_key.pubkey,
"--flake",
str(flake),
"--debug",
]
)
return FlakeForTest(flake) return FlakeForTest(flake)

View File

@@ -8,6 +8,7 @@ from pathlib import Path
from time import sleep from time import sleep
from clan_cli.dirs import vm_state_dir from clan_cli.dirs import vm_state_dir
from clan_cli.errors import ClanError
from clan_cli.qemu.qga import QgaSession from clan_cli.qemu.qga import QgaSession
from clan_cli.qemu.qmp import QEMUMonitorProtocol from clan_cli.qemu.qmp import QEMUMonitorProtocol
@@ -21,33 +22,46 @@ def find_free_port() -> int:
return sock.getsockname()[1] return sock.getsockname()[1]
def run_vm_in_thread(machine_name: str, ssh_port: int | None = None) -> int: class VmThread(threading.Thread):
def __init__(self, machine_name: str, ssh_port: int | None = None) -> None:
super().__init__()
self.machine_name = machine_name
self.ssh_port = ssh_port
self.exception: Exception | None = None
self.daemon = True
def run(self) -> None:
try:
cli.run(
["vms", "run", self.machine_name, "--publish", f"{self.ssh_port}:22"]
)
except Exception as ex:
# print exception details
print(traceback.format_exc(), file=sys.stderr)
print(sys.exc_info()[2], file=sys.stderr)
self.exception = ex
def run_vm_in_thread(machine_name: str, ssh_port: int | None = None) -> VmThread:
    """Start ``machine_name`` in a background ``VmThread`` and return the thread.

    When ``ssh_port`` is ``None`` a free port is picked with
    ``find_free_port()``. The port actually used is available afterwards
    as the ``ssh_port`` attribute of the returned thread.
    """
    port = find_free_port() if ssh_port is None else ssh_port
    thread = VmThread(machine_name, port)
    thread.start()
    return thread
# wait for qmp socket to exist # wait for qmp socket to exist
def wait_vm_up(machine_name: str, flake_url: str | None = None) -> None: def wait_vm_up(machine_name: str, vm: VmThread, flake_url: str | None = None) -> None:
if flake_url is None: if flake_url is None:
flake_url = str(Path.cwd()) flake_url = str(Path.cwd())
socket_file = vm_state_dir(flake_url, machine_name) / "qmp.sock" socket_file = vm_state_dir(flake_url, machine_name) / "qmp.sock"
timeout: float = 1200 # in seconds timeout: float = 600
while True: while True:
if vm.exception:
msg = "VM failed to start"
raise ClanError(msg) from vm.exception
if timeout <= 0: if timeout <= 0:
msg = f"qmp socket {socket_file} not found. Is the VM running?" msg = f"qmp socket {socket_file} not found. Is the VM running?"
raise TimeoutError(msg) raise TimeoutError(msg)
@@ -58,12 +72,15 @@ def wait_vm_up(machine_name: str, flake_url: str | None = None) -> None:
# wait for vm to be down by checking if qmp socket is down # wait for vm to be down by checking if qmp socket is down
def wait_vm_down(machine_name: str, flake_url: str | None = None) -> None: def wait_vm_down(machine_name: str, vm: VmThread, flake_url: str | None = None) -> None:
if flake_url is None: if flake_url is None:
flake_url = str(Path.cwd()) flake_url = str(Path.cwd())
socket_file = vm_state_dir(flake_url, machine_name) / "qmp.sock" socket_file = vm_state_dir(flake_url, machine_name) / "qmp.sock"
timeout: float = 600 timeout: float = 300
while socket_file.exists(): while socket_file.exists():
if vm.exception:
msg = "VM failed to start"
raise ClanError(msg) from vm.exception
if timeout <= 0: if timeout <= 0:
msg = f"qmp socket {socket_file} still exists. Is the VM down?" msg = f"qmp socket {socket_file} still exists. Is the VM down?"
raise TimeoutError(msg) raise TimeoutError(msg)
@@ -72,11 +89,13 @@ def wait_vm_down(machine_name: str, flake_url: str | None = None) -> None:
# wait for vm to be up then connect and return qmp instance # wait for vm to be up then connect and return qmp instance
def qmp_connect(machine_name: str, flake_url: str | None = None) -> QEMUMonitorProtocol: def qmp_connect(
machine_name: str, vm: VmThread, flake_url: str | None = None
) -> QEMUMonitorProtocol:
if flake_url is None: if flake_url is None:
flake_url = str(Path.cwd()) flake_url = str(Path.cwd())
state_dir = vm_state_dir(flake_url, machine_name) state_dir = vm_state_dir(flake_url, machine_name)
wait_vm_up(machine_name, flake_url) wait_vm_up(machine_name, vm, flake_url)
qmp = QEMUMonitorProtocol( qmp = QEMUMonitorProtocol(
address=str(os.path.realpath(state_dir / "qmp.sock")), address=str(os.path.realpath(state_dir / "qmp.sock")),
) )
@@ -85,9 +104,11 @@ def qmp_connect(machine_name: str, flake_url: str | None = None) -> QEMUMonitorP
# wait for vm to be up then connect and return qga instance # wait for vm to be up then connect and return qga instance
def qga_connect(
    machine_name: str, vm: VmThread, flake_url: str | None = None
) -> QgaSession:
    """Wait for the VM to be up, then open a QGA session to it.

    ``flake_url`` defaults to the current working directory. The session is
    opened on ``qga.sock`` inside the machine's VM state directory. Errors
    raised by ``wait_vm_up`` propagate to the caller.
    """
    url = str(Path.cwd()) if flake_url is None else flake_url
    vm_dir = vm_state_dir(url, machine_name)
    wait_vm_up(machine_name, vm, url)
    qga_socket = os.path.realpath(vm_dir / "qga.sock")
    return QgaSession(qga_socket)

View File

@@ -76,6 +76,7 @@ def test_generate_public_var(
temporary_home, temporary_home,
flake_template=CLAN_CORE / "templates" / "minimal", flake_template=CLAN_CORE / "templates" / "minimal",
machine_configs={"my_machine": config}, machine_configs={"my_machine": config},
monkeypatch=monkeypatch,
) )
monkeypatch.chdir(flake.path) monkeypatch.chdir(flake.path)
machine = Machine(name="my_machine", flake=FlakeId(str(flake.path))) machine = Machine(name="my_machine", flake=FlakeId(str(flake.path)))
@@ -105,6 +106,7 @@ def test_generate_secret_var_sops(
temporary_home, temporary_home,
flake_template=CLAN_CORE / "templates" / "minimal", flake_template=CLAN_CORE / "templates" / "minimal",
machine_configs={"my_machine": config}, machine_configs={"my_machine": config},
monkeypatch=monkeypatch,
) )
monkeypatch.chdir(flake.path) monkeypatch.chdir(flake.path)
sops_setup.init() sops_setup.init()
@@ -140,6 +142,7 @@ def test_generate_secret_var_sops_with_default_group(
temporary_home, temporary_home,
flake_template=CLAN_CORE / "templates" / "minimal", flake_template=CLAN_CORE / "templates" / "minimal",
machine_configs={"my_machine": config}, machine_configs={"my_machine": config},
monkeypatch=monkeypatch,
) )
monkeypatch.chdir(flake.path) monkeypatch.chdir(flake.path)
sops_setup.init() sops_setup.init()
@@ -170,6 +173,7 @@ def test_generate_secret_var_password_store(
temporary_home, temporary_home,
flake_template=CLAN_CORE / "templates" / "minimal", flake_template=CLAN_CORE / "templates" / "minimal",
machine_configs={"my_machine": config}, machine_configs={"my_machine": config},
monkeypatch=monkeypatch,
) )
monkeypatch.chdir(flake.path) monkeypatch.chdir(flake.path)
gnupghome = temporary_home / "gpg" gnupghome = temporary_home / "gpg"
@@ -237,6 +241,7 @@ def test_generate_secret_for_multiple_machines(
temporary_home, temporary_home,
flake_template=CLAN_CORE / "templates" / "minimal", flake_template=CLAN_CORE / "templates" / "minimal",
machine_configs={"machine1": machine1_config, "machine2": machine2_config}, machine_configs={"machine1": machine1_config, "machine2": machine2_config},
monkeypatch=monkeypatch,
) )
monkeypatch.chdir(flake.path) monkeypatch.chdir(flake.path)
sops_setup.init() sops_setup.init()
@@ -282,6 +287,7 @@ def test_dependant_generators(
temporary_home, temporary_home,
flake_template=CLAN_CORE / "templates" / "minimal", flake_template=CLAN_CORE / "templates" / "minimal",
machine_configs={"my_machine": config}, machine_configs={"my_machine": config},
monkeypatch=monkeypatch,
) )
monkeypatch.chdir(flake.path) monkeypatch.chdir(flake.path)
cli.run(["vars", "generate", "--flake", str(flake.path), "my_machine"]) cli.run(["vars", "generate", "--flake", str(flake.path), "my_machine"])
@@ -321,6 +327,7 @@ def test_prompt(
temporary_home, temporary_home,
flake_template=CLAN_CORE / "templates" / "minimal", flake_template=CLAN_CORE / "templates" / "minimal",
machine_configs={"my_machine": config}, machine_configs={"my_machine": config},
monkeypatch=monkeypatch,
) )
monkeypatch.chdir(flake.path) monkeypatch.chdir(flake.path)
monkeypatch.setattr("sys.stdin", StringIO(input_value)) monkeypatch.setattr("sys.stdin", StringIO(input_value))
@@ -359,6 +366,7 @@ def test_share_flag(
temporary_home, temporary_home,
flake_template=CLAN_CORE / "templates" / "minimal", flake_template=CLAN_CORE / "templates" / "minimal",
machine_configs={"my_machine": config}, machine_configs={"my_machine": config},
monkeypatch=monkeypatch,
) )
monkeypatch.chdir(flake.path) monkeypatch.chdir(flake.path)
sops_setup.init() sops_setup.init()
@@ -398,6 +406,7 @@ def test_prompt_create_file(
temporary_home, temporary_home,
flake_template=CLAN_CORE / "templates" / "minimal", flake_template=CLAN_CORE / "templates" / "minimal",
machine_configs={"my_machine": config}, machine_configs={"my_machine": config},
monkeypatch=monkeypatch,
) )
monkeypatch.chdir(flake.path) monkeypatch.chdir(flake.path)
sops_setup.init() sops_setup.init()
@@ -426,6 +435,7 @@ def test_api_get_prompts(
temporary_home, temporary_home,
flake_template=CLAN_CORE / "templates" / "minimal", flake_template=CLAN_CORE / "templates" / "minimal",
machine_configs={"my_machine": config}, machine_configs={"my_machine": config},
monkeypatch=monkeypatch,
) )
monkeypatch.chdir(flake.path) monkeypatch.chdir(flake.path)
monkeypatch.setattr("sys.stdin", StringIO("input1")) monkeypatch.setattr("sys.stdin", StringIO("input1"))
@@ -454,6 +464,7 @@ def test_api_set_prompts(
temporary_home, temporary_home,
flake_template=CLAN_CORE / "templates" / "minimal", flake_template=CLAN_CORE / "templates" / "minimal",
machine_configs={"my_machine": config}, machine_configs={"my_machine": config},
monkeypatch=monkeypatch,
) )
monkeypatch.chdir(flake.path) monkeypatch.chdir(flake.path)
machine = Machine(name="my_machine", flake=FlakeId(str(flake.path))) machine = Machine(name="my_machine", flake=FlakeId(str(flake.path)))

View File

@@ -42,6 +42,7 @@ def test_vm_deployment(
temporary_home, temporary_home,
flake_template=CLAN_CORE / "templates" / "minimal", flake_template=CLAN_CORE / "templates" / "minimal",
machine_configs={"my_machine": config}, machine_configs={"my_machine": config},
monkeypatch=monkeypatch,
) )
monkeypatch.chdir(flake.path) monkeypatch.chdir(flake.path)
sops_setup.init() sops_setup.init()
@@ -65,8 +66,8 @@ def test_vm_deployment(
) )
).stdout.strip() ).stdout.strip()
assert "no-such-path" not in my_secret_path assert "no-such-path" not in my_secret_path
run_vm_in_thread("my_machine") vm = run_vm_in_thread("my_machine")
qga = qga_connect("my_machine") qga = qga_connect("my_machine", vm)
# check my_secret is deployed # check my_secret is deployed
_, out, _ = qga.run("cat /run/secrets/vars/my_generator/my_secret", check=True) _, out, _ = qga.run("cat /run/secrets/vars/my_generator/my_secret", check=True)
assert out == "hello\n" assert out == "hello\n"
@@ -81,4 +82,4 @@ def test_vm_deployment(
) )
assert returncode != 0 assert returncode != 0
qga.exec_cmd("poweroff") qga.exec_cmd("poweroff")
wait_vm_down("my_machine") wait_vm_down("my_machine", vm)

View File

@@ -73,16 +73,17 @@ def test_vm_qmp(
"services": {"getty": {"autologinUser": "root"}}, "services": {"getty": {"autologinUser": "root"}},
} }
}, },
monkeypatch=monkeypatch,
) )
# 'clan vms run' must be executed from within the flake # 'clan vms run' must be executed from within the flake
monkeypatch.chdir(flake.path) monkeypatch.chdir(flake.path)
# start the VM # start the VM
run_vm_in_thread("my_machine") vm = run_vm_in_thread("my_machine")
# connect with qmp # connect with qmp
qmp = qmp_connect("my_machine") qmp = qmp_connect("my_machine", vm)
# verify that issuing a command works # verify that issuing a command works
# result = qmp.cmd_obj({"execute": "query-status"}) # result = qmp.cmd_obj({"execute": "query-status"})
@@ -121,14 +122,15 @@ def test_vm_persistence(
temporary_home, temporary_home,
flake_template=CLAN_CORE / "templates" / "minimal", flake_template=CLAN_CORE / "templates" / "minimal",
machine_configs=config, machine_configs=config,
monkeypatch=monkeypatch,
) )
monkeypatch.chdir(flake.path) monkeypatch.chdir(flake.path)
run_vm_in_thread("my_machine") vm = run_vm_in_thread("my_machine")
# wait for the VM to start and connect qga # wait for the VM to start and connect qga
qga = qga_connect("my_machine") qga = qga_connect("my_machine", vm)
# create state via qmp command instead of systemd service # create state via qmp command instead of systemd service
qga.run("echo 'dream2nix' > /var/my-state/root", check=True) qga.run("echo 'dream2nix' > /var/my-state/root", check=True)
@@ -139,13 +141,13 @@ def test_vm_persistence(
qga.exec_cmd("poweroff") qga.exec_cmd("poweroff")
# wait for socket to be down (systemd service 'poweroff' rebooting machine) # wait for socket to be down (systemd service 'poweroff' rebooting machine)
wait_vm_down("my_machine") wait_vm_down("my_machine", vm)
# start vm again # start vm again
run_vm_in_thread("my_machine") vm = run_vm_in_thread("my_machine")
# connect second time # connect second time
qga = qga_connect("my_machine") qga = qga_connect("my_machine", vm)
# check state exists # check state exists
qga.run("cat /var/my-state/test", check=True) qga.run("cat /var/my-state/test", check=True)
# ensure root file is owned by root # ensure root file is owned by root
@@ -171,5 +173,5 @@ def test_vm_persistence(
assert exitcode == 0, out assert exitcode == 0, out
# use qmp to shutdown the machine (prevent zombie qemu processes) # use qmp to shutdown the machine (prevent zombie qemu processes)
qmp = qmp_connect("my_machine") qmp = qmp_connect("my_machine", vm)
qmp.command("system_powerdown") qmp.command("system_powerdown")