clan_vm_manager: Fix switch desync when pressed too fast. Share the secrets folder between build and run. clan_cli: run_vm can now use a custom tmpdir location

This commit is contained in:
Qubasa
2024-03-06 16:32:19 +07:00
parent 0337fb3a67
commit b25a559ff3
4 changed files with 86 additions and 67 deletions

View File

@@ -101,7 +101,13 @@ def prepare_disk(
return disk_img return disk_img
def run_vm(vm: VmConfig, nix_options: list[str] = []) -> None: def run_vm(
vm: VmConfig,
*,
cachedir: Path | None = None,
socketdir: Path | None = None,
nix_options: list[str] = [],
) -> None:
machine = Machine(vm.machine_name, vm.flake_url) machine = Machine(vm.machine_name, vm.flake_url)
log.debug(f"Creating VM for {machine}") log.debug(f"Creating VM for {machine}")
@@ -109,72 +115,78 @@ def run_vm(vm: VmConfig, nix_options: list[str] = []) -> None:
# otherwise, when using /tmp, we risk running out of memory # otherwise, when using /tmp, we risk running out of memory
cache = user_cache_dir() / "clan" cache = user_cache_dir() / "clan"
cache.mkdir(exist_ok=True) cache.mkdir(exist_ok=True)
with TemporaryDirectory(dir=cache) as cachedir, TemporaryDirectory() as sockets:
tmpdir = Path(cachedir)
# TODO: We should get this from the vm argument if cachedir is None:
nixos_config = build_vm(machine, tmpdir, nix_options) cache_tmp = TemporaryDirectory(dir=cache)
cachedir = Path(cache_tmp.name)
state_dir = vm_state_dir(str(vm.flake_url), machine.name) if socketdir is None:
state_dir.mkdir(parents=True, exist_ok=True) socket_tmp = TemporaryDirectory()
socketdir = Path(socket_tmp.name)
# specify socket files for qmp and qga # TODO: We should get this from the vm argument
qmp_socket_file = Path(sockets) / "qmp.sock" nixos_config = build_vm(machine, cachedir, nix_options)
qga_socket_file = Path(sockets) / "qga.sock"
# Create symlinks to the qmp/qga sockets to be able to find them later.
# This indirection is needed because we cannot put the sockets directly
# in the state_dir.
# The reason is, qemu has a length limit of 108 bytes for the qmp socket
# path which is violated easily.
qmp_link = state_dir / "qmp.sock"
if os.path.lexists(qmp_link):
qmp_link.unlink()
qmp_link.symlink_to(qmp_socket_file)
qga_link = state_dir / "qga.sock" state_dir = vm_state_dir(str(vm.flake_url), machine.name)
if os.path.lexists(qga_link): state_dir.mkdir(parents=True, exist_ok=True)
qga_link.unlink()
qga_link.symlink_to(qga_socket_file)
rootfs_img = prepare_disk(tmpdir) # specify socket files for qmp and qga
state_img = state_dir / "state.qcow2" qmp_socket_file = socketdir / "qmp.sock"
if not state_img.exists(): qga_socket_file = socketdir / "qga.sock"
state_img = prepare_disk( # Create symlinks to the qmp/qga sockets to be able to find them later.
directory=state_dir, # This indirection is needed because we cannot put the sockets directly
file_name="state.qcow2", # in the state_dir.
size="50G", # The reason is, qemu has a length limit of 108 bytes for the qmp socket
) # path which is violated easily.
virtiofsd_socket = Path(sockets) / "virtiofsd.sock" qmp_link = state_dir / "qmp.sock"
qemu_cmd = qemu_command( if os.path.lexists(qmp_link):
vm, qmp_link.unlink()
nixos_config, qmp_link.symlink_to(qmp_socket_file)
secrets_dir=Path(nixos_config["secrets_dir"]),
rootfs_img=rootfs_img, qga_link = state_dir / "qga.sock"
state_img=state_img, if os.path.lexists(qga_link):
virtiofsd_socket=virtiofsd_socket, qga_link.unlink()
qmp_socket_file=qmp_socket_file, qga_link.symlink_to(qga_socket_file)
qga_socket_file=qga_socket_file,
rootfs_img = prepare_disk(cachedir)
state_img = state_dir / "state.qcow2"
if not state_img.exists():
state_img = prepare_disk(
directory=state_dir,
file_name="state.qcow2",
size="50G",
) )
virtiofsd_socket = socketdir / "virtiofsd.sock"
qemu_cmd = qemu_command(
vm,
nixos_config,
secrets_dir=Path(nixos_config["secrets_dir"]),
rootfs_img=rootfs_img,
state_img=state_img,
virtiofsd_socket=virtiofsd_socket,
qmp_socket_file=qmp_socket_file,
qga_socket_file=qga_socket_file,
)
packages = ["nixpkgs#qemu"] packages = ["nixpkgs#qemu"]
env = os.environ.copy() env = os.environ.copy()
if vm.graphics and not vm.waypipe: if vm.graphics and not vm.waypipe:
packages.append("nixpkgs#virt-viewer") packages.append("nixpkgs#virt-viewer")
remote_viewer_mimetypes = module_root() / "vms" / "mimetypes" remote_viewer_mimetypes = module_root() / "vms" / "mimetypes"
env[ env[
"XDG_DATA_DIRS" "XDG_DATA_DIRS"
] = f"{remote_viewer_mimetypes}:{env.get('XDG_DATA_DIRS', '')}" ] = f"{remote_viewer_mimetypes}:{env.get('XDG_DATA_DIRS', '')}"
with start_waypipe( with start_waypipe(qemu_cmd.vsock_cid, f"[{vm.machine_name}] "), start_virtiofsd(
qemu_cmd.vsock_cid, f"[{vm.machine_name}] " virtiofsd_socket
), start_virtiofsd(virtiofsd_socket): ):
run( run(
nix_shell(packages, qemu_cmd.args), nix_shell(packages, qemu_cmd.args),
env=env, env=env,
log=Log.BOTH, log=Log.BOTH,
error_msg=f"Could not start vm {machine}", error_msg=f"Could not start vm {machine}",
) )
@dataclass @dataclass
@@ -196,7 +208,7 @@ def run_command(args: argparse.Namespace) -> None:
vm = inspect_vm(machine=machine) vm = inspect_vm(machine=machine)
run_vm(vm, run_options.nix_options) run_vm(vm, nix_options=run_options.nix_options)
def register_run_parser(parser: argparse.ArgumentParser) -> None: def register_run_parser(parser: argparse.ArgumentParser) -> None:

View File

@@ -54,7 +54,8 @@ class MainApplication(Adw.Application):
def on_shutdown(self, source: "MainApplication") -> None: def on_shutdown(self, source: "MainApplication") -> None:
log.debug("Shutting down Adw.Application") log.debug("Shutting down Adw.Application")
log.debug(f"get_windows: {self.get_windows()}") if self.get_windows() == []:
log.warning("No windows to destroy")
if self.window: if self.window:
# TODO: Doesn't seem to raise the destroy signal. Need to investigate # TODO: Doesn't seem to raise the destroy signal. Need to investigate
# self.get_windows() returns an empty list. Desync between window and application? # self.get_windows() returns an empty list. Desync between window and application?

View File

@@ -14,7 +14,6 @@ from typing import IO, ClassVar
import gi import gi
from clan_cli import vms from clan_cli import vms
from clan_cli.clan_uri import ClanScheme, ClanURI from clan_cli.clan_uri import ClanScheme, ClanURI
from clan_cli.errors import ClanError
from clan_cli.history.add import HistoryEntry from clan_cli.history.add import HistoryEntry
from clan_cli.machines.machines import Machine from clan_cli.machines.machines import Machine
@@ -102,6 +101,7 @@ class VMObject(GObject.Object):
def _on_switch_toggle(self, switch: Gtk.Switch, user_state: bool) -> None: def _on_switch_toggle(self, switch: Gtk.Switch, user_state: bool) -> None:
if switch.get_active(): if switch.get_active():
switch.set_state(False) switch.set_state(False)
switch.set_sensitive(False)
self.start() self.start()
else: else:
switch.set_state(True) switch.set_state(True)
@@ -142,6 +142,8 @@ class VMObject(GObject.Object):
tstart = datetime.now() tstart = datetime.now()
log.info(f"Building VM {self.get_id()}") log.info(f"Building VM {self.get_id()}")
log_dir = Path(str(self.log_dir.name)) log_dir = Path(str(self.log_dir.name))
# Start the build process
self.build_process = spawn( self.build_process = spawn(
on_except=None, on_except=None,
out_file=log_dir / "build.log", out_file=log_dir / "build.log",
@@ -150,7 +152,7 @@ class VMObject(GObject.Object):
tmpdir=log_dir, tmpdir=log_dir,
) )
GLib.idle_add(self._vm_status_changed_task) GLib.idle_add(self._vm_status_changed_task)
self.switch.set_sensitive(True)
# Start the logs watcher # Start the logs watcher
self._logs_id = GLib.timeout_add( self._logs_id = GLib.timeout_add(
50, self._get_logs_task, self.build_process 50, self._get_logs_task, self.build_process
@@ -184,6 +186,8 @@ class VMObject(GObject.Object):
out_file=Path(str(self.log_dir.name)) / "vm.log", out_file=Path(str(self.log_dir.name)) / "vm.log",
func=vms.run.run_vm, func=vms.run.run_vm,
vm=self.data.flake.vm, vm=self.data.flake.vm,
cachedir=log_dir,
socketdir=log_dir,
) )
log.debug(f"Started VM {self.get_id()}") log.debug(f"Started VM {self.get_id()}")
GLib.idle_add(self._vm_status_changed_task) GLib.idle_add(self._vm_status_changed_task)
@@ -268,7 +272,7 @@ class VMObject(GObject.Object):
try: try:
with self.machine.vm.qmp_ctx() as qmp: with self.machine.vm.qmp_ctx() as qmp:
qmp.command("system_powerdown") qmp.command("system_powerdown")
except (OSError, ClanError) as ex: except Exception as ex:
log.debug(f"QMP command 'system_powerdown' ignored. Error: {ex}") log.debug(f"QMP command 'system_powerdown' ignored. Error: {ex}")
# Try 20 times to stop the VM # Try 20 times to stop the VM
@@ -298,8 +302,12 @@ class VMObject(GObject.Object):
log.warning(f"Tried to kill VM {self.get_id()} is not running") log.warning(f"Tried to kill VM {self.get_id()} is not running")
return return
log.info(f"Killing VM {self.get_id()} now") log.info(f"Killing VM {self.get_id()} now")
self.vm_process.kill_group()
self.build_process.kill_group() if self.vm_process.proc.is_alive():
self.vm_process.kill_group()
if self.build_process.proc.is_alive():
self.build_process.kill_group()
def read_whole_log(self) -> str: def read_whole_log(self) -> str:
if not self.vm_process.out_file.exists(): if not self.vm_process.out_file.exists():

View File

@@ -18,8 +18,6 @@ log = logging.getLogger(__name__)
class JoinValue(GObject.Object): class JoinValue(GObject.Object):
# TODO: custom signals for async join
__gsignals__: ClassVar = { __gsignals__: ClassVar = {
"join_finished": (GObject.SignalFlags.RUN_FIRST, None, []), "join_finished": (GObject.SignalFlags.RUN_FIRST, None, []),
} }