From 5753f32930a3deaf4501f2a4e4aa7715f3fc944c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=B6rg=20Thalheim?=
Date: Sat, 14 Oct 2023 15:54:56 +0200
Subject: [PATCH] add container test driver

---
 checks/container/default.nix                  |  14 +
 checks/flake-module.nix                       |  17 +
 checks/lib/container-driver/module.nix        |  88 +++++
 checks/lib/container-driver/package.nix       |   9 +
 checks/lib/container-driver/pyproject.toml    |  34 ++
 .../container-driver/test-script-prepend.py   |   9 +
 .../container-driver/test_driver/__init__.py  | 353 ++++++++++++++++++
 .../lib/container-driver/test_driver/py.typed |   0
 checks/lib/container-test.nix                 |  33 ++
 9 files changed, 557 insertions(+)
 create mode 100644 checks/container/default.nix
 create mode 100644 checks/lib/container-driver/module.nix
 create mode 100644 checks/lib/container-driver/package.nix
 create mode 100644 checks/lib/container-driver/pyproject.toml
 create mode 100644 checks/lib/container-driver/test-script-prepend.py
 create mode 100644 checks/lib/container-driver/test_driver/__init__.py
 create mode 100644 checks/lib/container-driver/test_driver/py.typed
 create mode 100644 checks/lib/container-test.nix

diff --git a/checks/container/default.nix b/checks/container/default.nix
new file mode 100644
index 000000000..37128b221
--- /dev/null
+++ b/checks/container/default.nix
@@ -0,0 +1,14 @@
+(import ../lib/container-test.nix) ({ ... }: {
+  name = "container"; # was "secrets" (copy-paste); keeps derivation names distinct from the secrets test
+
+  nodes.machine = { ... }: {
+    networking.hostName = "machine";
+    services.openssh.enable = true;
+    services.openssh.startWhenNeeded = false;
+  };
+  testScript = ''
+    start_all()
+    machine.succeed("systemctl status sshd")
+    machine.wait_for_unit("sshd")
+  '';
+})
diff --git a/checks/flake-module.nix b/checks/flake-module.nix
index 3fd509a42..66447809d 100644
--- a/checks/flake-module.nix
+++ b/checks/flake-module.nix
@@ -14,6 +14,7 @@
         nixosTests = lib.optionalAttrs (pkgs.stdenv.isLinux) {
           # import our test
           secrets = import ./secrets nixosTestArgs;
+          container = import ./container nixosTestArgs;
         };
         schemaTests = pkgs.callPackages ./schemas.nix {
           inherit self;
@@ -25,5 +26,21 @@
           // lib.mapAttrs' (name: config: lib.nameValuePair "home-manager-${name}" config.activation-script) (self'.legacyPackages.homeConfigurations or { });
       in
       nixosTests // schemaTests // flakeOutputs;
+    legacyPackages = {
+      nixosTests =
+        let
+          nixosTestArgs = {
+            # reference to nixpkgs for the current system
+            inherit pkgs;
+            # this gives us a reference to our flake but also all flake inputs
+            inherit self;
+          };
+        in
+        lib.optionalAttrs (pkgs.stdenv.isLinux) {
+          # import our test
+          secrets = import ./secrets nixosTestArgs;
+          container = import ./container nixosTestArgs;
+        };
+    };
   };
 }
diff --git a/checks/lib/container-driver/module.nix b/checks/lib/container-driver/module.nix
new file mode 100644
index 000000000..10adac320
--- /dev/null
+++ b/checks/lib/container-driver/module.nix
@@ -0,0 +1,88 @@
+{ hostPkgs, lib, config, ... }:
+let
+  testDriver = hostPkgs.python3.pkgs.callPackage ./package.nix {
+    inherit (config) extraPythonPackages;
+    inherit (hostPkgs.pkgs) util-linux systemd;
+  };
+  containers = map (m: m.system.build.toplevel) (lib.attrValues config.nodes);
+  pythonizeName = name: # same transformation as pythonize_name in test_driver/__init__.py
+    let
+      head = lib.substring 0 1 name;
+      tail = lib.substring 1 (-1) name;
+    in
+    (if builtins.match "[A-Za-z_]" head == null then "_" else head) +
+    lib.stringAsChars (c: if builtins.match "[A-Za-z0-9_]" c == null then "_" else c) tail;
+  nodeHostNames =
+    let
+      nodesList = map (c: c.system.name) (lib.attrValues config.nodes);
+    in
+    nodesList ++ lib.optional (lib.length nodesList == 1 && !lib.elem "machine" nodesList) "machine";
+  machineNames = map (name: "${name}: Machine;") pythonizedNames;
+  pythonizedNames = map pythonizeName nodeHostNames;
+in
+{
+  driver = lib.mkForce (hostPkgs.runCommand "nixos-test-driver-${config.name}"
+    {
+      nativeBuildInputs = [
+        hostPkgs.makeWrapper
+      ] ++ lib.optionals (!config.skipTypeCheck) [ hostPkgs.mypy ];
+      buildInputs = [ testDriver ];
+      testScript = config.testScriptString;
+      preferLocalBuild = true;
+      passthru = config.passthru;
+      meta = config.meta // {
+        mainProgram = "nixos-test-driver";
+      };
+    }
+    ''
+      mkdir -p $out/bin
+
+      containers=(${toString containers})
+
+      ${lib.optionalString (!config.skipTypeCheck) ''
+        # prepend type hints so the test script can be type checked with mypy
+        cat "${./test-script-prepend.py}" >> testScriptWithTypes
+        echo "${builtins.toString machineNames}" >> testScriptWithTypes
+        echo -n "$testScript" >> testScriptWithTypes
+
+        echo "Running type check (enable/disable: config.skipTypeCheck)"
+        echo "See https://nixos.org/manual/nixos/stable/#test-opt-skipTypeCheck"
+
+        mypy --no-implicit-optional \
+          --pretty \
+          --no-color-output \
+          testScriptWithTypes
+      ''}
+
+      echo -n "$testScript" >> $out/test-script
+
+      ln -s ${testDriver}/bin/nixos-test-driver $out/bin/nixos-test-driver
+
+      wrapProgram $out/bin/nixos-test-driver \
+        --add-flags "--containers ${toString containers}" \
+        --add-flags "--test-script '$out/test-script'"
+    '');
+
+  test = lib.mkForce (lib.lazyDerivation {
+    # lazyDerivation improves performance when only passthru items and/or meta are used.
+    derivation = hostPkgs.stdenv.mkDerivation {
+      name = "vm-test-run-${config.name}";
+
+      requiredSystemFeatures = [ "uid-range" ];
+
+      buildCommand = ''
+        mkdir -p $out
+
+        # effectively mute the XMLLogger
+        export LOGFILE=/dev/null
+
+        ${config.driver}/bin/nixos-test-driver -o $out
+      '';
+
+      passthru = config.passthru;
+
+      meta = config.meta;
+    };
+    inherit (config) passthru meta;
+  });
+}
diff --git a/checks/lib/container-driver/package.nix b/checks/lib/container-driver/package.nix
new file mode 100644
index 000000000..bac4fa93b
--- /dev/null
+++ b/checks/lib/container-driver/package.nix
@@ -0,0 +1,9 @@
+{ extraPythonPackages, buildPythonApplication, self, setuptools, util-linux, systemd }:
+buildPythonApplication {
+  pname = "test-driver";
+  version = "0.0.1";
+  propagatedBuildInputs = [ util-linux systemd ] ++ extraPythonPackages self;
+  nativeBuildInputs = [ setuptools ];
+  format = "pyproject";
+  src = ./.;
+}
diff --git a/checks/lib/container-driver/pyproject.toml b/checks/lib/container-driver/pyproject.toml
new file mode 100644
index 000000000..8bfaa32f8
--- /dev/null
+++ b/checks/lib/container-driver/pyproject.toml
@@ -0,0 +1,34 @@
+[build-system]
+requires = ["setuptools"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "nixos-test-driver"
+version = "0.0.0"
+
+[project.scripts]
+nixos-test-driver = "test_driver:main"
+
+[tool.setuptools.packages]
+find = {}
+
+[tool.setuptools.package-data]
+test_driver = ["py.typed"]
+
+[tool.ruff]
+line-length = 88
+
+select = ["E", "F", "I", "U", "N"]
+ignore = ["E501"]
+
+[tool.black]
+line-length = 88
+target-version = ['py310']
+include = '\.pyi?$'
+
+[tool.mypy]
+python_version = "3.10"
+warn_redundant_casts = true
+disallow_untyped_calls = true
+disallow_untyped_defs = true
+no_implicit_optional = true
diff --git a/checks/lib/container-driver/test-script-prepend.py b/checks/lib/container-driver/test-script-prepend.py
new file mode 100644
index 000000000..6a55bf21d
--- /dev/null
+++ b/checks/lib/container-driver/test-script-prepend.py
@@ -0,0 +1,9 @@
+# This file contains type hints that can be prepended to Nix test scripts so they can be type
+# checked.
+
+from typing import Callable, List
+
+from test_driver import Machine
+
+start_all: Callable[[], None]
+machines: List[Machine]
diff --git a/checks/lib/container-driver/test_driver/__init__.py b/checks/lib/container-driver/test_driver/__init__.py
new file mode 100644
index 000000000..df41ade7f
--- /dev/null
+++ b/checks/lib/container-driver/test_driver/__init__.py
@@ -0,0 +1,353 @@
+import argparse
+import os
+import re
+import subprocess
+import time
+from pathlib import Path
+from tempfile import TemporaryDirectory
+from typing import Any, Callable, Dict, Optional, Tuple
+
+
+def prepare_machine_root(machinename: str, root: Path) -> None:
+    root.mkdir(parents=True, exist_ok=True)
+    root.joinpath("etc").mkdir(parents=True, exist_ok=True)
+    root.joinpath(".env").write_text(
+        "\n".join(f"{k}={v}" for k, v in os.environ.items())
+    )
+
+
+def pythonize_name(name: str) -> str:
+    return re.sub(r"^[^A-Za-z_]|[^A-Za-z0-9_]", "_", name)  # "A-z" would also match [\]^`
+
+
+def retry(fn: Callable, timeout: int = 900) -> None:
+    """Call the given function repeatedly, with 1 second intervals,
+    until it returns True or a timeout is reached.
+    """
+
+    for _ in range(timeout):
+        if fn(False):  # False: not the final attempt
+            return
+        time.sleep(1)
+
+    if not fn(True):  # True: final attempt before giving up
+        raise Exception(f"action timed out after {timeout} seconds")
+
+
+class Machine:
+    def __init__(self, name: str, toplevel: Path, rootdir: Path, out_dir: str):
+        self.name = name
+        self.toplevel = toplevel
+        self.out_dir = out_dir
+        self.process: subprocess.Popen | None = None
+        self.rootdir: Path = rootdir
+
+    def start(self) -> None:
+        prepare_machine_root(self.name, self.rootdir)
+        cmd = [
+            "systemd-nspawn",
+            "--keep-unit",
+            "-M",
+            self.name,
+            "-D",
+            self.rootdir,
+            "--register=no",
+            "--resolv-conf=off",
+            "--bind-ro=/nix/store",
+            "--bind",
+            self.out_dir,
+            "--bind=/proc:/run/host/proc",
+            "--bind=/sys:/run/host/sys",
+            "--private-network",
+            self.toplevel.joinpath("init"),
+        ]
+        env = os.environ.copy()
+        env["SYSTEMD_NSPAWN_UNIFIED_HIERARCHY"] = "1"
+        self.process = subprocess.Popen(cmd, stdout=subprocess.PIPE, text=True, env=env)
+        self.container_pid = self.get_systemd_process()
+
+    def get_systemd_process(self) -> int:
+        assert self.process is not None, "Machine not started"
+        assert self.process.stdout is not None, "Machine has no stdout"
+        for line in self.process.stdout:
+            print(line, end="")
+            if line.startswith("systemd[1]: Startup finished in"):
+                break
+        else:  # stdout was exhausted without seeing the startup message
+            raise RuntimeError(f"Failed to start container {self.name}")
+        childs = (
+            Path(f"/proc/{self.process.pid}/task/{self.process.pid}/children")
+            .read_text()
+            .split()
+        )
+        assert (
+            len(childs) == 1
+        ), f"Expected exactly one child process for systemd-nspawn, got {childs}"
+        try:
+            return int(childs[0])
+        except ValueError:
+            raise RuntimeError(f"Failed to parse child process id {childs[0]}")
+
+    def get_unit_info(self, unit: str) -> Dict[str, str]:
+        proc = self.systemctl(f'--no-pager show "{unit}"')
+        if proc.returncode != 0:
+            raise Exception(
+                f'retrieving systemctl info for unit "{unit}"'
+                + f" failed with exit code {proc.returncode}"
+            )
+
+        line_pattern = re.compile(r"^([^=]+)=(.*)$")
+
+        def tuple_from_line(line: str) -> Tuple[str, str]:
+            match = line_pattern.match(line)
+            assert match is not None
+            return match[1], match[2]
+
+        return dict(
+            tuple_from_line(line)
+            for line in proc.stdout.split("\n")
+            if line_pattern.match(line)
+        )
+
+    def execute(
+        self,
+        command: str,
+        check_return: bool = True,
+        check_output: bool = True,
+        timeout: Optional[int] = 900,
+    ) -> subprocess.CompletedProcess:
+        """
+        Execute a shell command, returning the `subprocess.CompletedProcess`.
+
+        Commands are run with `set -euo pipefail` set:
+
+        -   If several commands are separated by `;` and one fails, the
+            command as a whole will fail.
+
+        -   For pipelines, the last non-zero exit status will be returned
+            (if there is one; otherwise zero will be returned).
+
+        -   Dereferencing unset variables fails the command.
+
+        -   It will wait for stdout to be closed.
+
+        If the command detaches, it must close stdout, as `execute` will wait
+        for this to consume all output reliably. This can be achieved by
+        redirecting stdout to stderr `>&2`, to `/dev/console`, `/dev/null` or
+        a file. Examples of detaching commands are `sleep 365d &`, where the
+        shell forks a new process that can write to stdout and `xclip -i`, where
+        the `xclip` command itself forks without closing stdout.
+
+        `check_return` and `check_output` are accepted but currently ignored:
+        the exit status is always collected and stdout is always captured.
+
+        A timeout for the command can be specified (in seconds) using the optional
+        `timeout` parameter, e.g., `execute(cmd, timeout=10)` or
+        `execute(cmd, timeout=None)`. The default is 900 seconds.
+        """
+
+        # Always run command with shell opts
+        command = f"set -euo pipefail; {command}"
+
+        proc = subprocess.run(
+            [
+                "nsenter",
+                "--target",
+                str(self.container_pid),
+                "--mount",
+                "--uts",
+                "--ipc",
+                "--net",
+                "--pid",
+                "--cgroup",
+                "/bin/sh",
+                "-c",
+                command,
+            ],
+            timeout=timeout,
+            check=False,
+            stdout=subprocess.PIPE,
+            text=True,
+        )
+        return proc
+
+    def systemctl(self, q: str) -> subprocess.CompletedProcess:
+        """
+        Runs `systemctl` commands inside the container. Only the system
+        manager is reachable; there is no `user` argument for `systemctl --user`.
+
+        ```py
+        # run `systemctl list-jobs --no-pager`
+        machine.systemctl("list-jobs --no-pager")
+
+        # the result is a `subprocess.CompletedProcess`
+        proc = machine.systemctl("is-active sshd")
+        print(proc.returncode, proc.stdout)
+        ```
+        """
+        return self.execute(f"systemctl {q}")
+
+    def wait_for_unit(self, unit: str, timeout: int = 900) -> None:
+        """
+        Wait for a systemd unit to get into "active" state.
+        Throws exceptions on "failed" and "inactive" states as well as after
+        timing out.
+        """
+
+        def check_active(_: Any) -> bool:
+            info = self.get_unit_info(unit)
+            state = info["ActiveState"]
+            if state == "failed":
+                raise Exception(f'unit "{unit}" reached state "{state}"')
+
+            if state == "inactive":
+                proc = self.systemctl("list-jobs --full 2>&1")
+                if "No jobs" in proc.stdout:
+                    info = self.get_unit_info(unit)
+                    if info["ActiveState"] == state:
+                        raise Exception(
+                            f'unit "{unit}" is inactive and there are no pending jobs'
+                        )
+
+            return state == "active"
+
+        retry(check_active, timeout)
+
+    def succeed(self, command: str, timeout: int | None = None) -> str:
+        res = self.execute(command, timeout=timeout)
+        if res.returncode != 0:
+            raise RuntimeError(f"Failed to run command {command}")
+        return res.stdout
+
+    def shutdown(self) -> None:
+        """
+        Shut down the machine, waiting for the container to exit.
+        """
+        if self.process:
+            self.process.terminate()
+            self.process.wait()
+            self.process = None
+
+    def release(self) -> None:
+        self.shutdown()
+
+
+def setup_filesystems() -> None:
+    # We don't care about cleaning up the mount points, since we're running in a nix sandbox.
+    Path("/run").mkdir(parents=True, exist_ok=True)
+    subprocess.run(["mount", "-t", "tmpfs", "none", "/run"], check=True)
+    subprocess.run(["mount", "-t", "cgroup2", "none", "/sys/fs/cgroup"], check=True)
+    Path("/etc").chmod(0o755)
+    Path("/etc/os-release").touch()
+    Path("/etc/machine-id").write_text("a5ea3f98dedc0278b6f3cc8c37eeaeac")
+
+
+class Driver:
+    def __init__(self, containers: list[Path], testscript: str, out_dir: str):
+        self.containers = containers
+        self.testscript = testscript
+        self.out_dir = out_dir
+        setup_filesystems()
+
+        self.tempdir = TemporaryDirectory()
+        tempdir_path = Path(self.tempdir.name)
+
+        self.machines = []
+        for container in containers:
+            name_match = re.match(r".*-nixos-system-(.+)-\d.+", container.name)
+            if not name_match:
+                raise ValueError(f"Unable to extract hostname from {container.name}")
+            name = name_match.group(1)
+            self.machines.append(
+                Machine(
+                    name=name,
+                    toplevel=container,
+                    rootdir=tempdir_path / name,
+                    out_dir=self.out_dir,
+                )
+            )
+
+    def start_all(self) -> None:
+        for machine in self.machines:
+            machine.start()
+
+    def test_symbols(self) -> dict[str, Any]:
+        general_symbols = dict(
+            start_all=self.start_all,
+            machines=self.machines,
+            driver=self,
+            Machine=Machine,  # for typing
+        )
+        machine_symbols = {pythonize_name(m.name): m for m in self.machines}
+        # If there's exactly one machine, make it available under the name
+        # "machine", even if it's not called that.
+        if len(self.machines) == 1:
+            (machine_symbols["machine"],) = self.machines
+        print(
+            "additionally exposed symbols:\n "
+            + ", ".join(map(lambda m: m.name, self.machines))
+            + ",\n "
+            + ", ".join(list(general_symbols.keys()))
+        )
+        return {**general_symbols, **machine_symbols}
+
+    def test_script(self) -> None:
+        """Run the test script"""
+        exec(self.testscript, self.test_symbols(), None)
+
+    def run_tests(self) -> None:
+        """Run the test script (for non-interactive test runs)"""
+        self.test_script()
+
+    def __enter__(self) -> "Driver":
+        return self
+
+    def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None:
+        for machine in self.machines:
+            machine.release()
+
+
+def writeable_dir(arg: str) -> Path:
+    """Raises an ArgumentTypeError if the given argument isn't a writeable directory
+    Note: We want to fail as early as possible if a directory isn't writeable,
+    since an executed nixos-test could fail (very late) because of the test-driver
+    writing in a directory without proper permissions.
+    """
+    path = Path(arg)
+    if not path.is_dir():
+        raise argparse.ArgumentTypeError(f"{path} is not a directory")
+    if not os.access(path, os.W_OK):
+        raise argparse.ArgumentTypeError(f"{path} is not a writeable directory")
+    return path
+
+
+def main() -> None:
+    arg_parser = argparse.ArgumentParser(prog="nixos-test-driver")
+    arg_parser.add_argument(
+        "--containers",
+        nargs="+",
+        action="extend",  # accumulate paths when the flag is given more than once
+        required=True,
+        type=Path,
+        help="container system toplevel paths",
+    )
+    arg_parser.add_argument(
+        "--test-script",
+        help="the test script to run",
+        required=True,
+        type=Path,
+    )
+    arg_parser.add_argument(
+        "-o",
+        "--output-directory",
+        default=Path.cwd(),
+        help="the directory to bind to /run/test-results",
+        type=writeable_dir,
+    )
+    args = arg_parser.parse_args()
+    with Driver(
+        args.containers,
+        args.test_script.read_text(),
+        str(args.output_directory.resolve()),
+    ) as driver:
+        driver.run_tests()
diff --git a/checks/lib/container-driver/test_driver/py.typed b/checks/lib/container-driver/test_driver/py.typed
new file mode 100644
index 000000000..e69de29bb
diff --git a/checks/lib/container-test.nix b/checks/lib/container-test.nix
new file mode 100644
index 000000000..3753167a0
--- /dev/null
+++ b/checks/lib/container-test.nix
@@ -0,0 +1,33 @@
+test:
+{ pkgs
+, self
+, ...
+}:
+let
+  inherit (pkgs) lib;
+  nixos-lib = import (pkgs.path + "/nixos/lib") { };
+in
+(nixos-lib.runTest ({ hostPkgs, ... }: {
+  hostPkgs = pkgs;
+  # speed-up evaluation
+  defaults = {
+    documentation.enable = lib.mkDefault false;
+    boot.isContainer = true;
+
+    # undo qemu stuff
+    system.build.initialRamdisk = "";
+    virtualisation.sharedDirectories = lib.mkForce { };
+    networking.useDHCP = false;
+
+    # we have not private networking so far
+    networking.interfaces = lib.mkForce { };
+    #networking.primaryIPAddress = lib.mkForce null;
+    systemd.services.backdoor.enable = false;
+  };
+  # to accept external dependencies such as disko
+  node.specialArgs.self = self;
+  imports = [
+    test
+    ./container-driver/module.nix
+  ];
+})).config.result