Merge pull request 'fix: clan machines install on machines without hardware configuration' (#2983) from fix/systemless-installs into main

Reviewed-on: https://git.clan.lol/clan/clan-core/pulls/2983
Reviewed-by: kenji <aks.kenji@protonmail.com>
This commit is contained in:
Luis Hebendanz
2025-03-10 13:19:17 +00:00
7 changed files with 267 additions and 50 deletions

View File

@@ -12,6 +12,7 @@ in
./flash/flake-module.nix ./flash/flake-module.nix
./impure/flake-module.nix ./impure/flake-module.nix
./installation/flake-module.nix ./installation/flake-module.nix
./installation-without-system/flake-module.nix
./morph/flake-module.nix ./morph/flake-module.nix
./nixos-documentation/flake-module.nix ./nixos-documentation/flake-module.nix
]; ];
@@ -49,7 +50,7 @@ in
flakeOutputs = flakeOutputs =
lib.mapAttrs' ( lib.mapAttrs' (
name: config: lib.nameValuePair "nixos-${name}" config.config.system.build.toplevel name: config: lib.nameValuePair "nixos-${name}" config.config.system.build.toplevel
) self.nixosConfigurations ) (lib.filterAttrs (n: _v: n != "test-install-machine-without-system") self.nixosConfigurations)
// lib.mapAttrs' (n: lib.nameValuePair "package-${n}") self'.packages // lib.mapAttrs' (n: lib.nameValuePair "package-${n}") self'.packages
// lib.mapAttrs' (n: lib.nameValuePair "devShell-${n}") self'.devShells // lib.mapAttrs' (n: lib.nameValuePair "devShell-${n}") self'.devShells
// lib.mapAttrs' (name: config: lib.nameValuePair "home-manager-${name}" config.activation-script) ( // lib.mapAttrs' (name: config: lib.nameValuePair "home-manager-${name}" config.activation-script) (

View File

@@ -0,0 +1,211 @@
{
self,
lib,
inputs,
...
}:
{
# The purpose of this test is to ensure `clan machines install` works
# for machines that don't have a hardware config yet.
# If this test starts failing, it could be due to the `facter.json` being out of date.
# You can get a new one by adding
# client.fail("cat test-flake/machines/test-install-machine/facter.json >&2")
# to the installation test.
# Machine under test: it sets no `facter.reportPath`, so it starts out without
# any hardware report. Both device settings use `lib.mkDefault`.
clan.machines.test-install-machine-without-system = {
  imports = [ self.nixosModules.test-install-machine-without-system ];
  boot.loader.grub = {
    device = lib.mkDefault "/dev/vda";
  };
  fileSystems."/" = {
    device = lib.mkDefault "/dev/vda";
  };
};
# Same shared NixOS module as the machine without a hardware report, but with a
# facter report taken from the `test-fixtures` flake input.
clan.machines.test-install-machine-with-system = {
  imports = [ self.nixosModules.test-install-machine-without-system ];
  facter = {
    reportPath = "${inputs.test-fixtures}/nixos-vm-facter-json/facter.json";
  };
  boot.loader.grub = {
    device = lib.mkDefault "/dev/vda";
  };
  fileSystems."/" = {
    device = lib.mkDefault "/dev/vda";
  };
};
# NixOS module shared by both install-test machines: test instrumentation,
# a marker file, a vars generator and the disko disk layout.
flake.nixosModules.test-install-machine-without-system =
  { lib, modulesPath, ... }:
  {
    imports = [
      # The first two modules are always needed to be able to run the tests.
      (modulesPath + "/testing/test-instrumentation.nix")
      (modulesPath + "/profiles/qemu-guest.nix")
      ../lib/minify.nix
    ];

    networking.hostName = "test-install-machine";

    # Marker file; the installation test reads it back after the first boot
    # (`cat /etc/install-successful`).
    environment.etc."install-successful".text = "ok";

    boot = {
      consoleLogLevel = lib.mkForce 100;
      kernelParams = [ "boot.shell_on_fail" ];
      loader.grub = {
        efiSupport = lib.mkDefault true;
        efiInstallAsRemovable = lib.mkDefault true;
      };
    };

    clan.core.vars = {
      settings.secretStore = "vm";
      generators.test = {
        files.test.neededFor = "partitioning";
        script = ''
          echo "notok" > $out/test
        '';
      };
    };

    # disko config: a single GPT disk on /dev/vda with a BIOS boot partition,
    # an ESP mounted at /boot and an ext4 root.
    disko.devices.disk.main = {
      type = "disk";
      device = "/dev/vda";
      # Abort before partitioning unless this path exists.
      # NOTE(review): presumably the `test` generator file above
      # (neededFor = "partitioning") is what gets placed here; confirm.
      preCreateHook = ''
        test -e /run/partitioning-secrets/test/test
      '';
      content = {
        type = "gpt";
        partitions = {
          boot = {
            size = "1M";
            type = "EF02"; # for grub MBR
            priority = 1;
          };
          ESP = {
            size = "512M";
            type = "EF00";
            content = {
              type = "filesystem";
              format = "vfat";
              mountpoint = "/boot";
              mountOptions = [ "umask=0077" ];
            };
          };
          root = {
            size = "100%";
            content = {
              type = "filesystem";
              format = "ext4";
              mountpoint = "/";
            };
          };
        };
      };
    };
  };
# Defines the `test-installation-without-system` VM check: a `client` node runs
# `clan machines install` against an `installer` node, then the installed disk
# is booted as `target`.
perSystem =
  {
    pkgs,
    lib,
    ...
  }:
  let
    # Everything listed here is added to `system.extraDependencies` of the
    # installer and client nodes, whose substituters are forced to be empty.
    dependencies = [
      self
      self.nixosConfigurations.test-install-machine-with-system.config.system.build.toplevel
      self.nixosConfigurations.test-install-machine-with-system.config.system.build.diskoScript
      self.nixosConfigurations.test-install-machine-with-system.config.system.clan.deployment.file
      pkgs.stdenv.drvPath
      pkgs.bash.drvPath
      pkgs.nixos-anywhere
      pkgs.bubblewrap
    ] ++ builtins.map (i: i.outPath) (builtins.attrValues self.inputs);
    closureInfo = pkgs.closureInfo { rootPaths = dependencies; };

    # Use the canonical platform attribute, matching the `checks` guard below,
    # instead of the `pkgs.system` alias.
    clanCli = self.packages.${pkgs.stdenv.hostPlatform.system}.clan-cli;

    # Nix settings shared by the installer and client nodes: no substituters,
    # an empty flake registry, and flakes enabled.
    offlineNixSettings = {
      substituters = lib.mkForce [ ];
      hashed-mirrors = null;
      connect-timeout = lib.mkForce 3;
      flake-registry = pkgs.writeText "flake-registry" ''{"flakes":[],"version":2}'';
      experimental-features = [
        "nix-command"
        "flakes"
      ];
    };
  in
  {
    # On aarch64-linux, hangs on reboot after installation:
    # vm-test-run-test-installation> (finished: waiting for the VM to power off, in 1.97 seconds)
    # vm-test-run-test-installation>
    # vm-test-run-test-installation> new_machine: must succeed: cat /etc/install-successful
    # vm-test-run-test-installation> new_machine: waiting for the VM to finish booting
    # vm-test-run-test-installation> new_machine: starting vm
    # vm-test-run-test-installation> new_machine: QEMU running (pid 80)
    # vm-test-run-test-installation> new_machine: Guest root shell did not produce any data yet...
    # vm-test-run-test-installation> new_machine: To debug, enter the VM and run 'systemctl status backdoor.service'.
    checks = pkgs.lib.mkIf (pkgs.stdenv.isLinux && pkgs.stdenv.hostPlatform.system != "aarch64-linux") {
      test-installation-without-system = (import ../lib/test-base.nix) {
        name = "test-installation-without-system";
        nodes.target = {
          services.openssh.enable = true;
          virtualisation.diskImage = "./target.qcow2";
          virtualisation.useBootLoader = true;
          # virtualisation.fileSystems."/" = {
          # device = "/dev/disk/by-label/this-is-not-real-and-will-never-be-used";
          # fsType = "ext4";
          # };
        };
        nodes.installer =
          { modulesPath, ... }:
          {
            imports = [
              (modulesPath + "/../tests/common/auto-format-root-device.nix")
            ];
            services.openssh.enable = true;
            users.users.root.openssh.authorizedKeys.keyFiles = [ ../lib/ssh/pubkey ];
            system.nixos.variant_id = "installer";
            environment.systemPackages = [ pkgs.nixos-facter ];
            virtualisation.emptyDiskImages = [ 512 ];
            virtualisation.diskSize = 8 * 1024;
            virtualisation.rootDevice = "/dev/vdb";
            # both installer and target need to use the same diskImage
            virtualisation.diskImage = "./target.qcow2";
            nix.settings = offlineNixSettings;
            system.extraDependencies = dependencies;
          };
        nodes.client = {
          environment.systemPackages = [ clanCli ] ++ clanCli.runtimeDependencies;
          environment.etc."install-closure".source = "${closureInfo}/store-paths";
          virtualisation.memorySize = 2048;
          nix.settings = offlineNixSettings;
          system.extraDependencies = dependencies;
        };
        testScript = ''
          client.start()
          installer.start()
          client.succeed("${pkgs.coreutils}/bin/install -Dm 600 ${../lib/ssh/privkey} /root/.ssh/id_ed25519")
          client.wait_until_succeeds("timeout 2 ssh -o StrictHostKeyChecking=accept-new -v root@installer hostname")
          client.succeed("cp -r ${../..} test-flake && chmod -R +w test-flake")
          client.fail("test -f test-flake/machines/test-install-machine-without-system/hardware-configuration.nix")
          client.fail("test -f test-flake/machines/test-install-machine-without-system/facter.json")
          client.succeed("clan machines install --debug --flake test-flake --yes test-install-machine-without-system --target-host root@installer --update-hardware-config nixos-facter >&2")
          try:
            installer.shutdown()
          except BrokenPipeError:
            # qemu has already exited
            pass
          target.state_dir = installer.state_dir
          target.start()
          target.wait_for_unit("multi-user.target")
          assert(target.succeed("cat /etc/install-successful").strip() == "ok")
        '';
      } { inherit pkgs self; };
    };
  };
}

View File

@@ -175,12 +175,19 @@
client.succeed("${pkgs.coreutils}/bin/install -Dm 600 ${../lib/ssh/privkey} /root/.ssh/id_ed25519") client.succeed("${pkgs.coreutils}/bin/install -Dm 600 ${../lib/ssh/privkey} /root/.ssh/id_ed25519")
client.wait_until_succeeds("timeout 2 ssh -o StrictHostKeyChecking=accept-new -v root@installer hostname") client.wait_until_succeeds("timeout 2 ssh -o StrictHostKeyChecking=accept-new -v root@installer hostname")
client.succeed("cp -r ${../..} test-flake && chmod -R +w test-flake") client.succeed("cp -r ${../..} test-flake && chmod -R +w test-flake")
# test that we can generate hardware configurations
client.fail("test -f test-flake/machines/test-install-machine/facter.json")
client.fail("test -f test-flake/machines/test-install-machine/hardware-configuration.nix") client.fail("test -f test-flake/machines/test-install-machine/hardware-configuration.nix")
client.succeed("clan machines update-hardware-config --flake test-flake test-install-machine root@installer >&2") client.succeed("clan machines update-hardware-config --flake test-flake test-install-machine root@installer >&2")
client.succeed("test -f test-flake/machines/test-install-machine/facter.json") client.succeed("test -f test-flake/machines/test-install-machine/facter.json")
client.succeed("clan machines update-hardware-config --backend nixos-generate-config --flake test-flake test-install-machine root@installer>&2") client.succeed("clan machines update-hardware-config --backend nixos-generate-config --flake test-flake test-install-machine root@installer>&2")
client.succeed("test -f test-flake/machines/test-install-machine/hardware-configuration.nix") client.succeed("test -f test-flake/machines/test-install-machine/hardware-configuration.nix")
client.succeed("clan machines install --debug --flake ${../..} --yes test-install-machine --target-host root@installer >&2")
# but we don't use them because they're not cached
client.succeed("rm test-flake/machines/test-install-machine/hardware-configuration.nix test-flake/machines/test-install-machine/facter.json")
client.succeed("clan machines install --debug --flake test-flake --yes test-install-machine --target-host root@installer >&2")
try: try:
installer.shutdown() installer.shutdown()
except BrokenPipeError: except BrokenPipeError:

26
flake.lock generated
View File

@@ -75,6 +75,7 @@
"nixpkgs": "nixpkgs", "nixpkgs": "nixpkgs",
"sops-nix": "sops-nix", "sops-nix": "sops-nix",
"systems": "systems", "systems": "systems",
"test-fixtures": "test-fixtures",
"treefmt-nix": "treefmt-nix" "treefmt-nix": "treefmt-nix"
} }
}, },
@@ -114,6 +115,31 @@
"type": "github" "type": "github"
} }
}, },
"test-fixtures": {
"inputs": {
"flake-parts": [
"flake-parts"
],
"nixpkgs": [
"nixpkgs"
]
},
"locked": {
"lastModified": 1741504481,
"narHash": "sha256-Ndx7LCbLF2sgRvbiefKEe1rgL+cYlBANVRokg27DflI=",
"ref": "main",
"rev": "3508b7ed11dad068ffc8c9f0047a5c7d54644e2c",
"shallow": true,
"type": "git",
"url": "https://git.clan.lol/clan/test-fixtures"
},
"original": {
"ref": "main",
"shallow": true,
"type": "git",
"url": "https://git.clan.lol/clan/test-fixtures"
}
},
"treefmt-nix": { "treefmt-nix": {
"inputs": { "inputs": {
"nixpkgs": [ "nixpkgs": [

View File

@@ -19,6 +19,10 @@
treefmt-nix.url = "github:numtide/treefmt-nix"; treefmt-nix.url = "github:numtide/treefmt-nix";
treefmt-nix.inputs.nixpkgs.follows = "nixpkgs"; treefmt-nix.inputs.nixpkgs.follows = "nixpkgs";
test-fixtures.url = "git+https://git.clan.lol/clan/test-fixtures?ref=main&shallow=1";
test-fixtures.inputs.flake-parts.follows = "flake-parts";
test-fixtures.inputs.nixpkgs.follows = "nixpkgs";
}; };
outputs = outputs =

View File

@@ -3,6 +3,7 @@ import logging
import os import os
import sys import sys
from dataclasses import dataclass, field from dataclasses import dataclass, field
from enum import Enum
from pathlib import Path from pathlib import Path
from tempfile import TemporaryDirectory from tempfile import TemporaryDirectory
@@ -25,6 +26,12 @@ from clan_cli.vars.generate import generate_vars
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
class BuildOn(Enum):
    """Where the NixOS configuration is built during installation.

    The member values are the accepted choices of the ``--build-on`` CLI
    option and are passed through unchanged as the ``--build-on`` argument of
    the command assembled in ``install_machine``.
    """

    AUTO = "auto"
    LOCAL = "local"
    REMOTE = "remote"
@dataclass @dataclass
class InstallOptions: class InstallOptions:
machine: Machine machine: Machine
@@ -33,7 +40,7 @@ class InstallOptions:
debug: bool = False debug: bool = False
no_reboot: bool = False no_reboot: bool = False
phases: str | None = None phases: str | None = None
build_on_remote: bool = False build_on: BuildOn | None = None
nix_options: list[str] = field(default_factory=list) nix_options: list[str] = field(default_factory=list)
update_hardware_config: HardwareConfig = HardwareConfig.NONE update_hardware_config: HardwareConfig = HardwareConfig.NONE
password: str | None = None password: str | None = None
@@ -122,10 +129,8 @@ def install_machine(opts: InstallOptions) -> None:
if opts.identity_file: if opts.identity_file:
cmd += ["-i", str(opts.identity_file)] cmd += ["-i", str(opts.identity_file)]
if opts.build_on_remote: if opts.build_on:
cmd.extend(["--build-on", "remote"]) cmd += ["--build-on", opts.build_on.value]
else:
cmd.extend(["--build-on", "auto"])
if h.port: if h.port:
cmd += ["--ssh-port", str(h.port)] cmd += ["--ssh-port", str(h.port)]
@@ -210,7 +215,7 @@ def install_command(args: argparse.Namespace) -> None:
debug=args.debug, debug=args.debug,
no_reboot=args.no_reboot, no_reboot=args.no_reboot,
nix_options=args.option, nix_options=args.option,
build_on_remote=args.build_on_remote, build_on=BuildOn(args.build_on) if args.build_on is not None else None,
update_hardware_config=HardwareConfig(args.update_hardware_config), update_hardware_config=HardwareConfig(args.update_hardware_config),
password=password, password=password,
identity_file=args.identity_file, identity_file=args.identity_file,
@@ -241,10 +246,10 @@ def register_install_parser(parser: argparse.ArgumentParser) -> None:
help="Host key (.ssh/known_hosts) check mode.", help="Host key (.ssh/known_hosts) check mode.",
) )
parser.add_argument( parser.add_argument(
"--build-on-remote", "--build-on",
action="store_true", choices=[x.value for x in BuildOn],
help="build the NixOS configuration on the remote machine", default=None,
default=False, help="where to build the NixOS configuration",
) )
parser.add_argument( parser.add_argument(
"--yes", "--yes",

View File

@@ -4,15 +4,13 @@ import logging
from dataclasses import dataclass, field from dataclasses import dataclass, field
from functools import cached_property from functools import cached_property
from pathlib import Path from pathlib import Path
from time import time
from typing import TYPE_CHECKING, Any, Literal from typing import TYPE_CHECKING, Any, Literal
from clan_cli.cmd import RunOpts, run_no_stdout
from clan_cli.errors import ClanError from clan_cli.errors import ClanError
from clan_cli.facts import public_modules as facts_public_modules from clan_cli.facts import public_modules as facts_public_modules
from clan_cli.facts import secret_modules as facts_secret_modules from clan_cli.facts import secret_modules as facts_secret_modules
from clan_cli.flake import Flake from clan_cli.flake import Flake
from clan_cli.nix import nix_build, nix_config, nix_eval, nix_test_store from clan_cli.nix import nix_config, nix_test_store
from clan_cli.ssh.host import Host from clan_cli.ssh.host import Host
from clan_cli.ssh.host_key import HostKeyCheck from clan_cli.ssh.host_key import HostKeyCheck
from clan_cli.ssh.parse import parse_deployment_address from clan_cli.ssh.parse import parse_deployment_address
@@ -64,41 +62,6 @@ class Machine:
f"nixosConfigurations.{self.name}.pkgs.hostPlatform.system" f"nixosConfigurations.{self.name}.pkgs.hostPlatform.system"
) )
@property
def can_build_locally(self) -> bool:
config = nix_config()
if self.system == config["system"] or self.system in config["extra-platforms"]:
return True
nix_code = f"""
let
flake = builtins.getFlake("path:{self.flake.store_path}?narHash={self.flake.hash}");
in
(flake.inputs.nixpkgs.legacyPackages.{self.system}.runCommandNoCC "clan-can-build-{int(time())}" {{ }} "touch $out").drvPath
"""
unsubstitutable_drv = json.loads(
run_no_stdout(
nix_eval(
[
"--expr",
nix_code,
]
),
opts=RunOpts(prefix=self.name),
).stdout.strip()
)
try:
run_no_stdout(
nix_build([f"{unsubstitutable_drv}^*"]), opts=RunOpts(prefix=self.name)
)
except Exception as e:
self.debug("failed to build test derivation", exc_info=e)
return False
else:
return True
@property @property
def deployment(self) -> dict: def deployment(self) -> dict:
if self.cached_deployment is not None: if self.cached_deployment is not None: