vars/sops: stop writing on clan vars check

This fixes an issue where check_vars() would add machine keys or authorize machines for shared vars.

These write operations should only ever be done on a `clan vars generate`, which `clan vars check` should be a read-only operation
This commit is contained in:
DavHau
2025-10-13 18:43:40 +07:00
parent f313ace19a
commit bf41a9ef00
5 changed files with 110 additions and 14 deletions

View File

@@ -1,5 +1,6 @@
import json
import logging
import os
import shutil
import subprocess
import time
@@ -429,9 +430,43 @@ def test_generated_shared_secret_sops(
machine1 = Machine(name="machine1", flake=Flake(str(flake.path)))
machine2 = Machine(name="machine2", flake=Flake(str(flake.path)))
cli.run(["vars", "generate", "--flake", str(flake.path), "machine1"])
assert check_vars(machine1.name, machine1.flake)
# Get the initial state of the flake directory after generation
def get_file_mtimes(path: str) -> dict[str, float]:
"""Get modification times of all files in a directory tree."""
mtimes = {}
for root, _dirs, files in os.walk(path):
# Skip .git directory
if ".git" in root:
continue
for file in files:
filepath = Path(root) / file
mtimes[str(filepath)] = filepath.stat().st_mtime
return mtimes
initial_mtimes = get_file_mtimes(str(flake.path))
# First check_vars should not write anything
assert check_vars(machine1.name, machine1.flake), (
"machine1 has already generated vars, so check_vars should return True\n"
f"Check result:\n{check_vars(machine1.name, machine1.flake)}"
)
# Verify no files were modified
after_check_mtimes = get_file_mtimes(str(flake.path))
assert initial_mtimes == after_check_mtimes, (
"check_vars should not modify any files when vars are already valid"
)
assert not check_vars(machine2.name, machine2.flake), (
"machine2 has not generated vars yet, so check_vars should return False"
)
# Verify no files were modified
after_check_mtimes_2 = get_file_mtimes(str(flake.path))
assert initial_mtimes == after_check_mtimes_2, (
"check_vars should not modify any files when vars are not valid"
)
cli.run(["vars", "generate", "--flake", str(flake.path), "machine2"])
assert check_vars(machine2.name, machine2.flake)
m1_sops_store = sops.SecretStore(machine1.flake)
m2_sops_store = sops.SecretStore(machine2.flake)
# Create generators with machine context for testing

View File

@@ -3,6 +3,7 @@ import logging
from typing import TYPE_CHECKING
from clan_cli.completions import add_dynamic_completer, complete_machines
from clan_cli.vars.secret_modules import sops
from clan_lib.errors import ClanError
from clan_lib.flake import Flake, require_flake
from clan_lib.machines.machines import Machine
@@ -26,6 +27,26 @@ class VarStatus:
self.unfixed_secret_vars = unfixed_secret_vars
self.invalid_generators = invalid_generators
def text(self) -> str:
log = ""
if self.missing_secret_vars:
log += "Missing secret vars:\n"
for var in self.missing_secret_vars:
log += f" - {var.id}\n"
if self.missing_public_vars:
log += "Missing public vars:\n"
for var in self.missing_public_vars:
log += f" - {var.id}\n"
if self.unfixed_secret_vars:
log += "Unfixed secret vars:\n"
for var in self.unfixed_secret_vars:
log += f" - {var.id}\n"
if self.invalid_generators:
log += "Invalid generators (outdated invalidation hash):\n"
for gen in self.invalid_generators:
log += f" - {gen}\n"
return log if log else "All vars are present and valid."
def vars_status(
machine_name: str,
@@ -66,15 +87,32 @@ def vars_status(
f"Secret var '{file.name}' for service '{generator.name}' in machine {machine.name} is missing.",
)
missing_secret_vars.append(file)
if (
isinstance(machine.secret_vars_store, sops.SecretStore)
and generator.share
and file.exists
and not machine.secret_vars_store.machine_has_access(
generator=generator,
secret_name=file.name,
machine=machine.name,
)
):
msg = (
f"Secret var '{generator.name}/{file.name}' is marked for deployment to machine '{machine.name}', but the machine does not have access to it.\n"
f"Run 'clan vars generate {machine.name}' to fix this.\n"
)
machine.info(msg)
missing_secret_vars.append(file)
else:
msg = machine.secret_vars_store.health_check(
health_msg = machine.secret_vars_store.health_check(
machine=machine.name,
generators=[generator],
file_name=file.name,
)
if msg:
if health_msg is not None:
machine.info(
f"Secret var '{file.name}' for service '{generator.name}' in machine {machine.name} needs update: {msg}",
f"Secret var '{file.name}' for service '{generator.name}' in machine {machine.name} needs update: {health_msg}",
)
unfixed_secret_vars.append(file)
@@ -106,6 +144,7 @@ def check_vars(
generator_name: None | str = None,
) -> bool:
status = vars_status(machine_name, flake, generator_name=generator_name)
log.info(f"Check results for machine '{machine_name}': \n{status.text()}")
return not (
status.missing_secret_vars
or status.missing_public_vars

View File

@@ -259,6 +259,10 @@ class Generator:
_secret_store=sec_store,
)
# link generator to its files
for file in files:
file.generator(generator)
if share:
# For shared generators, check if we already created it
existing = next(

View File

@@ -98,7 +98,8 @@ class SecretStore(StoreBase):
def machine_has_access(
self, generator: Generator, secret_name: str, machine: str
) -> bool:
self.ensure_machine_key(machine)
if not has_machine(self.flake.path, machine):
return False
key_dir = sops_machines_folder(self.flake.path) / machine
return self.key_has_access(key_dir, generator, secret_name)
@@ -156,8 +157,6 @@ class SecretStore(StoreBase):
else:
continue
if file.secret and self.exists(generator, file.name):
if file.deploy:
self.ensure_machine_has_access(generator, file.name, machine)
needs_update, msg = self.needs_fix(generator, file.name, machine)
if needs_update:
outdated.append((generator.name, file.name, msg))
@@ -283,6 +282,7 @@ class SecretStore(StoreBase):
) -> None:
if self.machine_has_access(generator, name, machine):
return
self.ensure_machine_key(machine)
secret_folder = self.secret_path(generator, name)
add_secret(
self.flake.path,

View File

@@ -5,6 +5,7 @@ from clan_cli.vars import graph
from clan_cli.vars.generator import Generator
from clan_cli.vars.graph import requested_closure
from clan_cli.vars.migration import check_can_migrate, migrate_files
from clan_cli.vars.secret_modules import sops
from clan_lib.api import API
from clan_lib.errors import ClanError
@@ -152,15 +153,15 @@ def run_generators(
if not machines:
msg = "At least one machine must be provided"
raise ClanError(msg)
all_generators = get_generators(machines, full_closure=True)
if isinstance(generators, list):
# List of generator names - use them exactly as provided
if len(generators) == 0:
return
all_generators = get_generators(machines, full_closure=True)
generator_objects = [g for g in all_generators if g.key.name in generators]
generators_to_run = [g for g in all_generators if g.key.name in generators]
else:
# None or single string - use get_generators with closure parameter
generator_objects = get_generators(
generators_to_run = get_generators(
machines,
full_closure=full_closure,
generator_name=generators,
@@ -170,13 +171,30 @@ def run_generators(
# TODO: make this more lazy and ask for every generator on execution
if callable(prompt_values):
prompt_values = {
generator.name: prompt_values(generator) for generator in generator_objects
generator.name: prompt_values(generator) for generator in generators_to_run
}
# execute health check
for machine in machines:
_ensure_healthy(machine=machine)
# ensure all selected machines have access to all selected shared generators
for machine in machines:
# This is only relevant for the sops store
# TODO: improve store abstraction to use Protocols and introduce a proper SecretStore interface
if not isinstance(machine.secret_vars_store, sops.SecretStore):
continue
for generator in all_generators:
if generator.share:
for file in generator.files:
if not file.secret or not file.exists:
continue
machine.secret_vars_store.ensure_machine_has_access(
generator,
file.name,
machine.name,
)
# get the flake via any machine (they are all the same)
flake = machines[0].flake
@@ -188,13 +206,13 @@ def run_generators(
# preheat the select cache, to reduce repeated calls during execution
selectors = []
for generator in generator_objects:
for generator in generators_to_run:
machine = get_generator_machine(generator)
selectors.append(generator.final_script_selector(machine.name))
flake.precache(selectors)
# execute generators
for generator in generator_objects:
for generator in generators_to_run:
machine = get_generator_machine(generator)
if check_can_migrate(machine, generator):
migrate_files(machine, generator)