Merge pull request 'fix: handle arbitrary store paths references in flake cache' (#4441) from fix-flake-caching into main

Reviewed-on: https://git.clan.lol/clan/clan-core/pulls/4441
This commit is contained in:
lassulus
2025-07-22 16:18:15 +00:00
3 changed files with 272 additions and 57 deletions

View File

@@ -1,9 +1,11 @@
import json import json
import logging import logging
import os import os
import re
import textwrap import textwrap
from dataclasses import asdict, dataclass, field from dataclasses import asdict, dataclass, field
from enum import Enum from enum import Enum
from functools import cache
from hashlib import sha1 from hashlib import sha1
from pathlib import Path from pathlib import Path
from tempfile import NamedTemporaryFile from tempfile import NamedTemporaryFile
@@ -14,6 +16,62 @@ from clan_lib.errors import ClanError
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
def get_nix_store_dir() -> str:
"""Get the Nix store directory path pattern for regex matching.
This always returns the pattern that Nix uses in its output,
which is typically /nix/store regardless of chroot.
"""
return os.environ.get("NIX_STORE_DIR", "/nix/store")
def get_physical_store_path(store_path: str) -> Path:
"""Convert a store path to its physical location, handling chroot environments.
When CLAN_TEST_STORE is set, paths like /nix/store/hash-name are
actually located at CLAN_TEST_STORE/nix/store/hash-name.
"""
test_store = os.environ.get("CLAN_TEST_STORE")
if test_store and store_path.startswith("/nix/store/"):
# Remove leading / to join properly
relative_path = store_path.lstrip("/")
return Path(test_store) / relative_path
return Path(store_path)
@cache
def get_store_path_regex(store_dir: str) -> re.Pattern[str]:
"""Get compiled regex for a specific store directory.
Matches store paths: store_dir/hash-name
The hash is base32 lowercase, name can contain [0-9a-zA-Z+-.?=_]
"""
# Pattern: store_dir/hash-name
pattern = (
re.escape(store_dir) + r"/[0-9a-z]+-[0-9a-zA-Z\+\-\._\?=]*" # hash-name
)
return re.compile(pattern)
def find_store_references(text: str) -> list[str]:
"""Find all store path references in a string."""
store_dir = get_nix_store_dir()
return get_store_path_regex(store_dir).findall(text)
def is_pure_store_path(path: str) -> bool:
"""Check if a path is a pure Nix store path without file references or metadata.
Pure store paths have the format: /nix/store/hash-name
They should NOT contain:
- Additional path components (/nix/store/hash-name/subdir/file.nix)
- Line numbers or metadata (/nix/store/hash-name:42)
"""
store_dir = get_nix_store_dir()
regex = get_store_path_regex(store_dir)
return bool(regex.fullmatch(path))
class SetSelectorType(str, Enum): class SetSelectorType(str, Enum):
""" """
enum for the type of selector in a set. enum for the type of selector in a set.
@@ -327,12 +385,10 @@ class FlakeCacheEntry:
self.value[requested_index] = FlakeCacheEntry() self.value[requested_index] = FlakeCacheEntry()
self.value[requested_index].insert(value[i], selectors[1:]) self.value[requested_index].insert(value[i], selectors[1:])
# strings need to be checked if they are store paths # strings need to be checked if they are pure store paths
# if they are, we store them as a dict with the outPath key # if they are, we store them as a dict with the outPath key
# this is to mirror nix behavior, where the outPath of an attrset is used if no further key is specified # this is to mirror nix behavior, where the outPath of an attrset is used if no further key is specified
elif isinstance(value, str) and value.startswith( elif isinstance(value, str) and is_pure_store_path(value):
os.environ.get("NIX_STORE_DIR", "/nix/store")
):
assert selectors == [] assert selectors == []
self.value = {"outPath": FlakeCacheEntry(value)} self.value = {"outPath": FlakeCacheEntry(value)}
@@ -342,41 +398,32 @@ class FlakeCacheEntry:
assert selectors == [] assert selectors == []
if self.value == {}: if self.value == {}:
self.value = value self.value = value
# Only check for outPath wrapping conflicts for strings (store paths)
elif isinstance(value, str) and (
isinstance(self.value, dict)
and "outPath" in self.value
and isinstance(self.value["outPath"], FlakeCacheEntry)
):
# If the same value is already wrapped in outPath, it's not a conflict
if self.value["outPath"].value == value:
# Value already cached as outPath, no need to change
pass
else:
msg = f"Cannot insert {value} into cache, already have {self.value}"
raise TypeError(msg)
elif self.value != value: elif self.value != value:
msg = f"Cannot insert {value} into cache, already have {self.value}" msg = f"Cannot insert {value} into cache, already have {self.value}"
raise TypeError(msg) raise TypeError(msg)
def _check_path_exists(self, path_str: str) -> bool:
"""Check if a path exists, handling potential line number suffixes."""
path = Path(path_str)
if path.exists():
return True
# Try stripping line numbers if the path doesn't exist
# Handle format: /path/to/file:123 or /path/to/file:123:456
if ":" in path_str:
parts = path_str.split(":")
if len(parts) >= 2:
# Check if all parts after the first colon are numbers
if all(part.isdigit() for part in parts[1:]):
base_path = parts[0]
return Path(base_path).exists()
return False
def is_cached(self, selectors: list[Selector]) -> bool: def is_cached(self, selectors: list[Selector]) -> bool:
selector: Selector selector: Selector
# for store paths we have to check if they still exist, otherwise they have to be rebuild and are thus not cached # for store paths we have to check if they still exist, otherwise they have to be rebuild and are thus not cached
if isinstance(self.value, str): if isinstance(self.value, str):
# Check if it's a regular nix store path store_refs = find_store_references(self.value)
nix_store_dir = os.environ.get("NIX_STORE_DIR", "/nix/store") if store_refs:
if self.value.startswith(nix_store_dir): # Check if all store references exist at their physical location
return self._check_path_exists(self.value) return all(get_physical_store_path(ref).exists() for ref in store_refs)
# Check if it's a test store path
test_store = os.environ.get("CLAN_TEST_STORE")
if test_store and self.value.startswith(test_store):
return self._check_path_exists(self.value)
# if self.value is not dict but we request more selectors, we assume we are cached and an error will be thrown in the select function # if self.value is not dict but we request more selectors, we assume we are cached and an error will be thrown in the select function
if isinstance(self.value, str | float | int | None): if isinstance(self.value, str | float | int | None):

View File

@@ -1,4 +1,3 @@
import contextlib
import subprocess import subprocess
from pathlib import Path from pathlib import Path
from sys import platform from sys import platform
@@ -7,7 +6,16 @@ from unittest.mock import patch
import pytest import pytest
from clan_cli.tests.fixtures_flakes import ClanFlake from clan_cli.tests.fixtures_flakes import ClanFlake
from clan_lib.flake.flake import Flake, FlakeCache, FlakeCacheEntry, parse_selector from clan_lib.flake.flake import (
Flake,
FlakeCache,
FlakeCacheEntry,
Selector,
find_store_references,
get_physical_store_path,
is_pure_store_path,
parse_selector,
)
@pytest.mark.with_core @pytest.mark.with_core
@@ -171,29 +179,37 @@ def test_cache_is_cached_with_clan_test_store(
checked for existence when CLAN_TEST_STORE is set, because the cache checked for existence when CLAN_TEST_STORE is set, because the cache
only checks existence for paths starting with NIX_STORE_DIR (/nix/store). only checks existence for paths starting with NIX_STORE_DIR (/nix/store).
""" """
# Create a temporary store # Create a temporary store that mirrors the /nix/store structure
test_store = tmp_path / "test-store" test_store = tmp_path / "test-store"
test_store.mkdir() nix_store = test_store / "nix" / "store"
nix_store.mkdir(parents=True)
# Set CLAN_TEST_STORE environment variable # Set CLAN_TEST_STORE environment variable
monkeypatch.setenv("CLAN_TEST_STORE", str(test_store)) monkeypatch.setenv("CLAN_TEST_STORE", str(test_store))
# Ensure NIX_STORE_DIR is not set (typical scenario) # Ensure NIX_STORE_DIR is not set (typical scenario)
monkeypatch.delenv("NIX_STORE_DIR", raising=False) monkeypatch.delenv("NIX_STORE_DIR", raising=False)
# Create a fake store path in the test store # Create a fake store path that follows the real /nix/store pattern
fake_store_path = test_store / "abc123-test-output" # This is what Nix would actually return even in a chroot
fake_store_hash = "abc123def456ghi789jkl012mno345pqr"
fake_store_name = f"{fake_store_hash}-test-output"
fake_store_path = nix_store / fake_store_name
fake_store_path.write_text("test content") fake_store_path.write_text("test content")
# Create a cache entry # Create a cache entry
cache = FlakeCacheEntry() cache = FlakeCacheEntry()
# Insert a store path into the cache # Insert a store path into the cache (as Nix would return it)
nix_store_path = f"/nix/store/{fake_store_name}"
selectors = parse_selector("testOutput") selectors = parse_selector("testOutput")
cache.insert(str(fake_store_path), selectors) cache.insert(nix_store_path, selectors)
# Verify the path is cached and exists # Verify the path is cached and exists
assert cache.is_cached(selectors), "Path should be cached" assert cache.is_cached(selectors), "Path should be cached"
assert Path(cache.select(selectors)).exists(), "Path should exist" # The physical path should exist in the test store
assert get_physical_store_path(nix_store_path).exists(), (
"Physical path should exist"
)
# Now delete the path to simulate garbage collection # Now delete the path to simulate garbage collection
fake_store_path.unlink() fake_store_path.unlink()
@@ -267,8 +283,8 @@ def test_cache_gc(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
monkeypatch.setenv("NIX_STORE_DIR", str(tmp_path / "store")) monkeypatch.setenv("NIX_STORE_DIR", str(tmp_path / "store"))
monkeypatch.setenv("NIX_CACHE_HOME", str(tmp_path / "cache")) monkeypatch.setenv("NIX_CACHE_HOME", str(tmp_path / "cache"))
monkeypatch.setenv("HOME", str(tmp_path / "home")) monkeypatch.setenv("HOME", str(tmp_path / "home"))
with contextlib.suppress(KeyError): monkeypatch.delenv("CLAN_TEST_STORE", raising=False)
monkeypatch.delenv("CLAN_TEST_STORE") monkeypatch.delenv("NIX_REMOTE", raising=False)
monkeypatch.setenv("NIX_BUILD_TOP", str(tmp_path / "build")) monkeypatch.setenv("NIX_BUILD_TOP", str(tmp_path / "build"))
test_file = tmp_path / "flake" / "testfile" test_file = tmp_path / "flake" / "testfile"
@@ -298,6 +314,126 @@ def test_cache_gc(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
assert not my_flake._cache.is_cached("testfile") # noqa: SLF001 assert not my_flake._cache.is_cached("testfile") # noqa: SLF001
def test_store_path_with_line_numbers_not_wrapped() -> None:
"""Test that store paths with line numbers are not wrapped in outPath dict.
This is a regression test for the bug where strings like:
/nix/store/abc-file.nix:123
were being treated as store paths and wrapped in {"outPath": ...} dict,
when they should be stored as plain strings.
"""
cache = FlakeCacheEntry()
# Test that a path with line number is NOT wrapped in outPath
# Use a realistic store path hash (32 chars in base32)
path_with_line = "/nix/store/abc123def456ghi789jkl012mno345pqr-file.nix:42"
selectors: list[Selector] = []
cache.insert(path_with_line, selectors)
# Should be stored as a plain string, not wrapped
assert cache.value == path_with_line
assert not isinstance(cache.value, dict)
# Test that a regular store path IS wrapped in outPath
cache2 = FlakeCacheEntry()
regular_store_path = "/nix/store/abc123def456ghi789jkl012mno345pqr-package"
cache2.insert(regular_store_path, [])
# Should be wrapped in outPath dict
assert isinstance(cache2.value, dict)
assert "outPath" in cache2.value
assert cache2.value["outPath"].value == regular_store_path
# Test path with multiple colons (line:column format)
cache3 = FlakeCacheEntry()
path_with_line_col = "/nix/store/xyz789abc123def456ghi789jkl012mno-source.nix:10:5"
cache3.insert(path_with_line_col, [])
# Should be stored as plain string
assert cache3.value == path_with_line_col
assert not isinstance(cache3.value, dict)
def test_store_reference_helpers() -> None:
"""Test the store reference helper functions."""
from clan_lib.flake.flake import (
find_store_references,
is_pure_store_path,
)
# Test find_store_references
assert find_store_references("/nix/store/abc123-pkg") == ["/nix/store/abc123-pkg"]
assert find_store_references("/nix/store/abc123-file.nix:42") == [
"/nix/store/abc123-file.nix"
]
assert find_store_references("/nix/store/abc123-src/lib/file.nix:42:10") == [
"/nix/store/abc123-src"
]
# Multiple references
multi_ref = "error at /nix/store/abc123-src/file.nix:42 and /nix/store/xyz789-lib/lib.nix:10"
refs = find_store_references(multi_ref)
assert len(refs) == 2
assert "/nix/store/abc123-src" in refs
assert "/nix/store/xyz789-lib" in refs
# No references
assert find_store_references("/home/user/file.nix") == []
assert find_store_references("no store paths here") == []
# Test is_pure_store_path
assert is_pure_store_path("/nix/store/abc123def456ghi789jkl012mno345pqr-package")
assert is_pure_store_path("/nix/store/abc123def456ghi789jkl012mno345pqr-source")
assert not is_pure_store_path(
"/nix/store/abc123def456ghi789jkl012mno345pqr-file.nix:42"
)
assert not is_pure_store_path(
"/nix/store/abc123def456ghi789jkl012mno345pqr-src/subdir/file.nix"
)
assert not is_pure_store_path("/home/user/file")
def test_cache_with_multiple_store_references() -> None:
"""Test caching behavior with strings containing multiple store references."""
cache = FlakeCacheEntry()
# String with multiple store references (none exist)
multi_ref = "Error at /nix/store/nonexistent1-src/file.nix:42 and /nix/store/nonexistent2-lib/lib.nix:10"
cache.insert(multi_ref, [])
assert cache.value == multi_ref
assert isinstance(cache.value, str)
# Should return False since none of the referenced paths exist
assert not cache.is_cached([])
def test_store_references_with_custom_store_dir(
monkeypatch: pytest.MonkeyPatch,
) -> None:
"""Test store reference detection with custom NIX_STORE_DIR."""
# Set custom store directory
monkeypatch.setenv("NIX_STORE_DIR", "/custom/store")
# Test find_store_references with custom dir
assert find_store_references("/custom/store/abc123-pkg") == [
"/custom/store/abc123-pkg"
]
assert find_store_references("/custom/store/abc123-file.nix") == [
"/custom/store/abc123-file.nix"
]
# Test is_pure_store_path with custom dir
assert is_pure_store_path("/custom/store/abc123def456ghi789jkl012mno345pqr-package")
assert not is_pure_store_path(
"/custom/store/abc123def456ghi789jkl012mno345pqr-file.nix:42"
)
assert not is_pure_store_path(
"/nix/store/abc123def456ghi789jkl012mno345pqr-package"
)
def test_cache_path_with_line_numbers( def test_cache_path_with_line_numbers(
tmp_path: Path, monkeypatch: pytest.MonkeyPatch tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None: ) -> None:
@@ -306,32 +442,36 @@ def test_cache_path_with_line_numbers(
This is a regression test for the bug where cached store paths with line numbers This is a regression test for the bug where cached store paths with line numbers
(e.g., /nix/store/path:123) are not properly checked for existence. (e.g., /nix/store/path:123) are not properly checked for existence.
""" """
# Create a temporary store # Create a temporary store that mirrors the /nix/store structure
test_store = tmp_path / "test-store" test_store = tmp_path / "test-store"
test_store.mkdir() nix_store = test_store / "nix" / "store"
nix_store.mkdir(parents=True)
# Set CLAN_TEST_STORE environment variable # Set CLAN_TEST_STORE environment variable
monkeypatch.setenv("CLAN_TEST_STORE", str(test_store)) monkeypatch.setenv("CLAN_TEST_STORE", str(test_store))
# Create a fake store path # Create a fake store path that follows the real /nix/store pattern
fake_store_path = test_store / "abc123-source-file.nix" fake_store_hash = "abc123def456ghi789jkl012mno345pqr"
fake_store_name = f"{fake_store_hash}-source-file.nix"
fake_store_path = nix_store / fake_store_name
fake_store_path.write_text("# nix source file\n{ foo = 123; }") fake_store_path.write_text("# nix source file\n{ foo = 123; }")
# Create cache entries for paths with line numbers # Create cache entries for paths with line numbers
cache = FlakeCacheEntry() cache = FlakeCacheEntry()
# Test single line number format # Test single line number format (as Nix would return it)
path_with_line = f"{fake_store_path}:42" nix_path_with_line = f"/nix/store/{fake_store_name}:42"
selectors = parse_selector("testPath1") selectors = parse_selector("testPath1")
cache.insert(path_with_line, selectors) cache.insert(nix_path_with_line, selectors)
# Test line:column format # Test line:column format
path_with_line_col = f"{fake_store_path}:42:10" nix_path_with_line_col = f"/nix/store/{fake_store_name}:42:10"
selectors2 = parse_selector("testPath2") selectors2 = parse_selector("testPath2")
cache.insert(path_with_line_col, selectors2) cache.insert(nix_path_with_line_col, selectors2)
# Test path with colon but non-numeric suffix (should not be treated as line number) # Test path with colon but non-numeric suffix (should not be treated as line number)
path_with_colon = test_store / "file:with:colons" # This would be a regular file path, not in the store
path_with_colon = tmp_path / "file:with:colons"
path_with_colon.write_text("test") path_with_colon.write_text("test")
selectors3 = parse_selector("testPath3") selectors3 = parse_selector("testPath3")
cache.insert(str(path_with_colon), selectors3) cache.insert(str(path_with_colon), selectors3)
@@ -367,10 +507,38 @@ def test_cache_path_with_line_numbers(
# Test with regular /nix/store paths # Test with regular /nix/store paths
monkeypatch.delenv("CLAN_TEST_STORE", raising=False) monkeypatch.delenv("CLAN_TEST_STORE", raising=False)
cache2 = FlakeCacheEntry() cache2 = FlakeCacheEntry()
nix_path_with_line = "/nix/store/fake-source.nix:123" nix_path_with_line = "/nix/store/0123456789abcdefghijklmnopqrstuv-source.nix:123"
cache2.insert(nix_path_with_line, parse_selector("nixPath")) cache2.insert({"nixPath": nix_path_with_line}, [])
# Should return False because neither the exact path nor base path exists # The path is stored under the nixPath key, and since it starts with /nix/store
# and has a line number, _check_path_exists will strip the line number and check
# if the base file exists. Since it doesn't exist, this should return False.
assert not cache2.is_cached(parse_selector("nixPath")), ( assert not cache2.is_cached(parse_selector("nixPath")), (
"Nix store path with line number should not be cached when file doesn't exist" "Nix store path with line number should not be cached when file doesn't exist"
) )
def test_reinserting_store_path_value() -> None:
"""Test that reinserting the same store path value doesn't cause conflicts.
This is a regression test for the bug where a store path was first inserted
as a pure path (wrapped in outPath), and then the same value was inserted
again as a plain string, causing a TypeError.
"""
cache = FlakeCacheEntry()
# First insert as pure store path - gets wrapped in outPath
pure_path = "/nix/store/abc123def456ghi789jkl012mno345pqr-source"
cache.insert(pure_path, [])
assert isinstance(cache.value, dict)
assert "outPath" in cache.value
assert cache.value["outPath"].value == pure_path
# Now try to insert the same value again - should not raise
cache.insert(pure_path, [])
# Value should remain unchanged
assert isinstance(cache.value, dict)
assert "outPath" in cache.value
assert cache.value["outPath"].value == pure_path

View File

@@ -159,13 +159,13 @@ def test_select() -> None:
def test_out_path() -> None: def test_out_path() -> None:
testdict = {"x": {"y": [123, 345, 456], "z": "/nix/store/bla"}} testdict = {"x": {"y": [123, 345, 456], "z": "/nix/store/abc-bla"}}
test_cache = FlakeCacheEntry() test_cache = FlakeCacheEntry()
test_cache.insert(testdict, []) test_cache.insert(testdict, [])
selectors = parse_selector("x.z") selectors = parse_selector("x.z")
assert test_cache.select(selectors) == "/nix/store/bla" assert test_cache.select(selectors) == "/nix/store/abc-bla"
selectors = parse_selector("x.z.outPath") selectors = parse_selector("x.z.outPath")
assert test_cache.select(selectors) == "/nix/store/bla" assert test_cache.select(selectors) == "/nix/store/abc-bla"
def test_out_path_in_multiselect_raises_exception() -> None: def test_out_path_in_multiselect_raises_exception() -> None: