Merge pull request 'fix: handle arbitrary store paths references in flake cache' (#4441) from fix-flake-caching into main
Reviewed-on: https://git.clan.lol/clan/clan-core/pulls/4441
This commit is contained in:
@@ -1,9 +1,11 @@
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import textwrap
|
||||
from dataclasses import asdict, dataclass, field
|
||||
from enum import Enum
|
||||
from functools import cache
|
||||
from hashlib import sha1
|
||||
from pathlib import Path
|
||||
from tempfile import NamedTemporaryFile
|
||||
@@ -14,6 +16,62 @@ from clan_lib.errors import ClanError
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def get_nix_store_dir() -> str:
|
||||
"""Get the Nix store directory path pattern for regex matching.
|
||||
|
||||
This always returns the pattern that Nix uses in its output,
|
||||
which is typically /nix/store regardless of chroot.
|
||||
"""
|
||||
return os.environ.get("NIX_STORE_DIR", "/nix/store")
|
||||
|
||||
|
||||
def get_physical_store_path(store_path: str) -> Path:
|
||||
"""Convert a store path to its physical location, handling chroot environments.
|
||||
|
||||
When CLAN_TEST_STORE is set, paths like /nix/store/hash-name are
|
||||
actually located at CLAN_TEST_STORE/nix/store/hash-name.
|
||||
"""
|
||||
test_store = os.environ.get("CLAN_TEST_STORE")
|
||||
if test_store and store_path.startswith("/nix/store/"):
|
||||
# Remove leading / to join properly
|
||||
relative_path = store_path.lstrip("/")
|
||||
return Path(test_store) / relative_path
|
||||
return Path(store_path)
|
||||
|
||||
|
||||
@cache
|
||||
def get_store_path_regex(store_dir: str) -> re.Pattern[str]:
|
||||
"""Get compiled regex for a specific store directory.
|
||||
|
||||
Matches store paths: store_dir/hash-name
|
||||
The hash is base32 lowercase, name can contain [0-9a-zA-Z+-.?=_]
|
||||
"""
|
||||
# Pattern: store_dir/hash-name
|
||||
pattern = (
|
||||
re.escape(store_dir) + r"/[0-9a-z]+-[0-9a-zA-Z\+\-\._\?=]*" # hash-name
|
||||
)
|
||||
return re.compile(pattern)
|
||||
|
||||
|
||||
def find_store_references(text: str) -> list[str]:
|
||||
"""Find all store path references in a string."""
|
||||
store_dir = get_nix_store_dir()
|
||||
return get_store_path_regex(store_dir).findall(text)
|
||||
|
||||
|
||||
def is_pure_store_path(path: str) -> bool:
|
||||
"""Check if a path is a pure Nix store path without file references or metadata.
|
||||
|
||||
Pure store paths have the format: /nix/store/hash-name
|
||||
They should NOT contain:
|
||||
- Additional path components (/nix/store/hash-name/subdir/file.nix)
|
||||
- Line numbers or metadata (/nix/store/hash-name:42)
|
||||
"""
|
||||
store_dir = get_nix_store_dir()
|
||||
regex = get_store_path_regex(store_dir)
|
||||
return bool(regex.fullmatch(path))
|
||||
|
||||
|
||||
class SetSelectorType(str, Enum):
|
||||
"""
|
||||
enum for the type of selector in a set.
|
||||
@@ -327,12 +385,10 @@ class FlakeCacheEntry:
|
||||
self.value[requested_index] = FlakeCacheEntry()
|
||||
self.value[requested_index].insert(value[i], selectors[1:])
|
||||
|
||||
# strings need to be checked if they are store paths
|
||||
# strings need to be checked if they are pure store paths
|
||||
# if they are, we store them as a dict with the outPath key
|
||||
# this is to mirror nix behavior, where the outPath of an attrset is used if no further key is specified
|
||||
elif isinstance(value, str) and value.startswith(
|
||||
os.environ.get("NIX_STORE_DIR", "/nix/store")
|
||||
):
|
||||
elif isinstance(value, str) and is_pure_store_path(value):
|
||||
assert selectors == []
|
||||
self.value = {"outPath": FlakeCacheEntry(value)}
|
||||
|
||||
@@ -342,41 +398,32 @@ class FlakeCacheEntry:
|
||||
assert selectors == []
|
||||
if self.value == {}:
|
||||
self.value = value
|
||||
# Only check for outPath wrapping conflicts for strings (store paths)
|
||||
elif isinstance(value, str) and (
|
||||
isinstance(self.value, dict)
|
||||
and "outPath" in self.value
|
||||
and isinstance(self.value["outPath"], FlakeCacheEntry)
|
||||
):
|
||||
# If the same value is already wrapped in outPath, it's not a conflict
|
||||
if self.value["outPath"].value == value:
|
||||
# Value already cached as outPath, no need to change
|
||||
pass
|
||||
else:
|
||||
msg = f"Cannot insert {value} into cache, already have {self.value}"
|
||||
raise TypeError(msg)
|
||||
elif self.value != value:
|
||||
msg = f"Cannot insert {value} into cache, already have {self.value}"
|
||||
raise TypeError(msg)
|
||||
|
||||
def _check_path_exists(self, path_str: str) -> bool:
|
||||
"""Check if a path exists, handling potential line number suffixes."""
|
||||
path = Path(path_str)
|
||||
if path.exists():
|
||||
return True
|
||||
|
||||
# Try stripping line numbers if the path doesn't exist
|
||||
# Handle format: /path/to/file:123 or /path/to/file:123:456
|
||||
if ":" in path_str:
|
||||
parts = path_str.split(":")
|
||||
if len(parts) >= 2:
|
||||
# Check if all parts after the first colon are numbers
|
||||
if all(part.isdigit() for part in parts[1:]):
|
||||
base_path = parts[0]
|
||||
return Path(base_path).exists()
|
||||
return False
|
||||
|
||||
def is_cached(self, selectors: list[Selector]) -> bool:
|
||||
selector: Selector
|
||||
|
||||
# for store paths we have to check if they still exist, otherwise they have to be rebuild and are thus not cached
|
||||
if isinstance(self.value, str):
|
||||
# Check if it's a regular nix store path
|
||||
nix_store_dir = os.environ.get("NIX_STORE_DIR", "/nix/store")
|
||||
if self.value.startswith(nix_store_dir):
|
||||
return self._check_path_exists(self.value)
|
||||
|
||||
# Check if it's a test store path
|
||||
test_store = os.environ.get("CLAN_TEST_STORE")
|
||||
if test_store and self.value.startswith(test_store):
|
||||
return self._check_path_exists(self.value)
|
||||
store_refs = find_store_references(self.value)
|
||||
if store_refs:
|
||||
# Check if all store references exist at their physical location
|
||||
return all(get_physical_store_path(ref).exists() for ref in store_refs)
|
||||
|
||||
# if self.value is not dict but we request more selectors, we assume we are cached and an error will be thrown in the select function
|
||||
if isinstance(self.value, str | float | int | None):
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
import contextlib
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from sys import platform
|
||||
@@ -7,7 +6,16 @@ from unittest.mock import patch
|
||||
import pytest
|
||||
from clan_cli.tests.fixtures_flakes import ClanFlake
|
||||
|
||||
from clan_lib.flake.flake import Flake, FlakeCache, FlakeCacheEntry, parse_selector
|
||||
from clan_lib.flake.flake import (
|
||||
Flake,
|
||||
FlakeCache,
|
||||
FlakeCacheEntry,
|
||||
Selector,
|
||||
find_store_references,
|
||||
get_physical_store_path,
|
||||
is_pure_store_path,
|
||||
parse_selector,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.with_core
|
||||
@@ -171,29 +179,37 @@ def test_cache_is_cached_with_clan_test_store(
|
||||
checked for existence when CLAN_TEST_STORE is set, because the cache
|
||||
only checks existence for paths starting with NIX_STORE_DIR (/nix/store).
|
||||
"""
|
||||
# Create a temporary store
|
||||
# Create a temporary store that mirrors the /nix/store structure
|
||||
test_store = tmp_path / "test-store"
|
||||
test_store.mkdir()
|
||||
nix_store = test_store / "nix" / "store"
|
||||
nix_store.mkdir(parents=True)
|
||||
|
||||
# Set CLAN_TEST_STORE environment variable
|
||||
monkeypatch.setenv("CLAN_TEST_STORE", str(test_store))
|
||||
# Ensure NIX_STORE_DIR is not set (typical scenario)
|
||||
monkeypatch.delenv("NIX_STORE_DIR", raising=False)
|
||||
|
||||
# Create a fake store path in the test store
|
||||
fake_store_path = test_store / "abc123-test-output"
|
||||
# Create a fake store path that follows the real /nix/store pattern
|
||||
# This is what Nix would actually return even in a chroot
|
||||
fake_store_hash = "abc123def456ghi789jkl012mno345pqr"
|
||||
fake_store_name = f"{fake_store_hash}-test-output"
|
||||
fake_store_path = nix_store / fake_store_name
|
||||
fake_store_path.write_text("test content")
|
||||
|
||||
# Create a cache entry
|
||||
cache = FlakeCacheEntry()
|
||||
|
||||
# Insert a store path into the cache
|
||||
# Insert a store path into the cache (as Nix would return it)
|
||||
nix_store_path = f"/nix/store/{fake_store_name}"
|
||||
selectors = parse_selector("testOutput")
|
||||
cache.insert(str(fake_store_path), selectors)
|
||||
cache.insert(nix_store_path, selectors)
|
||||
|
||||
# Verify the path is cached and exists
|
||||
assert cache.is_cached(selectors), "Path should be cached"
|
||||
assert Path(cache.select(selectors)).exists(), "Path should exist"
|
||||
# The physical path should exist in the test store
|
||||
assert get_physical_store_path(nix_store_path).exists(), (
|
||||
"Physical path should exist"
|
||||
)
|
||||
|
||||
# Now delete the path to simulate garbage collection
|
||||
fake_store_path.unlink()
|
||||
@@ -267,8 +283,8 @@ def test_cache_gc(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
|
||||
monkeypatch.setenv("NIX_STORE_DIR", str(tmp_path / "store"))
|
||||
monkeypatch.setenv("NIX_CACHE_HOME", str(tmp_path / "cache"))
|
||||
monkeypatch.setenv("HOME", str(tmp_path / "home"))
|
||||
with contextlib.suppress(KeyError):
|
||||
monkeypatch.delenv("CLAN_TEST_STORE")
|
||||
monkeypatch.delenv("CLAN_TEST_STORE", raising=False)
|
||||
monkeypatch.delenv("NIX_REMOTE", raising=False)
|
||||
monkeypatch.setenv("NIX_BUILD_TOP", str(tmp_path / "build"))
|
||||
|
||||
test_file = tmp_path / "flake" / "testfile"
|
||||
@@ -298,6 +314,126 @@ def test_cache_gc(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
|
||||
assert not my_flake._cache.is_cached("testfile") # noqa: SLF001
|
||||
|
||||
|
||||
def test_store_path_with_line_numbers_not_wrapped() -> None:
|
||||
"""Test that store paths with line numbers are not wrapped in outPath dict.
|
||||
|
||||
This is a regression test for the bug where strings like:
|
||||
/nix/store/abc-file.nix:123
|
||||
were being treated as store paths and wrapped in {"outPath": ...} dict,
|
||||
when they should be stored as plain strings.
|
||||
"""
|
||||
cache = FlakeCacheEntry()
|
||||
|
||||
# Test that a path with line number is NOT wrapped in outPath
|
||||
# Use a realistic store path hash (32 chars in base32)
|
||||
path_with_line = "/nix/store/abc123def456ghi789jkl012mno345pqr-file.nix:42"
|
||||
selectors: list[Selector] = []
|
||||
cache.insert(path_with_line, selectors)
|
||||
|
||||
# Should be stored as a plain string, not wrapped
|
||||
assert cache.value == path_with_line
|
||||
assert not isinstance(cache.value, dict)
|
||||
|
||||
# Test that a regular store path IS wrapped in outPath
|
||||
cache2 = FlakeCacheEntry()
|
||||
regular_store_path = "/nix/store/abc123def456ghi789jkl012mno345pqr-package"
|
||||
cache2.insert(regular_store_path, [])
|
||||
|
||||
# Should be wrapped in outPath dict
|
||||
assert isinstance(cache2.value, dict)
|
||||
assert "outPath" in cache2.value
|
||||
assert cache2.value["outPath"].value == regular_store_path
|
||||
|
||||
# Test path with multiple colons (line:column format)
|
||||
cache3 = FlakeCacheEntry()
|
||||
path_with_line_col = "/nix/store/xyz789abc123def456ghi789jkl012mno-source.nix:10:5"
|
||||
cache3.insert(path_with_line_col, [])
|
||||
|
||||
# Should be stored as plain string
|
||||
assert cache3.value == path_with_line_col
|
||||
assert not isinstance(cache3.value, dict)
|
||||
|
||||
|
||||
def test_store_reference_helpers() -> None:
|
||||
"""Test the store reference helper functions."""
|
||||
from clan_lib.flake.flake import (
|
||||
find_store_references,
|
||||
is_pure_store_path,
|
||||
)
|
||||
|
||||
# Test find_store_references
|
||||
assert find_store_references("/nix/store/abc123-pkg") == ["/nix/store/abc123-pkg"]
|
||||
assert find_store_references("/nix/store/abc123-file.nix:42") == [
|
||||
"/nix/store/abc123-file.nix"
|
||||
]
|
||||
assert find_store_references("/nix/store/abc123-src/lib/file.nix:42:10") == [
|
||||
"/nix/store/abc123-src"
|
||||
]
|
||||
|
||||
# Multiple references
|
||||
multi_ref = "error at /nix/store/abc123-src/file.nix:42 and /nix/store/xyz789-lib/lib.nix:10"
|
||||
refs = find_store_references(multi_ref)
|
||||
assert len(refs) == 2
|
||||
assert "/nix/store/abc123-src" in refs
|
||||
assert "/nix/store/xyz789-lib" in refs
|
||||
|
||||
# No references
|
||||
assert find_store_references("/home/user/file.nix") == []
|
||||
assert find_store_references("no store paths here") == []
|
||||
|
||||
# Test is_pure_store_path
|
||||
assert is_pure_store_path("/nix/store/abc123def456ghi789jkl012mno345pqr-package")
|
||||
assert is_pure_store_path("/nix/store/abc123def456ghi789jkl012mno345pqr-source")
|
||||
assert not is_pure_store_path(
|
||||
"/nix/store/abc123def456ghi789jkl012mno345pqr-file.nix:42"
|
||||
)
|
||||
assert not is_pure_store_path(
|
||||
"/nix/store/abc123def456ghi789jkl012mno345pqr-src/subdir/file.nix"
|
||||
)
|
||||
assert not is_pure_store_path("/home/user/file")
|
||||
|
||||
|
||||
def test_cache_with_multiple_store_references() -> None:
|
||||
"""Test caching behavior with strings containing multiple store references."""
|
||||
cache = FlakeCacheEntry()
|
||||
|
||||
# String with multiple store references (none exist)
|
||||
multi_ref = "Error at /nix/store/nonexistent1-src/file.nix:42 and /nix/store/nonexistent2-lib/lib.nix:10"
|
||||
cache.insert(multi_ref, [])
|
||||
|
||||
assert cache.value == multi_ref
|
||||
assert isinstance(cache.value, str)
|
||||
|
||||
# Should return False since none of the referenced paths exist
|
||||
assert not cache.is_cached([])
|
||||
|
||||
|
||||
def test_store_references_with_custom_store_dir(
|
||||
monkeypatch: pytest.MonkeyPatch,
|
||||
) -> None:
|
||||
"""Test store reference detection with custom NIX_STORE_DIR."""
|
||||
|
||||
# Set custom store directory
|
||||
monkeypatch.setenv("NIX_STORE_DIR", "/custom/store")
|
||||
|
||||
# Test find_store_references with custom dir
|
||||
assert find_store_references("/custom/store/abc123-pkg") == [
|
||||
"/custom/store/abc123-pkg"
|
||||
]
|
||||
assert find_store_references("/custom/store/abc123-file.nix") == [
|
||||
"/custom/store/abc123-file.nix"
|
||||
]
|
||||
|
||||
# Test is_pure_store_path with custom dir
|
||||
assert is_pure_store_path("/custom/store/abc123def456ghi789jkl012mno345pqr-package")
|
||||
assert not is_pure_store_path(
|
||||
"/custom/store/abc123def456ghi789jkl012mno345pqr-file.nix:42"
|
||||
)
|
||||
assert not is_pure_store_path(
|
||||
"/nix/store/abc123def456ghi789jkl012mno345pqr-package"
|
||||
)
|
||||
|
||||
|
||||
def test_cache_path_with_line_numbers(
|
||||
tmp_path: Path, monkeypatch: pytest.MonkeyPatch
|
||||
) -> None:
|
||||
@@ -306,32 +442,36 @@ def test_cache_path_with_line_numbers(
|
||||
This is a regression test for the bug where cached store paths with line numbers
|
||||
(e.g., /nix/store/path:123) are not properly checked for existence.
|
||||
"""
|
||||
# Create a temporary store
|
||||
# Create a temporary store that mirrors the /nix/store structure
|
||||
test_store = tmp_path / "test-store"
|
||||
test_store.mkdir()
|
||||
nix_store = test_store / "nix" / "store"
|
||||
nix_store.mkdir(parents=True)
|
||||
|
||||
# Set CLAN_TEST_STORE environment variable
|
||||
monkeypatch.setenv("CLAN_TEST_STORE", str(test_store))
|
||||
|
||||
# Create a fake store path
|
||||
fake_store_path = test_store / "abc123-source-file.nix"
|
||||
# Create a fake store path that follows the real /nix/store pattern
|
||||
fake_store_hash = "abc123def456ghi789jkl012mno345pqr"
|
||||
fake_store_name = f"{fake_store_hash}-source-file.nix"
|
||||
fake_store_path = nix_store / fake_store_name
|
||||
fake_store_path.write_text("# nix source file\n{ foo = 123; }")
|
||||
|
||||
# Create cache entries for paths with line numbers
|
||||
cache = FlakeCacheEntry()
|
||||
|
||||
# Test single line number format
|
||||
path_with_line = f"{fake_store_path}:42"
|
||||
# Test single line number format (as Nix would return it)
|
||||
nix_path_with_line = f"/nix/store/{fake_store_name}:42"
|
||||
selectors = parse_selector("testPath1")
|
||||
cache.insert(path_with_line, selectors)
|
||||
cache.insert(nix_path_with_line, selectors)
|
||||
|
||||
# Test line:column format
|
||||
path_with_line_col = f"{fake_store_path}:42:10"
|
||||
nix_path_with_line_col = f"/nix/store/{fake_store_name}:42:10"
|
||||
selectors2 = parse_selector("testPath2")
|
||||
cache.insert(path_with_line_col, selectors2)
|
||||
cache.insert(nix_path_with_line_col, selectors2)
|
||||
|
||||
# Test path with colon but non-numeric suffix (should not be treated as line number)
|
||||
path_with_colon = test_store / "file:with:colons"
|
||||
# This would be a regular file path, not in the store
|
||||
path_with_colon = tmp_path / "file:with:colons"
|
||||
path_with_colon.write_text("test")
|
||||
selectors3 = parse_selector("testPath3")
|
||||
cache.insert(str(path_with_colon), selectors3)
|
||||
@@ -367,10 +507,38 @@ def test_cache_path_with_line_numbers(
|
||||
# Test with regular /nix/store paths
|
||||
monkeypatch.delenv("CLAN_TEST_STORE", raising=False)
|
||||
cache2 = FlakeCacheEntry()
|
||||
nix_path_with_line = "/nix/store/fake-source.nix:123"
|
||||
cache2.insert(nix_path_with_line, parse_selector("nixPath"))
|
||||
nix_path_with_line = "/nix/store/0123456789abcdefghijklmnopqrstuv-source.nix:123"
|
||||
cache2.insert({"nixPath": nix_path_with_line}, [])
|
||||
|
||||
# Should return False because neither the exact path nor base path exists
|
||||
# The path is stored under the nixPath key, and since it starts with /nix/store
|
||||
# and has a line number, _check_path_exists will strip the line number and check
|
||||
# if the base file exists. Since it doesn't exist, this should return False.
|
||||
assert not cache2.is_cached(parse_selector("nixPath")), (
|
||||
"Nix store path with line number should not be cached when file doesn't exist"
|
||||
)
|
||||
|
||||
|
||||
def test_reinserting_store_path_value() -> None:
|
||||
"""Test that reinserting the same store path value doesn't cause conflicts.
|
||||
|
||||
This is a regression test for the bug where a store path was first inserted
|
||||
as a pure path (wrapped in outPath), and then the same value was inserted
|
||||
again as a plain string, causing a TypeError.
|
||||
"""
|
||||
cache = FlakeCacheEntry()
|
||||
|
||||
# First insert as pure store path - gets wrapped in outPath
|
||||
pure_path = "/nix/store/abc123def456ghi789jkl012mno345pqr-source"
|
||||
cache.insert(pure_path, [])
|
||||
|
||||
assert isinstance(cache.value, dict)
|
||||
assert "outPath" in cache.value
|
||||
assert cache.value["outPath"].value == pure_path
|
||||
|
||||
# Now try to insert the same value again - should not raise
|
||||
cache.insert(pure_path, [])
|
||||
|
||||
# Value should remain unchanged
|
||||
assert isinstance(cache.value, dict)
|
||||
assert "outPath" in cache.value
|
||||
assert cache.value["outPath"].value == pure_path
|
||||
|
||||
@@ -159,13 +159,13 @@ def test_select() -> None:
|
||||
|
||||
|
||||
def test_out_path() -> None:
|
||||
testdict = {"x": {"y": [123, 345, 456], "z": "/nix/store/bla"}}
|
||||
testdict = {"x": {"y": [123, 345, 456], "z": "/nix/store/abc-bla"}}
|
||||
test_cache = FlakeCacheEntry()
|
||||
test_cache.insert(testdict, [])
|
||||
selectors = parse_selector("x.z")
|
||||
assert test_cache.select(selectors) == "/nix/store/bla"
|
||||
assert test_cache.select(selectors) == "/nix/store/abc-bla"
|
||||
selectors = parse_selector("x.z.outPath")
|
||||
assert test_cache.select(selectors) == "/nix/store/bla"
|
||||
assert test_cache.select(selectors) == "/nix/store/abc-bla"
|
||||
|
||||
|
||||
def test_out_path_in_multiselect_raises_exception() -> None:
|
||||
|
||||
Reference in New Issue
Block a user