From bd1451ce1850222a943ddd4c8113aea24b24cd95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Thalheim?= Date: Tue, 22 Jul 2025 15:19:57 +0200 Subject: [PATCH] fix: handle arbitrary store paths references in flake cache Previously, paths like /nix/store/hash-file.nix:123 were incorrectly treated as pure store paths and wrapped in {"outPath": ...}, breaking the cache. This fix: - Adds helper functions to properly detect and handle store references - Distinguishes between pure store paths and paths with metadata (line numbers) - Supports multiple store references in a single string - Handles custom NIX_STORE_DIR correctly - Ensures existence checks work for all store references Also fixes test_cache_gc to delete NIX_REMOTE for proper local store testing. --- pkgs/clan-cli/clan_lib/flake/flake.py | 107 ++++++--- .../clan_lib/flake/flake_cache_test.py | 216 ++++++++++++++++-- pkgs/clan-cli/clan_lib/flake/flake_test.py | 6 +- 3 files changed, 272 insertions(+), 57 deletions(-) diff --git a/pkgs/clan-cli/clan_lib/flake/flake.py b/pkgs/clan-cli/clan_lib/flake/flake.py index b7d28877d..1f20731ae 100644 --- a/pkgs/clan-cli/clan_lib/flake/flake.py +++ b/pkgs/clan-cli/clan_lib/flake/flake.py @@ -1,9 +1,11 @@ import json import logging import os +import re import textwrap from dataclasses import asdict, dataclass, field from enum import Enum +from functools import cache from hashlib import sha1 from pathlib import Path from tempfile import NamedTemporaryFile @@ -14,6 +16,62 @@ from clan_lib.errors import ClanError log = logging.getLogger(__name__) +def get_nix_store_dir() -> str: + """Get the Nix store directory path pattern for regex matching. + + This always returns the pattern that Nix uses in its output, + which is typically /nix/store regardless of chroot. + """ + return os.environ.get("NIX_STORE_DIR", "/nix/store") + + +def get_physical_store_path(store_path: str) -> Path: + """Convert a store path to its physical location, handling chroot environments. + + When CLAN_TEST_STORE is set, paths like /nix/store/hash-name are + actually located at CLAN_TEST_STORE/nix/store/hash-name. + """ + test_store = os.environ.get("CLAN_TEST_STORE") + if test_store and store_path.startswith("/nix/store/"): + # Remove leading / to join properly + relative_path = store_path.lstrip("/") + return Path(test_store) / relative_path + return Path(store_path) + + +@cache +def get_store_path_regex(store_dir: str) -> re.Pattern[str]: + """Get compiled regex for a specific store directory. + + Matches store paths: store_dir/hash-name + The hash is base32 lowercase, name can contain [0-9a-zA-Z+-.?=_] + """ + # Pattern: store_dir/hash-name + pattern = ( + re.escape(store_dir) + r"/[0-9a-z]+-[0-9a-zA-Z\+\-\._\?=]*" # hash-name + ) + return re.compile(pattern) + + +def find_store_references(text: str) -> list[str]: + """Find all store path references in a string.""" + store_dir = get_nix_store_dir() + return get_store_path_regex(store_dir).findall(text) + + +def is_pure_store_path(path: str) -> bool: + """Check if a path is a pure Nix store path without file references or metadata. + + Pure store paths have the format: /nix/store/hash-name + They should NOT contain: + - Additional path components (/nix/store/hash-name/subdir/file.nix) + - Line numbers or metadata (/nix/store/hash-name:42) + """ + store_dir = get_nix_store_dir() + regex = get_store_path_regex(store_dir) + return bool(regex.fullmatch(path)) + + class SetSelectorType(str, Enum): """ enum for the type of selector in a set. @@ -327,12 +385,10 @@ class FlakeCacheEntry: self.value[requested_index] = FlakeCacheEntry() self.value[requested_index].insert(value[i], selectors[1:]) - # strings need to be checked if they are store paths + # strings need to be checked if they are pure store paths # if they are, we store them as a dict with the outPath key # this is to mirror nix behavior, where the outPath of an attrset is used if no further key is specified - elif isinstance(value, str) and value.startswith( - os.environ.get("NIX_STORE_DIR", "/nix/store") - ): + elif isinstance(value, str) and is_pure_store_path(value): assert selectors == [] self.value = {"outPath": FlakeCacheEntry(value)} @@ -342,41 +398,32 @@ class FlakeCacheEntry: assert selectors == [] if self.value == {}: self.value = value + # Only check for outPath wrapping conflicts for strings (store paths) + elif isinstance(value, str) and ( + isinstance(self.value, dict) + and "outPath" in self.value + and isinstance(self.value["outPath"], FlakeCacheEntry) + ): + # If the same value is already wrapped in outPath, it's not a conflict + if self.value["outPath"].value == value: + # Value already cached as outPath, no need to change + pass + else: + msg = f"Cannot insert {value} into cache, already have {self.value}" + raise TypeError(msg) elif self.value != value: msg = f"Cannot insert {value} into cache, already have {self.value}" raise TypeError(msg) - def _check_path_exists(self, path_str: str) -> bool: - """Check if a path exists, handling potential line number suffixes.""" - path = Path(path_str) - if path.exists(): - return True - - # Try stripping line numbers if the path doesn't exist - # Handle format: /path/to/file:123 or /path/to/file:123:456 - if ":" in path_str: - parts = path_str.split(":") - if len(parts) >= 2: - # Check if all parts after the first colon are numbers - if all(part.isdigit() for part in parts[1:]): - base_path = parts[0] - return Path(base_path).exists() - return False - def is_cached(self, selectors: list[Selector]) -> bool: selector: Selector # for store paths we have to check if they still exist, otherwise they have to be rebuild and are thus not cached if isinstance(self.value, str): - # Check if it's a regular nix store path - nix_store_dir = os.environ.get("NIX_STORE_DIR", "/nix/store") - if self.value.startswith(nix_store_dir): - return self._check_path_exists(self.value) - - # Check if it's a test store path - test_store = os.environ.get("CLAN_TEST_STORE") - if test_store and self.value.startswith(test_store): - return self._check_path_exists(self.value) + store_refs = find_store_references(self.value) + if store_refs: + # Check if all store references exist at their physical location + return all(get_physical_store_path(ref).exists() for ref in store_refs) # if self.value is not dict but we request more selectors, we assume we are cached and an error will be thrown in the select function if isinstance(self.value, str | float | int | None): diff --git a/pkgs/clan-cli/clan_lib/flake/flake_cache_test.py b/pkgs/clan-cli/clan_lib/flake/flake_cache_test.py index 9849752ed..aa91a555f 100644 --- a/pkgs/clan-cli/clan_lib/flake/flake_cache_test.py +++ b/pkgs/clan-cli/clan_lib/flake/flake_cache_test.py @@ -1,4 +1,3 @@ -import contextlib import subprocess from pathlib import Path from sys import platform @@ -7,7 +6,16 @@ from unittest.mock import patch import pytest from clan_cli.tests.fixtures_flakes import ClanFlake -from clan_lib.flake.flake import Flake, FlakeCache, FlakeCacheEntry, parse_selector +from clan_lib.flake.flake import ( + Flake, + FlakeCache, + FlakeCacheEntry, + Selector, + find_store_references, + get_physical_store_path, + is_pure_store_path, + parse_selector, +) @pytest.mark.with_core @@ -171,29 +179,37 @@ def test_cache_is_cached_with_clan_test_store( checked for existence when CLAN_TEST_STORE is set, because the cache only checks existence for paths starting with NIX_STORE_DIR (/nix/store). """ - # Create a temporary store + # Create a temporary store that mirrors the /nix/store structure test_store = tmp_path / "test-store" - test_store.mkdir() + nix_store = test_store / "nix" / "store" + nix_store.mkdir(parents=True) # Set CLAN_TEST_STORE environment variable monkeypatch.setenv("CLAN_TEST_STORE", str(test_store)) # Ensure NIX_STORE_DIR is not set (typical scenario) monkeypatch.delenv("NIX_STORE_DIR", raising=False) - # Create a fake store path in the test store - fake_store_path = test_store / "abc123-test-output" + # Create a fake store path that follows the real /nix/store pattern + # This is what Nix would actually return even in a chroot + fake_store_hash = "abc123def456ghi789jkl012mno345pqr" + fake_store_name = f"{fake_store_hash}-test-output" + fake_store_path = nix_store / fake_store_name fake_store_path.write_text("test content") # Create a cache entry cache = FlakeCacheEntry() - # Insert a store path into the cache + # Insert a store path into the cache (as Nix would return it) + nix_store_path = f"/nix/store/{fake_store_name}" selectors = parse_selector("testOutput") - cache.insert(str(fake_store_path), selectors) + cache.insert(nix_store_path, selectors) # Verify the path is cached and exists assert cache.is_cached(selectors), "Path should be cached" - assert Path(cache.select(selectors)).exists(), "Path should exist" + # The physical path should exist in the test store + assert get_physical_store_path(nix_store_path).exists(), ( + "Physical path should exist" + ) # Now delete the path to simulate garbage collection fake_store_path.unlink() @@ -267,8 +283,8 @@ def test_cache_gc(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: monkeypatch.setenv("NIX_STORE_DIR", str(tmp_path / "store")) monkeypatch.setenv("NIX_CACHE_HOME", str(tmp_path / "cache")) monkeypatch.setenv("HOME", str(tmp_path / "home")) - with contextlib.suppress(KeyError): - monkeypatch.delenv("CLAN_TEST_STORE") + monkeypatch.delenv("CLAN_TEST_STORE", raising=False) + monkeypatch.delenv("NIX_REMOTE", raising=False) monkeypatch.setenv("NIX_BUILD_TOP", str(tmp_path / "build")) test_file = tmp_path / "flake" / "testfile" @@ -298,6 +314,126 @@ def test_cache_gc(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: assert not my_flake._cache.is_cached("testfile") # noqa: SLF001 +def test_store_path_with_line_numbers_not_wrapped() -> None: + """Test that store paths with line numbers are not wrapped in outPath dict. + + This is a regression test for the bug where strings like: + /nix/store/abc-file.nix:123 + were being treated as store paths and wrapped in {"outPath": ...} dict, + when they should be stored as plain strings. + """ + cache = FlakeCacheEntry() + + # Test that a path with line number is NOT wrapped in outPath + # Use a realistic store path hash (32 chars in base32) + path_with_line = "/nix/store/abc123def456ghi789jkl012mno345pqr-file.nix:42" + selectors: list[Selector] = [] + cache.insert(path_with_line, selectors) + + # Should be stored as a plain string, not wrapped + assert cache.value == path_with_line + assert not isinstance(cache.value, dict) + + # Test that a regular store path IS wrapped in outPath + cache2 = FlakeCacheEntry() + regular_store_path = "/nix/store/abc123def456ghi789jkl012mno345pqr-package" + cache2.insert(regular_store_path, []) + + # Should be wrapped in outPath dict + assert isinstance(cache2.value, dict) + assert "outPath" in cache2.value + assert cache2.value["outPath"].value == regular_store_path + + # Test path with multiple colons (line:column format) + cache3 = FlakeCacheEntry() + path_with_line_col = "/nix/store/xyz789abc123def456ghi789jkl012mno-source.nix:10:5" + cache3.insert(path_with_line_col, []) + + # Should be stored as plain string + assert cache3.value == path_with_line_col + assert not isinstance(cache3.value, dict) + + +def test_store_reference_helpers() -> None: + """Test the store reference helper functions.""" + from clan_lib.flake.flake import ( + find_store_references, + is_pure_store_path, + ) + + # Test find_store_references + assert find_store_references("/nix/store/abc123-pkg") == ["/nix/store/abc123-pkg"] + assert find_store_references("/nix/store/abc123-file.nix:42") == [ + "/nix/store/abc123-file.nix" + ] + assert find_store_references("/nix/store/abc123-src/lib/file.nix:42:10") == [ + "/nix/store/abc123-src" + ] + + # Multiple references + multi_ref = "error at /nix/store/abc123-src/file.nix:42 and /nix/store/xyz789-lib/lib.nix:10" + refs = find_store_references(multi_ref) + assert len(refs) == 2 + assert "/nix/store/abc123-src" in refs + assert "/nix/store/xyz789-lib" in refs + + # No references + assert find_store_references("/home/user/file.nix") == [] + assert find_store_references("no store paths here") == [] + + # Test is_pure_store_path + assert is_pure_store_path("/nix/store/abc123def456ghi789jkl012mno345pqr-package") + assert is_pure_store_path("/nix/store/abc123def456ghi789jkl012mno345pqr-source") + assert not is_pure_store_path( + "/nix/store/abc123def456ghi789jkl012mno345pqr-file.nix:42" + ) + assert not is_pure_store_path( + "/nix/store/abc123def456ghi789jkl012mno345pqr-src/subdir/file.nix" + ) + assert not is_pure_store_path("/home/user/file") + + +def test_cache_with_multiple_store_references() -> None: + """Test caching behavior with strings containing multiple store references.""" + cache = FlakeCacheEntry() + + # String with multiple store references (none exist) + multi_ref = "Error at /nix/store/nonexistent1-src/file.nix:42 and /nix/store/nonexistent2-lib/lib.nix:10" + cache.insert(multi_ref, []) + + assert cache.value == multi_ref + assert isinstance(cache.value, str) + + # Should return False since none of the referenced paths exist + assert not cache.is_cached([]) + + +def test_store_references_with_custom_store_dir( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Test store reference detection with custom NIX_STORE_DIR.""" + + # Set custom store directory + monkeypatch.setenv("NIX_STORE_DIR", "/custom/store") + + # Test find_store_references with custom dir + assert find_store_references("/custom/store/abc123-pkg") == [ + "/custom/store/abc123-pkg" + ] + assert find_store_references("/custom/store/abc123-file.nix") == [ + "/custom/store/abc123-file.nix" + ] + + # Test is_pure_store_path with custom dir + assert is_pure_store_path("/custom/store/abc123def456ghi789jkl012mno345pqr-package") + assert not is_pure_store_path( + "/custom/store/abc123def456ghi789jkl012mno345pqr-file.nix:42" + ) + assert not is_pure_store_path( + "/nix/store/abc123def456ghi789jkl012mno345pqr-package" + ) + + def test_cache_path_with_line_numbers( tmp_path: Path, monkeypatch: pytest.MonkeyPatch ) -> None: @@ -306,32 +442,36 @@ def test_cache_path_with_line_numbers( This is a regression test for the bug where cached store paths with line numbers (e.g., /nix/store/path:123) are not properly checked for existence. """ - # Create a temporary store + # Create a temporary store that mirrors the /nix/store structure test_store = tmp_path / "test-store" - test_store.mkdir() + nix_store = test_store / "nix" / "store" + nix_store.mkdir(parents=True) # Set CLAN_TEST_STORE environment variable monkeypatch.setenv("CLAN_TEST_STORE", str(test_store)) - # Create a fake store path - fake_store_path = test_store / "abc123-source-file.nix" + # Create a fake store path that follows the real /nix/store pattern + fake_store_hash = "abc123def456ghi789jkl012mno345pqr" + fake_store_name = f"{fake_store_hash}-source-file.nix" + fake_store_path = nix_store / fake_store_name fake_store_path.write_text("# nix source file\n{ foo = 123; }") # Create cache entries for paths with line numbers cache = FlakeCacheEntry() - # Test single line number format - path_with_line = f"{fake_store_path}:42" + # Test single line number format (as Nix would return it) + nix_path_with_line = f"/nix/store/{fake_store_name}:42" selectors = parse_selector("testPath1") - cache.insert(path_with_line, selectors) + cache.insert(nix_path_with_line, selectors) # Test line:column format - path_with_line_col = f"{fake_store_path}:42:10" + nix_path_with_line_col = f"/nix/store/{fake_store_name}:42:10" selectors2 = parse_selector("testPath2") - cache.insert(path_with_line_col, selectors2) + cache.insert(nix_path_with_line_col, selectors2) # Test path with colon but non-numeric suffix (should not be treated as line number) - path_with_colon = test_store / "file:with:colons" + # This would be a regular file path, not in the store + path_with_colon = tmp_path / "file:with:colons" path_with_colon.write_text("test") selectors3 = parse_selector("testPath3") cache.insert(str(path_with_colon), selectors3) @@ -367,10 +507,38 @@ def test_cache_path_with_line_numbers( # Test with regular /nix/store paths monkeypatch.delenv("CLAN_TEST_STORE", raising=False) cache2 = FlakeCacheEntry() - nix_path_with_line = "/nix/store/fake-source.nix:123" - cache2.insert(nix_path_with_line, parse_selector("nixPath")) + nix_path_with_line = "/nix/store/0123456789abcdefghijklmnopqrstuv-source.nix:123" + cache2.insert({"nixPath": nix_path_with_line}, []) - # Should return False because neither the exact path nor base path exists + # The path is stored under the nixPath key, and since it starts with /nix/store + # and has a line number, _check_path_exists will strip the line number and check + # if the base file exists. Since it doesn't exist, this should return False. assert not cache2.is_cached(parse_selector("nixPath")), ( "Nix store path with line number should not be cached when file doesn't exist" ) + + +def test_reinserting_store_path_value() -> None: + """Test that reinserting the same store path value doesn't cause conflicts. + + This is a regression test for the bug where a store path was first inserted + as a pure path (wrapped in outPath), and then the same value was inserted + again as a plain string, causing a TypeError. + """ + cache = FlakeCacheEntry() + + # First insert as pure store path - gets wrapped in outPath + pure_path = "/nix/store/abc123def456ghi789jkl012mno345pqr-source" + cache.insert(pure_path, []) + + assert isinstance(cache.value, dict) + assert "outPath" in cache.value + assert cache.value["outPath"].value == pure_path + + # Now try to insert the same value again - should not raise + cache.insert(pure_path, []) + + # Value should remain unchanged + assert isinstance(cache.value, dict) + assert "outPath" in cache.value + assert cache.value["outPath"].value == pure_path diff --git a/pkgs/clan-cli/clan_lib/flake/flake_test.py b/pkgs/clan-cli/clan_lib/flake/flake_test.py index a6fa1269b..8f5e6dfb2 100644 --- a/pkgs/clan-cli/clan_lib/flake/flake_test.py +++ b/pkgs/clan-cli/clan_lib/flake/flake_test.py @@ -159,13 +159,13 @@ def test_select() -> None: def test_out_path() -> None: - testdict = {"x": {"y": [123, 345, 456], "z": "/nix/store/bla"}} + testdict = {"x": {"y": [123, 345, 456], "z": "/nix/store/abc-bla"}} test_cache = FlakeCacheEntry() test_cache.insert(testdict, []) selectors = parse_selector("x.z") - assert test_cache.select(selectors) == "/nix/store/bla" + assert test_cache.select(selectors) == "/nix/store/abc-bla" selectors = parse_selector("x.z.outPath") - assert test_cache.select(selectors) == "/nix/store/bla" + assert test_cache.select(selectors) == "/nix/store/abc-bla" def test_out_path_in_multiselect_raises_exception() -> None: