Merge pull request 'vars: optimize generate - reduce cache misses' (#5348) from dave into main

Reviewed-on: https://git.clan.lol/clan/clan-core/pulls/5348
DavHau committed 2025-10-02 11:50:26 +00:00
5 changed files with 150 additions and 36 deletions

View File

@@ -3,6 +3,7 @@ import logging
 import os
 import re
 import shlex
+import traceback
 from dataclasses import asdict, dataclass, field
 from enum import Enum
 from functools import cache
@@ -792,7 +793,7 @@ class Flake:
     _cache: FlakeCache | None = field(init=False, default=None)
     _path: Path | None = field(init=False, default=None)
     _is_local: bool | None = field(init=False, default=None)
-    _cache_misses: int = field(init=False, default=0)
+    _cache_miss_stack_traces: list[str] = field(init=False, default_factory=list)

     @classmethod
     def from_json(
@@ -814,6 +815,34 @@ class Flake:
             return NotImplemented
         return self.identifier == other.identifier

+    def _record_cache_miss(self, selector_info: str) -> None:
+        """Record a cache miss with its stack trace."""
+        stack_trace = "".join(traceback.format_stack())
+        self._cache_miss_stack_traces.append(f"{selector_info}\n{stack_trace}")
+
+    @property
+    def _cache_misses(self) -> int:
+        """Get the count of cache misses from the stack trace list."""
+        return len(self._cache_miss_stack_traces)
+
+    def print_cache_miss_analysis(self, title: str = "Cache miss analysis") -> None:
+        """Print detailed analysis of cache misses with stack traces.
+
+        Args:
+            title: Title for the analysis output
+        """
+        if not self._cache_miss_stack_traces:
+            return
+
+        print(f"\n=== {title} ===")
+        print(f"Total cache misses: {len(self._cache_miss_stack_traces)}")
+        print("\nStack traces for all cache misses:")
+
+        for i, trace in enumerate(self._cache_miss_stack_traces, 1):
+            print(f"\n--- Cache miss #{i} ---")
+            print(trace)
+            print("=" * 50)
+
     @property
     def is_local(self) -> bool:
         if self._is_local is None:
@@ -886,10 +915,13 @@ class Flake:
         """Invalidate the cache and reload it.

         This method is used to refresh the cache by reloading it from the flake.
+        Also resets cache miss tracking.
         """
         self.prefetch()
         self._cache = FlakeCache()
+        # Reset cache miss tracking when invalidating cache
+        self._cache_miss_stack_traces.clear()

         if self.hash is None:
             msg = "Hash cannot be None"
             raise ClanError(msg)
@@ -1063,8 +1095,10 @@ class Flake:
         ]

         if not_fetched_selectors:
-            # Increment cache miss counter for each selector that wasn't cached
-            self._cache_misses += 1
+            # Record cache miss with stack trace
+            self._record_cache_miss(
+                f"Cache miss for selectors: {not_fetched_selectors}"
+            )
             self.get_from_nix(not_fetched_selectors)

     def select(
@@ -1090,7 +1124,8 @@ class Flake:
         if not self._cache.is_cached(selector):
            log.debug(f"(cached) $ clan select {shlex.quote(selector)}")
             log.debug(f"Cache miss for {selector}")
-            self._cache_misses += 1
+            # Record cache miss with stack trace
+            self._record_cache_miss(f"Cache miss for selector: {selector}")
             self.get_from_nix([selector])
         else:
             log.debug(f"$ clan select {shlex.quote(selector)}")

View File

@@ -129,10 +129,21 @@ class InventoryStore:
         self._allowed_path_transforms = _allowed_path_transforms
         if _keys is None:
-            _keys = list(InventorySnapshot.__annotations__.keys())
+            _keys = self.default_keys()
         self._keys = _keys

+    @classmethod
+    def default_keys(cls) -> list[str]:
+        return list(InventorySnapshot.__annotations__.keys())
+
+    @classmethod
+    def default_selectors(cls) -> list[str]:
+        return [
+            f"clanInternals.inventoryClass.inventory.{key}"
+            for key in cls.default_keys()
+        ]
+
     def _load_merged_inventory(self) -> InventorySnapshot:
         """Loads the evaluated inventory.

         After all merge operations with eventual nix code in buildClan.
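
The new classmethods expose the inventory's flake selectors without constructing an `InventoryStore`, so callers can precache exactly the attributes the store will later read. A rough sketch of the idea, with placeholder field names (the real `InventorySnapshot` keys differ):

```python
# Sketch of deriving flake selectors from a snapshot type's annotations.
from typing import TypedDict


class InventorySnapshot(TypedDict, total=False):
    # Placeholder fields for illustration only.
    machines: dict
    instances: dict
    meta: dict


def default_keys() -> list[str]:
    return list(InventorySnapshot.__annotations__.keys())


def default_selectors() -> list[str]:
    return [f"clanInternals.inventoryClass.inventory.{key}" for key in default_keys()]


print(default_selectors())
# ['clanInternals.inventoryClass.inventory.machines', ...]
```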

View File

@@ -9,6 +9,7 @@ from clan_cli.vars.migration import check_can_migrate, migrate_files
 from clan_lib.api import API
 from clan_lib.errors import ClanError
 from clan_lib.machines.machines import Machine
+from clan_lib.persist.inventory_store import InventoryStore

 log = logging.getLogger(__name__)
@@ -37,18 +38,22 @@ def get_generators(
     if not machines:
         msg = "At least one machine must be provided"
         raise ClanError(msg)

-    all_machines = machines[0].flake.list_machines().keys()
+    flake = machines[0].flake
+    flake.precache(
+        InventoryStore.default_selectors()
+        + Generator.get_machine_selectors(m.name for m in machines)
+    )
+
+    all_machines = flake.list_machines().keys()
     requested_machines = [machine.name for machine in machines]

     all_generators_list = Generator.get_machine_generators(
         all_machines,
-        machines[0].flake,
+        flake,
         include_previous_values=include_previous_values,
     )
     requested_generators_list = Generator.get_machine_generators(
         requested_machines,
-        machines[0].flake,
+        flake,
         include_previous_values=include_previous_values,
     )
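
The point of the `precache` call is that one batched evaluation fills the cache for the inventory selectors and all per-machine generator selectors before `list_machines` and `get_machine_generators` run, instead of each of those lookups missing separately. A toy sketch of that trade-off, with made-up selector strings and a stand-in cache rather than the real `Flake` API:

```python
# Sketch of the batched-precache idea (toy cache; selector strings are illustrative).
class TinyCache:
    def __init__(self) -> None:
        self._data: dict[str, str] = {}
        self.evaluations = 0  # each evaluation stands in for one nix eval round-trip

    def _evaluate(self, selectors: list[str]) -> None:
        self.evaluations += 1
        for s in selectors:
            self._data[s] = f"<value of {s}>"

    def precache(self, selectors: list[str]) -> None:
        missing = [s for s in selectors if s not in self._data]
        if missing:
            self._evaluate(missing)  # one batched evaluation for all missing selectors

    def select(self, selector: str) -> str:
        if selector not in self._data:
            self._evaluate([selector])  # cache miss: a separate evaluation per selector
        return self._data[selector]


inventory = [f"clanInternals.inventoryClass.inventory.{k}" for k in ("machines", "meta")]
generators = [f"machines.{m}.generators" for m in ("web01", "db01")]

cache = TinyCache()
cache.precache(inventory + generators)   # 1 evaluation up front
for selector in inventory + generators:
    cache.select(selector)               # all hits afterwards
print(cache.evaluations)                 # 1 instead of 4
```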