perf(core): Wire warm-cache heuristic into packager and tests

Companion to the previous commit. Plumb `tokenCountCacheFileExists` into
the packager `defaultDeps` so the metrics warm-up sizing can be exercised
deterministically from tests, and add a paired test that asserts the
2-warm-up-worker branch is taken when the persistent disk cache exists.

Also rename the cold-cache test to make the new gating explicit and refresh
its docstring with the warm/cold distinction.

https://claude.ai/code/session_01TJqKkJ8n3r6Pa2JdW9Vp2w
This commit is contained in:
Kazuki Yamada
2026-05-10 15:59:43 +09:00
parent dcdbdd5efc
commit ca44a74f3f
2 changed files with 64 additions and 17 deletions
+33 -13
View File
@@ -13,7 +13,7 @@ import type { ProcessedFile } from './file/fileTypes.js';
import { getGitDiffs } from './git/gitDiffHandle.js';
import { getGitLogs } from './git/gitLogHandle.js';
import { calculateMetrics, createMetricsTaskRunner } from './metrics/calculateMetrics.js';
import { loadTokenCountCache, saveTokenCountCache } from './metrics/tokenCountCache.js';
import { loadTokenCountCache, saveTokenCountCache, tokenCountCacheFileExists } from './metrics/tokenCountCache.js';
import { prefetchCompiledTemplate } from './output/outputGenerate.js';
import { prefetchSortData, sortOutputFiles } from './output/outputSort.js';
import { produceOutput } from './packager/produceOutput.js';
@@ -56,6 +56,7 @@ const defaultDeps = {
prefetchSortData,
getGitDiffs,
getGitLogs,
tokenCountCacheFileExists,
// Lazy-load packSkill to defer importing the skill module chain
// (skillSectionGenerators, skillStyle → Handlebars), which adds ~25ms
// to module loading. Only used when --skill-generate is active (non-default).
@@ -114,22 +115,41 @@ export const pack = async (
//
// Worker count: the metrics phase wall-clock is dominated by gpt-tokenizer
// throughput, so adding a 3rd warm worker pays off when the metrics phase
// is long enough to amortize its ~250ms BPE warm-up. We size on a coarse
// proxy for "long metrics phase" — whether the user constrained the file
// set via --include / config.include / --stdin (explicitFiles). When
// unconstrained (default scan), 3 warm workers measure +2.46% paired mean
// wall-clock reduction on the 1046-file workload (n=20, t=3.18,
// NODE_DISABLE_COMPILE_CACHE=1). With an explicit scope the file set is
// typically a few hundred files, the metrics phase is shorter, and the
// 3rd worker's BPE warm-up dominates the parallelism gain (-11.85%
// paired regression on the 258-file `--include 'src,tests'` workload at
// unconditional EAGER_WARMUP_THREADS=3), so we keep the original
// 2-worker sizing.
// is long enough to amortize its ~250ms BPE warm-up. We size on two coarse
// proxies for "long metrics phase":
//
// 1. `hasExplicitScope` — whether the user constrained the file set via
// --include / config.include / --stdin (explicitFiles). Explicit scopes
// typically pack a few hundred files; the metrics phase is short and
// the 3rd worker's BPE warm-up dominates the parallelism gain (-11.85%
// paired regression on the 258-file `--include 'src,tests'` workload).
//
// 2. `tokenCountCacheFileExists()` — whether the persistent disk cache
// exists from a previous run. When present, almost every per-file token
// count is served from cache and `calculateFileMetrics` returns without
// dispatching any worker tasks (see `calculateFileMetrics.ts`); the only
// real worker work is a single wrapper-token call (also cached after the
// first run). On warm-cache repeat runs (the common case), spawning 3
// workers means 3 redundant BPE table parses (~340 ms of CPU each) that
// contend with file collection and security check on a 4-core host.
// Reducing to 2 measured -110.5 ms / -9.65 % paired wall-clock on the
// 1047-file repomix self-pack workload (n=30, t=11.97, faster=30/30,
// NODE_DISABLE_COMPILE_CACHE=1).
// Cold-cache (cache file missing) keeps the original 3-worker warmup
// so the actual file tokenizations parallelise across more workers.
// The probe is a coarse heuristic: a cache file written by a previous
// run that used a different `tokenCount.encoding` (e.g. cl100k_base
// rather than the default o200k_base) yields no hits for the current
// run, so the metrics phase will be cold-but-with-2-workers. Output
// correctness is unaffected — only the warmup parallelism is
// suboptimal — and the next run rebuilds the cache for the new
// encoding so subsequent runs hit again.
//
// EAGER_WARMUP_THREADS × TASKS_PER_THREAD yields maxThreads=min(cpuCount, N)
// for N ∈ {2, 3}; single-CPU hosts collapse to maxThreads=1 (no change).
const hasExplicitScope = (explicitFiles?.length ?? 0) > 0 || config.include.length > 0;
const EAGER_WARMUP_THREADS = hasExplicitScope ? 2 : 3;
const cacheLikelyWarm = deps.tokenCountCacheFileExists();
const EAGER_WARMUP_THREADS = hasExplicitScope || cacheLikelyWarm ? 2 : 3;
const EAGER_METRICS_NUM_TASKS = EAGER_WARMUP_THREADS * TASKS_PER_THREAD;
const { taskRunner: metricsTaskRunner, warmupPromise: metricsWarmupPromise } = deps.createMetricsTaskRunner(
EAGER_METRICS_NUM_TASKS,
+31 -4
View File
@@ -193,6 +193,10 @@ describe('packager', () => {
getGitLogs: vi.fn().mockResolvedValue(undefined),
prefetchSortData: vi.fn().mockResolvedValue(undefined),
sortOutputFiles: vi.fn().mockImplementation((files) => files),
// Default: assume the persistent token-count cache is missing so the
// metrics warm-up sizes for the cold-cache path. Tests targeting the
// warm-cache path override this to return true.
tokenCountCacheFileExists: vi.fn().mockReturnValue(false),
},
};
};
@@ -248,12 +252,15 @@ describe('packager', () => {
expect(cleanup).toHaveBeenCalled();
});
test('uses 3 warm-up workers for the metrics pool when no scope is specified', async () => {
test('uses 3 warm-up workers for the metrics pool when no scope is specified and token cache is cold', async () => {
// The eager metrics warm-up sizes the pool via numOfTasks. When the user did
// not constrain the file set (no --include / no --stdin), the metrics phase is
// typically long enough to amortize a 3rd worker's BPE warm-up, so we pass
// numOfTasks = 3 * TASKS_PER_THREAD = 600 (yielding maxThreads=min(cpuCount, 3)).
// not constrain the file set (no --include / no --stdin) AND no persistent
// token-count cache file exists from a prior run, the metrics phase has to
// tokenize every file and is typically long enough to amortize a 3rd worker's
// BPE warm-up, so we pass numOfTasks = 3 * TASKS_PER_THREAD = 600 (yielding
// maxThreads=min(cpuCount, 3)).
const { deps } = baseDeps();
deps.tokenCountCacheFileExists = vi.fn().mockReturnValue(false);
const config = createMockConfig();
config.include = [];
@@ -264,6 +271,26 @@ describe('packager', () => {
expect(deps.createSecurityTaskRunner).toHaveBeenCalled();
});
test('uses 2 warm-up workers for the metrics pool when no scope is specified but the token cache is warm', async () => {
// When the persistent token-count cache file already exists from a previous
// run, almost every per-file token count is served from cache and the metrics
// phase performs at most a single wrapper-token tokenization (also cached on
// the second run). Spawning a 3rd worker means a redundant ~340 ms BPE table
// parse that contends with file collection and security check on the critical
// path, so the heuristic falls back to numOfTasks = 2 * TASKS_PER_THREAD = 400.
const { deps } = baseDeps();
deps.tokenCountCacheFileExists = vi.fn().mockReturnValue(true);
const config = createMockConfig();
config.include = [];
await pack(['root'], config, vi.fn(), deps);
expect(deps.createMetricsTaskRunner).toHaveBeenCalledWith(400, expect.any(String));
// The security pool pre-warm is unaffected by the cache state — its gate is
// still `hasExplicitScope`, which is false here.
expect(deps.createSecurityTaskRunner).toHaveBeenCalled();
});
test('uses 2 warm-up workers for the metrics pool when --include narrows scope', async () => {
// With explicit includes the file set is typically much smaller and the
// 3rd worker's BPE warm-up dominates the parallelism gain (paired benchmarks