From 5715b2e58eee03caaf6bf120a5526a65ae5ec3d9 Mon Sep 17 00:00:00 2001 From: Kazuki Yamada Date: Wed, 31 Dec 2025 17:57:49 +0900 Subject: [PATCH 01/13] feat(worker): Add unified worker entry point for bundling support Add a unified worker entry point that enables full bundling support by allowing bundled files to spawn workers using themselves. This is a prerequisite for bundling the website server to improve Cloud Run cold start times. Changes: - Add src/shared/unifiedWorker.ts as a single entry point for all workers - Support both worker_threads and child_process runtimes - Add REPOMIX_WORKER_TYPE env var for child_process worker type detection - Add REPOMIX_WORKER_PATH env var for bundled environment worker path - Add REPOMIX_WASM_DIR env var for WASM file location override - Update processConcurrency.ts to use unified worker path - Add debug logging (REPOMIX_DEBUG_WORKER=1) for worker troubleshooting - Export unified worker handler from main index.ts Note: This is a work in progress. There's a known issue with the child_process runtime where nested worker pools (created inside a worker) may receive an incorrect REPOMIX_WORKER_TYPE environment variable, causing task routing issues. Investigation ongoing. 
--- src/cli/actions/defaultAction.ts | 2 +- .../actions/workers/defaultActionWorker.ts | 27 ++- src/cli/cliSpinner.ts | 5 +- src/core/file/fileCollect.ts | 2 +- src/core/file/fileProcess.ts | 2 +- src/core/metrics/calculateMetrics.ts | 2 +- src/core/security/securityCheck.ts | 2 +- src/core/treeSitter/loadLanguage.ts | 35 +++- src/index.ts | 11 ++ src/shared/processConcurrency.ts | 14 +- src/shared/unifiedWorker.ts | 157 ++++++++++++++++++ tests/cli/actions/defaultAction.test.ts | 2 +- tests/shared/processConcurrency.test.ts | 24 +-- 13 files changed, 260 insertions(+), 25 deletions(-) create mode 100644 src/shared/unifiedWorker.ts diff --git a/src/cli/actions/defaultAction.ts b/src/cli/actions/defaultAction.ts index d46789ba..0fe69e1f 100644 --- a/src/cli/actions/defaultAction.ts +++ b/src/cli/actions/defaultAction.ts @@ -107,7 +107,7 @@ export const runDefaultAction = async ( // Create worker task runner const taskRunner = initTaskRunner({ numOfTasks: 1, - workerPath: new URL('./workers/defaultActionWorker.js', import.meta.url).href, + workerType: 'defaultAction', runtime: 'child_process', }); diff --git a/src/cli/actions/workers/defaultActionWorker.ts b/src/cli/actions/workers/defaultActionWorker.ts index f1d2b9ec..8f81de8d 100644 --- a/src/cli/actions/workers/defaultActionWorker.ts +++ b/src/cli/actions/workers/defaultActionWorker.ts @@ -36,6 +36,11 @@ function defaultActionWorker(task: PingTask): Promise; async function defaultActionWorker( task: DefaultActionTask | PingTask, ): Promise { + // Debug: Log received task + if (process.env.REPOMIX_DEBUG_WORKER) { + console.error('[DefaultActionWorker] Task received:', typeof task, task); + } + // Handle ping requests for Bun compatibility check if ('ping' in task) { return { @@ -43,19 +48,35 @@ async function defaultActionWorker( }; } + // Validate task structure + if (!task || typeof task !== 'object') { + throw new Error(`Invalid task: expected object, got ${typeof task}`); + } + // At this point, task is 
guaranteed to be DefaultActionTask - const { directories, cwd, config, cliOptions, stdinFilePaths } = task; + const { directories, cwd, config, cliOptions = {}, stdinFilePaths } = task; + + // Additional validation for required fields + if (!directories || !Array.isArray(directories)) { + throw new Error(`Invalid task.directories: expected array, got ${typeof directories}. Task keys: ${Object.keys(task).join(', ')}`); + } logger.trace('Worker: Using pre-loaded config:', config); + logger.trace('Worker: cliOptions:', cliOptions); // Initialize spinner in worker - const spinner = new Spinner('Initializing...', cliOptions); + // Use optional cliOptions to handle bundled environments where cliOptions might be undefined + const spinner = new Spinner('Initializing...', cliOptions as CliOptions); spinner.start(); let packResult: PackResult; try { - const { skillName, skillDir, skillProjectName, skillSourceUrl } = cliOptions; + // Use optional chaining to safely access cliOptions properties + const skillName = (cliOptions as CliOptions)?.skillName; + const skillDir = (cliOptions as CliOptions)?.skillDir; + const skillProjectName = (cliOptions as CliOptions)?.skillProjectName; + const skillSourceUrl = (cliOptions as CliOptions)?.skillSourceUrl; const packOptions = { skillName, skillDir, skillProjectName, skillSourceUrl }; if (stdinFilePaths) { diff --git a/src/cli/cliSpinner.ts b/src/cli/cliSpinner.ts index c40878cb..a499965d 100644 --- a/src/cli/cliSpinner.ts +++ b/src/cli/cliSpinner.ts @@ -12,10 +12,11 @@ export class Spinner { private interval: ReturnType | null = null; private readonly isQuiet: boolean; - constructor(message: string, cliOptions: CliOptions) { + constructor(message: string, cliOptions?: CliOptions) { this.message = message; // If the user has specified the verbose flag, don't show the spinner - this.isQuiet = cliOptions.quiet || cliOptions.verbose || cliOptions.stdout || false; + // Use optional chaining to handle undefined cliOptions (e.g., in bundled 
worker environments) + this.isQuiet = cliOptions?.quiet || cliOptions?.verbose || cliOptions?.stdout || false; } start(): void { diff --git a/src/core/file/fileCollect.ts b/src/core/file/fileCollect.ts index e5f1c289..07191634 100644 --- a/src/core/file/fileCollect.ts +++ b/src/core/file/fileCollect.ts @@ -25,7 +25,7 @@ export const collectFiles = async ( ): Promise => { const taskRunner = deps.initTaskRunner({ numOfTasks: filePaths.length, - workerPath: new URL('./workers/fileCollectWorker.js', import.meta.url).href, + workerType: 'fileCollect', runtime: 'worker_threads', }); const tasks = filePaths.map( diff --git a/src/core/file/fileProcess.ts b/src/core/file/fileProcess.ts index 85e7016d..be987616 100644 --- a/src/core/file/fileProcess.ts +++ b/src/core/file/fileProcess.ts @@ -23,7 +23,7 @@ export const processFiles = async ( ): Promise => { const taskRunner = deps.initTaskRunner({ numOfTasks: rawFiles.length, - workerPath: new URL('./workers/fileProcessWorker.js', import.meta.url).href, + workerType: 'fileProcess', // High memory usage and leak risk runtime: 'worker_threads', }); diff --git a/src/core/metrics/calculateMetrics.ts b/src/core/metrics/calculateMetrics.ts index 56679f7e..6ec76a3e 100644 --- a/src/core/metrics/calculateMetrics.ts +++ b/src/core/metrics/calculateMetrics.ts @@ -43,7 +43,7 @@ export const calculateMetrics = async ( deps.taskRunner ?? 
initTaskRunner({ numOfTasks: processedFiles.length, - workerPath: new URL('./workers/calculateMetricsWorker.js', import.meta.url).href, + workerType: 'calculateMetrics', runtime: 'worker_threads', }); diff --git a/src/core/security/securityCheck.ts b/src/core/security/securityCheck.ts index d20cf692..3fffed91 100644 --- a/src/core/security/securityCheck.ts +++ b/src/core/security/securityCheck.ts @@ -57,7 +57,7 @@ export const runSecurityCheck = async ( const taskRunner = deps.initTaskRunner({ numOfTasks: rawFiles.length + gitDiffTasks.length + gitLogTasks.length, - workerPath: new URL('./workers/securityCheckWorker.js', import.meta.url).href, + workerType: 'securityCheck', runtime: 'worker_threads', }); const fileTasks = rawFiles.map( diff --git a/src/core/treeSitter/loadLanguage.ts b/src/core/treeSitter/loadLanguage.ts index bf672cd4..975fae52 100644 --- a/src/core/treeSitter/loadLanguage.ts +++ b/src/core/treeSitter/loadLanguage.ts @@ -1,9 +1,32 @@ import fs from 'node:fs/promises'; +import path from 'node:path'; import { createRequire } from 'node:module'; import { Language } from 'web-tree-sitter'; const require = createRequire(import.meta.url); +/** + * Custom WASM base path for bundled environments. + * Set via REPOMIX_WASM_DIR environment variable or setWasmBasePath(). + * When set, WASM files are loaded from this directory instead of node_modules. + */ +let customWasmBasePath: string | null = null; + +/** + * Set a custom base path for WASM files. + * Used in bundled environments where WASM files are copied to a custom location. + */ +export function setWasmBasePath(basePath: string): void { + customWasmBasePath = basePath; +} + +/** + * Get the WASM base path from environment variable or custom setting. + */ +function getWasmBasePath(): string | null { + return customWasmBasePath ?? process.env.REPOMIX_WASM_DIR ?? 
null; +} + export async function loadLanguage(langName: string): Promise { if (!langName) { throw new Error('Invalid language name'); @@ -19,7 +42,17 @@ export async function loadLanguage(langName: string): Promise { } async function getWasmPath(langName: string): Promise { - const wasmPath = require.resolve(`@repomix/tree-sitter-wasms/out/tree-sitter-${langName}.wasm`); + const wasmBasePath = getWasmBasePath(); + + let wasmPath: string; + if (wasmBasePath) { + // Use custom WASM path for bundled environments + wasmPath = path.join(wasmBasePath, `tree-sitter-${langName}.wasm`); + } else { + // Use require.resolve for standard node_modules environments + wasmPath = require.resolve(`@repomix/tree-sitter-wasms/out/tree-sitter-${langName}.wasm`); + } + try { await fs.access(wasmPath); return wasmPath; diff --git a/src/index.ts b/src/index.ts index 0c1e5bac..2dfc1ef1 100644 --- a/src/index.ts +++ b/src/index.ts @@ -24,6 +24,7 @@ export { TokenCounter } from './core/metrics/TokenCounter.js'; // Tree-sitter export { parseFile } from './core/treeSitter/parseFile.js'; +export { setWasmBasePath } from './core/treeSitter/loadLanguage.js'; // --------------------------------------------------------------------------------------------------------------------- // Config @@ -56,3 +57,13 @@ export { runDefaultAction, buildCliConfig } from './cli/actions/defaultAction.js // Remote action export { runRemoteAction } from './cli/actions/remoteAction.js'; + +// --------------------------------------------------------------------------------------------------------------------- +// Worker (for bundled environments) +// --------------------------------------------------------------------------------------------------------------------- +export { + default as unifiedWorkerHandler, + onWorkerTermination as unifiedWorkerTermination, + getUnifiedWorkerPath, + type WorkerType, +} from './shared/unifiedWorker.js'; diff --git a/src/shared/processConcurrency.ts b/src/shared/processConcurrency.ts 
index 68691af8..063794dc 100644 --- a/src/shared/processConcurrency.ts +++ b/src/shared/processConcurrency.ts @@ -1,12 +1,13 @@ import os from 'node:os'; import { type Options, Tinypool } from 'tinypool'; import { logger } from './logger.js'; +import { type WorkerType, getUnifiedWorkerPath } from './unifiedWorker.js'; export type WorkerRuntime = NonNullable; export interface WorkerOptions { numOfTasks: number; - workerPath: string; + workerType: WorkerType; runtime: WorkerRuntime; } @@ -32,11 +33,14 @@ export const getWorkerThreadCount = (numOfTasks: number): { minThreads: number; }; export const createWorkerPool = (options: WorkerOptions): Tinypool => { - const { numOfTasks, workerPath, runtime = 'child_process' } = options; + const { numOfTasks, workerType, runtime = 'child_process' } = options; const { minThreads, maxThreads } = getWorkerThreadCount(numOfTasks); + // Use unified worker entry point for bundling support + const workerPath = getUnifiedWorkerPath(); + logger.trace( - `Initializing worker pool with min=${minThreads}, max=${maxThreads} threads, runtime=${runtime}. Worker path: ${workerPath}`, + `Initializing worker pool with min=${minThreads}, max=${maxThreads} threads, runtime=${runtime}. 
Worker type: ${workerType}`, ); const startTime = process.hrtime.bigint(); @@ -49,12 +53,16 @@ export const createWorkerPool = (options: WorkerOptions): Tinypool => { idleTimeout: 5000, teardown: 'onWorkerTermination', workerData: { + workerType, logLevel: logger.getLogLevel(), }, // Only add env for child_process workers ...(runtime === 'child_process' && { env: { ...process.env, + // Pass worker type as environment variable for child_process workers + // This is needed because workerData is not directly accessible in child_process runtime + REPOMIX_WORKER_TYPE: workerType, // Pass log level as environment variable for child_process workers REPOMIX_LOG_LEVEL: logger.getLogLevel().toString(), // Ensure color support in child_process workers diff --git a/src/shared/unifiedWorker.ts b/src/shared/unifiedWorker.ts new file mode 100644 index 00000000..058423a6 --- /dev/null +++ b/src/shared/unifiedWorker.ts @@ -0,0 +1,157 @@ +/** + * Unified Worker Entry Point + * + * This module serves as a single entry point for all worker types in Repomix. + * It enables full bundling support by allowing the bundled file to spawn workers + * using itself (import.meta.url), eliminating path resolution issues. + * + * When running as a worker, it dynamically imports the appropriate worker handler + * based on the workerType specified in workerData. 
+ */ + +import { isMainThread, workerData } from 'node:worker_threads'; + +// Detect if running as a Tinypool worker +// For worker_threads: isMainThread is false +// For child_process: process.__tinypool_state__.isTinypoolWorker is true +const isTinypoolWorker = (): boolean => { + // Check for child_process runtime (Tinypool sets this before importing worker) + const tinypoolState = (process as NodeJS.Process & { __tinypool_state__?: { isTinypoolWorker?: boolean } }).__tinypool_state__; + if (tinypoolState?.isTinypoolWorker) { + return true; + } + // Check for worker_threads runtime + return !isMainThread; +}; + +// Worker type definitions +export type WorkerType = + | 'fileCollect' + | 'fileProcess' + | 'securityCheck' + | 'calculateMetrics' + | 'defaultAction'; + +// Worker handler type - uses 'any' to accommodate different worker signatures +// biome-ignore lint/suspicious/noExplicitAny: Worker handlers have varying signatures +type WorkerHandler = (task: any) => Promise; +type WorkerCleanup = () => void | Promise; + +// Store the loaded handler and cleanup function +let loadedHandler: WorkerHandler | null = null; +let loadedCleanup: WorkerCleanup | null = null; + +/** + * Dynamically load the appropriate worker handler based on workerType. + * Uses dynamic imports to avoid loading all worker code when not needed. 
+ */ +const loadWorkerHandler = async (workerType: WorkerType): Promise<{ handler: WorkerHandler; cleanup?: WorkerCleanup }> => { + switch (workerType) { + case 'fileCollect': { + const module = await import('../core/file/workers/fileCollectWorker.js'); + return { handler: module.default as WorkerHandler, cleanup: module.onWorkerTermination }; + } + case 'fileProcess': { + const module = await import('../core/file/workers/fileProcessWorker.js'); + return { handler: module.default as WorkerHandler, cleanup: module.onWorkerTermination }; + } + case 'securityCheck': { + const module = await import('../core/security/workers/securityCheckWorker.js'); + return { handler: module.default as WorkerHandler, cleanup: module.onWorkerTermination }; + } + case 'calculateMetrics': { + const module = await import('../core/metrics/workers/calculateMetricsWorker.js'); + return { handler: module.default as WorkerHandler, cleanup: module.onWorkerTermination }; + } + case 'defaultAction': { + const module = await import('../cli/actions/workers/defaultActionWorker.js'); + return { handler: module.default as WorkerHandler, cleanup: module.onWorkerTermination }; + } + default: + throw new Error(`Unknown worker type: ${workerType}`); + } +}; + +/** + * Initialize the worker handler if running as a Tinypool worker. + * This is called at module load time. + */ +const initializeWorker = async (): Promise => { + if (!isTinypoolWorker()) { + return; + } + + // Get workerType from multiple sources: + // 1. worker_threads workerData (for worker_threads runtime) + // 2. Environment variable (for child_process runtime) + const workerType: WorkerType | undefined = + (workerData as { workerType?: WorkerType } | undefined)?.workerType ?? 
+ (process.env.REPOMIX_WORKER_TYPE as WorkerType | undefined); + + // Debug: Log worker initialization + if (process.env.REPOMIX_DEBUG_WORKER) { + console.error( + `[UnifiedWorker] Initializing: workerType=${workerType}, ` + + `env.REPOMIX_WORKER_TYPE=${process.env.REPOMIX_WORKER_TYPE}, ` + + `workerData=${JSON.stringify(workerData)}, ` + + `PID=${process.pid}`, + ); + } + + if (!workerType) { + throw new Error('Worker started without workerType (check workerData or REPOMIX_WORKER_TYPE env)'); + } + + const { handler, cleanup } = await loadWorkerHandler(workerType); + loadedHandler = handler; + loadedCleanup = cleanup ?? null; +}; + +// Initialize worker on module load (only in worker threads) +const initPromise = initializeWorker(); + +/** + * Default export for Tinypool. + * This function is called for each task and delegates to the loaded handler. + */ +export default async (task: unknown): Promise => { + // Ensure initialization is complete + await initPromise; + + if (!loadedHandler) { + throw new Error('Worker handler not initialized'); + } + + // Debug: Log task details in bundled environment + if (process.env.REPOMIX_DEBUG_WORKER) { + console.error('[UnifiedWorker] Task received:', JSON.stringify(task, null, 2)); + } + + return loadedHandler(task); +}; + +/** + * Cleanup function for Tinypool teardown. + * Delegates to the loaded worker's cleanup function. + */ +export const onWorkerTermination = async (): Promise => { + if (loadedCleanup) { + await loadedCleanup(); + } +}; + +/** + * Get the path to this unified worker module. + * Used by processConcurrency.ts to spawn workers. + * + * In bundled environments, set REPOMIX_WORKER_PATH to the bundled file path. + * The bundled file should contain all worker code, and when imported as a worker, + * the isTinypoolWorker() check will trigger initialization. 
+ */ +export const getUnifiedWorkerPath = (): string => { + // Allow override for bundled environments + if (process.env.REPOMIX_WORKER_PATH) { + return process.env.REPOMIX_WORKER_PATH; + } + return new URL('./unifiedWorker.js', import.meta.url).href; +}; diff --git a/tests/cli/actions/defaultAction.test.ts b/tests/cli/actions/defaultAction.test.ts index c2858b9f..4d5852a9 100644 --- a/tests/cli/actions/defaultAction.test.ts +++ b/tests/cli/actions/defaultAction.test.ts @@ -149,7 +149,7 @@ describe('defaultAction', () => { expect(processConcurrency.initTaskRunner).toHaveBeenCalledWith({ numOfTasks: 1, - workerPath: expect.stringContaining('defaultActionWorker.js'), + workerType: 'defaultAction', runtime: 'child_process', }); diff --git a/tests/shared/processConcurrency.test.ts b/tests/shared/processConcurrency.test.ts index 983da939..97f87bfc 100644 --- a/tests/shared/processConcurrency.test.ts +++ b/tests/shared/processConcurrency.test.ts @@ -10,6 +10,9 @@ import { vi.mock('node:os'); vi.mock('tinypool'); +vi.mock('../../src/shared/unifiedWorker.js', () => ({ + getUnifiedWorkerPath: () => '/mocked/path/to/unifiedWorker.js', +})); describe('processConcurrency', () => { describe('getProcessConcurrency', () => { @@ -72,17 +75,17 @@ describe('processConcurrency', () => { }); it('should initialize Tinypool with correct configuration', () => { - const workerPath = '/path/to/worker.js'; - const tinypool = createWorkerPool({ numOfTasks: 500, workerPath, runtime: 'child_process' }); + const tinypool = createWorkerPool({ numOfTasks: 500, workerType: 'fileCollect', runtime: 'child_process' }); expect(Tinypool).toHaveBeenCalledWith({ - filename: workerPath, + filename: '/mocked/path/to/unifiedWorker.js', runtime: 'child_process', minThreads: 1, maxThreads: 4, // Math.min(4, 500/100) = 4 idleTimeout: 5000, teardown: 'onWorkerTermination', workerData: { + workerType: 'fileCollect', logLevel: 2, }, env: expect.objectContaining({ @@ -95,17 +98,17 @@ describe('processConcurrency', 
() => { }); it('should initialize Tinypool with worker_threads runtime when specified', () => { - const workerPath = '/path/to/worker.js'; - const tinypool = createWorkerPool({ numOfTasks: 500, workerPath, runtime: 'worker_threads' }); + const tinypool = createWorkerPool({ numOfTasks: 500, workerType: 'securityCheck', runtime: 'worker_threads' }); expect(Tinypool).toHaveBeenCalledWith({ - filename: workerPath, + filename: '/mocked/path/to/unifiedWorker.js', runtime: 'worker_threads', minThreads: 1, maxThreads: 4, // Math.min(4, 500/100) = 4 idleTimeout: 5000, teardown: 'onWorkerTermination', workerData: { + workerType: 'securityCheck', logLevel: 2, }, }); @@ -126,8 +129,7 @@ describe('processConcurrency', () => { }); it('should return a TaskRunner with run and cleanup methods', () => { - const workerPath = '/path/to/worker.js'; - const taskRunner = initTaskRunner({ numOfTasks: 100, workerPath, runtime: 'child_process' }); + const taskRunner = initTaskRunner({ numOfTasks: 100, workerType: 'fileProcess', runtime: 'child_process' }); expect(taskRunner).toHaveProperty('run'); expect(taskRunner).toHaveProperty('cleanup'); @@ -136,12 +138,14 @@ describe('processConcurrency', () => { }); it('should pass runtime parameter to createWorkerPool', () => { - const workerPath = '/path/to/worker.js'; - const taskRunner = initTaskRunner({ numOfTasks: 100, workerPath, runtime: 'worker_threads' }); + const taskRunner = initTaskRunner({ numOfTasks: 100, workerType: 'calculateMetrics', runtime: 'worker_threads' }); expect(Tinypool).toHaveBeenCalledWith( expect.objectContaining({ runtime: 'worker_threads', + workerData: expect.objectContaining({ + workerType: 'calculateMetrics', + }), }), ); expect(taskRunner).toHaveProperty('run'); From 1f5a92932e2188dc5cb2ba9483c9a7ce279b8a36 Mon Sep 17 00:00:00 2001 From: Kazuki Yamada Date: Wed, 31 Dec 2025 17:58:10 +0900 Subject: [PATCH 02/13] feat(website-server): Support bundled environment for worker spawning Modify website server entry point 
to support being used as both server and worker entry in bundled environments: - Re-export unified worker handler from repomix for Tinypool - Add isTinypoolWorker() check to skip server startup when running as worker - Wrap server initialization in conditional block This enables esbuild bundling of the server while maintaining worker functionality for Cloud Run cold start optimization. --- website/server/src/index.ts | 84 +++++++++++++++++++++---------------- 1 file changed, 48 insertions(+), 36 deletions(-) diff --git a/website/server/src/index.ts b/website/server/src/index.ts index a1b6e311..670a2306 100644 --- a/website/server/src/index.ts +++ b/website/server/src/index.ts @@ -10,51 +10,63 @@ import { rateLimitMiddleware } from './middlewares/rateLimit.js'; import { logInfo, logMemoryUsage } from './utils/logger.js'; import { getProcessConcurrency } from './utils/processConcurrency.js'; -const API_TIMEOUT_MS = 35_000; +// Re-export unified worker for bundled environment +// When this file is used as a Tinypool worker, it needs to export the handler +export { unifiedWorkerHandler as default, unifiedWorkerTermination as onWorkerTermination } from 'repomix'; -// Log server metrics on startup -logInfo('Server starting', { - metrics: { +// Check if running as a Tinypool worker (bundled environment) +// In bundled mode, this file is used both as server entry and worker entry +const isTinypoolWorker = (): boolean => { + const tinypoolState = (process as NodeJS.Process & { __tinypool_state__?: { isTinypoolWorker?: boolean } }) + .__tinypool_state__; + return tinypoolState?.isTinypoolWorker ?? 
false; +}; + +// Skip server initialization if running as a Tinypool worker +if (!isTinypoolWorker()) { + const API_TIMEOUT_MS = 35_000; + + // Log server metrics on startup + logInfo('Server starting', { + metrics: { + processConcurrency: getProcessConcurrency(), + }, + }); + + // Log initial memory usage + logMemoryUsage('Server startup', { processConcurrency: getProcessConcurrency(), - }, -}); + }); -// Log initial memory usage -logMemoryUsage('Server startup', { - processConcurrency: getProcessConcurrency(), -}); + const app = new Hono(); -const app = new Hono(); + // Configure CORS + app.use('/*', corsMiddleware); -// Configure CORS -app.use('/*', corsMiddleware); + // Enable compression + app.use(compress()); -// Enable compression -app.use(compress()); + // Set timeout for API routes + app.use('/api', timeout(API_TIMEOUT_MS)); -// Set timeout for API routes -app.use('/api', timeout(API_TIMEOUT_MS)); + // Setup custom logger + app.use('*', cloudLoggerMiddleware()); -// Setup custom logger -app.use('*', cloudLoggerMiddleware()); + // Apply rate limiting to API routes + app.use('/api/*', rateLimitMiddleware()); -// Apply rate limiting to API routes -app.use('/api/*', rateLimitMiddleware()); + // Health check endpoint + app.get('/health', (c) => c.text('OK')); -// Health check endpoint -app.get('/health', (c) => c.text('OK')); + // Main packing endpoint + app.post('/api/pack', bodyLimitMiddleware, packAction); -// Main packing endpoint -app.post('/api/pack', bodyLimitMiddleware, packAction); + // Start server + const port = process.env.PORT ? Number.parseInt(process.env.PORT, 10) : 3000; + logInfo(`Server starting on port ${port}`); -// Start server -const port = process.env.PORT ? 
Number.parseInt(process.env.PORT, 10) : 3000; -logInfo(`Server starting on port ${port}`); - -serve({ - fetch: app.fetch, - port, -}); - -// Export app for testing -export default app; + serve({ + fetch: app.fetch, + port, + }); +} From 792dcaaeb66d51b65c082a398c93bbd3131dec88 Mon Sep 17 00:00:00 2001 From: Kazuki Yamada Date: Wed, 31 Dec 2025 22:08:17 +0900 Subject: [PATCH 03/13] fix(worker): Use individual worker files in non-bundled environment Fix regression where fileCollect tasks were incorrectly routed to defaultActionWorker due to REPOMIX_WORKER_TYPE environment variable inheritance in child_process mode. Changes: - Add getWorkerPath() that returns individual worker file paths - Only use unified worker when REPOMIX_WORKER_PATH is explicitly set - Move WorkerType definition to processConcurrency.ts to avoid circular import This ensures the regular CLI works correctly while still supporting bundled environments when REPOMIX_WORKER_PATH is set. --- src/shared/processConcurrency.ts | 36 +++++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/src/shared/processConcurrency.ts b/src/shared/processConcurrency.ts index 063794dc..022356c0 100644 --- a/src/shared/processConcurrency.ts +++ b/src/shared/processConcurrency.ts @@ -1,16 +1,46 @@ import os from 'node:os'; import { type Options, Tinypool } from 'tinypool'; import { logger } from './logger.js'; -import { type WorkerType, getUnifiedWorkerPath } from './unifiedWorker.js'; export type WorkerRuntime = NonNullable; +// Worker type definitions - shared with unifiedWorker.ts +export type WorkerType = 'fileCollect' | 'fileProcess' | 'securityCheck' | 'calculateMetrics' | 'defaultAction'; + export interface WorkerOptions { numOfTasks: number; workerType: WorkerType; runtime: WorkerRuntime; } +/** + * Get the worker file path for a given worker type. + * In bundled environments (REPOMIX_WORKER_PATH set), uses the unified worker. + * Otherwise, uses individual worker files. 
+ */ +const getWorkerPath = (workerType: WorkerType): string => { + // Bundled environment: use unified worker path + if (process.env.REPOMIX_WORKER_PATH) { + return process.env.REPOMIX_WORKER_PATH; + } + + // Non-bundled environment: use individual worker files + switch (workerType) { + case 'fileCollect': + return new URL('../core/file/workers/fileCollectWorker.js', import.meta.url).href; + case 'fileProcess': + return new URL('../core/file/workers/fileProcessWorker.js', import.meta.url).href; + case 'securityCheck': + return new URL('../core/security/workers/securityCheckWorker.js', import.meta.url).href; + case 'calculateMetrics': + return new URL('../core/metrics/workers/calculateMetricsWorker.js', import.meta.url).href; + case 'defaultAction': + return new URL('../cli/actions/workers/defaultActionWorker.js', import.meta.url).href; + default: + throw new Error(`Unknown worker type: ${workerType}`); + } +}; + // Worker initialization is expensive, so we prefer fewer threads unless there are many files const TASKS_PER_THREAD = 100; @@ -36,8 +66,8 @@ export const createWorkerPool = (options: WorkerOptions): Tinypool => { const { numOfTasks, workerType, runtime = 'child_process' } = options; const { minThreads, maxThreads } = getWorkerThreadCount(numOfTasks); - // Use unified worker entry point for bundling support - const workerPath = getUnifiedWorkerPath(); + // Get worker path - uses unified worker in bundled env, individual files otherwise + const workerPath = getWorkerPath(workerType); logger.trace( `Initializing worker pool with min=${minThreads}, max=${maxThreads} threads, runtime=${runtime}. 
Worker type: ${workerType}`, From cea7bdc4fe07b3e785f5ace40a62d8c741bf378d Mon Sep 17 00:00:00 2001 From: Kazuki Yamada Date: Wed, 31 Dec 2025 22:23:45 +0900 Subject: [PATCH 04/13] fix(worker): Infer worker type from task structure for bundled env Fix issue where Tinypool reuses child processes across different worker pools in bundled environments, causing tasks to be routed to incorrect handlers. Changes: - Add inferWorkerTypeFromTask() to determine worker type from task structure - Add getWorkerTypeFromWorkerData() to handle Tinypool's array workerData format - Cache handlers by worker type instead of single loadedHandler - Dynamically select handler based on inferred or configured worker type This enables bundled website server to correctly handle all worker types (fileCollect, fileProcess, securityCheck, calculateMetrics, defaultAction) even when child processes are reused. --- src/shared/unifiedWorker.ts | 164 ++++++++++++++++++++++++++---------- 1 file changed, 118 insertions(+), 46 deletions(-) diff --git a/src/shared/unifiedWorker.ts b/src/shared/unifiedWorker.ts index 058423a6..2f792a42 100644 --- a/src/shared/unifiedWorker.ts +++ b/src/shared/unifiedWorker.ts @@ -37,107 +37,179 @@ export type WorkerType = type WorkerHandler = (task: any) => Promise; type WorkerCleanup = () => void | Promise; -// Store the loaded handler and cleanup function -let loadedHandler: WorkerHandler | null = null; -let loadedCleanup: WorkerCleanup | null = null; +// Cache loaded handlers by worker type +const handlerCache = new Map(); /** * Dynamically load the appropriate worker handler based on workerType. * Uses dynamic imports to avoid loading all worker code when not needed. + * Results are cached for reuse. 
*/ const loadWorkerHandler = async (workerType: WorkerType): Promise<{ handler: WorkerHandler; cleanup?: WorkerCleanup }> => { + // Check cache first + const cached = handlerCache.get(workerType); + if (cached) { + return cached; + } + + let result: { handler: WorkerHandler; cleanup?: WorkerCleanup }; + switch (workerType) { case 'fileCollect': { const module = await import('../core/file/workers/fileCollectWorker.js'); - return { handler: module.default as WorkerHandler, cleanup: module.onWorkerTermination }; + result = { handler: module.default as WorkerHandler, cleanup: module.onWorkerTermination }; + break; } case 'fileProcess': { const module = await import('../core/file/workers/fileProcessWorker.js'); - return { handler: module.default as WorkerHandler, cleanup: module.onWorkerTermination }; + result = { handler: module.default as WorkerHandler, cleanup: module.onWorkerTermination }; + break; } case 'securityCheck': { const module = await import('../core/security/workers/securityCheckWorker.js'); - return { handler: module.default as WorkerHandler, cleanup: module.onWorkerTermination }; + result = { handler: module.default as WorkerHandler, cleanup: module.onWorkerTermination }; + break; } case 'calculateMetrics': { const module = await import('../core/metrics/workers/calculateMetricsWorker.js'); - return { handler: module.default as WorkerHandler, cleanup: module.onWorkerTermination }; + result = { handler: module.default as WorkerHandler, cleanup: module.onWorkerTermination }; + break; } case 'defaultAction': { const module = await import('../cli/actions/workers/defaultActionWorker.js'); - return { handler: module.default as WorkerHandler, cleanup: module.onWorkerTermination }; + result = { handler: module.default as WorkerHandler, cleanup: module.onWorkerTermination }; + break; } default: throw new Error(`Unknown worker type: ${workerType}`); } + + // Cache the result + handlerCache.set(workerType, result); + return result; }; /** - * Initialize the worker 
handler if running as a Tinypool worker. - * This is called at module load time. + * Infer worker type from task structure. + * This is used in bundled environments where Tinypool may reuse child processes + * across different worker pools. */ -const initializeWorker = async (): Promise => { - if (!isTinypoolWorker()) { - return; +const inferWorkerTypeFromTask = (task: unknown): WorkerType | null => { + if (!task || typeof task !== 'object') { + return null; } - // Get workerType from multiple sources: - // 1. worker_threads workerData (for worker_threads runtime) - // 2. Environment variable (for child_process runtime) - const workerType: WorkerType | undefined = - (workerData as { workerType?: WorkerType } | undefined)?.workerType ?? - (process.env.REPOMIX_WORKER_TYPE as WorkerType | undefined); + const taskObj = task as Record; - // Debug: Log worker initialization - if (process.env.REPOMIX_DEBUG_WORKER) { - console.error( - `[UnifiedWorker] Initializing: workerType=${workerType}, ` + - `env.REPOMIX_WORKER_TYPE=${process.env.REPOMIX_WORKER_TYPE}, ` + - `workerData=${JSON.stringify(workerData)}, ` + - `PID=${process.pid}`, - ); + // defaultAction: has directories, cwd, config, cliOptions + if ('directories' in taskObj && 'cwd' in taskObj && 'config' in taskObj) { + return 'defaultAction'; } - if (!workerType) { - throw new Error('Worker started without workerType (check workerData or REPOMIX_WORKER_TYPE env)'); + // defaultAction ping task + if ('ping' in taskObj) { + return 'defaultAction'; } - const { handler, cleanup } = await loadWorkerHandler(workerType); - loadedHandler = handler; - loadedCleanup = cleanup ?? 
null; + // fileCollect: has filePath, rootDir, maxFileSize + if ('filePath' in taskObj && 'rootDir' in taskObj && 'maxFileSize' in taskObj) { + return 'fileCollect'; + } + + // fileProcess: has filePath, content, config (with output property) + if ('filePath' in taskObj && 'content' in taskObj && 'config' in taskObj) { + return 'fileProcess'; + } + + // securityCheck: has filePath, content (no config) + if ('filePath' in taskObj && 'content' in taskObj && !('config' in taskObj)) { + return 'securityCheck'; + } + + // calculateMetrics: has filePath, content, encoding + if ('filePath' in taskObj && 'content' in taskObj && 'encoding' in taskObj) { + return 'calculateMetrics'; + } + + return null; }; -// Initialize worker on module load (only in worker threads) -const initPromise = initializeWorker(); +/** + * Get workerType from workerData. + * In Tinypool child_process mode, workerData is an array. + */ +const getWorkerTypeFromWorkerData = (): WorkerType | undefined => { + if (!workerData) { + return undefined; + } + + // Handle array format (Tinypool child_process mode) + if (Array.isArray(workerData)) { + for (const item of workerData) { + if (item && typeof item === 'object' && 'workerType' in item) { + return item.workerType as WorkerType; + } + } + return undefined; + } + + // Handle object format (worker_threads mode) + if (typeof workerData === 'object' && 'workerType' in workerData) { + return (workerData as { workerType?: WorkerType }).workerType; + } + + return undefined; +}; /** * Default export for Tinypool. - * This function is called for each task and delegates to the loaded handler. + * This function is called for each task and delegates to the appropriate handler. + * + * In bundled environments where Tinypool may reuse child processes across different + * worker pools, we use task-based inference to determine the correct handler. 
*/ export default async (task: unknown): Promise => { - // Ensure initialization is complete - await initPromise; + // Determine worker type: try workerData/env first, then infer from task + let workerType: WorkerType | undefined = + getWorkerTypeFromWorkerData() ?? (process.env.REPOMIX_WORKER_TYPE as WorkerType | undefined); - if (!loadedHandler) { - throw new Error('Worker handler not initialized'); - } + // In bundled environments, Tinypool may reuse child processes. + // If the task doesn't match the initially configured worker type, infer from task. + const inferredType = inferWorkerTypeFromTask(task); - // Debug: Log task details in bundled environment + // Debug: Log task details if (process.env.REPOMIX_DEBUG_WORKER) { - console.error('[UnifiedWorker] Task received:', JSON.stringify(task, null, 2)); + console.error( + `[UnifiedWorker] Task received: workerType=${workerType}, inferredType=${inferredType}, ` + + `PID=${process.pid}, task=${JSON.stringify(task, null, 2)}`, + ); } - return loadedHandler(task); + // Use inferred type if available (more reliable in bundled env) + if (inferredType) { + workerType = inferredType; + } + + if (!workerType) { + throw new Error('Cannot determine worker type from workerData, env, or task structure'); + } + + // Load handler (cached) + const { handler } = await loadWorkerHandler(workerType); + return handler(task); }; /** * Cleanup function for Tinypool teardown. - * Delegates to the loaded worker's cleanup function. + * Cleans up all cached handlers. 
*/ export const onWorkerTermination = async (): Promise => { - if (loadedCleanup) { - await loadedCleanup(); + for (const { cleanup } of handlerCache.values()) { + if (cleanup) { + await cleanup(); + } } + handlerCache.clear(); }; /** From f8fedf0c879528763e928de3d5edf2d9854ae564 Mon Sep 17 00:00:00 2001 From: Kazuki Yamada Date: Wed, 31 Dec 2025 22:38:55 +0900 Subject: [PATCH 05/13] refactor(worker): Remove debug logging and unused exports Remove code that was added for debugging during development: - Remove unused isTinypoolWorker function from unifiedWorker.ts - Remove REPOMIX_DEBUG_WORKER logging from unifiedWorker.ts - Remove debug logging from defaultActionWorker.ts - Remove unused getUnifiedWorkerPath export - Update tests to use workerType instead of workerPath --- .../actions/workers/defaultActionWorker.ts | 9 ++-- src/core/treeSitter/loadLanguage.ts | 2 +- src/index.ts | 1 - src/shared/unifiedWorker.ts | 50 ++----------------- .../metrics/calculateGitDiffMetrics.test.ts | 2 +- .../metrics/calculateGitLogMetrics.test.ts | 2 +- .../metrics/calculateOutputMetrics.test.ts | 16 +++--- .../calculateSelectiveFileMetrics.test.ts | 4 +- tests/shared/processConcurrency.test.ts | 7 +-- 9 files changed, 23 insertions(+), 70 deletions(-) diff --git a/src/cli/actions/workers/defaultActionWorker.ts b/src/cli/actions/workers/defaultActionWorker.ts index 8f81de8d..9ea12b76 100644 --- a/src/cli/actions/workers/defaultActionWorker.ts +++ b/src/cli/actions/workers/defaultActionWorker.ts @@ -36,11 +36,6 @@ function defaultActionWorker(task: PingTask): Promise; async function defaultActionWorker( task: DefaultActionTask | PingTask, ): Promise { - // Debug: Log received task - if (process.env.REPOMIX_DEBUG_WORKER) { - console.error('[DefaultActionWorker] Task received:', typeof task, task); - } - // Handle ping requests for Bun compatibility check if ('ping' in task) { return { @@ -58,7 +53,9 @@ async function defaultActionWorker( // Additional validation for required fields if 
(!directories || !Array.isArray(directories)) { - throw new Error(`Invalid task.directories: expected array, got ${typeof directories}. Task keys: ${Object.keys(task).join(', ')}`); + throw new Error( + `Invalid task.directories: expected array, got ${typeof directories}. Task keys: ${Object.keys(task).join(', ')}`, + ); } logger.trace('Worker: Using pre-loaded config:', config); diff --git a/src/core/treeSitter/loadLanguage.ts b/src/core/treeSitter/loadLanguage.ts index 975fae52..5e34e0bd 100644 --- a/src/core/treeSitter/loadLanguage.ts +++ b/src/core/treeSitter/loadLanguage.ts @@ -1,6 +1,6 @@ import fs from 'node:fs/promises'; -import path from 'node:path'; import { createRequire } from 'node:module'; +import path from 'node:path'; import { Language } from 'web-tree-sitter'; const require = createRequire(import.meta.url); diff --git a/src/index.ts b/src/index.ts index 2dfc1ef1..7a695c5d 100644 --- a/src/index.ts +++ b/src/index.ts @@ -64,6 +64,5 @@ export { runRemoteAction } from './cli/actions/remoteAction.js'; export { default as unifiedWorkerHandler, onWorkerTermination as unifiedWorkerTermination, - getUnifiedWorkerPath, type WorkerType, } from './shared/unifiedWorker.js'; diff --git a/src/shared/unifiedWorker.ts b/src/shared/unifiedWorker.ts index 2f792a42..f829259c 100644 --- a/src/shared/unifiedWorker.ts +++ b/src/shared/unifiedWorker.ts @@ -9,28 +9,10 @@ * based on the workerType specified in workerData. 
*/ -import { isMainThread, workerData } from 'node:worker_threads'; - -// Detect if running as a Tinypool worker -// For worker_threads: isMainThread is false -// For child_process: process.__tinypool_state__.isTinypoolWorker is true -const isTinypoolWorker = (): boolean => { - // Check for child_process runtime (Tinypool sets this before importing worker) - const tinypoolState = (process as NodeJS.Process & { __tinypool_state__?: { isTinypoolWorker?: boolean } }).__tinypool_state__; - if (tinypoolState?.isTinypoolWorker) { - return true; - } - // Check for worker_threads runtime - return !isMainThread; -}; +import { workerData } from 'node:worker_threads'; // Worker type definitions -export type WorkerType = - | 'fileCollect' - | 'fileProcess' - | 'securityCheck' - | 'calculateMetrics' - | 'defaultAction'; +export type WorkerType = 'fileCollect' | 'fileProcess' | 'securityCheck' | 'calculateMetrics' | 'defaultAction'; // Worker handler type - uses 'any' to accommodate different worker signatures // biome-ignore lint/suspicious/noExplicitAny: Worker handlers have varying signatures @@ -45,7 +27,9 @@ const handlerCache = new Map => { +const loadWorkerHandler = async ( + workerType: WorkerType, +): Promise<{ handler: WorkerHandler; cleanup?: WorkerCleanup }> => { // Check cache first const cached = handlerCache.get(workerType); if (cached) { @@ -177,14 +161,6 @@ export default async (task: unknown): Promise => { // If the task doesn't match the initially configured worker type, infer from task. 
const inferredType = inferWorkerTypeFromTask(task); - // Debug: Log task details - if (process.env.REPOMIX_DEBUG_WORKER) { - console.error( - `[UnifiedWorker] Task received: workerType=${workerType}, inferredType=${inferredType}, ` + - `PID=${process.pid}, task=${JSON.stringify(task, null, 2)}`, - ); - } - // Use inferred type if available (more reliable in bundled env) if (inferredType) { workerType = inferredType; @@ -211,19 +187,3 @@ export const onWorkerTermination = async (): Promise => { } handlerCache.clear(); }; - -/** - * Get the path to this unified worker module. - * Used by processConcurrency.ts to spawn workers. - * - * In bundled environments, set REPOMIX_WORKER_PATH to the bundled file path. - * The bundled file should contain all worker code, and when imported as a worker, - * the isTinypoolWorker() check will trigger initialization. - */ -export const getUnifiedWorkerPath = (): string => { - // Allow override for bundled environments - if (process.env.REPOMIX_WORKER_PATH) { - return process.env.REPOMIX_WORKER_PATH; - } - return new URL('./unifiedWorker.js', import.meta.url).href; -}; diff --git a/tests/core/metrics/calculateGitDiffMetrics.test.ts b/tests/core/metrics/calculateGitDiffMetrics.test.ts index a84a8b9b..8fb21fe6 100644 --- a/tests/core/metrics/calculateGitDiffMetrics.test.ts +++ b/tests/core/metrics/calculateGitDiffMetrics.test.ts @@ -67,7 +67,7 @@ describe('calculateGitDiffMetrics', () => { cwd: '/test/project', }; - const mockTaskRunner = mockInitTaskRunner({ numOfTasks: 1, workerPath: '', runtime: 'worker_threads' }); + const mockTaskRunner = mockInitTaskRunner({ numOfTasks: 1, workerType: 'calculateMetrics', runtime: 'worker_threads' }); beforeEach(() => { vi.clearAllMocks(); diff --git a/tests/core/metrics/calculateGitLogMetrics.test.ts b/tests/core/metrics/calculateGitLogMetrics.test.ts index 86f23439..24e7e2ff 100644 --- a/tests/core/metrics/calculateGitLogMetrics.test.ts +++ b/tests/core/metrics/calculateGitLogMetrics.test.ts @@ 
-67,7 +67,7 @@ describe('calculateGitLogMetrics', () => { cwd: '/test/project', }; - const mockTaskRunner = mockInitTaskRunner({ numOfTasks: 1, workerPath: '', runtime: 'worker_threads' }); + const mockTaskRunner = mockInitTaskRunner({ numOfTasks: 1, workerType: 'calculateMetrics', runtime: 'worker_threads' }); beforeEach(() => { vi.clearAllMocks(); diff --git a/tests/core/metrics/calculateOutputMetrics.test.ts b/tests/core/metrics/calculateOutputMetrics.test.ts index bc2f900c..90b0ae5c 100644 --- a/tests/core/metrics/calculateOutputMetrics.test.ts +++ b/tests/core/metrics/calculateOutputMetrics.test.ts @@ -24,7 +24,7 @@ describe('calculateOutputMetrics', () => { const path = 'test.txt'; const result = await calculateOutputMetrics(content, encoding, path, { - taskRunner: mockInitTaskRunner({ numOfTasks: 1, workerPath: '', runtime: 'worker_threads' }), + taskRunner: mockInitTaskRunner({ numOfTasks: 1, workerType: 'calculateMetrics', runtime: 'worker_threads' }), }); expect(result).toBe(2); // 'test content' should be counted as 2 tokens @@ -35,7 +35,7 @@ describe('calculateOutputMetrics', () => { const encoding = 'o200k_base'; const result = await calculateOutputMetrics(content, encoding, undefined, { - taskRunner: mockInitTaskRunner({ numOfTasks: 1, workerPath: '', runtime: 'worker_threads' }), + taskRunner: mockInitTaskRunner({ numOfTasks: 1, workerType: 'calculateMetrics', runtime: 'worker_threads' }), }); expect(result).toBe(2); @@ -59,7 +59,7 @@ describe('calculateOutputMetrics', () => { await expect( calculateOutputMetrics(content, encoding, undefined, { - taskRunner: mockErrorTaskRunner({ numOfTasks: 1, workerPath: '', runtime: 'worker_threads' }), + taskRunner: mockErrorTaskRunner({ numOfTasks: 1, workerType: 'calculateMetrics', runtime: 'worker_threads' }), }), ).rejects.toThrow('Worker error'); @@ -71,7 +71,7 @@ describe('calculateOutputMetrics', () => { const encoding = 'o200k_base'; const result = await calculateOutputMetrics(content, encoding, 
undefined, { - taskRunner: mockInitTaskRunner({ numOfTasks: 1, workerPath: '', runtime: 'worker_threads' }), + taskRunner: mockInitTaskRunner({ numOfTasks: 1, workerType: 'calculateMetrics', runtime: 'worker_threads' }), }); expect(result).toBe(0); @@ -82,7 +82,7 @@ describe('calculateOutputMetrics', () => { const encoding = 'o200k_base'; const result = await calculateOutputMetrics(content, encoding, undefined, { - taskRunner: mockInitTaskRunner({ numOfTasks: 1, workerPath: '', runtime: 'worker_threads' }), + taskRunner: mockInitTaskRunner({ numOfTasks: 1, workerType: 'calculateMetrics', runtime: 'worker_threads' }), }); expect(result).toBeGreaterThan(0); @@ -110,7 +110,7 @@ describe('calculateOutputMetrics', () => { }; const result = await calculateOutputMetrics(content, encoding, path, { - taskRunner: mockParallelTaskRunner({ numOfTasks: 1, workerPath: '', runtime: 'worker_threads' }), + taskRunner: mockParallelTaskRunner({ numOfTasks: 1, workerType: 'calculateMetrics', runtime: 'worker_threads' }), }); expect(chunksProcessed).toBeGreaterThan(1); // Should have processed multiple chunks @@ -135,7 +135,7 @@ describe('calculateOutputMetrics', () => { await expect( calculateOutputMetrics(content, encoding, undefined, { - taskRunner: mockErrorTaskRunner({ numOfTasks: 1, workerPath: '', runtime: 'worker_threads' }), + taskRunner: mockErrorTaskRunner({ numOfTasks: 1, workerType: 'calculateMetrics', runtime: 'worker_threads' }), }), ).rejects.toThrow('Parallel processing error'); @@ -161,7 +161,7 @@ describe('calculateOutputMetrics', () => { }; await calculateOutputMetrics(content, encoding, undefined, { - taskRunner: mockChunkTrackingTaskRunner({ numOfTasks: 1, workerPath: '', runtime: 'worker_threads' }), + taskRunner: mockChunkTrackingTaskRunner({ numOfTasks: 1, workerType: 'calculateMetrics', runtime: 'worker_threads' }), }); // Check that chunks are roughly equal in size diff --git a/tests/core/metrics/calculateSelectiveFileMetrics.test.ts 
b/tests/core/metrics/calculateSelectiveFileMetrics.test.ts index 8a036094..2e89b416 100644 --- a/tests/core/metrics/calculateSelectiveFileMetrics.test.ts +++ b/tests/core/metrics/calculateSelectiveFileMetrics.test.ts @@ -36,7 +36,7 @@ describe('calculateSelectiveFileMetrics', () => { 'o200k_base', progressCallback, { - taskRunner: mockInitTaskRunner({ numOfTasks: 1, workerPath: '', runtime: 'worker_threads' }), + taskRunner: mockInitTaskRunner({ numOfTasks: 1, workerType: 'calculateMetrics', runtime: 'worker_threads' }), }, ); @@ -57,7 +57,7 @@ describe('calculateSelectiveFileMetrics', () => { 'o200k_base', progressCallback, { - taskRunner: mockInitTaskRunner({ numOfTasks: 1, workerPath: '', runtime: 'worker_threads' }), + taskRunner: mockInitTaskRunner({ numOfTasks: 1, workerType: 'calculateMetrics', runtime: 'worker_threads' }), }, ); diff --git a/tests/shared/processConcurrency.test.ts b/tests/shared/processConcurrency.test.ts index 97f87bfc..ee90c740 100644 --- a/tests/shared/processConcurrency.test.ts +++ b/tests/shared/processConcurrency.test.ts @@ -10,9 +10,6 @@ import { vi.mock('node:os'); vi.mock('tinypool'); -vi.mock('../../src/shared/unifiedWorker.js', () => ({ - getUnifiedWorkerPath: () => '/mocked/path/to/unifiedWorker.js', -})); describe('processConcurrency', () => { describe('getProcessConcurrency', () => { @@ -78,7 +75,7 @@ describe('processConcurrency', () => { const tinypool = createWorkerPool({ numOfTasks: 500, workerType: 'fileCollect', runtime: 'child_process' }); expect(Tinypool).toHaveBeenCalledWith({ - filename: '/mocked/path/to/unifiedWorker.js', + filename: expect.stringContaining('fileCollectWorker.js'), runtime: 'child_process', minThreads: 1, maxThreads: 4, // Math.min(4, 500/100) = 4 @@ -101,7 +98,7 @@ describe('processConcurrency', () => { const tinypool = createWorkerPool({ numOfTasks: 500, workerType: 'securityCheck', runtime: 'worker_threads' }); expect(Tinypool).toHaveBeenCalledWith({ - filename: 
'/mocked/path/to/unifiedWorker.js', + filename: expect.stringContaining('securityCheckWorker.js'), runtime: 'worker_threads', minThreads: 1, maxThreads: 4, // Math.min(4, 500/100) = 4 From 8ef6af4d7000d52b71d8d54984629dd4c6774d9d Mon Sep 17 00:00:00 2001 From: Kazuki Yamada Date: Wed, 31 Dec 2025 22:49:46 +0900 Subject: [PATCH 06/13] refactor(worker): Clean up unified worker implementation - Consolidate WorkerType definition to unifiedWorker.ts - Fix inferWorkerTypeFromTask order: check calculateMetrics before securityCheck - Simplify cliOptions handling in defaultActionWorker --- .../actions/workers/defaultActionWorker.ts | 23 +++++++------------ src/shared/processConcurrency.ts | 5 ++-- src/shared/unifiedWorker.ts | 14 +++++------ 3 files changed, 18 insertions(+), 24 deletions(-) diff --git a/src/cli/actions/workers/defaultActionWorker.ts b/src/cli/actions/workers/defaultActionWorker.ts index 9ea12b76..44d8665a 100644 --- a/src/cli/actions/workers/defaultActionWorker.ts +++ b/src/cli/actions/workers/defaultActionWorker.ts @@ -49,31 +49,24 @@ async function defaultActionWorker( } // At this point, task is guaranteed to be DefaultActionTask - const { directories, cwd, config, cliOptions = {}, stdinFilePaths } = task; + const { directories, cwd, config, cliOptions, stdinFilePaths } = task; - // Additional validation for required fields if (!directories || !Array.isArray(directories)) { - throw new Error( - `Invalid task.directories: expected array, got ${typeof directories}. Task keys: ${Object.keys(task).join(', ')}`, - ); + throw new Error('Invalid task: directories must be an array'); } - logger.trace('Worker: Using pre-loaded config:', config); - logger.trace('Worker: cliOptions:', cliOptions); + // Provide defaults for bundled environments where cliOptions might be undefined + const safeCliOptions: CliOptions = cliOptions ?? 
({} as CliOptions); - // Initialize spinner in worker - // Use optional cliOptions to handle bundled environments where cliOptions might be undefined - const spinner = new Spinner('Initializing...', cliOptions as CliOptions); + logger.trace('Worker: Using pre-loaded config:', config); + + const spinner = new Spinner('Initializing...', safeCliOptions); spinner.start(); let packResult: PackResult; try { - // Use optional chaining to safely access cliOptions properties - const skillName = (cliOptions as CliOptions)?.skillName; - const skillDir = (cliOptions as CliOptions)?.skillDir; - const skillProjectName = (cliOptions as CliOptions)?.skillProjectName; - const skillSourceUrl = (cliOptions as CliOptions)?.skillSourceUrl; + const { skillName, skillDir, skillProjectName, skillSourceUrl } = safeCliOptions; const packOptions = { skillName, skillDir, skillProjectName, skillSourceUrl }; if (stdinFilePaths) { diff --git a/src/shared/processConcurrency.ts b/src/shared/processConcurrency.ts index 022356c0..4c973e32 100644 --- a/src/shared/processConcurrency.ts +++ b/src/shared/processConcurrency.ts @@ -1,11 +1,12 @@ import os from 'node:os'; import { type Options, Tinypool } from 'tinypool'; import { logger } from './logger.js'; +import type { WorkerType } from './unifiedWorker.js'; export type WorkerRuntime = NonNullable; -// Worker type definitions - shared with unifiedWorker.ts -export type WorkerType = 'fileCollect' | 'fileProcess' | 'securityCheck' | 'calculateMetrics' | 'defaultAction'; +// Re-export WorkerType for external consumers +export type { WorkerType } from './unifiedWorker.js'; export interface WorkerOptions { numOfTasks: number; diff --git a/src/shared/unifiedWorker.ts b/src/shared/unifiedWorker.ts index f829259c..b830bb53 100644 --- a/src/shared/unifiedWorker.ts +++ b/src/shared/unifiedWorker.ts @@ -100,21 +100,21 @@ const inferWorkerTypeFromTask = (task: unknown): WorkerType | null => { return 'fileCollect'; } - // fileProcess: has filePath, content, config 
(with output property) + // fileProcess: has filePath, content, config if ('filePath' in taskObj && 'content' in taskObj && 'config' in taskObj) { return 'fileProcess'; } - // securityCheck: has filePath, content (no config) - if ('filePath' in taskObj && 'content' in taskObj && !('config' in taskObj)) { - return 'securityCheck'; - } - - // calculateMetrics: has filePath, content, encoding + // calculateMetrics: has filePath, content, encoding (must check before securityCheck) if ('filePath' in taskObj && 'content' in taskObj && 'encoding' in taskObj) { return 'calculateMetrics'; } + // securityCheck: has filePath, content (no config, no encoding) + if ('filePath' in taskObj && 'content' in taskObj) { + return 'securityCheck'; + } + return null; }; From 9ffb2b89077a7b06db78aea136586b674e907a6f Mon Sep 17 00:00:00 2001 From: Kazuki Yamada Date: Wed, 31 Dec 2025 23:04:23 +0900 Subject: [PATCH 07/13] fix(worker): Fix task inference to match actual task structures Based on multi-agent code review: - Fix fileProcess inference: check for rawFile instead of filePath/content - Fix calculateMetrics inference: check for content/encoding (path is optional) - Fix securityCheck inference: add type field check for specificity - Remove unnecessary type assertion in defaultActionWorker --- src/cli/actions/workers/defaultActionWorker.ts | 2 +- src/shared/unifiedWorker.ts | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/cli/actions/workers/defaultActionWorker.ts b/src/cli/actions/workers/defaultActionWorker.ts index 44d8665a..8f5002be 100644 --- a/src/cli/actions/workers/defaultActionWorker.ts +++ b/src/cli/actions/workers/defaultActionWorker.ts @@ -56,7 +56,7 @@ async function defaultActionWorker( } // Provide defaults for bundled environments where cliOptions might be undefined - const safeCliOptions: CliOptions = cliOptions ?? ({} as CliOptions); + const safeCliOptions: CliOptions = cliOptions ?? 
{}; logger.trace('Worker: Using pre-loaded config:', config); diff --git a/src/shared/unifiedWorker.ts b/src/shared/unifiedWorker.ts index b830bb53..1adec3c9 100644 --- a/src/shared/unifiedWorker.ts +++ b/src/shared/unifiedWorker.ts @@ -100,18 +100,18 @@ const inferWorkerTypeFromTask = (task: unknown): WorkerType | null => { return 'fileCollect'; } - // fileProcess: has filePath, content, config - if ('filePath' in taskObj && 'content' in taskObj && 'config' in taskObj) { + // fileProcess: has rawFile (nested object) and config + if ('rawFile' in taskObj && 'config' in taskObj) { return 'fileProcess'; } - // calculateMetrics: has filePath, content, encoding (must check before securityCheck) - if ('filePath' in taskObj && 'content' in taskObj && 'encoding' in taskObj) { + // calculateMetrics: has content, encoding (must check before securityCheck) + if ('content' in taskObj && 'encoding' in taskObj) { return 'calculateMetrics'; } - // securityCheck: has filePath, content (no config, no encoding) - if ('filePath' in taskObj && 'content' in taskObj) { + // securityCheck: has filePath, content, type + if ('filePath' in taskObj && 'content' in taskObj && 'type' in taskObj) { return 'securityCheck'; } From f79232f81f3593cb1c85ace0d466ebfec3b1d6bc Mon Sep 17 00:00:00 2001 From: Kazuki Yamada Date: Thu, 1 Jan 2026 00:41:28 +0900 Subject: [PATCH 08/13] test(worker): Add unified worker tests and unify async signatures - Add comprehensive tests for unifiedWorker.ts covering task inference and worker termination cleanup - Unify onWorkerTermination to async signature across all worker files for consistency (fileCollect, securityCheck, calculateMetrics) --- src/core/file/workers/fileCollectWorker.ts | 2 +- .../metrics/workers/calculateMetricsWorker.ts | 2 +- .../security/workers/securityCheckWorker.ts | 2 +- .../metrics/calculateGitDiffMetrics.test.ts | 6 +- .../metrics/calculateGitLogMetrics.test.ts | 6 +- .../metrics/calculateOutputMetrics.test.ts | 6 +- 
tests/shared/unifiedWorker.test.ts | 171 ++++++++++++++++++ 7 files changed, 189 insertions(+), 6 deletions(-) create mode 100644 tests/shared/unifiedWorker.test.ts diff --git a/src/core/file/workers/fileCollectWorker.ts b/src/core/file/workers/fileCollectWorker.ts index 875ef337..7da9d0ab 100644 --- a/src/core/file/workers/fileCollectWorker.ts +++ b/src/core/file/workers/fileCollectWorker.ts @@ -51,6 +51,6 @@ export default async ({ filePath, rootDir, maxFileSize }: FileCollectTask): Prom }; // Export cleanup function for Tinypool teardown (no cleanup needed for this worker) -export const onWorkerTermination = () => { +export const onWorkerTermination = async (): Promise => { // No cleanup needed for file collection worker }; diff --git a/src/core/metrics/workers/calculateMetricsWorker.ts b/src/core/metrics/workers/calculateMetricsWorker.ts index 34fc0ee4..241af02e 100644 --- a/src/core/metrics/workers/calculateMetricsWorker.ts +++ b/src/core/metrics/workers/calculateMetricsWorker.ts @@ -45,6 +45,6 @@ export default async (task: TokenCountTask): Promise => { }; // Export cleanup function for Tinypool teardown -export const onWorkerTermination = () => { +export const onWorkerTermination = async (): Promise => { freeTokenCounters(); }; diff --git a/src/core/security/workers/securityCheckWorker.ts b/src/core/security/workers/securityCheckWorker.ts index acc4c6b6..73e00a31 100644 --- a/src/core/security/workers/securityCheckWorker.ts +++ b/src/core/security/workers/securityCheckWorker.ts @@ -85,6 +85,6 @@ export const createSecretLintConfig = (): SecretLintCoreConfig => ({ }); // Export cleanup function for Tinypool teardown (no cleanup needed for this worker) -export const onWorkerTermination = () => { +export const onWorkerTermination = async (): Promise => { // No cleanup needed for security check worker }; diff --git a/tests/core/metrics/calculateGitDiffMetrics.test.ts b/tests/core/metrics/calculateGitDiffMetrics.test.ts index 8fb21fe6..adcdf5d7 100644 --- 
a/tests/core/metrics/calculateGitDiffMetrics.test.ts +++ b/tests/core/metrics/calculateGitDiffMetrics.test.ts @@ -67,7 +67,11 @@ describe('calculateGitDiffMetrics', () => { cwd: '/test/project', }; - const mockTaskRunner = mockInitTaskRunner({ numOfTasks: 1, workerType: 'calculateMetrics', runtime: 'worker_threads' }); + const mockTaskRunner = mockInitTaskRunner({ + numOfTasks: 1, + workerType: 'calculateMetrics', + runtime: 'worker_threads', + }); beforeEach(() => { vi.clearAllMocks(); diff --git a/tests/core/metrics/calculateGitLogMetrics.test.ts b/tests/core/metrics/calculateGitLogMetrics.test.ts index 24e7e2ff..1c53b90b 100644 --- a/tests/core/metrics/calculateGitLogMetrics.test.ts +++ b/tests/core/metrics/calculateGitLogMetrics.test.ts @@ -67,7 +67,11 @@ describe('calculateGitLogMetrics', () => { cwd: '/test/project', }; - const mockTaskRunner = mockInitTaskRunner({ numOfTasks: 1, workerType: 'calculateMetrics', runtime: 'worker_threads' }); + const mockTaskRunner = mockInitTaskRunner({ + numOfTasks: 1, + workerType: 'calculateMetrics', + runtime: 'worker_threads', + }); beforeEach(() => { vi.clearAllMocks(); diff --git a/tests/core/metrics/calculateOutputMetrics.test.ts b/tests/core/metrics/calculateOutputMetrics.test.ts index 90b0ae5c..104914ff 100644 --- a/tests/core/metrics/calculateOutputMetrics.test.ts +++ b/tests/core/metrics/calculateOutputMetrics.test.ts @@ -161,7 +161,11 @@ describe('calculateOutputMetrics', () => { }; await calculateOutputMetrics(content, encoding, undefined, { - taskRunner: mockChunkTrackingTaskRunner({ numOfTasks: 1, workerType: 'calculateMetrics', runtime: 'worker_threads' }), + taskRunner: mockChunkTrackingTaskRunner({ + numOfTasks: 1, + workerType: 'calculateMetrics', + runtime: 'worker_threads', + }), }); // Check that chunks are roughly equal in size diff --git a/tests/shared/unifiedWorker.test.ts b/tests/shared/unifiedWorker.test.ts new file mode 100644 index 00000000..ee1823ad --- /dev/null +++ 
b/tests/shared/unifiedWorker.test.ts @@ -0,0 +1,171 @@ +import { beforeEach, describe, expect, it, vi } from 'vitest'; + +// We need to test the internal functions, so we'll test through the module behavior +// Mock all worker modules +vi.mock('../../src/core/file/workers/fileCollectWorker.js', () => ({ + default: vi.fn().mockResolvedValue({ collected: true }), + onWorkerTermination: vi.fn(), +})); +vi.mock('../../src/core/file/workers/fileProcessWorker.js', () => ({ + default: vi.fn().mockResolvedValue({ processed: true }), + onWorkerTermination: vi.fn(), +})); +vi.mock('../../src/core/security/workers/securityCheckWorker.js', () => ({ + default: vi.fn().mockResolvedValue(null), + onWorkerTermination: vi.fn(), +})); +vi.mock('../../src/core/metrics/workers/calculateMetricsWorker.js', () => ({ + default: vi.fn().mockResolvedValue(100), + onWorkerTermination: vi.fn(), +})); +vi.mock('../../src/cli/actions/workers/defaultActionWorker.js', () => ({ + default: vi.fn().mockResolvedValue({ packResult: {}, config: {} }), + onWorkerTermination: vi.fn(), +})); + +// Mock worker_threads +vi.mock('node:worker_threads', () => ({ + workerData: undefined, +})); + +describe('unifiedWorker', () => { + beforeEach(() => { + vi.clearAllMocks(); + // Reset module cache to clear handler cache + vi.resetModules(); + }); + + describe('inferWorkerTypeFromTask', () => { + it('should infer defaultAction from task with directories, cwd, config', async () => { + const { default: handler } = await import('../../src/shared/unifiedWorker.js'); + const task = { + directories: ['.'], + cwd: '/test', + config: {}, + cliOptions: {}, + }; + + await handler(task); + + const defaultActionWorker = await import('../../src/cli/actions/workers/defaultActionWorker.js'); + expect(defaultActionWorker.default).toHaveBeenCalledWith(task); + }); + + it('should infer defaultAction from ping task', async () => { + const { default: handler } = await import('../../src/shared/unifiedWorker.js'); + const task = { 
ping: true }; + + await handler(task); + + const defaultActionWorker = await import('../../src/cli/actions/workers/defaultActionWorker.js'); + expect(defaultActionWorker.default).toHaveBeenCalledWith(task); + }); + + it('should infer fileCollect from task with filePath, rootDir, maxFileSize', async () => { + const { default: handler } = await import('../../src/shared/unifiedWorker.js'); + const task = { + filePath: 'test.ts', + rootDir: '/root', + maxFileSize: 1000, + }; + + await handler(task); + + const fileCollectWorker = await import('../../src/core/file/workers/fileCollectWorker.js'); + expect(fileCollectWorker.default).toHaveBeenCalledWith(task); + }); + + it('should infer fileProcess from task with rawFile and config', async () => { + const { default: handler } = await import('../../src/shared/unifiedWorker.js'); + const task = { + rawFile: { path: 'test.ts', content: 'code' }, + config: {}, + }; + + await handler(task); + + const fileProcessWorker = await import('../../src/core/file/workers/fileProcessWorker.js'); + expect(fileProcessWorker.default).toHaveBeenCalledWith(task); + }); + + it('should infer calculateMetrics from task with content and encoding', async () => { + const { default: handler } = await import('../../src/shared/unifiedWorker.js'); + const task = { + content: 'test content', + encoding: 'cl100k_base', + }; + + await handler(task); + + const calculateMetricsWorker = await import('../../src/core/metrics/workers/calculateMetricsWorker.js'); + expect(calculateMetricsWorker.default).toHaveBeenCalledWith(task); + }); + + it('should infer securityCheck from task with filePath, content, type', async () => { + const { default: handler } = await import('../../src/shared/unifiedWorker.js'); + const task = { + filePath: 'test.ts', + content: 'test content', + type: 'file', + }; + + await handler(task); + + const securityCheckWorker = await import('../../src/core/security/workers/securityCheckWorker.js'); + 
expect(securityCheckWorker.default).toHaveBeenCalledWith(task); + }); + + it('should throw error for unrecognizable task structure', async () => { + const { default: handler } = await import('../../src/shared/unifiedWorker.js'); + const task = { unknownField: 'value' }; + + await expect(handler(task)).rejects.toThrow('Cannot determine worker type'); + }); + + it('should throw error for null task', async () => { + const { default: handler } = await import('../../src/shared/unifiedWorker.js'); + + await expect(handler(null)).rejects.toThrow('Cannot determine worker type'); + }); + + it('should throw error for non-object task', async () => { + const { default: handler } = await import('../../src/shared/unifiedWorker.js'); + + await expect(handler('string')).rejects.toThrow('Cannot determine worker type'); + }); + }); + + describe('onWorkerTermination', () => { + it('should call cleanup on cached handlers', async () => { + // First, load a handler to populate the cache + const { default: handler, onWorkerTermination } = await import('../../src/shared/unifiedWorker.js'); + const task = { ping: true }; + + await handler(task); + + // Now call termination + await onWorkerTermination(); + + const defaultActionWorker = await import('../../src/cli/actions/workers/defaultActionWorker.js'); + expect(defaultActionWorker.onWorkerTermination).toHaveBeenCalled(); + }); + + it('should clear handler cache after cleanup', async () => { + const { default: handler, onWorkerTermination } = await import('../../src/shared/unifiedWorker.js'); + + // Load handler + await handler({ ping: true }); + + // Terminate + await onWorkerTermination(); + + // Load again - should call the module import again + vi.clearAllMocks(); + await handler({ ping: true }); + + const defaultActionWorker = await import('../../src/cli/actions/workers/defaultActionWorker.js'); + // The handler should be called again (cache was cleared) + expect(defaultActionWorker.default).toHaveBeenCalled(); + }); + }); +}); From 
6323e4c6fa6dfaefaae61ac548df1cd4c0fd17d3 Mon Sep 17 00:00:00 2001 From: Kazuki Yamada Date: Thu, 1 Jan 2026 00:44:15 +0900 Subject: [PATCH 09/13] style(worker): Add explicit Promise return types Ensure all onWorkerTermination exports have consistent `: Promise<void>` type annotations for better type safety. --- src/cli/actions/workers/defaultActionWorker.ts | 2 +- src/core/file/workers/fileProcessWorker.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cli/actions/workers/defaultActionWorker.ts b/src/cli/actions/workers/defaultActionWorker.ts index 8f5002be..323ebaa8 100644 --- a/src/cli/actions/workers/defaultActionWorker.ts +++ b/src/cli/actions/workers/defaultActionWorker.ts @@ -116,7 +116,7 @@ async function defaultActionWorker( export default defaultActionWorker; // Export cleanup function for Tinypool teardown -export const onWorkerTermination = async () => { +export const onWorkerTermination = async (): Promise<void> => { // Any cleanup needed when worker terminates // Currently no specific cleanup required for defaultAction worker }; diff --git a/src/core/file/workers/fileProcessWorker.ts b/src/core/file/workers/fileProcessWorker.ts index 3e51da52..8832533a 100644 --- a/src/core/file/workers/fileProcessWorker.ts +++ b/src/core/file/workers/fileProcessWorker.ts @@ -22,6 +22,6 @@ export default async ({ rawFile, config }: FileProcessTask): Promise { +export const onWorkerTermination = async (): Promise<void> => { await cleanupLanguageParser(); }; From 93bd04a9c8b3621450503c79800376d051d43282 Mon Sep 17 00:00:00 2001 From: Kazuki Yamada Date: Thu, 1 Jan 2026 00:50:44 +0900 Subject: [PATCH 10/13] chore(website-server): Temporarily use PR branch for repomix dep This allows CI to pass by using the PR branch which has the new exports. Should be changed back to #main after merging. 
--- website/server/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/server/package.json b/website/server/package.json index 1ed369d7..783d0b0c 100644 --- a/website/server/package.json +++ b/website/server/package.json @@ -16,7 +16,7 @@ "@hono/node-server": "^1.19.7", "fflate": "^0.8.2", "hono": "^4.11.1", - "repomix": "github:yamadashy/repomix#main", + "repomix": "github:yamadashy/repomix#feat/unified-worker-bundling", "winston": "^3.19.0", "zod": "^4.2.1" }, From 87b29184666dd8e5a3d73d82fe6a00d5666856e6 Mon Sep 17 00:00:00 2001 From: Kazuki Yamada Date: Thu, 1 Jan 2026 00:55:08 +0900 Subject: [PATCH 11/13] fix(ci): Use npm link for website server lint Build and link local repomix package before running website server lint. This ensures the latest exports are available during type checking. --- .github/workflows/ci.yml | 9 ++++++++- website/server/package.json | 2 +- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a55e00ae..8f72d937 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -86,9 +86,16 @@ jobs: with: node-version-file: .tool-versions cache: npm + - name: Build and link local repomix + run: | + npm ci + npm run build + npm link - name: Install website server dependencies working-directory: website/server - run: npm ci + run: | + npm ci + npm link repomix - name: Lint website server working-directory: website/server run: npm run lint diff --git a/website/server/package.json b/website/server/package.json index 783d0b0c..1ed369d7 100644 --- a/website/server/package.json +++ b/website/server/package.json @@ -16,7 +16,7 @@ "@hono/node-server": "^1.19.7", "fflate": "^0.8.2", "hono": "^4.11.1", - "repomix": "github:yamadashy/repomix#feat/unified-worker-bundling", + "repomix": "github:yamadashy/repomix#main", "winston": "^3.19.0", "zod": "^4.2.1" }, From 3c4c19236859def5d4b7c10091c4ab696caa5b73 Mon Sep 17 00:00:00 2001 From: Kazuki 
Yamada Date: Thu, 1 Jan 2026 19:16:21 +0900 Subject: [PATCH 12/13] feat(website-server): Add bundled mode support for Cloud Run - Update Dockerfile for bundled production mode with esbuild - Copy WASM files and tinypool for runtime - Set environment variables for bundled worker path - Update compose.yml to disable bundled mode for local development --- website/compose.yml | 3 +++ website/server/Dockerfile | 45 ++++++++++++++------------------ website/server/package-lock.json | 2 +- 3 files changed, 23 insertions(+), 27 deletions(-) diff --git a/website/compose.yml b/website/compose.yml index 9ba4d440..91941dba 100644 --- a/website/compose.yml +++ b/website/compose.yml @@ -26,6 +26,9 @@ services: environment: - NODE_ENV=development - PORT=8080 + # Unset bundled mode for local development (volume mount overwrites bundled files) + - REPOMIX_WORKER_PATH= + - REPOMIX_WASM_DIR= # override default command command: sh -c "npm i && npm run dev" develop: diff --git a/website/server/Dockerfile b/website/server/Dockerfile index baf494be..2cc2d33a 100644 --- a/website/server/Dockerfile +++ b/website/server/Dockerfile @@ -1,5 +1,5 @@ # ============================================================================== -# Base image +# Build stage # ============================================================================== FROM node:24-alpine AS builder @@ -12,26 +12,14 @@ COPY package*.json ./ # Install all dependencies (including dev dependencies for build) RUN npm ci -# Copy source code +# Copy source code and build COPY . . 
- -# Build TypeScript RUN npm run build -# ============================================================================== -# Production dependencies -# ============================================================================== -FROM node:24-alpine AS deps - -# Install git (required for GitHub-based npm dependencies) -RUN apk add --no-cache git - -WORKDIR /app -COPY package*.json ./ - -# Install only production dependencies -RUN npm ci --omit=dev --ignore-scripts && \ - npm cache clean --force +# Bundle the server with esbuild for unified worker support +RUN npx esbuild dist/index.js --bundle --platform=node --target=node20 \ + --format=esm --outfile=dist-bundled/server.mjs --external:tinypool \ + --banner:js="import { createRequire as _createRequire } from 'module'; const require = _createRequire(import.meta.url); import { fileURLToPath as _fileURLToPath } from 'url'; import { dirname as _dirname } from 'path'; const __filename = _fileURLToPath(import.meta.url); const __dirname = _dirname(__filename);" # ============================================================================== # Runtime image @@ -43,18 +31,23 @@ RUN apk add --no-cache git ca-certificates WORKDIR /app -# Copy built application -COPY --from=builder /app/dist ./dist +# Copy bundled server +COPY --from=builder /app/dist-bundled ./dist-bundled -# Copy production dependencies -COPY --from=deps /app/node_modules ./node_modules +# Copy WASM files for tree-sitter (may be hoisted to root node_modules) +COPY --from=builder /app/node_modules/@repomix/tree-sitter-wasms/out/*.wasm ./wasm/ -# Set environment variables +# Copy tinypool (external dependency) +COPY --from=builder /app/node_modules/tinypool ./node_modules/tinypool + +# Set environment variables for bundled mode ENV NODE_ENV=production \ - PORT=8080 + PORT=8080 \ + REPOMIX_WORKER_PATH=/app/dist-bundled/server.mjs \ + REPOMIX_WASM_DIR=/app/wasm # Expose port EXPOSE 8080 -# Start the server directly -CMD ["node", "dist/index.js"] +# Start the 
bundled server +CMD ["node", "dist-bundled/server.mjs"] diff --git a/website/server/package-lock.json b/website/server/package-lock.json index 8fc2dea8..ff00af49 100644 --- a/website/server/package-lock.json +++ b/website/server/package-lock.json @@ -3594,7 +3594,7 @@ }, "node_modules/repomix": { "version": "1.11.0", - "resolved": "git+ssh://git@github.com/yamadashy/repomix.git#5b02cb56ed606f283e2fd118e07e69c75ee99d8b", + "resolved": "git+ssh://git@github.com/yamadashy/repomix.git#main", "license": "MIT", "dependencies": { "@clack/prompts": "^0.11.0", From 534b2382fdbf34a27dc8516f7af3d7fc73b515dd Mon Sep 17 00:00:00 2001 From: Kazuki Yamada Date: Thu, 1 Jan 2026 19:31:50 +0900 Subject: [PATCH 13/13] chore(website-server): Pin repomix dependency to specific commit hash Use specific commit hash in package-lock.json for reproducibility. --- website/server/package-lock.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/server/package-lock.json b/website/server/package-lock.json index ff00af49..903ea5e0 100644 --- a/website/server/package-lock.json +++ b/website/server/package-lock.json @@ -3594,7 +3594,7 @@ }, "node_modules/repomix": { "version": "1.11.0", - "resolved": "git+ssh://git@github.com/yamadashy/repomix.git#main", + "resolved": "git+ssh://git@github.com/yamadashy/repomix.git#3c4c19236859def5d4b7c10091c4ab696caa5b73", "license": "MIT", "dependencies": { "@clack/prompts": "^0.11.0",