Merge pull request #1056 from yamadashy/feat/unified-worker-bundling

feat(worker): Add unified worker entry point for bundling support
This commit is contained in:
Kazuki Yamada
2026-01-02 22:22:34 +09:00
committed by GitHub
27 changed files with 588 additions and 106 deletions
+8 -1
View File
@@ -86,9 +86,16 @@ jobs:
with:
node-version-file: .tool-versions
cache: npm
- name: Build and link local repomix
run: |
npm ci
npm run build
npm link
- name: Install website server dependencies
working-directory: website/server
run: npm ci
run: |
npm ci
npm link repomix
- name: Lint website server
working-directory: website/server
run: npm run lint
+1 -1
View File
@@ -107,7 +107,7 @@ export const runDefaultAction = async (
// Create worker task runner
const taskRunner = initTaskRunner<DefaultActionTask | PingTask, DefaultActionWorkerResult | PingResult>({
numOfTasks: 1,
workerPath: new URL('./workers/defaultActionWorker.js', import.meta.url).href,
workerType: 'defaultAction',
runtime: 'child_process',
});
+15 -4
View File
@@ -43,19 +43,30 @@ async function defaultActionWorker(
};
}
// Validate task structure
if (!task || typeof task !== 'object') {
throw new Error(`Invalid task: expected object, got ${typeof task}`);
}
// At this point, task is guaranteed to be DefaultActionTask
const { directories, cwd, config, cliOptions, stdinFilePaths } = task;
if (!directories || !Array.isArray(directories)) {
throw new Error('Invalid task: directories must be an array');
}
// Provide defaults for bundled environments where cliOptions might be undefined
const safeCliOptions: CliOptions = cliOptions ?? {};
logger.trace('Worker: Using pre-loaded config:', config);
// Initialize spinner in worker
const spinner = new Spinner('Initializing...', cliOptions);
const spinner = new Spinner('Initializing...', safeCliOptions);
spinner.start();
let packResult: PackResult;
try {
const { skillName, skillDir, skillProjectName, skillSourceUrl } = cliOptions;
const { skillName, skillDir, skillProjectName, skillSourceUrl } = safeCliOptions;
const packOptions = { skillName, skillDir, skillProjectName, skillSourceUrl };
if (stdinFilePaths) {
@@ -105,7 +116,7 @@ async function defaultActionWorker(
export default defaultActionWorker;
// Export cleanup function for Tinypool teardown
export const onWorkerTermination = async () => {
export const onWorkerTermination = async (): Promise<void> => {
// Any cleanup needed when worker terminates
// Currently no specific cleanup required for defaultAction worker
};
+3 -2
View File
@@ -12,10 +12,11 @@ export class Spinner {
private interval: ReturnType<typeof setInterval> | null = null;
private readonly isQuiet: boolean;
constructor(message: string, cliOptions: CliOptions) {
constructor(message: string, cliOptions?: CliOptions) {
this.message = message;
// If the user has specified the verbose flag, don't show the spinner
this.isQuiet = cliOptions.quiet || cliOptions.verbose || cliOptions.stdout || false;
// Use optional chaining to handle undefined cliOptions (e.g., in bundled worker environments)
this.isQuiet = cliOptions?.quiet || cliOptions?.verbose || cliOptions?.stdout || false;
}
start(): void {
+1 -1
View File
@@ -25,7 +25,7 @@ export const collectFiles = async (
): Promise<FileCollectResults> => {
const taskRunner = deps.initTaskRunner<FileCollectTask, FileCollectResult>({
numOfTasks: filePaths.length,
workerPath: new URL('./workers/fileCollectWorker.js', import.meta.url).href,
workerType: 'fileCollect',
runtime: 'worker_threads',
});
const tasks = filePaths.map(
+1 -1
View File
@@ -23,7 +23,7 @@ export const processFiles = async (
): Promise<ProcessedFile[]> => {
const taskRunner = deps.initTaskRunner<FileProcessTask, ProcessedFile>({
numOfTasks: rawFiles.length,
workerPath: new URL('./workers/fileProcessWorker.js', import.meta.url).href,
workerType: 'fileProcess',
// High memory usage and leak risk
runtime: 'worker_threads',
});
+1 -1
View File
@@ -51,6 +51,6 @@ export default async ({ filePath, rootDir, maxFileSize }: FileCollectTask): Prom
};
// Export cleanup function for Tinypool teardown (no cleanup needed for this worker)
export const onWorkerTermination = () => {
export const onWorkerTermination = async (): Promise<void> => {
// No cleanup needed for file collection worker
};
+1 -1
View File
@@ -22,6 +22,6 @@ export default async ({ rawFile, config }: FileProcessTask): Promise<ProcessedFi
};
// Export cleanup function for Tinypool teardown
export const onWorkerTermination = async () => {
export const onWorkerTermination = async (): Promise<void> => {
await cleanupLanguageParser();
};
+1 -1
View File
@@ -43,7 +43,7 @@ export const calculateMetrics = async (
deps.taskRunner ??
initTaskRunner<TokenCountTask, number>({
numOfTasks: processedFiles.length,
workerPath: new URL('./workers/calculateMetricsWorker.js', import.meta.url).href,
workerType: 'calculateMetrics',
runtime: 'worker_threads',
});
@@ -45,6 +45,6 @@ export default async (task: TokenCountTask): Promise<number> => {
};
// Export cleanup function for Tinypool teardown
export const onWorkerTermination = () => {
export const onWorkerTermination = async (): Promise<void> => {
freeTokenCounters();
};
+1 -1
View File
@@ -57,7 +57,7 @@ export const runSecurityCheck = async (
const taskRunner = deps.initTaskRunner<SecurityCheckTask, SuspiciousFileResult | null>({
numOfTasks: rawFiles.length + gitDiffTasks.length + gitLogTasks.length,
workerPath: new URL('./workers/securityCheckWorker.js', import.meta.url).href,
workerType: 'securityCheck',
runtime: 'worker_threads',
});
const fileTasks = rawFiles.map(
@@ -85,6 +85,6 @@ export const createSecretLintConfig = (): SecretLintCoreConfig => ({
});
// Export cleanup function for Tinypool teardown (no cleanup needed for this worker)
export const onWorkerTermination = () => {
export const onWorkerTermination = async (): Promise<void> => {
// No cleanup needed for security check worker
};
+34 -1
View File
@@ -1,9 +1,32 @@
import fs from 'node:fs/promises';
import { createRequire } from 'node:module';
import path from 'node:path';
import { Language } from 'web-tree-sitter';
const require = createRequire(import.meta.url);
/**
* Custom WASM base path for bundled environments.
* Set via REPOMIX_WASM_DIR environment variable or setWasmBasePath().
* When set, WASM files are loaded from this directory instead of node_modules.
*/
let customWasmBasePath: string | null = null;
/**
* Set a custom base path for WASM files.
* Used in bundled environments where WASM files are copied to a custom location.
*/
export function setWasmBasePath(basePath: string): void {
customWasmBasePath = basePath;
}
/**
* Get the WASM base path from environment variable or custom setting.
*/
function getWasmBasePath(): string | null {
return customWasmBasePath ?? process.env.REPOMIX_WASM_DIR ?? null;
}
export async function loadLanguage(langName: string): Promise<Language> {
if (!langName) {
throw new Error('Invalid language name');
@@ -19,7 +42,17 @@ export async function loadLanguage(langName: string): Promise<Language> {
}
async function getWasmPath(langName: string): Promise<string> {
const wasmPath = require.resolve(`@repomix/tree-sitter-wasms/out/tree-sitter-${langName}.wasm`);
const wasmBasePath = getWasmBasePath();
let wasmPath: string;
if (wasmBasePath) {
// Use custom WASM path for bundled environments
wasmPath = path.join(wasmBasePath, `tree-sitter-${langName}.wasm`);
} else {
// Use require.resolve for standard node_modules environments
wasmPath = require.resolve(`@repomix/tree-sitter-wasms/out/tree-sitter-${langName}.wasm`);
}
try {
await fs.access(wasmPath);
return wasmPath;
+10
View File
@@ -24,6 +24,7 @@ export { TokenCounter } from './core/metrics/TokenCounter.js';
// Tree-sitter
export { parseFile } from './core/treeSitter/parseFile.js';
export { setWasmBasePath } from './core/treeSitter/loadLanguage.js';
// ---------------------------------------------------------------------------------------------------------------------
// Config
@@ -56,3 +57,12 @@ export { runDefaultAction, buildCliConfig } from './cli/actions/defaultAction.js
// Remote action
export { runRemoteAction } from './cli/actions/remoteAction.js';
// ---------------------------------------------------------------------------------------------------------------------
// Worker (for bundled environments)
// ---------------------------------------------------------------------------------------------------------------------
export {
default as unifiedWorkerHandler,
onWorkerTermination as unifiedWorkerTermination,
type WorkerType,
} from './shared/unifiedWorker.js';
+42 -3
View File
@@ -1,15 +1,47 @@
import os from 'node:os';
import { type Options, Tinypool } from 'tinypool';
import { logger } from './logger.js';
import type { WorkerType } from './unifiedWorker.js';
export type WorkerRuntime = NonNullable<Options['runtime']>;
// Re-export WorkerType for external consumers
export type { WorkerType } from './unifiedWorker.js';
export interface WorkerOptions {
numOfTasks: number;
workerPath: string;
workerType: WorkerType;
runtime: WorkerRuntime;
}
/**
* Get the worker file path for a given worker type.
* In bundled environments (REPOMIX_WORKER_PATH set), uses the unified worker.
* Otherwise, uses individual worker files.
*/
const getWorkerPath = (workerType: WorkerType): string => {
// Bundled environment: use unified worker path
if (process.env.REPOMIX_WORKER_PATH) {
return process.env.REPOMIX_WORKER_PATH;
}
// Non-bundled environment: use individual worker files
switch (workerType) {
case 'fileCollect':
return new URL('../core/file/workers/fileCollectWorker.js', import.meta.url).href;
case 'fileProcess':
return new URL('../core/file/workers/fileProcessWorker.js', import.meta.url).href;
case 'securityCheck':
return new URL('../core/security/workers/securityCheckWorker.js', import.meta.url).href;
case 'calculateMetrics':
return new URL('../core/metrics/workers/calculateMetricsWorker.js', import.meta.url).href;
case 'defaultAction':
return new URL('../cli/actions/workers/defaultActionWorker.js', import.meta.url).href;
default:
throw new Error(`Unknown worker type: ${workerType}`);
}
};
// Worker initialization is expensive, so we prefer fewer threads unless there are many files
const TASKS_PER_THREAD = 100;
@@ -32,11 +64,14 @@ export const getWorkerThreadCount = (numOfTasks: number): { minThreads: number;
};
export const createWorkerPool = (options: WorkerOptions): Tinypool => {
const { numOfTasks, workerPath, runtime = 'child_process' } = options;
const { numOfTasks, workerType, runtime = 'child_process' } = options;
const { minThreads, maxThreads } = getWorkerThreadCount(numOfTasks);
// Get worker path - uses unified worker in bundled env, individual files otherwise
const workerPath = getWorkerPath(workerType);
logger.trace(
`Initializing worker pool with min=${minThreads}, max=${maxThreads} threads, runtime=${runtime}. Worker path: ${workerPath}`,
`Initializing worker pool with min=${minThreads}, max=${maxThreads} threads, runtime=${runtime}. Worker type: ${workerType}`,
);
const startTime = process.hrtime.bigint();
@@ -49,12 +84,16 @@ export const createWorkerPool = (options: WorkerOptions): Tinypool => {
idleTimeout: 5000,
teardown: 'onWorkerTermination',
workerData: {
workerType,
logLevel: logger.getLogLevel(),
},
// Only add env for child_process workers
...(runtime === 'child_process' && {
env: {
...process.env,
// Pass worker type as environment variable for child_process workers
// This is needed because workerData is not directly accessible in child_process runtime
REPOMIX_WORKER_TYPE: workerType,
// Pass log level as environment variable for child_process workers
REPOMIX_LOG_LEVEL: logger.getLogLevel().toString(),
// Ensure color support in child_process workers
+189
View File
@@ -0,0 +1,189 @@
/**
* Unified Worker Entry Point
*
* This module serves as a single entry point for all worker types in Repomix.
* It enables full bundling support by allowing the bundled file to spawn workers
* using itself (import.meta.url), eliminating path resolution issues.
*
* When running as a worker, it dynamically imports the appropriate worker handler
* based on the workerType specified in workerData.
*/
import { workerData } from 'node:worker_threads';
// Worker type definitions
export type WorkerType = 'fileCollect' | 'fileProcess' | 'securityCheck' | 'calculateMetrics' | 'defaultAction';
// Worker handler type - uses 'any' to accommodate different worker signatures
// biome-ignore lint/suspicious/noExplicitAny: Worker handlers have varying signatures
type WorkerHandler = (task: any) => Promise<any>;
type WorkerCleanup = () => void | Promise<void>;
// Cache loaded handlers by worker type
const handlerCache = new Map<WorkerType, { handler: WorkerHandler; cleanup?: WorkerCleanup }>();
/**
* Dynamically load the appropriate worker handler based on workerType.
* Uses dynamic imports to avoid loading all worker code when not needed.
* Results are cached for reuse.
*/
const loadWorkerHandler = async (
workerType: WorkerType,
): Promise<{ handler: WorkerHandler; cleanup?: WorkerCleanup }> => {
// Check cache first
const cached = handlerCache.get(workerType);
if (cached) {
return cached;
}
let result: { handler: WorkerHandler; cleanup?: WorkerCleanup };
switch (workerType) {
case 'fileCollect': {
const module = await import('../core/file/workers/fileCollectWorker.js');
result = { handler: module.default as WorkerHandler, cleanup: module.onWorkerTermination };
break;
}
case 'fileProcess': {
const module = await import('../core/file/workers/fileProcessWorker.js');
result = { handler: module.default as WorkerHandler, cleanup: module.onWorkerTermination };
break;
}
case 'securityCheck': {
const module = await import('../core/security/workers/securityCheckWorker.js');
result = { handler: module.default as WorkerHandler, cleanup: module.onWorkerTermination };
break;
}
case 'calculateMetrics': {
const module = await import('../core/metrics/workers/calculateMetricsWorker.js');
result = { handler: module.default as WorkerHandler, cleanup: module.onWorkerTermination };
break;
}
case 'defaultAction': {
const module = await import('../cli/actions/workers/defaultActionWorker.js');
result = { handler: module.default as WorkerHandler, cleanup: module.onWorkerTermination };
break;
}
default:
throw new Error(`Unknown worker type: ${workerType}`);
}
// Cache the result
handlerCache.set(workerType, result);
return result;
};
/**
* Infer worker type from task structure.
* This is used in bundled environments where Tinypool may reuse child processes
* across different worker pools.
*/
const inferWorkerTypeFromTask = (task: unknown): WorkerType | null => {
if (!task || typeof task !== 'object') {
return null;
}
const taskObj = task as Record<string, unknown>;
// defaultAction: has directories, cwd, config, cliOptions
if ('directories' in taskObj && 'cwd' in taskObj && 'config' in taskObj) {
return 'defaultAction';
}
// defaultAction ping task
if ('ping' in taskObj) {
return 'defaultAction';
}
// fileCollect: has filePath, rootDir, maxFileSize
if ('filePath' in taskObj && 'rootDir' in taskObj && 'maxFileSize' in taskObj) {
return 'fileCollect';
}
// fileProcess: has rawFile (nested object) and config
if ('rawFile' in taskObj && 'config' in taskObj) {
return 'fileProcess';
}
// calculateMetrics: has content, encoding (must check before securityCheck)
if ('content' in taskObj && 'encoding' in taskObj) {
return 'calculateMetrics';
}
// securityCheck: has filePath, content, type
if ('filePath' in taskObj && 'content' in taskObj && 'type' in taskObj) {
return 'securityCheck';
}
return null;
};
/**
* Get workerType from workerData.
* In Tinypool child_process mode, workerData is an array.
*/
const getWorkerTypeFromWorkerData = (): WorkerType | undefined => {
if (!workerData) {
return undefined;
}
// Handle array format (Tinypool child_process mode)
if (Array.isArray(workerData)) {
for (const item of workerData) {
if (item && typeof item === 'object' && 'workerType' in item) {
return item.workerType as WorkerType;
}
}
return undefined;
}
// Handle object format (worker_threads mode)
if (typeof workerData === 'object' && 'workerType' in workerData) {
return (workerData as { workerType?: WorkerType }).workerType;
}
return undefined;
};
/**
* Default export for Tinypool.
* This function is called for each task and delegates to the appropriate handler.
*
* In bundled environments where Tinypool may reuse child processes across different
* worker pools, we use task-based inference to determine the correct handler.
*/
export default async (task: unknown): Promise<unknown> => {
// Determine worker type: try workerData/env first, then infer from task
let workerType: WorkerType | undefined =
getWorkerTypeFromWorkerData() ?? (process.env.REPOMIX_WORKER_TYPE as WorkerType | undefined);
// In bundled environments, Tinypool may reuse child processes.
// If the task doesn't match the initially configured worker type, infer from task.
const inferredType = inferWorkerTypeFromTask(task);
// Use inferred type if available (more reliable in bundled env)
if (inferredType) {
workerType = inferredType;
}
if (!workerType) {
throw new Error('Cannot determine worker type from workerData, env, or task structure');
}
// Load handler (cached)
const { handler } = await loadWorkerHandler(workerType);
return handler(task);
};
/**
* Cleanup function for Tinypool teardown.
* Cleans up all cached handlers.
*/
export const onWorkerTermination = async (): Promise<void> => {
for (const { cleanup } of handlerCache.values()) {
if (cleanup) {
await cleanup();
}
}
handlerCache.clear();
};
+1 -1
View File
@@ -149,7 +149,7 @@ describe('defaultAction', () => {
expect(processConcurrency.initTaskRunner).toHaveBeenCalledWith({
numOfTasks: 1,
workerPath: expect.stringContaining('defaultActionWorker.js'),
workerType: 'defaultAction',
runtime: 'child_process',
});
@@ -67,7 +67,11 @@ describe('calculateGitDiffMetrics', () => {
cwd: '/test/project',
};
const mockTaskRunner = mockInitTaskRunner({ numOfTasks: 1, workerPath: '', runtime: 'worker_threads' });
const mockTaskRunner = mockInitTaskRunner({
numOfTasks: 1,
workerType: 'calculateMetrics',
runtime: 'worker_threads',
});
beforeEach(() => {
vi.clearAllMocks();
@@ -67,7 +67,11 @@ describe('calculateGitLogMetrics', () => {
cwd: '/test/project',
};
const mockTaskRunner = mockInitTaskRunner({ numOfTasks: 1, workerPath: '', runtime: 'worker_threads' });
const mockTaskRunner = mockInitTaskRunner({
numOfTasks: 1,
workerType: 'calculateMetrics',
runtime: 'worker_threads',
});
beforeEach(() => {
vi.clearAllMocks();
@@ -24,7 +24,7 @@ describe('calculateOutputMetrics', () => {
const path = 'test.txt';
const result = await calculateOutputMetrics(content, encoding, path, {
taskRunner: mockInitTaskRunner({ numOfTasks: 1, workerPath: '', runtime: 'worker_threads' }),
taskRunner: mockInitTaskRunner({ numOfTasks: 1, workerType: 'calculateMetrics', runtime: 'worker_threads' }),
});
expect(result).toBe(2); // 'test content' should be counted as 2 tokens
@@ -35,7 +35,7 @@ describe('calculateOutputMetrics', () => {
const encoding = 'o200k_base';
const result = await calculateOutputMetrics(content, encoding, undefined, {
taskRunner: mockInitTaskRunner({ numOfTasks: 1, workerPath: '', runtime: 'worker_threads' }),
taskRunner: mockInitTaskRunner({ numOfTasks: 1, workerType: 'calculateMetrics', runtime: 'worker_threads' }),
});
expect(result).toBe(2);
@@ -59,7 +59,7 @@ describe('calculateOutputMetrics', () => {
await expect(
calculateOutputMetrics(content, encoding, undefined, {
taskRunner: mockErrorTaskRunner({ numOfTasks: 1, workerPath: '', runtime: 'worker_threads' }),
taskRunner: mockErrorTaskRunner({ numOfTasks: 1, workerType: 'calculateMetrics', runtime: 'worker_threads' }),
}),
).rejects.toThrow('Worker error');
@@ -71,7 +71,7 @@ describe('calculateOutputMetrics', () => {
const encoding = 'o200k_base';
const result = await calculateOutputMetrics(content, encoding, undefined, {
taskRunner: mockInitTaskRunner({ numOfTasks: 1, workerPath: '', runtime: 'worker_threads' }),
taskRunner: mockInitTaskRunner({ numOfTasks: 1, workerType: 'calculateMetrics', runtime: 'worker_threads' }),
});
expect(result).toBe(0);
@@ -82,7 +82,7 @@ describe('calculateOutputMetrics', () => {
const encoding = 'o200k_base';
const result = await calculateOutputMetrics(content, encoding, undefined, {
taskRunner: mockInitTaskRunner({ numOfTasks: 1, workerPath: '', runtime: 'worker_threads' }),
taskRunner: mockInitTaskRunner({ numOfTasks: 1, workerType: 'calculateMetrics', runtime: 'worker_threads' }),
});
expect(result).toBeGreaterThan(0);
@@ -110,7 +110,7 @@ describe('calculateOutputMetrics', () => {
};
const result = await calculateOutputMetrics(content, encoding, path, {
taskRunner: mockParallelTaskRunner({ numOfTasks: 1, workerPath: '', runtime: 'worker_threads' }),
taskRunner: mockParallelTaskRunner({ numOfTasks: 1, workerType: 'calculateMetrics', runtime: 'worker_threads' }),
});
expect(chunksProcessed).toBeGreaterThan(1); // Should have processed multiple chunks
@@ -135,7 +135,7 @@ describe('calculateOutputMetrics', () => {
await expect(
calculateOutputMetrics(content, encoding, undefined, {
taskRunner: mockErrorTaskRunner({ numOfTasks: 1, workerPath: '', runtime: 'worker_threads' }),
taskRunner: mockErrorTaskRunner({ numOfTasks: 1, workerType: 'calculateMetrics', runtime: 'worker_threads' }),
}),
).rejects.toThrow('Parallel processing error');
@@ -161,7 +161,11 @@ describe('calculateOutputMetrics', () => {
};
await calculateOutputMetrics(content, encoding, undefined, {
taskRunner: mockChunkTrackingTaskRunner({ numOfTasks: 1, workerPath: '', runtime: 'worker_threads' }),
taskRunner: mockChunkTrackingTaskRunner({
numOfTasks: 1,
workerType: 'calculateMetrics',
runtime: 'worker_threads',
}),
});
// Check that chunks are roughly equal in size
@@ -36,7 +36,7 @@ describe('calculateSelectiveFileMetrics', () => {
'o200k_base',
progressCallback,
{
taskRunner: mockInitTaskRunner({ numOfTasks: 1, workerPath: '', runtime: 'worker_threads' }),
taskRunner: mockInitTaskRunner({ numOfTasks: 1, workerType: 'calculateMetrics', runtime: 'worker_threads' }),
},
);
@@ -57,7 +57,7 @@ describe('calculateSelectiveFileMetrics', () => {
'o200k_base',
progressCallback,
{
taskRunner: mockInitTaskRunner({ numOfTasks: 1, workerPath: '', runtime: 'worker_threads' }),
taskRunner: mockInitTaskRunner({ numOfTasks: 1, workerType: 'calculateMetrics', runtime: 'worker_threads' }),
},
);
+11 -10
View File
@@ -72,17 +72,17 @@ describe('processConcurrency', () => {
});
it('should initialize Tinypool with correct configuration', () => {
const workerPath = '/path/to/worker.js';
const tinypool = createWorkerPool({ numOfTasks: 500, workerPath, runtime: 'child_process' });
const tinypool = createWorkerPool({ numOfTasks: 500, workerType: 'fileCollect', runtime: 'child_process' });
expect(Tinypool).toHaveBeenCalledWith({
filename: workerPath,
filename: expect.stringContaining('fileCollectWorker.js'),
runtime: 'child_process',
minThreads: 1,
maxThreads: 4, // Math.min(4, 500/100) = 4
idleTimeout: 5000,
teardown: 'onWorkerTermination',
workerData: {
workerType: 'fileCollect',
logLevel: 2,
},
env: expect.objectContaining({
@@ -95,17 +95,17 @@ describe('processConcurrency', () => {
});
it('should initialize Tinypool with worker_threads runtime when specified', () => {
const workerPath = '/path/to/worker.js';
const tinypool = createWorkerPool({ numOfTasks: 500, workerPath, runtime: 'worker_threads' });
const tinypool = createWorkerPool({ numOfTasks: 500, workerType: 'securityCheck', runtime: 'worker_threads' });
expect(Tinypool).toHaveBeenCalledWith({
filename: workerPath,
filename: expect.stringContaining('securityCheckWorker.js'),
runtime: 'worker_threads',
minThreads: 1,
maxThreads: 4, // Math.min(4, 500/100) = 4
idleTimeout: 5000,
teardown: 'onWorkerTermination',
workerData: {
workerType: 'securityCheck',
logLevel: 2,
},
});
@@ -126,8 +126,7 @@ describe('processConcurrency', () => {
});
it('should return a TaskRunner with run and cleanup methods', () => {
const workerPath = '/path/to/worker.js';
const taskRunner = initTaskRunner({ numOfTasks: 100, workerPath, runtime: 'child_process' });
const taskRunner = initTaskRunner({ numOfTasks: 100, workerType: 'fileProcess', runtime: 'child_process' });
expect(taskRunner).toHaveProperty('run');
expect(taskRunner).toHaveProperty('cleanup');
@@ -136,12 +135,14 @@ describe('processConcurrency', () => {
});
it('should pass runtime parameter to createWorkerPool', () => {
const workerPath = '/path/to/worker.js';
const taskRunner = initTaskRunner({ numOfTasks: 100, workerPath, runtime: 'worker_threads' });
const taskRunner = initTaskRunner({ numOfTasks: 100, workerType: 'calculateMetrics', runtime: 'worker_threads' });
expect(Tinypool).toHaveBeenCalledWith(
expect.objectContaining({
runtime: 'worker_threads',
workerData: expect.objectContaining({
workerType: 'calculateMetrics',
}),
}),
);
expect(taskRunner).toHaveProperty('run');
+171
View File
@@ -0,0 +1,171 @@
import { beforeEach, describe, expect, it, vi } from 'vitest';
// We need to test the internal functions, so we'll test through the module behavior
// Mock all worker modules
vi.mock('../../src/core/file/workers/fileCollectWorker.js', () => ({
default: vi.fn().mockResolvedValue({ collected: true }),
onWorkerTermination: vi.fn(),
}));
vi.mock('../../src/core/file/workers/fileProcessWorker.js', () => ({
default: vi.fn().mockResolvedValue({ processed: true }),
onWorkerTermination: vi.fn(),
}));
vi.mock('../../src/core/security/workers/securityCheckWorker.js', () => ({
default: vi.fn().mockResolvedValue(null),
onWorkerTermination: vi.fn(),
}));
vi.mock('../../src/core/metrics/workers/calculateMetricsWorker.js', () => ({
default: vi.fn().mockResolvedValue(100),
onWorkerTermination: vi.fn(),
}));
vi.mock('../../src/cli/actions/workers/defaultActionWorker.js', () => ({
default: vi.fn().mockResolvedValue({ packResult: {}, config: {} }),
onWorkerTermination: vi.fn(),
}));
// Mock worker_threads
vi.mock('node:worker_threads', () => ({
workerData: undefined,
}));
describe('unifiedWorker', () => {
beforeEach(() => {
vi.clearAllMocks();
// Reset module cache to clear handler cache
vi.resetModules();
});
describe('inferWorkerTypeFromTask', () => {
it('should infer defaultAction from task with directories, cwd, config', async () => {
const { default: handler } = await import('../../src/shared/unifiedWorker.js');
const task = {
directories: ['.'],
cwd: '/test',
config: {},
cliOptions: {},
};
await handler(task);
const defaultActionWorker = await import('../../src/cli/actions/workers/defaultActionWorker.js');
expect(defaultActionWorker.default).toHaveBeenCalledWith(task);
});
it('should infer defaultAction from ping task', async () => {
const { default: handler } = await import('../../src/shared/unifiedWorker.js');
const task = { ping: true };
await handler(task);
const defaultActionWorker = await import('../../src/cli/actions/workers/defaultActionWorker.js');
expect(defaultActionWorker.default).toHaveBeenCalledWith(task);
});
it('should infer fileCollect from task with filePath, rootDir, maxFileSize', async () => {
const { default: handler } = await import('../../src/shared/unifiedWorker.js');
const task = {
filePath: 'test.ts',
rootDir: '/root',
maxFileSize: 1000,
};
await handler(task);
const fileCollectWorker = await import('../../src/core/file/workers/fileCollectWorker.js');
expect(fileCollectWorker.default).toHaveBeenCalledWith(task);
});
it('should infer fileProcess from task with rawFile and config', async () => {
const { default: handler } = await import('../../src/shared/unifiedWorker.js');
const task = {
rawFile: { path: 'test.ts', content: 'code' },
config: {},
};
await handler(task);
const fileProcessWorker = await import('../../src/core/file/workers/fileProcessWorker.js');
expect(fileProcessWorker.default).toHaveBeenCalledWith(task);
});
it('should infer calculateMetrics from task with content and encoding', async () => {
const { default: handler } = await import('../../src/shared/unifiedWorker.js');
const task = {
content: 'test content',
encoding: 'cl100k_base',
};
await handler(task);
const calculateMetricsWorker = await import('../../src/core/metrics/workers/calculateMetricsWorker.js');
expect(calculateMetricsWorker.default).toHaveBeenCalledWith(task);
});
it('should infer securityCheck from task with filePath, content, type', async () => {
const { default: handler } = await import('../../src/shared/unifiedWorker.js');
const task = {
filePath: 'test.ts',
content: 'test content',
type: 'file',
};
await handler(task);
const securityCheckWorker = await import('../../src/core/security/workers/securityCheckWorker.js');
expect(securityCheckWorker.default).toHaveBeenCalledWith(task);
});
it('should throw error for unrecognizable task structure', async () => {
const { default: handler } = await import('../../src/shared/unifiedWorker.js');
const task = { unknownField: 'value' };
await expect(handler(task)).rejects.toThrow('Cannot determine worker type');
});
it('should throw error for null task', async () => {
const { default: handler } = await import('../../src/shared/unifiedWorker.js');
await expect(handler(null)).rejects.toThrow('Cannot determine worker type');
});
it('should throw error for non-object task', async () => {
const { default: handler } = await import('../../src/shared/unifiedWorker.js');
await expect(handler('string')).rejects.toThrow('Cannot determine worker type');
});
});
describe('onWorkerTermination', () => {
it('should call cleanup on cached handlers', async () => {
// First, load a handler to populate the cache
const { default: handler, onWorkerTermination } = await import('../../src/shared/unifiedWorker.js');
const task = { ping: true };
await handler(task);
// Now call termination
await onWorkerTermination();
const defaultActionWorker = await import('../../src/cli/actions/workers/defaultActionWorker.js');
expect(defaultActionWorker.onWorkerTermination).toHaveBeenCalled();
});
it('should clear handler cache after cleanup', async () => {
const { default: handler, onWorkerTermination } = await import('../../src/shared/unifiedWorker.js');
// Load handler
await handler({ ping: true });
// Terminate
await onWorkerTermination();
// Load again - should call the module import again
vi.clearAllMocks();
await handler({ ping: true });
const defaultActionWorker = await import('../../src/cli/actions/workers/defaultActionWorker.js');
// The handler should be called again (cache was cleared)
expect(defaultActionWorker.default).toHaveBeenCalled();
});
});
});
+3
View File
@@ -26,6 +26,9 @@ services:
environment:
- NODE_ENV=development
- PORT=8080
# Unset bundled mode for local development (volume mount overwrites bundled files)
- REPOMIX_WORKER_PATH=
- REPOMIX_WASM_DIR=
# override default command
command: sh -c "npm i && npm run dev"
develop:
+19 -26
View File
@@ -1,5 +1,5 @@
# ==============================================================================
# Base image
# Build stage
# ==============================================================================
FROM node:24-alpine AS builder
@@ -12,26 +12,14 @@ COPY package*.json ./
# Install all dependencies (including dev dependencies for build)
RUN npm ci
# Copy source code
# Copy source code and build
COPY . .
# Build TypeScript
RUN npm run build
# ==============================================================================
# Production dependencies
# ==============================================================================
FROM node:24-alpine AS deps
# Install git (required for GitHub-based npm dependencies)
RUN apk add --no-cache git
WORKDIR /app
COPY package*.json ./
# Install only production dependencies
RUN npm ci --omit=dev --ignore-scripts && \
npm cache clean --force
# Bundle the server with esbuild for unified worker support
RUN npx esbuild dist/index.js --bundle --platform=node --target=node20 \
--format=esm --outfile=dist-bundled/server.mjs --external:tinypool \
--banner:js="import { createRequire as _createRequire } from 'module'; const require = _createRequire(import.meta.url); import { fileURLToPath as _fileURLToPath } from 'url'; import { dirname as _dirname } from 'path'; const __filename = _fileURLToPath(import.meta.url); const __dirname = _dirname(__filename);"
# ==============================================================================
# Runtime image
@@ -43,18 +31,23 @@ RUN apk add --no-cache git ca-certificates
WORKDIR /app
# Copy built application
COPY --from=builder /app/dist ./dist
# Copy bundled server
COPY --from=builder /app/dist-bundled ./dist-bundled
# Copy production dependencies
COPY --from=deps /app/node_modules ./node_modules
# Copy WASM files for tree-sitter (may be hoisted to root node_modules)
COPY --from=builder /app/node_modules/@repomix/tree-sitter-wasms/out/*.wasm ./wasm/
# Set environment variables
# Copy tinypool (external dependency)
COPY --from=builder /app/node_modules/tinypool ./node_modules/tinypool
# Set environment variables for bundled mode
ENV NODE_ENV=production \
PORT=8080
PORT=8080 \
REPOMIX_WORKER_PATH=/app/dist-bundled/server.mjs \
REPOMIX_WASM_DIR=/app/wasm
# Expose port
EXPOSE 8080
# Start the server directly
CMD ["node", "dist/index.js"]
# Start the bundled server
CMD ["node", "dist-bundled/server.mjs"]
+1 -1
View File
@@ -3594,7 +3594,7 @@
},
"node_modules/repomix": {
"version": "1.11.0",
"resolved": "git+ssh://git@github.com/yamadashy/repomix.git#5b02cb56ed606f283e2fd118e07e69c75ee99d8b",
"resolved": "git+ssh://git@github.com/yamadashy/repomix.git#3c4c19236859def5d4b7c10091c4ab696caa5b73",
"license": "MIT",
"dependencies": {
"@clack/prompts": "^0.11.0",
+48 -36
View File
@@ -10,51 +10,63 @@ import { rateLimitMiddleware } from './middlewares/rateLimit.js';
import { logInfo, logMemoryUsage } from './utils/logger.js';
import { getProcessConcurrency } from './utils/processConcurrency.js';
const API_TIMEOUT_MS = 35_000;
// Re-export unified worker for bundled environment
// When this file is used as a Tinypool worker, it needs to export the handler
export { unifiedWorkerHandler as default, unifiedWorkerTermination as onWorkerTermination } from 'repomix';
// Log server metrics on startup
logInfo('Server starting', {
metrics: {
// Check if running as a Tinypool worker (bundled environment)
// In bundled mode, this file is used both as server entry and worker entry
const isTinypoolWorker = (): boolean => {
const tinypoolState = (process as NodeJS.Process & { __tinypool_state__?: { isTinypoolWorker?: boolean } })
.__tinypool_state__;
return tinypoolState?.isTinypoolWorker ?? false;
};
// Skip server initialization if running as a Tinypool worker
if (!isTinypoolWorker()) {
const API_TIMEOUT_MS = 35_000;
// Log server metrics on startup
logInfo('Server starting', {
metrics: {
processConcurrency: getProcessConcurrency(),
},
});
// Log initial memory usage
logMemoryUsage('Server startup', {
processConcurrency: getProcessConcurrency(),
},
});
});
// Log initial memory usage
logMemoryUsage('Server startup', {
processConcurrency: getProcessConcurrency(),
});
const app = new Hono();
const app = new Hono();
// Configure CORS
app.use('/*', corsMiddleware);
// Configure CORS
app.use('/*', corsMiddleware);
// Enable compression
app.use(compress());
// Enable compression
app.use(compress());
// Set timeout for API routes
app.use('/api', timeout(API_TIMEOUT_MS));
// Set timeout for API routes
app.use('/api', timeout(API_TIMEOUT_MS));
// Setup custom logger
app.use('*', cloudLoggerMiddleware());
// Setup custom logger
app.use('*', cloudLoggerMiddleware());
// Apply rate limiting to API routes
app.use('/api/*', rateLimitMiddleware());
// Apply rate limiting to API routes
app.use('/api/*', rateLimitMiddleware());
// Health check endpoint
app.get('/health', (c) => c.text('OK'));
// Health check endpoint
app.get('/health', (c) => c.text('OK'));
// Main packing endpoint
app.post('/api/pack', bodyLimitMiddleware, packAction);
// Main packing endpoint
app.post('/api/pack', bodyLimitMiddleware, packAction);
// Start server
const port = process.env.PORT ? Number.parseInt(process.env.PORT, 10) : 3000;
logInfo(`Server starting on port ${port}`);
// Start server
const port = process.env.PORT ? Number.parseInt(process.env.PORT, 10) : 3000;
logInfo(`Server starting on port ${port}`);
serve({
fetch: app.fetch,
port,
});
// Export app for testing
export default app;
serve({
fetch: app.fetch,
port,
});
}