mirror of
https://github.com/yamadashy/repomix.git
synced 2026-05-30 11:18:53 +02:00
refactor(server): Extract validation messages to shared constants
Addresses PR #1493 round 5 follow-up (schema-drift tracking) from claude. intent(eliminate-drift): prior reviews flagged the string-coupling between packRequestSchema (produces zod messages) and classifyRejectReason (matches them back to a metric label) as fragile — a message rewrite in one place would silently land requests in the 'other' bucket, quietly mislabeling the dashboard. Extract all 11 messages to a shared MESSAGES module so the producer, the consumer, and the test's expected values all reference the same constants decision(map-over-switch): classifyRejectReason's 11-case switch becomes a MESSAGE_TO_REASON lookup table. Same runtime behavior, easier to scan, and the keys are compile-time-typed via the shared constants (so a typo in either side is a TS error) decision(preserve-invalid_format-path): 'invalid_format' is keyed by path (`format`), not by message text. Left as path-match so it continues to work even if zod's default enum error message changes improve(test-references-constants): test.each cases now reference MESSAGES directly. The test continues to catch classifier-logic drift (wrong label for a known key, missing entry in the lookup) while schema/classifier drift becomes impossible by construction Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,41 +1,45 @@
|
||||
import { describe, expect, test } from 'vitest';
|
||||
import { z } from 'zod';
|
||||
import { classifyRejectReason, getRepoHost } from '../../../website/server/src/actions/packEventSchema.js';
|
||||
import { MESSAGES } from '../../../website/server/src/actions/packRequestMessages.js';
|
||||
|
||||
// Classifier drift test. Messages below MUST match those defined in
|
||||
// `website/server/src/actions/packRequestSchema.ts`. If a schema message is
|
||||
// edited without updating this file, the classifier falls back to `'other'`
|
||||
// and dashboards quietly mislabel — but that drift is not caught here
|
||||
// (the test deliberately avoids importing the schema to keep this file's
|
||||
// module graph free of server-only packages like `repomix` and `hono` that
|
||||
// the root vitest harness doesn't install). For true schema drift catching,
|
||||
// the schema and classifier would need to share a constants module.
|
||||
// Classifier drift test — imports MESSAGES from the same shared module that
|
||||
// packRequestSchema uses. This means a message-text rewrite automatically
|
||||
// propagates to the schema (producer), the classifier (consumer), AND the
|
||||
// test's expected values, so schema/classifier drift is impossible by
|
||||
// construction. The test's value is catching classifier-logic drift: if the
|
||||
// classifier's MESSAGE_TO_REASON map loses a key (or maps it to the wrong
|
||||
// label), the corresponding case here fails.
|
||||
//
|
||||
// Deliberately avoids importing packRequestSchema itself — that file
|
||||
// transitively depends on `repomix`, which the root vitest harness can't
|
||||
// resolve because repomix IS this repo.
|
||||
|
||||
// Construct a ZodError with a single issue whose message matches the one the
|
||||
// schema would produce. classifyRejectReason only reads `.message` and
|
||||
// `.path` from the first issue.
|
||||
// Construct a ZodError with a single issue whose message matches the shared
|
||||
// constant. classifyRejectReason only reads `.message` and `.path` from the
|
||||
// first issue.
|
||||
const zodErrorWith = (message: string, path: (string | number)[] = []) =>
|
||||
new z.ZodError([{ code: 'custom', message, path, input: undefined }]);
|
||||
|
||||
// Mimic the AppError-with-cause wrapping that `validateRequest` does in
|
||||
// production — native Error with `cause` is enough to exercise the
|
||||
// cause-chain path in classifyRejectReason, no AppError import needed.
|
||||
// cause-chain path in classifyRejectReason.
|
||||
const wrapped = (message: string, path: (string | number)[] = []) =>
|
||||
new Error(`Invalid request: ${message}`, { cause: zodErrorWith(message, path) });
|
||||
|
||||
describe('classifyRejectReason', () => {
|
||||
test.each([
|
||||
['missing_input', 'Either URL or file must be provided'],
|
||||
['both_provided', 'Cannot provide both URL and file'],
|
||||
['invalid_url', 'Invalid repository URL'],
|
||||
['url_too_long', 'Repository URL is too long'],
|
||||
['url_empty', 'Repository URL is required'],
|
||||
['invalid_file', 'Invalid file format'],
|
||||
['not_zip', 'Only ZIP files are allowed'],
|
||||
['file_too_large', 'File size must be less than 10MB'],
|
||||
['invalid_ignore_chars', 'Invalid characters in ignore patterns'],
|
||||
['include_too_long', 'Include patterns too long'],
|
||||
['ignore_too_long', 'Ignore patterns too long'],
|
||||
['missing_input', MESSAGES.MISSING_INPUT],
|
||||
['both_provided', MESSAGES.BOTH_PROVIDED],
|
||||
['invalid_url', MESSAGES.INVALID_URL],
|
||||
['url_too_long', MESSAGES.URL_TOO_LONG],
|
||||
['url_empty', MESSAGES.URL_REQUIRED],
|
||||
['invalid_file', MESSAGES.INVALID_FILE],
|
||||
['not_zip', MESSAGES.NOT_ZIP],
|
||||
['file_too_large', MESSAGES.FILE_TOO_LARGE],
|
||||
['invalid_ignore_chars', MESSAGES.INVALID_IGNORE_CHARS],
|
||||
['include_too_long', MESSAGES.INCLUDE_TOO_LONG],
|
||||
['ignore_too_long', MESSAGES.IGNORE_TOO_LONG],
|
||||
])('%s — classifies "%s"', (expected, message) => {
|
||||
expect(classifyRejectReason(zodErrorWith(message))).toBe(expected);
|
||||
// Wrapped via AppError.cause (the real production path)
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
import { MESSAGES } from './packRequestMessages.js';
|
||||
|
||||
// Shared log schema for `pack_completed` events. Used by packAction (success /
|
||||
// validation_error / pack_error) and rateLimitMiddleware (rate_limited) so that
|
||||
// log-based metric filters and outcome labels stay in sync between the two
|
||||
@@ -28,11 +30,11 @@ export function getRepoHost(input: { file?: unknown; url?: string }): string {
|
||||
}
|
||||
|
||||
// Map a validation error to a stable `rejectReason` label for log-based metrics.
|
||||
// Matches against the first zod issue's message (strings are stable because
|
||||
// they are defined in this project's schema — see packRequestSchema.ts). Falls
|
||||
// back to 'other' for unmapped paths so a sudden jump in `other` surfaces
|
||||
// unknown failure modes in the dashboard. NOTE: only the first issue is
|
||||
// classified — a request failing multiple validations (e.g. both URL and
|
||||
// Matches against the first zod issue's message — strings come from the
|
||||
// shared MESSAGES module so schema and classifier cannot drift out of sync.
|
||||
// Falls back to 'other' for unmapped paths so a sudden jump in `other`
|
||||
// surfaces unknown failure modes in the dashboard. NOTE: only the first issue
|
||||
// is classified — a request failing multiple validations (e.g. both URL and
|
||||
// options) gets bucketed by whichever zod surfaces first.
|
||||
//
|
||||
// `validateRequest` wraps ZodError in AppError, so the original issues live on
|
||||
@@ -42,36 +44,29 @@ export function getRepoHost(input: { file?: unknown; url?: string }): string {
|
||||
// Pre-zod paths (e.g. the JSON.parse failure in packAction) set
|
||||
// `rejectReason: 'invalid_json'` directly at the call site since the label is
|
||||
// statically known — no synthetic error needs to be routed through here.
|
||||
const MESSAGE_TO_REASON: Record<string, string> = {
|
||||
[MESSAGES.MISSING_INPUT]: 'missing_input',
|
||||
[MESSAGES.BOTH_PROVIDED]: 'both_provided',
|
||||
[MESSAGES.INVALID_URL]: 'invalid_url',
|
||||
[MESSAGES.URL_TOO_LONG]: 'url_too_long',
|
||||
[MESSAGES.URL_REQUIRED]: 'url_empty',
|
||||
[MESSAGES.INVALID_FILE]: 'invalid_file',
|
||||
[MESSAGES.NOT_ZIP]: 'not_zip',
|
||||
[MESSAGES.FILE_TOO_LARGE]: 'file_too_large',
|
||||
[MESSAGES.INVALID_IGNORE_CHARS]: 'invalid_ignore_chars',
|
||||
[MESSAGES.INCLUDE_TOO_LONG]: 'include_too_long',
|
||||
[MESSAGES.IGNORE_TOO_LONG]: 'ignore_too_long',
|
||||
};
|
||||
|
||||
export function classifyRejectReason(error: unknown): string {
|
||||
const issues = extractZodIssues(error);
|
||||
if (!issues || issues.length === 0) return 'unknown';
|
||||
|
||||
const first = issues[0];
|
||||
const msg = first?.message ?? '';
|
||||
switch (msg) {
|
||||
case 'Either URL or file must be provided':
|
||||
return 'missing_input';
|
||||
case 'Cannot provide both URL and file':
|
||||
return 'both_provided';
|
||||
case 'Invalid repository URL':
|
||||
return 'invalid_url';
|
||||
case 'Repository URL is too long':
|
||||
return 'url_too_long';
|
||||
case 'Repository URL is required':
|
||||
return 'url_empty';
|
||||
case 'Invalid file format':
|
||||
return 'invalid_file';
|
||||
case 'Only ZIP files are allowed':
|
||||
return 'not_zip';
|
||||
case 'File size must be less than 10MB':
|
||||
return 'file_too_large';
|
||||
case 'Invalid characters in ignore patterns':
|
||||
return 'invalid_ignore_chars';
|
||||
case 'Include patterns too long':
|
||||
return 'include_too_long';
|
||||
case 'Ignore patterns too long':
|
||||
return 'ignore_too_long';
|
||||
}
|
||||
const byMessage = MESSAGE_TO_REASON[msg];
|
||||
if (byMessage) return byMessage;
|
||||
|
||||
const path = Array.isArray(first?.path) ? first.path.join('.') : '';
|
||||
if (path === 'format') return 'invalid_format';
|
||||
return 'other';
|
||||
|
||||
@@ -0,0 +1,22 @@
|
||||
// Shared validation-error message strings used by both packRequestSchema (the
|
||||
// producer — zod issues) and packEventSchema.classifyRejectReason (the consumer
|
||||
// — maps message back to a metric label). Keeping these in one module makes
|
||||
// drift impossible by construction: a message rewrite propagates to both
|
||||
// sides automatically, and the reject-reason bucket on the dashboard stays
|
||||
// aligned with what zod actually emits.
|
||||
//
|
||||
// Tests import the same constants, so there is no third copy to keep in sync.
|
||||
|
||||
export const MESSAGES = {
|
||||
URL_REQUIRED: 'Repository URL is required',
|
||||
URL_TOO_LONG: 'Repository URL is too long',
|
||||
INVALID_URL: 'Invalid repository URL',
|
||||
INVALID_FILE: 'Invalid file format',
|
||||
NOT_ZIP: 'Only ZIP files are allowed',
|
||||
FILE_TOO_LARGE: 'File size must be less than 10MB',
|
||||
INVALID_IGNORE_CHARS: 'Invalid characters in ignore patterns',
|
||||
INCLUDE_TOO_LONG: 'Include patterns too long',
|
||||
IGNORE_TOO_LONG: 'Ignore patterns too long',
|
||||
MISSING_INPUT: 'Either URL or file must be provided',
|
||||
BOTH_PROVIDED: 'Cannot provide both URL and file',
|
||||
} as const;
|
||||
@@ -1,27 +1,28 @@
|
||||
import { isValidRemoteValue } from 'repomix';
|
||||
import { z } from 'zod';
|
||||
import { FILE_SIZE_LIMITS } from '../domains/pack/utils/fileUtils.js';
|
||||
import { MESSAGES } from './packRequestMessages.js';
|
||||
|
||||
export const packRequestSchema = z
|
||||
.object({
|
||||
url: z
|
||||
.string()
|
||||
.min(1, 'Repository URL is required')
|
||||
.max(200, 'Repository URL is too long')
|
||||
.min(1, MESSAGES.URL_REQUIRED)
|
||||
.max(200, MESSAGES.URL_TOO_LONG)
|
||||
.transform((val) => val.trim())
|
||||
.refine((val) => isValidRemoteValue(val), { message: 'Invalid repository URL' })
|
||||
.refine((val) => isValidRemoteValue(val), { message: MESSAGES.INVALID_URL })
|
||||
.optional(),
|
||||
file: z
|
||||
.custom<File>()
|
||||
.refine((file) => file instanceof File, {
|
||||
message: 'Invalid file format',
|
||||
message: MESSAGES.INVALID_FILE,
|
||||
})
|
||||
.refine((file) => file.type === 'application/zip' || file.name.endsWith('.zip'), {
|
||||
message: 'Only ZIP files are allowed',
|
||||
message: MESSAGES.NOT_ZIP,
|
||||
})
|
||||
.refine((file) => file.size <= FILE_SIZE_LIMITS.MAX_ZIP_SIZE, {
|
||||
// 10MB limit
|
||||
message: 'File size must be less than 10MB',
|
||||
message: MESSAGES.FILE_TOO_LARGE,
|
||||
})
|
||||
.optional(),
|
||||
format: z.enum(['xml', 'markdown', 'plain']),
|
||||
@@ -34,15 +35,15 @@ export const packRequestSchema = z
|
||||
directoryStructure: z.boolean().optional(),
|
||||
includePatterns: z
|
||||
.string()
|
||||
.max(100_000, 'Include patterns too long')
|
||||
.max(100_000, MESSAGES.INCLUDE_TOO_LONG)
|
||||
.optional()
|
||||
.transform((val) => val?.trim()),
|
||||
ignorePatterns: z
|
||||
.string()
|
||||
// Regular expression to validate ignore patterns
|
||||
// Allowed characters: alphanumeric, *, ?, /, -, _, ., !, (, ), space, comma
|
||||
.regex(/^[a-zA-Z0-9*?/\-_.,!()\s]*$/, 'Invalid characters in ignore patterns')
|
||||
.max(1000, 'Ignore patterns too long')
|
||||
.regex(/^[a-zA-Z0-9*?/\-_.,!()\s]*$/, MESSAGES.INVALID_IGNORE_CHARS)
|
||||
.max(1000, MESSAGES.IGNORE_TOO_LONG)
|
||||
.optional()
|
||||
.transform((val) => val?.trim()),
|
||||
outputParsable: z.boolean().optional(),
|
||||
@@ -52,10 +53,10 @@ export const packRequestSchema = z
|
||||
})
|
||||
.strict()
|
||||
.refine((data) => data.url || data.file, {
|
||||
message: 'Either URL or file must be provided',
|
||||
message: MESSAGES.MISSING_INPUT,
|
||||
})
|
||||
.refine((data) => !(data.url && data.file), {
|
||||
message: 'Cannot provide both URL and file',
|
||||
message: MESSAGES.BOTH_PROVIDED,
|
||||
});
|
||||
|
||||
export type PackRequest = z.infer<typeof packRequestSchema>;
|
||||
|
||||
Reference in New Issue
Block a user