refactor(server): Extract validation messages to shared constants

Addresses PR #1493 round 5 follow-up (schema-drift tracking) from claude.

intent(eliminate-drift): prior reviews flagged the string-coupling
  between packRequestSchema (produces zod messages) and
  classifyRejectReason (matches them back to a metric label) as
  fragile — a message rewrite in one place would silently land
  requests in the 'other' bucket, quietly mislabeling the dashboard.
  Extract all 11 messages to a shared MESSAGES module so the
  producer, the consumer, and the test's expected values all
  reference the same constants
decision(map-over-switch): classifyRejectReason's 11-case switch
  becomes a MESSAGE_TO_REASON lookup table. Same runtime behavior,
  easier to scan, and the keys are compile-time-typed via the
  shared constants (so a typo in either side is a TS error)
decision(preserve-invalid_format-path): 'invalid_format' is keyed by
  path (`format`), not by message text. Left as path-match so it
  continues to work even if zod's default enum error message changes
improve(test-references-constants): test.each cases now reference
  MESSAGES directly. The test continues to catch classifier-logic
  drift (wrong label for a known key, missing entry in the lookup)
  while schema/classifier drift becomes impossible by construction

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Kazuki Yamada
2026-04-18 23:07:25 +09:00
parent e580284729
commit 10846fbf8d
4 changed files with 85 additions and 63 deletions
+27 -23
View File
@@ -1,41 +1,45 @@
import { describe, expect, test } from 'vitest';
import { z } from 'zod';
import { classifyRejectReason, getRepoHost } from '../../../website/server/src/actions/packEventSchema.js';
import { MESSAGES } from '../../../website/server/src/actions/packRequestMessages.js';
// Classifier drift test. Messages below MUST match those defined in
// `website/server/src/actions/packRequestSchema.ts`. If a schema message is
// edited without updating this file, the classifier falls back to `'other'`
// and dashboards quietly mislabel — but that drift is not caught here
// (the test deliberately avoids importing the schema to keep this file's
// module graph free of server-only packages like `repomix` and `hono` that
// the root vitest harness doesn't install). For true schema drift catching,
// the schema and classifier would need to share a constants module.
// Classifier drift test — imports MESSAGES from the same shared module that
// packRequestSchema uses. This means a message-text rewrite automatically
// propagates to the schema (producer), the classifier (consumer), AND the
// test's expected values, so schema/classifier drift is impossible by
// construction. The test's value is catching classifier-logic drift: if the
// classifier's MESSAGE_TO_REASON map loses a key (or maps it to the wrong
// label), the corresponding case here fails.
//
// Deliberately avoids importing packRequestSchema itself — that file
// transitively depends on `repomix`, which the root vitest harness can't
// resolve because repomix IS this repo.
// Construct a ZodError with a single issue whose message matches the one the
// schema would produce. classifyRejectReason only reads `.message` and
// `.path` from the first issue.
// Construct a ZodError with a single issue whose message matches the shared
// constant. classifyRejectReason only reads `.message` and `.path` from the
// first issue.
const zodErrorWith = (message: string, path: (string | number)[] = []) =>
new z.ZodError([{ code: 'custom', message, path, input: undefined }]);
// Mimic the AppError-with-cause wrapping that `validateRequest` does in
// production — native Error with `cause` is enough to exercise the
// cause-chain path in classifyRejectReason, no AppError import needed.
// cause-chain path in classifyRejectReason.
const wrapped = (message: string, path: (string | number)[] = []) =>
new Error(`Invalid request: ${message}`, { cause: zodErrorWith(message, path) });
describe('classifyRejectReason', () => {
test.each([
['missing_input', 'Either URL or file must be provided'],
['both_provided', 'Cannot provide both URL and file'],
['invalid_url', 'Invalid repository URL'],
['url_too_long', 'Repository URL is too long'],
['url_empty', 'Repository URL is required'],
['invalid_file', 'Invalid file format'],
['not_zip', 'Only ZIP files are allowed'],
['file_too_large', 'File size must be less than 10MB'],
['invalid_ignore_chars', 'Invalid characters in ignore patterns'],
['include_too_long', 'Include patterns too long'],
['ignore_too_long', 'Ignore patterns too long'],
['missing_input', MESSAGES.MISSING_INPUT],
['both_provided', MESSAGES.BOTH_PROVIDED],
['invalid_url', MESSAGES.INVALID_URL],
['url_too_long', MESSAGES.URL_TOO_LONG],
['url_empty', MESSAGES.URL_REQUIRED],
['invalid_file', MESSAGES.INVALID_FILE],
['not_zip', MESSAGES.NOT_ZIP],
['file_too_large', MESSAGES.FILE_TOO_LARGE],
['invalid_ignore_chars', MESSAGES.INVALID_IGNORE_CHARS],
['include_too_long', MESSAGES.INCLUDE_TOO_LONG],
['ignore_too_long', MESSAGES.IGNORE_TOO_LONG],
])('%s — classifies "%s"', (expected, message) => {
expect(classifyRejectReason(zodErrorWith(message))).toBe(expected);
// Wrapped via AppError.cause (the real production path)
+24 -29
View File
@@ -1,3 +1,5 @@
import { MESSAGES } from './packRequestMessages.js';
// Shared log schema for `pack_completed` events. Used by packAction (success /
// validation_error / pack_error) and rateLimitMiddleware (rate_limited) so that
// log-based metric filters and outcome labels stay in sync between the two
@@ -28,11 +30,11 @@ export function getRepoHost(input: { file?: unknown; url?: string }): string {
}
// Map a validation error to a stable `rejectReason` label for log-based metrics.
// Matches against the first zod issue's message (strings are stable because
// they are defined in this project's schema — see packRequestSchema.ts). Falls
// back to 'other' for unmapped paths so a sudden jump in `other` surfaces
// unknown failure modes in the dashboard. NOTE: only the first issue is
// classified — a request failing multiple validations (e.g. both URL and
// Matches against the first zod issue's message strings come from the
// shared MESSAGES module so schema and classifier cannot drift out of sync.
// Falls back to 'other' for unmapped paths so a sudden jump in `other`
// surfaces unknown failure modes in the dashboard. NOTE: only the first issue
// is classified — a request failing multiple validations (e.g. both URL and
// options) gets bucketed by whichever zod surfaces first.
//
// `validateRequest` wraps ZodError in AppError, so the original issues live on
@@ -42,36 +44,29 @@ export function getRepoHost(input: { file?: unknown; url?: string }): string {
// Pre-zod paths (e.g. the JSON.parse failure in packAction) set
// `rejectReason: 'invalid_json'` directly at the call site since the label is
// statically known — no synthetic error needs to be routed through here.
const MESSAGE_TO_REASON: Record<string, string> = {
[MESSAGES.MISSING_INPUT]: 'missing_input',
[MESSAGES.BOTH_PROVIDED]: 'both_provided',
[MESSAGES.INVALID_URL]: 'invalid_url',
[MESSAGES.URL_TOO_LONG]: 'url_too_long',
[MESSAGES.URL_REQUIRED]: 'url_empty',
[MESSAGES.INVALID_FILE]: 'invalid_file',
[MESSAGES.NOT_ZIP]: 'not_zip',
[MESSAGES.FILE_TOO_LARGE]: 'file_too_large',
[MESSAGES.INVALID_IGNORE_CHARS]: 'invalid_ignore_chars',
[MESSAGES.INCLUDE_TOO_LONG]: 'include_too_long',
[MESSAGES.IGNORE_TOO_LONG]: 'ignore_too_long',
};
export function classifyRejectReason(error: unknown): string {
const issues = extractZodIssues(error);
if (!issues || issues.length === 0) return 'unknown';
const first = issues[0];
const msg = first?.message ?? '';
switch (msg) {
case 'Either URL or file must be provided':
return 'missing_input';
case 'Cannot provide both URL and file':
return 'both_provided';
case 'Invalid repository URL':
return 'invalid_url';
case 'Repository URL is too long':
return 'url_too_long';
case 'Repository URL is required':
return 'url_empty';
case 'Invalid file format':
return 'invalid_file';
case 'Only ZIP files are allowed':
return 'not_zip';
case 'File size must be less than 10MB':
return 'file_too_large';
case 'Invalid characters in ignore patterns':
return 'invalid_ignore_chars';
case 'Include patterns too long':
return 'include_too_long';
case 'Ignore patterns too long':
return 'ignore_too_long';
}
const byMessage = MESSAGE_TO_REASON[msg];
if (byMessage) return byMessage;
const path = Array.isArray(first?.path) ? first.path.join('.') : '';
if (path === 'format') return 'invalid_format';
return 'other';
@@ -0,0 +1,22 @@
// Shared validation-error message strings used by both packRequestSchema (the
// producer — zod issues) and packEventSchema.classifyRejectReason (the consumer
// — maps message back to a metric label). Keeping these in one module makes
// drift impossible by construction: a message rewrite propagates to both
// sides automatically, and the reject-reason bucket on the dashboard stays
// aligned with what zod actually emits.
//
// Tests import the same constants, so there is no third copy to keep in sync.
export const MESSAGES = {
URL_REQUIRED: 'Repository URL is required',
URL_TOO_LONG: 'Repository URL is too long',
INVALID_URL: 'Invalid repository URL',
INVALID_FILE: 'Invalid file format',
NOT_ZIP: 'Only ZIP files are allowed',
FILE_TOO_LARGE: 'File size must be less than 10MB',
INVALID_IGNORE_CHARS: 'Invalid characters in ignore patterns',
INCLUDE_TOO_LONG: 'Include patterns too long',
IGNORE_TOO_LONG: 'Ignore patterns too long',
MISSING_INPUT: 'Either URL or file must be provided',
BOTH_PROVIDED: 'Cannot provide both URL and file',
} as const;
+12 -11
View File
@@ -1,27 +1,28 @@
import { isValidRemoteValue } from 'repomix';
import { z } from 'zod';
import { FILE_SIZE_LIMITS } from '../domains/pack/utils/fileUtils.js';
import { MESSAGES } from './packRequestMessages.js';
export const packRequestSchema = z
.object({
url: z
.string()
.min(1, 'Repository URL is required')
.max(200, 'Repository URL is too long')
.min(1, MESSAGES.URL_REQUIRED)
.max(200, MESSAGES.URL_TOO_LONG)
.transform((val) => val.trim())
.refine((val) => isValidRemoteValue(val), { message: 'Invalid repository URL' })
.refine((val) => isValidRemoteValue(val), { message: MESSAGES.INVALID_URL })
.optional(),
file: z
.custom<File>()
.refine((file) => file instanceof File, {
message: 'Invalid file format',
message: MESSAGES.INVALID_FILE,
})
.refine((file) => file.type === 'application/zip' || file.name.endsWith('.zip'), {
message: 'Only ZIP files are allowed',
message: MESSAGES.NOT_ZIP,
})
.refine((file) => file.size <= FILE_SIZE_LIMITS.MAX_ZIP_SIZE, {
// 10MB limit
message: 'File size must be less than 10MB',
message: MESSAGES.FILE_TOO_LARGE,
})
.optional(),
format: z.enum(['xml', 'markdown', 'plain']),
@@ -34,15 +35,15 @@ export const packRequestSchema = z
directoryStructure: z.boolean().optional(),
includePatterns: z
.string()
.max(100_000, 'Include patterns too long')
.max(100_000, MESSAGES.INCLUDE_TOO_LONG)
.optional()
.transform((val) => val?.trim()),
ignorePatterns: z
.string()
// Regular expression to validate ignore patterns
// Allowed characters: alphanumeric, *, ?, /, -, _, ., !, (, ), space, comma
.regex(/^[a-zA-Z0-9*?/\-_.,!()\s]*$/, 'Invalid characters in ignore patterns')
.max(1000, 'Ignore patterns too long')
.regex(/^[a-zA-Z0-9*?/\-_.,!()\s]*$/, MESSAGES.INVALID_IGNORE_CHARS)
.max(1000, MESSAGES.IGNORE_TOO_LONG)
.optional()
.transform((val) => val?.trim()),
outputParsable: z.boolean().optional(),
@@ -52,10 +53,10 @@ export const packRequestSchema = z
})
.strict()
.refine((data) => data.url || data.file, {
message: 'Either URL or file must be provided',
message: MESSAGES.MISSING_INPUT,
})
.refine((data) => !(data.url && data.file), {
message: 'Cannot provide both URL and file',
message: MESSAGES.BOTH_PROVIDED,
});
export type PackRequest = z.infer<typeof packRequestSchema>;