diff --git a/src/core/file/truncateBase64.ts b/src/core/file/truncateBase64.ts index a8c31150..4f197b76 100644 --- a/src/core/file/truncateBase64.ts +++ b/src/core/file/truncateBase64.ts @@ -24,35 +24,36 @@ const standaloneBase64Pattern = new RegExp(`([A-Za-z0-9+/]{${MIN_BASE64_LENGTH_S const hasLongBase64Run = (content: string): boolean => { const len = content.length; if (len < MIN_BASE64_LENGTH_STANDALONE) return false; - // Newline pre-filter: `\n` is not a base64 character, so it always resets the - // run below. A run of `MIN_BASE64_LENGTH_STANDALONE` therefore has to fit - // inside a single line. If every line is shorter than that threshold no run is - // possible, and we can bail out before the per-character scan. `indexOf` is a - // native (memchr-style) scan, far cheaper than the charCodeAt loop, and the - // vast majority of source files have no such long line, so this skips the hot - // loop entirely for them. + // `\n` is not a base64 character, so it always resets the run counter below. A + // run of `MIN_BASE64_LENGTH_STANDALONE` therefore has to fit inside a single + // line, and only lines at least that long can possibly contain one. Walk the + // content one line at a time (via the native, memchr-style `indexOf('\n')`) + // and run the per-character scan *only* on lines that clear the threshold. + // Shorter lines provably cannot hold a qualifying run, so they are skipped + // without a charCodeAt pass — and the vast majority of source files have no + // long line at all, so the expensive loop is usually skipped entirely. + // Scanning only the long lines gives the same result as scanning the whole + // file (every short line included), because a run never spans a newline and + // a line shorter than the threshold cannot contain a qualifying run. 
let lineStart = 0; - let newlineIndex = content.indexOf('\n'); - while (newlineIndex !== -1) { - if (newlineIndex - lineStart >= MIN_BASE64_LENGTH_STANDALONE) break; - lineStart = newlineIndex + 1; - newlineIndex = content.indexOf('\n', lineStart); - } - // The final segment (after the last newline, or the whole content when there - // is none) also needs the length check before we can rule out a long run. - if (newlineIndex === -1 && len - lineStart < MIN_BASE64_LENGTH_STANDALONE) { - return false; - } - let run = 0; - for (let i = 0; i < len; i++) { - const c = content.charCodeAt(i); - // [A-Z]:65-90, [a-z]:97-122, [0-9]:48-57, '+':43, '/':47 - if ((c >= 65 && c <= 90) || (c >= 97 && c <= 122) || (c >= 48 && c <= 57) || c === 43 || c === 47) { - run++; - if (run >= MIN_BASE64_LENGTH_STANDALONE) return true; - } else { - run = 0; + while (true) { + const newlineIndex = content.indexOf('\n', lineStart); + const lineEnd = newlineIndex === -1 ? len : newlineIndex; + if (lineEnd - lineStart >= MIN_BASE64_LENGTH_STANDALONE) { + let run = 0; + for (let i = lineStart; i < lineEnd; i++) { + const c = content.charCodeAt(i); + // [A-Z]:65-90, [a-z]:97-122, [0-9]:48-57, '+':43, '/':47 + if ((c >= 65 && c <= 90) || (c >= 97 && c <= 122) || (c >= 48 && c <= 57) || c === 43 || c === 47) { + run++; + if (run >= MIN_BASE64_LENGTH_STANDALONE) return true; + } else { + run = 0; + } + } } + if (newlineIndex === -1) break; + lineStart = newlineIndex + 1; } return false; }; diff --git a/tests/core/file/truncateBase64.test.ts b/tests/core/file/truncateBase64.test.ts index b1a89a01..d46de0bc 100644 --- a/tests/core/file/truncateBase64.test.ts +++ b/tests/core/file/truncateBase64.test.ts @@ -130,7 +130,8 @@ describe('truncateBase64Content', () => { it('should not truncate a base64-like run split across a newline', () => { // A 320-char base64 body interrupted by a newline: neither line segment // reaches 256, and `\n` resets the run, so nothing should be truncated. 
- // Guards the newline pre-filter in `hasLongBase64Run`. + // Guards the line-scoped scan in `hasLongBase64Run`, which only inspects + // lines that individually reach the threshold. const half = longBase64.slice(0, 160); const input = `const data = "${half}\n${half}";`; const result = truncateBase64Content(input); @@ -138,8 +139,8 @@ describe('truncateBase64Content', () => { }); it('should truncate a long base64 run that follows many short lines', () => { - // Many short lines (each < 256) precede the real run, so the newline - // pre-filter must fall through to the full scan and still truncate. + // Many short lines (each < 256) precede the real run; the line-scoped scan + // must skip them and still reach the long final line to truncate. const shortLines = 'const a = 1;\n'.repeat(50); const input = `${shortLines}const data = "${longBase64}";`; const result = truncateBase64Content(input); @@ -147,9 +148,22 @@ describe('truncateBase64Content', () => { expect(result.startsWith(shortLines)).toBe(true); }); + it('should truncate a base64 run on a long line that follows another long non-base64 line', () => { + // The scan in `hasLongBase64Run` is line-scoped: it only character-scans + // lines that reach the length threshold. An earlier long line that is NOT a + // base64 run (here a 300-char run of '-', which resets the counter every + // char) must not cause the scanner to stop — the real run on the following + // long line still has to be detected and truncated. + const longDashes = '-'.repeat(300); + const input = `${longDashes}\nconst data = "${longBase64}";`; + const result = truncateBase64Content(input); + expect(result).toContain(longDashes); + expect(result).toContain('DTJXfKHG6xA1Wn+kye4TOF2Cp8zxFjtg...'); + }); + it('should truncate a base64 run on a CRLF-terminated line', () => { // The `\r` before `\n` is also non-base64; the long line must still be - // detected by the pre-filter and truncated by the full scan. 
+ // detected by the line-scoped scan and truncated. const input = `const data = "${longBase64}";\r\nconst next = 2;\r\n`; const result = truncateBase64Content(input); expect(result).toContain('DTJXfKHG6xA1Wn+kye4TOF2Cp8zxFjtg...'); @@ -157,8 +171,8 @@ describe('truncateBase64Content', () => { }); it('should truncate a long base64 run with no newline at all', () => { - // Single-line content (no `\n`): the pre-filter treats the whole string as - // one segment and must fall through to the full scan. + // Single-line content (no `\n`): the line-scoped scan treats the whole + // string as one segment and scans it directly. const input = `prefix-${longBase64}-suffix`; const result = truncateBase64Content(input); expect(result).toContain('DTJXfKHG6xA1Wn+kye4TOF2Cp8zxFjtg...');