Files
compound-engineering-plugin…/tests/skill-shell-safety.test.ts
2026-05-02 10:44:09 -07:00

476 lines
20 KiB
TypeScript

import { readdirSync, readFileSync, statSync } from "fs"
import path from "path"
import { describe, expect, test } from "bun:test"
/**
* Skill `!` backtick pre-resolution commands run through Claude Code's shell
* permission checker at skill-load time. The checker rejects several patterns
* outright, failing the skill before its body ever runs. AGENTS.md guidance
* for the `!` pre-resolution exception allows `&&`, `||`, `2>/dev/null`, and
* fallback sentinels — but several specific shapes are not on that allowlist.
*
* Past incidents:
* - PR #699 introduced a `case "$common" in /*) ... ;; *) ... ;; esac` block
* into ce-compound and ce-sessions to derive a worktree-stable repo name.
* The cleaner replacement is
* `basename "$(dirname "$(git rev-parse --path-format=absolute --git-common-dir 2>/dev/null)")"`.
* - PR #701 replaced the `case` blocks with `[A] && B || C` chains, which
* trip a different rejection: "ambiguous syntax with command separators".
* Issue #710. Fix: wrap the `&&` chain in a subshell, or split into
* scripts so the safety check sees only `bash <path>`.
* - The `basename "$(dirname "$common")"` shape (a double-quoted string
* containing `$()` containing another double-quoted string) trips
* "Unhandled node type: string". Issue #709. Fix: replace nested `$()`
* with parameter expansion, pipe to sed, or extract to a script.
* - ce-compound and ce-sessions used `git rev-parse ... | sed -E '...'` which
* trips the permission checker as "multiple operations". Fix: replace the
* pipe with bash parameter expansion (e.g. strip suffix, strip prefix).
* - ce-compound and ce-sessions used `git rev-parse --abbrev-ref HEAD 2>/dev/null`
* with no fallback. Outside a git repo, `git rev-parse` exits 128;
* `2>/dev/null` suppresses stderr but the non-zero exit propagates and
* Claude Code reports "Shell command failed for pattern" (issue #730). Fix:
* pair `2>/dev/null` with `|| true` or `|| echo '__SENTINEL__'`.
*/
const PLUGIN_SKILLS_GLOB = ["plugins/compound-engineering/skills", "plugins/coding-tutor/skills"]
function listSkillFiles(): string[] {
const out: string[] = []
for (const rel of PLUGIN_SKILLS_GLOB) {
const root = path.join(process.cwd(), rel)
try { statSync(root) } catch { continue }
for (const entry of readdirSync(root, { withFileTypes: true })) {
if (!entry.isDirectory()) continue
const skillDir = path.join(root, entry.name)
function walk(dir: string) {
for (const e of readdirSync(dir, { withFileTypes: true })) {
const full = path.join(dir, e.name)
if (e.isDirectory()) { walk(full); continue }
if (e.name.endsWith(".md")) out.push(full)
}
}
walk(skillDir)
}
}
return out
}
function findPreResolutionCommands(body: string): { lineNumber: number; command: string }[] {
// Scan the entire body so multi-line `!` blocks are also caught. `[^`]*`
// matches across newlines (line terminators are not special inside JS
// character classes), so wrapped commands surface here too.
const found: { lineNumber: number; command: string }[] = []
const regex = /!`([^`]*)`/g
let match: RegExpExecArray | null
while ((match = regex.exec(body)) !== null) {
const lineNumber = body.slice(0, match.index).split(/\r?\n/).length
found.push({ lineNumber, command: match[1] })
}
return found
}
/**
* Returns true when both `&&` and `||` appear at the same lexical depth (not
* inside `( ... )` subshells or `$( ... )` command substitutions, and not
* inside quoted strings). This is the `[A] && B || C` shell antipattern that
* Claude Code's safety check rejects as "ambiguous syntax".
*/
function hasTopLevelMixedAndOr(cmd: string): boolean {
let depth = 0
let inSingleQuote = false
let inDoubleQuote = false
let andAtDepth0 = false
let orAtDepth0 = false
for (let i = 0; i < cmd.length; i++) {
const c = cmd[i]
const next = cmd[i + 1]
if (!inDoubleQuote && c === "'") { inSingleQuote = !inSingleQuote; continue }
if (!inSingleQuote && c === '"') { inDoubleQuote = !inDoubleQuote; continue }
if (inSingleQuote || inDoubleQuote) continue
if (c === '$' && next === '(') { depth++; i++; continue }
if (c === '(') { depth++; continue }
if (c === ')') { depth--; continue }
if (depth === 0) {
if (c === '&' && next === '&') { andAtDepth0 = true; i++; continue }
if (c === '|' && next === '|') { orAtDepth0 = true; i++; continue }
}
}
return andAtDepth0 && orAtDepth0
}
/**
* Returns the contents of every top-level `$(...)` in the command, with
* matched parens preserved correctly even when nested. Used to detect the
* "Unhandled node type: string" pattern (a `$(...)` whose contents contain
* a double-quoted string).
*/
function findCommandSubstitutionContents(cmd: string): string[] {
const results: string[] = []
let i = 0
let inSingleQuote = false
while (i < cmd.length) {
const c = cmd[i]
if (c === "'" && !inSingleQuote) { inSingleQuote = true; i++; continue }
if (c === "'" && inSingleQuote) { inSingleQuote = false; i++; continue }
if (inSingleQuote) { i++; continue }
if (c === '$' && cmd[i + 1] === '(') {
let depth = 1
let j = i + 2
const start = j
while (j < cmd.length && depth > 0) {
if (cmd[j] === '$' && cmd[j + 1] === '(') { depth++; j += 2; continue }
if (cmd[j] === '(') { depth++; j++; continue }
if (cmd[j] === ')') { depth--; j++; continue }
j++
}
results.push(cmd.slice(start, Math.max(start, j - 1)))
i = j
continue
}
i++
}
return results
}
/**
* Returns true when any `$(...)` in the command contains a double-quoted
* string — the shape that trips Claude Code's "Unhandled node type: string"
* rejection (e.g., `basename "$(dirname "$common")"`).
*/
function hasNestedQuotedStringInCommandSubst(cmd: string): boolean {
return findCommandSubstitutionContents(cmd).some(s => s.includes('"'))
}
/**
* Returns true when the command's *trailing* top-level statement suppresses
* stderr with `2>/dev/null` but has no `||` fallback. The trailing statement
* determines the command's overall exit code (after `;` or `&&` chains). When
* that exit code is non-zero, Claude Code reports "Shell command failed for
* pattern" and aborts skill load before its body runs (issue #730). Established
* defensive pattern: pair `2>/dev/null` with `|| true` or `|| echo '__SENTINEL__'`.
*
* `2>/dev/null` inside an earlier `;`-chained statement is fine if the trailing
* statement (e.g., a final `echo`) succeeds — that case is not flagged.
*/
function hasUnguardedErrorSuppression(cmd: string): boolean {
let depth = 0
let inSingleQuote = false
let inDoubleQuote = false
let lastSeparatorEnd = 0
for (let i = 0; i < cmd.length; i++) {
const c = cmd[i]
const next = cmd[i + 1]
if (!inDoubleQuote && c === "'") { inSingleQuote = !inSingleQuote; continue }
if (!inSingleQuote && c === '"') { inDoubleQuote = !inDoubleQuote; continue }
if (inSingleQuote || inDoubleQuote) continue
if (c === '$' && next === '(') { depth++; i++; continue }
if (c === '(') { depth++; continue }
if (c === ')') { depth--; continue }
if (depth === 0) {
if (c === ';') { lastSeparatorEnd = i + 1; continue }
if (c === '&' && next === '&') { lastSeparatorEnd = i + 2; i++; continue }
if (c === '|' && next === '|') { lastSeparatorEnd = i + 2; i++; continue }
}
}
return cmd.slice(lastSeparatorEnd).includes("2>/dev/null")
}
/**
* Returns true when the command contains bash parameter expansion using
* pattern operators: `${var%pattern}`, `${var##pattern}`, `${var#pattern}`,
* `${var%%pattern}`, `${var/pat/repl}`, `${var:-default}`, etc.
* Claude Code's permission checker rejects these as "Contains expansion".
*
* Note: simple `${var}` (no operator after the variable name) is fine.
* The issue is operators like `%`, `#`, `/`, `:-`, `:=` that follow the name.
*/
function hasParameterExpansion(cmd: string): boolean {
// Match ${varname followed by any operator character that isn't just }
// Operators: %, #, /, :, ^ — any of these after the identifier
return /\$\{[A-Za-z_][A-Za-z0-9_]*[%#/:^][^}]*\}/.test(cmd)
}
/**
* Returns true when the command contains a top-level pipe (`|` that is not
* `||`). Claude Code's permission checker treats piped commands as separate
* operations and may require approval for each, causing skill-load failure
* when the user's permission mode is restrictive.
*/
function hasTopLevelPipe(cmd: string): boolean {
let depth = 0
let inSingleQuote = false
let inDoubleQuote = false
for (let i = 0; i < cmd.length; i++) {
const c = cmd[i]
const next = cmd[i + 1]
if (!inDoubleQuote && c === "'") { inSingleQuote = !inSingleQuote; continue }
if (!inSingleQuote && c === '"') { inDoubleQuote = !inDoubleQuote; continue }
if (inSingleQuote || inDoubleQuote) continue
if (c === '$' && next === '(') { depth++; i++; continue }
if (c === '(') { depth++; continue }
if (c === ')') { depth--; continue }
if (depth === 0 && c === '|' && next !== '|' && cmd[i - 1] !== '|') return true
}
return false
}
describe("findPreResolutionCommands", () => {
test("captures single-line `!` blocks with correct line numbers", () => {
const sample = "intro\n!`echo hi` mid !`echo bye`\nend"
expect(findPreResolutionCommands(sample)).toEqual([
{ lineNumber: 2, command: "echo hi" },
{ lineNumber: 2, command: "echo bye" },
])
})
test("captures multi-line `!` blocks", () => {
const sample = "intro\n!`one`\ngap\n!`split\nover\nlines`\nend"
expect(findPreResolutionCommands(sample)).toEqual([
{ lineNumber: 2, command: "one" },
{ lineNumber: 4, command: "split\nover\nlines" },
])
})
})
describe("hasTopLevelMixedAndOr", () => {
test("flags the `[A] && B || C` antipattern", () => {
expect(hasTopLevelMixedAndOr('[ -n "$x" ] && echo yes || echo no')).toBe(true)
})
test("does not flag `&&`-only chains", () => {
expect(hasTopLevelMixedAndOr('a=$(cmd) && [ -n "$a" ] && echo "$a"')).toBe(false)
})
test("does not flag `||`-only chains", () => {
expect(hasTopLevelMixedAndOr("cmd 2>/dev/null || echo fallback")).toBe(false)
})
test("does not flag `&&` inside subshells with `||` only at top level", () => {
expect(hasTopLevelMixedAndOr('(a && b) || (c && d) || echo fallback')).toBe(false)
})
test("does not flag operators inside quoted strings", () => {
expect(hasTopLevelMixedAndOr('echo "a && b || c"')).toBe(false)
})
})
describe("hasNestedQuotedStringInCommandSubst", () => {
test("flags `basename \"$(dirname \"$common\")\"`", () => {
expect(hasNestedQuotedStringInCommandSubst('basename "$(dirname "$common")"')).toBe(true)
})
test("flags deeply nested `$(dirname \"$(dirname \"$x\")\")`", () => {
expect(hasNestedQuotedStringInCommandSubst('basename "$(dirname "$(dirname "$x")")"')).toBe(true)
})
test("does not flag `$(...)` whose contents only contain single-quoted strings", () => {
expect(hasNestedQuotedStringInCommandSubst("a=$(gh api endpoint --jq '.field')")).toBe(false)
})
test("does not flag `$(...)` with no quoted strings inside", () => {
expect(hasNestedQuotedStringInCommandSubst('a=$(git rev-parse HEAD 2>/dev/null)')).toBe(false)
})
test("does not flag double-quoted strings outside any `$(...)`", () => {
expect(hasNestedQuotedStringInCommandSubst('echo "${VAR}/path"')).toBe(false)
})
})
describe("hasUnguardedErrorSuppression", () => {
test("flags single-statement `2>/dev/null` with no fallback", () => {
expect(hasUnguardedErrorSuppression("git rev-parse --abbrev-ref HEAD 2>/dev/null")).toBe(true)
})
test("flags `command -v <name> 2>/dev/null` with no fallback", () => {
expect(hasUnguardedErrorSuppression("command -v codex 2>/dev/null")).toBe(true)
})
test("does not flag `2>/dev/null || true`", () => {
expect(hasUnguardedErrorSuppression("git rev-parse --abbrev-ref HEAD 2>/dev/null || true")).toBe(false)
})
test("does not flag `2>/dev/null || echo '__SENTINEL__'`", () => {
expect(hasUnguardedErrorSuppression("git rev-parse --abbrev-ref origin/HEAD 2>/dev/null || echo '__DEFAULT_BRANCH_UNRESOLVED__'")).toBe(false)
})
test("does not flag commands without `2>/dev/null`", () => {
expect(hasUnguardedErrorSuppression("git status")).toBe(false)
})
test("does not flag `2>/dev/null` inside a guarded subshell", () => {
expect(hasUnguardedErrorSuppression("(top=$(git rev-parse --show-toplevel 2>/dev/null); cat \"$top/file\") || echo '__NO_CONFIG__'")).toBe(false)
})
test("does not flag `2>/dev/null` in non-trailing statement followed by always-succeeding tail", () => {
expect(hasUnguardedErrorSuppression('common=$(git rev-parse --git-common-dir 2>/dev/null); repo="${common%/.git}"; echo "${repo##*/}"')).toBe(false)
})
test("flags `2>/dev/null` on the trailing statement of a `;` chain", () => {
expect(hasUnguardedErrorSuppression("setup; cmd 2>/dev/null")).toBe(true)
})
test("flags `2>/dev/null` on the trailing statement of an `&&` chain", () => {
expect(hasUnguardedErrorSuppression("setup && cmd 2>/dev/null")).toBe(true)
})
test("flags `2>/dev/null` on the trailing statement of an `||` chain (a `||` belonging to an earlier statement does not protect a later unguarded one)", () => {
expect(hasUnguardedErrorSuppression("probe || git rev-parse --abbrev-ref HEAD 2>/dev/null")).toBe(true)
})
})
describe("hasParameterExpansion", () => {
test("flags ${var%pattern} (strip-suffix operator)", () => {
expect(hasParameterExpansion('repo="${common%/.git}"')).toBe(true)
})
test("flags ${var##pattern} (strip-prefix operator)", () => {
expect(hasParameterExpansion('echo "${repo##*/}"')).toBe(true)
})
test("flags ${var:-default} (default-value operator)", () => {
expect(hasParameterExpansion('echo "${var:-fallback}"')).toBe(true)
})
test("flags ${var/pat/repl} (substitution operator)", () => {
expect(hasParameterExpansion('echo "${var/foo/bar}"')).toBe(true)
})
test("does not flag simple ${var} expansion", () => {
expect(hasParameterExpansion('echo "${CLAUDE_SKILL_DIR}/scripts/foo.sh"')).toBe(false)
})
test("does not flag commands with no ${...}", () => {
expect(hasParameterExpansion("git rev-parse --abbrev-ref HEAD 2>/dev/null || true")).toBe(false)
})
test("does not flag $() command substitution", () => {
expect(hasParameterExpansion("top=$(git rev-parse --show-toplevel 2>/dev/null)")).toBe(false)
})
})
describe("hasTopLevelPipe", () => {
test("flags a simple pipe", () => {
expect(hasTopLevelPipe("git rev-parse --git-common-dir 2>/dev/null | sed -E 's|x||'")).toBe(true)
})
test("does not flag `||`", () => {
expect(hasTopLevelPipe("cmd 2>/dev/null || echo fallback")).toBe(false)
})
test("does not flag a pipe inside `$(...)`", () => {
expect(hasTopLevelPipe("x=$(echo foo | tr a b); echo $x")).toBe(false)
})
test("does not flag a pipe inside `(...)`", () => {
expect(hasTopLevelPipe("(echo foo | tr a b) || echo fallback")).toBe(false)
})
test("does not flag commands with no pipe", () => {
expect(hasTopLevelPipe("git rev-parse --abbrev-ref HEAD 2>/dev/null")).toBe(false)
})
})
describe("skill `!` pre-resolution commands avoid Claude Code denylist", () => {
const files = listSkillFiles()
for (const filePath of files) {
const rel = path.relative(process.cwd(), filePath)
const body = readFileSync(filePath, "utf8")
const preResolutionCommands = findPreResolutionCommands(body)
if (preResolutionCommands.length === 0) continue
test(`${rel} pre-resolution commands contain no \`case\`/\`esac\` (blocked by Claude Code permission check)`, () => {
const offenders = preResolutionCommands.filter(({ command }) =>
/\bcase\b/.test(command) && /\besac\b/.test(command),
)
const formatted = offenders
.map(({ lineNumber, command }) => ` line ${lineNumber}: ${command}`)
.join("\n")
expect(
offenders,
`Claude Code rejects \`case ... esac\` in \`!\` pre-resolution commands. Use \`if\`/\`then\`/\`else\` or \`&&\`/\`||\` chaining, or \`git rev-parse --path-format=absolute --git-common-dir\` for worktree-stable repo names.\nOffending commands:\n${formatted}`,
).toEqual([])
})
test(`${rel} pre-resolution commands do not mix \`&&\` and \`||\` at top level (issue #710)`, () => {
const offenders = preResolutionCommands.filter(({ command }) =>
hasTopLevelMixedAndOr(command),
)
const formatted = offenders
.map(({ lineNumber, command }) => ` line ${lineNumber}: ${command}`)
.join("\n")
expect(
offenders,
`Claude Code rejects the \`[A] && B || C\` antipattern as "ambiguous syntax with command separators". Wrap the \`&&\` chain in a subshell so only \`||\` remains at top level — \`(A && B) || C\` — or extract to a script.\nOffending commands:\n${formatted}`,
).toEqual([])
})
test(`${rel} pre-resolution commands do not nest double-quoted strings inside \`$(...)\` (issue #709)`, () => {
const offenders = preResolutionCommands.filter(({ command }) =>
hasNestedQuotedStringInCommandSubst(command),
)
const formatted = offenders
.map(({ lineNumber, command }) => ` line ${lineNumber}: ${command}`)
.join("\n")
expect(
offenders,
`Claude Code rejects \`$(...)\` containing a double-quoted string as "Unhandled node type: string" (e.g., \`basename "$(dirname "$common")"\`). Extract the logic to a script under \`scripts/\` and invoke it from the skill body — do NOT replace with \`\${var%/suffix}\` parameter expansion, which is also rejected as "Contains expansion".\nOffending commands:\n${formatted}`,
).toEqual([])
})
test(`${rel} pre-resolution commands do not use bash parameter expansion operators (rejected as "Contains expansion")`, () => {
const offenders = preResolutionCommands.filter(({ command }) =>
hasParameterExpansion(command),
)
const formatted = offenders
.map(({ lineNumber, command }) => ` line ${lineNumber}: ${command}`)
.join("\n")
expect(
offenders,
`Claude Code rejects bash parameter expansion operators (\`\${var%pat}\`, \`\${var##pat}\`, \`\${var:-default}\`, etc.) as "Contains expansion". Extract the logic to a script under \`scripts/\` and invoke it from the skill body (not from \`!\` pre-resolution — scripts called from \`!\` trip the permission gate at load time). Or remove the pre-resolution and let the agent derive the value at runtime via a Bash tool call.\nOffending commands:\n${formatted}`,
).toEqual([])
})
test(`${rel} pre-resolution commands that suppress stderr with \`2>/dev/null\` also include a \`||\` fallback (issue #730)`, () => {
const offenders = preResolutionCommands.filter(({ command }) =>
hasUnguardedErrorSuppression(command),
)
const formatted = offenders
.map(({ lineNumber, command }) => ` line ${lineNumber}: ${command}`)
.join("\n")
expect(
offenders,
`\`2>/dev/null\` only suppresses stderr — the non-zero exit code still propagates. Outside the success path (e.g., \`git rev-parse\` outside a git repo), Claude Code reports "Shell command failed for pattern" and aborts skill load. Pair \`2>/dev/null\` with \`|| true\` (when an empty result is fine) or \`|| echo '__SENTINEL__'\` (when the agent should distinguish "did not resolve" from "resolved to empty").\nOffending commands:\n${formatted}`,
).toEqual([])
})
test(`${rel} pre-resolution commands do not use top-level pipes (triggers permission check for multiple operations)`, () => {
const offenders = preResolutionCommands.filter(({ command }) =>
hasTopLevelPipe(command),
)
const formatted = offenders
.map(({ lineNumber, command }) => ` line ${lineNumber}: ${command}`)
.join("\n")
expect(
offenders,
`Claude Code's permission checker flags piped commands as "multiple operations requiring approval", which fails skill load. Do NOT replace with parameter expansion (\`\${var%/.git}\`, \`\${var##*/}\`) — those are also rejected as "Contains expansion". Extract the logic to a script under \`scripts/\` and invoke it from the skill body.\nOffending commands:\n${formatted}`,
).toEqual([])
})
}
})