Files
compound-engineering-plugin…/tests/skills/ce-update.test.ts
2026-04-28 00:14:27 -07:00

249 lines
12 KiB
TypeScript

import { chmodSync, copyFileSync, mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from "fs"
import { execFileSync } from "child_process"
import { tmpdir } from "os"
import path from "path"
import { describe, expect, test } from "bun:test"
const SKILL_PATH = path.join(
process.cwd(),
"plugins/compound-engineering/skills/ce-update/SKILL.md",
)
const SKILL_BODY = readFileSync(SKILL_PATH, "utf8")
describe("ce-update SKILL.md", () => {
// Regression guard for https://github.com/EveryInc/compound-engineering-plugin/issues/556.
//
// `CLAUDE_PLUGIN_ROOT` points at the currently-loaded plugin version directory
// (e.g. `~/.claude/plugins/cache/<marketplace>/compound-engineering/<version>`),
// NOT the plugins cache root. Appending `/cache/<anything>/compound-engineering/`
// produces a path that never exists, which caused the cache-probe to fail and
// emit `__CE_UPDATE_CACHE_FAILED__` on every healthy install. Has regressed twice.
test("does not append a /cache/<marketplace>/ suffix onto CLAUDE_PLUGIN_ROOT", () => {
const antiPattern = /\$\{CLAUDE_PLUGIN_ROOT\}\/cache\//
expect(
antiPattern.test(SKILL_BODY),
"ce-update/SKILL.md reintroduced the ${CLAUDE_PLUGIN_ROOT}/cache/... antipattern — derive the cache dir from dirname \"${CLAUDE_PLUGIN_ROOT}\" instead.",
).toBe(false)
})
// Regression guard: a previous fix extracted pre-resolution logic into
// `!`bash "${CLAUDE_SKILL_DIR}/scripts/<name>.sh"`` commands. That cleared
// Claude Code's safety check but tripped its *permission* check at
// skill-load time, which does NOT honor `defaultMode: bypassPermissions`.
// The reliable fix is to invoke scripts from the skill body via the
// runtime Bash tool. Reintroducing any `!`bash <abs-path>`` pre-resolution
// would re-break the skill at load time — this test catches that.
test("does not use `!` pre-resolution to invoke bundled scripts", () => {
const preResolutions = SKILL_BODY.match(/!`[^`\n]*bash\s+[^`\n]*\.sh[^`\n]*`/g)
expect(
preResolutions,
`ce-update/SKILL.md must not use '!\`bash <path>.sh\`' pre-resolution — it hits Claude Code's load-time permission check, which does not honor 'defaultMode: bypassPermissions'. Move probes into the skill body via runtime Bash tool calls instead. Found: ${JSON.stringify(preResolutions)}`,
).toBeNull()
})
// The skill must reference each script in a runtime instruction so the
// agent collects the values before applying decision logic. The form
// `bash "${CLAUDE_SKILL_DIR}/scripts/<name>.sh"` (not bare relative paths)
// is required because the runtime Bash tool runs from the user's project
// CWD, not the skill directory — empirically, `bash scripts/<name>.sh`
// failed with "No such file or directory" when the skill tried it. The
// `${CLAUDE_SKILL_DIR}` env var Claude Code sets at runtime is the only
// portable way to resolve to the skill's own scripts directory across both
// marketplace-cached and `--plugin-dir` installs.
test("instructs the agent to invoke each probe script with a CLAUDE_SKILL_DIR-prefixed path", () => {
for (const script of ["upstream-version.sh", "currently-loaded-version.sh", "marketplace-name.sh"]) {
expect(
SKILL_BODY.includes(`bash "\${CLAUDE_SKILL_DIR}/scripts/${script}"`),
`ce-update/SKILL.md must instruct the agent to run 'bash "\${CLAUDE_SKILL_DIR}/scripts/${script}"' — relative paths like 'bash scripts/${script}' fail at runtime because the Bash tool's CWD is the user's project, not the skill directory.`,
).toBe(true)
}
})
// Regression guard: each probe is `bash <abs-path>` at runtime, which does
// not match the user's typical allow rules (most have `Bash(bash -c:*)` at
// most, not `Bash(bash:*)`). Without `allowed-tools` granting permission
// for the specific scripts, users without `defaultMode: bypassPermissions`
// get an approval prompt every time they run the skill. The patterns are
// pinned to each script filename — `Bash(bash *)` would be too broad.
test("declares narrow allowed-tools patterns for each probe script", () => {
const frontmatter = SKILL_BODY.match(/^---\n([\s\S]*?)\n---/)
expect(frontmatter, "ce-update/SKILL.md must have YAML frontmatter").not.toBeNull()
const allowedTools = frontmatter![1].match(/^allowed-tools:\s*(.+)$/m)
expect(
allowedTools,
"ce-update/SKILL.md must declare `allowed-tools:` for each probe script so users without bypassPermissions don't get a prompt every run.",
).not.toBeNull()
const tools = allowedTools![1]
for (const script of ["upstream-version.sh", "currently-loaded-version.sh", "marketplace-name.sh"]) {
expect(
tools.includes(`Bash(bash *${script})`),
`ce-update/SKILL.md allowed-tools must include 'Bash(bash *${script})' so the runtime Bash call passes the permission check without granting blanket Bash access (got: ${tools})`,
).toBe(true)
}
expect(
/Bash\(bash \*\)/.test(tools),
`ce-update/SKILL.md allowed-tools must NOT use the broad 'Bash(bash *)' pattern — pin to each script filename instead (got: ${tools})`,
).toBe(false)
})
})
// Regression guard for the runtime probe scripts that derive their own
// location from BASH_SOURCE rather than reading `${CLAUDE_SKILL_DIR}` from
// the environment. CLAUDE_SKILL_DIR is documented as a SKILL.md content
// substitution, not a guaranteed environment variable for Bash tool
// subprocesses; if the scripts read the env var directly and Claude Code
// doesn't export it, they always emit `__CE_UPDATE_NOT_MARKETPLACE__` and
// the skill never performs version comparison even on real marketplace
// installs.
//
// These tests run each script copied into a fake marketplace-shaped path,
// with CLAUDE_SKILL_DIR explicitly cleared from the environment, and assert
// that the script extracts the correct version/marketplace segment from its
// own location.
describe("ce-update probe scripts are self-locating", () => {
function runFromFakeMarketplace(scriptName: string, marketplaceName: string, version: string): string {
const root = mkdtempSync(path.join(tmpdir(), "ce-update-fake-marketplace-"))
try {
const skillDir = path.join(root, ".claude/plugins/cache", marketplaceName, "compound-engineering", version, "skills/ce-update")
mkdirSync(path.join(skillDir, "scripts"), { recursive: true })
const sourceScript = path.join(path.dirname(SKILL_PATH), "scripts", scriptName)
const targetScript = path.join(skillDir, "scripts", scriptName)
copyFileSync(sourceScript, targetScript)
chmodSync(targetScript, 0o755)
const env = { ...process.env }
delete env.CLAUDE_SKILL_DIR
return execFileSync("bash", [targetScript], { env, encoding: "utf8" }).trim()
} finally {
rmSync(root, { recursive: true, force: true })
}
}
test("currently-loaded-version.sh extracts version from BASH_SOURCE path without CLAUDE_SKILL_DIR", () => {
expect(runFromFakeMarketplace("currently-loaded-version.sh", "some-marketplace", "9.9.9")).toBe("9.9.9")
})
test("marketplace-name.sh extracts marketplace from BASH_SOURCE path without CLAUDE_SKILL_DIR", () => {
expect(runFromFakeMarketplace("marketplace-name.sh", "some-marketplace", "9.9.9")).toBe("some-marketplace")
})
})
// Regression guard for https://github.com/EveryInc/compound-engineering-plugin/issues/659.
//
// The marketplace installs plugin contents from `main` HEAD, so the cache
// folder basename reflects `plugin.json` at install time — not any release tag.
// Comparing the installed folder against the latest GitHub release tag caused
// a persistent false-positive "Out of date" whenever `main` was ahead of the
// last tag (the normal state between releases), and the prescribed fix
// (`claude plugin update ...`) reinstalled the same version, looping forever.
//
// Rather than grep-testing the script body, this suite executes
// `scripts/upstream-version.sh` against a mocked `gh` that returns
// distinguishable values for `gh api` vs `gh release list`. The script must
// report the version from `plugin.json`, not from release tags.
describe("ce-update upstream-version.sh script", () => {
const UPSTREAM_SCRIPT = path.join(path.dirname(SKILL_PATH), "scripts/upstream-version.sh")
test("returns the version from main's plugin.json, not any release tag", () => {
// Chosen so a tag-based fallback would produce a clearly different value
// than the plugin.json-based read. Either 1.0.0 or an empty/sentinel
// output indicates the script is reading the wrong source.
const pluginJsonVersion = "99.0.0"
const releaseTagVersion = "1.0.0"
const stdout = runUpstreamScript(UPSTREAM_SCRIPT, {
pluginJsonVersion,
releaseTagVersion,
})
expect(stdout).toBe(pluginJsonVersion)
})
test("emits __CE_UPDATE_VERSION_FAILED__ when upstream plugin.json cannot be read", () => {
// Simulates gh failing entirely (missing auth, offline, rate-limited).
// The fallback must produce the sentinel so the skill's decision logic
// can stop rather than silently compare against an empty string — a
// pipeline-style `|| echo` only catches last-stage failures, and jq on
// empty input exits 0 with no output.
const stdout = runUpstreamScript(UPSTREAM_SCRIPT, {
ghExitCode: 1,
})
expect(stdout).toContain("__CE_UPDATE_VERSION_FAILED__")
})
})
type MockOptions = {
pluginJsonVersion?: string
releaseTagVersion?: string
ghExitCode?: number
}
/**
* Run the upstream-version.sh script with a mocked `gh` on PATH. The mock
* emits distinct payloads for `gh api` vs `gh release list` so the test can
* prove which source the script actually reads from.
*/
function runUpstreamScript(scriptPath: string, options: MockOptions): string {
const { pluginJsonVersion, releaseTagVersion, ghExitCode } = options
const mockDir = mkdtempSync(path.join(tmpdir(), "ce-update-gh-"))
try {
const pluginJsonB64 = pluginJsonVersion
? Buffer.from(
JSON.stringify({ name: "compound-engineering", version: pluginJsonVersion }),
).toString("base64")
: ""
const releaseJson = releaseTagVersion
? JSON.stringify([{ tagName: `compound-engineering-v${releaseTagVersion}` }])
: "[]"
// Emulate gh's behaviour without requiring host `jq`: real `gh --jq` uses
// gojq embedded in the binary, so neither the script nor this mock needs
// an external jq on PATH. When the script asks a `--jq` filter that
// extracts `.version`, we emit the pre-computed plugin.json version; when
// it asks for `.tagName`, we emit the pre-computed release tag. Any other
// filter is unexpected and the mock fails loudly so the test doesn't pass
// by accident.
const ghScript = `#!/bin/bash
${ghExitCode !== undefined ? `exit ${ghExitCode}` : `
subcommand="$1"; shift
jq_filter=""
while [ $# -gt 0 ]; do
case "$1" in
--jq) jq_filter="$2"; shift 2 ;;
*) shift ;;
esac
done
case "$subcommand" in
api)
case "$jq_filter" in
*'.version'*) printf '%s\\n' '${pluginJsonVersion ?? ""}' ;;
'') printf '%s\\n' '{"content":"${pluginJsonB64}"}' ;;
*) echo "unexpected --jq filter for gh api: $jq_filter" >&2; exit 2 ;;
esac
;;
release)
# If the script ever falls back to release-tag lookup, this is what it gets.
case "$jq_filter" in
*'tagName'*) printf '%s\\n' '${releaseTagVersion ?? ""}' ;;
'') printf '%s\\n' '${releaseJson}' ;;
*) echo "unexpected --jq filter for gh release: $jq_filter" >&2; exit 2 ;;
esac
;;
*) exit 1 ;;
esac
`}`
const ghPath = path.join(mockDir, "gh")
writeFileSync(ghPath, ghScript)
chmodSync(ghPath, 0o755)
return execFileSync("bash", [scriptPath], {
env: {
...process.env,
PATH: `${mockDir}:${process.env.PATH ?? ""}`,
},
encoding: "utf8",
}).trim()
} finally {
rmSync(mockDir, { recursive: true, force: true })
}
}