diff --git a/plugins/compound-engineering/skills/ce-doc-review/references/findings-schema.json b/plugins/compound-engineering/skills/ce-doc-review/references/findings-schema.json index 35c52ab8..cf2a0014 100644 --- a/plugins/compound-engineering/skills/ce-doc-review/references/findings-schema.json +++ b/plugins/compound-engineering/skills/ce-doc-review/references/findings-schema.json @@ -60,7 +60,7 @@ "confidence": { "type": "integer", "enum": [0, 25, 50, 75, 100], - "description": "Anchored confidence score. Use exactly one of 0, 25, 50, 75, 100. Each anchor has a behavioral criterion the reviewer must honestly self-apply. 0: Not confident at all. This is a false positive that does not stand up to light scrutiny, or a pre-existing issue the document did not introduce. 25: Somewhat confident. Might be a real issue but could also be a false positive; the reviewer was not able to verify. Stylistic preferences not explicitly called out in the origin document land here. 50: Moderately confident. The reviewer verified this is a real issue but it may be a nitpick or not meaningfully affect plan correctness. Relative to the rest of the document, it is not very important. Advisory observations (the honest answer to 'what breaks if we do not fix this?' is 'nothing breaks, but...') land here. 75: Highly confident. The reviewer double-checked and verified the issue will be hit in practice by implementers or readers of this document. The existing approach is insufficient. The issue is important and will directly impact plan correctness, implementer understanding, or downstream execution. 100: Absolutely certain. The reviewer double-checked and confirmed the issue. The evidence directly confirms it will happen frequently in practice. The document text, codebase, or cross-references leave no room for interpretation." + "description": "Anchored confidence score. Use exactly one of 0, 25, 50, 75, 100. Each anchor has a behavioral criterion the reviewer must honestly self-apply. 0: Not confident at all. This is a false positive that does not stand up to light scrutiny, or a pre-existing issue the document did not introduce. 25: Somewhat confident. Might be a real issue but could also be a false positive; the reviewer was not able to verify. 50: Moderately confident. The reviewer verified this is a real issue but it may be a nitpick or not meaningfully affect plan correctness. Relative to the rest of the document, it is not very important. Advisory observations (the honest answer to 'what breaks if we do not fix this?' is 'nothing breaks, but...') land here. 75: Highly confident. The reviewer double-checked and verified the issue will be hit in practice by implementers or readers of this document. The existing approach is insufficient. The issue is important and will directly impact plan correctness, implementer understanding, or downstream execution. 100: Absolutely certain. The reviewer double-checked and confirmed the issue. The evidence directly confirms it will happen frequently in practice. The document text, codebase, or cross-references leave no room for interpretation." }, "evidence": { "type": "array", diff --git a/plugins/compound-engineering/skills/ce-doc-review/references/synthesis-and-presentation.md b/plugins/compound-engineering/skills/ce-doc-review/references/synthesis-and-presentation.md index 4c47d6c9..41a3e133 100644 --- a/plugins/compound-engineering/skills/ce-doc-review/references/synthesis-and-presentation.md +++ b/plugins/compound-engineering/skills/ce-doc-review/references/synthesis-and-presentation.md @@ -21,7 +21,7 @@ Gate findings by their `confidence` anchor value. Anchors are discrete integers | Anchor | Meaning | Route | |--------|---------|-------| | `0` | False positive or pre-existing issue | Drop silently | -| `25` | Might be real but could not verify; stylistic-not-in-origin | Drop silently | +| `25` | Might be real but could not verify | Drop silently | | `50` | Verified real but nitpick / advisory / not very important | Surface in FYI subsection | | `75` | Double-checked, will hit in practice, directly impacts correctness | Enter actionable tier (classify by `autofix_class`) | | `100` | Evidence directly confirms; will happen frequently | Enter actionable tier (classify by `autofix_class`) |