mirror of
https://github.com/obra/superpowers.git
synced 2026-05-15 21:49:05 +08:00
Tighten cross-platform tool references
This commit is contained in:
@@ -45,7 +45,7 @@ Use OpenCode's native `skill` tool:
|
||||
|
||||
```
|
||||
use skill tool to list skills
|
||||
use skill tool to load superpowers/brainstorming
|
||||
use skill tool to load brainstorming
|
||||
```
|
||||
|
||||
## Updating
|
||||
@@ -103,10 +103,11 @@ Skills speak in actions ("create a todo", "dispatch a subagent", "read a file").
|
||||
- "Create a todo" / "mark complete in todo list" → `todowrite`
|
||||
- `Subagent (general-purpose):` template → `task` tool with `subagent_type: "general"` (or `"explore"` for codebase exploration)
|
||||
- "Invoke a skill" → OpenCode's native `skill` tool
|
||||
- "Read a file" / "create a file" / "edit a file" → `read`, `write`, `edit`
|
||||
- "Read a file" → `read`
|
||||
- "Create a file" / "edit a file" / "delete a file" → `apply_patch`
|
||||
- "Run a shell command" → `bash`
|
||||
- "Search file contents" / "find files by name" → `grep`, `glob`
|
||||
- "Fetch a URL" / "search the web" → `webfetch`, `websearch`
|
||||
- "Fetch a URL" → `webfetch`
|
||||
|
||||
## Getting Help
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
/**
|
||||
* Superpowers plugin for OpenCode.ai
|
||||
*
|
||||
* Injects superpowers bootstrap context via system prompt transform.
|
||||
* Injects superpowers bootstrap context via message transform.
|
||||
* Auto-registers skills directory via config hook (no symlinks needed).
|
||||
*/
|
||||
|
||||
@@ -74,11 +74,15 @@ export const SuperpowersPlugin = async ({ client, directory }) => {
|
||||
const { content } = extractAndStripFrontmatter(fullContent);
|
||||
|
||||
const toolMapping = `**Tool Mapping for OpenCode:**
|
||||
When skills reference tools you don't have, substitute OpenCode equivalents:
|
||||
- \`TodoWrite\` → \`todowrite\`
|
||||
- \`Task\` tool with subagents → Use OpenCode's subagent system (@mention)
|
||||
- \`Skill\` tool → OpenCode's native \`skill\` tool
|
||||
- \`Read\`, \`Write\`, \`Edit\`, \`Bash\` → Your native tools
|
||||
When skills request actions, substitute OpenCode equivalents:
|
||||
- Create or update todos → \`todowrite\`
|
||||
- \`Subagent (general-purpose):\` → \`task\` with \`subagent_type: "general"\`
|
||||
- Invoke a skill → OpenCode's native \`skill\` tool
|
||||
- Read files → \`read\`
|
||||
- Create, edit, or delete files → \`apply_patch\`
|
||||
- Run shell commands → \`bash\`
|
||||
- Search files → \`grep\`, \`glob\`
|
||||
- Fetch a URL → \`webfetch\`
|
||||
|
||||
Use OpenCode's native \`skill\` tool to list and load skills.`;
|
||||
|
||||
|
||||
@@ -50,7 +50,7 @@ use skill tool to list skills
|
||||
### Loading a Skill
|
||||
|
||||
```
|
||||
use skill tool to load superpowers/brainstorming
|
||||
use skill tool to load brainstorming
|
||||
```
|
||||
|
||||
### Personal Skills
|
||||
@@ -99,7 +99,7 @@ To pin a specific version, use a branch or tag:
|
||||
|
||||
The plugin does two things:
|
||||
|
||||
1. **Injects bootstrap context** via the `experimental.chat.system.transform` hook, adding superpowers awareness to every conversation.
|
||||
1. **Injects bootstrap context** via the `experimental.chat.messages.transform` hook, adding superpowers awareness to every conversation.
|
||||
2. **Registers the skills directory** via the `config` hook, so OpenCode discovers all superpowers skills without symlinks or manual config.
|
||||
|
||||
### Tool Mapping
|
||||
@@ -109,10 +109,11 @@ Skills speak in actions rather than naming any one runtime's tools. On OpenCode
|
||||
- "Create a todo" / "mark complete in todo list" → `todowrite`
|
||||
- `Subagent (general-purpose):` template → OpenCode's `task` tool with `subagent_type: "general"` (or `"explore"` for codebase exploration)
|
||||
- "Invoke a skill" → OpenCode's native `skill` tool
|
||||
- "Read a file" / "create a file" / "edit a file" → `read`, `write`, `edit`
|
||||
- "Read a file" → `read`
|
||||
- "Create a file" / "edit a file" / "delete a file" → `apply_patch`
|
||||
- "Run a shell command" → `bash`
|
||||
- "Search file contents" / "find files by name" → `grep`, `glob`
|
||||
- "Fetch a URL" / "search the web" → `webfetch`, `websearch`
|
||||
- "Fetch a URL" → `webfetch`
|
||||
|
||||
(Verified against the installed OpenCode CLI's tool inventory.)
|
||||
|
||||
@@ -152,7 +153,7 @@ Then use the installed package path in `opencode.json`:
|
||||
|
||||
### Bootstrap not appearing
|
||||
|
||||
1. Check OpenCode version supports `experimental.chat.system.transform` hook
|
||||
1. Check that your OpenCode version supports the `experimental.chat.messages.transform` hook
|
||||
2. Restart OpenCode after config changes
|
||||
|
||||
## Getting Help
|
||||
|
||||
@@ -14,7 +14,7 @@ Live in `tests/`. Currently:
|
||||
- `tests/codex-plugin-sync/` — bash sync verification.
|
||||
- `tests/claude-code/test-helpers.sh`, `analyze-token-usage.py` — utilities used by remaining bash tests.
|
||||
- `tests/claude-code/test-subagent-driven-development.sh` — agent-can-describe-SDD test (no drill counterpart; tests description-recall, not behavior).
|
||||
- `tests/claude-code/test-subagent-driven-development-integration.sh` — extended SDD integration with token analysis (drill covers the YAGNI subset; bash adds commit-count, TodoWrite, and token telemetry assertions).
|
||||
- `tests/claude-code/test-subagent-driven-development-integration.sh` — extended SDD integration with token analysis (drill covers the YAGNI subset; bash adds commit-count, Claude Code task-tracking, and token telemetry assertions).
|
||||
- `tests/claude-code/test-worktree-native-preference.sh` — RED-GREEN-REFACTOR validation for worktree skill (drill covers the PRESSURE phase; bash also covers RED/GREEN baselines).
|
||||
- `tests/explicit-skill-requests/` — Haiku-specific, multi-turn, and skill-name-prompted tests not covered by drill.
|
||||
|
||||
|
||||
@@ -31,7 +31,7 @@ HEAD_SHA=$(git rev-parse HEAD)
|
||||
|
||||
**2. Dispatch code reviewer subagent:**
|
||||
|
||||
Dispatch a `general-purpose` subagent, filling the template at `code-reviewer.md`
|
||||
Dispatch a `general-purpose` subagent, filling the template at [code-reviewer.md](code-reviewer.md)
|
||||
|
||||
**Placeholders:**
|
||||
- `{DESCRIPTION}` - Brief summary of what you built
|
||||
@@ -100,4 +100,4 @@ You: [Fix progress indicators]
|
||||
- Show code/tests that prove it works
|
||||
- Request clarification
|
||||
|
||||
See template at: requesting-code-review/code-reviewer.md
|
||||
See template at: [code-reviewer.md](code-reviewer.md)
|
||||
|
||||
@@ -121,9 +121,9 @@ Implementer subagents report one of four statuses. Handle each appropriately:
|
||||
|
||||
## Prompt Templates
|
||||
|
||||
- `./implementer-prompt.md` - Dispatch implementer subagent
|
||||
- `./spec-reviewer-prompt.md` - Dispatch spec compliance reviewer subagent
|
||||
- `./code-quality-reviewer-prompt.md` - Dispatch code quality reviewer subagent
|
||||
- [implementer-prompt.md](implementer-prompt.md) - Dispatch implementer subagent
|
||||
- [spec-reviewer-prompt.md](spec-reviewer-prompt.md) - Dispatch spec compliance reviewer subagent
|
||||
- [code-quality-reviewer-prompt.md](code-quality-reviewer-prompt.md) - Dispatch code quality reviewer subagent
|
||||
|
||||
## Example Workflow
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@ Use this template when dispatching a code quality reviewer subagent.
|
||||
|
||||
```
|
||||
Subagent (general-purpose):
|
||||
Use template at requesting-code-review/code-reviewer.md
|
||||
Use template at ../requesting-code-review/code-reviewer.md
|
||||
|
||||
DESCRIPTION: [task summary, from implementer's report]
|
||||
PLAN_OR_REQUIREMENTS: Task N from [plan-file]
|
||||
|
||||
@@ -41,7 +41,7 @@ If CLAUDE.md, GEMINI.md, or AGENTS.md says "don't use TDD" and a skill says "alw
|
||||
|
||||
## Platform Adaptation
|
||||
|
||||
Skills speak in actions ("dispatch a subagent", "create a todo", "read a file") rather than naming any one runtime's tools. For per-platform tool equivalents and instructions-file conventions, see `references/claude-code-tools.md`, `references/codex-tools.md`, `references/copilot-tools.md`, and `references/gemini-tools.md`. Gemini CLI users get the tool mapping loaded automatically via GEMINI.md.
|
||||
Skills speak in actions ("dispatch a subagent", "create a todo", "read a file") rather than naming any one runtime's tools. For per-platform tool equivalents and instructions-file conventions, see [claude-code-tools.md](references/claude-code-tools.md), [codex-tools.md](references/codex-tools.md), [copilot-tools.md](references/copilot-tools.md), and [gemini-tools.md](references/gemini-tools.md). Gemini CLI users get the tool mapping loaded automatically via GEMINI.md.
|
||||
|
||||
# Using Skills
|
||||
|
||||
|
||||
@@ -17,7 +17,7 @@ Skills speak in actions ("dispatch a subagent", "create a todo", "read a file").
|
||||
| Invoke a skill | `Skill` |
|
||||
| Dispatch a subagent (`Subagent (general-purpose):` template) | `Agent` (older releases named this `Task`) |
|
||||
| Multiple parallel dispatches | Multiple `Agent` calls in one response |
|
||||
| Task tracking ("create a todo", "mark complete") | `TaskCreate`, `TaskUpdate`, `TaskList`, `TaskGet` (was a single tool named `TodoWrite` in older releases) |
|
||||
| Task tracking ("create a todo", "mark complete") | `TaskCreate`, `TaskUpdate`, `TaskList`, `TaskGet`; `TodoWrite` in `claude -p` / Agent SDK unless `CLAUDE_CODE_ENABLE_TASKS=1` is set |
|
||||
| Background-process / subagent lifecycle (read output, cancel) | `TaskOutput`, `TaskStop` — these are distinct from the todo tools above and apply to running shells, agents, and remote sessions |
|
||||
|
||||
## Instructions file
|
||||
|
||||
@@ -13,9 +13,9 @@ Skills speak in actions ("dispatch a subagent", "create a todo", "read a file").
|
||||
| Search the web | `web_search` |
|
||||
| Invoke a skill | `skill` |
|
||||
| Dispatch a subagent (`Subagent (general-purpose):` template) | `task` with `agent_type: "general-purpose"` (other accepted types: `explore`, `task`, `code-review`, `research`, `configure-copilot`) |
|
||||
| Multiple parallel dispatches | Multiple `task` calls in one response (or wrap with the `parallel` tool) |
|
||||
| Multiple parallel dispatches | Multiple `task` calls in one response |
|
||||
| Subagent status/output/control | `read_agent`, `list_agents`, `write_agent` |
|
||||
| Task tracking ("create a todo", "mark complete") | `sql` with the built-in `todos` table |
|
||||
| Task tracking ("create a todo", "mark complete") | `update_todo` |
|
||||
| Enter / exit plan mode | No equivalent — stay in the main session |
|
||||
|
||||
## Instructions file
|
||||
|
||||
@@ -29,14 +29,14 @@ User-level skills live at **`~/.gemini/skills/`**, with **`~/.agents/skills/`**
|
||||
|
||||
## Subagent support
|
||||
|
||||
Gemini CLI dispatches subagents through the `invoke_agent` tool, which takes `agent_name` and `prompt` parameters. The same dispatch is also surfaced as a chat-syntax shortcut: typing `@generalist <prompt>` is equivalent to calling `invoke_agent` with `agent_name: "generalist"`. Built-in agent names include `generalist`, `cli_help`, `codebase_investigator`, and (with browser tooling enabled) the browser agent.
|
||||
Gemini CLI dispatches subagents through the `invoke_agent` tool, which takes `agent_name` and `prompt` parameters. The same dispatch is also surfaced as a chat-syntax shortcut: typing `@generalist <prompt>` is equivalent to calling `invoke_agent` with `agent_name: "generalist"`. Built-in agent names include `generalist`, `cli_help`, `codebase_investigator`, and (with browser tooling enabled) `browser_agent`.
|
||||
|
||||
Skills dispatch with `Subagent (general-purpose):` and either reference a prompt-template file (e.g., `subagent-driven-development/implementer-prompt.md`) or supply an inline prompt. On Gemini CLI:
|
||||
Skills dispatch with `Subagent (general-purpose):` and either reference a prompt-template file (e.g., `superpowers:subagent-driven-development`'s `./implementer-prompt.md`) or supply an inline prompt. On Gemini CLI:
|
||||
|
||||
| Skill dispatch form | Gemini CLI equivalent |
|
||||
|---------------------|----------------------|
|
||||
| References a `*-prompt.md` template (implementer, spec-reviewer, code-quality-reviewer, code-reviewer, etc.) | Fill the template, then `invoke_agent` with `agent_name: "generalist"` and the filled prompt |
|
||||
| References `requesting-code-review/code-reviewer.md` | `invoke_agent` with `agent_name: "generalist"` and the filled review template |
|
||||
| References `superpowers:requesting-code-review`'s `./code-reviewer.md` | `invoke_agent` with `agent_name: "generalist"` and the filled review template |
|
||||
| Inline prompt (no template referenced) | `invoke_agent` with `agent_name: "generalist"` and your inline prompt |
|
||||
|
||||
### Prompt filling
|
||||
@@ -53,11 +53,11 @@ These tools are unique to Gemini CLI:
|
||||
|
||||
| Tool | Purpose |
|
||||
|------|---------|
|
||||
| `save_memory` | Persist facts to GEMINI.md across sessions |
|
||||
| `save_memory` (legacy) | Persist facts across sessions when `experimental.memoryV2 = false` |
|
||||
| `get_internal_docs` | Look up Gemini CLI's bundled documentation |
|
||||
| `ask_user` | Pose structured questions to the user (text / single-select / multi-select) |
|
||||
| `enter_plan_mode` / `exit_plan_mode` | Switch into and out of read-only plan mode |
|
||||
| `update_topic` | Update the current conversation's topic / strategic-intent metadata |
|
||||
| `complete_task` | Signal completion of the current top-level task |
|
||||
| `complete_task` | Signal that a Gemini subagent has completed and return its result to the parent agent |
|
||||
| `tracker_create_task`, `tracker_update_task`, `tracker_get_task`, `tracker_list_tasks`, `tracker_add_dependency`, `tracker_visualize` | Rich task tracker with dependency and visualization support |
|
||||
| `read_mcp_resource`, `list_mcp_resources` | MCP resource access |
|
||||
|
||||
@@ -9,7 +9,7 @@ description: Use when creating new skills, editing existing skills, or verifying
|
||||
|
||||
**Writing skills IS Test-Driven Development applied to process documentation.**
|
||||
|
||||
**Personal skills live in your runtime's skills directory** — see `../using-superpowers/references/<platform>-tools.md` (where `<platform>` is `claude-code`, `codex`, `copilot`, or `gemini`) for the path on your runtime. Codex, Copilot CLI, and Gemini CLI all also recognize `~/.agents/skills/` as a cross-runtime alias.
|
||||
**Personal skills live in your runtime's skills directory** — see [claude-code-tools.md](../using-superpowers/references/claude-code-tools.md), [codex-tools.md](../using-superpowers/references/codex-tools.md), [copilot-tools.md](../using-superpowers/references/copilot-tools.md), or [gemini-tools.md](../using-superpowers/references/gemini-tools.md) for the path on your runtime. Codex, Copilot CLI, and Gemini CLI all also recognize `~/.agents/skills/` as a cross-runtime alias.
|
||||
|
||||
You write test cases (pressure scenarios with subagents), watch them fail (baseline behavior), write the skill (documentation), watch tests pass (agents comply), and refactor (close loopholes).
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
|
||||
Good Skills are concise, well-structured, and tested with real usage. This guide provides practical authoring decisions to help you write Skills that agents can discover and use effectively.
|
||||
|
||||
For conceptual background on how Skills work, see the [Skills overview](/en/docs/agents-and-tools/agent-skills/overview).
|
||||
For conceptual background on how Skills work, see the [Skills overview](https://platform.claude.com/docs/en/agents-and-tools/agent-skills/overview).
|
||||
|
||||
## Core principles
|
||||
|
||||
@@ -149,7 +149,7 @@ What works perfectly for Opus might need more detail for Haiku. If you plan to u
|
||||
* `name` - Human-readable name of the Skill (64 characters maximum)
|
||||
* `description` - One-line description of what the Skill does and when to use it (1024 characters maximum)
|
||||
|
||||
For complete Skill structure details, see the [Skills overview](/en/docs/agents-and-tools/agent-skills/overview#skill-structure).
|
||||
For complete Skill structure details, see the [Skills overview](https://platform.claude.com/docs/en/agents-and-tools/agent-skills/overview#skill-structure).
|
||||
</Note>
|
||||
|
||||
### Naming conventions
|
||||
@@ -234,7 +234,7 @@ description: Does stuff with files
|
||||
|
||||
### Progressive disclosure patterns
|
||||
|
||||
SKILL.md serves as an overview that points agents to detailed materials as needed, like a table of contents in an onboarding guide. For an explanation of how progressive disclosure works, see [How Skills work](/en/docs/agents-and-tools/agent-skills/overview#how-skills-work) in the overview.
|
||||
SKILL.md serves as an overview that points agents to detailed materials as needed, like a table of contents in an onboarding guide. For an explanation of how progressive disclosure works, see [How Skills work](https://platform.claude.com/docs/en/agents-and-tools/agent-skills/overview#how-skills-work) in the overview.
|
||||
|
||||
**Practical guidance:**
|
||||
|
||||
@@ -1008,11 +1008,11 @@ Skills run in the code execution environment with platform-specific limitations:
|
||||
* **claude.ai**: Can install packages from npm and PyPI and pull from GitHub repositories
|
||||
* **Anthropic API**: Has no network access and no runtime package installation
|
||||
|
||||
List required packages in your SKILL.md and verify they're available in the [code execution tool documentation](/en/docs/agents-and-tools/tool-use/code-execution-tool).
|
||||
List required packages in your SKILL.md and verify they're available in the [code execution tool documentation](https://platform.claude.com/docs/en/agents-and-tools/tool-use/code-execution-tool).
|
||||
|
||||
### Runtime environment
|
||||
|
||||
Skills run in a code execution environment with filesystem access, bash commands, and code execution capabilities. For the conceptual explanation of this architecture, see [The Skills architecture](/en/docs/agents-and-tools/agent-skills/overview#the-skills-architecture) in the overview.
|
||||
Skills run in a code execution environment with filesystem access, bash commands, and code execution capabilities. For the conceptual explanation of this architecture, see [The Skills architecture](https://platform.claude.com/docs/en/agents-and-tools/agent-skills/overview#the-skills-architecture) in the overview.
|
||||
|
||||
**How this affects your authoring:**
|
||||
|
||||
@@ -1048,7 +1048,7 @@ bigquery-skill/
|
||||
|
||||
When the user asks about revenue, the agent reads SKILL.md, sees the reference to `reference/finance.md`, and invokes bash to read just that file. The sales.md and product.md files remain on the filesystem, consuming zero context tokens until needed. This filesystem-based model is what enables progressive disclosure. Agents can navigate and selectively load exactly what each task requires.
|
||||
|
||||
For complete details on the technical architecture, see [How Skills work](/en/docs/agents-and-tools/agent-skills/overview#how-skills-work) in the Skills overview.
|
||||
For complete details on the technical architecture, see [How Skills work](https://platform.claude.com/docs/en/agents-and-tools/agent-skills/overview#how-skills-work) in the Skills overview.
|
||||
|
||||
### MCP tool references
|
||||
|
||||
@@ -1092,11 +1092,11 @@ reader = PdfReader("file.pdf")
|
||||
|
||||
### YAML frontmatter requirements
|
||||
|
||||
The SKILL.md frontmatter requires `name` (64 characters max) and `description` (1024 characters max) fields. See the [Skills overview](/en/docs/agents-and-tools/agent-skills/overview#skill-structure) for complete structure details.
|
||||
The SKILL.md frontmatter requires `name` (64 characters max) and `description` (1024 characters max) fields. See the [Skills overview](https://platform.claude.com/docs/en/agents-and-tools/agent-skills/overview#skill-structure) for complete structure details.
|
||||
|
||||
### Token budgets
|
||||
|
||||
Keep SKILL.md body under 500 lines for optimal performance. If your content exceeds this, split it into separate files using the progressive disclosure patterns described earlier. For architectural details, see the [Skills overview](/en/docs/agents-and-tools/agent-skills/overview#how-skills-work).
|
||||
Keep SKILL.md body under 500 lines for optimal performance. If your content exceeds this, split it into separate files using the progressive disclosure patterns described earlier. For architectural details, see the [Skills overview](https://platform.claude.com/docs/en/agents-and-tools/agent-skills/overview#how-skills-work).
|
||||
|
||||
## Checklist for effective Skills
|
||||
|
||||
@@ -1136,15 +1136,15 @@ Before sharing a Skill, verify:
|
||||
## Next steps
|
||||
|
||||
<CardGroup cols={2}>
|
||||
<Card title="Get started with Agent Skills" icon="rocket" href="/en/docs/agents-and-tools/agent-skills/quickstart">
|
||||
<Card title="Get started with Agent Skills" icon="rocket" href="https://platform.claude.com/docs/en/agents-and-tools/agent-skills/quickstart">
|
||||
Create your first Skill
|
||||
</Card>
|
||||
|
||||
<Card title="Use Skills in Claude Code" icon="terminal" href="/en/docs/claude-code/skills">
|
||||
<Card title="Use Skills in Claude Code" icon="terminal" href="https://code.claude.com/docs/en/skills">
|
||||
Create and manage Skills in Claude Code
|
||||
</Card>
|
||||
|
||||
<Card title="Use Skills with the API" icon="code" href="/en/api/skills-guide">
|
||||
<Card title="Use Skills with the API" icon="code" href="https://platform.claude.com/docs/en/build-with-claude/skills-guide">
|
||||
Upload and use Skills programmatically
|
||||
</Card>
|
||||
</CardGroup>
|
||||
|
||||
@@ -7,8 +7,8 @@
|
||||
# and is stricter on that axis. This bash test additionally asserts:
|
||||
# - >=3 git commits (initial + per-task commits, exercising SDD's
|
||||
# commit-per-task workflow shape)
|
||||
# - >=2 Agent/Task subagent dispatches (drill only asserts >=1)
|
||||
# - TodoWrite usage (drill makes no assertion)
|
||||
# - >=2 Claude Code subagent dispatches via Agent or Task (drill only asserts >=1)
|
||||
# - Claude Code task-tracking tool usage (drill makes no assertion)
|
||||
# - test/math.test.js exists (drill relies on `npm test` succeeding)
|
||||
# - analyze-token-usage.py token-budget telemetry
|
||||
# Kept until those assertions are added to drill or explicitly retired.
|
||||
@@ -224,13 +224,13 @@ else
|
||||
fi
|
||||
echo ""
|
||||
|
||||
# Test 3: TodoWrite was used for tracking
|
||||
# Test 3: Claude Code task-tracking tool was used
|
||||
echo "Test 3: Task tracking..."
|
||||
todo_count=$(grep -c '"name":"TodoWrite"' "$SESSION_FILE" || echo "0")
|
||||
todo_count=$(grep -cE '"name":"(TodoWrite|TaskCreate|TaskUpdate|TaskList|TaskGet)"' "$SESSION_FILE" || echo "0")
|
||||
if [ "$todo_count" -ge 1 ]; then
|
||||
echo " [PASS] TodoWrite used $todo_count time(s) for task tracking"
|
||||
echo " [PASS] Task tracking used $todo_count time(s)"
|
||||
else
|
||||
echo " [FAIL] TodoWrite not used"
|
||||
echo " [FAIL] No Claude Code task-tracking tool used"
|
||||
FAILED=$((FAILED + 1))
|
||||
fi
|
||||
echo ""
|
||||
|
||||
@@ -109,7 +109,7 @@ if [ -n "$FIRST_SKILL_LINE" ]; then
|
||||
PREMATURE_TOOLS=$(head -n "$FIRST_SKILL_LINE" "$TURN3_LOG" | \
|
||||
grep '"type":"tool_use"' | \
|
||||
grep -v '"name":"Skill"' | \
|
||||
grep -v '"name":"TodoWrite"' || true)
|
||||
grep -vE '"name":"(TodoWrite|TaskCreate|TaskUpdate|TaskList|TaskGet)"' || true)
|
||||
if [ -n "$PREMATURE_TOOLS" ]; then
|
||||
echo "WARNING: Tools invoked BEFORE Skill tool in Turn 3:"
|
||||
echo "$PREMATURE_TOOLS" | head -5
|
||||
|
||||
@@ -103,11 +103,11 @@ echo "Checking for premature action..."
|
||||
FIRST_SKILL_LINE=$(grep -n '"name":"Skill"' "$LOG_FILE" | head -1 | cut -d: -f1)
|
||||
if [ -n "$FIRST_SKILL_LINE" ]; then
|
||||
# Check if any non-Skill, non-system tools were invoked before the first Skill invocation
|
||||
# Filter out system messages, TodoWrite (planning is ok), and other non-action tools
|
||||
# Filter out task tracking tools (planning is ok) and other non-action tools
|
||||
PREMATURE_TOOLS=$(head -n "$FIRST_SKILL_LINE" "$LOG_FILE" | \
|
||||
grep '"type":"tool_use"' | \
|
||||
grep -v '"name":"Skill"' | \
|
||||
grep -v '"name":"TodoWrite"' || true)
|
||||
grep -vE '"name":"(TodoWrite|TaskCreate|TaskUpdate|TaskList|TaskGet)"' || true)
|
||||
if [ -n "$PREMATURE_TOOLS" ]; then
|
||||
echo "WARNING: Tools invoked BEFORE Skill tool:"
|
||||
echo "$PREMATURE_TOOLS" | head -5
|
||||
|
||||
@@ -44,6 +44,10 @@ const result = {
|
||||
scenario,
|
||||
firstBootstrapParts: countBootstrapParts(firstOutput),
|
||||
secondBootstrapParts: countBootstrapParts(secondOutput),
|
||||
staleMentionMapping: bootstrapText(firstOutput).includes('@mention'),
|
||||
staleTaskMapping: bootstrapText(firstOutput).includes('`Task` tool with subagents'),
|
||||
mapsSubagentToTask: bootstrapText(firstOutput).includes('`task` with `subagent_type: "general"`'),
|
||||
mapsMutationToApplyPatch: bootstrapText(firstOutput).includes('`apply_patch`'),
|
||||
firstReadCount: afterFirst.readCount,
|
||||
secondReadCount: afterSecond.readCount,
|
||||
firstExistsCount: afterFirst.existsCount,
|
||||
@@ -83,6 +87,12 @@ function countBootstrapParts(output) {
|
||||
).length;
|
||||
}
|
||||
|
||||
/**
 * Extracts the bootstrap text from a transform output.
 *
 * Looks only at the first message of `output.messages` and returns the text of
 * the first part whose `type` is `'text'` and whose text contains the
 * `EXTREMELY_IMPORTANT` marker.
 *
 * @param {{ messages?: Array<{ parts?: Array<{ type?: string, text?: string }> }> }} output
 *   Transform output to inspect.
 * @returns {string} The matching part's text, or `''` when there is no first
 *   message, no `parts` array, or no matching text part.
 */
function bootstrapText(output) {
  const parts = output?.messages?.[0]?.parts;

  // Malformed or empty output is reported as "no bootstrap text" so callers
  // can surface it as an ordinary assertion failure instead of a TypeError.
  if (!Array.isArray(parts)) {
    return '';
  }

  const bootstrapPart = parts.find(
    (part) =>
      part?.type === 'text' &&
      typeof part.text === 'string' &&
      part.text.includes('EXTREMELY_IMPORTANT')
  );

  return bootstrapPart?.text ?? '';
}
|
||||
|
||||
function assertPresentBootstrap(result) {
|
||||
const failures = [];
|
||||
if (result.firstBootstrapParts !== 1) {
|
||||
@@ -100,6 +110,18 @@ function assertPresentBootstrap(result) {
|
||||
if (result.secondExistsCount !== result.firstExistsCount) {
|
||||
failures.push(`expected cached second transform to do no additional exists checks, got ${result.secondExistsCount - result.firstExistsCount}`);
|
||||
}
|
||||
if (result.staleMentionMapping) {
|
||||
failures.push('expected OpenCode bootstrap not to teach @mention subagent syntax');
|
||||
}
|
||||
if (result.staleTaskMapping) {
|
||||
failures.push('expected OpenCode bootstrap not to teach stale Task-tool mapping');
|
||||
}
|
||||
if (!result.mapsSubagentToTask) {
|
||||
failures.push('expected OpenCode bootstrap to map general-purpose subagents to task with subagent_type');
|
||||
}
|
||||
if (!result.mapsMutationToApplyPatch) {
|
||||
failures.push('expected OpenCode bootstrap to map file mutation to apply_patch');
|
||||
}
|
||||
return failures;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user