mirror of
https://github.com/obra/superpowers.git
synced 2026-04-23 09:59:05 +08:00
Compare commits
11 Commits
wip/brains
...
gs/plan-mo
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
7fecd85842 | ||
|
|
d1e39450c0 | ||
|
|
fa827dd12a | ||
|
|
998be8aacd | ||
|
|
39ae6ec2fb | ||
|
|
a901c14c68 | ||
|
|
4c78b2dad1 | ||
|
|
e014832137 | ||
|
|
4d54210a3c | ||
|
|
8abf9d0b02 | ||
|
|
a1a1ae5519 |
1
.gitattributes
vendored
1
.gitattributes
vendored
@@ -1,5 +1,6 @@
|
||||
# Ensure shell scripts always have LF line endings
|
||||
*.sh text eol=lf
|
||||
hooks/session-start text eol=lf
|
||||
|
||||
# Ensure the polyglot wrapper keeps LF (it's parsed by both cmd and bash)
|
||||
*.cmd text eol=lf
|
||||
|
||||
@@ -32,6 +32,12 @@ Fetch and follow instructions from https://raw.githubusercontent.com/obra/superp
|
||||
|
||||
3. Restart Codex.
|
||||
|
||||
4. **For subagent skills** (optional): Skills like `dispatching-parallel-agents` and `subagent-driven-development` require Codex's collab feature. Add to your Codex config:
|
||||
```toml
|
||||
[features]
|
||||
collab = true
|
||||
```
|
||||
|
||||
### Windows
|
||||
|
||||
Use a junction instead of a symlink (works without Developer Mode):
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
"hooks": [
|
||||
{
|
||||
"type": "command",
|
||||
"command": "${CLAUDE_PLUGIN_ROOT}/hooks/session-start.sh",
|
||||
"command": "${CLAUDE_PLUGIN_ROOT}/hooks/run-hook.cmd session-start",
|
||||
"async": true
|
||||
}
|
||||
]
|
||||
|
||||
@@ -1,43 +1,46 @@
|
||||
: << 'CMDBLOCK'
|
||||
@echo off
|
||||
REM ============================================================================
|
||||
REM DEPRECATED: This polyglot wrapper is no longer used as of Claude Code 2.1.x
|
||||
REM ============================================================================
|
||||
REM Cross-platform polyglot wrapper for hook scripts.
|
||||
REM On Windows: cmd.exe runs the batch portion, which finds and calls bash.
|
||||
REM On Unix: the shell interprets this as a script (: is a no-op in bash).
|
||||
REM
|
||||
REM Claude Code 2.1.x changed the Windows execution model for hooks:
|
||||
REM Hook scripts use extensionless filenames (e.g. "session-start" not
|
||||
REM "session-start.sh") so Claude Code's Windows auto-detection -- which
|
||||
REM prepends "bash" to any command containing .sh -- doesn't interfere.
|
||||
REM
|
||||
REM Before (2.0.x): Hooks ran with shell:true, using the system default shell.
|
||||
REM This wrapper provided cross-platform compatibility by
|
||||
REM being both a valid .cmd file (Windows) and bash script.
|
||||
REM
|
||||
REM After (2.1.x): Claude Code now auto-detects .sh files in hook commands
|
||||
REM and prepends "bash " on Windows. This broke the wrapper
|
||||
REM because the command:
|
||||
REM "run-hook.cmd" session-start.sh
|
||||
REM became:
|
||||
REM bash "run-hook.cmd" session-start.sh
|
||||
REM ...and bash cannot execute a .cmd file.
|
||||
REM
|
||||
REM The fix: hooks.json now calls session-start.sh directly. Claude Code 2.1.x
|
||||
REM handles the bash invocation automatically on Windows.
|
||||
REM
|
||||
REM This file is kept for reference and potential backward compatibility.
|
||||
REM ============================================================================
|
||||
REM
|
||||
REM Original purpose: Polyglot wrapper to run .sh scripts cross-platform
|
||||
REM Usage: run-hook.cmd <script-name> [args...]
|
||||
REM The script should be in the same directory as this wrapper
|
||||
|
||||
if "%~1"=="" (
|
||||
echo run-hook.cmd: missing script name >&2
|
||||
exit /b 1
|
||||
)
|
||||
"C:\Program Files\Git\bin\bash.exe" -l "%~dp0%~1" %2 %3 %4 %5 %6 %7 %8 %9
|
||||
exit /b
|
||||
|
||||
set "HOOK_DIR=%~dp0"
|
||||
|
||||
REM Try Git for Windows bash in standard locations
|
||||
if exist "C:\Program Files\Git\bin\bash.exe" (
|
||||
"C:\Program Files\Git\bin\bash.exe" "%HOOK_DIR%%~1" %2 %3 %4 %5 %6 %7 %8 %9
|
||||
exit /b %ERRORLEVEL%
|
||||
)
|
||||
if exist "C:\Program Files (x86)\Git\bin\bash.exe" (
|
||||
"C:\Program Files (x86)\Git\bin\bash.exe" "%HOOK_DIR%%~1" %2 %3 %4 %5 %6 %7 %8 %9
|
||||
exit /b %ERRORLEVEL%
|
||||
)
|
||||
|
||||
REM Try bash on PATH (e.g. user-installed Git Bash, MSYS2, Cygwin)
|
||||
where bash >nul 2>nul
|
||||
if %ERRORLEVEL% equ 0 (
|
||||
bash "%HOOK_DIR%%~1" %2 %3 %4 %5 %6 %7 %8 %9
|
||||
exit /b %ERRORLEVEL%
|
||||
)
|
||||
|
||||
REM No bash found - exit silently rather than error
|
||||
REM (plugin still works, just without SessionStart context injection)
|
||||
exit /b 0
|
||||
CMDBLOCK
|
||||
|
||||
# Unix shell runs from here
|
||||
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||
# Unix: run the named script directly
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)"
|
||||
SCRIPT_NAME="$1"
|
||||
shift
|
||||
"${SCRIPT_DIR}/${SCRIPT_NAME}" "$@"
|
||||
exec bash "${SCRIPT_DIR}/${SCRIPT_NAME}" "$@"
|
||||
|
||||
@@ -58,9 +58,11 @@ After writing the spec document:
|
||||
3. If loop exceeds 5 iterations, surface to human for guidance
|
||||
|
||||
**Implementation (if continuing):**
|
||||
When the user approves the design and wants to build:
|
||||
1. **Invoke `superpowers:writing-plans` using the Skill tool.** Not EnterPlanMode. Not plan mode. Not direct implementation. The Skill tool.
|
||||
2. After the plan is written, use superpowers:using-git-worktrees to create an isolated workspace for implementation.
|
||||
- Ask: "Ready to set up for implementation?"
|
||||
- Use superpowers:using-git-worktrees to create isolated workspace
|
||||
- **REQUIRED:** Use superpowers:writing-plans to create detailed implementation plan
|
||||
- Do NOT use platform planning features (e.g., EnterPlanMode, plan mode)
|
||||
- Do NOT start implementing directly - the writing-plans skill comes first
|
||||
|
||||
## Key Principles
|
||||
|
||||
|
||||
@@ -3,6 +3,10 @@ name: using-superpowers
|
||||
description: Use when starting any conversation - establishes how to find and use skills, requiring Skill tool invocation before ANY response including clarifying questions
|
||||
---
|
||||
|
||||
<SUBAGENT-STOP>
|
||||
If you were dispatched as a subagent to execute a specific task, skip this skill.
|
||||
</SUBAGENT-STOP>
|
||||
|
||||
<EXTREMELY-IMPORTANT>
|
||||
If you think there is even a 1% chance a skill might apply to what you are doing, you ABSOLUTELY MUST invoke the skill.
|
||||
|
||||
@@ -27,6 +31,10 @@ If CLAUDE.md says "don't use TDD" and a skill says "always use TDD," follow CLAU
|
||||
|
||||
**In other environments:** Check your platform's documentation for how skills are loaded.
|
||||
|
||||
## Platform Adaptation
|
||||
|
||||
Skills use Claude Code tool names. Non-CC platforms: see `references/codex-tools.md` for tool equivalents.
|
||||
|
||||
# Using Skills
|
||||
|
||||
## The Rule
|
||||
@@ -73,7 +81,7 @@ These thoughts mean STOP—you're rationalizing:
|
||||
| "I'll just do this one thing first" | Check BEFORE doing anything. |
|
||||
| "This feels productive" | Undisciplined action wastes time. Skills prevent this. |
|
||||
| "I know what that means" | Knowing the concept ≠ using the skill. Invoke it. |
|
||||
| "I should use EnterPlanMode / plan mode" | If a loaded skill specifies the next step, follow the skill. EnterPlanMode is a platform default — skills override defaults. |
|
||||
| "Let me enter plan mode first" | If a skill handles planning (writing-plans, brainstorming), use the skill directly. EnterPlanMode is redundant — never layer it with a planning skill. |
|
||||
|
||||
## Skill Priority
|
||||
|
||||
|
||||
25
skills/using-superpowers/references/codex-tools.md
Normal file
25
skills/using-superpowers/references/codex-tools.md
Normal file
@@ -0,0 +1,25 @@
|
||||
# Codex Tool Mapping
|
||||
|
||||
Skills use Claude Code tool names. When you encounter these in a skill, use your platform equivalent:
|
||||
|
||||
| Skill references | Codex equivalent |
|
||||
|-----------------|------------------|
|
||||
| `Task` tool (dispatch subagent) | `spawn_agent` |
|
||||
| Multiple `Task` calls (parallel) | Multiple `spawn_agent` calls |
|
||||
| Task returns result | `wait` |
|
||||
| Task completes automatically | `close_agent` to free slot |
|
||||
| `TodoWrite` (task tracking) | `update_plan` |
|
||||
| `Skill` tool (invoke a skill) | Skills load natively — just follow the instructions |
|
||||
| `Read`, `Write`, `Edit` (files) | Use your native file tools |
|
||||
| `Bash` (run commands) | Use your native shell tools |
|
||||
|
||||
## Subagent dispatch requires collab
|
||||
|
||||
Add to your Codex config (`~/.codex/config.toml`):
|
||||
|
||||
```toml
|
||||
[features]
|
||||
collab = true
|
||||
```
|
||||
|
||||
This enables `spawn_agent`, `wait`, and `close_agent` for skills like `dispatching-parallel-agents` and `subagent-driven-development`.
|
||||
@@ -1,10 +1,12 @@
|
||||
---
|
||||
name: writing-plans
|
||||
description: Use when you have a spec or requirements for a multi-step task, before touching code. After brainstorming, ALWAYS use this — not EnterPlanMode or plan mode.
|
||||
description: Use when you have a spec or requirements for a multi-step task, before touching code
|
||||
---
|
||||
|
||||
# Writing Plans
|
||||
|
||||
**IMPORTANT:** Invoke this skill directly — do NOT use EnterPlanMode or platform plan mode. This skill has its own workflow and approval checkpoint (execution handoff). Layering plan mode on top is redundant and restrictive.
|
||||
|
||||
## Overview
|
||||
|
||||
Write comprehensive implementation plans assuming the engineer has zero context for our codebase and questionable taste. Document everything they need to know: which files to touch for each task, code, testing, docs they might need to check, how to test it. Give them the whole plan as bite-sized tasks. DRY. YAGNI. TDD. Frequent commits.
|
||||
@@ -13,7 +15,7 @@ Assume they are a skilled developer, but know almost nothing about our toolset o
|
||||
|
||||
**Announce at start:** "I'm using the writing-plans skill to create the implementation plan."
|
||||
|
||||
**Context:** This runs in the main workspace after brainstorming, while context is fresh. The worktree is created afterward for implementation.
|
||||
**Context:** This should be run in a dedicated worktree (created by brainstorming skill).
|
||||
|
||||
**Save plans to:** `docs/superpowers/plans/YYYY-MM-DD-<feature-name>.md`
|
||||
- (User preferences for plan location override this default)
|
||||
|
||||
@@ -58,7 +58,6 @@ while [[ $# -gt 0 ]]; do
|
||||
echo ""
|
||||
echo "Tests:"
|
||||
echo " test-subagent-driven-development.sh Test skill loading and requirements"
|
||||
echo " test-brainstorm-handoff.sh Test brainstorm→writing-plans handoff"
|
||||
echo ""
|
||||
echo "Integration Tests (use --integration):"
|
||||
echo " test-subagent-driven-development-integration.sh Full workflow execution"
|
||||
@@ -75,7 +74,6 @@ done
|
||||
# List of skill tests to run (fast unit tests)
|
||||
tests=(
|
||||
"test-subagent-driven-development.sh"
|
||||
"test-brainstorm-handoff.sh"
|
||||
)
|
||||
|
||||
# Integration tests (slow, full execution)
|
||||
|
||||
@@ -1,330 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
# Test: Brainstorm-to-plan handoff (end-to-end)
|
||||
#
|
||||
# Full brainstorming flow that builds enough context distance to reproduce
|
||||
# the EnterPlanMode failure. Simulates a real brainstorming session with
|
||||
# multiple turns of Q&A before the "build it" moment.
|
||||
#
|
||||
# This test takes 5-10 minutes to run.
|
||||
#
|
||||
# PASS: Skill tool invoked with "writing-plans" AND EnterPlanMode NOT invoked
|
||||
# FAIL: EnterPlanMode invoked OR writing-plans not invoked
|
||||
#
|
||||
# Usage:
|
||||
# ./test-brainstorm-handoff-e2e.sh # With fix (expects PASS)
|
||||
# ./test-brainstorm-handoff-e2e.sh --without-fix # Strip fix, reproduce failure
|
||||
# ./test-brainstorm-handoff-e2e.sh --verbose # Show full output
|
||||
#
|
||||
set -euo pipefail

# Resolve this script's directory and the plugin root two levels above it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PLUGIN_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"

# Parse flags.
#   --verbose | -v   show the tail of each turn's assistant output
#   --without-fix    run against a plugin copy with the handoff fix stripped
VERBOSE=false
WITHOUT_FIX=false
while [[ $# -gt 0 ]]; do
  case "$1" in
    --verbose|-v) VERBOSE=true; shift ;;
    --without-fix) WITHOUT_FIX=true; shift ;;
    *)
      # Diagnostics belong on stderr so they are not mistaken for test output.
      echo "Unknown flag: $1" >&2
      exit 1
      ;;
  esac
done

# Each run writes into its own timestamped directory.
TIMESTAMP=$(date +%s)
OUTPUT_DIR="/tmp/superpowers-tests/${TIMESTAMP}/brainstorm-handoff-e2e"
mkdir -p "$OUTPUT_DIR"

# Explicit if/else instead of `a && b || c`, which would also run c if b failed.
if [ "$WITHOUT_FIX" = true ]; then
  MODE_LABEL="WITHOUT FIX (expect failure)"
else
  MODE_LABEL="WITH FIX (expect pass)"
fi

echo "=== Brainstorm-to-Plan Handoff E2E Test ==="
echo "Mode: $MODE_LABEL"
echo "Output: $OUTPUT_DIR"
echo "This test takes 5-10 minutes."
echo ""
|
||||
|
||||
# --- Project Setup ---
# Create a throwaway Express project under $OUTPUT_DIR and commit it, so the
# session under test starts from a clean git repository.

PROJECT_DIR="$OUTPUT_DIR/project"
mkdir -p "$PROJECT_DIR/src" "$PROJECT_DIR/test"

cat > "$PROJECT_DIR/package.json" << 'PROJ_EOF'
{
"name": "my-express-app",
"version": "1.0.0",
"type": "module",
"scripts": {
"start": "node src/index.js",
"test": "vitest run"
},
"dependencies": {
"express": "^4.18.0",
"better-sqlite3": "^9.0.0"
},
"devDependencies": {
"vitest": "^1.0.0",
"supertest": "^6.0.0"
}
}
PROJ_EOF

cat > "$PROJECT_DIR/src/index.js" << 'PROJ_EOF'
import express from 'express';
const app = express();
app.use(express.json());

app.get('/health', (req, res) => res.json({ status: 'ok' }));

const PORT = process.env.PORT || 3000;
if (process.env.NODE_ENV !== 'test') {
app.listen(PORT, () => console.log(`Listening on ${PORT}`));
}

export default app;
PROJ_EOF

cd "$PROJECT_DIR"
git init -q
git add -A
# Supply an identity and disable signing for this one commit. Without them
# `git commit` fails on machines with no configured user (for example CI),
# or stalls where signing is enforced, and `set -e` aborts the whole test.
git \
  -c user.name="superpowers-test" \
  -c user.email="superpowers-test@example.invalid" \
  -c commit.gpgsign=false \
  commit -q -m "Initial commit"
|
||||
|
||||
# --- Plugin Setup ---
# By default the test runs against the real plugin. With --without-fix it runs
# against a copy whose skill files are reverted to the pre-fix wording, so the
# original failure can be reproduced.

EFFECTIVE_PLUGIN_DIR="$PLUGIN_DIR"

if [ "$WITHOUT_FIX" = true ]; then
  echo "Creating plugin copy without the handoff fix..."
  EFFECTIVE_PLUGIN_DIR="$OUTPUT_DIR/plugin-without-fix"
  cp -R "$PLUGIN_DIR" "$EFFECTIVE_PLUGIN_DIR"

  # The plugin path is passed as an argument and the heredoc is quoted, so the
  # shell never rewrites the Python source. A path containing quotes or `$`
  # cannot break the script, and backticks need no escaping.
  python3 - "$EFFECTIVE_PLUGIN_DIR" << 'PYEOF'
import pathlib
import sys

skills = pathlib.Path(sys.argv[1]) / 'skills'


def revert(path, pairs):
    """Swap each (fixed, legacy) text pair in path; warn if fixed is absent."""
    text = path.read_text(encoding='utf-8')
    for fixed, legacy in pairs:
        if fixed not in text:
            # A stale pattern would leave the fix in place without any sign.
            print(f'warning: expected fix text not found in {path}',
                  file=sys.stderr)
        text = text.replace(fixed, legacy)
    path.write_text(text, encoding='utf-8')


# Strip fix from brainstorming SKILL.md
revert(skills / 'brainstorming' / 'SKILL.md', [
    (
        '**Implementation (if continuing):**\nWhen the user approves the design and wants to build:\n1. **Invoke `superpowers:writing-plans` using the Skill tool.** Not EnterPlanMode. Not plan mode. Not direct implementation. The Skill tool.\n2. After the plan is written, use superpowers:using-git-worktrees to create an isolated workspace for implementation.',
        '**Implementation (if continuing):**\n- Ask: "Ready to set up for implementation?"\n- Use superpowers:using-git-worktrees to create isolated workspace\n- **REQUIRED:** Use superpowers:writing-plans to create detailed implementation plan',
    ),
])

# Strip fix from using-superpowers: drop the EnterPlanMode red-flag row
p = skills / 'using-superpowers' / 'SKILL.md'
lines = p.read_text(encoding='utf-8').splitlines(keepends=True)
lines = [l for l in lines if 'I should use EnterPlanMode' not in l]
p.write_text(''.join(lines), encoding='utf-8')

# Strip fix from writing-plans: revert description and context
revert(skills / 'writing-plans' / 'SKILL.md', [
    (
        'description: Use when you have a spec or requirements for a multi-step task, before touching code. After brainstorming, ALWAYS use this — not EnterPlanMode or plan mode.',
        'description: Use when you have a spec or requirements for a multi-step task, before touching code',
    ),
    (
        '**Context:** This runs in the main workspace after brainstorming, while context is fresh. The worktree is created afterward for implementation.',
        '**Context:** This should be run in a dedicated worktree (created by brainstorming skill).',
    ),
])
PYEOF
  echo "Plugin copy created."
  echo ""
fi
|
||||
|
||||
# --- Helper ---
|
||||
|
||||
#######################################
# Run one conversation turn through the claude CLI and log its output.
# Globals:
#   OUTPUT_DIR            (read) directory that receives turn<N>.json
#   EFFECTIVE_PLUGIN_DIR  (read) plugin directory passed to --plugin-dir
#   VERBOSE               (read) "true" prints the tail of the last assistant
#                         message
# Arguments:
#   $1 - turn number, used in the log file name
#   $2 - prompt text, passed to the CLI verbatim
#   $3 - value for --max-turns
#   $4 - human-readable label for progress output
#   $5 - optional; any non-empty value adds --continue
# Outputs:
#   stdout: the log file path only, so callers can capture it with $(...)
#   stderr: progress and verbose output
# Returns:
#   0 always; a failing or timed-out CLI call is tolerated and left in the log
#######################################
run_turn() {
  local turn_num="$1"
  local prompt="$2"
  local max_turns="$3"
  local label="$4"
  local continue_flag="${5:-}"

  local log_file="$OUTPUT_DIR/turn${turn_num}.json"
  echo ">>> Turn $turn_num: $label" >&2

  # Build the command as an array rather than a string passed to eval, so
  # quotes, `$` and backticks inside the prompt or paths reach the CLI
  # untouched instead of being re-parsed by the shell.
  local -a cmd=(
    timeout 300 claude -p "$prompt"
    --plugin-dir "$EFFECTIVE_PLUGIN_DIR"
    --dangerously-skip-permissions
    --max-turns "$max_turns"
    --output-format stream-json
  )
  if [ -n "$continue_flag" ]; then
    cmd+=(--continue)
  fi

  # Best effort: a non-zero exit (including timeout) must not abort the test.
  "${cmd[@]}" > "$log_file" 2>&1 || true

  echo " Done." >&2
  if [ "$VERBOSE" = true ]; then
    {
      echo " ---"
      grep '"type":"assistant"' "$log_file" 2>/dev/null | tail -1 | \
        jq -r '.message.content[0].text // empty' 2>/dev/null | \
        head -c 600 || true
      echo ""
      echo " ---"
    } >&2
  fi

  echo "$log_file"
}
|
||||
|
||||
# --- Run Full Brainstorming Flow ---
# Eight scripted turns simulate a full brainstorming session. run_turn is
# called directly rather than captured into variables: the captured values were
# never read, and capturing hid all progress and --verbose output for the
# whole 5-10 minute run.

cd "$PROJECT_DIR"

# Turn 1: Start brainstorming - this loads the skill and begins Q&A
run_turn 1 \
  "I want to add URL shortening to this Express app. Help me think through the design." \
  5 "Starting brainstorming"

# Turn 2: Answer first question (whatever it is) generically
run_turn 2 \
  "Good question. Here is what I want: POST /api/shorten that takes a URL and returns a short code. GET /:code that redirects. GET /api/stats/:code for click tracking. Random 6-char alphanumeric codes. SQLite storage using better-sqlite3 which is already in package.json. No auth needed." \
  5 "Answering first question" --continue

# Turn 3: Agree with recommendations
run_turn 3 \
  "Yes, that sounds right. Go with your recommendation." \
  5 "Agreeing with recommendation" --continue

# Turn 4: Continue agreeing
run_turn 4 \
  "Looks good. I agree with that approach." \
  5 "Continuing to agree" --continue

# Turn 5: Push toward completion
run_turn 5 \
  "Perfect. I am happy with all of that. Please wrap up the design and write the spec." \
  8 "Requesting spec write-up" --continue

# Turn 6: Approve the spec
run_turn 6 \
  "The spec looks great. I approve it." \
  5 "Approving spec" --continue

# Turn 7: THE CRITICAL MOMENT - "build it"
run_turn 7 \
  "Yes, build it." \
  5 "Critical handoff: build it" --continue

# Turn 8: Safety net in case turn 7 asked a follow-up
run_turn 8 \
  "Yes. Go ahead and build it now." \
  5 "Safety net: build it" --continue

echo ""
|
||||
|
||||
# --- Assertions ---
# Scan the per-turn logs for tool invocations and print what was found.

echo "=== Results ==="
echo ""

# Combine all logs
ALL_LOGS="$OUTPUT_DIR/all-turns.json"
cat "$OUTPUT_DIR"/turn*.json > "$ALL_LOGS" 2>/dev/null

# Check handoff turns (6-8, where approval + "build it" happens)
HANDOFF_LOGS="$OUTPUT_DIR/handoff-turns.json"
cat "$OUTPUT_DIR/turn6.json" "$OUTPUT_DIR/turn7.json" "$OUTPUT_DIR/turn8.json" > "$HANDOFF_LOGS" 2>/dev/null

# Detection: writing-plans skill invoked in handoff turns?
# Require a "skill" argument that names writing-plans, the same field the
# report below extracts. A bare grep for 'writing-plans' also matches prose
# that only mentions the skill, which gives a false positive whenever any
# other skill is invoked in these turns.
HAS_WRITING_PLANS=false
if grep -q '"name":"Skill"' "$HANDOFF_LOGS" 2>/dev/null \
  && grep -q '"skill":"[^"]*writing-plans' "$HANDOFF_LOGS" 2>/dev/null; then
  HAS_WRITING_PLANS=true
fi

# Detection: EnterPlanMode invoked in handoff turns?
HAS_ENTER_PLAN_MODE=false
if grep -q '"name":"EnterPlanMode"' "$HANDOFF_LOGS" 2>/dev/null; then
  HAS_ENTER_PLAN_MODE=true
fi

# Also check across ALL turns (might happen earlier)
HAS_ENTER_PLAN_MODE_ANYWHERE=false
if grep -q '"name":"EnterPlanMode"' "$ALL_LOGS" 2>/dev/null; then
  HAS_ENTER_PLAN_MODE_ANYWHERE=true
fi

# Report. With pipefail, a grep that matches nothing fails the pipeline and
# triggers the "(none)" fallback.
echo "Skills invoked (all turns):"
grep -o '"skill":"[^"]*"' "$ALL_LOGS" 2>/dev/null | sort -u || echo " (none)"
echo ""

echo "Skills invoked (handoff turns 6-8):"
grep -o '"skill":"[^"]*"' "$HANDOFF_LOGS" 2>/dev/null | sort -u || echo " (none)"
echo ""

echo "Tools invoked in handoff turns (6-8):"
grep -o '"name":"[A-Z][^"]*"' "$HANDOFF_LOGS" 2>/dev/null | sort | uniq -c | sort -rn | head -10 || echo " (none)"
echo ""

if [ "$HAS_ENTER_PLAN_MODE_ANYWHERE" = true ]; then
  echo "WARNING: EnterPlanMode was invoked somewhere in the conversation."
  echo "Turns containing EnterPlanMode:"
  for f in "$OUTPUT_DIR"/turn*.json; do
    if grep -q '"name":"EnterPlanMode"' "$f" 2>/dev/null; then
      echo " $(basename "$f")"
    fi
  done
  echo ""
fi
|
||||
|
||||
# Determine result

#######################################
# Turn the detection flags into a verdict.
# Globals:
#   WITHOUT_FIX                   (read) "true" when reproducing the failure
#   HAS_WRITING_PLANS             (read)
#   HAS_ENTER_PLAN_MODE           (read)
#   HAS_ENTER_PLAN_MODE_ANYWHERE  (read)
#   PASSED                        (written) "true" or "false"
# Outputs:
#   Writes the mode banner and the verdict lines to stdout.
#######################################
evaluate_handoff() {
  PASSED=false
  if [ "$WITHOUT_FIX" = true ]; then
    echo "--- Without-Fix Mode (reproducing failure) ---"
    if [ "$HAS_ENTER_PLAN_MODE" = true ] || [ "$HAS_ENTER_PLAN_MODE_ANYWHERE" = true ]; then
      echo "REPRODUCED: Claude used EnterPlanMode (the bug we're fixing)"
      PASSED=true
    elif [ "$HAS_WRITING_PLANS" = true ]; then
      echo "NOT REPRODUCED: Claude used writing-plans even without the fix"
      echo "(The old guidance was sufficient in this run)"
    else
      echo "INCONCLUSIVE: Claude used neither writing-plans nor EnterPlanMode"
      echo "The brainstorming flow may not have reached the handoff point."
    fi
  else
    echo "--- With-Fix Mode (verifying fix) ---"
    # The "both" case is tested first. When it came after the plain
    # EnterPlanMode check it could never be reached, and that failure was
    # reported as "instead of writing-plans".
    if [ "$HAS_WRITING_PLANS" = true ] && [ "$HAS_ENTER_PLAN_MODE_ANYWHERE" = true ]; then
      echo "FAIL: Claude used BOTH writing-plans AND EnterPlanMode"
    elif [ "$HAS_ENTER_PLAN_MODE_ANYWHERE" = true ]; then
      echo "FAIL: Claude used EnterPlanMode instead of writing-plans"
    elif [ "$HAS_WRITING_PLANS" = true ]; then
      echo "PASS: Claude used writing-plans skill (correct handoff)"
      PASSED=true
    else
      echo "INCONCLUSIVE: Claude used neither writing-plans nor EnterPlanMode"
      echo "The brainstorming flow may not have reached the handoff."
      echo "Check logs to see where the conversation stopped."
    fi
  fi
}

evaluate_handoff
|
||||
|
||||
echo ""

# Show what happened in each turn: log size plus any skills and the two tools
# of interest (Skill, EnterPlanMode) found in that turn's log.
echo "Turn-by-turn summary:"
for i in 1 2 3 4 5 6 7 8; do
  local_log="$OUTPUT_DIR/turn${i}.json"
  if [ -f "$local_log" ]; then
    local_skills=$(grep -o '"skill":"[^"]*"' "$local_log" 2>/dev/null | tr '\n' ' ' || true)
    # Extended regex for the alternation: `\|` in a basic regex is an
    # extension that POSIX leaves undefined.
    local_tools=$(grep -oE '"name":"(EnterPlanMode|Skill)"' "$local_log" 2>/dev/null | tr '\n' ' ' || true)
    local_size=$(wc -c < "$local_log" | tr -d ' ')
    printf " Turn %d: %s bytes" "$i" "$local_size"
    if [ -n "$local_skills" ]; then
      printf " | skills: %s" "$local_skills"
    fi
    if [ -n "$local_tools" ]; then
      printf " | tools: %s" "$local_tools"
    fi
    echo ""
  fi
done

echo ""
echo "Logs: $OUTPUT_DIR"
echo ""

# Exit status mirrors the verdict: 0 when PASSED is true, 1 otherwise.
if [ "$PASSED" = true ]; then
  exit 0
else
  exit 1
fi
|
||||
@@ -1,317 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
# Test: Brainstorm-to-plan handoff
|
||||
#
|
||||
# Verifies that after brainstorming, Claude invokes the writing-plans skill
|
||||
# instead of using EnterPlanMode.
|
||||
#
|
||||
# The failure mode this catches:
|
||||
# User says "build it" after brainstorming -> Claude calls EnterPlanMode
|
||||
# (because the system prompt's planning guidance overpowers the brainstorming
|
||||
# skill's instructions, which were loaded many turns ago)
|
||||
#
|
||||
# PASS: Skill tool invoked with "writing-plans" AND EnterPlanMode NOT invoked
|
||||
# FAIL: EnterPlanMode invoked OR writing-plans not invoked
|
||||
#
|
||||
# Usage:
|
||||
# ./test-brainstorm-handoff.sh # Normal test (expects PASS)
|
||||
# ./test-brainstorm-handoff.sh --without-fix # Strip fix, reproduce failure
|
||||
# ./test-brainstorm-handoff.sh --verbose # Show full output
|
||||
#
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
PLUGIN_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"
|
||||
|
||||
# Parse flags
|
||||
VERBOSE=false
|
||||
WITHOUT_FIX=false
|
||||
while [[ $# -gt 0 ]]; do
|
||||
case $1 in
|
||||
--verbose|-v) VERBOSE=true; shift ;;
|
||||
--without-fix) WITHOUT_FIX=true; shift ;;
|
||||
*) echo "Unknown flag: $1"; exit 1 ;;
|
||||
esac
|
||||
done
|
||||
|
||||
TIMESTAMP=$(date +%s)
|
||||
OUTPUT_DIR="/tmp/superpowers-tests/${TIMESTAMP}/brainstorm-handoff"
|
||||
mkdir -p "$OUTPUT_DIR"
|
||||
|
||||
echo "=== Brainstorm-to-Plan Handoff Test ==="
|
||||
echo "Mode: $([ "$WITHOUT_FIX" = true ] && echo "WITHOUT FIX (expect failure)" || echo "WITH FIX (expect pass)")"
|
||||
echo "Output: $OUTPUT_DIR"
|
||||
echo ""
|
||||
|
||||
# --- Project Setup ---
|
||||
|
||||
PROJECT_DIR="$OUTPUT_DIR/project"
|
||||
mkdir -p "$PROJECT_DIR/src"
|
||||
mkdir -p "$PROJECT_DIR/docs/superpowers/specs"
|
||||
|
||||
cat > "$PROJECT_DIR/package.json" << 'PROJ_EOF'
|
||||
{
|
||||
"name": "my-express-app",
|
||||
"version": "1.0.0",
|
||||
"type": "module",
|
||||
"dependencies": {
|
||||
"express": "^4.18.0",
|
||||
"better-sqlite3": "^9.0.0"
|
||||
}
|
||||
}
|
||||
PROJ_EOF
|
||||
|
||||
cat > "$PROJECT_DIR/src/index.js" << 'PROJ_EOF'
|
||||
import express from 'express';
|
||||
const app = express();
|
||||
app.use(express.json());
|
||||
|
||||
app.get('/health', (req, res) => res.json({ status: 'ok' }));
|
||||
|
||||
const PORT = process.env.PORT || 3000;
|
||||
app.listen(PORT, () => console.log(`Listening on ${PORT}`));
|
||||
PROJ_EOF
|
||||
|
||||
# Pre-create a spec document (simulating completed brainstorming)
|
||||
cat > "$PROJECT_DIR/docs/superpowers/specs/2025-01-15-url-shortener-design.md" << 'SPEC_EOF'
|
||||
# URL Shortener Design Spec
|
||||
|
||||
## Overview
|
||||
Add URL shortening capability to the existing Express.js API.
|
||||
|
||||
## Features
|
||||
- POST /api/shorten accepts { url } and returns { shortCode, shortUrl }
|
||||
- GET /:code redirects to the original URL (302)
|
||||
- GET /api/stats/:code returns { clicks, createdAt, originalUrl }
|
||||
|
||||
## Technical Design
|
||||
|
||||
### Database
|
||||
Single SQLite table via better-sqlite3:
|
||||
```sql
|
||||
CREATE TABLE urls (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
short_code TEXT UNIQUE NOT NULL,
|
||||
original_url TEXT NOT NULL,
|
||||
clicks INTEGER DEFAULT 0,
|
||||
created_at TEXT DEFAULT (datetime('now'))
|
||||
);
|
||||
CREATE INDEX idx_short_code ON urls(short_code);
|
||||
```
|
||||
|
||||
### File Structure
|
||||
- `src/index.js` — modified to mount new routes
|
||||
- `src/db.js` — database initialization and query functions
|
||||
- `src/shorten.js` — route handlers for all three endpoints
|
||||
- `src/code-generator.js` — random 6-char alphanumeric code generation
|
||||
|
||||
### Code Generation
|
||||
Random 6-character alphanumeric codes using crypto.randomBytes.
|
||||
Check for collisions and retry (astronomically unlikely with 36^6 space).
|
||||
|
||||
### Validation
|
||||
- URL must be present and start with http:// or https://
|
||||
- Return 400 with { error: "..." } for invalid input
|
||||
|
||||
### Error Handling
|
||||
- 404 with { error: "Not found" } for unknown short codes
|
||||
- 500 with { error: "Internal server error" } for database failures
|
||||
|
||||
## Decisions
|
||||
- 302 redirects (not 301) so browsers don't cache and we always track clicks
|
||||
- Database path configurable via DATABASE_PATH env var, defaults to ./data/urls.db
|
||||
- No auth, no custom codes, no expiry — keeping it simple
|
||||
SPEC_EOF
|
||||
|
||||
# Initialize git so brainstorming can inspect project state
|
||||
cd "$PROJECT_DIR"
|
||||
git init -q
|
||||
git add -A
|
||||
git commit -q -m "Initial commit with URL shortener spec"
|
||||
|
||||
# --- Plugin Setup ---

# Revert the handoff fix inside the plugin copy rooted at $1.
#
# The directory is handed to Python as an argument and the heredoc delimiter is
# quoted, so the shell never splices the path into Python source; paths that
# contain quotes or backslashes are therefore safe. Each edit warns on stderr
# when the text it expects is absent, because a silent no-op would leave the
# fix in place and make a "without fix" run meaningless.
strip_handoff_fix() {
python3 - "$1" << 'PYEOF'
import pathlib
import sys

skills = pathlib.Path(sys.argv[1]) / 'skills'


def warn_missing(path):
    print(f'warning: expected fix text not found in {path}; left unchanged',
          file=sys.stderr)


def revert(path, replacements):
    """Replace each (fixed, legacy) pair in the file at path."""
    content = path.read_text()
    for fixed, legacy in replacements:
        if fixed not in content:
            warn_missing(path)
        content = content.replace(fixed, legacy)
    path.write_text(content)


# brainstorming: revert to the old implementation section
revert(skills / 'brainstorming' / 'SKILL.md', [
    (
        '**Implementation (if continuing):**\nWhen the user approves the design and wants to build:\n1. **Invoke `superpowers:writing-plans` using the Skill tool.** Not EnterPlanMode. Not plan mode. Not direct implementation. The Skill tool.\n2. After the plan is written, use superpowers:using-git-worktrees to create an isolated workspace for implementation.',
        '**Implementation (if continuing):**\n- Ask: "Ready to set up for implementation?"\n- Use superpowers:using-git-worktrees to create isolated workspace\n- **REQUIRED:** Use superpowers:writing-plans to create detailed implementation plan',
    ),
])

# using-superpowers: remove the EnterPlanMode red flag line
p = skills / 'using-superpowers' / 'SKILL.md'
lines = p.read_text().splitlines(keepends=True)
kept = [l for l in lines if 'I should use EnterPlanMode' not in l]
if len(kept) == len(lines):
    warn_missing(p)
p.write_text(''.join(kept))

# writing-plans: revert description and context
revert(skills / 'writing-plans' / 'SKILL.md', [
    (
        'description: Use when you have a spec or requirements for a multi-step task, before touching code. After brainstorming, ALWAYS use this — not EnterPlanMode or plan mode.',
        'description: Use when you have a spec or requirements for a multi-step task, before touching code',
    ),
    (
        '**Context:** This runs in the main workspace after brainstorming, while context is fresh. The worktree is created afterward for implementation.',
        '**Context:** This should be run in a dedicated worktree (created by brainstorming skill).',
    ),
])
PYEOF
}

# By default the conversation runs against the real plugin; in without-fix mode
# it runs against a private copy with the fix stripped out.
EFFECTIVE_PLUGIN_DIR="$PLUGIN_DIR"

if [ "$WITHOUT_FIX" = true ]; then
  echo "Creating plugin copy without the handoff fix..."
  EFFECTIVE_PLUGIN_DIR="$OUTPUT_DIR/plugin-without-fix"
  cp -R "$PLUGIN_DIR" "$EFFECTIVE_PLUGIN_DIR"

  strip_handoff_fix "$EFFECTIVE_PLUGIN_DIR"

  echo "Plugin copy created at $EFFECTIVE_PLUGIN_DIR"
  echo ""
fi
|
||||
|
||||
# --- Run Conversation ---

# Print, between "---" rules, at most 800 bytes of the text of the last
# assistant message recorded in the stream-json log given as $1.
preview_last_reply() {
  echo "---"
  grep '"type":"assistant"' "$1" | tail -1 | jq -r '.message.content[0].text // empty' 2>/dev/null | head -c 800 || true
  echo ""
  echo "---"
}

cd "$PROJECT_DIR"

# Turn 1: point Claude at the finished design so the session sits right at the
# brainstorming -> implementation handoff.
TURN1_PROMPT='I want to add URL shortening to this Express app. I already have the full design worked out and written to docs/superpowers/specs/2025-01-15-url-shortener-design.md. Please read the spec.'
TURN1_LOG="$OUTPUT_DIR/turn1.json"

echo ">>> Turn 1: Loading brainstorming skill and establishing context..."
# Failures and timeouts are tolerated; whatever was emitted stays in the log.
timeout 300 claude -p "$TURN1_PROMPT" --plugin-dir "$EFFECTIVE_PLUGIN_DIR" --dangerously-skip-permissions --max-turns 5 --output-format stream-json > "$TURN1_LOG" 2>&1 || true
echo "Turn 1 complete."

if [ "$VERBOSE" = true ]; then
  preview_last_reply "$TURN1_LOG"
fi
echo ""

# Turn 2: approve the design and ask for the build. This is the critical
# handoff moment; --continue resumes the conversation from turn 1.
TURN2_PROMPT='The spec is complete and I am happy with the design. Build it.'
TURN2_LOG="$OUTPUT_DIR/turn2.json"

echo ">>> Turn 2: 'The spec is done. Build it.' (critical handoff)..."
timeout 300 claude -p "$TURN2_PROMPT" --continue --plugin-dir "$EFFECTIVE_PLUGIN_DIR" --dangerously-skip-permissions --max-turns 5 --output-format stream-json > "$TURN2_LOG" 2>&1 || true
echo "Turn 2 complete."

if [ "$VERBOSE" = true ]; then
  preview_last_reply "$TURN2_LOG"
fi
echo ""
|
||||
|
||||
# --- Assertions ---

# Succeed when log file $2 records a Skill tool call whose "skill" input is $1,
# with or without a "namespace:" prefix. Matching the input field itself
# matters: the skill name also shows up in ordinary text (other skills mention
# it), so "some Skill call exists and the name appears somewhere" would report
# an invocation that never happened.
skill_was_invoked() {
  grep -q '"name":"Skill"' "$2" 2>/dev/null &&
    grep -Eq "\"skill\":\"([^\"]*:)?$1\"" "$2" 2>/dev/null
}

# Succeed when log file $2 records a call to the tool named $1.
tool_was_invoked() {
  grep -q "\"name\":\"$1\"" "$2" 2>/dev/null
}

echo "=== Results ==="
echo ""

# Combine all turn logs for analysis
ALL_LOGS="$OUTPUT_DIR/all-turns.json"
cat "$TURN1_LOG" "$TURN2_LOG" > "$ALL_LOGS"

# Detection: writing-plans skill invoked?
HAS_WRITING_PLANS=false
if skill_was_invoked writing-plans "$ALL_LOGS"; then
  HAS_WRITING_PLANS=true
fi

# Detection: EnterPlanMode invoked?
HAS_ENTER_PLAN_MODE=false
if tool_was_invoked EnterPlanMode "$ALL_LOGS"; then
  HAS_ENTER_PLAN_MODE=true
fi

# Report what skills were invoked. The list is captured first so the "(none)"
# fallback depends on whether anything matched, not on the exit status of the
# last pipeline stage (which succeeds even on empty input).
echo "Skills invoked:"
SKILLS_SEEN=$(grep -o '"skill":"[^"]*"' "$ALL_LOGS" 2>/dev/null | sort -u || true)
if [ -n "$SKILLS_SEEN" ]; then
  printf '%s\n' "$SKILLS_SEEN"
else
  echo " (none)"
fi
echo ""

# Ten most frequent capitalized "name" values, most frequent first.
echo "Notable tools invoked:"
TOOLS_SEEN=$(grep -o '"name":"[A-Z][^"]*"' "$ALL_LOGS" 2>/dev/null | sort | uniq -c | sort -rn | head -10 || true)
if [ -n "$TOOLS_SEEN" ]; then
  printf '%s\n' "$TOOLS_SEEN"
else
  echo " (none)"
fi
echo ""
|
||||
|
||||
# Determine result
#
# Reads WITHOUT_FIX, HAS_WRITING_PLANS and HAS_ENTER_PLAN_MODE, prints the
# verdict, and sets PASSED to true or false.
evaluate_handoff() {
  PASSED=false
  if [ "$WITHOUT_FIX" = true ]; then
    # In without-fix mode, we EXPECT the failure (EnterPlanMode)
    echo "--- Without-Fix Mode (reproducing failure) ---"
    if [ "$HAS_ENTER_PLAN_MODE" = true ]; then
      echo "REPRODUCED: Claude used EnterPlanMode (the bug we're fixing)"
      PASSED=true
    elif [ "$HAS_WRITING_PLANS" = true ]; then
      echo "NOT REPRODUCED: Claude used writing-plans even without the fix"
      echo "(The model may have followed the old guidance anyway)"
    else
      echo "INCONCLUSIVE: Claude used neither writing-plans nor EnterPlanMode"
      echo "The brainstorming flow may not have reached the handoff point."
    fi
  else
    # Normal mode: expect writing-plans, not EnterPlanMode
    echo "--- With-Fix Mode (verifying fix) ---"
    # The "both" case is tested before the plain EnterPlanMode case; in the
    # opposite order it can never be reached.
    if [ "$HAS_WRITING_PLANS" = true ] && [ "$HAS_ENTER_PLAN_MODE" = true ]; then
      echo "FAIL: Claude used BOTH writing-plans AND EnterPlanMode"
    elif [ "$HAS_ENTER_PLAN_MODE" = true ]; then
      echo "FAIL: Claude used EnterPlanMode instead of writing-plans"
    elif [ "$HAS_WRITING_PLANS" = true ]; then
      echo "PASS: Claude used writing-plans skill (correct handoff)"
      PASSED=true
    else
      echo "INCONCLUSIVE: Claude used neither writing-plans nor EnterPlanMode"
      echo "The brainstorming flow may not have reached the handoff point."
      echo "Check logs - brainstorming may still be asking questions."
    fi
  fi
}

evaluate_handoff
|
||||
|
||||
echo ""

# Print up to 500 bytes of the last assistant message in the stream-json log
# given as $1: the first content item's text when present, otherwise the raw
# content value. Prints " (could not extract)" when nothing comes out. The
# excerpt is captured first because the exit status of the pipeline's last
# stage says nothing about whether any text was extracted.
show_turn_excerpt() {
  TURN_EXCERPT=$(grep '"type":"assistant"' "$1" 2>/dev/null | tail -1 |
    jq -r '.message.content[0].text // .message.content' 2>/dev/null |
    head -c 500) || true
  if [ -n "$TURN_EXCERPT" ]; then
    printf '%s\n' "$TURN_EXCERPT"
  else
    echo " (could not extract)"
  fi
}

# Show the critical turn 2 response
echo "Turn 2 response (first 500 chars):"
show_turn_excerpt "$TURN2_LOG"
echo ""

echo ""
echo "Logs:"
echo " Turn 1: $TURN1_LOG"
echo " Turn 2: $TURN2_LOG"
echo " Combined: $ALL_LOGS"
echo ""
|
||||
|
||||
# Exit 0 when the verdict is a pass, 1 for everything else.
case "$PASSED" in
  true) exit 0 ;;
  *) exit 1 ;;
esac
|
||||
Reference in New Issue
Block a user