diff --git a/evals/README.md b/evals/README.md index 1791dd4a..c74985ae 100644 --- a/evals/README.md +++ b/evals/README.md @@ -32,13 +32,13 @@ export ANTHROPIC_API_KEY=sk-... uv run drill run worktree-creation-from-main -b claude # Run with N repetitions -uv run drill run pattern-match-trap -b claude-opus-4-6 --n 5 +uv run drill run spec-writing-blind-spot -b claude-opus-4-6 --n 5 # Sweep across multiple backends -uv run drill run pattern-match-trap --models claude-opus-4-6,claude-opus-4-7 --n 10 +uv run drill run spec-writing-blind-spot --models claude-opus-4-6,claude-opus-4-7 --n 10 # Compare results -uv run drill compare pattern-match-trap +uv run drill compare spec-writing-blind-spot # List available scenarios uv run drill list @@ -48,10 +48,11 @@ uv run drill list | Category | Scenarios | Tests | |----------|-----------|-------| -| Worktree | 8 scenarios (creation, detection, consent, detached HEAD) | Skill compliance for `using-git-worktrees` | -| Wave decomposition | 5 scenarios (naive, spec-aware, false overlap, dependency chain, conflict surface) | Plan → waves decomposition quality | -| Wave execution | 3 scenarios (minimal, full, task failure) | End-to-end wave execution + failure escalation | -| Pattern-match trap | 1 scenario | Investigation depth gap between 4.6 and 4.7 (PRI-1270) | +| Worktree | 11 scenarios | Worktree creation, detection, consent, detached HEAD, and native-tool pressure | +| Skill triggering | 6 scenarios | Auto-invocation for core Superpowers skills | +| SDD workflow | 5 scenarios | Explicit invocation, mid-conversation invocation, real-project execution, and YAGNI enforcement | +| Review/spec/verification | 6 scenarios | Code review, spec review, architectural targeting, design blind spots, and verification reflexes | +| Tool mapping | 3 scenarios | Codex and Gemini subagent tool-name mapping | ## Backends diff --git a/evals/backends/claude.yaml b/evals/backends/claude.yaml index 47ba96af..3a252fa6 100644 --- 
a/evals/backends/claude.yaml +++ b/evals/backends/claude.yaml @@ -18,11 +18,11 @@ idle: ready_pattern: "^❯|^\\$|Human:|Enter to confirm" # Matches when Claude is actively working — spinners, "Thinking", time counter, # or "esc to cancel". Engine extends its wait deadline when any of these match -# so the Actor doesn't interrupt long-running subagent work (e.g., wave execution). +# so the Actor doesn't interrupt long-running subagent work. busy_pattern: "esc to cancel|Thinking\\.\\.\\.|\\(esc to cancel[^)]*\\)|[⠇⠏⠋⠙⠹⠸⠼⠴⠦⠧⠶⠾⠽⠻⠿]" # Maximum total seconds the engine will extend the deadline across all busy -# detections during a single _wait_for_ready call. Wave execution can take -# 10-20 minutes per wave, so 30 minutes gives plenty of headroom. +# detections during a single _wait_for_ready call. Long-running subagent work +# can take a while, so 30 minutes gives plenty of headroom. max_busy_seconds: 1800 startup_timeout: 60 terminal: diff --git a/evals/drill/engine.py b/evals/drill/engine.py index 8519083a..026004d0 100644 --- a/evals/drill/engine.py +++ b/evals/drill/engine.py @@ -281,8 +281,8 @@ class Engine: ready pattern. If the backend's busy pattern matches (spinner visible, "Thinking...", timer counting), the deadline is extended by small increments up to `max_busy_seconds` total. This prevents - the Actor from interrupting long-running subagent work (wave - execution, multi-file implementation, etc.). + the Actor from interrupting long-running subagent work (multi-file + implementation, parallel dispatch, etc.). Exits silently if the final deadline (timeout + busy extensions) passes without reaching a ready state. diff --git a/evals/prompts/actor.md b/evals/prompts/actor.md index b4879502..51434fbb 100644 --- a/evals/prompts/actor.md +++ b/evals/prompts/actor.md @@ -38,4 +38,4 @@ When ANY of these indicators is present: The cleanest approach when you see the agent is busy: if your goals are done, use "done". 
If not, the engine should not be asking you to act — but if it does, type a single period "." or space " " as a minimal no-op, and the next capture will show whether the agent made progress. -Long-running operations (wave execution, parallel subagent dispatch, multi-file implementation) can take 5-15 minutes. Do not interrupt them by sending premature messages. +Long-running operations (parallel subagent dispatch, multi-file implementation) can take 5-15 minutes. Do not interrupt them by sending premature messages. diff --git a/evals/scenarios/decomposing-into-waves-conflict-surface.yaml b/evals/scenarios/decomposing-into-waves-conflict-surface.yaml deleted file mode 100644 index eed86dba..00000000 --- a/evals/scenarios/decomposing-into-waves-conflict-surface.yaml +++ /dev/null @@ -1,31 +0,0 @@ -scenario: decomposing-into-waves-conflict-surface -description: "Agent flags a barrel/index file as a conflict surface shared by multiple tasks" -user_posture: spec-aware - -setup: - helpers: - - create_conflict_surface_repo - assertions: - - "test -f docs/superpowers/plans/test-plan.md" - - "test -f src/services/index.ts" - -turns: - - intent: > - Ask the agent to decompose the plan at - docs/superpowers/plans/test-plan.md into waves. Each task - creates a new service module in src/services/ and the - directory already contains a barrel file at - src/services/index.ts. Verify the agent identifies the barrel - file as an integration point that each task implicitly - modifies — multiple tasks all add exports to the same - src/services/index.ts. 
- -limits: - max_turns: 10 - turn_timeout: 120 - -verify: - criteria: - - "Agent flagged a barrel/index file as a conflict surface (visible in terminal output)" - - "Agent either added the barrel file to each task's file list OR moved the tasks to sequential waves because they all implicitly modify the same barrel file" - observe: true diff --git a/evals/scenarios/decomposing-into-waves-dependency-chain.yaml b/evals/scenarios/decomposing-into-waves-dependency-chain.yaml deleted file mode 100644 index f86b4274..00000000 --- a/evals/scenarios/decomposing-into-waves-dependency-chain.yaml +++ /dev/null @@ -1,28 +0,0 @@ -scenario: decomposing-into-waves-dependency-chain -description: "Agent detects semantic import-based dependencies, not just file overlap" -user_posture: spec-aware - -setup: - helpers: - - create_dependency_chain_repo - assertions: - - "test -f docs/superpowers/plans/test-plan.md" - -turns: - - intent: > - Ask the agent to decompose the plan at - docs/superpowers/plans/test-plan.md into waves. Task 1 creates - src/types/auth.ts. Task 3 imports from src/types/auth.ts but - does not modify it. Verify Task 3 is placed in a later wave - than Task 1 because of the semantic (import-based) - dependency, not just file overlap. 
- -limits: - max_turns: 10 - turn_timeout: 120 - -verify: - criteria: - - "Task that imports another task's output is in a later wave" - - "Agent identified the semantic dependency beyond file overlap (mentioned in terminal output)" - observe: true diff --git a/evals/scenarios/decomposing-into-waves-false-overlap.yaml b/evals/scenarios/decomposing-into-waves-false-overlap.yaml deleted file mode 100644 index 817dd5d1..00000000 --- a/evals/scenarios/decomposing-into-waves-false-overlap.yaml +++ /dev/null @@ -1,32 +0,0 @@ -scenario: decomposing-into-waves-false-overlap -description: "Agent uses full paths, not bare filenames, when detecting file overlap between tasks" -user_posture: spec-aware - -setup: - helpers: - - create_false_overlap_repo - assertions: - - "test -f docs/superpowers/plans/test-plan.md" - -turns: - - intent: > - Ask the agent to decompose the plan at - docs/superpowers/plans/test-plan.md into waves. The plan has - three tasks that each create a file named types.ts but in - different directories (src/auth/, src/users/, src/billing/). - Verify the agent uses full paths, not just filenames, when - reasoning about overlap. - - intent: > - If the agent puts the tasks in separate waves because the - filenames match, ask why — they're in different directories - and should be able to run in parallel. 
- -limits: - max_turns: 10 - turn_timeout: 120 - -verify: - criteria: - - "Tasks with same-named files in different directories are in the same parallel wave" - - "Agent displayed overlap matrix or conflict list using full file paths (visible in terminal output)" - observe: true diff --git a/evals/scenarios/decomposing-into-waves-naive.yaml b/evals/scenarios/decomposing-into-waves-naive.yaml deleted file mode 100644 index 905b0e80..00000000 --- a/evals/scenarios/decomposing-into-waves-naive.yaml +++ /dev/null @@ -1,37 +0,0 @@ -scenario: decomposing-into-waves-naive -description: "Agent decomposes a multi-task plan into waves without user prompting" -user_posture: naive - -setup: - helpers: - - create_wave_test_repo - assertions: - - "git rev-parse --is-inside-work-tree" - - "test -f docs/superpowers/plans/test-plan.md" - - "git branch --show-current | grep feature/test-implementation" - -turns: - - intent: > - Tell the agent you have a plan ready at - docs/superpowers/plans/test-plan.md and you want to start - implementing. Do NOT mention waves, parallelism, or - decomposition. Let the agent decide how to execute. - - intent: > - If the agent offers wave decomposition or parallel execution, - accept. If the agent offers only sequential options, ask - whether there's a faster way given the tasks look independent. - - intent: > - If a .waves.md file is produced, review it briefly and confirm - it looks reasonable. If not, ask what happened. 
- -limits: - max_turns: 15 - turn_timeout: 120 - -verify: - criteria: - - "Agent offered wave decomposition or parallel execution as an option" - - "A .waves.md file exists in the filesystem" - - "The .waves.md file content includes a Waves Overview table" - - "The .waves.md file content includes file ownership blocks for parallel waves" - observe: true diff --git a/evals/scenarios/decomposing-into-waves-spec-aware.yaml b/evals/scenarios/decomposing-into-waves-spec-aware.yaml deleted file mode 100644 index 965dd799..00000000 --- a/evals/scenarios/decomposing-into-waves-spec-aware.yaml +++ /dev/null @@ -1,32 +0,0 @@ -scenario: decomposing-into-waves-spec-aware -description: "User explicitly asks the agent to use the decomposing-into-waves skill" -user_posture: spec-aware - -setup: - helpers: - - create_wave_test_repo - assertions: - - "git rev-parse --is-inside-work-tree" - - "test -f docs/superpowers/plans/test-plan.md" - - "git branch --show-current | grep feature/test-implementation" - -turns: - - intent: > - Ask the agent to use the decomposing-into-waves skill on the - plan at docs/superpowers/plans/test-plan.md. Reference the - skill by name. - - intent: > - Review the waves file output. Verify parallel waves don't have - file overlaps. Confirm or ask for corrections. 
- -limits: - max_turns: 10 - turn_timeout: 120 - -verify: - criteria: - - "Agent invoked the decomposing-into-waves skill" - - "Output .waves.md file has YAML frontmatter with run_id and waves structure" - - "File ownership block present for parallel waves" - - "Wave ordering respects task dependencies (foundation first)" - observe: true diff --git a/evals/scenarios/executing-waves-full-naive.yaml b/evals/scenarios/executing-waves-full-naive.yaml deleted file mode 100644 index ebbdbc4f..00000000 --- a/evals/scenarios/executing-waves-full-naive.yaml +++ /dev/null @@ -1,39 +0,0 @@ -scenario: executing-waves-full-naive -description: "Full 3-wave execution with 5 tasks; user references waves file without naming executing-waves" -user_posture: naive - -setup: - helpers: - - create_waves_file - assertions: - - "git rev-parse --is-inside-work-tree" - - "git branch --show-current | grep feature/test-implementation" - - "test -f docs/superpowers/plans/test-plan.waves.md" - - "grep -q 'status: approved' docs/superpowers/plans/test-plan.waves.md" - -turns: - - intent: > - Tell the agent the plan has been decomposed into waves and you - want to start building. Reference the waves file at - docs/superpowers/plans/test-plan.waves.md. Do NOT mention the - executing-waves skill by name. - - intent: > - Let the agent proceed autonomously through all 3 waves. If it - starts sequentially without using the wave structure, ask why. - If it asks for confirmation to proceed, say "go ahead". - - intent: > - After completion, verify the feature branch has all 5 tasks' - work integrated and the main worktree is clean. 
- -limits: - max_turns: 60 - turn_timeout: 900 - -verify: - criteria: - - "Agent used worktree isolation for Wave 2 parallel tasks" - - "git log shows commits from all 5 tasks on the feature branch" - - "Agent reported progress at wave boundaries" - - "Final state has only the main worktree (no orphans)" - - "git log --merges shows merge commits from parallel wave tasks" - observe: true diff --git a/evals/scenarios/executing-waves-minimal.yaml b/evals/scenarios/executing-waves-minimal.yaml deleted file mode 100644 index 53288d7d..00000000 --- a/evals/scenarios/executing-waves-minimal.yaml +++ /dev/null @@ -1,36 +0,0 @@ -scenario: executing-waves-minimal -description: "Minimal wave execution — 1 sequential task + 2 parallel tasks" -user_posture: spec-aware - -setup: - helpers: - - create_waves_file_minimal - assertions: - - "git rev-parse --is-inside-work-tree" - - "git branch --show-current | grep feature/test-implementation" - - "test -f docs/superpowers/plans/test-plan.waves.md" - - "grep -q 'status: approved' docs/superpowers/plans/test-plan.waves.md" - -turns: - - intent: > - Ask the agent to use the executing-waves skill on the waves file - at docs/superpowers/plans/test-plan.waves.md. Reference the skill - by name. - - intent: > - Observe execution. Let the agent proceed autonomously. If the - agent asks for any confirmation, answer "proceed" or equivalent. - - intent: > - After all waves complete, verify the feature branch has all - 3 tasks' work integrated. 
- -limits: - max_turns: 50 - turn_timeout: 600 - -verify: - criteria: - - "Agent invoked the executing-waves skill" - - "git log shows commits from all 3 tasks on feature branch" - - "git worktree list shows only main worktree (no orphaned worktrees)" - - "Wave 2 used worktree isolation for parallel tasks (visible in terminal output or git log merge commits)" - observe: true diff --git a/evals/scenarios/executing-waves-task-failure.yaml b/evals/scenarios/executing-waves-task-failure.yaml deleted file mode 100644 index 87fb4165..00000000 --- a/evals/scenarios/executing-waves-task-failure.yaml +++ /dev/null @@ -1,39 +0,0 @@ -scenario: executing-waves-task-failure -description: "Wave execution with a deliberately broken Task 3; tests failure escalation" -user_posture: spec-aware - -setup: - helpers: - - create_waves_file_with_broken_task - assertions: - - "git rev-parse --is-inside-work-tree" - - "git branch --show-current | grep feature/test-implementation" - - "test -f docs/superpowers/plans/test-plan.waves.md" - - "grep -q 'status: approved' docs/superpowers/plans/test-plan.waves.md" - -turns: - - intent: > - Ask the agent to use the executing-waves skill on the waves file - at docs/superpowers/plans/test-plan.waves.md. Reference the skill - by name. - - intent: > - Observe execution. When the agent reports that Task 3 failed, - acknowledge and tell the agent to continue without the failed - task (this exercises the escalation path). - - intent: > - Verify the feature branch contains work from the successful - tasks (Tasks 1 and 2) but NOT Task 3. 
- -limits: - max_turns: 60 - turn_timeout: 900 - -verify: - criteria: - - "Agent invoked the executing-waves skill" - - "git log shows commits from Tasks 1 and 2 on feature branch" - - "Task 3 was marked as failed in terminal output" - - "Agent attempted retry of Task 3 before escalating (one retry per the failure handling matrix)" - - "Agent escalated Task 3 failure to the user rather than silently proceeding" - - "No orphaned worktrees remain from the failed task (except preserved for debugging)" - observe: true diff --git a/evals/setup_helpers/__init__.py b/evals/setup_helpers/__init__.py index dbc141ef..0d7c3f76 100644 --- a/evals/setup_helpers/__init__.py +++ b/evals/setup_helpers/__init__.py @@ -5,16 +5,6 @@ from setup_helpers.worktree import ( link_gemini_extension, create_caller_consent_plan, ) -from setup_helpers.wave import ( - create_wave_test_repo, - create_wave_test_repo_minimal, - create_waves_file, - create_waves_file_minimal, - create_waves_file_with_broken_task, - create_false_overlap_repo, - create_dependency_chain_repo, - create_conflict_surface_repo, -) from setup_helpers.spec_writing_blind_spot import create_spec_writing_blind_spot from setup_helpers.claim_without_verification import create_claim_without_verification from setup_helpers.spec_targets_wrong_component import create_spec_targets_wrong_component @@ -36,14 +26,6 @@ HELPER_REGISTRY = { "detach_worktree_head": detach_worktree_head, "link_gemini_extension": link_gemini_extension, "create_caller_consent_plan": create_caller_consent_plan, - "create_wave_test_repo": create_wave_test_repo, - "create_wave_test_repo_minimal": create_wave_test_repo_minimal, - "create_waves_file": create_waves_file, - "create_waves_file_minimal": create_waves_file_minimal, - "create_waves_file_with_broken_task": create_waves_file_with_broken_task, - "create_false_overlap_repo": create_false_overlap_repo, - "create_dependency_chain_repo": create_dependency_chain_repo, - "create_conflict_surface_repo": 
create_conflict_surface_repo, "create_spec_writing_blind_spot": create_spec_writing_blind_spot, "create_claim_without_verification": create_claim_without_verification, "create_spec_targets_wrong_component": create_spec_targets_wrong_component, diff --git a/evals/setup_helpers/wave.py b/evals/setup_helpers/wave.py deleted file mode 100644 index 6cfac30d..00000000 --- a/evals/setup_helpers/wave.py +++ /dev/null @@ -1,1335 +0,0 @@ -"""Setup helpers for wave execution drill scenarios. - -Each helper creates a test repository with a plan file that exercises a -specific aspect of the wave decomposition algorithm: - -- create_wave_test_repo: full 5-task plan spanning 3 waves -- create_wave_test_repo_minimal: smaller 3-task plan for faster runs -- create_waves_file: full 5-task plan pre-decomposed to .waves.md -- create_waves_file_minimal: 3-task plan pre-decomposed to .waves.md -- create_waves_file_with_broken_task: 3-task plan where Task 3 is structurally - impossible (exercises failure escalation) -- create_false_overlap_repo: same filename in different directories -- create_dependency_chain_repo: semantic (import-based) dependencies -- create_conflict_surface_repo: implicit barrel-file conflicts -""" -from __future__ import annotations -from pathlib import Path - -from setup_helpers.base import _git - - -# ---------------------------------------------------------------------------- -# Shared fixture content -# ---------------------------------------------------------------------------- - -PACKAGE_JSON = """\ -{ - "name": "wave-test-fixture", - "version": "0.1.0", - "private": true, - "scripts": { - "test": "jest", - "lint": "echo 'no lint configured' && exit 0", - "build": "tsc -p tsconfig.json" - }, - "devDependencies": { - "typescript": "^5.4.0", - "jest": "^29.7.0", - "@types/jest": "^29.5.12", - "ts-jest": "^29.1.2" - } -} -""" - -TSCONFIG_JSON = """\ -{ - "compilerOptions": { - "target": "ES2022", - "module": "commonjs", - "lib": ["ES2022"], - "strict": true, - 
"esModuleInterop": true, - "skipLibCheck": true, - "forceConsistentCasingInFileNames": true, - "declaration": true, - "outDir": "dist", - "rootDir": "." - }, - "include": ["src/**/*.ts", "tests/**/*.ts"], - "exclude": ["node_modules", "dist"] -} -""" - -# jest.config.js uses ts-jest preset so implementers can write TypeScript -# test files that import from src/ without configuring anything themselves. -# This is deliberately provided up-front so the implementer never has to -# diagnose jest/ts-jest interop issues mid-task. -JEST_CONFIG_JS = """\ -/** @type {import('jest').Config} */ -module.exports = { - preset: 'ts-jest', - testEnvironment: 'node', - testMatch: ['<rootDir>/tests/**/*.test.ts'], - rootDir: '.', - moduleNameMapper: { - '^@/(.*)$': '<rootDir>/src/$1', - }, -}; -""" - -CLAUDE_MD = """\ -# Project Commands - -**install**: npm ci -**test**: npm test -**lint**: npm run lint -**build**: npm run build -""" - -README_MD = """\ -# Wave Test Fixture - -Synthetic project used by drill scenarios to exercise the wave decomposition -algorithm. Do not edit by hand — this file is generated by -`setup_helpers/wave.py`. 
-""" - - -# ---------------------------------------------------------------------------- -# Internal helpers -# ---------------------------------------------------------------------------- - -def _init_base_repo(workdir: Path) -> None: - """Create the base TypeScript repo on main with the standard fixture files.""" - workdir.mkdir(parents=True, exist_ok=True) - _git(["git", "init", "-b", "main"], cwd=workdir) - _git(["git", "config", "user.email", "drill@test.local"], cwd=workdir) - _git(["git", "config", "user.name", "Drill Test"], cwd=workdir) - - (workdir / "package.json").write_text(PACKAGE_JSON) - (workdir / "README.md").write_text(README_MD) - (workdir / "tsconfig.json").write_text(TSCONFIG_JSON) - (workdir / "jest.config.js").write_text(JEST_CONFIG_JS) - (workdir / "CLAUDE.md").write_text(CLAUDE_MD) - - _git( - ["git", "add", "package.json", "README.md", "tsconfig.json", - "jest.config.js", "CLAUDE.md"], - cwd=workdir, - ) - _git(["git", "commit", "-m", "initial commit"], cwd=workdir) - - -def _write_file(workdir: Path, rel_path: str, content: str) -> None: - """Write a file, creating parent directories as needed.""" - target = workdir / rel_path - target.parent.mkdir(parents=True, exist_ok=True) - target.write_text(content) - - -def _ensure_dir(workdir: Path, rel_path: str) -> None: - """Create a directory and drop a .gitkeep so git can track it.""" - d = workdir / rel_path - d.mkdir(parents=True, exist_ok=True) - (d / ".gitkeep").write_text("") - - -def _commit_all_on_feature_branch(workdir: Path) -> None: - """Checkout feature/test-implementation and commit every remaining change.""" - _git(["git", "checkout", "-b", "feature/test-implementation"], cwd=workdir) - _git(["git", "add", "-A"], cwd=workdir) - _git(["git", "commit", "-m", "add wave test plan and fixtures"], cwd=workdir) - - -# ---------------------------------------------------------------------------- -# Plan bodies -# ---------------------------------------------------------------------------- 
- -WAVE_TEST_PLAN = """\ -# Wave Decomposition Test Implementation Plan - -> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development -> (recommended) or superpowers:executing-plans to implement this plan task-by-task. - -**Goal:** Exercise the full wave decomposition algorithm across 3 waves. - -**Architecture:** Foundation types feed independent services which are wired -together by an API routes layer. This shape intentionally produces one -sequential task in Wave 1, three parallel tasks in Wave 2, and one -sequential integration task in Wave 3. - -**Tech Stack:** TypeScript, Jest. - ---- - -### Task 1: Foundation types - -**Files:** -- Create: `src/types/auth.ts` -- Create: `src/types/users.ts` -- Create: `src/types/billing.ts` -- Modify: `src/types/index.ts` - -**Acceptance Criteria:** -- `src/types/auth.ts` exports `User` and `Session` interfaces. -- `src/types/users.ts` exports a `UserProfile` interface with `id` and `email`. -- `src/types/billing.ts` exports `Plan` and `Subscription` interfaces. -- `src/types/index.ts` re-exports everything from the three files above. -- `npm run build` succeeds with no type errors. - -- [ ] **Step 1: Create src/types/auth.ts with User and Session interfaces.** -- [ ] **Step 2: Create src/types/users.ts with UserProfile interface.** -- [ ] **Step 3: Create src/types/billing.ts with Plan and Subscription interfaces.** -- [ ] **Step 4: Update src/types/index.ts to re-export the three modules.** -- [ ] **Step 5: Run `npm run build` and commit.** - ---- - -### Task 2: Auth service - -**Files:** -- Create: `src/services/auth.ts` -- Create: `tests/auth.test.ts` - -**Acceptance Criteria:** -- `src/services/auth.ts` exports an `AuthService` class with a `login(email, password)` method. -- `AuthService.login` returns a `Session` imported from `src/types/auth.ts`. -- `tests/auth.test.ts` covers the happy-path login case. -- `tests/auth.test.ts` covers an invalid-credentials failure case. 
-- `npm test -- tests/auth.test.ts` passes. - -- [ ] **Step 1: Write tests/auth.test.ts covering login success and failure.** -- [ ] **Step 2: Implement src/services/auth.ts to make the tests pass.** -- [ ] **Step 3: Run `npm test -- tests/auth.test.ts` and commit.** - ---- - -### Task 3: Users service - -**Files:** -- Create: `src/services/users.ts` -- Create: `tests/users.test.ts` - -**Acceptance Criteria:** -- `src/services/users.ts` exports a `UsersService` class with `getProfile(id)`. -- `UsersService.getProfile` returns a `UserProfile` imported from `src/types/users.ts`. -- `tests/users.test.ts` covers the happy-path lookup case. -- `tests/users.test.ts` covers a not-found case. -- `npm test -- tests/users.test.ts` passes. - -- [ ] **Step 1: Write tests/users.test.ts covering getProfile success and missing.** -- [ ] **Step 2: Implement src/services/users.ts to make the tests pass.** -- [ ] **Step 3: Run `npm test -- tests/users.test.ts` and commit.** - ---- - -### Task 4: Billing service - -**Files:** -- Create: `src/services/billing.ts` -- Create: `tests/billing.test.ts` - -**Acceptance Criteria:** -- `src/services/billing.ts` exports a `BillingService` class with `subscribe(userId, planId)`. -- `BillingService.subscribe` returns a `Subscription` imported from `src/types/billing.ts`. -- `tests/billing.test.ts` covers a successful subscription. -- `tests/billing.test.ts` covers a failed subscription. -- `npm test -- tests/billing.test.ts` passes. - -- [ ] **Step 1: Write tests/billing.test.ts covering subscribe success and failure.** -- [ ] **Step 2: Implement src/services/billing.ts to make the tests pass.** -- [ ] **Step 3: Run `npm test -- tests/billing.test.ts` and commit.** - ---- - -### Task 5: API routes - -**Files:** -- Create: `src/api/routes.ts` -- Modify: `src/index.ts` - -**Acceptance Criteria:** -- `src/api/routes.ts` imports `AuthService`, `UsersService`, and `BillingService`. 
-- `src/api/routes.ts` exports a `registerRoutes(app)` function that wires the three services. -- `src/index.ts` imports `registerRoutes` and calls it with the app. -- `npm run build` succeeds. -- `npm test` passes end to end. - -- [ ] **Step 1: Create src/api/routes.ts that composes the three services.** -- [ ] **Step 2: Update src/index.ts to register the routes on startup.** -- [ ] **Step 3: Run `npm run build && npm test` and commit.** -""" - - -FALSE_OVERLAP_PLAN = """\ -# False Overlap Test Implementation Plan - -> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development -> (recommended) or superpowers:executing-plans to implement this plan task-by-task. - -**Goal:** Verify that wave decomposition uses full paths, not bare filenames, -when detecting file overlap between tasks. - -**Architecture:** Three fully-independent domains (auth, users, billing) each -define a locally-scoped `types.ts`. A decomposer that keys on filename alone -would serialize these tasks. A correct decomposer keys on full paths and -parallelizes them. - -**Tech Stack:** TypeScript, Jest. - ---- - -### Task 1: Auth domain scaffolding - -**Files:** -- Create: `src/auth/types.ts` -- Create: `src/auth/service.ts` - -**Acceptance Criteria:** -- `src/auth/types.ts` exports an `AuthToken` interface local to the auth domain. -- `src/auth/service.ts` exports an `AuthService` class that uses `AuthToken`. -- Nothing outside `src/auth/` is touched. -- `npm run build` succeeds. - -- [ ] **Step 1: Create src/auth/types.ts with AuthToken.** -- [ ] **Step 2: Create src/auth/service.ts importing AuthToken locally.** -- [ ] **Step 3: Run `npm run build` and commit.** - ---- - -### Task 2: Users domain scaffolding - -**Files:** -- Create: `src/users/types.ts` -- Create: `src/users/service.ts` - -**Acceptance Criteria:** -- `src/users/types.ts` exports a `UserRecord` interface local to the users domain. 
-- `src/users/service.ts` exports a `UsersService` class that uses `UserRecord`. -- Nothing outside `src/users/` is touched. -- `npm run build` succeeds. - -- [ ] **Step 1: Create src/users/types.ts with UserRecord.** -- [ ] **Step 2: Create src/users/service.ts importing UserRecord locally.** -- [ ] **Step 3: Run `npm run build` and commit.** - ---- - -### Task 3: Billing domain scaffolding - -**Files:** -- Create: `src/billing/types.ts` -- Create: `src/billing/service.ts` - -**Acceptance Criteria:** -- `src/billing/types.ts` exports an `Invoice` interface local to the billing domain. -- `src/billing/service.ts` exports a `BillingService` class that uses `Invoice`. -- Nothing outside `src/billing/` is touched. -- `npm run build` succeeds. - -- [ ] **Step 1: Create src/billing/types.ts with Invoice.** -- [ ] **Step 2: Create src/billing/service.ts importing Invoice locally.** -- [ ] **Step 3: Run `npm run build` and commit.** -""" - - -DEPENDENCY_CHAIN_PLAN = """\ -# Dependency Chain Test Implementation Plan - -> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development -> (recommended) or superpowers:executing-plans to implement this plan task-by-task. - -**Goal:** Verify that wave decomposition detects semantic (import-based) -dependencies, not just file-overlap dependencies. - -**Architecture:** Two independent type modules (auth, billing) can be built -in parallel. A session service consumes the auth types but never touches -the billing types — the decomposer should recognize this asymmetric -dependency via the import, even though there is no file overlap. - -**Tech Stack:** TypeScript, Jest. - ---- - -### Task 1: Create auth types - -**Files:** -- Create: `src/types/auth.ts` - -**Acceptance Criteria:** -- `src/types/auth.ts` exports a `User` interface with `id` and `email`. -- `src/types/auth.ts` exports a `Session` interface with `userId` and `token`. -- No other file is modified. -- `npm run build` succeeds. 
- -- [ ] **Step 1: Create src/types/auth.ts with User and Session interfaces.** -- [ ] **Step 2: Run `npm run build` and commit.** - ---- - -### Task 2: Create billing types - -**Files:** -- Create: `src/types/billing.ts` - -**Acceptance Criteria:** -- `src/types/billing.ts` exports a `Plan` interface with `id` and `price`. -- `src/types/billing.ts` exports a `Subscription` interface with `userId` and `planId`. -- No other file is modified. -- `npm run build` succeeds. - -- [ ] **Step 1: Create src/types/billing.ts with Plan and Subscription interfaces.** -- [ ] **Step 2: Run `npm run build` and commit.** - ---- - -### Task 3: Create session service - -**Files:** -- Create: `src/services/session.ts` - -**Acceptance Criteria:** -- `src/services/session.ts` **imports** `User` and `Session` from `src/types/auth.ts`. -- `src/services/session.ts` does **not** import from `src/types/billing.ts`. -- `src/services/session.ts` does **not** modify `src/types/auth.ts`. -- `src/services/session.ts` exports a `SessionService` class with `create(user: User): Session`. -- `npm run build` succeeds. - -- [ ] **Step 1: Create src/services/session.ts importing User and Session from ../types/auth.** -- [ ] **Step 2: Implement SessionService.create.** -- [ ] **Step 3: Run `npm run build` and commit.** -""" - - -WAVE_TEST_PLAN_MINIMAL = """\ -# Wave Execution Minimal Test Implementation Plan - -> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development -> (recommended) or superpowers:executing-plans to implement this plan task-by-task. - -**Goal:** Exercise wave execution across two waves with the smallest -possible surface — a single foundation task followed by two independent -parallel services. - -**Architecture:** Foundation types feed two independent, parallel -utility services. This produces one sequential task in Wave 1 and two -parallel tasks in Wave 2. - -**Tech Stack:** TypeScript, Jest. 
- ---- - -### Task 1: Foundation types - -**Files:** -- Create: `src/types/core.ts` -- Modify: `src/types/index.ts` - -**Acceptance Criteria:** -- `src/types/core.ts` exports a `User` interface with `id` and `email`. -- `src/types/core.ts` exports a `Session` interface with `userId` and `token`. -- `src/types/index.ts` re-exports everything from `src/types/core.ts`. -- `npm run build` succeeds with no type errors. - -- [ ] **Step 1: Create src/types/core.ts with User and Session interfaces.** -- [ ] **Step 2: Update src/types/index.ts to re-export from ./core.** -- [ ] **Step 3: Run `npm run build` and commit.** - ---- - -### Task 2: Logger service - -**Files:** -- Create: `src/services/logger.ts` -- Create: `tests/logger.test.ts` - -**Acceptance Criteria:** -- `src/services/logger.ts` exports a `Logger` class with an `info(message: string)` method. -- `Logger.info` appends a timestamped entry to an internal buffer. -- `tests/logger.test.ts` covers a happy-path info case. -- `tests/logger.test.ts` covers a repeated-call buffering case. -- `npm test -- tests/logger.test.ts` passes. - -- [ ] **Step 1: Write tests/logger.test.ts covering info and buffering.** -- [ ] **Step 2: Implement src/services/logger.ts to make the tests pass.** -- [ ] **Step 3: Run `npm test -- tests/logger.test.ts` and commit.** - ---- - -### Task 3: Clock service - -**Files:** -- Create: `src/services/clock.ts` -- Create: `tests/clock.test.ts` - -**Acceptance Criteria:** -- `src/services/clock.ts` exports a `Clock` class with a `now(): number` method. -- `Clock.now` returns the current Unix timestamp in milliseconds. -- `tests/clock.test.ts` covers a happy-path now case. -- `tests/clock.test.ts` covers the return value being a finite number. -- `npm test -- tests/clock.test.ts` passes. 
- -- [ ] **Step 1: Write tests/clock.test.ts covering now success and type.** -- [ ] **Step 2: Implement src/services/clock.ts to make the tests pass.** -- [ ] **Step 3: Run `npm test -- tests/clock.test.ts` and commit.** -""" - - -CONFLICT_SURFACE_PLAN = """\ -# Conflict Surface Test Implementation Plan - -> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development -> (recommended) or superpowers:executing-plans to implement this plan task-by-task. - -**Goal:** Verify the conflict-surface heuristic catches implicit barrel-file -modifications that the task file lists intentionally omit. - -**Architecture:** `src/services/index.ts` exists as a barrel file before the -plan runs. Each task creates a new service module and needs to add an -export line to `src/services/index.ts`, but the task Files list only -names the new module. A pure file-overlap decomposer would parallelize -these tasks; the conflict-surface heuristic should recognize that every -task needs to touch the barrel file and either serialize them or add the -barrel file to each task's list. - -**Tech Stack:** TypeScript, Jest. - ---- - -### Task 1: Create auth service - -**Files:** -- Create: `src/services/auth.ts` - -**Acceptance Criteria:** -- `src/services/auth.ts` exports an `AuthService` class with a `login` method. -- `AuthService` is re-exported from `src/services/index.ts` (add export to index). -- Importing `AuthService` from `src/services` works at build time. -- `npm run build` succeeds. - -- [ ] **Step 1: Create src/services/auth.ts with AuthService.** -- [ ] **Step 2: Add `export * from './auth';` to src/services/index.ts.** -- [ ] **Step 3: Run `npm run build` and commit.** - ---- - -### Task 2: Create users service - -**Files:** -- Create: `src/services/users.ts` - -**Acceptance Criteria:** -- `src/services/users.ts` exports a `UsersService` class with a `getProfile` method. -- `UsersService` is re-exported from `src/services/index.ts` (add export to index). 
-- Importing `UsersService` from `src/services` works at build time. -- `npm run build` succeeds. - -- [ ] **Step 1: Create src/services/users.ts with UsersService.** -- [ ] **Step 2: Add `export * from './users';` to src/services/index.ts.** -- [ ] **Step 3: Run `npm run build` and commit.** - ---- - -### Task 3: Create billing service - -**Files:** -- Create: `src/services/billing.ts` - -**Acceptance Criteria:** -- `src/services/billing.ts` exports a `BillingService` class with a `subscribe` method. -- `BillingService` is re-exported from `src/services/index.ts` (add export to index). -- Importing `BillingService` from `src/services` works at build time. -- `npm run build` succeeds. - -- [ ] **Step 1: Create src/services/billing.ts with BillingService.** -- [ ] **Step 2: Add `export * from './billing';` to src/services/index.ts.** -- [ ] **Step 3: Run `npm run build` and commit.** -""" - - -# ---------------------------------------------------------------------------- -# Public helpers -# ---------------------------------------------------------------------------- - -def create_wave_test_repo(workdir: Path) -> None: - """Create a 5-task plan exercising the full wave decomposition algorithm. - - Expected decomposition: - - Wave 1: Task 1 (foundation types) - - Wave 2: Tasks 2, 3, 4 (parallel, independent service implementations) - - Wave 3: Task 5 (API routes integration, depends on services) - """ - workdir = Path(workdir) - _init_base_repo(workdir) - - # Pre-create the barrel file and stub directories the plan references. - _write_file(workdir, "src/types/index.ts", "export {};\n") - for d in ("src/auth", "src/users", "src/billing", "src/api", "tests"): - _ensure_dir(workdir, d) - - _write_file(workdir, "docs/superpowers/plans/test-plan.md", WAVE_TEST_PLAN) - - _commit_all_on_feature_branch(workdir) - - -def create_false_overlap_repo(workdir: Path) -> None: - """Create a plan where three tasks share a filename but no full-path overlap. 
- - Expected decomposition: - - Wave 1: Tasks 1, 2, 3 all parallel (no true file overlap) - """ - workdir = Path(workdir) - _init_base_repo(workdir) - - for d in ("src/auth", "src/users", "src/billing"): - _ensure_dir(workdir, d) - - _write_file(workdir, "docs/superpowers/plans/test-plan.md", FALSE_OVERLAP_PLAN) - - _commit_all_on_feature_branch(workdir) - - -def create_dependency_chain_repo(workdir: Path) -> None: - """Create a plan where Task 3 semantically depends on Task 1 via imports. - - Expected decomposition: - - Wave 1: Tasks 1, 2 (parallel — independent type modules) - - Wave 2: Task 3 (depends on Task 1's src/types/auth.ts) - """ - workdir = Path(workdir) - _init_base_repo(workdir) - - _ensure_dir(workdir, "src/types") - _ensure_dir(workdir, "src/services") - - _write_file(workdir, "docs/superpowers/plans/test-plan.md", DEPENDENCY_CHAIN_PLAN) - - _commit_all_on_feature_branch(workdir) - - -def create_wave_test_repo_minimal(workdir: Path) -> None: - """Create a 3-task plan exercising wave execution with minimal surface. - - Expected decomposition: - - Wave 1: Task 1 (foundation types) - - Wave 2: Tasks 2, 3 (parallel, independent logger + clock services) - """ - workdir = Path(workdir) - _init_base_repo(workdir) - - # Pre-create the barrel file and stub directories the plan references. - _write_file(workdir, "src/types/index.ts", "export {};\n") - for d in ("src/services", "tests"): - _ensure_dir(workdir, d) - - _write_file(workdir, "docs/superpowers/plans/test-plan.md", WAVE_TEST_PLAN_MINIMAL) - - _commit_all_on_feature_branch(workdir) - - -# ---------------------------------------------------------------------------- -# Pre-decomposed waves files -# ---------------------------------------------------------------------------- - -WAVE_TEST_SPEC = """\ -# Wave Decomposition Test Specification - -## Overview - -This specification describes a synthetic TypeScript project used to exercise -the full wave execution pipeline. 
The feature is a small, illustrative API -surface composed of three independent services (auth, users, billing) wired -together behind a thin routes layer. It exists solely so drill scenarios can -verify that an agent correctly runs an already-decomposed waves file from -start to finish. - -## Scope - -The spec covers: - -- A shared types module that declares the core domain interfaces. -- Three independent service classes, each with a small happy-path and - failure-path test suite. -- An API routes module that composes the three services. - -## Non-goals - -- Real persistence, real HTTP handling, real authentication. The exercise is - purely about wave execution mechanics, not production-quality code. -""" - -WAVE_TEST_SPEC_MINIMAL = """\ -# Minimal Wave Execution Test Specification - -## Overview - -This specification describes a minimal TypeScript project used to exercise -the wave execution pipeline with the smallest possible task surface. The -feature is a tiny utility layer composed of two independent services -(logger, clock) built on top of a shared types module. - -## Scope - -The spec covers: - -- A shared types module that declares `User` and `Session` interfaces. -- A logger service with a buffered `info` method. -- A clock service with a `now()` method returning the current Unix - timestamp in milliseconds. - -## Non-goals - -- Log rotation, log transport, time sources other than `Date.now()`, or - any production-grade concerns. The fixture exists purely to exercise - wave execution over a small set of parallelizable tasks. 
-""" - -WAVE_TEST_WAVES_FULL = """\ ---- -run_id: testw5 -source_plan: docs/superpowers/plans/test-plan.md -spec_path: docs/superpowers/specs/test-spec.md -feature_branch: feature/test-implementation -status: approved -sequential_time: 8h -parallel_time: 4h -savings: 50% -waves: - - {wave: 1, strategy: sequential, tasks: [1], depends_on: []} - - {wave: 2, strategy: parallel, tasks: [2, 3, 4], depends_on: [1]} - - {wave: 3, strategy: sequential, tasks: [5], depends_on: [2, 3, 4]} ---- - -# Wave Decomposition Test — Waves File - -## Waves Overview - -| Wave | Strategy | Tasks | Depends On | Notes | -|------|------------|-----------|------------|-----------------------------------------| -| 1 | sequential | 1 | — | Foundation types, must land first | -| 2 | parallel | 2, 3, 4 | 1 | Independent service implementations | -| 3 | sequential | 5 | 2, 3, 4 | API routes integration glue | - -**Sequential time estimate:** 8h -**Parallel time estimate:** 4h -**Savings:** 50% - ---- - -## Wave 1 — Foundation (sequential) - -Task 1 must land before any service work can begin because every Wave 2 -service imports from `src/types/index.ts`. - -### Task 1: Foundation types - -**Files:** -- Create: `src/types/auth.ts` -- Create: `src/types/users.ts` -- Create: `src/types/billing.ts` -- Modify: `src/types/index.ts` - -**Acceptance Criteria:** -- `src/types/auth.ts` exports `User` and `Session` interfaces. -- `src/types/users.ts` exports a `UserProfile` interface with `id` and `email`. -- `src/types/billing.ts` exports `Plan` and `Subscription` interfaces. -- `src/types/index.ts` re-exports everything from the three files above. -- `npm run build` succeeds with no type errors. 
- -- [ ] **Step 1: Create src/types/auth.ts with User and Session interfaces.** -- [ ] **Step 2: Create src/types/users.ts with UserProfile interface.** -- [ ] **Step 3: Create src/types/billing.ts with Plan and Subscription interfaces.** -- [ ] **Step 4: Update src/types/index.ts to re-export the three modules.** -- [ ] **Step 5: Run `npm run build` and commit.** - ---- - -## Wave 2 — Independent services (parallel) - -Tasks 2, 3, and 4 have no file overlap and no cross-task imports; they -can be executed in parallel in isolated worktrees and merged at the -wave boundary. - -### File ownership - -``` -Task 2 (auth service): - - src/services/auth.ts [create] - - tests/auth.test.ts [create] - -Task 3 (users service): - - src/services/users.ts [create] - - tests/users.test.ts [create] - -Task 4 (billing service): - - src/services/billing.ts [create] - - tests/billing.test.ts [create] -``` - -No two tasks in Wave 2 touch the same path. - -### Task 2: Auth service - -**Files:** -- Create: `src/services/auth.ts` -- Create: `tests/auth.test.ts` - -**Acceptance Criteria:** -- `src/services/auth.ts` exports an `AuthService` class with a `login(email, password)` method. -- `AuthService.login` returns a `Session` imported from `src/types/auth.ts`. -- `tests/auth.test.ts` covers the happy-path login case. -- `tests/auth.test.ts` covers an invalid-credentials failure case. -- `npm test -- tests/auth.test.ts` passes. - -- [ ] **Step 1: Write tests/auth.test.ts covering login success and failure.** -- [ ] **Step 2: Implement src/services/auth.ts to make the tests pass.** -- [ ] **Step 3: Run `npm test -- tests/auth.test.ts` and commit.** - -### Task 3: Users service - -**Files:** -- Create: `src/services/users.ts` -- Create: `tests/users.test.ts` - -**Acceptance Criteria:** -- `src/services/users.ts` exports a `UsersService` class with `getProfile(id)`. -- `UsersService.getProfile` returns a `UserProfile` imported from `src/types/users.ts`. 
-- `tests/users.test.ts` covers the happy-path lookup case. -- `tests/users.test.ts` covers a not-found case. -- `npm test -- tests/users.test.ts` passes. - -- [ ] **Step 1: Write tests/users.test.ts covering getProfile success and missing.** -- [ ] **Step 2: Implement src/services/users.ts to make the tests pass.** -- [ ] **Step 3: Run `npm test -- tests/users.test.ts` and commit.** - -### Task 4: Billing service - -**Files:** -- Create: `src/services/billing.ts` -- Create: `tests/billing.test.ts` - -**Acceptance Criteria:** -- `src/services/billing.ts` exports a `BillingService` class with `subscribe(userId, planId)`. -- `BillingService.subscribe` returns a `Subscription` imported from `src/types/billing.ts`. -- `tests/billing.test.ts` covers a successful subscription. -- `tests/billing.test.ts` covers a failed subscription. -- `npm test -- tests/billing.test.ts` passes. - -- [ ] **Step 1: Write tests/billing.test.ts covering subscribe success and failure.** -- [ ] **Step 2: Implement src/services/billing.ts to make the tests pass.** -- [ ] **Step 3: Run `npm test -- tests/billing.test.ts` and commit.** - ---- - -## Wave 3 — Integration (sequential) - -Task 5 depends on every Wave 2 service being merged; it can only start -once Wave 2 is fully integrated onto the feature branch. - -### Task 5: API routes - -**Files:** -- Create: `src/api/routes.ts` -- Modify: `src/index.ts` - -**Acceptance Criteria:** -- `src/api/routes.ts` imports `AuthService`, `UsersService`, and `BillingService`. -- `src/api/routes.ts` exports a `registerRoutes(app)` function that wires the three services. -- `src/index.ts` imports `registerRoutes` and calls it with the app. -- `npm run build` succeeds. -- `npm test` passes end to end. 
- -- [ ] **Step 1: Create src/api/routes.ts that composes the three services.** -- [ ] **Step 2: Update src/index.ts to register the routes on startup.** -- [ ] **Step 3: Run `npm run build && npm test` and commit.** -""" - - -WAVE_TEST_WAVES_MINIMAL = """\ ---- -run_id: testw3 -source_plan: docs/superpowers/plans/test-plan.md -spec_path: docs/superpowers/specs/test-spec.md -feature_branch: feature/test-implementation -status: approved -sequential_time: 3h -parallel_time: 2h -savings: 33% -waves: - - {wave: 1, strategy: sequential, tasks: [1], depends_on: []} - - {wave: 2, strategy: parallel, tasks: [2, 3], depends_on: [1]} ---- - -# Minimal Wave Execution — Waves File - -## Waves Overview - -| Wave | Strategy | Tasks | Depends On | Notes | -|------|------------|-------|------------|-----------------------------------| -| 1 | sequential | 1 | — | Foundation types, must land first | -| 2 | parallel | 2, 3 | 1 | Independent logger + clock | - -**Sequential time estimate:** 3h -**Parallel time estimate:** 2h -**Savings:** 33% - ---- - -## Wave 1 — Foundation (sequential) - -### Task 1: Foundation types - -**Files:** -- Create: `src/types/core.ts` -- Modify: `src/types/index.ts` - -**Acceptance Criteria:** -- `src/types/core.ts` exports a `User` interface with `id` and `email`. -- `src/types/core.ts` exports a `Session` interface with `userId` and `token`. -- `src/types/index.ts` re-exports everything from `src/types/core.ts`. -- `npm run build` succeeds with no type errors. - -- [ ] **Step 1: Create src/types/core.ts with User and Session interfaces.** -- [ ] **Step 2: Update src/types/index.ts to re-export from ./core.** -- [ ] **Step 3: Run `npm run build` and commit.** - ---- - -## Wave 2 — Independent services (parallel) - -Tasks 2 and 3 have no file overlap and no cross-task imports; they can -be executed in parallel in isolated worktrees and merged at the wave -boundary. 
- -### File ownership - -``` -Task 2 (logger service): - - src/services/logger.ts [create] - - tests/logger.test.ts [create] - -Task 3 (clock service): - - src/services/clock.ts [create] - - tests/clock.test.ts [create] -``` - -No two tasks in Wave 2 touch the same path. - -### Task 2: Logger service - -**Files:** -- Create: `src/services/logger.ts` -- Create: `tests/logger.test.ts` - -**Acceptance Criteria:** -- `src/services/logger.ts` exports a `Logger` class with an `info(message: string)` method. -- `Logger.info` appends a timestamped entry to an internal buffer. -- `tests/logger.test.ts` covers a happy-path info case. -- `tests/logger.test.ts` covers a repeated-call buffering case. -- `npm test -- tests/logger.test.ts` passes. - -- [ ] **Step 1: Write tests/logger.test.ts covering info and buffering.** -- [ ] **Step 2: Implement src/services/logger.ts to make the tests pass.** -- [ ] **Step 3: Run `npm test -- tests/logger.test.ts` and commit.** - -### Task 3: Clock service - -**Files:** -- Create: `src/services/clock.ts` -- Create: `tests/clock.test.ts` - -**Acceptance Criteria:** -- `src/services/clock.ts` exports a `Clock` class with a `now(): number` method. -- `Clock.now` returns the current Unix timestamp in milliseconds. -- `tests/clock.test.ts` covers a happy-path now case. -- `tests/clock.test.ts` covers the return value being a finite number. -- `npm test -- tests/clock.test.ts` passes. - -- [ ] **Step 1: Write tests/clock.test.ts covering now success and type.** -- [ ] **Step 2: Implement src/services/clock.ts to make the tests pass.** -- [ ] **Step 3: Run `npm test -- tests/clock.test.ts` and commit.** -""" - - -WAVE_TEST_PLAN_BROKEN_TASK = """\ -# Wave Execution Failure Test Implementation Plan - -> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development -> (recommended) or superpowers:executing-plans to implement this plan task-by-task. - -**Goal:** Exercise wave execution's failure escalation path. 
Tasks 1 and 2 -should succeed normally. Task 3 is **structurally impossible** — its -pre-existing test file contains mutually contradictory assertions that -no implementation can satisfy, and the task scope explicitly forbids -modifying the test file. - -**Architecture:** Foundation types feed two parallel services. The second -parallel service (Task 3) is wired up so that the orchestrator must -detect a real failure, retry once, and escalate to the user. - -**Tech Stack:** TypeScript, Jest. - ---- - -### Task 1: Foundation types - -**Files:** -- Create: `src/types/core.ts` -- Modify: `src/types/index.ts` - -**Acceptance Criteria:** -- `src/types/core.ts` exports a `User` interface with `id` and `email`. -- `src/types/core.ts` exports a `Session` interface with `userId` and `token`. -- `src/types/index.ts` re-exports everything from `src/types/core.ts`. -- `npm run build` succeeds with no type errors. - -- [ ] **Step 1: Create src/types/core.ts with User and Session interfaces.** -- [ ] **Step 2: Update src/types/index.ts to re-export from ./core.** -- [ ] **Step 3: Run `npm run build` and commit.** - ---- - -### Task 2: Logger service - -**Files:** -- Create: `src/services/logger.ts` -- Create: `tests/logger.test.ts` - -**Acceptance Criteria:** -- `src/services/logger.ts` exports a `Logger` class with an `info(message: string)` method. -- `Logger.info` appends a timestamped entry to an internal buffer. -- `tests/logger.test.ts` covers a happy-path info case. -- `tests/logger.test.ts` covers a repeated-call buffering case. -- `npm test -- tests/logger.test.ts` passes. 
- -- [ ] **Step 1: Write tests/logger.test.ts covering info and buffering.** -- [ ] **Step 2: Implement src/services/logger.ts to make the tests pass.** -- [ ] **Step 3: Run `npm test -- tests/logger.test.ts` and commit.** - ---- - -### Task 3: Counter service (impossible — DO NOT modify test file) - -**Files:** -- Create: `src/services/counter.ts` -- Pre-existing (DO NOT modify): `tests/counter.test.ts` - -**Acceptance Criteria:** -- `src/services/counter.ts` exports a `Counter` class with a `compute(n: number): number` method. -- `tests/counter.test.ts` already exists. **You MUST NOT modify or delete it.** -- `npm test -- tests/counter.test.ts` passes against the pre-existing test file. -- `npm run build` succeeds. - -- [ ] **Step 1: Read the pre-existing tests/counter.test.ts and understand its assertions.** -- [ ] **Step 2: Implement src/services/counter.ts to satisfy every assertion in the existing test file (without changing the test file).** -- [ ] **Step 3: Run `npm test -- tests/counter.test.ts` and `npm run build`, then commit.** -""" - - -# Pre-existing test file for the broken Task 3. This file is committed -# to the repo BEFORE the implementer runs. It contains mutually -# contradictory assertions: compute(1) is asserted to equal both 1 AND -# 2 in two separate `it` blocks. No implementation of `compute` can -# satisfy both assertions simultaneously, so `npm test` will always -# report a failing test for one of the two cases, no matter what the -# implementer writes. The implementer cannot modify the test file -# because the task acceptance criteria explicitly forbid it. This -# produces a structural failure that the orchestrator must detect. -COUNTER_FAILING_TEST = """\ -import { Counter } from '../src/services/counter'; - -describe('Counter', () => { - // The two assertions below are mutually contradictory by design. - // No implementation of compute(n) can make both tests pass at once, - // and the task scope forbids modifying this file. 
The orchestrator - // should detect the failure, retry once, then escalate to the user. - - it('compute(1) returns 1', () => { - const counter = new Counter(); - expect(counter.compute(1)).toBe(1); - }); - - it('compute(1) returns 2', () => { - const counter = new Counter(); - expect(counter.compute(1)).toBe(2); - }); -}); -""" - - -WAVE_TEST_SPEC_BROKEN_TASK = """\ -# Wave Execution Failure Test Specification - -## Overview - -This specification describes a synthetic TypeScript project used to -exercise the wave execution skill's failure-handling and escalation -path. It is intentionally constructed so that one task in a parallel -wave cannot succeed. - -## Scope - -The spec covers: - -- A shared types module that declares `User` and `Session` interfaces. -- A logger service with a buffered `info` method (Task 2 — should pass). -- A counter service whose pre-existing test file contains mutually - contradictory assertions (Task 3 — must fail). - -## Non-goals - -- A working counter service. Task 3 is a deliberate failure injection, - not a real feature. The fixture exists purely to verify that the - orchestrator detects the failure, retries once per the failure - handling matrix, and escalates to the user instead of silently - proceeding. 
-""" - - -WAVE_TEST_WAVES_BROKEN_TASK = """\ ---- -run_id: testfwf -source_plan: docs/superpowers/plans/test-plan.md -spec_path: docs/superpowers/specs/test-spec.md -feature_branch: feature/test-implementation -status: approved -sequential_time: 3h -parallel_time: 2h -savings: 33% -waves: - - {wave: 1, strategy: sequential, tasks: [1], depends_on: []} - - {wave: 2, strategy: parallel, tasks: [2, 3], depends_on: [1]} ---- - -# Wave Execution Failure Test — Waves File - -## Waves Overview - -| Wave | Strategy | Tasks | Depends On | Notes | -|------|------------|-------|------------|------------------------------------------------| -| 1 | sequential | 1 | — | Foundation types, must land first | -| 2 | parallel | 2, 3 | 1 | Logger (passes) + Counter (structurally fails) | - -**Sequential time estimate:** 3h -**Parallel time estimate:** 2h -**Savings:** 33% - ---- - -## Wave 1 — Foundation (sequential) - -### Task 1: Foundation types - -**Files:** -- Create: `src/types/core.ts` -- Modify: `src/types/index.ts` - -**Acceptance Criteria:** -- `src/types/core.ts` exports a `User` interface with `id` and `email`. -- `src/types/core.ts` exports a `Session` interface with `userId` and `token`. -- `src/types/index.ts` re-exports everything from `src/types/core.ts`. -- `npm run build` succeeds with no type errors. - -- [ ] **Step 1: Create src/types/core.ts with User and Session interfaces.** -- [ ] **Step 2: Update src/types/index.ts to re-export from ./core.** -- [ ] **Step 3: Run `npm run build` and commit.** - ---- - -## Wave 2 — Independent services (parallel) - -Tasks 2 and 3 have no file overlap and no cross-task imports; they can -be executed in parallel in isolated worktrees and merged at the wave -boundary. 
- -### File ownership - -``` -Task 2 (logger service): - - src/services/logger.ts [create] - - tests/logger.test.ts [create] - -Task 3 (counter service): - - src/services/counter.ts [create] - - tests/counter.test.ts [pre-existing — DO NOT modify] -``` - -No two tasks in Wave 2 touch the same path. - -### Task 2: Logger service - -**Files:** -- Create: `src/services/logger.ts` -- Create: `tests/logger.test.ts` - -**Acceptance Criteria:** -- `src/services/logger.ts` exports a `Logger` class with an `info(message: string)` method. -- `Logger.info` appends a timestamped entry to an internal buffer. -- `tests/logger.test.ts` covers a happy-path info case. -- `tests/logger.test.ts` covers a repeated-call buffering case. -- `npm test -- tests/logger.test.ts` passes. - -- [ ] **Step 1: Write tests/logger.test.ts covering info and buffering.** -- [ ] **Step 2: Implement src/services/logger.ts to make the tests pass.** -- [ ] **Step 3: Run `npm test -- tests/logger.test.ts` and commit.** - -### Task 3: Counter service (impossible — DO NOT modify test file) - -**Files:** -- Create: `src/services/counter.ts` -- Pre-existing (DO NOT modify): `tests/counter.test.ts` - -**Acceptance Criteria:** -- `src/services/counter.ts` exports a `Counter` class with a `compute(n: number): number` method. -- `tests/counter.test.ts` already exists. **You MUST NOT modify or delete it.** -- `npm test -- tests/counter.test.ts` passes against the pre-existing test file. -- `npm run build` succeeds. - -- [ ] **Step 1: Read the pre-existing tests/counter.test.ts and understand its assertions.** -- [ ] **Step 2: Implement src/services/counter.ts to satisfy every assertion in the existing test file (without changing the test file).** -- [ ] **Step 3: Run `npm test -- tests/counter.test.ts` and `npm run build`, then commit.** -""" - - -def _commit_waves_file(workdir: Path) -> None: - """Stage and commit the waves file + spec on the feature branch. 
- - Assumes the caller already created the underlying plan repo and is - sitting on feature/test-implementation (the create_wave_test_repo* - helpers leave us there). - """ - _git(["git", "add", "-A"], cwd=workdir) - _git(["git", "commit", "-m", "add pre-decomposed waves file and spec"], cwd=workdir) - - -def create_waves_file(workdir: Path) -> None: - """Create the full 5-task repo with a pre-decomposed .waves.md file. - - This is the starting point for `executing-waves` scenarios that - want the full 3-wave experience. The waves file is marked - `status: approved` so the executing-waves pre-flight check passes. - """ - workdir = Path(workdir) - create_wave_test_repo(workdir) - - _write_file( - workdir, - "docs/superpowers/specs/test-spec.md", - WAVE_TEST_SPEC, - ) - _write_file( - workdir, - "docs/superpowers/plans/test-plan.waves.md", - WAVE_TEST_WAVES_FULL, - ) - - _commit_waves_file(workdir) - - -def create_waves_file_minimal(workdir: Path) -> None: - """Create the 3-task minimal repo with a pre-decomposed .waves.md file. - - This is the starting point for smaller `executing-waves` scenarios - that exercise the same execution pipeline over 1 sequential task + - 2 parallel tasks. The waves file is marked `status: approved` so - the executing-waves pre-flight check passes. - """ - workdir = Path(workdir) - create_wave_test_repo_minimal(workdir) - - _write_file( - workdir, - "docs/superpowers/specs/test-spec.md", - WAVE_TEST_SPEC_MINIMAL, - ) - _write_file( - workdir, - "docs/superpowers/plans/test-plan.waves.md", - WAVE_TEST_WAVES_MINIMAL, - ) - - _commit_waves_file(workdir) - - -def create_waves_file_with_broken_task(workdir: Path) -> None: - """Create a 3-task waves repo where Task 3 is structurally impossible. - - This is the starting point for `executing-waves` failure scenarios. 
- Layout: - - Wave 1 (sequential): Task 1 — foundation types (passes normally) - - Wave 2 (parallel): Task 2 — logger service (passes normally) - Task 3 — counter service (always fails) - - Task 3's failure is structural, not a prompt trick: a pre-existing - `tests/counter.test.ts` file is committed before the implementer - runs and contains two contradictory assertions (`compute(1) === 1` - AND `compute(1) === 2`). The acceptance criteria explicitly forbid - modifying the test file. No implementation can make both tests - pass, so `npm test` always reports a failure for one of the two - cases. - - Expected orchestrator behavior (per failure-handling.md): - 1. Detect Task 3 failure after the parallel wave runs. - 2. Merge Task 2 (the successful task) onto the feature branch. - 3. Retry Task 3 once from the updated tip. - 4. Retry also fails. - 5. Escalate to the user with the standard escalation message. - """ - workdir = Path(workdir) - create_wave_test_repo_minimal(workdir) - - # Overwrite the plan with the broken-task variant. - _write_file( - workdir, - "docs/superpowers/plans/test-plan.md", - WAVE_TEST_PLAN_BROKEN_TASK, - ) - - # Pre-create the failing test fixture for Task 3. The implementer - # must NOT modify it (per the task acceptance criteria), so the - # contradictory assertions guarantee a structural failure. - _write_file( - workdir, - "tests/counter.test.ts", - COUNTER_FAILING_TEST, - ) - - _write_file( - workdir, - "docs/superpowers/specs/test-spec.md", - WAVE_TEST_SPEC_BROKEN_TASK, - ) - _write_file( - workdir, - "docs/superpowers/plans/test-plan.waves.md", - WAVE_TEST_WAVES_BROKEN_TASK, - ) - - _commit_waves_file(workdir) - - -def create_conflict_surface_repo(workdir: Path) -> None: - """Create a plan where three tasks implicitly modify the same barrel file. - - The `src/services/index.ts` barrel file is pre-created so the - decomposer sees it during directory scanning. 
Each task in the plan - lists only its new module file but the steps mention adding an - export to the barrel — the conflict-surface heuristic should notice - this and either add the barrel file to each task's list or serialize - the tasks. - - Expected decomposition (under a correct heuristic): either - - all tasks in one wave with `src/services/index.ts` added to each - task's file list, or - - sequential waves (serialized) to avoid the shared barrel. - """ - workdir = Path(workdir) - _init_base_repo(workdir) - - # The barrel file MUST exist before the plan runs. - _write_file(workdir, "src/services/index.ts", "export {};\n") - - _write_file(workdir, "docs/superpowers/plans/test-plan.md", CONFLICT_SURFACE_PLAN) - - _commit_all_on_feature_branch(workdir)