mirror of
https://github.com/obra/superpowers.git
synced 2026-05-10 19:19:03 +08:00
evals: remove unreleased wave scenarios
This commit is contained in:
@@ -1,31 +0,0 @@
|
||||
scenario: decomposing-into-waves-conflict-surface
|
||||
description: "Agent flags a barrel/index file as a conflict surface shared by multiple tasks"
|
||||
user_posture: spec-aware
|
||||
|
||||
setup:
|
||||
helpers:
|
||||
- create_conflict_surface_repo
|
||||
assertions:
|
||||
- "test -f docs/superpowers/plans/test-plan.md"
|
||||
- "test -f src/services/index.ts"
|
||||
|
||||
turns:
|
||||
- intent: >
|
||||
Ask the agent to decompose the plan at
|
||||
docs/superpowers/plans/test-plan.md into waves. Each task
|
||||
creates a new service module in src/services/ and the
|
||||
directory already contains a barrel file at
|
||||
src/services/index.ts. Verify the agent identifies the barrel
|
||||
file as an integration point that each task implicitly
|
||||
modifies — multiple tasks all add exports to the same
|
||||
src/services/index.ts.
|
||||
|
||||
limits:
|
||||
max_turns: 10
|
||||
turn_timeout: 120
|
||||
|
||||
verify:
|
||||
criteria:
|
||||
- "Agent flagged a barrel/index file as a conflict surface (visible in terminal output)"
|
||||
- "Agent either added the barrel file to each task's file list OR moved the tasks to sequential waves because they all implicitly modify the same barrel file"
|
||||
observe: true
|
||||
@@ -1,28 +0,0 @@
|
||||
scenario: decomposing-into-waves-dependency-chain
|
||||
description: "Agent detects semantic import-based dependencies, not just file overlap"
|
||||
user_posture: spec-aware
|
||||
|
||||
setup:
|
||||
helpers:
|
||||
- create_dependency_chain_repo
|
||||
assertions:
|
||||
- "test -f docs/superpowers/plans/test-plan.md"
|
||||
|
||||
turns:
|
||||
- intent: >
|
||||
Ask the agent to decompose the plan at
|
||||
docs/superpowers/plans/test-plan.md into waves. Task 1 creates
|
||||
src/types/auth.ts. Task 3 imports from src/types/auth.ts but
|
||||
does not modify it. Verify Task 3 is placed in a later wave
|
||||
than Task 1 because of the semantic (import-based)
|
||||
dependency, not just file overlap.
|
||||
|
||||
limits:
|
||||
max_turns: 10
|
||||
turn_timeout: 120
|
||||
|
||||
verify:
|
||||
criteria:
|
||||
- "Task that imports another task's output is in a later wave"
|
||||
- "Agent identified the semantic dependency beyond file overlap (mentioned in terminal output)"
|
||||
observe: true
|
||||
@@ -1,32 +0,0 @@
|
||||
scenario: decomposing-into-waves-false-overlap
|
||||
description: "Agent uses full paths, not bare filenames, when detecting file overlap between tasks"
|
||||
user_posture: spec-aware
|
||||
|
||||
setup:
|
||||
helpers:
|
||||
- create_false_overlap_repo
|
||||
assertions:
|
||||
- "test -f docs/superpowers/plans/test-plan.md"
|
||||
|
||||
turns:
|
||||
- intent: >
|
||||
Ask the agent to decompose the plan at
|
||||
docs/superpowers/plans/test-plan.md into waves. The plan has
|
||||
three tasks that each create a file named types.ts but in
|
||||
different directories (src/auth/, src/users/, src/billing/).
|
||||
Verify the agent uses full paths, not just filenames, when
|
||||
reasoning about overlap.
|
||||
- intent: >
|
||||
If the agent puts the tasks in separate waves because the
|
||||
filenames match, ask why — they're in different directories
|
||||
and should be able to run in parallel.
|
||||
|
||||
limits:
|
||||
max_turns: 10
|
||||
turn_timeout: 120
|
||||
|
||||
verify:
|
||||
criteria:
|
||||
- "Tasks with same-named files in different directories are in the same parallel wave"
|
||||
- "Agent displayed overlap matrix or conflict list using full file paths (visible in terminal output)"
|
||||
observe: true
|
||||
@@ -1,37 +0,0 @@
|
||||
scenario: decomposing-into-waves-naive
|
||||
description: "Agent decomposes a multi-task plan into waves without user prompting"
|
||||
user_posture: naive
|
||||
|
||||
setup:
|
||||
helpers:
|
||||
- create_wave_test_repo
|
||||
assertions:
|
||||
- "git rev-parse --is-inside-work-tree"
|
||||
- "test -f docs/superpowers/plans/test-plan.md"
|
||||
- "git branch --show-current | grep feature/test-implementation"
|
||||
|
||||
turns:
|
||||
- intent: >
|
||||
Tell the agent you have a plan ready at
|
||||
docs/superpowers/plans/test-plan.md and you want to start
|
||||
implementing. Do NOT mention waves, parallelism, or
|
||||
decomposition. Let the agent decide how to execute.
|
||||
- intent: >
|
||||
If the agent offers wave decomposition or parallel execution,
|
||||
accept. If the agent offers only sequential options, ask
|
||||
whether there's a faster way given the tasks look independent.
|
||||
- intent: >
|
||||
If a .waves.md file is produced, review it briefly and confirm
|
||||
it looks reasonable. If not, ask what happened.
|
||||
|
||||
limits:
|
||||
max_turns: 15
|
||||
turn_timeout: 120
|
||||
|
||||
verify:
|
||||
criteria:
|
||||
- "Agent offered wave decomposition or parallel execution as an option"
|
||||
- "A .waves.md file exists in the filesystem"
|
||||
- "The .waves.md file content includes a Waves Overview table"
|
||||
- "The .waves.md file content includes file ownership blocks for parallel waves"
|
||||
observe: true
|
||||
@@ -1,32 +0,0 @@
|
||||
scenario: decomposing-into-waves-spec-aware
|
||||
description: "User explicitly asks the agent to use the decomposing-into-waves skill"
|
||||
user_posture: spec-aware
|
||||
|
||||
setup:
|
||||
helpers:
|
||||
- create_wave_test_repo
|
||||
assertions:
|
||||
- "git rev-parse --is-inside-work-tree"
|
||||
- "test -f docs/superpowers/plans/test-plan.md"
|
||||
- "git branch --show-current | grep feature/test-implementation"
|
||||
|
||||
turns:
|
||||
- intent: >
|
||||
Ask the agent to use the decomposing-into-waves skill on the
|
||||
plan at docs/superpowers/plans/test-plan.md. Reference the
|
||||
skill by name.
|
||||
- intent: >
|
||||
Review the waves file output. Verify parallel waves don't have
|
||||
file overlaps. Confirm or ask for corrections.
|
||||
|
||||
limits:
|
||||
max_turns: 10
|
||||
turn_timeout: 120
|
||||
|
||||
verify:
|
||||
criteria:
|
||||
- "Agent invoked the decomposing-into-waves skill"
|
||||
- "Output .waves.md file has YAML frontmatter with run_id and waves structure"
|
||||
- "File ownership block present for parallel waves"
|
||||
- "Wave ordering respects task dependencies (foundation first)"
|
||||
observe: true
|
||||
@@ -1,39 +0,0 @@
|
||||
scenario: executing-waves-full-naive
|
||||
description: "Full 3-wave execution with 5 tasks; user references waves file without naming executing-waves"
|
||||
user_posture: naive
|
||||
|
||||
setup:
|
||||
helpers:
|
||||
- create_waves_file
|
||||
assertions:
|
||||
- "git rev-parse --is-inside-work-tree"
|
||||
- "git branch --show-current | grep feature/test-implementation"
|
||||
- "test -f docs/superpowers/plans/test-plan.waves.md"
|
||||
- "grep -q 'status: approved' docs/superpowers/plans/test-plan.waves.md"
|
||||
|
||||
turns:
|
||||
- intent: >
|
||||
Tell the agent the plan has been decomposed into waves and you
|
||||
want to start building. Reference the waves file at
|
||||
docs/superpowers/plans/test-plan.waves.md. Do NOT mention the
|
||||
executing-waves skill by name.
|
||||
- intent: >
|
||||
Let the agent proceed autonomously through all 3 waves. If it
|
||||
starts sequentially without using the wave structure, ask why.
|
||||
If it asks for confirmation to proceed, say "go ahead".
|
||||
- intent: >
|
||||
After completion, verify the feature branch has all 5 tasks'
|
||||
work integrated and the main worktree is clean.
|
||||
|
||||
limits:
|
||||
max_turns: 60
|
||||
turn_timeout: 900
|
||||
|
||||
verify:
|
||||
criteria:
|
||||
- "Agent used worktree isolation for Wave 2 parallel tasks"
|
||||
- "git log shows commits from all 5 tasks on the feature branch"
|
||||
- "Agent reported progress at wave boundaries"
|
||||
- "Final state has only the main worktree (no orphans)"
|
||||
- "git log --merges shows merge commits from parallel wave tasks"
|
||||
observe: true
|
||||
@@ -1,36 +0,0 @@
|
||||
scenario: executing-waves-minimal
|
||||
description: "Minimal wave execution — 1 sequential task + 2 parallel tasks"
|
||||
user_posture: spec-aware
|
||||
|
||||
setup:
|
||||
helpers:
|
||||
- create_waves_file_minimal
|
||||
assertions:
|
||||
- "git rev-parse --is-inside-work-tree"
|
||||
- "git branch --show-current | grep feature/test-implementation"
|
||||
- "test -f docs/superpowers/plans/test-plan.waves.md"
|
||||
- "grep -q 'status: approved' docs/superpowers/plans/test-plan.waves.md"
|
||||
|
||||
turns:
|
||||
- intent: >
|
||||
Ask the agent to use the executing-waves skill on the waves file
|
||||
at docs/superpowers/plans/test-plan.waves.md. Reference the skill
|
||||
by name.
|
||||
- intent: >
|
||||
Observe execution. Let the agent proceed autonomously. If the
|
||||
agent asks for any confirmation, answer "proceed" or equivalent.
|
||||
- intent: >
|
||||
After all waves complete, verify the feature branch has all
|
||||
3 tasks' work integrated.
|
||||
|
||||
limits:
|
||||
max_turns: 50
|
||||
turn_timeout: 600
|
||||
|
||||
verify:
|
||||
criteria:
|
||||
- "Agent invoked the executing-waves skill"
|
||||
- "git log shows commits from all 3 tasks on the feature branch"
|
||||
- "git worktree list shows only main worktree (no orphaned worktrees)"
|
||||
- "Wave 2 used worktree isolation for parallel tasks (visible in terminal output or git log merge commits)"
|
||||
observe: true
|
||||
@@ -1,39 +0,0 @@
|
||||
scenario: executing-waves-task-failure
|
||||
description: "Wave execution with a deliberately broken Task 3; tests failure escalation"
|
||||
user_posture: spec-aware
|
||||
|
||||
setup:
|
||||
helpers:
|
||||
- create_waves_file_with_broken_task
|
||||
assertions:
|
||||
- "git rev-parse --is-inside-work-tree"
|
||||
- "git branch --show-current | grep feature/test-implementation"
|
||||
- "test -f docs/superpowers/plans/test-plan.waves.md"
|
||||
- "grep -q 'status: approved' docs/superpowers/plans/test-plan.waves.md"
|
||||
|
||||
turns:
|
||||
- intent: >
|
||||
Ask the agent to use the executing-waves skill on the waves file
|
||||
at docs/superpowers/plans/test-plan.waves.md. Reference the skill
|
||||
by name.
|
||||
- intent: >
|
||||
Observe execution. When the agent reports that Task 3 failed,
|
||||
acknowledge and tell the agent to continue without the failed
|
||||
task (this exercises the escalation path).
|
||||
- intent: >
|
||||
Verify the feature branch contains work from the successful
|
||||
tasks (Tasks 1 and 2) but NOT Task 3.
|
||||
|
||||
limits:
|
||||
max_turns: 60
|
||||
turn_timeout: 900
|
||||
|
||||
verify:
|
||||
criteria:
|
||||
- "Agent invoked the executing-waves skill"
|
||||
- "git log shows commits from Tasks 1 and 2 on the feature branch"
|
||||
- "Task 3 was marked as failed in terminal output"
|
||||
- "Agent attempted retry of Task 3 before escalating (one retry per the failure handling matrix)"
|
||||
- "Agent escalated Task 3 failure to the user rather than silently proceeding"
|
||||
- "No orphaned worktrees remain from the failed task (except those preserved for debugging)"
|
||||
observe: true
|
||||
Reference in New Issue
Block a user