mirror of
https://github.com/obra/superpowers.git
synced 2026-05-11 11:39:04 +08:00
Lift drill into evals/ at 013fcb8b7dbefd6d3fa4653493e5d2ec8e7f985b
rsync of obra/drill@013fcb8b7d into superpowers/evals/, excluding .git/, .venv/, results/, .env/, __pycache__/, *.egg-info/, .private-journal/. The drill repo is unaffected by this commit; archival is a separate manual step after this PR merges. Source SHA recorded at evals/.drill-source-sha for divergence detection.
This commit is contained in:
36
evals/scenarios/executing-waves-minimal.yaml
Normal file
36
evals/scenarios/executing-waves-minimal.yaml
Normal file
@@ -0,0 +1,36 @@
|
||||
# Eval scenario: minimal wave execution (1 sequential task + 2 parallel tasks).
#
# NOTE(review): this document was reconstructed from a rendered diff view in
# which indentation was lost. Nesting below follows the blank-line stanza
# grouping of the original; confirm against the upstream file before relying
# on it (in particular that `observe` belongs under `verify`).
scenario: executing-waves-minimal
description: "Minimal wave execution — 1 sequential task + 2 parallel tasks"
user_posture: spec-aware

# Fixture preparation plus shell checks on the prepared state.
# Presumably the harness runs these before the first turn — TODO confirm.
setup:
  helpers:
    - create_waves_file_minimal
  assertions:
    - "git rev-parse --is-inside-work-tree"
    - "git branch --show-current | grep feature/test-implementation"
    - "test -f docs/superpowers/plans/test-plan.waves.md"
    - "grep -q 'status: approved' docs/superpowers/plans/test-plan.waves.md"

# Each intent is a folded scalar (`>`): its lines are joined with spaces into
# a single paragraph followed by one trailing newline.
turns:
  - intent: >
      Ask the agent to use the executing-waves skill on the waves file
      at docs/superpowers/plans/test-plan.waves.md. Reference the skill
      by name.
  - intent: >
      Observe execution. Let the agent proceed autonomously. If the
      agent asks for any confirmation, answer "proceed" or equivalent.
  - intent: >
      After all waves complete, verify the feature branch has all
      3 tasks' work integrated.

# NOTE(review): turn_timeout is presumably in seconds — confirm with the runner.
limits:
  max_turns: 50
  turn_timeout: 600

# Natural-language pass criteria evaluated after the run.
verify:
  criteria:
    - "Agent invoked the executing-waves skill"
    - "git log shows commits from all 3 tasks on feature branch"
    - "git worktree list shows only main worktree (no orphaned worktrees)"
    - "Wave 2 used worktree isolation for parallel tasks (visible in terminal output or git log merge commits)"
  observe: true
|
||||
Reference in New Issue
Block a user