Lift drill into evals/ at 013fcb8b7dbefd6d3fa4653493e5d2ec8e7f985b

rsync of obra/drill@013fcb8b7d into superpowers/evals/, excluding
.git/, .venv/, results/, .env/, __pycache__/, *.egg-info/,
.private-journal/.

The drill repo is unaffected by this commit; archival is a separate
manual step after this PR merges.

Source SHA recorded at evals/.drill-source-sha for divergence
detection.
This commit is contained in:
Jesse Vincent
2026-05-06 12:15:46 -07:00
committed by Drew Ritter
parent 2e46e9590d
commit 3b412a3836
124 changed files with 13806 additions and 0 deletions

View File

@@ -0,0 +1,26 @@
# Profile "claude-haiku": drives the `claude` CLI with `--model haiku`.
# NOTE(review): every line in this view sits at column 0, so the nesting under
# hooks/idle/terminal/session_logs is not visible here — confirm it against
# the file on disk before relying on this layout.
name: claude-haiku
cli: claude
args:
- "--dangerously-skip-permissions"
- "--plugin-dir"
# The same variable is listed under required_env below; presumably it is
# expanded from the environment before launch — TODO confirm.
- "${SUPERPOWERS_ROOT}"
- "--model"
- "haiku"
required_env:
- ANTHROPIC_API_KEY
- SUPERPOWERS_ROOT
hooks:
pre_run: []
post_run: []
shutdown: "/exit"
idle:
quiescence_seconds: 3
# NOTE(review): the first alternative is a bare `^`, which matches at the start
# of any input, so as written this pattern can never fail to match. A prompt
# glyph may have been dropped after `^` — confirm against the original file.
ready_pattern: "^|^\\$|Human:|Enter to confirm"
# Signals that the CLI is still working: "esc to cancel", "Thinking...", or a
# braille spinner glyph. The engine extends its wait deadline while any match.
# NOTE(review): the parenthesised "(esc to cancel…)" alternative is already
# covered by the plain "esc to cancel" alternative.
busy_pattern: "esc to cancel|Thinking\\.\\.\\.|\\(esc to cancel[^)]*\\)|[⠇⠏⠋⠙⠹⠸⠼⠴⠦⠧⠶⠾⠽⠻⠿]"
# Cap on the total deadline extension from busy detections: 1800 s = 30 min.
max_busy_seconds: 1800
startup_timeout: 60
terminal:
cols: 200
rows: 50
session_logs:
pattern: "~/.claude/projects/**/session-*.jsonl"

View File

@@ -0,0 +1,26 @@
# Profile "claude-opus-4-6-1m": drives the `claude` CLI with
# `--model claude-opus-4-6[1m]`.
# NOTE(review): every line in this view sits at column 0, so the nesting under
# hooks/idle/terminal/session_logs is not visible here — confirm it against
# the file on disk before relying on this layout.
name: claude-opus-4-6-1m
cli: claude
args:
- "--dangerously-skip-permissions"
- "--plugin-dir"
# The same variable is listed under required_env below; presumably it is
# expanded from the environment before launch — TODO confirm.
- "${SUPERPOWERS_ROOT}"
- "--model"
# The "[1m]" suffix is part of the model string passed to the CLI; presumably
# it selects a 1M-token context variant — TODO confirm.
- "claude-opus-4-6[1m]"
required_env:
- ANTHROPIC_API_KEY
- SUPERPOWERS_ROOT
hooks:
pre_run: []
post_run: []
shutdown: "/exit"
idle:
quiescence_seconds: 3
# NOTE(review): the first alternative is a bare `^`, which matches at the start
# of any input, so as written this pattern can never fail to match. A prompt
# glyph may have been dropped after `^` — confirm against the original file.
ready_pattern: "^|^\\$|Human:|Enter to confirm"
# Signals that the CLI is still working: "esc to cancel", "Thinking...", or a
# braille spinner glyph. The engine extends its wait deadline while any match.
# NOTE(review): the parenthesised "(esc to cancel…)" alternative is already
# covered by the plain "esc to cancel" alternative.
busy_pattern: "esc to cancel|Thinking\\.\\.\\.|\\(esc to cancel[^)]*\\)|[⠇⠏⠋⠙⠹⠸⠼⠴⠦⠧⠶⠾⠽⠻⠿]"
# Cap on the total deadline extension from busy detections: 1800 s = 30 min.
max_busy_seconds: 1800
startup_timeout: 60
terminal:
cols: 200
rows: 50
session_logs:
pattern: "~/.claude/projects/**/session-*.jsonl"

View File

@@ -0,0 +1,26 @@
# Profile "claude-opus-4-6": drives the `claude` CLI with
# `--model claude-opus-4-6`.
# NOTE(review): every line in this view sits at column 0, so the nesting under
# hooks/idle/terminal/session_logs is not visible here — confirm it against
# the file on disk before relying on this layout.
name: claude-opus-4-6
cli: claude
args:
- "--dangerously-skip-permissions"
- "--plugin-dir"
# The same variable is listed under required_env below; presumably it is
# expanded from the environment before launch — TODO confirm.
- "${SUPERPOWERS_ROOT}"
- "--model"
- "claude-opus-4-6"
required_env:
- ANTHROPIC_API_KEY
- SUPERPOWERS_ROOT
hooks:
pre_run: []
post_run: []
shutdown: "/exit"
idle:
quiescence_seconds: 3
# NOTE(review): the first alternative is a bare `^`, which matches at the start
# of any input, so as written this pattern can never fail to match. A prompt
# glyph may have been dropped after `^` — confirm against the original file.
ready_pattern: "^|^\\$|Human:|Enter to confirm"
# Signals that the CLI is still working: "esc to cancel", "Thinking...", or a
# braille spinner glyph. The engine extends its wait deadline while any match.
# NOTE(review): the parenthesised "(esc to cancel…)" alternative is already
# covered by the plain "esc to cancel" alternative.
busy_pattern: "esc to cancel|Thinking\\.\\.\\.|\\(esc to cancel[^)]*\\)|[⠇⠏⠋⠙⠹⠸⠼⠴⠦⠧⠶⠾⠽⠻⠿]"
# Cap on the total deadline extension from busy detections: 1800 s = 30 min.
max_busy_seconds: 1800
startup_timeout: 60
terminal:
cols: 200
rows: 50
session_logs:
pattern: "~/.claude/projects/**/session-*.jsonl"

View File

@@ -0,0 +1,26 @@
# Profile "claude-opus-4-7-1m": drives the `claude` CLI with
# `--model claude-opus-4-7[1m]`.
# NOTE(review): every line in this view sits at column 0, so the nesting under
# hooks/idle/terminal/session_logs is not visible here — confirm it against
# the file on disk before relying on this layout.
name: claude-opus-4-7-1m
cli: claude
args:
- "--dangerously-skip-permissions"
- "--plugin-dir"
# The same variable is listed under required_env below; presumably it is
# expanded from the environment before launch — TODO confirm.
- "${SUPERPOWERS_ROOT}"
- "--model"
# The "[1m]" suffix is part of the model string passed to the CLI; presumably
# it selects a 1M-token context variant — TODO confirm.
- "claude-opus-4-7[1m]"
required_env:
- ANTHROPIC_API_KEY
- SUPERPOWERS_ROOT
hooks:
pre_run: []
post_run: []
shutdown: "/exit"
idle:
quiescence_seconds: 3
# NOTE(review): the first alternative is a bare `^`, which matches at the start
# of any input, so as written this pattern can never fail to match. A prompt
# glyph may have been dropped after `^` — confirm against the original file.
ready_pattern: "^|^\\$|Human:|Enter to confirm"
# Signals that the CLI is still working: "esc to cancel", "Thinking...", or a
# braille spinner glyph. The engine extends its wait deadline while any match.
# NOTE(review): the parenthesised "(esc to cancel…)" alternative is already
# covered by the plain "esc to cancel" alternative.
busy_pattern: "esc to cancel|Thinking\\.\\.\\.|\\(esc to cancel[^)]*\\)|[⠇⠏⠋⠙⠹⠸⠼⠴⠦⠧⠶⠾⠽⠻⠿]"
# Cap on the total deadline extension from busy detections: 1800 s = 30 min.
max_busy_seconds: 1800
startup_timeout: 60
terminal:
cols: 200
rows: 50
session_logs:
pattern: "~/.claude/projects/**/session-*.jsonl"

View File

@@ -0,0 +1,26 @@
# Profile "claude-opus-4-7": drives the `claude` CLI with
# `--model claude-opus-4-7`.
# NOTE(review): every line in this view sits at column 0, so the nesting under
# hooks/idle/terminal/session_logs is not visible here — confirm it against
# the file on disk before relying on this layout.
name: claude-opus-4-7
cli: claude
args:
- "--dangerously-skip-permissions"
- "--plugin-dir"
# The same variable is listed under required_env below; presumably it is
# expanded from the environment before launch — TODO confirm.
- "${SUPERPOWERS_ROOT}"
- "--model"
- "claude-opus-4-7"
required_env:
- ANTHROPIC_API_KEY
- SUPERPOWERS_ROOT
hooks:
pre_run: []
post_run: []
shutdown: "/exit"
idle:
quiescence_seconds: 3
# NOTE(review): the first alternative is a bare `^`, which matches at the start
# of any input, so as written this pattern can never fail to match. A prompt
# glyph may have been dropped after `^` — confirm against the original file.
ready_pattern: "^|^\\$|Human:|Enter to confirm"
# Signals that the CLI is still working: "esc to cancel", "Thinking...", or a
# braille spinner glyph. The engine extends its wait deadline while any match.
# NOTE(review): the parenthesised "(esc to cancel…)" alternative is already
# covered by the plain "esc to cancel" alternative.
busy_pattern: "esc to cancel|Thinking\\.\\.\\.|\\(esc to cancel[^)]*\\)|[⠇⠏⠋⠙⠹⠸⠼⠴⠦⠧⠶⠾⠽⠻⠿]"
# Cap on the total deadline extension from busy detections: 1800 s = 30 min.
max_busy_seconds: 1800
startup_timeout: 60
terminal:
cols: 200
rows: 50
session_logs:
pattern: "~/.claude/projects/**/session-*.jsonl"

View File

@@ -0,0 +1,32 @@
# Profile "claude": drives the `claude` CLI with `--model opus`.
# NOTE(review): every line in this view sits at column 0, so the nesting under
# hooks/idle/terminal/session_logs is not visible here — confirm it against
# the file on disk before relying on this layout.
name: claude
cli: claude
args:
- "--dangerously-skip-permissions"
- "--plugin-dir"
# The same variable is listed under required_env below; presumably it is
# expanded from the environment before launch — TODO confirm.
- "${SUPERPOWERS_ROOT}"
- "--model"
- "opus"
required_env:
- ANTHROPIC_API_KEY
- SUPERPOWERS_ROOT
hooks:
pre_run: []
post_run: []
shutdown: "/exit"
idle:
quiescence_seconds: 3
# NOTE(review): the first alternative is a bare `^`, which matches at the start
# of any input, so as written this pattern can never fail to match. A prompt
# glyph may have been dropped after `^` — confirm against the original file.
ready_pattern: "^|^\\$|Human:|Enter to confirm"
# Matches when Claude is actively working — spinners, "Thinking", time counter,
# or "esc to cancel". Engine extends its wait deadline when any of these match
# so the Actor doesn't interrupt long-running subagent work (e.g., wave execution).
# NOTE(review): the parenthesised "(esc to cancel…)" alternative is already
# covered by the plain "esc to cancel" alternative, and no alternative obviously
# targets the "time counter" mentioned above — confirm the intent.
busy_pattern: "esc to cancel|Thinking\\.\\.\\.|\\(esc to cancel[^)]*\\)|[⠇⠏⠋⠙⠹⠸⠼⠴⠦⠧⠶⠾⠽⠻⠿]"
# Maximum total seconds the engine will extend the deadline across all busy
# detections during a single _wait_for_ready call. Wave execution can take
# 10-20 minutes per wave, so 30 minutes gives plenty of headroom.
max_busy_seconds: 1800
startup_timeout: 60
terminal:
cols: 200
rows: 50
session_logs:
pattern: "~/.claude/projects/**/session-*.jsonl"

21
evals/backends/codex.yaml Normal file
View File

@@ -0,0 +1,21 @@
# Profile "codex": drives the `codex` CLI with approvals and sandboxing
# bypassed via the single flag below.
# NOTE(review): every line in this view sits at column 0, so the nesting under
# hooks/idle/terminal/session_logs is not visible here — confirm it against
# the file on disk before relying on this layout.
name: codex
cli: codex
args:
- "--dangerously-bypass-approvals-and-sandbox"
required_env:
- OPENAI_API_KEY
# Not passed in args here, unlike the claude profiles; presumably consumed by
# the symlink_superpowers hook — TODO confirm.
- SUPERPOWERS_ROOT
hooks:
pre_run:
- symlink_superpowers
post_run: []
# Presumably a key-chord token (Ctrl-D) interpreted by the engine rather than
# literal text typed into the CLI — TODO confirm.
shutdown: "<<KEY:ctrl-d>>"
idle:
quiescence_seconds: 5
# NOTE(review): the first alternative is a bare `^`, which matches at the start
# of any input, so as written this pattern can never fail to match. A prompt
# glyph may have been dropped after `^` — confirm against the original file.
ready_pattern: "^|codex>|^>"
# NOTE(review): no busy_pattern or max_busy_seconds is set here, unlike the
# claude profiles — confirm that is intentional.
startup_timeout: 60
terminal:
cols: 200
rows: 50
session_logs:
pattern: "~/.codex/sessions/rollout-*.jsonl"

View File

@@ -0,0 +1,24 @@
# Profile "gemini": drives the `gemini` CLI with `--yolo` and
# `-m gemini-2.5-flash`.
# NOTE(review): every line in this view sits at column 0, so the nesting under
# hooks/idle/terminal/session_logs is not visible here — confirm it against
# the file on disk before relying on this layout.
name: gemini
cli: gemini
args:
- "--yolo"
- "-m"
- "gemini-2.5-flash"
# NOTE(review): no API-key variable is listed here, unlike the claude and codex
# profiles — confirm authentication is handled elsewhere.
required_env:
- SUPERPOWERS_ROOT
hooks:
pre_run:
- link_gemini_extension
post_run: []
shutdown: "/exit"
idle:
quiescence_seconds: 5
# Ready when the "Type your message" hint is shown or a line starts with
# optional whitespace followed by ">".
ready_pattern: "Type your message|^\\s*>"
# Busy while "Thinking..." or "Executing" is visible.
# NOTE(review): no max_busy_seconds is set here, unlike the claude profiles —
# confirm the engine's default is acceptable.
busy_pattern: "Thinking\\.\\.\\.|Executing"
startup_timeout: 60
# Only this profile sets turn_timeout; presumably a per-turn limit in seconds —
# TODO confirm against the engine.
turn_timeout: 300
terminal:
cols: 200
rows: 50
session_logs:
pattern: "~/.gemini/tmp/*/chats/session-*.json"