Lift drill into evals/ at 013fcb8b7dbefd6d3fa4653493e5d2ec8e7f985b

rsync of obra/drill@013fcb8b7d into superpowers/evals/, excluding
.git/, .venv/, results/, .env/, __pycache__/, *.egg-info/,
.private-journal/.

The drill repo is unaffected by this commit; archival is a separate
manual step after this PR merges.

The source SHA is recorded in evals/.drill-source-sha for divergence
detection.
This commit is contained in:
Jesse Vincent
2026-05-06 12:15:46 -07:00
committed by Drew Ritter
parent 2e46e9590d
commit 3b412a3836
124 changed files with 13806 additions and 0 deletions

51
evals/tests/test_actor.py Normal file
View File

@@ -0,0 +1,51 @@
from drill.actor import Actor, ActorAction
class TestActorAction:
    """Exercise ``ActorAction.from_tool_result`` with each action payload used here."""

    def test_parse_type_action(self):
        """A "type" payload yields matching ``action`` and ``text`` attributes."""
        payload = {"action": "type", "text": "create a worktree"}
        parsed = ActorAction.from_tool_result(payload)
        assert parsed.action == "type"
        assert parsed.text == "create a worktree"

    def test_parse_done_action(self):
        """A "done" payload yields ``action == "done"``."""
        payload = {"action": "done"}
        parsed = ActorAction.from_tool_result(payload)
        assert parsed.action == "done"

    def test_parse_stuck_action(self):
        """A "stuck" payload yields ``action == "stuck"``."""
        payload = {"action": "stuck"}
        parsed = ActorAction.from_tool_result(payload)
        assert parsed.action == "stuck"

    def test_parse_key_action(self):
        """A "key" payload yields matching ``action`` and ``key`` attributes."""
        payload = {"action": "key", "key": "ctrl-c"}
        parsed = ActorAction.from_tool_result(payload)
        assert parsed.action == "key"
        assert parsed.key == "ctrl-c"
class TestActorPrompt:
    """Check the text returned by ``Actor.build_system_prompt`` for two postures."""

    def test_builds_system_prompt_naive(self):
        """The naive prompt contains one of the expected phrases and the intent text."""
        naive_actor = Actor(model="claude-sonnet-4-6", temperature=0.7)
        system_prompt = naive_actor.build_system_prompt(
            posture="naive",
            intents=["Ask the agent to create a worktree"],
        )
        # Phrase matching is case-insensitive; the intent check below is not.
        lowered = system_prompt.lower()
        assert any(phrase in lowered for phrase in ("plain language", "don't know"))
        assert "create a worktree" in system_prompt

    def test_builds_system_prompt_spec_aware(self):
        """The spec-aware prompt mentions "skill" or "convention" (case-insensitive)."""
        aware_actor = Actor(model="claude-sonnet-4-6", temperature=0.7)
        system_prompt = aware_actor.build_system_prompt(
            posture="spec-aware",
            intents=["Use the worktree skill"],
        )
        lowered = system_prompt.lower()
        assert any(word in lowered for word in ("skill", "convention"))
class TestActorContext:
    """Check that appended captures show up in ``Actor.build_messages``."""

    def test_appends_terminal_captures(self):
        """Two appended captures produce two messages, in the order they were added."""
        recorder = Actor(model="claude-sonnet-4-6", temperature=0.7)
        for capture in ("Screen 1: Welcome to Claude", "Screen 2: "):
            recorder.append_capture(capture)

        built = recorder.build_messages()

        assert len(built) == 2
        # Each message's "content" must carry the label of the capture at that position.
        for position, label in enumerate(("Screen 1", "Screen 2")):
            assert label in built[position]["content"]