Lift drill into evals/ at 013fcb8b7dbefd6d3fa4653493e5d2ec8e7f985b

rsync of obra/drill@013fcb8b7d into superpowers/evals/, excluding
.git/, .venv/, results/, .env/, __pycache__/, *.egg-info/,
.private-journal/.

The drill repo is unaffected by this commit; archival is a separate
manual step after this PR merges.

Source SHA recorded at evals/.drill-source-sha for divergence
detection.
This commit is contained in:
Jesse Vincent
2026-05-06 12:15:46 -07:00
parent 895bb732d5
commit 3c046f579e
124 changed files with 13806 additions and 0 deletions

81
evals/drill/actor.py Normal file
View File

@@ -0,0 +1,81 @@
"""Actor LLM: simulates a user driving an agent session."""
from __future__ import annotations
from dataclasses import dataclass
from pathlib import Path
from typing import Any
import anthropic
from jinja2 import Template
# Tool definition offered to the actor model. Actor.decide() passes it as the
# only tool and forces its use, so every reply arrives as one structured
# "terminal_action" call.
ACTOR_TOOL: dict[str, Any] = {
    "name": "terminal_action",
    "description": "Send an action to the terminal session.",
    "input_schema": {
        "type": "object",
        "properties": {
            # Selects what the simulated user does next.
            "action": {
                "type": "string",
                "enum": ["type", "done", "stuck", "key"],
                "description": "The action to take.",
            },
            # Payload accompanying the "type" action.
            "text": {
                "type": "string",
                "description": "Text to type (only for 'type' action).",
            },
            # Payload accompanying the "key" action.
            "key": {
                "type": "string",
                "description": "Special key to send (only for 'key' action, e.g., 'ctrl-c').",
            },
        },
        # "text" and "key" are optional; only the action itself is mandatory.
        "required": ["action"],
    },
}
@dataclass
class ActorAction:
    """A single step chosen by the actor for the terminal session.

    ``action`` carries the tool call's ``action`` value. ``text`` and ``key``
    are ``None`` whenever the tool call did not supply them.
    """

    action: str
    text: str | None = None
    key: str | None = None

    @classmethod
    def from_tool_result(cls, data: dict[str, Any]) -> ActorAction:
        """Build an action from the input payload of a tool call.

        Args:
            data: Mapping with a mandatory ``"action"`` entry and optional
                ``"text"`` / ``"key"`` entries. Any other entries are ignored.

        Returns:
            The corresponding ``ActorAction``.

        Raises:
            KeyError: If ``data`` has no ``"action"`` entry.
        """
        chosen_action = data["action"]
        typed_text = data.get("text")
        special_key = data.get("key")
        return cls(action=chosen_action, text=typed_text, key=special_key)
class Actor:
    """LLM-backed stand-in for a user who drives an agent session.

    The actor accumulates terminal captures, sends them to an Anthropic model
    together with a rendered system prompt, and turns the model's forced
    ``terminal_action`` tool call into an ``ActorAction``.
    """

    def __init__(self, model: str = "claude-sonnet-4-6", temperature: float = 0.7) -> None:
        """Create an actor and its Anthropic client.

        Args:
            model: Model identifier passed to the Messages API.
            temperature: Sampling temperature passed to the Messages API.
        """
        self.model = model
        self.temperature = temperature
        # Terminal output snapshots, oldest first; each becomes one user message.
        self.captures: list[str] = []
        # Stays empty until build_system_prompt() is called.
        self._system_prompt: str = ""
        self._client: anthropic.Anthropic = anthropic.Anthropic()

    def build_system_prompt(self, posture: str, intents: list[str]) -> str:
        """Render the actor prompt template and remember it for ``decide()``.

        The template is read from ``prompts/actor.md``, located two directory
        levels above this module.

        Args:
            posture: Value exposed to the template as ``posture``.
            intents: Value exposed to the template as ``intents``.

        Returns:
            The rendered system prompt (also stored on the instance).
        """
        template_path = Path(__file__).parent.parent / "prompts" / "actor.md"
        # Decode explicitly as UTF-8: without an encoding, read_text() uses the
        # platform locale encoding, which can mis-decode or reject non-ASCII
        # characters in the template. Assumes the prompt file is stored as UTF-8.
        template = Template(template_path.read_text(encoding="utf-8"))
        self._system_prompt = template.render(posture=posture, intents=intents)
        return self._system_prompt

    def append_capture(self, terminal_output: str) -> None:
        """Record one more snapshot of terminal output for the next decision."""
        self.captures.append(terminal_output)

    def build_messages(self) -> list[dict[str, str]]:
        """Return the captures as a list of user-role chat messages, in order."""
        return [{"role": "user", "content": capture} for capture in self.captures]

    def decide(self) -> ActorAction:
        """Ask the model for the next terminal action.

        Returns:
            The action parsed from the first ``tool_use`` block in the response.

        Raises:
            RuntimeError: If the response contains no ``tool_use`` block.
        """
        response = self._client.messages.create(
            model=self.model,
            max_tokens=1024,
            temperature=self.temperature,
            system=self._system_prompt,
            tools=[ACTOR_TOOL],  # ty: ignore[invalid-argument-type]
            # Force the model to answer through the tool rather than free text.
            tool_choice={"type": "tool", "name": "terminal_action"},
            messages=self.build_messages(),  # ty: ignore[invalid-argument-type]
        )
        for block in response.content:
            if block.type == "tool_use":
                return ActorAction.from_tool_result(block.input)
        raise RuntimeError("Actor did not return a tool_use block")