evals: add pi backend

This commit is contained in:
Jesse Vincent
2026-05-07 11:11:18 -07:00
parent 5ca4153994
commit 7d06d7e4f0
8 changed files with 214 additions and 2 deletions

View File

@@ -71,7 +71,7 @@ class Backend:
@property
def family(self) -> str:
    """Normalize backend name to a family for log-dir / normalizer dispatch.

    A backend belongs to a family when its name equals the family name or
    starts with the family name followed by a hyphen (``"pi-fast"`` maps to
    ``"pi"``, while ``"pixel"`` does not).

    Returns:
        The matching family name, or ``"other"`` when no family matches.
    """
    for fam in ("claude", "codex", "gemini", "pi"):
        if self.name == fam or self.name.startswith(f"{fam}-"):
            return fam
    return "other"

View File

@@ -21,6 +21,7 @@ from drill.normalizer import (
NORMALIZERS,
collect_new_logs,
filter_codex_logs_by_cwd,
filter_pi_logs_by_cwd,
snapshot_log_dir,
)
from drill.session import TmuxSession
@@ -348,6 +349,11 @@ class Engine:
# Project name is the workdir basename, lowercased
project = workdir.resolve().name.lower()
return Path.home() / ".gemini" / "tmp" / project
elif self.backend.family == "pi":
# Pi stores sessions under ~/.pi/agent/sessions/<encoded-cwd>/.
# Return the root and filter by the session header cwd because
# multiple evals may run concurrently under the same tree.
return Path.home() / ".pi" / "agent" / "sessions"
pattern = self.backend.session_logs.get("pattern", "")
if not pattern:
return None
@@ -363,6 +369,8 @@ class Engine:
new_files = collect_new_logs(log_dir, snapshot)
if self.backend.family == "codex":
new_files = filter_codex_logs_by_cwd(new_files, str(workdir.resolve()))
elif self.backend.family == "pi":
new_files = filter_pi_logs_by_cwd(new_files, str(workdir.resolve()))
normalizer = NORMALIZERS.get(self.backend.family)
if not normalizer:
return []

View File

@@ -74,6 +74,23 @@ def filter_codex_logs_by_cwd(paths: list[Path], target_cwd: str) -> list[Path]:
return matched
def filter_pi_logs_by_cwd(paths: list[Path], target_cwd: str) -> list[Path]:
    """Drop Pi sessions whose header cwd doesn't match target_cwd.

    Only the first line of each file is read and parsed as JSON. A path is
    kept when that header is a JSON object with ``"type": "session"`` and a
    ``"cwd"`` value equal to *target_cwd* (exact string comparison).

    Files that cannot be opened, are not valid UTF-8, or whose first line is
    not a JSON object are skipped rather than raising.

    Args:
        paths: Candidate session log files.
        target_cwd: Working directory the session header must record.

    Returns:
        The matching paths, in their original order.
    """
    matched: list[Path] = []
    for path in paths:
        try:
            # Explicit UTF-8 so the result does not depend on the locale.
            with path.open(encoding="utf-8") as f:
                header = json.loads(f.readline())
        except (OSError, UnicodeDecodeError, json.JSONDecodeError):
            continue
        # A syntactically valid first line may still be a list, string or
        # number, none of which can be a session header.
        if not isinstance(header, dict):
            continue
        if header.get("type") == "session" and header.get("cwd") == target_cwd:
            matched.append(path)
    return matched
def normalize_claude_logs(raw_content: str) -> list[dict[str, Any]]:
"""Normalize Claude Code session logs.
@@ -155,6 +172,52 @@ def normalize_codex_logs(raw_content: str) -> list[dict[str, Any]]:
return results
# Reverse mapping: Pi tool names → Claude Code canonical names
PI_TOOL_MAP: dict[str, str] = {
    "read": "Read",
    "write": "Write",
    "edit": "Edit",
    "bash": "Bash",
    "grep": "Grep",
    "find": "Glob",
    "ls": "Glob",
}

# Canonical names reported with source "native": every mapped name except
# Bash, plus three Pi tool names that have no entry in PI_TOOL_MAP.
PI_NATIVE_TOOLS = (set(PI_TOOL_MAP.values()) - {"Bash"}) | {"subagent", "todo", "manage_todo_list"}


def normalize_pi_logs(raw_content: str) -> list[dict[str, Any]]:
    """Normalize Pi JSONL session logs.

    Pi session files are JSONL entries. Assistant messages contain tool calls as
    content blocks: {"type": "toolCall", "name": "read", "arguments": {...}}.

    Only entries with ``"type": "message"`` whose message role is
    ``"assistant"`` are inspected. Lines that are not valid JSON, entries or
    messages that are not JSON objects, content that is not a list, and
    content blocks that are not objects are ignored instead of raising.

    Args:
        raw_content: Full text of one Pi session log.

    Returns:
        One dict per tool call, in log order, with keys ``"tool"`` (name
        mapped through PI_TOOL_MAP, unmapped names passed through),
        ``"args"`` (the block's ``arguments``, ``{}`` when absent) and
        ``"source"`` (``"native"`` when the canonical name is in
        PI_NATIVE_TOOLS, otherwise ``"shell"``).
    """
    results: list[dict[str, Any]] = []
    for line in raw_content.strip().split("\n"):
        if not line.strip():
            continue
        try:
            entry = json.loads(line)
        except json.JSONDecodeError:
            continue
        # A line can be valid JSON without being an object (e.g. "[]").
        if not isinstance(entry, dict) or entry.get("type") != "message":
            continue
        message = entry.get("message")
        if not isinstance(message, dict) or message.get("role") != "assistant":
            continue
        content = message.get("content")
        # Content may be a plain string or null; only a list carries blocks.
        if not isinstance(content, list):
            continue
        for block in content:
            if not isinstance(block, dict) or block.get("type") != "toolCall":
                continue
            name = block.get("name", "")
            canonical = PI_TOOL_MAP.get(name, name)
            source = "native" if canonical in PI_NATIVE_TOOLS else "shell"
            results.append(
                {"tool": canonical, "args": block.get("arguments", {}), "source": source}
            )
    return results
# Reverse mapping: Gemini tool names → Claude Code canonical names
GEMINI_TOOL_MAP: dict[str, str] = {
"run_shell_command": "Bash",
@@ -225,4 +288,5 @@ NORMALIZERS: dict[str, Callable[[str], list[dict[str, Any]]]] = {
"claude": normalize_claude_logs,
"codex": normalize_codex_logs,
"gemini": normalize_gemini_logs,
"pi": normalize_pi_logs,
}