mirror of
https://github.com/obra/superpowers.git
synced 2026-05-11 11:39:04 +08:00
evals: add pi backend
This commit is contained in:
@@ -44,6 +44,12 @@ class TestLoadBackend:
|
||||
assert flash_backend.family == "gemini"
|
||||
assert flash_backend.model == "gemini-2.5-flash"
|
||||
|
||||
def test_loads_pi_backend(self, backends_dir):
    """Load the ``pi`` backend definition and check its identity fields."""
    backend = load_backend("pi", backends_dir)
    assert backend.name == "pi"
    assert backend.cli == "pi"
    assert backend.family == "pi"
|
||||
|
||||
|
||||
class TestBackendBuildCommand:
|
||||
def test_claude_build_command(self, backends_dir, monkeypatch):
|
||||
@@ -60,6 +66,12 @@ class TestBackendBuildCommand:
|
||||
cmd = backend.build_command("/tmp/workdir")
|
||||
assert cmd[0] == "codex"
|
||||
|
||||
def test_pi_build_command_loads_local_superpowers_package(self, backends_dir, monkeypatch):
    """Check the command line built for the ``pi`` backend.

    With SUPERPOWERS_ROOT set to ``/tmp/superpowers``, the built command is
    expected to be exactly ``pi -e /tmp/superpowers``; the working directory
    passed to ``build_command`` does not appear in the expected list.
    """
    monkeypatch.setenv("SUPERPOWERS_ROOT", "/tmp/superpowers")
    backend = load_backend("pi", backends_dir)
    cmd = backend.build_command("/tmp/workdir")
    assert cmd == ["pi", "-e", "/tmp/superpowers"]
|
||||
|
||||
|
||||
class TestBackendEnvValidation:
|
||||
def test_missing_env_raises(self, backends_dir, monkeypatch):
|
||||
@@ -125,6 +137,21 @@ class TestBackendFamily:
|
||||
backend = load_backend("codex", backends_dir)
|
||||
assert backend.family == "codex"
|
||||
|
||||
def test_pi_backend_family(self):
    """A directly constructed ``Backend`` named ``pi`` reports family ``pi``."""
    backend = Backend(
        name="pi",
        cli="pi",
        args=[],
        required_env=[],
        hooks={"pre_run": [], "post_run": []},
        shutdown="/quit",
        idle={},
        startup_timeout=30,
        terminal={},
        session_logs={},
    )
    assert backend.family == "pi"
|
||||
|
||||
def test_variant_name_preserves_family(self):
|
||||
backend = Backend(
|
||||
name="claude-opus-4-6",
|
||||
|
||||
@@ -4,7 +4,7 @@ import json
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
from drill.engine import RunResult, ScenarioConfig, VerifyConfig, snapshot_filesystem
|
||||
from drill.engine import Engine, RunResult, ScenarioConfig, VerifyConfig, snapshot_filesystem
|
||||
|
||||
|
||||
class TestVerifyConfig:
|
||||
@@ -138,6 +138,40 @@ class TestEngineAssertionIntegration:
|
||||
assert (tmp_path / "meta.json").exists()
|
||||
|
||||
|
||||
class TestEnginePiBackend:
    """Engine behaviour when configured with a ``pi`` backend definition."""

    def test_resolves_pi_session_log_root(self, tmp_path: Path) -> None:
        """The log directory resolves to ``~/.pi/agent/sessions``.

        Writes a minimal scenario file and a ``pi.yaml`` backend definition
        under ``tmp_path``, builds an ``Engine`` from them, and checks the
        value returned by ``_resolve_log_dir``.
        """
        scenario = tmp_path / "scenario.yaml"
        scenario.write_text("scenario: test-pi\n")
        backends = tmp_path / "backends"
        backends.mkdir()
        # NOTE(review): the nesting of pre_run/post_run under hooks and of
        # pattern under session_logs is reconstructed from the flattened
        # listing -- confirm against the upstream file.
        (backends / "pi.yaml").write_text(
            """
name: pi
cli: pi
args: []
required_env: []
hooks:
  pre_run: []
  post_run: []
shutdown: /quit
idle: {}
startup_timeout: 1
terminal: {}
session_logs:
  pattern: ~/.pi/agent/sessions/**/*.jsonl
"""
        )
        engine = Engine(
            scenario_path=scenario,
            backend_name="pi",
            backends_dir=backends,
            fixtures_dir=tmp_path,
            results_dir=tmp_path,
        )

        assert engine._resolve_log_dir(tmp_path) == Path.home() / ".pi" / "agent" / "sessions"
|
||||
|
||||
|
||||
class TestEngineRunParams:
|
||||
def test_run_result_uses_custom_output_dir(self, tmp_path: Path) -> None:
|
||||
custom_dir = tmp_path / "custom" / "run-00"
|
||||
|
||||
@@ -3,9 +3,11 @@ import json
|
||||
from drill.normalizer import (
|
||||
collect_new_logs,
|
||||
filter_codex_logs_by_cwd,
|
||||
filter_pi_logs_by_cwd,
|
||||
normalize_claude_logs,
|
||||
normalize_codex_logs,
|
||||
normalize_gemini_logs,
|
||||
normalize_pi_logs,
|
||||
snapshot_log_dir,
|
||||
)
|
||||
|
||||
@@ -137,6 +139,56 @@ class TestNormalizeCodexLogs:
|
||||
assert normalized[1]["source"] == "native"
|
||||
|
||||
|
||||
class TestNormalizePiLogs:
    """Tests for ``filter_pi_logs_by_cwd`` and ``normalize_pi_logs``."""

    def test_filter_by_cwd_keeps_matching_session_headers(self, tmp_path):
        """Only the log whose session entry carries the target cwd is kept."""
        target = "/tmp/drill-target"
        match = tmp_path / "match.jsonl"
        match.write_text(json.dumps({"type": "session", "cwd": target}) + "\n")
        other = tmp_path / "other.jsonl"
        other.write_text(json.dumps({"type": "session", "cwd": "/tmp/other"}) + "\n")
        # A file whose content is not JSON is expected to be left out of the
        # result rather than cause an error.
        malformed = tmp_path / "malformed.jsonl"
        malformed.write_text("not json\n")

        assert filter_pi_logs_by_cwd([match, other, malformed], target) == [match]

    def test_normalizes_assistant_tool_calls_from_session_entries(self):
        """Assistant ``toolCall`` items become normalized tool records.

        The expected output has one record per ``toolCall`` item, in order;
        the ``text`` content item and the ``session`` entry produce none.
        """
        lines = [
            json.dumps({"type": "session", "cwd": "/tmp/project"}),
            json.dumps(
                {
                    "type": "message",
                    "message": {
                        "role": "assistant",
                        "content": [
                            {"type": "text", "text": "I will inspect this."},
                            {
                                "type": "toolCall",
                                "name": "read",
                                "arguments": {"path": "README.md"},
                            },
                            {
                                "type": "toolCall",
                                "name": "bash",
                                "arguments": {"command": "git status"},
                            },
                            {
                                "type": "toolCall",
                                "name": "subagent",
                                "arguments": {"agent": "reviewer"},
                            },
                        ],
                    },
                }
            ),
        ]

        # Expected mapping: "read" -> "Read" (native), "bash" -> "Bash"
        # (shell), and "subagent" keeps its name (native).
        assert normalize_pi_logs("\n".join(lines)) == [
            {"tool": "Read", "args": {"path": "README.md"}, "source": "native"},
            {"tool": "Bash", "args": {"command": "git status"}, "source": "shell"},
            {"tool": "subagent", "args": {"agent": "reviewer"}, "source": "native"},
        ]
|
||||
|
||||
|
||||
class TestNormalizeGeminiLogs:
|
||||
def test_normalizes_jsonl_tool_calls(self):
|
||||
lines = [
|
||||
|
||||
Reference in New Issue
Block a user