Lift drill into evals/ at 013fcb8b7dbefd6d3fa4653493e5d2ec8e7f985b

rsync of obra/drill@013fcb8b7d into superpowers/evals/, excluding
.git/, .venv/, results/, .env/, __pycache__/, *.egg-info/,
.private-journal/.

The drill repo is unaffected by this commit; archival is a separate
manual step after this PR merges.

Source SHA recorded at evals/.drill-source-sha for divergence
detection.
This commit is contained in:
Jesse Vincent
2026-05-06 12:15:46 -07:00
committed by Drew Ritter
parent 2e46e9590d
commit 3b412a3836
124 changed files with 13806 additions and 0 deletions

61
evals/tests/test_cli.py Normal file
View File

@@ -0,0 +1,61 @@
"""Tests for CLI option parsing."""
from __future__ import annotations
from click.testing import CliRunner
from drill.cli import main
class TestRunCommand:
    """Option-parsing checks for the ``run`` subcommand.

    Every test invokes ``main`` through Click's ``CliRunner`` against a
    scenario named ``nonexistent``.
    """

    def test_backend_required_without_models(self) -> None:
        """``run`` with neither ``--backend`` nor ``--models`` exits non-zero."""
        runner = CliRunner()
        result = runner.invoke(main, ["run", "nonexistent"])
        assert result.exit_code != 0

    def test_n_default_is_1(self) -> None:
        """Invoke ``run`` without ``--n`` so the option's default is exercised."""
        runner = CliRunner()
        # --n is deliberately omitted: passing it explicitly would bypass the
        # default that this test is named for.
        result = runner.invoke(main, ["run", "nonexistent", "--backend", "claude"])
        # NOTE(review): this accepts either the "Scenario not found" message or
        # any non-zero exit, so it does not pin the default's value to 1.
        assert "Scenario not found" in result.output or result.exit_code != 0

    def test_models_flag_accepted(self) -> None:
        """``run`` is invoked with a comma-separated ``--models`` value."""
        runner = CliRunner()
        result = runner.invoke(main, ["run", "nonexistent", "--models", "claude,codex"])
        assert "Scenario not found" in result.output or result.exit_code != 0

    def test_n_must_be_positive(self) -> None:
        """``run`` with ``--n 0`` exits non-zero."""
        runner = CliRunner()
        result = runner.invoke(main, ["run", "nonexistent", "--backend", "claude", "--n", "0"])
        assert result.exit_code != 0
class TestListCommand:
    """Checks for the ``list`` subcommand."""

    def test_lists_scenarios(self, tmp_path) -> None:
        """A scenario file placed in ``--scenarios-dir`` appears in the output.

        ``tmp_path`` is presumably pytest's per-test temporary directory
        fixture; a ``scenarios`` directory is created beneath it.
        """
        scenarios_dir = tmp_path / "scenarios"
        scenarios_dir.mkdir()
        # NOTE(review): the fixture text below is reproduced exactly as it
        # appears in this view; confirm the intended YAML nesting (e.g. the
        # keys following `setup`, `limits` and `verify`) against the scenario
        # schema.
        # The encoding is explicit so the fixture does not depend on the
        # platform's locale default.
        (scenarios_dir / "test-scenario.yaml").write_text("""
scenario: test-scenario
description: "A test scenario"
user_posture: naive
setup:
helpers: []
assertions: []
turns: []
limits:
max_turns: 5
turn_timeout: 30
verify:
criteria: []
observe: false
""", encoding="utf-8")
        runner = CliRunner()
        result = runner.invoke(main, ["list", "--scenarios-dir", str(scenarios_dir)])
        assert result.exit_code == 0
        assert "test-scenario" in result.output
class TestCompareCommand:
    """Option-parsing checks for the ``compare`` subcommand."""

    def test_sweep_flag_accepted(self) -> None:
        """Invoke ``compare`` with ``--sweep`` and expect a non-zero exit."""
        argv = ["compare", "nonexistent", "--sweep", "abc123"]
        outcome = CliRunner().invoke(main, argv)
        # No results dir, but flag is parsed
        assert outcome.exit_code != 0