evals: add Gemini 2.5 Flash backend

This commit is contained in:
Drew Ritter
2026-05-06 15:09:59 -07:00
parent 2d4cdea2bb
commit ec9b96a7bf
4 changed files with 37 additions and 2 deletions

View File

@@ -64,7 +64,8 @@ uv run drill list
| `claude-opus-4-6-1m` | Claude Code | opus-4-6 (1M context) | | `claude-opus-4-6-1m` | Claude Code | opus-4-6 (1M context) |
| `claude-opus-4-7-1m` | Claude Code | opus-4-7 (1M context) | | `claude-opus-4-7-1m` | Claude Code | opus-4-7 (1M context) |
| `codex` | Codex CLI | — | | `codex` | Codex CLI | — |
| `gemini` | Gemini CLI | | | `gemini` | Gemini CLI | auto-gemini-3 |
| `gemini-2-5-flash` | Gemini CLI | gemini-2.5-flash |
## Project structure ## Project structure

View File

@@ -0,0 +1,23 @@
# Backend definition that launches the `gemini` CLI with the model argument
# set to gemini-2.5-flash.
# NOTE(review): leading indentation is not visible in this view; confirm the
# nesting of the keys below against the checked-in file.
name: gemini-2-5-flash
cli: gemini
# Arguments passed to the CLI; "-m" is followed by the model identifier.
# NOTE(review): "--yolo" presumably disables interactive approval prompts —
# confirm against the Gemini CLI documentation.
args:
- "--yolo"
- "-m"
- "gemini-2.5-flash"
# No environment variables are listed as required for this backend.
required_env: []
hooks:
pre_run:
# Named hook; its behavior is defined elsewhere in the project.
- link_gemini_extension
post_run: []
# Text used to shut the CLI session down.
shutdown: "/exit"
idle:
quiescence_seconds: 5
# Regex: matches "Type your message", or a line that starts with optional
# whitespace followed by ">" (the doubled backslash is YAML string escaping).
ready_pattern: "Type your message|^\\s*>"
# Regex: matches the literal text "Thinking..." or "Executing".
busy_pattern: "Thinking\\.\\.\\.|Executing"
# NOTE(review): units for the two timeouts are presumably seconds — confirm.
startup_timeout: 60
turn_timeout: 300
terminal:
cols: 200
rows: 50
session_logs:
# Glob for the session JSON files under the user's ~/.gemini directory.
pattern: "~/.gemini/tmp/*/chats/session-*.json"

View File

@@ -3,7 +3,7 @@ cli: gemini
args: args:
- "--yolo" - "--yolo"
- "-m" - "-m"
- "gemini-2.5-flash" - "auto-gemini-3"
required_env: [] required_env: []
hooks: hooks:
pre_run: pre_run:

View File

@@ -33,6 +33,17 @@ class TestLoadBackend:
assert backend.family == "claude" assert backend.family == "claude"
assert backend.model == "claude-opus-4-6" assert backend.model == "claude-opus-4-6"
def test_loads_gemini_default_and_flash_variant(self, backends_dir):
    """Check that both Gemini backends load with the expected name, family, and model."""
    # (backend name, expected model) pairs, checked in order: default first,
    # then the flash variant.
    expected_models = (
        ("gemini", "auto-gemini-3"),
        ("gemini-2-5-flash", "gemini-2.5-flash"),
    )
    for backend_name, expected_model in expected_models:
        loaded = load_backend(backend_name, backends_dir)
        assert loaded.name == backend_name
        assert loaded.family == "gemini"
        assert loaded.model == expected_model
class TestBackendBuildCommand: class TestBackendBuildCommand:
def test_claude_build_command(self, backends_dir, monkeypatch): def test_claude_build_command(self, backends_dir, monkeypatch):