Lift drill into evals/ at 013fcb8b7dbefd6d3fa4653493e5d2ec8e7f985b

rsync of obra/drill@013fcb8b7d into superpowers/evals/, excluding
.git/, .venv/, results/, .env/, __pycache__/, *.egg-info/,
.private-journal/.

The drill repo is unaffected by this commit; archival is a separate
manual step after this PR merges.

Source SHA recorded at evals/.drill-source-sha for divergence
detection.
This commit is contained in:
Jesse Vincent
2026-05-06 12:15:46 -07:00
committed by Drew Ritter
parent 2e46e9590d
commit 3b412a3836
124 changed files with 13806 additions and 0 deletions

27
evals/bin/tool-count Executable file
View File

@@ -0,0 +1,27 @@
#!/usr/bin/env bash
#
# tool-count: assert how many records in tool_calls.jsonl name a given tool.
#
# Usage: tool-count TOOL OP EXPECTED
#   TOOL      compared for exact equality against each record's .tool field
#   OP        one of: eq, gt, gte, lt, lte
#   EXPECTED  integer (decimal) the count is compared against
#
# Reads tool_calls.jsonl from the current working directory.
#
# Exit status:
#   0    the comparison holds    (prints "PASS: ..." on stdout)
#   1    the comparison fails    (prints "FAIL: ..." on stdout)
#   2    bad usage: missing arguments, non-integer EXPECTED, or unknown OP
#   127  jq is not installed
set -euo pipefail

command -v jq >/dev/null || { echo "jq required" >&2; exit 127; }

# Check the argument count up front; otherwise `set -u` aborts on "$1" with an
# "unbound variable" message that gives the caller no hint about usage.
if (( $# < 3 )); then
  echo "Usage: ${0##*/} TOOL OP EXPECTED (OP: eq, gt, gte, lt, lte)" >&2
  exit 2
fi

TOOL="$1"
OP="$2"
EXPECTED="$3"
FILE="tool_calls.jsonl"

# EXPECTED must be a plain integer literal. It is only ever compared with
# `[ ... ]` below, never expanded inside $(( )), so it cannot be evaluated as
# an arithmetic expression and leading zeros are read as decimal, not octal.
if ! [[ "$EXPECTED" =~ ^-?[0-9]+$ ]]; then
  echo "EXPECTED must be an integer, got: $EXPECTED" >&2
  exit 2
fi

# Translate the operator name into the matching `test` primary.
case "$OP" in
  eq)  CMP="-eq" ;;
  gt)  CMP="-gt" ;;
  gte) CMP="-ge" ;;
  lt)  CMP="-lt" ;;
  lte) CMP="-le" ;;
  *)
    echo "Unknown operator: $OP (expected: eq, gt, gte, lt, lte)" >&2
    exit 2
    ;;
esac

# The tool name is handed to jq as a bound variable (--arg) instead of being
# spliced into the program text, so quotes or backslashes in TOOL cannot
# change the filter.
# If jq fails (e.g. the file is missing or unparsable) the count falls back to
# 0 and jq's own error output is discarded.
# NOTE(review): presumably "no log" is meant to count as "no calls" — confirm.
COUNT=$(jq -s --arg tool "$TOOL" \
  '[.[] | select(.tool == $tool)] | length' "$FILE" 2>/dev/null || echo 0)

if [ "$COUNT" "$CMP" "$EXPECTED" ]; then
  printf '%s\n' "PASS: $TOOL called $COUNT time(s) ($OP $EXPECTED)"
  exit 0
else
  printf '%s\n' "FAIL: $TOOL called $COUNT time(s) (expected $OP $EXPECTED)"
  exit 1
fi