mirror of
https://github.com/obra/superpowers.git
synced 2026-05-09 18:49:04 +08:00
Lift drill into evals/ at 013fcb8b7dbefd6d3fa4653493e5d2ec8e7f985b
rsync of obra/drill@013fcb8b7d into superpowers/evals/, excluding .git/, .venv/, results/, .env/, __pycache__/, *.egg-info/, .private-journal/. The drill repo is unaffected by this commit; archival is a separate manual step after this PR merges. Source SHA recorded at evals/.drill-source-sha for divergence detection.
This commit is contained in:
committed by
Drew Ritter
parent
2e46e9590d
commit
3b412a3836
54
evals/tests/test_stats.py
Normal file
54
evals/tests/test_stats.py
Normal file
@@ -0,0 +1,54 @@
|
||||
"""Tests for Wilson score confidence interval."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from drill.stats import wilson_ci
|
||||
|
||||
|
||||
class TestWilsonCI:
    """Checks on the ``(lo, hi)`` pair returned by ``wilson_ci``.

    Every test calls ``wilson_ci`` with two integers (the test names refer to
    them as the passed count and the total) and asserts bounds on the
    returned lower and upper values.
    """

    def test_all_pass(self) -> None:
        """10 of 10: lower bound above 0.69, upper bound above 0.99."""
        lo, hi = wilson_ci(10, 10)
        assert lo > 0.69
        # `hi == 1.0 or hi > 0.99` reduces to this single comparison.
        assert hi > 0.99

    def test_all_fail(self) -> None:
        """0 of 10: lower bound below 0.01, upper bound below 0.31."""
        lo, hi = wilson_ci(0, 10)
        # `lo < 0.01 or lo == 0.0` reduces to this single comparison.
        assert lo < 0.01
        assert hi < 0.31

    def test_half_pass(self) -> None:
        """5 of 10: bounds fall in (0.18, 0.25) and (0.75, 0.82)."""
        lo, hi = wilson_ci(5, 10)
        assert 0.18 < lo < 0.25
        assert 0.75 < hi < 0.82

    def test_zero_total(self) -> None:
        """0 of 0: both bounds are exactly 0.0."""
        lo, hi = wilson_ci(0, 0)
        assert lo == 0.0
        assert hi == 0.0

    def test_single_pass(self) -> None:
        """1 of 1: lower bound is positive and upper bound does not exceed 1."""
        lo, hi = wilson_ci(1, 1)
        assert lo > 0.0
        assert hi <= 1.0

    def test_single_fail(self) -> None:
        """0 of 1: lower bound is non-negative and upper bound is below 1."""
        lo, hi = wilson_ci(0, 1)
        # `lo == 0.0 or lo >= 0.0` reduces to this single comparison.
        assert lo >= 0.0
        assert hi < 1.0

    def test_large_sample(self) -> None:
        """80 of 100: bounds fall in (0.70, 0.75) and (0.85, 0.90)."""
        lo, hi = wilson_ci(80, 100)
        assert 0.70 < lo < 0.75
        assert 0.85 < hi < 0.90

    def test_passed_greater_than_total_clamped(self) -> None:
        """12 of 10: the result still has a positive lower bound and hi <= 1."""
        lo, hi = wilson_ci(12, 10)
        assert lo > 0.0
        assert hi <= 1.0

    def test_returns_tuple_of_floats(self) -> None:
        """The return value is a 2-tuple whose elements are both floats."""
        result = wilson_ci(5, 10)
        assert isinstance(result, tuple)
        assert len(result) == 2
        lo, hi = result
        assert isinstance(lo, float)
        assert isinstance(hi, float)
|
||||
Reference in New Issue
Block a user