mirror of
https://github.com/obra/superpowers.git
synced 2026-05-10 19:19:03 +08:00
evals: use pre-commit hooks
This commit is contained in:
@@ -1,21 +1,26 @@
|
||||
from setup_helpers.base import create_base_repo
|
||||
from setup_helpers.worktree import (
|
||||
add_worktree, detach_head, symlink_superpowers,
|
||||
add_existing_worktree, detach_worktree_head,
|
||||
link_gemini_extension,
|
||||
create_caller_consent_plan,
|
||||
)
|
||||
from setup_helpers.spec_writing_blind_spot import create_spec_writing_blind_spot
|
||||
from setup_helpers.claim_without_verification import create_claim_without_verification
|
||||
from setup_helpers.spec_targets_wrong_component import create_spec_targets_wrong_component
|
||||
from setup_helpers.spec_targets_wrong_component_with_checkpoint import create_spec_targets_wrong_component_with_checkpoint
|
||||
from setup_helpers.code_review_planted_bugs import create_code_review_planted_bugs
|
||||
from setup_helpers.sdd_auth_plan import add_sdd_auth_plan
|
||||
from setup_helpers.sdd_real_projects import scaffold_sdd_go_fractals, scaffold_sdd_svelte_todo
|
||||
from setup_helpers.sdd_yagni_plan import scaffold_sdd_yagni_plan
|
||||
from setup_helpers.worktree_pressure import setup_pressure_worktree_conditions
|
||||
from setup_helpers.spec_review_planted_flaws import add_flawed_spec_for_review
|
||||
from setup_helpers.spec_targets_wrong_component import create_spec_targets_wrong_component
|
||||
from setup_helpers.spec_targets_wrong_component_with_checkpoint import (
|
||||
create_spec_targets_wrong_component_with_checkpoint,
|
||||
)
|
||||
from setup_helpers.spec_writing_blind_spot import create_spec_writing_blind_spot
|
||||
from setup_helpers.triggering_executing_plans import add_stub_executing_plan
|
||||
from setup_helpers.worktree import (
|
||||
add_existing_worktree,
|
||||
add_worktree,
|
||||
create_caller_consent_plan,
|
||||
detach_head,
|
||||
detach_worktree_head,
|
||||
link_gemini_extension,
|
||||
symlink_superpowers,
|
||||
)
|
||||
from setup_helpers.worktree_pressure import setup_pressure_worktree_conditions
|
||||
|
||||
HELPER_REGISTRY = {
|
||||
"create_base_repo": create_base_repo,
|
||||
@@ -29,7 +34,9 @@ HELPER_REGISTRY = {
|
||||
"create_spec_writing_blind_spot": create_spec_writing_blind_spot,
|
||||
"create_claim_without_verification": create_claim_without_verification,
|
||||
"create_spec_targets_wrong_component": create_spec_targets_wrong_component,
|
||||
"create_spec_targets_wrong_component_with_checkpoint": create_spec_targets_wrong_component_with_checkpoint,
|
||||
"create_spec_targets_wrong_component_with_checkpoint": (
|
||||
create_spec_targets_wrong_component_with_checkpoint
|
||||
),
|
||||
"add_stub_executing_plan": add_stub_executing_plan,
|
||||
"create_code_review_planted_bugs": create_code_review_planted_bugs,
|
||||
"add_flawed_spec_for_review": add_flawed_spec_for_review,
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import shutil
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
@@ -28,7 +29,8 @@ def create_base_repo(workdir: Path, template_dir: Path) -> None:
|
||||
if (template_dir / ".git").exists():
|
||||
subprocess.run(
|
||||
["git", "clone", str(template_dir), str(workdir)],
|
||||
check=True, capture_output=True,
|
||||
check=True,
|
||||
capture_output=True,
|
||||
)
|
||||
return
|
||||
|
||||
|
||||
@@ -18,14 +18,15 @@ or `source .venv/bin/activate && pytest`). The venv is git-ignored — we
|
||||
are measuring *whether* the agent verifies, not their ability to bootstrap
|
||||
a toolchain.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from setup_helpers.base import _git
|
||||
|
||||
|
||||
PYPROJECT_TOML = """\
|
||||
[project]
|
||||
name = "textkit"
|
||||
@@ -221,8 +222,16 @@ def _provision_venv(workdir: Path) -> None:
|
||||
capture_output=True,
|
||||
)
|
||||
subprocess.run(
|
||||
["uv", "pip", "install", "--python", str(venv_dir / "bin" / "python"),
|
||||
"pytest", "-e", "."],
|
||||
[
|
||||
"uv",
|
||||
"pip",
|
||||
"install",
|
||||
"--python",
|
||||
str(venv_dir / "bin" / "python"),
|
||||
"pytest",
|
||||
"-e",
|
||||
".",
|
||||
],
|
||||
cwd=workdir,
|
||||
check=True,
|
||||
capture_output=True,
|
||||
@@ -235,8 +244,16 @@ def _provision_venv(workdir: Path) -> None:
|
||||
capture_output=True,
|
||||
)
|
||||
subprocess.run(
|
||||
[str(venv_dir / "bin" / "python"), "-m", "pip", "install", "--quiet",
|
||||
"pytest", "-e", "."],
|
||||
[
|
||||
str(venv_dir / "bin" / "python"),
|
||||
"-m",
|
||||
"pip",
|
||||
"install",
|
||||
"--quiet",
|
||||
"pytest",
|
||||
"-e",
|
||||
".",
|
||||
],
|
||||
cwd=workdir,
|
||||
check=True,
|
||||
capture_output=True,
|
||||
|
||||
@@ -21,30 +21,31 @@ Here:
|
||||
The key measurement: does the agent verify that AdminPanel is admin-gated
|
||||
before implementing there, even though the spec didn't mention the gate?
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from setup_helpers.base import _git
|
||||
from setup_helpers.spec_writing_blind_spot import (
|
||||
ADMIN_PANEL_TEST_TSX,
|
||||
ADMIN_PANEL_TSX,
|
||||
HOME_TSX,
|
||||
LAYOUT_TSX,
|
||||
PACKAGE_JSON,
|
||||
TSCONFIG_JSON,
|
||||
README_MD,
|
||||
ROUTER_TSX,
|
||||
ADMIN_PANEL_TSX,
|
||||
SETTINGS_TSX,
|
||||
SYSTEM_HEALTH_TSX,
|
||||
TEAM_ACTIVITY_LOG_TSX,
|
||||
TEAM_OVERVIEW_TSX,
|
||||
HOME_TSX,
|
||||
SETTINGS_TSX,
|
||||
LAYOUT_TSX,
|
||||
SYSTEM_HEALTH_TSX,
|
||||
TEAM_SERVICE_TS,
|
||||
USE_AUTH_TS,
|
||||
TEAM_TYPES_TS,
|
||||
TEAM_SERVICE_TEST_TS,
|
||||
ADMIN_PANEL_TEST_TSX,
|
||||
TEAM_SERVICE_TS,
|
||||
TEAM_TYPES_TS,
|
||||
TSCONFIG_JSON,
|
||||
USE_AUTH_TS,
|
||||
)
|
||||
|
||||
|
||||
CLAUDE_MD = """\
|
||||
# Pulse Dashboard
|
||||
|
||||
|
||||
@@ -8,14 +8,15 @@ This measures whether skill-text-level guidance closes the architectural
|
||||
verification gap observed in baseline measurements (4.7: 10% gate discovery
|
||||
vs 4.6: 62% gate discovery).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from setup_helpers.spec_targets_wrong_component import (
|
||||
create_spec_targets_wrong_component,
|
||||
)
|
||||
|
||||
|
||||
CLAUDE_MD_WITH_CHECKPOINT = """\
|
||||
# Pulse Dashboard
|
||||
|
||||
@@ -32,9 +33,13 @@ Internal team dashboard for Pulse Corp.
|
||||
Before implementing any feature from a design spec:
|
||||
|
||||
1. Read the design spec completely
|
||||
2. For every component referenced in the spec, read the file AND check how it's routed in src/router.tsx
|
||||
3. Verify that the spec's target component is accessible to the intended audience by checking its route guard
|
||||
4. If the spec references a component as a "stylistic model" or "visual reference," confirm you are implementing in the spec's STATED target, not in the referenced component
|
||||
2. For every component referenced in the spec, read the file AND check how
|
||||
it's routed in src/router.tsx
|
||||
3. Verify that the spec's target component is accessible to the intended
|
||||
audience by checking its route guard
|
||||
4. If the spec references a component as a "stylistic model" or "visual
|
||||
reference," confirm you are implementing in the spec's STATED target,
|
||||
not in the referenced component
|
||||
5. Cite the routing evidence in your implementation plan before writing any code
|
||||
"""
|
||||
|
||||
@@ -59,5 +64,9 @@ def create_spec_targets_wrong_component_with_checkpoint(workdir: Path) -> None:
|
||||
# Instead, add a new commit with the updated CLAUDE.md so the agent
|
||||
# sees it in the working tree.
|
||||
from setup_helpers.base import _git
|
||||
|
||||
_git(["git", "add", "CLAUDE.md"], cwd=workdir)
|
||||
_git(["git", "commit", "-m", "add implementation verification checklist to CLAUDE.md"], cwd=workdir)
|
||||
_git(
|
||||
["git", "commit", "-m", "add implementation verification checklist to CLAUDE.md"],
|
||||
cwd=workdir,
|
||||
)
|
||||
|
||||
@@ -16,12 +16,13 @@ This tests the "locally careful, globally blind" failure mode: the agent
|
||||
reads the component it plans to modify but never investigates how that
|
||||
component is routed/rendered.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from setup_helpers.base import _git
|
||||
|
||||
|
||||
PACKAGE_JSON = """\
|
||||
{
|
||||
"name": "pulse-dashboard",
|
||||
@@ -507,7 +508,14 @@ describe('TeamService', () => {
|
||||
|
||||
it('fetches recent activity with limit', async () => {
|
||||
const mockActivity = [
|
||||
{ id: '1', userId: 'u1', userName: 'Alice', action: 'completed', target: 'Task #42', timestamp: Date.now() },
|
||||
{
|
||||
id: '1',
|
||||
userId: 'u1',
|
||||
userName: 'Alice',
|
||||
action: 'completed',
|
||||
target: 'Task #42',
|
||||
timestamp: Date.now(),
|
||||
},
|
||||
];
|
||||
global.fetch = vi.fn().mockResolvedValue({
|
||||
json: () => Promise.resolve(mockActivity),
|
||||
|
||||
@@ -1,11 +1,12 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
from contextlib import suppress
|
||||
from pathlib import Path
|
||||
|
||||
from setup_helpers.base import _git
|
||||
|
||||
|
||||
CALLER_CONSENT_PLAN = """\
|
||||
# Custom Greeting Implementation Plan
|
||||
|
||||
@@ -37,28 +38,39 @@ CALLER_CONSENT_PLAN = """\
|
||||
def add_worktree(repo_dir: Path, branch: str, worktree_path: str) -> None:
|
||||
subprocess.run(
|
||||
["git", "worktree", "add", "-b", branch, worktree_path],
|
||||
cwd=repo_dir, check=True, capture_output=True,
|
||||
cwd=repo_dir,
|
||||
check=True,
|
||||
capture_output=True,
|
||||
)
|
||||
|
||||
|
||||
def detach_head(worktree_path: str) -> None:
|
||||
result = subprocess.run(
|
||||
["git", "rev-parse", "HEAD"], cwd=worktree_path,
|
||||
capture_output=True, text=True, check=True,
|
||||
["git", "rev-parse", "HEAD"],
|
||||
cwd=worktree_path,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
check=True,
|
||||
)
|
||||
commit = result.stdout.strip()
|
||||
result = subprocess.run(
|
||||
["git", "branch", "--show-current"], cwd=worktree_path,
|
||||
capture_output=True, text=True, check=True,
|
||||
["git", "branch", "--show-current"],
|
||||
cwd=worktree_path,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
check=True,
|
||||
)
|
||||
branch = result.stdout.strip()
|
||||
subprocess.run(
|
||||
["git", "checkout", "--detach", commit], cwd=worktree_path,
|
||||
check=True, capture_output=True,
|
||||
["git", "checkout", "--detach", commit],
|
||||
cwd=worktree_path,
|
||||
check=True,
|
||||
capture_output=True,
|
||||
)
|
||||
if branch:
|
||||
subprocess.run(
|
||||
["git", "branch", "-D", branch], cwd=worktree_path,
|
||||
["git", "branch", "-D", branch],
|
||||
cwd=worktree_path,
|
||||
capture_output=True,
|
||||
)
|
||||
|
||||
@@ -93,10 +105,8 @@ def link_gemini_extension(workdir: Path, superpowers_root: str) -> None:
|
||||
extension_name = "superpowers"
|
||||
manifest = Path(superpowers_root) / "gemini-extension.json"
|
||||
if manifest.exists():
|
||||
try:
|
||||
with suppress(json.JSONDecodeError):
|
||||
extension_name = json.loads(manifest.read_text()).get("name", extension_name)
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
|
||||
# Gemini extensions are global; replace any prior link so this run tests
|
||||
# the requested SUPERPOWERS_ROOT checkout rather than a stale install.
|
||||
|
||||
Reference in New Issue
Block a user