From 11d5db1b22bc0b4409553cdf9ab321e17ecfd12b Mon Sep 17 00:00:00 2001 From: Jesse Vincent Date: Wed, 6 May 2026 12:29:59 -0700 Subject: [PATCH] tests: annotate three kept bash tests with drill coverage notes - test-worktree-native-preference.sh: drill covers PRESSURE phase only; RED + GREEN baselines have no drill counterpart and are kept so the RED-GREEN-REFACTOR validation remains rerunnable end-to-end. - test-subagent-driven-development-integration.sh: drill covers the YAGNI subset (forbidden exports + reviewer-as-gate). Bash adds >=3 commits, >=2 subagent dispatches, TodoWrite usage, test file existence check, and token-budget telemetry. Kept until drill scenario covers those or they are retired. - test-subagent-driven-development.sh: tests agent's ability to *describe* SDD (string matches against expected keywords). Drill scenarios test behavior, not description-recall. Kept by design. Subagent verification recorded in commit messages of subsequent deletions; gap analyses driving these annotations are also in the verification subagent reports for the gating sweep. --- .../test-subagent-driven-development-integration.sh | 11 +++++++++++ tests/claude-code/test-subagent-driven-development.sh | 6 ++++++ tests/claude-code/test-worktree-native-preference.sh | 5 +++++ 3 files changed, 22 insertions(+) diff --git a/tests/claude-code/test-subagent-driven-development-integration.sh b/tests/claude-code/test-subagent-driven-development-integration.sh index 95a551bc..b37862e1 100755 --- a/tests/claude-code/test-subagent-driven-development-integration.sh +++ b/tests/claude-code/test-subagent-driven-development-integration.sh @@ -1,6 +1,17 @@ #!/usr/bin/env bash # Integration Test: subagent-driven-development workflow # Actually executes a plan and verifies the new workflow behaviors +# +# Drill coverage: evals/scenarios/sdd-rejects-extra-features.yaml covers the +# YAGNI enforcement subset (forbidden exports + reviewer-as-gate semantics) +# and is stricter on that axis. This bash test additionally asserts: +# - >=3 git commits (initial + per-task commits, exercising SDD's +# commit-per-task workflow shape) +# - >=2 Agent/Task subagent dispatches (drill only asserts >=1) +# - TodoWrite usage (drill makes no assertion) +# - test/math.test.js exists (drill relies on `npm test` succeeding) +# - analyze-token-usage.py token-budget telemetry +# Kept until those assertions are added to drill or explicitly retired. set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" diff --git a/tests/claude-code/test-subagent-driven-development.sh b/tests/claude-code/test-subagent-driven-development.sh index 20d8d4c7..d1099c11 100755 --- a/tests/claude-code/test-subagent-driven-development.sh +++ b/tests/claude-code/test-subagent-driven-development.sh @@ -1,6 +1,12 @@ #!/usr/bin/env bash # Test: subagent-driven-development skill # Verifies that the skill is loaded and follows correct workflow +# +# No drill coverage: this test asks the agent to *describe* SDD (string- +# matches its verbal explanation against expected keywords like +# "self-review", "skeptical", "worktree", "Step 1", "loop"). Drill scenarios +# test behavior (real subagent dispatch, plan-following, review loops), +# not description-recall. Kept by design. set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" diff --git a/tests/claude-code/test-worktree-native-preference.sh b/tests/claude-code/test-worktree-native-preference.sh index cbfe7f29..077ea19c 100755 --- a/tests/claude-code/test-worktree-native-preference.sh +++ b/tests/claude-code/test-worktree-native-preference.sh @@ -2,6 +2,11 @@ # Test: Does the agent prefer native worktree tools (EnterWorktree) over git worktree add? # Framework: RED-GREEN-REFACTOR per testing-skills-with-subagents.md # +# Drill coverage: evals/scenarios/worktree-creation-under-pressure.yaml lifts +# only the PRESSURE phase (existing .worktrees/ + urgency framing). The RED +# and GREEN baselines below are not covered by drill — kept here so the +# RED-GREEN-REFACTOR validation remains rerunnable end-to-end. +# # RED: Skill without Step 1a (no native tool preference). Agent should use git worktree add. # GREEN: Skill with Step 1a (explicit tool naming + consent bridge). Agent should use EnterWorktree. # PRESSURE: Same as GREEN but under time pressure with existing .worktrees/ dir.