From 11d5db1b22bc0b4409553cdf9ab321e17ecfd12b Mon Sep 17 00:00:00 2001
From: Jesse Vincent <jesse@primeradiant.com>
Date: Wed, 6 May 2026 12:29:59 -0700
Subject: [PATCH] tests: annotate three kept bash tests with drill coverage
 notes

- test-worktree-native-preference.sh: drill covers PRESSURE phase only;
  RED + GREEN baselines have no drill counterpart and are kept so
  the RED-GREEN-REFACTOR validation remains rerunnable end-to-end.
- test-subagent-driven-development-integration.sh: drill covers the
  YAGNI subset (forbidden exports + reviewer-as-gate). The bash test
  adds checks for >=3 commits, >=2 subagent dispatches, TodoWrite
  usage, test file existence, and token-budget telemetry. Kept until
  a drill scenario covers those or they are retired.
- test-subagent-driven-development.sh: tests the agent's ability to
  *describe* SDD (string matches against expected keywords). Drill
  scenarios test behavior, not description-recall. Kept by design.

Subagent verification is recorded in the commit messages of the
subsequent deletions; the gap analyses driving these annotations are
also in the verification subagent reports for the gating sweep.
---
 .../test-subagent-driven-development-integration.sh   | 11 +++++++++++
 tests/claude-code/test-subagent-driven-development.sh |  6 ++++++
 tests/claude-code/test-worktree-native-preference.sh  |  5 +++++
 3 files changed, 22 insertions(+)

diff --git a/tests/claude-code/test-subagent-driven-development-integration.sh b/tests/claude-code/test-subagent-driven-development-integration.sh
index 95a551bc..b37862e1 100755
--- a/tests/claude-code/test-subagent-driven-development-integration.sh
+++ b/tests/claude-code/test-subagent-driven-development-integration.sh
@@ -1,6 +1,17 @@
 #!/usr/bin/env bash
 # Integration Test: subagent-driven-development workflow
 # Actually executes a plan and verifies the new workflow behaviors
+#
+# Drill coverage: evals/scenarios/sdd-rejects-extra-features.yaml covers the
+# YAGNI enforcement subset (forbidden exports + reviewer-as-gate semantics)
+# and is stricter on that axis. This bash test additionally asserts:
+#   - >=3 git commits (initial + per-task commits, exercising SDD's
+#     commit-per-task workflow shape)
+#   - >=2 Agent/Task subagent dispatches (drill only asserts >=1)
+#   - TodoWrite usage (drill makes no assertion)
+#   - test/math.test.js exists (drill relies on `npm test` succeeding)
+#   - analyze-token-usage.py token-budget telemetry
+# Kept until those assertions are added to drill or explicitly retired.
 set -euo pipefail
 
 SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
diff --git a/tests/claude-code/test-subagent-driven-development.sh b/tests/claude-code/test-subagent-driven-development.sh
index 20d8d4c7..d1099c11 100755
--- a/tests/claude-code/test-subagent-driven-development.sh
+++ b/tests/claude-code/test-subagent-driven-development.sh
@@ -1,6 +1,12 @@
 #!/usr/bin/env bash
 # Test: subagent-driven-development skill
 # Verifies that the skill is loaded and follows correct workflow
+#
+# No drill coverage: this test asks the agent to *describe* SDD and
+# string-matches its verbal explanation against expected keywords like
+# "self-review", "skeptical", "worktree", "Step 1", "loop". Drill scenarios
+# test behavior (real subagent dispatch, plan-following, review loops),
+# not description-recall. Kept by design.
 set -euo pipefail
 
 SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
diff --git a/tests/claude-code/test-worktree-native-preference.sh b/tests/claude-code/test-worktree-native-preference.sh
index cbfe7f29..077ea19c 100755
--- a/tests/claude-code/test-worktree-native-preference.sh
+++ b/tests/claude-code/test-worktree-native-preference.sh
@@ -2,6 +2,11 @@
 # Test: Does the agent prefer native worktree tools (EnterWorktree) over git worktree add?
 # Framework: RED-GREEN-REFACTOR per testing-skills-with-subagents.md
 #
+# Drill coverage: evals/scenarios/worktree-creation-under-pressure.yaml covers
+# only the PRESSURE phase (existing .worktrees/ + urgency framing). The RED
+# and GREEN baselines below are not covered by drill — kept here so
+# the RED-GREEN-REFACTOR validation remains rerunnable end-to-end.
+#
 # RED:   Skill without Step 1a (no native tool preference). Agent should use git worktree add.
 # GREEN: Skill with Step 1a (explicit tool naming + consent bridge). Agent should use EnterWorktree.
 # PRESSURE: Same as GREEN but under time pressure with existing .worktrees/ dir.