superpowers/tests/claude-code/test-requesting-code-review.sh

#!/usr/bin/env bash
# Integration Test: requesting-code-review skill
# Verifies the code reviewer dispatched via the skill catches a planted bug
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PLUGIN_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"
source "$SCRIPT_DIR/test-helpers.sh"

echo "========================================"
echo " Integration Test: requesting-code-review"
echo "========================================"
echo ""
echo "This test verifies the code reviewer subagent by:"
echo "  1. Setting up a tiny project with a baseline commit"
echo "  2. Adding a second commit that plants an obvious bug"
echo "  3. Dispatching the code reviewer via the requesting-code-review skill"
echo "  4. Verifying the reviewer flags the planted bug as Critical/Important"
echo ""

TEST_PROJECT=$(create_test_project)
echo "Test project: $TEST_PROJECT"
trap "cleanup_test_project $TEST_PROJECT" EXIT

cd "$TEST_PROJECT"

# Baseline: a small "safe" implementation
mkdir -p src
cat > src/db.js <<'EOF'
import { Database } from "./database-driver.js";

const db = new Database();

export async function findUserByEmail(email) {
  if (typeof email !== "string" || !email) {
    throw new Error("email required");
  }
  return db.query(
    "SELECT id, email, created_at FROM users WHERE email = ?",
    [email],
  );
}
EOF

cat > package.json <<'EOF'
{ "name": "test-codereview", "version": "1.0.0", "type": "module" }
EOF

git init --quiet
git config user.email "test@test.com"
git config user.name "Test User"
git add .
git commit -m "Initial: parameterized findUserByEmail" --quiet
BASE_SHA=$(git rev-parse HEAD)

# Second commit: plant two real bugs
# 1. SQL injection — switch from parameterized to string concatenation
# 2. Logs the user's password hash on every successful login
cat > src/db.js <<'EOF'
import { Database } from "./database-driver.js";

const db = new Database();

export async function findUserByEmail(email) {
  return db.query(
    "SELECT id, email, password_hash, created_at FROM users WHERE email = '" + email + "'",
  );
}

export async function login(email, password) {
  const user = await findUserByEmail(email);
  if (user && user.password_hash === hash(password)) {
    console.log("login success", { email, password_hash: user.password_hash });
    return user;
  }
  return null;
}

function hash(s) { return s; }
EOF

git add .
git commit -m "Refactor user lookup, add login" --quiet
HEAD_SHA=$(git rev-parse HEAD)

echo ""
echo "Planted bugs in $BASE_SHA..$HEAD_SHA:"
echo "  - SQL injection (string concat instead of parameterized query)"
echo "  - Password hash logged in plaintext on every successful login"
echo "  - hash() is the identity function (passwords stored & compared in plaintext)"
echo ""

OUTPUT_FILE="$TEST_PROJECT/claude-output.txt"

PROMPT="I just finished a refactor. The change is between commits $BASE_SHA and $HEAD_SHA on the current branch.

Use the superpowers:requesting-code-review skill to review these changes before I merge. Follow the skill exactly: dispatch the code reviewer subagent with the template, give the subagent the SHA range, and report back what it found.

Print the reviewer's full output."

# Run claude from inside the test project so its session JSONL lands in a
# project-specific directory under ~/.claude/projects/, isolated from any
# other concurrent claude sessions.
echo "Running Claude (plugin-dir: $PLUGIN_DIR, cwd: $TEST_PROJECT)..."
echo "================================================================================"
cd "$TEST_PROJECT" && timeout 600 claude -p "$PROMPT" \
    --plugin-dir "$PLUGIN_DIR" \
    --permission-mode bypassPermissions 2>&1 | tee "$OUTPUT_FILE" || {
    echo ""
    echo "================================================================================"
    echo "EXECUTION FAILED (exit code: $?)"
    exit 1
}
echo "================================================================================"

echo ""
echo "Analyzing reviewer output..."
echo ""

# Find the session transcript. Because we ran claude from $TEST_PROJECT (a
# unique tmp dir), its sessions live in their own ~/.claude/projects/ folder.
# Resolve the real path (macOS mktemp returns /var/... but claude normalizes
# it to /private/var/...) and replicate claude's normalization (every
# non-alphanumeric char becomes `-`).
TEST_PROJECT_REAL=$(cd "$TEST_PROJECT" && pwd -P)
SESSION_DIR="$HOME/.claude/projects/$(echo "$TEST_PROJECT_REAL" | sed 's|[^a-zA-Z0-9]|-|g')"
# `|| true` prevents pipefail killing the script if ls gets SIGPIPE'd by head.
SESSION_FILE=$(ls -t "$SESSION_DIR"/*.jsonl 2>/dev/null | head -1 || true)

FAILED=0

echo "=== Verification Tests ==="
echo ""

# Test 1: Skill was actually invoked, and a subagent was actually dispatched
echo "Test 1: requesting-code-review skill invoked + reviewer subagent dispatched..."
if [ -z "$SESSION_FILE" ] || [ ! -f "$SESSION_FILE" ]; then
    echo "  [FAIL] Could not locate session transcript in $SESSION_DIR"
    FAILED=$((FAILED + 1))
elif ! grep -q '"skill":"superpowers:requesting-code-review"' "$SESSION_FILE"; then
    echo "  [FAIL] requesting-code-review skill was not invoked"
    echo "         Session: $SESSION_FILE"
    FAILED=$((FAILED + 1))
elif ! grep -q '"name":"Agent"' "$SESSION_FILE"; then
    echo "  [FAIL] Skill ran but no subagent was dispatched"
    FAILED=$((FAILED + 1))
else
    echo "  [PASS] Skill invoked and subagent dispatched"
fi
echo ""

# Test 2: Reviewer caught the SQL injection
echo "Test 2: SQL injection flagged..."
if grep -qiE "sql injection|injection|string concat|parameterize|prepared statement|sanitiz" "$OUTPUT_FILE"; then
    echo "  [PASS] Reviewer flagged the SQL injection vector"
else
    echo "  [FAIL] Reviewer missed the SQL injection — most obvious planted bug"
    FAILED=$((FAILED + 1))
fi
echo ""

# Test 3: Reviewer caught the credential / password issue (either logging or no real hashing)
echo "Test 3: Credential handling issue flagged..."
if grep -qiE "password|credential|secret|plaintext|log.*hash|hash.*log|sensitive" "$OUTPUT_FILE"; then
    echo "  [PASS] Reviewer flagged a credential / password handling issue"
else
    echo "  [FAIL] Reviewer missed the password/credential issues"
    FAILED=$((FAILED + 1))
fi
echo ""

# Test 4: Reviewer marked at least one issue as Critical or Important (not just Minor)
echo "Test 4: Severity classification..."
if grep -qiE "critical|important|severe|high.*risk|security" "$OUTPUT_FILE"; then
    echo "  [PASS] Reviewer classified findings at Critical/Important severity"
else
    echo "  [FAIL] Reviewer did not classify findings as Critical or Important"
    FAILED=$((FAILED + 1))
fi
echo ""

# Test 5: Reviewer did NOT approve the diff for merge
echo "Test 5: Reviewer verdict..."
# A correct reviewer says No or "With fixes". A broken/sycophantic reviewer says Yes/Ready.
if grep -qiE "ready to merge.*yes|approved.*for merge|^\s*yes\s*$|safe to merge" "$OUTPUT_FILE" \
   && ! grep -qiE "ready to merge.*no|with fixes|do not merge|not ready|block.*merge" "$OUTPUT_FILE"; then
    echo "  [FAIL] Reviewer approved a diff with planted Critical bugs"
    FAILED=$((FAILED + 1))
else
    echo "  [PASS] Reviewer did not approve the diff"
fi
echo ""

echo "========================================"
echo " Test Summary"
echo "========================================"
echo ""

if [ $FAILED -eq 0 ]; then
    echo "STATUS: PASSED"
    echo "The code reviewer correctly:"
    echo "  ✓ Was dispatched via the requesting-code-review skill"
    echo "  ✓ Flagged the SQL injection"
    echo "  ✓ Flagged the credential handling issues"
    echo "  ✓ Classified findings at Critical/Important severity"
    echo "  ✓ Did not approve the diff for merge"
    exit 0
else
    echo "STATUS: FAILED"
    echo "Failed $FAILED verification tests"
    echo ""
    echo "Output saved to: $OUTPUT_FILE"
    exit 1
fi