mirror of
https://github.com/obra/superpowers.git
synced 2026-07-06 01:39:04 +08:00
feat(skills): add spec-vs-cards checker with test harness
This commit is contained in:
136
skills/agentic-end-to-end-testing/scripts/check-cards-against-spec
Executable file
136
skills/agentic-end-to-end-testing/scripts/check-cards-against-spec
Executable file
@@ -0,0 +1,136 @@
|
||||
#!/usr/bin/env bash
|
||||
# check-cards-against-spec — verify scenario cards carry their spec table's
|
||||
# falsification lines verbatim. See authoring-cards-from-a-spec.md.
|
||||
set -euo pipefail
|
||||
|
||||
# Print the command-line help text on stdout. Call sites that show it because
# of a usage error redirect it to stderr themselves.
usage() {
  cat <<'EOF'
Usage: check-cards-against-spec <spec.md> <cards-dir>

Verifies the spec's "E2E scenario cards" table against the cards directory:
  1. table parses (>=1 row; non-empty Card and Falsification cells)
  2. every row has <cards-dir>/<card>.md
  3. every card contains its Falsification line verbatim
     (whitespace-normalized, fixed-string, case-sensitive)
  4. every card has **What this covers** (bold inline) and ## headings
     Pre-state, Steps, Expected, Cleanup (Sharp edges not required)
  5. extra cards in <cards-dir> are reported as warnings, not failures

Exit: 0 all pass; 1 check failed; 2 no "E2E scenario cards" table; 64 usage.
EOF
}
|
||||
|
||||
# --- argument handling -------------------------------------------------------
# --help (or -h) as the first argument prints the usage text and succeeds.
case "${1:-}" in
  --help | -h)
    usage
    exit 0
    ;;
esac

# Anything other than exactly two operands is a usage error (exit 64).
if [ $# -ne 2 ]; then
  usage >&2
  exit 64
fi

SPEC="$1"
CARDS="$2"

# Unusable operands are reported with the same exit status as a usage error.
if [ ! -f "$SPEC" ]; then
  echo "error: spec not found: $SPEC" >&2
  exit 64
fi
if [ ! -d "$CARDS" ]; then
  echo "error: cards dir not found: $CARDS" >&2
  exit 64
fi
|
||||
|
||||
FAILURES=0
|
||||
# Report one failed check on stdout and bump the global FAILURES counter.
fail() {
  printf 'FAIL: %s\n' "$1"
  FAILURES=$((FAILURES + 1))
}
|
||||
# Print an advisory message on stdout; warnings never touch FAILURES.
warn() {
  printf 'warn: %s\n' "$1"
}
|
||||
|
||||
# Collapse every whitespace run to one space; trim ends. (Normative per the
|
||||
# design spec: markdown re-wrapping must not defeat the verbatim check.)
|
||||
# Filter: stdin -> stdout with whitespace runs collapsed and the ends trimmed.
normalize() {
  # tr folds every whitespace run (newlines included) into a single space, so
  # sed sees one line and only has to drop one leading and one trailing space.
  tr -s '[:space:]' ' ' \
    | sed 's/^ //; s/ $//'
}
|
||||
|
||||
# --- extract the first table under the (case-insensitive) heading ----------
|
||||
# Print the first run of pipe-led lines that follows the "E2E scenario cards"
# heading of a markdown file. Prints nothing when the heading is absent or
# when another heading starts before any table line is seen.
# Arguments: $1 - path to the spec file
# Outputs:   the raw table lines, one per line, on stdout
extract_table() {
  # One to six hashes are spelled out with optional atoms rather than the
  # interval #{1,6}: awk builds without interval-expression support would
  # never match the heading and the script would always report "no table".
  awk '
    /^##?#?#?#?#?[[:space:]]/ {
      h = $0; sub(/^#+[[:space:]]*/, "", h); sub(/[[:space:]]+$/, "", h)
      if (tolower(h) == "e2e scenario cards") { insec = 1; next }
      if (insec) exit
    }
    insec && /^[[:space:]]*\|/ { intable = 1; print; next }
    insec && intable { exit }
  ' < "$1"
}

TABLE="$(extract_table "$SPEC")"
|
||||
|
||||
# A missing heading or a heading with no table under it has its own exit
# status (2), distinct from a failed check (1).
if [ -z "$TABLE" ]; then
  echo "no scenario table: $SPEC has no \"E2E scenario cards\" heading with a table under it" >&2
  exit 2
fi

# --- parse: protect escaped pipes, split rows into cells -------------------
# Markdown writes a literal pipe inside a cell as "\|". Each one is swapped for
# the ASCII unit separator before the row is split on "|", and swapped back
# once the cells are separated.
US=$'\x1f'
CARD_COL=-1; FALS_COL=-1; ROWS=0
HEADER_OK=1
declare -a ROW_CARD ROW_FALS

lineno=0
while IFS= read -r line; do
  lineno=$((lineno + 1))
  esc="${line//\\|/$US}"
  IFS='|' read -r -a cells <<< "$esc"

  # Restore the protected pipes and whitespace-normalize every cell. Field 0
  # (the text before the row's first pipe) is kept, so an index found in the
  # header addresses the same column in every data row.
  trimmed=()
  for c in "${cells[@]}"; do
    c="${c//$US/|}"
    c="$(printf '%s' "$c" | normalize)"
    trimmed+=("$c")
  done

  # Header row: locate the two columns by name, case-insensitively.
  if [ "$lineno" -eq 1 ]; then
    for i in "${!trimmed[@]}"; do
      low="$(printf '%s' "${trimmed[$i]}" | tr '[:upper:]' '[:lower:]')"
      case "$low" in
        card) CARD_COL=$i ;;
        falsification) FALS_COL=$i ;;
      esac
    done
    # Validate here rather than on the first data row, so a bad header is
    # reported as such even when the table has no data rows at all.
    if [ "$CARD_COL" -lt 0 ] || [ "$FALS_COL" -lt 0 ]; then
      fail "table header must name Card and Falsification columns"
      HEADER_OK=0
      break
    fi
    continue
  fi

  # separator row: cells of dashes/colons only
  joined="$(printf '%s' "${trimmed[*]}" | tr -d ' :-')"
  if [ -z "$joined" ]; then
    continue
  fi

  card="${trimmed[$CARD_COL]:-}"
  falsif="${trimmed[$FALS_COL]:-}"
  card="${card//\`/}" # tolerate `card-name` backticks in the cell
  if [ -z "$card" ] || [ -z "$falsif" ]; then
    fail "row $lineno: empty Card or Falsification cell"
    continue
  fi
  ROW_CARD[$ROWS]="$card"; ROW_FALS[$ROWS]="$falsif"; ROWS=$((ROWS + 1))
done <<< "$TABLE"

# An unusable header has already been counted; do not add a second,
# misleading "no data rows" failure on top of it.
if [ "$HEADER_OK" -eq 1 ] && [ "$ROWS" -lt 1 ]; then
  fail "scenario table has no data rows"
fi
|
||||
|
||||
# --- checks 2-4 per row -----------------------------------------------------
|
||||
# Run checks 2-4 for one row of the spec table.
# Globals:   CARDS (read); FAILURES (written, through fail)
# Arguments: $1 - card name taken from the Card cell
#            $2 - whitespace-normalized text of the Falsification cell
# Outputs:   one FAIL line per failed check (through fail)
check_card() {
  local card="$1" falsif="$2"
  local f="$CARDS/$card.md"
  local hay sec

  # Check 2: without the file none of the content checks can run.
  if [ ! -f "$f" ]; then
    fail "missing card file: $f"
    return 0
  fi

  # Check 3: the quoted pattern makes this a literal substring test, so glob
  # characters inside the falsification line carry no special meaning.
  hay="$(normalize < "$f")"
  case "$hay" in
    *"$falsif"*) : ;;
    *) fail "$f: falsification line not present verbatim.
  expected (normalized): $falsif" ;;
  esac

  # Check 4: the bold marker anywhere, plus each required heading at level 2
  # or deeper (matched case-insensitively, as a prefix of the heading text).
  grep -q -- '\*\*What this covers\*\*' "$f" || fail "$f: missing **What this covers**"
  for sec in Pre-state Steps Expected Cleanup; do
    grep -Eiq -- "^#{2,}[[:space:]]*${sec}" "$f" || fail "$f: missing ## ${sec} section"
  done
}

for ((i = 0; i < ROWS; i++)); do
  check_card "${ROW_CARD[$i]}" "${ROW_FALS[$i]}"
done
|
||||
|
||||
# --- check 5: extra cards are warnings --------------------------------------
|
||||
# Warn about every <cards-dir>/*.md whose basename is not named in the table.
# Globals:   CARDS, ROWS, ROW_CARD (read)
# Outputs:   one warn line per extra card; FAILURES is left untouched
report_extra_cards() {
  local f base known i
  for f in "$CARDS"/*.md; do
    # An unmatched glob stays literal; skip it rather than report "*".
    [ -e "$f" ] || continue
    base="${f##*/}"
    base="${base%.md}"
    known=0
    for ((i = 0; i < ROWS; i++)); do
      if [ "${ROW_CARD[$i]}" = "$base" ]; then
        known=1
        break
      fi
    done
    if [ "$known" -eq 0 ]; then
      warn "extra card not in spec table: $base"
    fi
  done
}

report_extra_cards
|
||||
|
||||
# --- summary -----------------------------------------------------------------
# Any recorded failure turns into exit status 1; warnings alone do not.
if [ "$FAILURES" -gt 0 ]; then
  echo "$FAILURES check(s) failed"
  exit 1
fi
echo "all checks passed ($ROWS card(s))"
|
||||
163
tests/agentic-e2e-checker/test-check-cards-against-spec.sh
Executable file
163
tests/agentic-e2e-checker/test-check-cards-against-spec.sh
Executable file
@@ -0,0 +1,163 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
# Resolve the checker relative to this test file, so the suite does not depend
# on the caller's working directory.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
CHECKER="$REPO_ROOT/skills/agentic-end-to-end-testing/scripts/check-cards-against-spec"

FAILURES=0
# All fixtures and the captured command output live under one scratch
# directory that is removed on every exit path.
TEST_ROOT="$(mktemp -d)"
# ${TEST_ROOT:?} aborts instead of running "rm -rf" on an empty path.
cleanup() { rm -rf -- "${TEST_ROOT:?}"; }
trap cleanup EXIT
|
||||
|
||||
# Print a PASS line for the named assertion.
pass() {
  printf ' [PASS] %s\n' "$1"
}
|
||||
# Print a FAIL line for the named assertion and count it in FAILURES.
fail() {
  printf ' [FAIL] %s\n' "$1"
  FAILURES=$((FAILURES + 1))
}
|
||||
|
||||
# Run a command, capture its combined stdout/stderr, and compare its exit
# status with the expected one.
# Globals:   TEST_ROOT (read) - output is saved to $TEST_ROOT/out.txt
# Arguments: $1 - expected exit status
#            $2 - test description
#            $3... - the command and its arguments
# Outputs:   a PASS line, or a FAIL line followed by the captured output
assert_exit() { # expected_code description command...
  local expected="$1" desc="$2"
  shift 2
  local code=0
  # "|| code=$?" records a failing status without tripping set -e.
  "$@" > "$TEST_ROOT/out.txt" 2>&1 || code=$?
  if [ "$code" -eq "$expected" ]; then
    pass "$desc"
  else
    fail "$desc (expected exit $expected, got $code)"
    sed 's/^/ /' "$TEST_ROOT/out.txt"
  fi
}
|
||||
|
||||
# Check that the output captured by the most recent assert_exit contains a
# literal string.
# Globals:   TEST_ROOT (read) - inspects $TEST_ROOT/out.txt
# Arguments: $1 - needle, matched as a fixed string (not a regex)
#            $2 - test description
# Outputs:   a PASS line, or a FAIL line followed by the captured output
assert_out_contains() { # needle description
  # "--" keeps a needle that starts with "-" from being read as an option.
  if grep -Fq -- "$1" "$TEST_ROOT/out.txt"; then
    pass "$2"
  else
    fail "$2 (output missing: $1)"
    sed 's/^/ /' "$TEST_ROOT/out.txt"
  fi
}
|
||||
|
||||
# ---- fixture builders ----------------------------------------------------
|
||||
|
||||
# Write the fixture spec to <dir>/spec.md, creating <dir> if needed. The
# table has two rows; the second row's Falsification cell carries an escaped
# pipe plus "." and "*", to exercise literal (non-regex) matching.
# Arguments: $1 - directory that receives spec.md
make_spec() { # dir (spec with 2-row table; row 2 has \| and regex chars)
  mkdir -p "$1"
  cat > "$1/spec.md" <<'EOF'
# Widget Design

## Requirements

Widgets render a table with a TOTAL row.

## E2E scenario cards

| Card | Covers | Falsification |
| --- | --- | --- |
| widget-show-table | Rendered table incl. TOTAL row | If stdout's last line is not `TOTAL` followed by the two-decimal sum (20.85 for the seed fixture), or the TOTAL row is absent entirely, the scenario FAILS. |
| widget-status-flags | Status output | If `widget status` does not print exactly `OK \| DEGRADED` (a literal pipe) with dots . and stars * intact, the scenario FAILS. |
EOF
}
|
||||
|
||||
# Print a conforming card for the widget-show-table row. The falsification
# sentence is deliberately wrapped over three lines, so the happy path also
# exercises whitespace normalization.
good_card_1() {
  cat <<'EOF'
# widget-show-table: table renders with TOTAL

**What this covers**: the rendered table.

## Pre-state
A built widget binary.

## Steps
1. Run `widget show`.

## Expected
If stdout's last line is not `TOTAL` followed by the
two-decimal sum (20.85 for the seed
fixture), or the TOTAL row is absent entirely, the scenario FAILS.

## Cleanup
Nothing to clean.
EOF
}
|
||||
|
||||
# Print a conforming card for the widget-status-flags row. The falsification
# sentence holds a bare pipe, matching the "\|" escape used in the spec table.
good_card_2() {
  cat <<'EOF'
# widget-status-flags: status output

**What this covers**: status flags.

## Pre-state
A built widget binary.

## Steps
1. Run `widget status`.

## Expected
If `widget status` does not print exactly `OK | DEGRADED` (a literal pipe) with dots . and stars * intact, the scenario FAILS.

## Cleanup
Nothing to clean.
EOF
}
|
||||
|
||||
# Populate a cards directory with the two conforming fixture cards, named
# after the Card cells of the fixture spec.
# Arguments: $1 - cards directory (created if missing)
make_cards() { # dir
  mkdir -p "$1"
  good_card_1 > "$1/widget-show-table.md"
  good_card_2 > "$1/widget-status-flags.md"
}
|
||||
|
||||
# ---- tests ----------------------------------------------------------------
|
||||
|
||||
echo "happy path"
make_spec "$TEST_ROOT/t1"; make_cards "$TEST_ROOT/t1/cards"
assert_exit 0 "2 rows, 2 conforming cards -> exit 0" \
  "$CHECKER" "$TEST_ROOT/t1/spec.md" "$TEST_ROOT/t1/cards"

echo "re-wrapped falsification line still matches (whitespace normalization)"
# good_card_1 already wraps the line across three lines; covered above. Prove
# the inverse too: collapse the card line to one line, still passes.
make_spec "$TEST_ROOT/t2"; make_cards "$TEST_ROOT/t2/cards"
# Slurp the whole file into the pattern space, then join both wrap points.
# "-i.bak" is an in-place spelling that GNU and BSD sed both accept, and the
# .bak copy does not match the checker's *.md glob.
sed -i.bak -e ':a' -e 'N' -e '$!ba' \
  -e 's/the\ntwo-decimal/the two-decimal/' \
  -e 's/seed\nfixture)/seed fixture)/' \
  "$TEST_ROOT/t2/cards/widget-show-table.md"
assert_exit 0 "single-line variant -> exit 0" \
  "$CHECKER" "$TEST_ROOT/t2/spec.md" "$TEST_ROOT/t2/cards"
|
||||
|
||||
echo "escaped pipe in table cell matches literal pipe in card"
# covered by widget-status-flags in the happy path; also prove failure when
# the card drops the pipe phrase entirely:
make_spec "$TEST_ROOT/t3"; make_cards "$TEST_ROOT/t3/cards"
sed -i.bak 's/OK | DEGRADED/OK or DEGRADED/' "$TEST_ROOT/t3/cards/widget-status-flags.md"
assert_exit 1 "reworded falsification -> exit 1" \
  "$CHECKER" "$TEST_ROOT/t3/spec.md" "$TEST_ROOT/t3/cards"
assert_out_contains "widget-status-flags" "failure names the card"

echo "missing card file"
make_spec "$TEST_ROOT/t4"; make_cards "$TEST_ROOT/t4/cards"
rm "$TEST_ROOT/t4/cards/widget-show-table.md"
assert_exit 1 "missing card -> exit 1" \
  "$CHECKER" "$TEST_ROOT/t4/spec.md" "$TEST_ROOT/t4/cards"
assert_out_contains "widget-show-table.md" "failure names the missing file"

echo "missing required section"
make_spec "$TEST_ROOT/t5"; make_cards "$TEST_ROOT/t5/cards"
# Delete from the Cleanup heading to the end of the card.
sed -i.bak '/^## Cleanup/,$d' "$TEST_ROOT/t5/cards/widget-show-table.md"
assert_exit 1 "card without Cleanup heading -> exit 1" \
  "$CHECKER" "$TEST_ROOT/t5/spec.md" "$TEST_ROOT/t5/cards"
assert_out_contains "Cleanup" "failure names the section"

echo "extra card is a warning, not a failure"
make_spec "$TEST_ROOT/t6"; make_cards "$TEST_ROOT/t6/cards"
good_card_1 > "$TEST_ROOT/t6/cards/extra-exploration.md"
assert_exit 0 "extra card -> exit 0" \
  "$CHECKER" "$TEST_ROOT/t6/spec.md" "$TEST_ROOT/t6/cards"
assert_out_contains "extra-exploration" "warning names the extra card"
|
||||
|
||||
echo "no scenario table"
mkdir -p "$TEST_ROOT/t7/cards"
printf '# Widget Design\n\nNo table here.\n' > "$TEST_ROOT/t7/spec.md"
assert_exit 2 "table-less spec -> exit 2" \
  "$CHECKER" "$TEST_ROOT/t7/spec.md" "$TEST_ROOT/t7/cards"
assert_out_contains "no scenario table" "diagnostic present"

echo "heading match is case-insensitive"
make_spec "$TEST_ROOT/t8"; make_cards "$TEST_ROOT/t8/cards"
sed -i.bak 's/^## E2E scenario cards/## E2E Scenario Cards/' "$TEST_ROOT/t8/spec.md"
assert_exit 0 "title-case heading still found" \
  "$CHECKER" "$TEST_ROOT/t8/spec.md" "$TEST_ROOT/t8/cards"

echo "usage"
assert_exit 64 "no args -> exit 64" "$CHECKER"
assert_exit 0 "--help -> exit 0" "$CHECKER" --help
assert_out_contains "Usage:" "help text present"

# The suite's exit status reflects whether any assertion failed.
echo
if [ "$FAILURES" -gt 0 ]; then
  echo "$FAILURES test(s) failed"
  exit 1
fi
echo "all tests passed"
|
||||
Reference in New Issue
Block a user