mirror of
https://github.com/obra/superpowers.git
synced 2026-05-16 22:19:05 +08:00
Tighten cross-platform tool references
This commit is contained in:
@@ -7,8 +7,8 @@
|
||||
# and is stricter on that axis. This bash test additionally asserts:
|
||||
# - >=3 git commits (initial + per-task commits, exercising SDD's
|
||||
# commit-per-task workflow shape)
|
||||
# - >=2 Agent/Task subagent dispatches (drill only asserts >=1)
|
||||
# - TodoWrite usage (drill makes no assertion)
|
||||
# - >=2 Claude Code subagent dispatches via Agent or Task (drill only asserts >=1)
|
||||
# - Claude Code task-tracking tool usage (drill makes no assertion)
|
||||
# - test/math.test.js exists (drill relies on `npm test` succeeding)
|
||||
# - analyze-token-usage.py token-budget telemetry
|
||||
# Kept until those assertions are added to drill or explicitly retired.
|
||||
@@ -224,13 +224,13 @@ else
|
||||
fi
|
||||
echo ""
|
||||
|
||||
# Test 3: TodoWrite was used for tracking
|
||||
# Test 3: Claude Code task-tracking tool was used
|
||||
echo "Test 3: Task tracking..."
|
||||
todo_count=$(grep -c '"name":"TodoWrite"' "$SESSION_FILE" || echo "0")
|
||||
todo_count=$(grep -cE '"name":"(TodoWrite|TaskCreate|TaskUpdate|TaskList|TaskGet)"' "$SESSION_FILE" || echo "0")
|
||||
if [ "$todo_count" -ge 1 ]; then
|
||||
echo " [PASS] TodoWrite used $todo_count time(s) for task tracking"
|
||||
echo " [PASS] Task tracking used $todo_count time(s)"
|
||||
else
|
||||
echo " [FAIL] TodoWrite not used"
|
||||
echo " [FAIL] No Claude Code task-tracking tool used"
|
||||
FAILED=$((FAILED + 1))
|
||||
fi
|
||||
echo ""
|
||||
|
||||
@@ -109,7 +109,7 @@ if [ -n "$FIRST_SKILL_LINE" ]; then
|
||||
PREMATURE_TOOLS=$(head -n "$FIRST_SKILL_LINE" "$TURN3_LOG" | \
|
||||
grep '"type":"tool_use"' | \
|
||||
grep -v '"name":"Skill"' | \
|
||||
grep -v '"name":"TodoWrite"' || true)
|
||||
grep -vE '"name":"(TodoWrite|TaskCreate|TaskUpdate|TaskList|TaskGet)"' || true)
|
||||
if [ -n "$PREMATURE_TOOLS" ]; then
|
||||
echo "WARNING: Tools invoked BEFORE Skill tool in Turn 3:"
|
||||
echo "$PREMATURE_TOOLS" | head -5
|
||||
|
||||
@@ -103,11 +103,11 @@ echo "Checking for premature action..."
|
||||
FIRST_SKILL_LINE=$(grep -n '"name":"Skill"' "$LOG_FILE" | head -1 | cut -d: -f1)
|
||||
if [ -n "$FIRST_SKILL_LINE" ]; then
|
||||
# Check if any non-Skill, non-system tools were invoked before the first Skill invocation
|
||||
# Filter out system messages, TodoWrite (planning is ok), and other non-action tools
|
||||
# Filter out task tracking tools (planning is ok) and other non-action tools
|
||||
PREMATURE_TOOLS=$(head -n "$FIRST_SKILL_LINE" "$LOG_FILE" | \
|
||||
grep '"type":"tool_use"' | \
|
||||
grep -v '"name":"Skill"' | \
|
||||
grep -v '"name":"TodoWrite"' || true)
|
||||
grep -vE '"name":"(TodoWrite|TaskCreate|TaskUpdate|TaskList|TaskGet)"' || true)
|
||||
if [ -n "$PREMATURE_TOOLS" ]; then
|
||||
echo "WARNING: Tools invoked BEFORE Skill tool:"
|
||||
echo "$PREMATURE_TOOLS" | head -5
|
||||
|
||||
@@ -44,6 +44,10 @@ const result = {
|
||||
scenario,
|
||||
firstBootstrapParts: countBootstrapParts(firstOutput),
|
||||
secondBootstrapParts: countBootstrapParts(secondOutput),
|
||||
staleMentionMapping: bootstrapText(firstOutput).includes('@mention'),
|
||||
staleTaskMapping: bootstrapText(firstOutput).includes('`Task` tool with subagents'),
|
||||
mapsSubagentToTask: bootstrapText(firstOutput).includes('`task` with `subagent_type: "general"`'),
|
||||
mapsMutationToApplyPatch: bootstrapText(firstOutput).includes('`apply_patch`'),
|
||||
firstReadCount: afterFirst.readCount,
|
||||
secondReadCount: afterSecond.readCount,
|
||||
firstExistsCount: afterFirst.existsCount,
|
||||
@@ -83,6 +87,12 @@ function countBootstrapParts(output) {
|
||||
).length;
|
||||
}
|
||||
|
||||
/**
 * Extract the bootstrap text from a transform output.
 *
 * Looks only at the first message (`output.messages[0]`) and returns the
 * `text` of the first part whose `type` is 'text' and whose text contains
 * the marker 'EXTREMELY_IMPORTANT'.
 *
 * Only the result of `find` is guarded with optional chaining; the
 * `output.messages[0].parts` access itself is unguarded, so it throws if
 * there is no first message or that message has no `parts` array.
 *
 * @param {object} output - Object with a `messages` array whose first entry
 *   has a `parts` array of `{ type, text }` items.
 * @returns {string} The matching part's text, or '' when no part matches
 *   (or the matched text is falsy).
 */
function bootstrapText(output) {
|
||||
return output.messages[0].parts.find(
|
||||
(part) => part.type === 'text' && part.text.includes('EXTREMELY_IMPORTANT')
|
||||
)?.text || '';
|
||||
}
|
||||
|
||||
function assertPresentBootstrap(result) {
|
||||
const failures = [];
|
||||
if (result.firstBootstrapParts !== 1) {
|
||||
@@ -100,6 +110,18 @@ function assertPresentBootstrap(result) {
|
||||
if (result.secondExistsCount !== result.firstExistsCount) {
|
||||
failures.push(`expected cached second transform to do no additional exists checks, got ${result.secondExistsCount - result.firstExistsCount}`);
|
||||
}
|
||||
if (result.staleMentionMapping) {
|
||||
failures.push('expected OpenCode bootstrap not to teach @mention subagent syntax');
|
||||
}
|
||||
if (result.staleTaskMapping) {
|
||||
failures.push('expected OpenCode bootstrap not to teach stale Task-tool mapping');
|
||||
}
|
||||
if (!result.mapsSubagentToTask) {
|
||||
failures.push('expected OpenCode bootstrap to map general-purpose subagents to task with subagent_type');
|
||||
}
|
||||
if (!result.mapsMutationToApplyPatch) {
|
||||
failures.push('expected OpenCode bootstrap to map file mutation to apply_patch');
|
||||
}
|
||||
return failures;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user