From a6a4cd85b9ccce4a2e14d5e3484c947b6835d7a4 Mon Sep 17 00:00:00 2001 From: Drew Ritter Date: Wed, 10 Jun 2026 18:49:38 -0700 Subject: [PATCH] Harden companion stop ownership proof --- skills/brainstorming/scripts/start-server.sh | 15 ++- skills/brainstorming/scripts/stop-server.sh | 68 +++++++---- tests/brainstorm-server/stop-server.test.sh | 118 +++++++++++++------ 3 files changed, 139 insertions(+), 62 deletions(-) diff --git a/skills/brainstorming/scripts/start-server.sh b/skills/brainstorming/scripts/start-server.sh index b8f79a4b..016a8e48 100755 --- a/skills/brainstorming/scripts/start-server.sh +++ b/skills/brainstorming/scripts/start-server.sh @@ -126,10 +126,21 @@ fi STATE_DIR="${SESSION_DIR}/state" PID_FILE="${STATE_DIR}/server.pid" LOG_FILE="${STATE_DIR}/server.log" +SERVER_ID_FILE="${STATE_DIR}/server-instance-id" # Create fresh session directory with content and state peers mkdir -p "${SESSION_DIR}/content" "$STATE_DIR" +SERVER_ID="" +if [[ -r /dev/urandom ]]; then + SERVER_ID="$(od -An -N24 -tx1 /dev/urandom 2>/dev/null | tr -d ' \n' || true)" +fi +if ! [[ "$SERVER_ID" =~ ^[A-Za-z0-9_-]{32,64}$ ]]; then + SERVER_ID="$(printf '%08x%08x%08x%08x' "$$" "$(date +%s)" "${RANDOM:-0}" "${RANDOM:-0}")" +fi +printf '%s\n' "$SERVER_ID" > "$SERVER_ID_FILE" +chmod 600 "$SERVER_ID_FILE" 2>/dev/null || true + # Kill any existing server if [[ -f "$PID_FILE" ]]; then old_pid=$(cat "$PID_FILE") @@ -157,7 +168,7 @@ fi # Foreground mode for environments that reap detached/background processes. if [[ "$FOREGROUND" == "true" ]]; then - env BRAINSTORM_DIR="$SESSION_DIR" BRAINSTORM_HOST="$BIND_HOST" BRAINSTORM_URL_HOST="$URL_HOST" BRAINSTORM_OWNER_PID="$OWNER_PID" node server.cjs & + env BRAINSTORM_DIR="$SESSION_DIR" BRAINSTORM_HOST="$BIND_HOST" BRAINSTORM_URL_HOST="$URL_HOST" BRAINSTORM_OWNER_PID="$OWNER_PID" node server.cjs "--brainstorm-server-id=$SERVER_ID" & SERVER_PID=$! echo "$SERVER_PID" > "$PID_FILE" wait "$SERVER_PID" @@ -166,7 +177,7 @@ fi # Start server, capturing output to log file # Use nohup to survive shell exit; disown to remove from job table -nohup env BRAINSTORM_DIR="$SESSION_DIR" BRAINSTORM_HOST="$BIND_HOST" BRAINSTORM_URL_HOST="$URL_HOST" BRAINSTORM_OWNER_PID="$OWNER_PID" node server.cjs > "$LOG_FILE" 2>&1 & +nohup env BRAINSTORM_DIR="$SESSION_DIR" BRAINSTORM_HOST="$BIND_HOST" BRAINSTORM_URL_HOST="$URL_HOST" BRAINSTORM_OWNER_PID="$OWNER_PID" node server.cjs "--brainstorm-server-id=$SERVER_ID" > "$LOG_FILE" 2>&1 & SERVER_PID=$! disown "$SERVER_PID" 2>/dev/null echo "$SERVER_PID" > "$PID_FILE" diff --git a/skills/brainstorming/scripts/stop-server.sh b/skills/brainstorming/scripts/stop-server.sh index 923c4c1a..e440bb98 100755 --- a/skills/brainstorming/scripts/stop-server.sh +++ b/skills/brainstorming/scripts/stop-server.sh @@ -15,32 +15,52 @@ fi STATE_DIR="${SESSION_DIR}/state" PID_FILE="${STATE_DIR}/server.pid" +SERVER_ID_FILE="${STATE_DIR}/server-instance-id" -# Confirm a PID is actually our brainstorm server (node running server.cjs), -# not a reused/unrelated process whose PID was recycled into a stale pid file. -is_brainstorm_server() { - kill -0 "$1" 2>/dev/null || return 1 - local command_line - command_line="$(ps -p "$1" -o command= 2>/dev/null || true)" - if [[ -z "$command_line" ]]; then - command_line="$(ps -f -p "$1" 2>/dev/null | sed '1d' || true)" +read_expected_server_id() { + [[ -f "$SERVER_ID_FILE" ]] || return 1 + local id + id="$(tr -d '\r\n' < "$SERVER_ID_FILE" 2>/dev/null || true)" + [[ "$id" =~ ^[A-Za-z0-9_-]{32,64}$ ]] || return 1 + printf '%s\n' "$id" +} + +command_line_for_pid() { + local pid="$1" + if [[ -r "/proc/$pid/cmdline" ]]; then + tr '\0' '\n' < "/proc/$pid/cmdline" 2>/dev/null || true + return 0 fi - case "$command_line" in - *node*server.cjs*) ;; + ps -ww -p "$pid" -o command= 2>/dev/null || ps -f -p "$pid" 2>/dev/null | sed '1d' || true +} + +command_has_server_id() { + local pid="$1" + local expected="$2" + local expected_arg="--brainstorm-server-id=$expected" + if [[ -r "/proc/$pid/cmdline" ]]; then + local arg + while IFS= read -r -d '' arg; do + [[ "$arg" == "$expected_arg" ]] && return 0 + done < "/proc/$pid/cmdline" + return 1 + fi + local command_line + command_line="$(command_line_for_pid "$pid")" + [[ -n "$command_line" ]] || return 1 + case " $command_line " in + *" $expected_arg "*) return 0 ;; *) return 1 ;; esac - # Stronger check: if we recorded the bound port and lsof is available, require - # the PID to be the process actually LISTENING on this session's port. This - # rules out an unrelated `node ... server.cjs` (another project, an editor task - # runner, a different session) that happened to recycle the stale PID. - local info="${STATE_DIR}/server-info" - if [[ -f "$info" ]] && command -v lsof >/dev/null 2>&1; then - local port - port=$(sed -n 's/.*"port":\([0-9][0-9]*\).*/\1/p' "$info" | head -1) - if [[ -n "$port" ]]; then - [[ "$(lsof -nP -iTCP:"$port" -sTCP:LISTEN -t 2>/dev/null | head -1)" == "$1" ]] || return 1 - fi - fi +} + +# Confirm a PID has this session's per-start instance id, not just a familiar +# process name. Ambiguous or legacy metadata fails closed as stale_pid. +is_brainstorm_server() { + kill -0 "$1" 2>/dev/null || return 1 + local expected_id + expected_id="$(read_expected_server_id)" || return 1 + command_has_server_id "$1" "$expected_id" || return 1 return 0 } @@ -50,7 +70,7 @@ if [[ -f "$PID_FILE" ]]; then # Refuse to signal a PID we can't prove is our server. A stale pid file may # point at an unrelated process after a reboot/PID wraparound. if ! is_brainstorm_server "$pid"; then - rm -f "$PID_FILE" + rm -f "$PID_FILE" "$SERVER_ID_FILE" echo '{"status": "stale_pid"}' exit 0 fi @@ -79,7 +99,7 @@ if [[ -f "$PID_FILE" ]]; then exit 1 fi - rm -f "$PID_FILE" "${STATE_DIR}/server.log" + rm -f "$PID_FILE" "$SERVER_ID_FILE" "${STATE_DIR}/server.log" # Only delete ephemeral /tmp directories if [[ "$SESSION_DIR" == /tmp/* ]]; then diff --git a/tests/brainstorm-server/stop-server.test.sh b/tests/brainstorm-server/stop-server.test.sh index 37a0a576..66683b50 100755 --- a/tests/brainstorm-server/stop-server.test.sh +++ b/tests/brainstorm-server/stop-server.test.sh @@ -11,14 +11,34 @@ STOP="$SCRIPT_DIR/../../skills/brainstorming/scripts/stop-server.sh" SERVER="$SCRIPT_DIR/../../skills/brainstorming/scripts/server.cjs" PASS=0; FAIL=0 +PIDS=() +DIRS=() + +cleanup() { + for pid in "${PIDS[@]}"; do + kill -9 "$pid" 2>/dev/null || true + wait "$pid" 2>/dev/null || true + done + for dir in "${DIRS[@]}"; do + rm -rf "$dir" + done +} +trap cleanup EXIT + +track_dir() { DIRS+=("$1"); } +track_pid() { PIDS+=("$1"); } +new_server_id() { + printf 'testid%026d\n' "$RANDOM" +} + ok() { echo " PASS: $1"; PASS=$((PASS + 1)); } bad() { echo " FAIL: $1"; echo " $2"; FAIL=$((FAIL + 1)); } -reap_job() { wait "$1" 2>/dev/null || true; } # --- Test 1: an unrelated, reused PID must NOT be killed --- -SESS="$(mktemp -d)"; mkdir -p "$SESS/state" +SESS="$(mktemp -d)"; track_dir "$SESS"; mkdir -p "$SESS/state" sleep 600 & UNRELATED=$! +track_pid "$UNRELATED" disown "$UNRELATED" 2>/dev/null || true echo "$UNRELATED" > "$SESS/state/server.pid" OUT="$("$STOP" "$SESS")" @@ -30,14 +50,14 @@ if kill -0 "$UNRELATED" 2>/dev/null; then else bad "unrelated reused PID was KILLED" "$OUT" fi -kill -9 "$UNRELATED" 2>/dev/null -reap_job "$UNRELATED" -rm -rf "$SESS" -# --- Test 2: a real brainstorm server IS stopped --- -SESS="$(mktemp -d)"; mkdir -p "$SESS/content" "$SESS/state" -BRAINSTORM_DIR="$SESS" BRAINSTORM_PORT=3399 node "$SERVER" > /dev/null 2>&1 & +# --- Test 2: a real brainstorm server with matching instance id IS stopped --- +SESS="$(mktemp -d)"; track_dir "$SESS"; mkdir -p "$SESS/content" "$SESS/state" +SERVER_ID="$(new_server_id)" +printf '%s\n' "$SERVER_ID" > "$SESS/state/server-instance-id" +BRAINSTORM_DIR="$SESS" BRAINSTORM_PORT=3399 node "$SERVER" "--brainstorm-server-id=$SERVER_ID" > /dev/null 2>&1 & SRV=$! +track_pid "$SRV" disown "$SRV" 2>/dev/null || true for _ in $(seq 1 40); do kill -0 "$SRV" 2>/dev/null && break; sleep 0.1; done sleep 0.4 @@ -46,48 +66,74 @@ OUT="$("$STOP" "$SESS")" sleep 0.3 if kill -0 "$SRV" 2>/dev/null; then bad "real brainstorm server still running after stop" "$OUT" - kill -9 "$SRV" 2>/dev/null - reap_job "$SRV" else - reap_job "$SRV" case "$OUT" in - *stopped*) ok "real brainstorm server is stopped" ;; + *stopped*) ok "real brainstorm server with matching instance id is stopped" ;; *) bad "server stopped but status was not 'stopped'" "$OUT" ;; esac fi -rm -rf "$SESS" # --- Test 3: no pid file --- -SESS="$(mktemp -d)"; mkdir -p "$SESS/state" +SESS="$(mktemp -d)"; track_dir "$SESS"; mkdir -p "$SESS/state" OUT="$("$STOP" "$SESS")" case "$OUT" in *not_running*) ok "missing pid file reports not_running" ;; *) bad "missing pid file: unexpected status" "$OUT" ;; esac -rm -rf "$SESS" -# --- Test 4: a `node server.cjs` impostor NOT listening on our port is spared --- -if command -v lsof > /dev/null 2>&1; then - SESS="$(mktemp -d)"; mkdir -p "$SESS/state" - echo '{"type":"server-started","port":3499}' > "$SESS/state/server-info" # nothing listens on 3499 - ( exec -a "node server.cjs" sleep 600 ) & - IMPOSTOR=$! - disown "$IMPOSTOR" 2>/dev/null || true - echo "$IMPOSTOR" > "$SESS/state/server.pid" - OUT="$("$STOP" "$SESS")" - if kill -0 "$IMPOSTOR" 2>/dev/null; then - case "$OUT" in - *stale_pid*) ok "a node server.cjs not listening on our port is left alone" ;; - *) bad "impostor survived but status was not stale_pid" "$OUT" ;; - esac - else - bad "killed a node server.cjs that was NOT on our recorded port" "$OUT" - fi - kill -9 "$IMPOSTOR" 2>/dev/null - reap_job "$IMPOSTOR" - rm -rf "$SESS" +# --- Test 4: a node server.cjs impostor with missing instance id is spared --- +SESS="$(mktemp -d)"; track_dir "$SESS"; mkdir -p "$SESS/state" +( exec -a "node server.cjs" sleep 600 ) & +IMPOSTOR=$! +track_pid "$IMPOSTOR" +disown "$IMPOSTOR" 2>/dev/null || true +echo "$IMPOSTOR" > "$SESS/state/server.pid" +OUT="$("$STOP" "$SESS")" +if kill -0 "$IMPOSTOR" 2>/dev/null; then + case "$OUT" in + *stale_pid*) ok "missing instance id leaves node server.cjs impostor alone" ;; + *) bad "impostor survived but status was not stale_pid" "$OUT" ;; + esac else - echo " SKIP: lsof unavailable — port cross-check test" + bad "killed a node server.cjs impostor with missing instance id" "$OUT" +fi + +# --- Test 5: a node server.cjs impostor with wrong instance id is spared --- +SESS="$(mktemp -d)"; track_dir "$SESS"; mkdir -p "$SESS/state" +EXPECTED_ID="$(new_server_id)" +WRONG_ID="$(new_server_id)" +printf '%s\n' "$EXPECTED_ID" > "$SESS/state/server-instance-id" +( exec -a "node server.cjs --brainstorm-server-id=$WRONG_ID" sleep 600 ) & +IMPOSTOR=$! +track_pid "$IMPOSTOR" +disown "$IMPOSTOR" 2>/dev/null || true +echo "$IMPOSTOR" > "$SESS/state/server.pid" +OUT="$("$STOP" "$SESS")" +if kill -0 "$IMPOSTOR" 2>/dev/null; then + case "$OUT" in + *stale_pid*) ok "wrong instance id leaves node server.cjs impostor alone" ;; + *) bad "wrong-id impostor survived but status was not stale_pid" "$OUT" ;; + esac +else + bad "killed a node server.cjs impostor with wrong instance id" "$OUT" +fi + +# --- Test 6: malformed instance id is fail-closed --- +SESS="$(mktemp -d)"; track_dir "$SESS"; mkdir -p "$SESS/state" +printf '%s\n' 'bad id with spaces' > "$SESS/state/server-instance-id" +( exec -a "node server.cjs --brainstorm-server-id=bad-id-with-spaces" sleep 600 ) & +IMPOSTOR=$! +track_pid "$IMPOSTOR" +disown "$IMPOSTOR" 2>/dev/null || true +echo "$IMPOSTOR" > "$SESS/state/server.pid" +OUT="$("$STOP" "$SESS")" +if kill -0 "$IMPOSTOR" 2>/dev/null; then + case "$OUT" in + *stale_pid*) ok "malformed instance id is fail-closed" ;; + *) bad "malformed-id impostor survived but status was not stale_pid" "$OUT" ;; + esac +else + bad "killed process despite malformed instance id" "$OUT" fi echo "--- Results: $PASS passed, $FAIL failed ---"