mirror of
https://github.com/obra/superpowers.git
synced 2026-04-21 00:49:06 +08:00
Fix owner-PID lifecycle monitoring for cross-platform reliability
Two bugs caused the brainstorm server to self-terminate within 60s: 1. ownerAlive() treated EPERM (permission denied) as "process dead". When the owner PID belongs to a different user (Tailscale SSH, system daemons), process.kill(pid, 0) throws EPERM — but the process IS alive. Fixed: return e.code === 'EPERM'. 2. On WSL, the grandparent PID resolves to a short-lived subprocess that exits before the first 60s lifecycle check. The PID is genuinely dead (ESRCH), so the EPERM fix alone doesn't help. Fixed: validate the owner PID at server startup — if it's already dead, it was a bad resolution, so disable monitoring and rely on the 30-minute idle timeout. This also removes the Windows/MSYS2-specific OWNER_PID="" carve-out from start-server.sh, since the server now handles invalid PIDs generically at startup regardless of platform. Tested on Linux (magic-kingdom) via Tailscale SSH: - Root-owned owner PID (EPERM): server survives ✓ - Dead owner PID at startup (WSL sim): monitoring disabled, survives ✓ - Valid owner that dies: server shuts down within 60s ✓ Fixes #879
This commit is contained in:
@@ -17,7 +17,7 @@ The subagent review loop (dispatching a fresh agent to review plans/specs) doubl
|
|||||||
|
|
||||||
### Bug Fixes
|
### Bug Fixes
|
||||||
|
|
||||||
- **Owner-PID false positives** — the brainstorm server's `ownerAlive()` check treated EPERM (permission denied) the same as ESRCH (process not found), causing the server to self-terminate within 60 seconds whenever the owner process ran as a different user. This affected WSL (owner is a Windows process), Tailscale SSH, and any cross-user scenario. Fixed by treating EPERM as "alive". (#879)
|
- **Owner-PID lifecycle fixes** — the brainstorm server's owner-PID monitoring had two bugs causing false shutdowns within 60 seconds: (1) EPERM from cross-user PIDs (Tailscale SSH, etc.) was treated as "process dead", and (2) on WSL the grandparent PID resolves to a short-lived subprocess that exits before the first lifecycle check. Fixed by treating EPERM as "alive" and validating the owner PID at startup — if it's already dead, monitoring is disabled and the server relies on the 30-minute idle timeout. This also removes the Windows/MSYS2-specific carve-out from `start-server.sh` since the server now handles it generically. (#879)
|
||||||
- **writing-skills** — corrected false claim that SKILL.md frontmatter supports "only two fields"; now says "two required fields" and links to the agentskills.io specification for all supported fields (PR #882 by @arittr)
|
- **writing-skills** — corrected false claim that SKILL.md frontmatter supports "only two fields"; now says "two required fields" and links to the agentskills.io specification for all supported fields (PR #882 by @arittr)
|
||||||
|
|
||||||
### Codex App Compatibility
|
### Codex App Compatibility
|
||||||
|
|||||||
@@ -79,7 +79,7 @@ const URL_HOST = process.env.BRAINSTORM_URL_HOST || (HOST === '127.0.0.1' ? 'loc
|
|||||||
const SESSION_DIR = process.env.BRAINSTORM_DIR || '/tmp/brainstorm';
|
const SESSION_DIR = process.env.BRAINSTORM_DIR || '/tmp/brainstorm';
|
||||||
const CONTENT_DIR = path.join(SESSION_DIR, 'content');
|
const CONTENT_DIR = path.join(SESSION_DIR, 'content');
|
||||||
const STATE_DIR = path.join(SESSION_DIR, 'state');
|
const STATE_DIR = path.join(SESSION_DIR, 'state');
|
||||||
const OWNER_PID = process.env.BRAINSTORM_OWNER_PID ? Number(process.env.BRAINSTORM_OWNER_PID) : null;
|
let ownerPid = process.env.BRAINSTORM_OWNER_PID ? Number(process.env.BRAINSTORM_OWNER_PID) : null;
|
||||||
|
|
||||||
const MIME_TYPES = {
|
const MIME_TYPES = {
|
||||||
'.html': 'text/html', '.css': 'text/css', '.js': 'application/javascript',
|
'.html': 'text/html', '.css': 'text/css', '.js': 'application/javascript',
|
||||||
@@ -312,8 +312,8 @@ function startServer() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
function ownerAlive() {
|
function ownerAlive() {
|
||||||
if (!OWNER_PID) return true;
|
if (!ownerPid) return true;
|
||||||
try { process.kill(OWNER_PID, 0); return true; } catch (e) { return e.code === 'EPERM'; }
|
try { process.kill(ownerPid, 0); return true; } catch (e) { return e.code === 'EPERM'; }
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check every 60s: exit if owner process died or idle for 30 minutes
|
// Check every 60s: exit if owner process died or idle for 30 minutes
|
||||||
@@ -323,6 +323,19 @@ function startServer() {
|
|||||||
}, 60 * 1000);
|
}, 60 * 1000);
|
||||||
lifecycleCheck.unref();
|
lifecycleCheck.unref();
|
||||||
|
|
||||||
|
// Validate owner PID at startup. If it's already dead, the PID resolution
|
||||||
|
// was wrong (common on WSL, Tailscale SSH, and cross-user scenarios).
|
||||||
|
// Disable monitoring and rely on the idle timeout instead.
|
||||||
|
if (ownerPid) {
|
||||||
|
try { process.kill(ownerPid, 0); }
|
||||||
|
catch (e) {
|
||||||
|
if (e.code !== 'EPERM') {
|
||||||
|
console.log(JSON.stringify({ type: 'owner-pid-invalid', pid: ownerPid, reason: 'dead at startup' }));
|
||||||
|
ownerPid = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
server.listen(PORT, HOST, () => {
|
server.listen(PORT, HOST, () => {
|
||||||
const info = JSON.stringify({
|
const info = JSON.stringify({
|
||||||
type: 'server-started', port: Number(PORT), host: HOST,
|
type: 'server-started', port: Number(PORT), host: HOST,
|
||||||
|
|||||||
@@ -107,12 +107,6 @@ if [[ -z "$OWNER_PID" || "$OWNER_PID" == "1" ]]; then
|
|||||||
OWNER_PID="$PPID"
|
OWNER_PID="$PPID"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# On Windows/MSYS2, the MSYS2 PID namespace is invisible to Node.js.
|
|
||||||
# Skip owner-PID monitoring — the 30-minute idle timeout prevents orphans.
|
|
||||||
case "${OSTYPE:-}" in
|
|
||||||
msys*|cygwin*|mingw*) OWNER_PID="" ;;
|
|
||||||
esac
|
|
||||||
|
|
||||||
# Foreground mode for environments that reap detached/background processes.
|
# Foreground mode for environments that reap detached/background processes.
|
||||||
if [[ "$FOREGROUND" == "true" ]]; then
|
if [[ "$FOREGROUND" == "true" ]]; then
|
||||||
echo "$$" > "$PID_FILE"
|
echo "$$" > "$PID_FILE"
|
||||||
|
|||||||
Reference in New Issue
Block a user