diff --git a/packages/desktop/build/bin/kanban b/packages/desktop/build/bin/kanban index c393f9631..bb79281f2 100755 --- a/packages/desktop/build/bin/kanban +++ b/packages/desktop/build/bin/kanban @@ -14,6 +14,18 @@ SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" RESOURCES_DIR="$(dirname "$SCRIPT_DIR")" CLI_ENTRY="$RESOURCES_DIR/app.asar.unpacked/cli/cli.js" +# Runtime-update override. The desktop orchestrator pre-validates the +# path; we fail loudly on a missing file rather than silently falling +# back, so the parent's rollback bookkeeping stays in sync with what +# actually ran. +if [ -n "$KANBAN_CLI_OVERRIDE" ]; then + if [ ! -f "$KANBAN_CLI_OVERRIDE" ]; then + echo "error: KANBAN_CLI_OVERRIDE points to missing file: $KANBAN_CLI_OVERRIDE" >&2 + exit 1 + fi + CLI_ENTRY="$KANBAN_CLI_OVERRIDE" +fi + if [ ! -f "$CLI_ENTRY" ]; then echo "error: Kanban CLI not found at $CLI_ENTRY" >&2 exit 1 diff --git a/packages/desktop/build/bin/kanban.cmd b/packages/desktop/build/bin/kanban.cmd index bdd14c7d5..b08a7b333 100755 --- a/packages/desktop/build/bin/kanban.cmd +++ b/packages/desktop/build/bin/kanban.cmd @@ -11,6 +11,17 @@ set "SCRIPT_DIR=%~dp0" set "RESOURCES_DIR=%SCRIPT_DIR%.." set "CLI_ENTRY=%RESOURCES_DIR%\app.asar.unpacked\cli\cli.js" +REM Runtime-update override (see POSIX shim for rationale): fail loud +REM on missing file rather than silently falling back to bundled. +if defined KANBAN_CLI_OVERRIDE ( + if not exist "%KANBAN_CLI_OVERRIDE%" ( + echo error: KANBAN_CLI_OVERRIDE points to missing file: %KANBAN_CLI_OVERRIDE% >&2 + endlocal + exit /b 1 + ) + set "CLI_ENTRY=%KANBAN_CLI_OVERRIDE%" +) + REM Windows packaged layout: REM Kanban\resources\bin\kanban.cmd (this file) REM RESOURCES_DIR = Kanban\resources diff --git a/packages/desktop/src/runtime-child.ts b/packages/desktop/src/runtime-child.ts index b5149e42b..e42914b18 100644 --- a/packages/desktop/src/runtime-child.ts +++ b/packages/desktop/src/runtime-child.ts @@ -49,6 +49,9 @@ export interface RuntimeChildManagerOptions { * Node process, so generous headroom matters for multi-agent workloads. */ maxOldSpaceMb?: number; + /** Absolute path to a cli.js the shim should run instead of the bundled + * one. Forwarded via `KANBAN_CLI_OVERRIDE`. */ + cliEntryOverride?: string; spawnFn?: typeof spawn; } @@ -147,6 +150,7 @@ export class RuntimeChildManager extends EventEmitter pollIntervalMs: number; startupTimeoutMs: number; maxOldSpaceMb: number; + cliEntryOverride: string | undefined; spawnFn: typeof spawn; }; @@ -163,6 +167,7 @@ export class RuntimeChildManager extends EventEmitter pollIntervalMs: options.pollIntervalMs ?? 200, startupTimeoutMs: options.startupTimeoutMs ?? 30_000, maxOldSpaceMb: options.maxOldSpaceMb ?? DEFAULT_MAX_OLD_SPACE_MB, + cliEntryOverride: options.cliEntryOverride || undefined, spawnFn: options.spawnFn ?? spawn, }; } @@ -214,6 +219,9 @@ export class RuntimeChildManager extends EventEmitter const env = buildFilteredEnv(); env.KANBAN_DESKTOP = "1"; + if (this.opts.cliEntryOverride !== undefined) { + env.KANBAN_CLI_OVERRIDE = this.opts.cliEntryOverride; + } // Merge our V8 heap limit with any existing NODE_OPTIONS from parent. // Strip both hyphen and underscore variants to avoid duplicates. const existingNodeOptions = env.NODE_OPTIONS?.trim() || ""; diff --git a/packages/desktop/src/runtime-orchestrator.ts b/packages/desktop/src/runtime-orchestrator.ts index 1e021211c..0fdef673a 100644 --- a/packages/desktop/src/runtime-orchestrator.ts +++ b/packages/desktop/src/runtime-orchestrator.ts @@ -4,13 +4,21 @@ import { powerSaveBlocker } from "electron"; import { RuntimeChildManager } from "./runtime-child.js"; - interface RuntimeOrchestratorOptions { - host: string; port: number; healthTimeoutMs: number; resolveCliShimPath: () => string; + /** Re-evaluated on every spawn. `null` ⇒ use the shim's bundled cli. */ + resolveCliEntryOverride?: () => string | null; + /** Called when a staged spawn fails its readiness probe. The + * orchestrator retries once with the bundled cli on this same launch; + * the callback should clear/roll back the version it just tried. + * `cliEntry` is the exact override path used for the failed spawn — + * capturing it at spawn time avoids racing with a concurrent + * background staging that may have moved the pointer to a *new* + * version (which we must not roll back). */ + onCliEntryOverrideFailed?: (reason: string, cliEntry: string) => void; fetchImpl?: typeof fetch; attachedProbeIntervalMs?: number; attachedProbeFailureThreshold?: number; @@ -71,6 +79,15 @@ export class RuntimeOrchestrator extends EventEmitter { expect(options.env.NODE_OPTIONS).not.toContain("4096"); }); + // `cliEntryOverride` is forwarded to the child env as + // `KANBAN_CLI_OVERRIDE` for the shim to pick up. + it("forwards cliEntryOverride to the child env as KANBAN_CLI_OVERRIDE", async () => { + const spawnSpy = createSpawnFn(mockChild); + manager = new RuntimeChildManager({ + cliPath: CLI_PATH, + spawnFn: spawnSpy, + cliEntryOverride: "/some/abs/path/cli.js", + }); + await manager.start(TEST_CONFIG); + + const spawnCall = (spawnSpy as ReturnType).mock.calls[0]; + const options = spawnCall[2] as { env: NodeJS.ProcessEnv }; + expect(options.env.KANBAN_CLI_OVERRIDE).toBe("/some/abs/path/cli.js"); + }); + + it("does not set KANBAN_CLI_OVERRIDE when override is omitted", async () => { + const spawnSpy = createSpawnFn(mockChild); + manager = new RuntimeChildManager({ + cliPath: CLI_PATH, + spawnFn: spawnSpy, + }); + await manager.start(TEST_CONFIG); + + const spawnCall = (spawnSpy as ReturnType).mock.calls[0]; + const options = spawnCall[2] as { env: NodeJS.ProcessEnv }; + expect(options.env.KANBAN_CLI_OVERRIDE).toBeUndefined(); + }); + + it("treats an empty string cliEntryOverride as 'no override'", async () => { + // Optional-chained callers can surface "" instead of undefined. + const spawnSpy = createSpawnFn(mockChild); + manager = new RuntimeChildManager({ + cliPath: CLI_PATH, + spawnFn: spawnSpy, + cliEntryOverride: "", + }); + await manager.start(TEST_CONFIG); + + const spawnCall = (spawnSpy as ReturnType).mock.calls[0]; + const options = spawnCall[2] as { env: NodeJS.ProcessEnv }; + expect(options.env.KANBAN_CLI_OVERRIDE).toBeUndefined(); + }); + // Platform-aware spawn options — pinned because regressing either // one breaks a specific failure mode: // - POSIX `detached: true` : required so treeKill(-pid) walks PTYs diff --git a/packages/desktop/test/runtime-orchestrator.test.ts b/packages/desktop/test/runtime-orchestrator.test.ts index 41ba9712d..491340966 100644 --- a/packages/desktop/test/runtime-orchestrator.test.ts +++ b/packages/desktop/test/runtime-orchestrator.test.ts @@ -13,11 +13,22 @@ vi.mock("electron", () => ({ const childManagers: FakeChildManager[] = []; class FakeChildManager extends EventEmitter { - constructor() { + /** One-shot: next `start()` rejects with this error, then clears. */ + static nextStartError: Error | null = null; + /** Constructor options observed across spawns, in order. */ + static lastConstructorOptions: Array> = []; + + constructor(options: Record = {}) { super(); + FakeChildManager.lastConstructorOptions.push(options); childManagers.push(this); } async start(): Promise { + const err = FakeChildManager.nextStartError; + if (err) { + FakeChildManager.nextStartError = null; + throw err; + } return "http://127.0.0.1:3484"; } async shutdown(): Promise {} @@ -1787,7 +1798,7 @@ describe("RuntimeOrchestrator health-probe runtime identification", () => { await orchestrator.shutdown(); }); - it("checkHealth() public method returns false for title-less 200", async () => { + it("checkHealth() public method returns false for title-less 200 (regression: title-grep no-op)", async () => { // Direct API-surface check — guards against the title grep being // regressed into a no-op (e.g. someone removing the .text() read // during a refactor). @@ -1812,5 +1823,262 @@ describe("RuntimeOrchestrator health-probe runtime identification", () => { }); }); +// --------------------------------------------------------------------- +// `cliEntryOverride` callback wiring + same-launch fallback. The host +// owns runtime-store / staging concerns; the orchestrator just plumbs +// a callback into RuntimeChildManager and signals back when a staged +// spawn fails so the host can clear its persistent pointer. +// --------------------------------------------------------------------- +describe("RuntimeOrchestrator cliEntryOverride wiring + fallback", () => { + beforeEach(() => { + childManagers.length = 0; + FakeChildManager.lastConstructorOptions.length = 0; + FakeChildManager.nextStartError = null; + }); + + it("forwards resolveCliEntryOverride() result into the child manager on every spawn", async () => { + // Returns a different override per call so we can verify the + // resolver is re-evaluated on each spawn (a freshly-staged runtime + // must take effect on the next restart, not be cached at construct + // time). Using an iterator instead of a counter so the resolver + // itself stays a pure value-producing function. + const overrides = ["/staged/v1/dist/cli.js", "/staged/v2/dist/cli.js"]; + let i = 0; + const resolveCliEntryOverride = vi.fn(() => overrides[i++] ?? null); + + const orchestrator = new RuntimeOrchestrator({ + host: "127.0.0.1", + port: 3484, + healthTimeoutMs: 500, + resolveCliShimPath: () => process.execPath, + fetchImpl: vi.fn(async () => + Promise.reject(new Error("ECONNREFUSED")), + ) as unknown as typeof fetch, + resolveCliEntryOverride, + attachedProbeIntervalMs: 0, + recoveryProbeIntervalMs: 0, + }); + + // First spawn — connect → startOwnRuntime → createManager. + await orchestrator.connect(); + expect(resolveCliEntryOverride).toHaveBeenCalledTimes(1); + expect( + FakeChildManager.lastConstructorOptions.at(-1)?.cliEntryOverride, + ).toBe("/staged/v1/dist/cli.js"); + + // Second spawn — restart() shuts the manager down and respawns, + // re-querying the resolver. + await orchestrator.restart(); + expect(resolveCliEntryOverride).toHaveBeenCalledTimes(2); + expect( + FakeChildManager.lastConstructorOptions.at(-1)?.cliEntryOverride, + ).toBe("/staged/v2/dist/cli.js"); + + await orchestrator.shutdown(); + }); + + it("passes undefined cliEntryOverride when resolveCliEntryOverride returns null (use bundled cli)", async () => { + const orchestrator = new RuntimeOrchestrator({ + host: "127.0.0.1", + port: 3484, + healthTimeoutMs: 500, + resolveCliShimPath: () => process.execPath, + fetchImpl: vi.fn(async () => + Promise.reject(new Error("ECONNREFUSED")), + ) as unknown as typeof fetch, + resolveCliEntryOverride: () => null, + attachedProbeIntervalMs: 0, + recoveryProbeIntervalMs: 0, + }); + + await orchestrator.connect(); + expect( + FakeChildManager.lastConstructorOptions.at(-1)?.cliEntryOverride, + ).toBeUndefined(); + + await orchestrator.shutdown(); + }); + + it("passes undefined cliEntryOverride when no resolver is wired (dev / unmanaged)", async () => { + const orchestrator = new RuntimeOrchestrator({ + host: "127.0.0.1", + port: 3484, + healthTimeoutMs: 500, + resolveCliShimPath: () => process.execPath, + fetchImpl: vi.fn(async () => + Promise.reject(new Error("ECONNREFUSED")), + ) as unknown as typeof fetch, + attachedProbeIntervalMs: 0, + recoveryProbeIntervalMs: 0, + }); + + await orchestrator.connect(); + expect( + FakeChildManager.lastConstructorOptions.at(-1)?.cliEntryOverride, + ).toBeUndefined(); + + await orchestrator.shutdown(); + }); + + it("on staged-spawn failure: invokes onCliEntryOverrideFailed and retries once with bundled cli", async () => { + // Spawn 1: resolver returns staged cli; FakeChildManager rejects. + // → onCliEntryOverrideFailed fires, orchestrator retries. + // Spawn 2: resolver returns null (host cleared the pointer); + // FakeChildManager succeeds. + const overrides: Array = ["/staged/v1/dist/cli.js", null]; + let i = 0; + const resolveCliEntryOverride = vi.fn(() => overrides[i++] ?? null); + const onCliEntryOverrideFailed = vi.fn(); + + // Single-shot failure on the first spawn — second spawn (bundled) + // resolves to the default URL. + FakeChildManager.nextStartError = new Error( + "runtime exited during startup: ENOENT staged/cli.js", + ); + + const orchestrator = new RuntimeOrchestrator({ + host: "127.0.0.1", + port: 3484, + healthTimeoutMs: 500, + resolveCliShimPath: () => process.execPath, + fetchImpl: vi.fn(async () => + Promise.reject(new Error("ECONNREFUSED")), + ) as unknown as typeof fetch, + resolveCliEntryOverride, + onCliEntryOverrideFailed, + attachedProbeIntervalMs: 0, + recoveryProbeIntervalMs: 0, + }); + + // connect() must succeed thanks to the same-launch retry, *not* + // reject — the user shouldn't see the failure. + await orchestrator.connect(); + expect(orchestrator.isOwned()).toBe(true); + expect(orchestrator.getUrl()).toBe("http://127.0.0.1:3484"); + + expect(onCliEntryOverrideFailed).toHaveBeenCalledTimes(1); + expect(onCliEntryOverrideFailed.mock.calls[0]?.[0]).toMatch( + /ENOENT staged\/cli.js/, + ); + // `cliEntry` must be captured at spawn time and forwarded — + // not re-derived from the pointer at failure time, which a + // concurrent background stage could have advanced. See + // runtime-auto-update onFailed for the matching consumer. + expect(onCliEntryOverrideFailed.mock.calls[0]?.[1]).toBe( + "/staged/v1/dist/cli.js", + ); + + // Two child managers were constructed — first staged (failed), + // second bundled (succeeded). + expect(FakeChildManager.lastConstructorOptions).toHaveLength(2); + expect( + FakeChildManager.lastConstructorOptions[0]?.cliEntryOverride, + ).toBe("/staged/v1/dist/cli.js"); + expect( + FakeChildManager.lastConstructorOptions[1]?.cliEntryOverride, + ).toBeUndefined(); + + await orchestrator.shutdown(); + }); + + it("does not loop forever if the bundled retry also fails (single retry only)", async () => { + // Both spawns fail. After the staged failure → callback → retry, + // the bundled spawn fails too — and that error propagates without + // triggering another callback or another retry. + const resolveCliEntryOverride = vi.fn(() => "/staged/v1/dist/cli.js"); + const onCliEntryOverrideFailed = vi.fn(); + + // Make every FakeChildManager.start() call fail. Using a + // once-per-instance error setter doesn't compose cleanly across + // two manager instances, so spy directly on the prototype. + const startSpy = vi + .spyOn(FakeChildManager.prototype, "start") + .mockRejectedValue(new Error("spawn failed")); + + const orchestrator = new RuntimeOrchestrator({ + host: "127.0.0.1", + port: 3484, + healthTimeoutMs: 500, + resolveCliShimPath: () => process.execPath, + fetchImpl: vi.fn(async () => + Promise.reject(new Error("ECONNREFUSED")), + ) as unknown as typeof fetch, + resolveCliEntryOverride, + onCliEntryOverrideFailed, + attachedProbeIntervalMs: 0, + recoveryProbeIntervalMs: 0, + }); + + await expect(orchestrator.connect()).rejects.toThrow(/spawn failed/); + + // Callback fired exactly once for the staged failure; the bundled + // retry's failure does not re-enter the callback. + expect(onCliEntryOverrideFailed).toHaveBeenCalledTimes(1); + // Two spawn attempts total: staged + bundled retry. + expect(FakeChildManager.lastConstructorOptions).toHaveLength(2); + + startSpy.mockRestore(); + }); + + it("does not invoke onCliEntryOverrideFailed when a *bundled* spawn fails", async () => { + // No override → first spawn is bundled. Failure is a packaging + // issue, not a staged-runtime issue; the callback must not fire + // and the error propagates directly. + const resolveCliEntryOverride = vi.fn(() => null); + const onCliEntryOverrideFailed = vi.fn(); + + FakeChildManager.nextStartError = new Error( + "runtime exited during startup: ENOENT bundled/cli.js", + ); + + const orchestrator = new RuntimeOrchestrator({ + host: "127.0.0.1", + port: 3484, + healthTimeoutMs: 500, + resolveCliShimPath: () => process.execPath, + fetchImpl: vi.fn(async () => + Promise.reject(new Error("ECONNREFUSED")), + ) as unknown as typeof fetch, + resolveCliEntryOverride, + onCliEntryOverrideFailed, + attachedProbeIntervalMs: 0, + recoveryProbeIntervalMs: 0, + }); + + await expect(orchestrator.connect()).rejects.toThrow(/ENOENT bundled/); + expect(onCliEntryOverrideFailed).not.toHaveBeenCalled(); + // Single spawn attempt — no retry. + expect(FakeChildManager.lastConstructorOptions).toHaveLength(1); + }); + + it("falls back to bundled when resolveCliEntryOverride throws (does not brick the spawn)", async () => { + const resolveCliEntryOverride = vi.fn(() => { + throw new Error("disk I/O"); + }); + + const orchestrator = new RuntimeOrchestrator({ + host: "127.0.0.1", + port: 3484, + healthTimeoutMs: 500, + resolveCliShimPath: () => process.execPath, + fetchImpl: vi.fn(async () => + Promise.reject(new Error("ECONNREFUSED")), + ) as unknown as typeof fetch, + resolveCliEntryOverride, + attachedProbeIntervalMs: 0, + recoveryProbeIntervalMs: 0, + }); + + // connect() succeeds with bundled. + await orchestrator.connect(); + expect(orchestrator.isOwned()).toBe(true); + expect( + FakeChildManager.lastConstructorOptions.at(-1)?.cliEntryOverride, + ).toBeUndefined(); + + await orchestrator.shutdown(); + }); +}); +