File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -308,7 +308,24 @@ class EngineProvider extends ChangeNotifier {
308308 DateTime .now ().difference (lastKill) < _bundledEngineKillCooldown) {
309309 return ;
310310 }
311- unawaited (_recoverStalledLiveChannel ());
311+ unawaited (_maybeRecoverStalledLiveChannel ());
312+ }
313+
314+ /// Only treat missing live frames as a hung engine if HTTP health still says the
315+ /// engine is up. If the process was killed externally, health is down — let the
316+ /// normal reconnect path run instead of [recoverOwnedAfterStall], which uses
317+ /// [forceRestart] and can burn through the launcher's restart budget.
318+ Future <void > _maybeRecoverStalledLiveChannel () async {
319+ if (_recoveringLive) return ;
320+ try {
321+ final j = await _service.getHealth ();
322+ if (j == null || j['engine' ] != true ) {
323+ return ;
324+ }
325+ } catch (_) {
326+ return ;
327+ }
328+ await _recoverStalledLiveChannel ();
312329 }
313330
314331 /// HTTP/WS up but no live telemetry (hung orchestrator): restart bundled engine
Original file line number Diff line number Diff line change @@ -93,7 +93,9 @@ class EngineBundledLauncher {
9393 // installer, or a different launch path). If HTTP health succeeds, align disk and
9494 // connect — do not spawn a second engine. Stall recovery uses [forceRestart] and
9595 // skips this path.
96- if (! forceRestart && await _strictHealth (cfg.host, cfg.port)) {
96+ final httpHealthy =
97+ ! forceRestart && await _strictHealth (cfg.host, cfg.port);
98+ if (httpHealthy) {
9799 final dirty = cfg.status != EngineStatus .running ||
98100 cfg.lastError.isNotEmpty;
99101 if (dirty) {
@@ -114,6 +116,18 @@ class EngineBundledLauncher {
114116 );
115117 }
116118
119+ // Disk still says "running" (e.g. Task Manager kill — Python never rewrote the
120+ // file). HTTP is down; clear stale state so we skip the long "wait for existing"
121+ // window on a dead PID.
122+ if (! forceRestart && ! httpHealthy && cfg.status == EngineStatus .running) {
123+ cfg = cfg.copyWith (
124+ status: EngineStatus .stopped,
125+ lastError: '' ,
126+ pid: 0 ,
127+ );
128+ await EngineConfigStore .writeAtomic (cfg);
129+ }
130+
117131 if (cfg.status == EngineStatus .failed && ! userRetry) {
118132 return EngineBootstrapOutcome (
119133 success: false ,
You can’t perform that action at this time.
0 commit comments