fix: Improve e2e timeouts (#2658)
Conversation
Greptile Summary: This PR improves e2e CI reliability by adding explicit timeouts to chain-start operations and parallelizing the compatibility-test matrix. Previously, a consumer chain that never produced blocks would block indefinitely; now it fails within 5 minutes at the Go level, and the CI job itself is capped at 30 minutes per consumer version.
Confidence Score: 5/5. The changes are safe to merge; they tighten timeouts and parallelize CI without altering any production logic. The goroutine + buffered-channel (size 1) + select pattern is correctly implemented in both actions files: the goroutine can always drain without blocking even if the timeout branch fires first, and log.Fatalf exits the process before any leak becomes a concern. The workflow matrix correctly sets fail-fast: false so a hanging version does not cancel siblings, and the reduced per-job timeout (30 min) is generous relative to the stated healthy startup time (under 1 minute). No files require special attention.
Important Files Changed
|
| go func() { | ||
| for scanner.Scan() { | ||
| out := scanner.Text() | ||
| if verbose { | ||
| fmt.Println("startChain: " + out) | ||
| } | ||
| if out == done { | ||
| scanDone <- nil | ||
| return | ||
| } | ||
| } | ||
| if out == done { | ||
| break | ||
| if err := scanner.Err(); err != nil { | ||
| scanDone <- err | ||
| return | ||
| } | ||
| } | ||
| if err := scanner.Err(); err != nil { | ||
| log.Fatal(err) | ||
| // The script exited (stdout closed) before signaling done, which means | ||
| // the chain failed to start rather than just being slow. | ||
| scanDone <- fmt.Errorf("chain %s start script exited before signaling done", action.Chain) | ||
| }() |
| go func() { | ||
| for scanner.Scan() { | ||
| out := scanner.Text() | ||
| if verbose { | ||
| fmt.Println("startChain: " + out) | ||
| } | ||
| if out == done { | ||
| scanDone <- nil | ||
| return | ||
| } | ||
| } | ||
| if out == done { | ||
| break | ||
| if err := scanner.Err(); err != nil { | ||
| scanDone <- err | ||
| return | ||
| } | ||
| } | ||
| if err := scanner.Err(); err != nil { | ||
| log.Fatal(err) | ||
| scanDone <- fmt.Errorf("chain %s start script exited before signaling done", action.Chain) | ||
| }() |
| go func() { | ||
| for scanner.Scan() { | ||
| out := scanner.Text() | ||
| if verbose { | ||
| fmt.Println("assign key - reconfigure: " + out) | ||
| } | ||
| if out == done { | ||
| scanDone <- nil | ||
| return | ||
| } | ||
| } | ||
| if out == done { | ||
| break | ||
| if err := scanner.Err(); err != nil { | ||
| scanDone <- err | ||
| return | ||
| } | ||
| } | ||
| if err := scanner.Err(); err != nil { | ||
| log.Fatal(err) | ||
| scanDone <- fmt.Errorf("reconfigure node for %s exited before signaling done", action.Chain) | ||
| }() |
| matrix: | ||
| # Consumer versions tested against the latest provider. | ||
| # For new versions to be tested add/remove entries here. | ||
| consumer-version: [latest, v5.2.0, v6.3.0] |
There was a problem hiding this comment.
We can probably just test latest. Stride is really the only one we care about right now, and they're on v7. That will likely reduce test time even further.
|
Queued — the merge queue status continues in this comment ↓. |
Merge Queue Status
This pull request spent 20 seconds in the queue, including 4 seconds running CI. Waiting for
All conditions
Reason: Pull request #2658 has been dequeued because merge conditions no longer match. Blocked by:
Hint: You should look at the reason for the failure and decide if the pull request needs to be fixed or if you want to requeue it. Tick the box to put this pull request back in the merge queue (same as
|
Please go to the Preview tab and select the appropriate sub-template: fix, feat, and refactor.