Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion cmd/hyperfleet-api/server/api_server.go
Original file line number Diff line number Diff line change
Expand Up @@ -157,5 +157,7 @@ func (s apiServer) Start() {
}

// Stop shuts down the API server's underlying httpServer.
//
// The shutdown is given a context that expires after 10 seconds, so a
// shutdown that never completes cannot block the caller indefinitely.
// It returns whatever error Shutdown reports (presumably including the
// context's deadline error when the timeout elapses — httpServer's type is
// not visible here; confirm it is an *http.Server).
func (s apiServer) Stop() error {
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	// Release the timer resources as soon as Shutdown returns.
	defer cancel()
	return s.httpServer.Shutdown(ctx)
}
4 changes: 3 additions & 1 deletion cmd/hyperfleet-api/server/metrics_server.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,5 +71,7 @@ func (s metricsServer) Start() {
}

// Stop shuts down the metrics server's underlying httpServer.
//
// The shutdown is given a context that expires after 10 seconds, so a
// shutdown that never completes cannot block the caller indefinitely.
// It returns whatever error Shutdown reports (presumably including the
// context's deadline error when the timeout elapses — httpServer's type is
// not visible here; confirm it is an *http.Server).
func (s metricsServer) Stop() error {
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	// Release the timer resources as soon as Shutdown returns.
	defer cancel()
	return s.httpServer.Shutdown(ctx)
}
6 changes: 5 additions & 1 deletion pkg/db/db_session/testcontainer.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,11 @@ func (f *Testcontainer) CheckConnection() error {
}

func (f *Testcontainer) Close() error {
ctx := context.Background()
// Use a timeout to prevent hanging indefinitely during teardown.
// Without this, a hung container.Terminate() would block the process from
// exiting, causing Prow CI jobs to stay in "pending" state (HYPERFLEET-625).
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
defer cancel()

// Close SQL connection
if f.sqlDB != nil {
Expand Down
19 changes: 16 additions & 3 deletions test/helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,11 +108,17 @@ func NewHelper(t *testing.T) *Helper {

// Start JWK certificate mock server for testing
jwkMockTeardown := helper.StartJWKCertServerMock()
// Teardown order: terminate the testcontainer FIRST so the
// container is removed before anything else. If server shutdown hangs
// and the force-exit goroutine kills the process, the container
// would remain alive and keep the Prow pod stuck (HYPERFLEET-625).
// CleanDB is omitted because the container is destroyed anyway.
helper.teardowns = []func() error{
helper.CleanDB,
jwkMockTeardown,
helper.stopAPIServer,
helper.teardownEnv,
helper.stopAPIServer,
helper.stopMetricsServer,
helper.stopHealthServer,
jwkMockTeardown,
}
helper.startAPIServer()
helper.startMetricsServer()
Expand Down Expand Up @@ -181,6 +187,13 @@ func (helper *Helper) stopMetricsServer() error {
return nil
}

// stopHealthServer stops the helper's HealthServer.
//
// It returns nil on success. On failure the underlying error is wrapped
// (with %w) so callers can still inspect it via errors.Is / errors.As; the
// resulting message text is unchanged from the previous %s formatting.
func (helper *Helper) stopHealthServer() error {
	if err := helper.HealthServer.Stop(); err != nil {
		return fmt.Errorf("unable to stop health server: %w", err)
	}
	return nil
}

func (helper *Helper) startHealthServer() {
ctx := context.Background()
helper.HealthServer = server.NewHealthServer()
Expand Down
8 changes: 8 additions & 0 deletions test/integration/clusters_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -778,3 +778,11 @@ func TestClusterPost_WrongKind(t *testing.T) {
Expect(ok).To(BeTrue())
Expect(detail).To(ContainSubstring("kind must be 'Cluster'"))
}

// TestClusterPanicFailure deliberately panics after registering the
// integration test. It exists only to confirm that Prow reports an
// integration-test failure when a panic happens mid-run.
//
// Temporary: delete this test once that behavior has been verified.
func TestClusterPanicFailure(t *testing.T) {
	const reason = "intentional panic to test Prow failure reporting"

	// Both return values are intentionally discarded; only the registration
	// side effects matter before the panic fires.
	_, _ = test.RegisterIntegration(t)

	panic(reason)
}
16 changes: 16 additions & 0 deletions test/integration/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"path/filepath"
"runtime"
"testing"
"time"

"github.com/openshift-hyperfleet/hyperfleet-api/pkg/logger"
"github.com/openshift-hyperfleet/hyperfleet-api/test"
Expand Down Expand Up @@ -57,6 +58,21 @@ func TestMain(m *testing.M) {

helper := test.NewHelper(&testing.T{})
exitCode := m.Run()

// Force exit if teardown hangs (e.g., due to a panic leaving resources in a bad state).
// Without this, hung teardown blocks the process from exiting, causing
// Prow CI jobs to stay in "pending" state indefinitely (HYPERFLEET-625).
// 45s allows the testcontainer termination (30s timeout) to complete first.
localExit := exitCode
go func() {
time.Sleep(45 * time.Second)
logger.Error(ctx, "Teardown timed out after 45s, forcing exit")
if localExit == 0 {
localExit = 1
}
os.Exit(localExit)
}()

helper.Teardown()
os.Exit(exitCode)
}