From 088eb465135df752f9909325c4a2da497476e716 Mon Sep 17 00:00:00 2001 From: Steven Clarkson Date: Thu, 21 May 2026 10:01:31 +1000 Subject: [PATCH 1/3] initial commit of oc adm to prow artifacts --- test/util/framework/hcp_helper.go | 11 ++- test/util/framework/per_test_framework.go | 97 +++++++++++++++++++++++ 2 files changed, 107 insertions(+), 1 deletion(-) diff --git a/test/util/framework/hcp_helper.go b/test/util/framework/hcp_helper.go index b3cd40c0776..a60877b5d41 100644 --- a/test/util/framework/hcp_helper.go +++ b/test/util/framework/hcp_helper.go @@ -118,12 +118,21 @@ func (tc *perItOrDescribeTestContext) GetAdminRESTConfigForHCPCluster( switch m := any(operationResult).(type) { case hcpsdk20240610preview.HcpOpenShiftClustersClientRequestAdminCredentialResponse: - return clientcmd.BuildConfigFromKubeconfigGetter("", func() (*clientcmdapi.Config, error) { + restConfig, err := clientcmd.BuildConfigFromKubeconfigGetter("", func() (*clientcmdapi.Config, error) { if m.Kubeconfig == nil { return nil, fmt.Errorf("kubeconfig content is nil") } return clientcmd.Load([]byte(*m.Kubeconfig)) }) + if err != nil { + return nil, err + } + + tc.contextLock.Lock() + tc.hcpAdminConfigs[resourceGroupName+"/"+hcpClusterName] = restConfig + tc.contextLock.Unlock() + + return restConfig, nil default: return nil, fmt.Errorf("unknown type %T", m) } diff --git a/test/util/framework/per_test_framework.go b/test/util/framework/per_test_framework.go index 1bb5015065b..e4608fff2b3 100644 --- a/test/util/framework/per_test_framework.go +++ b/test/util/framework/per_test_framework.go @@ -25,6 +25,7 @@ import ( "log/slog" "net/http" "os" + "os/exec" "path/filepath" "regexp" "strings" @@ -38,6 +39,7 @@ import ( "k8s.io/apimachinery/pkg/util/rand" utilruntime "k8s.io/apimachinery/pkg/util/runtime" + "k8s.io/client-go/rest" "sigs.k8s.io/yaml" @@ -75,6 +77,7 @@ type perItOrDescribeTestContext struct { azureLogFile *os.File timingMetadata timing.SpecTimingMetadata knownDeployments []deploymentInfo + hcpAdminConfigs map[string]*rest.Config } type deploymentInfo struct { @@ -131,6 +134,7 @@ func NewTestContext() *perItOrDescribeTestContext { perBinaryInvocationTestContext: invocationContext(), LogDirPath: logDirPath, azureLogFile: azureLogFile, + hcpAdminConfigs: make(map[string]*rest.Config), timingMetadata: timing.SpecTimingMetadata{ // Answering the question of "what's the currently-running test name?" in Ginkgo is difficult - // all we know in general is the hierarchy of nodes under which we are currently running. We @@ -374,6 +378,11 @@ func (tc *perItOrDescribeTestContext) collectDebugInfo(ctx context.Context) { return tc.collectDebugInfoForResourceGroup(ctx, currResourceGroupName) }) } + waitGroup.Go(func() error { + defer utilruntime.HandleCrashWithContext(ctx) + tc.collectHCPInspectData(ctx) + return nil + }) if err := waitGroup.Wait(); err != nil { // remember that Wait only shows the first error, not all the errors. if !isResourceGroupNotFoundError(err) { @@ -647,6 +656,94 @@ func (tc *perItOrDescribeTestContext) collectDebugInfoForResourceGroup(ctx conte return errors.Join(errs...) } +func (tc *perItOrDescribeTestContext) collectHCPInspectData(ctx context.Context) { + if _, err := exec.LookPath("oc"); err != nil { + ginkgo.GinkgoLogr.Info("oc not found in PATH, skipping HCP inspect data collection") + return + } + + tc.contextLock.RLock() + configs := make(map[string]*rest.Config, len(tc.hcpAdminConfigs)) + for k, v := range tc.hcpAdminConfigs { + configs[k] = v + } + tc.contextLock.RUnlock() + + if len(configs) == 0 { + return + } + + inspectGroup, inspectCtx := errgroup.WithContext(ctx) + for clusterKey, restConfig := range configs { + currKey := clusterKey + currConfig := restConfig + inspectGroup.Go(func() error { + utilruntime.HandleCrashWithContext(inspectCtx) + tc.runOCAdmInspect(inspectCtx, currKey, currConfig) + return nil + }) + } + _ = inspectGroup.Wait() +} + +func (tc *perItOrDescribeTestContext) runOCAdmInspect(ctx context.Context, clusterKey string, restConfig *rest.Config) { + parts := strings.SplitN(clusterKey, "/", 2) + if len(parts) != 2 { + ginkgo.GinkgoLogr.Error(fmt.Errorf("invalid cluster key %q", clusterKey), "skipping oc adm inspect") + return + } + clusterName := parts[1] + logger := ginkgo.GinkgoLogr.WithValues("cluster", clusterKey) + logger.Info("starting oc adm inspect for HCP cluster") + + startTime := time.Now() + defer func() { + tc.RecordTestStep(fmt.Sprintf("oc adm inspect %s", clusterKey), startTime, time.Now()) + }() + + kubeconfigContent, err := GenerateKubeconfig(restConfig) + if err != nil { + logger.Error(err, "failed to generate kubeconfig for HCP inspect, skipping") + return + } + + kubeconfigFile, err := os.CreateTemp("", fmt.Sprintf("inspect-kubeconfig-%s-*.yaml", clusterName)) + if err != nil { + logger.Error(err, "failed to create temp kubeconfig file, skipping") + return + } + defer os.Remove(kubeconfigFile.Name()) + + if _, err := kubeconfigFile.WriteString(kubeconfigContent); err != nil { + logger.Error(err, "failed to write temp kubeconfig file, skipping") + return + } + kubeconfigFile.Close() + + inspectDir := filepath.Join(tc.perBinaryInvocationTestContext.artifactDir, fmt.Sprintf("inspect-%s", clusterName)) + + inspectCtx, cancel := context.WithTimeout(ctx, 10*time.Minute) + defer cancel() + + cmd := exec.CommandContext(inspectCtx, "oc", "adm", "inspect", + "--kubeconfig", kubeconfigFile.Name(), + "--dest-dir", inspectDir, + "ns/openshift-ingress", + "ns/openshift-ingress-operator", + "clusteroperator/ingress", + ) + output, err := cmd.CombinedOutput() + if err != nil { + logger.Error(err, "oc adm inspect failed", "output", string(output)) + if mkdirErr := os.MkdirAll(inspectDir, 0755); mkdirErr == nil { + _ = os.WriteFile(filepath.Join(inspectDir, "inspect-error.log"), output, 0644) + } + return + } + + logger.Info("oc adm inspect completed successfully", "outputDir", inspectDir) +} + func (tc *perItOrDescribeTestContext) NewAppRegistrationWithServicePrincipal(ctx context.Context) (*graphutil.Application, *graphutil.ServicePrincipal, error) { appName := fmt.Sprintf("%s%d", graphutil.AppRegistrationPrefix, rand.Int()) ginkgo.GinkgoLogr.Info("creating app registration", "appName", appName) From fa1f5c25cd726def3aeaa4aca4e87290c090a2f9 Mon Sep 17 00:00:00 2001 From: Steven Clarkson Date: Thu, 21 May 2026 14:15:10 +1000 Subject: [PATCH 2/3] moving output dir into test folder --- test/util/framework/per_test_framework.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/test/util/framework/per_test_framework.go b/test/util/framework/per_test_framework.go index e4608fff2b3..94ee08f8ee0 100644 --- a/test/util/framework/per_test_framework.go +++ b/test/util/framework/per_test_framework.go @@ -657,6 +657,10 @@ func (tc *perItOrDescribeTestContext) collectDebugInfoForResourceGroup(ctx conte } func (tc *perItOrDescribeTestContext) collectHCPInspectData(ctx context.Context) { + if tc.LogDirPath == "" { + return + } + if _, err := exec.LookPath("oc"); err != nil { ginkgo.GinkgoLogr.Info("oc not found in PATH, skipping HCP inspect data collection") return @@ -720,7 +724,7 @@ func (tc *perItOrDescribeTestContext) runOCAdmInspect(ctx context.Context, clust } kubeconfigFile.Close() - inspectDir := filepath.Join(tc.perBinaryInvocationTestContext.artifactDir, fmt.Sprintf("inspect-%s", clusterName)) + inspectDir := filepath.Join(tc.LogDirPath, fmt.Sprintf("inspect-%s", clusterName)) inspectCtx, cancel := context.WithTimeout(ctx, 10*time.Minute) defer cancel() From 6187152156931ce0842a3671c904526419899d37 Mon Sep 17 00:00:00 2001 From: Steven Clarkson Date: Thu, 21 May 2026 17:24:37 +1000 Subject: [PATCH 3/3] copilot suggestions --- test/util/framework/per_test_framework.go | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/test/util/framework/per_test_framework.go b/test/util/framework/per_test_framework.go index 94ee08f8ee0..a6f5532cac1 100644 --- a/test/util/framework/per_test_framework.go +++ b/test/util/framework/per_test_framework.go @@ -682,7 +682,7 @@ func (tc *perItOrDescribeTestContext) collectHCPInspectData(ctx context.Context) currKey := clusterKey currConfig := restConfig inspectGroup.Go(func() error { - utilruntime.HandleCrashWithContext(inspectCtx) + defer utilruntime.HandleCrashWithContext(inspectCtx) tc.runOCAdmInspect(inspectCtx, currKey, currConfig) return nil }) @@ -717,12 +717,16 @@ func (tc *perItOrDescribeTestContext) runOCAdmInspect(ctx context.Context, clust return } defer os.Remove(kubeconfigFile.Name()) + defer kubeconfigFile.Close() if _, err := kubeconfigFile.WriteString(kubeconfigContent); err != nil { logger.Error(err, "failed to write temp kubeconfig file, skipping") return } - kubeconfigFile.Close() + if err := kubeconfigFile.Close(); err != nil { + logger.Error(err, "failed to flush temp kubeconfig file, skipping") + return + } inspectDir := filepath.Join(tc.LogDirPath, fmt.Sprintf("inspect-%s", clusterName))