Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 15 additions & 3 deletions backend/cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -377,16 +377,27 @@ func (f *BackendRootCmdFlags) ToBackendOptions(ctx context.Context, cmd *cobra.C

smiClientBuilder := app.NewServiceManagedIdentityClientBuilder(fpaMIDataplaneClientBuilder, azureConfig)

resourcesCosmosDBClient, billingDBClient, err := app.NewCosmosDBClients(
ctx,
azCoreClientOptions := *azureConfig.CloudEnvironment.AZCoreClientOptions()

cosmosDatabaseClient, err := app.NewCosmosDatabaseClient(
f.AzureCosmosDBURL,
f.AzureCosmosDBName,
*azureConfig.CloudEnvironment.AZCoreClientOptions(),
azCoreClientOptions,
)
if err != nil {
return nil, utils.TrackError(err)
}

resourcesCosmosDBClient, billingDBClient, err := app.NewCosmosDBClients(cosmosDatabaseClient)
if err != nil {
return nil, utils.TrackError(err)
}

fleetDBClient, err := app.NewFleetDBClient(cosmosDatabaseClient)
if err != nil {
return nil, utils.TrackError(fmt.Errorf("failed to create fleet db client: %w", err))
}

clustersServiceClient, err := app.NewClustersServiceClient(ctx, f.ClustersServiceURL, f.ClustersServiceTLSInsecure)
if err != nil {
return nil, utils.TrackError(fmt.Errorf("failed to create clusters service client: %w", err))
Expand All @@ -401,6 +412,7 @@ func (f *BackendRootCmdFlags) ToBackendOptions(ctx context.Context, cmd *cobra.C
LeaderElectionLock: leaderElectionLock,
ResourcesDBClient: resourcesCosmosDBClient,
BillingDBClient: billingDBClient,
FleetDBClient: fleetDBClient,
ClustersServiceClient: clustersServiceClient,
MetricsServerListenAddress: f.MetricsServerListenAddress,
HealthzServerListenAddress: f.HealthzServerListenAddress,
Expand Down
2 changes: 1 addition & 1 deletion backend/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ require (
github.com/Azure/msi-dataplane v0.4.3
github.com/blang/semver/v4 v4.0.0
github.com/go-logr/logr v1.4.3
github.com/google/go-cmp v0.7.0
github.com/google/uuid v1.6.0
github.com/openshift-online/maestro v0.0.0-20260213014104-081c1f6df17b
github.com/openshift-online/ocm-sdk-go v0.1.499
Expand Down Expand Up @@ -75,7 +76,6 @@ require (
github.com/golang/glog v1.2.5 // indirect
github.com/golang/protobuf v1.5.4 // indirect
github.com/google/gnostic-models v0.7.0 // indirect
github.com/google/go-cmp v0.7.0 // indirect
github.com/gorilla/css v1.0.1 // indirect
github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
Expand Down
27 changes: 25 additions & 2 deletions backend/pkg/app/backend.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ import (
"github.com/Azure/ARO-HCP/backend/pkg/controllers/clusterpropertiescontroller"
"github.com/Azure/ARO-HCP/backend/pkg/controllers/controllerutils"
"github.com/Azure/ARO-HCP/backend/pkg/controllers/datadumpcontrollers"
"github.com/Azure/ARO-HCP/backend/pkg/controllers/managementclustercontrollers"
"github.com/Azure/ARO-HCP/backend/pkg/controllers/metricscontrollers"
"github.com/Azure/ARO-HCP/backend/pkg/controllers/mismatchcontrollers"
"github.com/Azure/ARO-HCP/backend/pkg/controllers/nodepoolpropertiescontroller"
Expand All @@ -50,6 +51,7 @@ import (
"github.com/Azure/ARO-HCP/backend/pkg/maestro"
internalazure "github.com/Azure/ARO-HCP/internal/azure"
"github.com/Azure/ARO-HCP/internal/database"
dbinformers "github.com/Azure/ARO-HCP/internal/database/informers"
"github.com/Azure/ARO-HCP/internal/ocm"
"github.com/Azure/ARO-HCP/internal/utils"
)
Expand All @@ -65,6 +67,7 @@ type BackendOptions struct {
LeaderElectionLock resourcelock.Interface
ResourcesDBClient database.ResourcesDBClient
BillingDBClient database.BillingDBClient
FleetDBClient database.FleetDBClient
ClustersServiceClient ocm.ClusterServiceClientSpec
MetricsRegisterer prometheus.Registerer
MetricsGatherer prometheus.Gatherer
Expand Down Expand Up @@ -362,6 +365,10 @@ func (b *Backend) runBackendControllersUnderLeaderElection(ctx context.Context,
operationPhaseMetricsController := metricscontrollers.NewController(
"OperationPhaseMetrics", backendInformers.AllOperations(), operationPhaseHandler)

fleetInformers := dbinformers.NewFleetInformers(ctx, b.options.FleetDBClient.GlobalListers())
_, stampLister := fleetInformers.Stamps()
_, managementClusterLister := fleetInformers.ManagementClusters()

clusterInformer, clusterLister := backendInformers.Clusters()
clusterHandler := metricscontrollers.NewClusterMetricsHandler(b.options.MetricsRegisterer)
clusterMetricsController := metricscontrollers.NewController(
Expand All @@ -385,6 +392,7 @@ func (b *Backend) runBackendControllersUnderLeaderElection(ctx context.Context,
clusterRecursiveDataDumpController := datadumpcontrollers.NewClusterRecursiveDataDumpController(b.options.ResourcesDBClient, activeOperationLister, backendInformers)
csStateDumpController := datadumpcontrollers.NewCSStateDumpController(b.options.ResourcesDBClient, activeOperationLister, backendInformers, b.options.ClustersServiceClient)
billingDumpController := datadumpcontrollers.NewBillingDumpController(b.options.ResourcesDBClient, b.options.BillingDBClient, activeOperationLister, backendInformers)
managementClusterDumpController := datadumpcontrollers.NewManagementClusterDataDumpController(b.options.FleetDBClient, managementClusterLister, fleetInformers)
doNothingController := controllers.NewDoNothingExampleController(b.options.ResourcesDBClient, subscriptionLister)
dispatchRequestCredentialController := operationcontrollers.NewDispatchRequestCredentialController(
utilsclock.RealClock{},
Expand Down Expand Up @@ -558,20 +566,31 @@ func (b *Backend) runBackendControllersUnderLeaderElection(ctx context.Context,
b.options.ResourcesDBClient,
backendInformers,
)

nodePoolVersionController := upgradecontrollers.NewNodePoolVersionController(
b.options.ResourcesDBClient,
b.options.ClustersServiceClient,
activeOperationLister,
backendInformers,
)

triggerNodePoolUpgradeController := upgradecontrollers.NewTriggerNodePoolUpgradeController(
b.options.ResourcesDBClient,
b.options.ClustersServiceClient,
activeOperationLister,
backendInformers,
)
managementClusterMigrationController := managementclustercontrollers.NewManagementClusterMigrationController(
b.options.ClustersServiceClient,
b.options.FleetDBClient,
stampLister,
managementClusterLister,
)
placementSyncController := managementclustercontrollers.NewManagementClusterPlacementSyncController(
b.options.ResourcesDBClient,
b.options.ClustersServiceClient,
activeOperationLister,
managementClusterLister,
backendInformers,
)

nodePoolPropertiesSyncController := nodepoolpropertiescontroller.NewNodePoolPropertiesSyncController(
b.options.ResourcesDBClient,
Expand All @@ -596,11 +615,13 @@ func (b *Backend) runBackendControllersUnderLeaderElection(ctx context.Context,
OnStartedLeading: func(ctx context.Context) {
// start the SharedInformers
go backendInformers.RunWithContext(ctx)
go fleetInformers.RunWithContext(ctx)

go subscriptionNonClusterDataDumpController.Run(ctx, 20)
go clusterRecursiveDataDumpController.Run(ctx, 20)
go csStateDumpController.Run(ctx, 20)
go billingDumpController.Run(ctx, 20)
go managementClusterDumpController.Run(ctx, 20)
go doNothingController.Run(ctx, 20)
go dispatchRequestCredentialController.Run(ctx, 20)
go dispatchRevokeCredentialsController.Run(ctx, 20)
Expand Down Expand Up @@ -645,6 +666,8 @@ func (b *Backend) runBackendControllersUnderLeaderElection(ctx context.Context,
go clusterMetricsController.Run(ctx, 1)
go nodePoolMetricsController.Run(ctx, 1)
go externalAuthMetricsController.Run(ctx, 1)
go managementClusterMigrationController.Run(ctx, 1)
go placementSyncController.Run(ctx, 20)
},
OnStoppedLeading: func() {
// This needs to be defined even though it does nothing.
Expand Down
26 changes: 20 additions & 6 deletions backend/pkg/app/cosmos_wiring.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,23 +15,28 @@
package app

import (
"context"
"fmt"

"github.com/Azure/azure-sdk-for-go/sdk/azcore"
"github.com/Azure/azure-sdk-for-go/sdk/data/azcosmos"

"github.com/Azure/ARO-HCP/internal/database"
"github.com/Azure/ARO-HCP/internal/utils"
)

// NewCosmosDBClients opens the shared async Cosmos database and returns data-plane clients for
// ARM resource documents (Resources container) and billing documents (Billing container).
func NewCosmosDBClients(ctx context.Context, cosmosDBURL string, cosmosDBName string, azCoreClientOptions azcore.ClientOptions) (database.ResourcesDBClient, database.BillingDBClient, error) {
cosmosDatabaseClient, err := database.NewCosmosDatabaseClient(cosmosDBURL, cosmosDBName, azCoreClientOptions)
// NewCosmosDatabaseClient creates the shared Cosmos DatabaseClient that
// is passed into the per-container wiring functions below.
func NewCosmosDatabaseClient(cosmosDBURL string, cosmosDBName string, azCoreClientOptions azcore.ClientOptions) (*azcosmos.DatabaseClient, error) {
client, err := database.NewCosmosDatabaseClient(cosmosDBURL, cosmosDBName, azCoreClientOptions)
if err != nil {
return nil, nil, utils.TrackError(fmt.Errorf("failed to create Azure Cosmos database client: %w", err))
return nil, utils.TrackError(fmt.Errorf("failed to create Azure Cosmos database client: %w", err))
}
return client, nil
}

// NewCosmosDBClients returns data-plane clients for
// ARM resource documents (Resources container) and billing documents (Billing container).
func NewCosmosDBClients(cosmosDatabaseClient *azcosmos.DatabaseClient) (database.ResourcesDBClient, database.BillingDBClient, error) {
resourcesDBClient, err := database.NewResourcesDBClient(cosmosDatabaseClient)
if err != nil {
return nil, nil, utils.TrackError(fmt.Errorf("failed to create resources database client: %w", err))
Expand All @@ -44,3 +49,12 @@ func NewCosmosDBClients(ctx context.Context, cosmosDBURL string, cosmosDBName st

return resourcesDBClient, billingDBClient, nil
}

// NewFleetDBClient wraps the shared Cosmos DatabaseClient in a
// database.FleetDBClient. Any construction error is wrapped with context and
// passed through utils.TrackError before being returned.
func NewFleetDBClient(cosmosDatabaseClient *azcosmos.DatabaseClient) (database.FleetDBClient, error) {
	client, err := database.NewFleetDBClient(cosmosDatabaseClient)
	if err != nil {
		wrapped := fmt.Errorf("failed to create Fleet DBClient: %w", err)
		return nil, utils.TrackError(wrapped)
	}
	return client, nil
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
// Copyright 2026 Microsoft Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package controllerutils

import (
"context"
"errors"
"time"

"github.com/go-logr/logr"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"

azcorearm "github.com/Azure/azure-sdk-for-go/sdk/azcore/arm"

"github.com/Azure/ARO-HCP/internal/api"
"github.com/Azure/ARO-HCP/internal/api/fleet"
"github.com/Azure/ARO-HCP/internal/database"
dbinformers "github.com/Azure/ARO-HCP/internal/database/informers"
"github.com/Azure/ARO-HCP/internal/utils"
)

// ManagementClusterKey is the key handed to ManagementClusterSyncer.SyncOnce.
// The stamp identifier alone is used both to derive the management cluster's
// resource ID (see GetResourceID) and to address its controller documents.
type ManagementClusterKey struct {
// StampIdentifier is serialized under the JSON name "stampIdentifier".
StampIdentifier string `json:"stampIdentifier"`
}

// GetResourceID returns the management cluster resource ID derived from the
// key's stamp identifier. The conversion error is handed to api.Must
// (presumably panics on a non-nil error; confirm against api.Must).
func (k ManagementClusterKey) GetResourceID() *azcorearm.ResourceID {
	resourceID, err := fleet.ToManagementClusterResourceID(k.StampIdentifier)
	return api.Must(resourceID, err)
}

// AddLoggerValues returns a logger derived from logger that additionally
// carries the log values produced for this key's resource ID.
func (k ManagementClusterKey) AddLoggerValues(logger logr.Logger) logr.Logger {
	keysAndValues := utils.LogValues{}.AddLogValuesForResourceID(k.GetResourceID())
	return logger.WithValues(keysAndValues...)
}

// InitialController builds the starting api.Controller document for the
// controller named controllerName on this management cluster.
//
// The document's resource ID is the management cluster resource ID with
// "/<fleet.ControllerResourceTypeName>/<controllerName>" appended, its
// ExternalID is the management cluster resource ID itself, and its status
// starts with an empty (non-nil) condition list.
func (k ManagementClusterKey) InitialController(controllerName string) *api.Controller {
	managementClusterID := k.GetResourceID()
	controllerPath := managementClusterID.String() + "/" + fleet.ControllerResourceTypeName + "/" + controllerName

	return &api.Controller{
		CosmosMetadata: api.CosmosMetadata{
			ResourceID: api.Must(azcorearm.ParseResourceID(controllerPath)),
		},
		ExternalID: managementClusterID,
		Status: api.ControllerStatus{
			Conditions: []metav1.Condition{},
		},
	}
}

// ManagementClusterSyncer is the per-management-cluster logic that
// NewManagementClusterWatchingController wraps.
type ManagementClusterSyncer interface {
// SyncOnce performs one sync for the management cluster identified by key.
// The watching controller passes the returned error to ReportSyncError when
// writing the controller document, and also returns it to its own caller.
SyncOnce(ctx context.Context, key ManagementClusterKey) error
// CooldownChecker returns the syncer's CooldownChecker; the watching
// controller exposes it unchanged.
CooldownChecker() CooldownChecker
}

// managementClusterWatchingController adapts a ManagementClusterSyncer for use
// with newGenericWatchingController and records each sync result on the
// management cluster's controller document.
type managementClusterWatchingController struct {
// name is the controller name used when writing the controller document.
name string
// syncer is the wrapped implementation that does the actual sync work.
syncer ManagementClusterSyncer
// fleetDBClient is used to reach the management cluster's Controllers CRUD.
fleetDBClient database.FleetDBClient
}

// NewManagementClusterWatchingController wraps syncer in a generic watching
// controller for fleet.ManagementClusterResourceType so that management
// clusters are queued for syncing.
//
// When fleetInformers is non-nil, the controller is hooked up to the
// management cluster informer with resyncDuration. A nil fleetInformers skips
// that wiring and yields a controller that is never triggered.
//
// It panics if the informer wiring fails, since that indicates a coding error.
func NewManagementClusterWatchingController(
	name string,
	fleetDBClient database.FleetDBClient,
	fleetInformers dbinformers.FleetInformers,
	resyncDuration time.Duration,
	syncer ManagementClusterSyncer,
) Controller {
	controller := newGenericWatchingController(
		name,
		fleet.ManagementClusterResourceType,
		&managementClusterWatchingController{
			name:          name,
			syncer:        syncer,
			fleetDBClient: fleetDBClient,
		},
	)

	// Unit tests that don't want triggering pass nil informers. It isn't
	// beautiful, but the failure mode is "does nothing", which is pretty safe.
	if fleetInformers == nil {
		return controller
	}

	informer, _ := fleetInformers.ManagementClusters()
	if err := controller.QueueForInformers(resyncDuration, informer); err != nil {
		panic(err) // coding error
	}

	return controller
}

// SyncOnce runs the wrapped syncer for key and then writes the outcome to the
// management cluster's controller document via WriteController.
//
// The returned error joins the syncer's error with any error from writing the
// controller document; it is nil only when both succeeded.
func (c *managementClusterWatchingController) SyncOnce(ctx context.Context, key ManagementClusterKey) error {
	controllers := c.fleetDBClient.Stamps().ManagementClusters(key.StampIdentifier).Controllers()

	// The handler is built up front and registered before the syncer runs, so
	// that it is in place if the sync below panics.
	onPanic := DegradedControllerPanicHandler(ctx, controllers, c.name, key.InitialController)
	defer utilruntime.HandleCrash(onPanic)

	syncErr := c.syncer.SyncOnce(ctx, key)

	// Always record the result, whether or not the sync succeeded.
	writeErr := WriteController(ctx, controllers, c.name, key.InitialController, ReportSyncError(syncErr))

	return errors.Join(syncErr, writeErr)
}

// CooldownChecker returns the wrapped syncer's CooldownChecker unchanged.
func (c *managementClusterWatchingController) CooldownChecker() CooldownChecker {
return c.syncer.CooldownChecker()
}

// MakeKey builds the ManagementClusterKey for resourceID, taking the stamp
// identifier from the name of the resource ID's parent.
// NOTE(review): assumes resourceID and its Parent are non-nil, i.e. that only
// management cluster resource IDs reach this method; confirm against callers.
func (c *managementClusterWatchingController) MakeKey(resourceID *azcorearm.ResourceID) ManagementClusterKey {
	parentID := resourceID.Parent
	return ManagementClusterKey{StampIdentifier: parentID.Name}
}
Loading
Loading