diff --git a/subcommand/health-sync/checks.go b/subcommand/health-sync/checks.go index f071e327..0ec14cc4 100644 --- a/subcommand/health-sync/checks.go +++ b/subcommand/health-sync/checks.go @@ -13,6 +13,12 @@ import ( "github.com/hashicorp/go-multierror" ) +// checkStatus holds the computed status for a Consul health check. +type checkStatus struct { + consulStatus string // Consul health status (api.HealthPassing or api.HealthCritical) + output string // the message to display in Consul +} + // fetchHealthChecks fetches the Consul health checks for both the service // and proxy registrations func (c *Command) fetchHealthChecks(consulClient *api.Client, taskMeta awsutil.ECSTaskMeta) (map[string]*api.HealthCheck, error) { @@ -62,163 +68,147 @@ func (c *Command) fetchHealthChecks(consulClient *api.Client, taskMeta awsutil.E return healthCheckMap, nil } -// setChecksCritical sets checks for all of the containers to critical -func (c *Command) setChecksCritical(consulClient *api.Client, taskMeta awsutil.ECSTaskMeta, clusterARN string, parsedContainerNames []string) error { +// setChecksCritical sets checks for all of the containers to critical. +// Used during graceful shutdown (SIGTERM). 
+func (c *Command) setChecksCritical(consulClient *api.Client, taskMeta awsutil.ECSTaskMeta, clusterARN string, containerNames []string) error { var result error - taskID := taskMeta.TaskID() serviceName := c.constructServiceName(taskMeta.Family) + serviceID := makeServiceID(serviceName, taskMeta.TaskID()) - for _, containerName := range parsedContainerNames { - var err error - if containerName == config.ConsulDataplaneContainerName { - err = c.handleHealthForDataplaneContainer(consulClient, taskID, serviceName, clusterARN, containerName, ecs.HealthStatusUnhealthy) - } else { - checkID := constructCheckID(makeServiceID(serviceName, taskID), containerName) - err = c.updateConsulHealthStatus(consulClient, checkID, clusterARN, ecs.HealthStatusUnhealthy) - } + // Create a map with all containers as unhealthy to get all check IDs + containerStatuses := make(map[string]string) + for _, name := range containerNames { + containerStatuses[name] = ecs.HealthStatusUnhealthy + } - if err == nil { - c.log.Info("set Consul health status to critical", - "container", containerName) - } else { - c.log.Warn("failed to set Consul health status to critical", - "err", err, - "container", containerName) + // Use computeCheckStatuses to get all check IDs + checkStatuses := c.computeCheckStatuses(serviceID, containerNames, containerStatuses) + + // Update all checks to critical + for checkID, status := range checkStatuses { + err := c.updateConsulHealthStatus(consulClient, checkID, clusterARN, status) + if err != nil { + c.log.Warn("failed to set Consul health status to critical", "err", err, "checkID", checkID) result = multierror.Append(result, err) + } else { + c.log.Info("set Consul health status to critical", "checkID", checkID) } } return result } -// syncChecks fetches metadata for the ECS task and uses that metadata to -// updates the Consul TTL checks for the containers specified in -// `parsedContainerNames`. 
Checks are only updated if they have changed since -// the last invocation of this function. -func (c *Command) syncChecks(consulClient *api.Client, - currentStatuses map[string]string, - clusterARN string, - parsedContainerNames []string) map[string]string { - // Fetch task metadata to get latest health of the containers - taskMeta, err := awsutil.ECSTaskMetadata() - if err != nil { - c.log.Error("unable to get task metadata", "err", err) - return currentStatuses - } - serviceName := c.constructServiceName(taskMeta.Family) - containersToSync, missingContainers := findContainersToSync(parsedContainerNames, taskMeta) - - // Mark the Consul health status as critical for missing containers - for _, name := range missingContainers { - checkID := constructCheckID(makeServiceID(serviceName, taskMeta.TaskID()), name) - c.log.Debug("marking container as unhealthy since it wasn't found in the task metadata", "name", name) +// computeOverallDataplaneHealth computes the aggregate health status. +// Returns UNHEALTHY if any container is unhealthy. +func computeOverallDataplaneHealth(containerStatuses map[string]string) string { + if len(containerStatuses) == 0 { + // This should not be possible in practice since containerNames always + // includes at least the dataplane container. Treat as unhealthy to be safe. 
+ return ecs.HealthStatusUnhealthy + } - var err error - if name == config.ConsulDataplaneContainerName { - err = c.handleHealthForDataplaneContainer(consulClient, taskMeta.TaskID(), serviceName, clusterARN, name, ecs.HealthStatusUnhealthy) - } else { - err = c.updateConsulHealthStatus(consulClient, checkID, clusterARN, ecs.HealthStatusUnhealthy) - } - - if err != nil { - c.log.Error("failed to update Consul health status for missing container", "err", err, "container", name) - } else { - c.log.Info("container health check updated in Consul for missing container", "container", name) - currentStatuses[name] = api.HealthCritical + for _, status := range containerStatuses { + if status != ecs.HealthStatusHealthy { + return ecs.HealthStatusUnhealthy } } + return ecs.HealthStatusHealthy +} - parsedContainers := make(map[string]string) - // iterate over parse - for _, container := range containersToSync { - c.log.Debug("updating Consul check from ECS container health", - "name", container.Name, - "status", container.Health.Status, - "statusSince", container.Health.StatusSince, - "exitCode", container.Health.ExitCode, - ) - parsedContainers[container.Name] = container.Health.Status - previousStatus := currentStatuses[container.Name] - if container.Health.Status != previousStatus { - var err error - if container.Name != config.ConsulDataplaneContainerName { - checkID := constructCheckID(makeServiceID(serviceName, taskMeta.TaskID()), container.Name) - err = c.updateConsulHealthStatus(consulClient, checkID, clusterARN, container.Health.Status) - } +// computeCheckStatuses computes the desired Consul health status for each check. +// Returns a map of checkID -> checkStatus containing both Consul status and output message. 
+func (c *Command) computeCheckStatuses(serviceID string, containerNames []string, containerStatuses map[string]string) map[string]checkStatus { + checkStatuses := make(map[string]checkStatus) + + // Overall dataplane health is the aggregate of all container statuses + overallECSHealth := computeOverallDataplaneHealth(containerStatuses) + overallConsulHealth := ecsHealthToConsulHealth(overallECSHealth) - if err != nil { - c.log.Warn("failed to update Consul health status", "err", err) - } else { - c.log.Info("container health check updated in Consul", - "name", container.Name, - "status", container.Health.Status, - "statusSince", container.Health.StatusSince, - "exitCode", container.Health.ExitCode, - ) - currentStatuses[container.Name] = container.Health.Status + for _, name := range containerNames { + if name == config.ConsulDataplaneContainerName { + // Dataplane container maps to overall health on service check + serviceCheckID := constructCheckID(serviceID, name) + checkStatuses[serviceCheckID] = checkStatus{ + consulStatus: overallConsulHealth, + output: fmt.Sprintf("ECS health status is %q for container %q", overallECSHealth, serviceCheckID), } - } - } - overallDataplaneHealthStatus, ok := parsedContainers[config.ConsulDataplaneContainerName] - // if dataplane container exist and healthy then proceed to checking the other containers health - if ok && overallDataplaneHealthStatus == ecs.HealthStatusHealthy { - // - for _, healthStatus := range parsedContainers { - // as soon as we find any unhealthy container, we can set the dataplane health to unhealthy - if healthStatus != ecs.HealthStatusHealthy { - overallDataplaneHealthStatus = ecs.HealthStatusUnhealthy - break + // Non-gateways also have a proxy check + if !c.config.IsGateway() { + proxySvcID, _ := makeProxySvcIDAndName(serviceID, "") + proxyCheckID := constructCheckID(proxySvcID, name) + checkStatuses[proxyCheckID] = checkStatus{ + consulStatus: overallConsulHealth, + output: fmt.Sprintf("ECS health 
status is %q for container %q", overallECSHealth, proxyCheckID), + } + } + } else { + // Non-dataplane containers map directly to their individual check + checkID := constructCheckID(serviceID, name) + ecsHealth := containerStatuses[name] + checkStatuses[checkID] = checkStatus{ + consulStatus: ecsHealthToConsulHealth(ecsHealth), + output: fmt.Sprintf("ECS health status is %q for container %q", ecsHealth, checkID), } } - } else { - // If no dataplane container or dataplane container is not healthy set overall health to unhealthy - overallDataplaneHealthStatus = ecs.HealthStatusUnhealthy } - err = c.handleHealthForDataplaneContainer(consulClient, taskMeta.TaskID(), serviceName, clusterARN, config.ConsulDataplaneContainerName, overallDataplaneHealthStatus) + return checkStatuses +} + +// syncChecks fetches ECS task metadata and updates Consul health checks +// for the specified containers. Checks are only updated when their status +// has changed since the last invocation. +func (c *Command) syncChecks(consulClient *api.Client, + previousStatuses map[string]checkStatus, + clusterARN string, + containerNames []string) map[string]checkStatus { + + // Phase 1: Gather current container state + taskMeta, err := awsutil.ECSTaskMetadata() if err != nil { - c.log.Warn("failed to update Consul health status", "err", err) + c.log.Error("unable to get task metadata", "err", err) + return previousStatuses } - return currentStatuses -} + serviceName := c.constructServiceName(taskMeta.Family) + serviceID := makeServiceID(serviceName, taskMeta.TaskID()) + containerStatuses := getContainerHealthStatuses(containerNames, taskMeta) -// handleHealthForDataplaneContainer takes care of the special handling needed for syncing -// the health of consul-dataplane container. We register two checks (one for the service -// and the other for proxy) when registering a typical service to the catalog. Updates -// should also happen twice in such cases. 
-func (c *Command) handleHealthForDataplaneContainer(consulClient *api.Client, taskID, serviceName, clusterARN, containerName, ecsHealthStatus string) error { - var checkID string - serviceID := makeServiceID(serviceName, taskID) - if c.config.IsGateway() { - checkID = constructCheckID(serviceID, containerName) - return c.updateConsulHealthStatus(consulClient, checkID, clusterARN, ecsHealthStatus) - } + // Phase 2: Turn ecs container health into consul checks + currentStatuses := c.computeCheckStatuses(serviceID, containerNames, containerStatuses) - checkID = constructCheckID(serviceID, containerName) - err := c.updateConsulHealthStatus(consulClient, checkID, clusterARN, ecsHealthStatus) - if err != nil { - return err + // Phase 3: Update Consul for any checks that have changed + for checkID, status := range currentStatuses { + previousStatus := previousStatuses[checkID] + if status == previousStatus { + continue + } + + err := c.updateConsulHealthStatus(consulClient, checkID, clusterARN, status) + if err != nil { + c.log.Warn("failed to update Consul health status", "err", err, "checkID", checkID) + // Keep the previous status on error so we retry next cycle + currentStatuses[checkID] = previousStatus + } else { + c.log.Info("health check updated in Consul", "checkID", checkID, "status", status.consulStatus) + } } - proxySvcID, _ := makeProxySvcIDAndName(serviceID, "") - checkID = constructCheckID(proxySvcID, containerName) - return c.updateConsulHealthStatus(consulClient, checkID, clusterARN, ecsHealthStatus) + // Phase 4: Return current status + return currentStatuses } -func (c *Command) updateConsulHealthStatus(consulClient *api.Client, checkID string, clusterARN string, ecsHealthStatus string) error { - consulHealthStatus := ecsHealthToConsulHealth(ecsHealthStatus) - +func (c *Command) updateConsulHealthStatus(consulClient *api.Client, checkID string, clusterARN string, status checkStatus) error { check, ok := c.checks[checkID] if !ok { return 
fmt.Errorf("unable to find check with ID %s", checkID) } - check.Status = consulHealthStatus - check.Output = fmt.Sprintf("ECS health status is %q for container %q", ecsHealthStatus, checkID) + check.Status = status.consulStatus + check.Output = status.output c.checks[checkID] = check updateCheckReq := &api.CatalogRegistration{ @@ -245,24 +235,27 @@ func constructCheckID(serviceID, containerName string) string { return fmt.Sprintf("%s-%s", serviceID, containerName) } -func findContainersToSync(containerNames []string, taskMeta awsutil.ECSTaskMeta) ([]awsutil.ECSTaskMetaContainer, []string) { - var ecsContainers []awsutil.ECSTaskMetaContainer - var missing []string - - for _, container := range containerNames { - found := false - for _, ecsContainer := range taskMeta.Containers { - if ecsContainer.Name == container { - ecsContainers = append(ecsContainers, ecsContainer) - found = true - break - } - } - if !found { - missing = append(missing, container) +// getContainerHealthStatuses builds a map of container name to ECS health status. +// Missing containers are assigned ecs.HealthStatusUnhealthy. 
+func getContainerHealthStatuses(containerNames []string, taskMeta awsutil.ECSTaskMeta) map[string]string { + statuses := make(map[string]string) + + // Build a lookup map from task metadata + taskContainers := make(map[string]string) + for _, container := range taskMeta.Containers { + taskContainers[container.Name] = container.Health.Status + } + + // Map each requested container to its status + for _, name := range containerNames { + if status, found := taskContainers[name]; found { + statuses[name] = status + } else { + statuses[name] = ecs.HealthStatusUnhealthy } } - return ecsContainers, missing + + return statuses } func ecsHealthToConsulHealth(ecsHealth string) string { diff --git a/subcommand/health-sync/checks_test.go b/subcommand/health-sync/checks_test.go index 6b022fe0..779f6eef 100644 --- a/subcommand/health-sync/checks_test.go +++ b/subcommand/health-sync/checks_test.go @@ -4,10 +4,12 @@ package healthsync import ( + "fmt" "testing" "github.com/aws/aws-sdk-go/service/ecs" "github.com/hashicorp/consul-ecs/awsutil" + "github.com/hashicorp/consul-ecs/config" "github.com/hashicorp/consul/api" "github.com/stretchr/testify/require" ) @@ -19,52 +21,295 @@ func TestEcsHealthToConsulHealth(t *testing.T) { require.Equal(t, api.HealthCritical, ecsHealthToConsulHealth("")) } -func TestFindContainersToSync(t *testing.T) { - taskMetaContainer1 := awsutil.ECSTaskMetaContainer{ - Name: "container1", - } - +func TestGetContainerHealthStatuses(t *testing.T) { cases := map[string]struct { containerNames []string taskMeta awsutil.ECSTaskMeta - missing []string - found []awsutil.ECSTaskMetaContainer + expected map[string]string }{ - "A container isn't in the metadata": { - containerNames: []string{"container1"}, + "all containers present and healthy": { + containerNames: []string{"app", "sidecar"}, + taskMeta: awsutil.ECSTaskMeta{ + Containers: []awsutil.ECSTaskMetaContainer{ + {Name: "app", Health: awsutil.ECSTaskMetaHealth{Status: ecs.HealthStatusHealthy}}, + {Name: 
"sidecar", Health: awsutil.ECSTaskMetaHealth{Status: ecs.HealthStatusHealthy}}, + }, + }, + expected: map[string]string{ + "app": ecs.HealthStatusHealthy, + "sidecar": ecs.HealthStatusHealthy, + }, + }, + "one container unhealthy": { + containerNames: []string{"app", "sidecar"}, + taskMeta: awsutil.ECSTaskMeta{ + Containers: []awsutil.ECSTaskMetaContainer{ + {Name: "app", Health: awsutil.ECSTaskMetaHealth{Status: ecs.HealthStatusHealthy}}, + {Name: "sidecar", Health: awsutil.ECSTaskMetaHealth{Status: ecs.HealthStatusUnhealthy}}, + }, + }, + expected: map[string]string{ + "app": ecs.HealthStatusHealthy, + "sidecar": ecs.HealthStatusUnhealthy, + }, + }, + "container missing from metadata": { + containerNames: []string{"app", "sidecar"}, + taskMeta: awsutil.ECSTaskMeta{ + Containers: []awsutil.ECSTaskMetaContainer{ + {Name: "app", Health: awsutil.ECSTaskMetaHealth{Status: ecs.HealthStatusHealthy}}, + }, + }, + expected: map[string]string{ + "app": ecs.HealthStatusHealthy, + "sidecar": ecs.HealthStatusUnhealthy, + }, + }, + "all containers missing": { + containerNames: []string{"app", "sidecar"}, taskMeta: awsutil.ECSTaskMeta{}, - missing: []string{"container1"}, - found: nil, + expected: map[string]string{ + "app": ecs.HealthStatusUnhealthy, + "sidecar": ecs.HealthStatusUnhealthy, + }, }, - "The metadata has an extra container": { + "empty container list": { containerNames: []string{}, taskMeta: awsutil.ECSTaskMeta{ Containers: []awsutil.ECSTaskMetaContainer{ - taskMetaContainer1, + {Name: "app", Health: awsutil.ECSTaskMetaHealth{Status: ecs.HealthStatusHealthy}}, + }, + }, + expected: map[string]string{}, + }, + "extra containers in metadata ignored": { + containerNames: []string{"app"}, + taskMeta: awsutil.ECSTaskMeta{ + Containers: []awsutil.ECSTaskMetaContainer{ + {Name: "app", Health: awsutil.ECSTaskMetaHealth{Status: ecs.HealthStatusHealthy}}, + {Name: "extra", Health: awsutil.ECSTaskMetaHealth{Status: ecs.HealthStatusHealthy}}, }, }, - missing: nil, - found: 
nil, + expected: map[string]string{ + "app": ecs.HealthStatusHealthy, + }, }, - "some found and some not found": { - containerNames: []string{"container1", "container2"}, + "unknown status preserved": { + containerNames: []string{"app"}, taskMeta: awsutil.ECSTaskMeta{ Containers: []awsutil.ECSTaskMetaContainer{ - taskMetaContainer1, + {Name: "app", Health: awsutil.ECSTaskMetaHealth{Status: ecs.HealthStatusUnknown}}, }, }, - missing: []string{"container2"}, - found: []awsutil.ECSTaskMetaContainer{ - taskMetaContainer1, + expected: map[string]string{ + "app": ecs.HealthStatusUnknown, }, }, } - for name, testData := range cases { + for name, tc := range cases { t.Run(name, func(t *testing.T) { - found, missing := findContainersToSync(testData.containerNames, testData.taskMeta) - require.Equal(t, testData.missing, missing) - require.Equal(t, testData.found, found) + result := getContainerHealthStatuses(tc.containerNames, tc.taskMeta) + require.Equal(t, tc.expected, result) + }) + } +} + +func TestComputeOverallDataplaneHealth(t *testing.T) { + cases := map[string]struct { + containerStatuses map[string]string + expected string + }{ + "all healthy": { + containerStatuses: map[string]string{ + "app": ecs.HealthStatusHealthy, + "sidecar": ecs.HealthStatusHealthy, + }, + expected: ecs.HealthStatusHealthy, + }, + "one unhealthy": { + containerStatuses: map[string]string{ + "app": ecs.HealthStatusHealthy, + "sidecar": ecs.HealthStatusUnhealthy, + }, + expected: ecs.HealthStatusUnhealthy, + }, + "one unknown": { + containerStatuses: map[string]string{ + "app": ecs.HealthStatusHealthy, + "sidecar": ecs.HealthStatusUnknown, + }, + expected: ecs.HealthStatusUnhealthy, + }, + "all unhealthy": { + containerStatuses: map[string]string{ + "app": ecs.HealthStatusUnhealthy, + "sidecar": ecs.HealthStatusUnhealthy, + }, + expected: ecs.HealthStatusUnhealthy, + }, + "empty map treated as unhealthy": { + // This should not happen in practice since containerNames always + // includes at 
least the dataplane container. Treated as unhealthy to be safe. + containerStatuses: map[string]string{}, + expected: ecs.HealthStatusUnhealthy, + }, + "single healthy": { + containerStatuses: map[string]string{ + "app": ecs.HealthStatusHealthy, + }, + expected: ecs.HealthStatusHealthy, + }, + "single unhealthy": { + containerStatuses: map[string]string{ + "app": ecs.HealthStatusUnhealthy, + }, + expected: ecs.HealthStatusUnhealthy, + }, + } + + for name, tc := range cases { + t.Run(name, func(t *testing.T) { + result := computeOverallDataplaneHealth(tc.containerStatuses) + require.Equal(t, tc.expected, result) + }) + } +} + +func TestComputeCheckStatuses(t *testing.T) { + const ( + serviceID = "test-service-12345" + dataplaneContainer = config.ConsulDataplaneContainerName + ) + + // Expected check IDs for non-gateway + serviceCheckID := constructCheckID(serviceID, dataplaneContainer) + proxySvcID, _ := makeProxySvcIDAndName(serviceID, "") + proxyCheckID := constructCheckID(proxySvcID, dataplaneContainer) + appCheckID := constructCheckID(serviceID, "app") + + cases := map[string]struct { + isGateway bool + containerNames []string + containerStatuses map[string]string + expectedConsulStatuses map[string]string + expectedOutputs map[string]string // optional, only checked if non-nil + }{ + "non-gateway all healthy": { + isGateway: false, + containerNames: []string{"app", dataplaneContainer}, + containerStatuses: map[string]string{ + "app": ecs.HealthStatusHealthy, + dataplaneContainer: ecs.HealthStatusHealthy, + }, + expectedConsulStatuses: map[string]string{ + appCheckID: api.HealthPassing, + serviceCheckID: api.HealthPassing, + proxyCheckID: api.HealthPassing, + }, + expectedOutputs: map[string]string{ + appCheckID: fmt.Sprintf("ECS health status is %q for container %q", ecs.HealthStatusHealthy, appCheckID), + }, + }, + "non-gateway app unhealthy affects overall health": { + isGateway: false, + containerNames: []string{"app", dataplaneContainer}, + 
containerStatuses: map[string]string{ + "app": ecs.HealthStatusUnhealthy, + dataplaneContainer: ecs.HealthStatusHealthy, + }, + expectedConsulStatuses: map[string]string{ + appCheckID: api.HealthCritical, + serviceCheckID: api.HealthCritical, + proxyCheckID: api.HealthCritical, + }, + expectedOutputs: map[string]string{ + appCheckID: fmt.Sprintf("ECS health status is %q for container %q", ecs.HealthStatusUnhealthy, appCheckID), + }, + }, + "non-gateway dataplane unhealthy": { + isGateway: false, + containerNames: []string{"app", dataplaneContainer}, + containerStatuses: map[string]string{ + "app": ecs.HealthStatusHealthy, + dataplaneContainer: ecs.HealthStatusUnhealthy, + }, + expectedConsulStatuses: map[string]string{ + appCheckID: api.HealthPassing, + serviceCheckID: api.HealthCritical, + proxyCheckID: api.HealthCritical, + }, + }, + "non-gateway dataplane only": { + isGateway: false, + containerNames: []string{dataplaneContainer}, + containerStatuses: map[string]string{ + dataplaneContainer: ecs.HealthStatusHealthy, + }, + expectedConsulStatuses: map[string]string{ + serviceCheckID: api.HealthPassing, + proxyCheckID: api.HealthPassing, + }, + }, + "gateway healthy": { + isGateway: true, + containerNames: []string{dataplaneContainer}, + containerStatuses: map[string]string{ + dataplaneContainer: ecs.HealthStatusHealthy, + }, + expectedConsulStatuses: map[string]string{ + serviceCheckID: api.HealthPassing, + }, + }, + "gateway unhealthy": { + isGateway: true, + containerNames: []string{dataplaneContainer}, + containerStatuses: map[string]string{ + dataplaneContainer: ecs.HealthStatusUnhealthy, + }, + expectedConsulStatuses: map[string]string{ + serviceCheckID: api.HealthCritical, + }, + }, + "gateway no proxy check": { + isGateway: true, + containerNames: []string{"app", dataplaneContainer}, + containerStatuses: map[string]string{ + "app": ecs.HealthStatusHealthy, + dataplaneContainer: ecs.HealthStatusHealthy, + }, + expectedConsulStatuses: map[string]string{ + 
appCheckID: api.HealthPassing, + serviceCheckID: api.HealthPassing, + }, + }, + } + + for name, tc := range cases { + t.Run(name, func(t *testing.T) { + cmd := &Command{ + config: &config.Config{}, + } + if tc.isGateway { + cmd.config.Gateway = &config.GatewayRegistration{ + Kind: api.ServiceKindMeshGateway, + } + } + + result := cmd.computeCheckStatuses(serviceID, tc.containerNames, tc.containerStatuses) + + // Check consul statuses + for checkID, expectedStatus := range tc.expectedConsulStatuses { + require.Equal(t, expectedStatus, result[checkID].consulStatus, "consul status mismatch for %s", checkID) + } + + // Check output messages if specified + for checkID, expectedOutput := range tc.expectedOutputs { + require.Equal(t, expectedOutput, result[checkID].output, "output mismatch for %s", checkID) + } + + // Verify no extra checks + require.Len(t, result, len(tc.expectedConsulStatuses)) }) } } diff --git a/subcommand/health-sync/command.go b/subcommand/health-sync/command.go index a57a6303..ebdff2da 100644 --- a/subcommand/health-sync/command.go +++ b/subcommand/health-sync/command.go @@ -136,7 +136,7 @@ func (c *Command) realRun() error { var healthSyncContainers []string healthSyncContainers = append(healthSyncContainers, c.config.HealthSyncContainers...) 
healthSyncContainers = append(healthSyncContainers, config.ConsulDataplaneContainerName) - currentHealthStatuses := make(map[string]string) + currentHealthStatuses := make(map[string]checkStatus) c.checks, err = c.fetchHealthChecks(consulClient, taskMeta) if err != nil { diff --git a/subcommand/health-sync/command_test.go b/subcommand/health-sync/command_test.go index 3f4322c8..3407fa0a 100644 --- a/subcommand/health-sync/command_test.go +++ b/subcommand/health-sync/command_test.go @@ -4,11 +4,13 @@ package healthsync import ( + "bytes" "context" "encoding/json" "fmt" "os" "path/filepath" + "regexp" "sync/atomic" "syscall" "testing" @@ -22,6 +24,7 @@ import ( "github.com/hashicorp/consul-ecs/testutil" "github.com/hashicorp/consul-server-connection-manager/discovery" "github.com/hashicorp/consul/api" + "github.com/hashicorp/go-hclog" "github.com/hashicorp/serf/testutil/retry" "github.com/mitchellh/cli" "github.com/stretchr/testify/require" @@ -145,7 +148,7 @@ func TestRun(t *testing.T) { status: ecs.HealthStatusUnhealthy, }, }, - expectedDataplaneHealthStatus: api.HealthPassing, + expectedDataplaneHealthStatus: api.HealthCritical, consulLogin: consulLoginCfg, }, "two unhealthy health sync containers": { @@ -201,10 +204,10 @@ func TestRun(t *testing.T) { }, "container-2": { missing: true, - status: ecs.HealthStatusUnhealthy, + status: ecs.HealthStatusHealthy, }, }, - expectedDataplaneHealthStatus: api.HealthPassing, + expectedDataplaneHealthStatus: api.HealthCritical, shouldMissingContainersReappear: true, consulLogin: consulLoginCfg, }, @@ -381,6 +384,7 @@ func TestRun(t *testing.T) { if expCheck.CheckID == checkID { if hsc.missing { expCheck.Status = api.HealthCritical + markDataplaneContainerUnhealthy = true } else { expCheck.Status = ecsHealthToConsulHealth(hsc.status) // If there are multiple health sync containers and one of them is unhealthy @@ -446,8 +450,9 @@ func TestRun(t *testing.T) { if c.shouldMissingContainersReappear { // Mark all containers as non 
missing c.missingDataplaneContainer = false - for _, hsc := range c.healthSyncContainers { + for name, hsc := range c.healthSyncContainers { hsc.missing = false + c.healthSyncContainers[name] = hsc } // Add the containers data into task meta response @@ -841,6 +846,18 @@ func injectContainersIntoTaskMetaResponse(t *testing.T, taskMetadataResponse *aw return taskMetaRespStr } +func buildTaskMetaWithContainers(t *testing.T, taskMetadataResponse *awsutil.ECSTaskMeta, containers map[string]string) string { + var taskMetaContainersResponse []awsutil.ECSTaskMetaContainer + for name, status := range containers { + taskMetaContainersResponse = append(taskMetaContainersResponse, constructContainerResponse(name, status)) + } + taskMetadataResponse.Containers = taskMetaContainersResponse + taskMetaRespStr, err := constructTaskMetaResponseString(taskMetadataResponse) + require.NoError(t, err) + + return taskMetaRespStr +} + func constructContainerResponse(name, health string) awsutil.ECSTaskMetaContainer { return awsutil.ECSTaskMetaContainer{ Name: name, @@ -926,3 +943,497 @@ func registerNode(t *testing.T, consulClient *api.Client, taskMeta awsutil.ECSTa _, err = consulClient.Catalog().Register(payload, nil) require.NoError(t, err) } + +// expectedCheck represents the expected state of a health check +type expectedCheck struct { + serviceName string + checkID string + status string // api.HealthPassing or api.HealthCritical +} + +// assertCheckStatuses verifies all expected checks exist and have the expected status, with retries +func assertCheckStatuses(t *testing.T, client *api.Client, expectedChecks []expectedCheck, opts *api.QueryOptions) { + timer := &retry.Timer{Timeout: 5 * time.Second, Wait: 500 * time.Millisecond} + retry.RunWith(timer, t, func(r *retry.R) { + for _, exp := range expectedChecks { + filter := fmt.Sprintf("CheckID == `%s`", exp.checkID) + queryOpts := &api.QueryOptions{ + Filter: filter, + Namespace: opts.Namespace, + Partition: opts.Partition, + } + 
checks, _, err := client.Health().Checks(exp.serviceName, queryOpts) + require.NoError(r, err) + require.Len(r, checks, 1, "expected exactly one check with ID %s", exp.checkID) + require.Equal(r, exp.status, checks[0].Status, "check %s has unexpected status", exp.checkID) + } + }) +} + +// extractUpdatedCheckIDs parses log output and returns a set of checkIDs that were updated +func extractUpdatedCheckIDs(logContent string) map[string]bool { + updated := make(map[string]bool) + // Match log lines like: health check updated in Consul: checkID=xxx status=yyy + re := regexp.MustCompile(`health check updated in Consul: checkID=([^\s]+)`) + matches := re.FindAllStringSubmatch(logContent, -1) + for _, match := range matches { + if len(match) >= 2 { + updated[match[1]] = true + } + } + return updated +} + +// getExpectedUpdatedCheckIDs returns the set of checkIDs that should have been updated +// (i.e., those whose status changed between before and after) +func getExpectedUpdatedCheckIDs(before, after []expectedCheck) map[string]bool { + beforeStatus := make(map[string]string) + for _, c := range before { + beforeStatus[c.checkID] = c.status + } + + expected := make(map[string]bool) + for _, c := range after { + if beforeStatus[c.checkID] != c.status { + expected[c.checkID] = true + } + } + return expected +} + +// assertExpectedUpdates verifies that exactly the expected checks were updated (no more, no less) +func assertExpectedUpdates(t *testing.T, logContent string, expectedBefore, expectedAfter []expectedCheck) { + actualUpdates := extractUpdatedCheckIDs(logContent) + expectedUpdates := getExpectedUpdatedCheckIDs(expectedBefore, expectedAfter) + + // Check for missing updates (expected but not found in logs) + for checkID := range expectedUpdates { + require.True(t, actualUpdates[checkID], + "expected check %s to be updated but no log entry found", checkID) + } + + // Check for unexpected updates (found in logs but not expected) + for checkID := range actualUpdates { 
+ require.True(t, expectedUpdates[checkID], + "check %s was updated but should not have been", checkID) + } +} + +type syncChecksTestConfig struct { + service *config.ServiceRegistration + proxy *config.AgentServiceConnectProxyConfig + gateway *config.GatewayRegistration + healthSyncContainers []string +} + +type syncChecksTestEnvironment struct { + consulClient *api.Client + apiQueryOptions *api.QueryOptions + cmd *Command + clusterARN string + containerNames []string + taskMetadataResponse *awsutil.ECSTaskMeta + currentTaskMetaResp *atomic.Value + logBuffer *bytes.Buffer +} + +func setupSyncChecksTest(t *testing.T, cfg syncChecksTestConfig) *syncChecksTestEnvironment { + var ( + partition = "" + namespace = "" + ) + + if testutil.EnterpriseFlag() { + partition = "foo" + namespace = "default" + } + + server, consulCfg := testutil.ConsulServer(t, nil) + consulClient, err := api.NewClient(consulCfg) + require.NoError(t, err) + + if testutil.EnterpriseFlag() { + createPartitionAndNamespace(t, consulClient, partition, namespace) + } + + _, serverGRPCPort := testutil.GetHostAndPortFromAddress(server.GRPCAddr) + _, serverHTTPPort := testutil.GetHostAndPortFromAddress(server.HTTPAddr) + + taskMetadataResponse := &awsutil.ECSTaskMeta{ + Cluster: "arn:aws:ecs:us-east-1:123456789:cluster/test", + TaskARN: "arn:aws:ecs:us-east-1:123456789:task/test/abcdef", + Family: "test-family", + } + taskMetaRespStr, err := constructTaskMetaResponseString(taskMetadataResponse) + require.NoError(t, err) + + var currentTaskMetaResp atomic.Value + currentTaskMetaResp.Store(taskMetaRespStr) + testutil.TaskMetaServer(t, testutil.TaskMetaHandlerFn(t, + func() string { + return currentTaskMetaResp.Load().(string) + }, + )) + + envoyBootstrapDir := testutil.TempDir(t) + + consulEcsConfig := config.Config{ + LogLevel: "DEBUG", + BootstrapDir: envoyBootstrapDir, + HealthSyncContainers: cfg.healthSyncContainers, + ConsulServers: config.ConsulServers{ + Hosts: "127.0.0.1", + GRPC: 
config.GRPCSettings{ + Port: serverGRPCPort, + }, + HTTP: config.HTTPSettings{ + Port: serverHTTPPort, + }, + SkipServerWatch: true, + }, + } + + if cfg.gateway != nil { + consulEcsConfig.Gateway = cfg.gateway + if testutil.EnterpriseFlag() { + consulEcsConfig.Gateway.Namespace = namespace + consulEcsConfig.Gateway.Partition = partition + } + } else { + consulEcsConfig.Service = *cfg.service + consulEcsConfig.Proxy = cfg.proxy + if testutil.EnterpriseFlag() { + consulEcsConfig.Service.Namespace = namespace + consulEcsConfig.Service.Partition = partition + } + } + + testutil.SetECSConfigEnvVar(t, &consulEcsConfig) + + // Run mesh-init to register services and create health checks + ui := cli.NewMockUi() + ctrlPlaneCmd := meshinit.Command{UI: ui} + code := ctrlPlaneCmd.Run(nil) + require.Equal(t, 0, code, ui.ErrorWriter.String()) + + // Set up the Command with a logger that writes to a buffer for testing + logBuf := &bytes.Buffer{} + logger := hclog.New(&hclog.LoggerOptions{ + Level: hclog.LevelFromString(consulEcsConfig.LogLevel), + Output: logBuf, + }) + + cmd := &Command{UI: ui} + cmd.config = &consulEcsConfig + cmd.log = logger + + taskMeta, err := awsutil.ECSTaskMetadata() + require.NoError(t, err) + + cmd.checks, err = cmd.fetchHealthChecks(consulClient, taskMeta) + require.NoError(t, err) + + clusterARN, err := taskMeta.ClusterARN() + require.NoError(t, err) + + return &syncChecksTestEnvironment{ + consulClient: consulClient, + apiQueryOptions: &api.QueryOptions{ + Namespace: namespace, + Partition: partition, + }, + cmd: cmd, + clusterARN: clusterARN, + containerNames: append(cfg.healthSyncContainers, config.ConsulDataplaneContainerName), + taskMetadataResponse: taskMetadataResponse, + currentTaskMetaResp: &currentTaskMetaResp, + logBuffer: logBuf, + } +} + +// TestSyncChecks_ChangeDetection tests that syncChecks correctly updates Consul +// health checks and only updates when status actually changes. 
+func TestSyncChecks_ChangeDetection(t *testing.T) { + serviceName := "test-service" + proxyServiceName := fmt.Sprintf("%s-sidecar-proxy", serviceName) + servicePort := 8080 + taskID := "abcdef" + + cases := map[string]struct { + healthSyncContainers []string + startingContainers map[string]string + expectedChecksBeforeUpdate []expectedCheck + updatedContainers map[string]string + expectedChecksAfterUpdate []expectedCheck + }{ + "no change should not update consul": { + healthSyncContainers: []string{"app"}, + startingContainers: map[string]string{ + "app": ecs.HealthStatusHealthy, + config.ConsulDataplaneContainerName: ecs.HealthStatusHealthy, + }, + expectedChecksBeforeUpdate: []expectedCheck{ + {serviceName: serviceName, checkID: fmt.Sprintf("%s-%s-app", serviceName, taskID), status: api.HealthPassing}, + {serviceName: serviceName, checkID: fmt.Sprintf("%s-%s-consul-dataplane", serviceName, taskID), status: api.HealthPassing}, + {serviceName: proxyServiceName, checkID: fmt.Sprintf("%s-%s-sidecar-proxy-consul-dataplane", serviceName, taskID), status: api.HealthPassing}, + }, + updatedContainers: map[string]string{ + "app": ecs.HealthStatusHealthy, + config.ConsulDataplaneContainerName: ecs.HealthStatusHealthy, + }, + expectedChecksAfterUpdate: []expectedCheck{ + {serviceName: serviceName, checkID: fmt.Sprintf("%s-%s-app", serviceName, taskID), status: api.HealthPassing}, + {serviceName: serviceName, checkID: fmt.Sprintf("%s-%s-consul-dataplane", serviceName, taskID), status: api.HealthPassing}, + {serviceName: proxyServiceName, checkID: fmt.Sprintf("%s-%s-sidecar-proxy-consul-dataplane", serviceName, taskID), status: api.HealthPassing}, + }, + }, + "healthy to unhealthy should update all checks": { + healthSyncContainers: []string{"app"}, + startingContainers: map[string]string{ + "app": ecs.HealthStatusHealthy, + config.ConsulDataplaneContainerName: ecs.HealthStatusHealthy, + }, + expectedChecksBeforeUpdate: []expectedCheck{ + {serviceName: serviceName, checkID: 
fmt.Sprintf("%s-%s-app", serviceName, taskID), status: api.HealthPassing}, + {serviceName: serviceName, checkID: fmt.Sprintf("%s-%s-consul-dataplane", serviceName, taskID), status: api.HealthPassing}, + {serviceName: proxyServiceName, checkID: fmt.Sprintf("%s-%s-sidecar-proxy-consul-dataplane", serviceName, taskID), status: api.HealthPassing}, + }, + updatedContainers: map[string]string{ + "app": ecs.HealthStatusUnhealthy, + config.ConsulDataplaneContainerName: ecs.HealthStatusHealthy, + }, + expectedChecksAfterUpdate: []expectedCheck{ + {serviceName: serviceName, checkID: fmt.Sprintf("%s-%s-app", serviceName, taskID), status: api.HealthCritical}, + {serviceName: serviceName, checkID: fmt.Sprintf("%s-%s-consul-dataplane", serviceName, taskID), status: api.HealthCritical}, + {serviceName: proxyServiceName, checkID: fmt.Sprintf("%s-%s-sidecar-proxy-consul-dataplane", serviceName, taskID), status: api.HealthCritical}, + }, + }, + "one of two containers becomes unhealthy": { + healthSyncContainers: []string{"app1", "app2"}, + startingContainers: map[string]string{ + "app1": ecs.HealthStatusHealthy, + "app2": ecs.HealthStatusHealthy, + config.ConsulDataplaneContainerName: ecs.HealthStatusHealthy, + }, + expectedChecksBeforeUpdate: []expectedCheck{ + {serviceName: serviceName, checkID: fmt.Sprintf("%s-%s-app1", serviceName, taskID), status: api.HealthPassing}, + {serviceName: serviceName, checkID: fmt.Sprintf("%s-%s-app2", serviceName, taskID), status: api.HealthPassing}, + {serviceName: serviceName, checkID: fmt.Sprintf("%s-%s-consul-dataplane", serviceName, taskID), status: api.HealthPassing}, + {serviceName: proxyServiceName, checkID: fmt.Sprintf("%s-%s-sidecar-proxy-consul-dataplane", serviceName, taskID), status: api.HealthPassing}, + }, + updatedContainers: map[string]string{ + "app1": ecs.HealthStatusUnhealthy, + "app2": ecs.HealthStatusHealthy, + config.ConsulDataplaneContainerName: ecs.HealthStatusHealthy, + }, + expectedChecksAfterUpdate: []expectedCheck{ + 
{serviceName: serviceName, checkID: fmt.Sprintf("%s-%s-app1", serviceName, taskID), status: api.HealthCritical}, + {serviceName: serviceName, checkID: fmt.Sprintf("%s-%s-app2", serviceName, taskID), status: api.HealthPassing}, + {serviceName: serviceName, checkID: fmt.Sprintf("%s-%s-consul-dataplane", serviceName, taskID), status: api.HealthCritical}, + {serviceName: proxyServiceName, checkID: fmt.Sprintf("%s-%s-sidecar-proxy-consul-dataplane", serviceName, taskID), status: api.HealthCritical}, + }, + }, + "unhealthy to healthy recovery": { + healthSyncContainers: []string{"app"}, + startingContainers: map[string]string{ + "app": ecs.HealthStatusUnhealthy, + config.ConsulDataplaneContainerName: ecs.HealthStatusHealthy, + }, + expectedChecksBeforeUpdate: []expectedCheck{ + {serviceName: serviceName, checkID: fmt.Sprintf("%s-%s-app", serviceName, taskID), status: api.HealthCritical}, + {serviceName: serviceName, checkID: fmt.Sprintf("%s-%s-consul-dataplane", serviceName, taskID), status: api.HealthCritical}, + {serviceName: proxyServiceName, checkID: fmt.Sprintf("%s-%s-sidecar-proxy-consul-dataplane", serviceName, taskID), status: api.HealthCritical}, + }, + updatedContainers: map[string]string{ + "app": ecs.HealthStatusHealthy, + config.ConsulDataplaneContainerName: ecs.HealthStatusHealthy, + }, + expectedChecksAfterUpdate: []expectedCheck{ + {serviceName: serviceName, checkID: fmt.Sprintf("%s-%s-app", serviceName, taskID), status: api.HealthPassing}, + {serviceName: serviceName, checkID: fmt.Sprintf("%s-%s-consul-dataplane", serviceName, taskID), status: api.HealthPassing}, + {serviceName: proxyServiceName, checkID: fmt.Sprintf("%s-%s-sidecar-proxy-consul-dataplane", serviceName, taskID), status: api.HealthPassing}, + }, + }, + "dataplane container goes missing": { + healthSyncContainers: []string{"app"}, + startingContainers: map[string]string{ + "app": ecs.HealthStatusHealthy, + config.ConsulDataplaneContainerName: ecs.HealthStatusHealthy, + }, + 
expectedChecksBeforeUpdate: []expectedCheck{ + {serviceName: serviceName, checkID: fmt.Sprintf("%s-%s-app", serviceName, taskID), status: api.HealthPassing}, + {serviceName: serviceName, checkID: fmt.Sprintf("%s-%s-consul-dataplane", serviceName, taskID), status: api.HealthPassing}, + {serviceName: proxyServiceName, checkID: fmt.Sprintf("%s-%s-sidecar-proxy-consul-dataplane", serviceName, taskID), status: api.HealthPassing}, + }, + updatedContainers: map[string]string{ + "app": ecs.HealthStatusHealthy, + // dataplane container is missing from task metadata + }, + expectedChecksAfterUpdate: []expectedCheck{ + // app check remains passing since app container is still healthy + {serviceName: serviceName, checkID: fmt.Sprintf("%s-%s-app", serviceName, taskID), status: api.HealthPassing}, + // dataplane checks become critical because dataplane container is missing + {serviceName: serviceName, checkID: fmt.Sprintf("%s-%s-consul-dataplane", serviceName, taskID), status: api.HealthCritical}, + {serviceName: proxyServiceName, checkID: fmt.Sprintf("%s-%s-sidecar-proxy-consul-dataplane", serviceName, taskID), status: api.HealthCritical}, + }, + }, + "missing container treated as unhealthy": { + healthSyncContainers: []string{"app"}, + startingContainers: map[string]string{ + config.ConsulDataplaneContainerName: ecs.HealthStatusHealthy, + }, + expectedChecksBeforeUpdate: []expectedCheck{ + {serviceName: serviceName, checkID: fmt.Sprintf("%s-%s-app", serviceName, taskID), status: api.HealthCritical}, + {serviceName: serviceName, checkID: fmt.Sprintf("%s-%s-consul-dataplane", serviceName, taskID), status: api.HealthCritical}, + {serviceName: proxyServiceName, checkID: fmt.Sprintf("%s-%s-sidecar-proxy-consul-dataplane", serviceName, taskID), status: api.HealthCritical}, + }, + updatedContainers: map[string]string{ + config.ConsulDataplaneContainerName: ecs.HealthStatusHealthy, + }, + expectedChecksAfterUpdate: []expectedCheck{ + {serviceName: serviceName, checkID: 
fmt.Sprintf("%s-%s-app", serviceName, taskID), status: api.HealthCritical}, + {serviceName: serviceName, checkID: fmt.Sprintf("%s-%s-consul-dataplane", serviceName, taskID), status: api.HealthCritical}, + {serviceName: proxyServiceName, checkID: fmt.Sprintf("%s-%s-sidecar-proxy-consul-dataplane", serviceName, taskID), status: api.HealthCritical}, + }, + }, + "container reappears healthy": { + healthSyncContainers: []string{"app"}, + startingContainers: map[string]string{ + config.ConsulDataplaneContainerName: ecs.HealthStatusHealthy, + }, + expectedChecksBeforeUpdate: []expectedCheck{ + {serviceName: serviceName, checkID: fmt.Sprintf("%s-%s-app", serviceName, taskID), status: api.HealthCritical}, + {serviceName: serviceName, checkID: fmt.Sprintf("%s-%s-consul-dataplane", serviceName, taskID), status: api.HealthCritical}, + {serviceName: proxyServiceName, checkID: fmt.Sprintf("%s-%s-sidecar-proxy-consul-dataplane", serviceName, taskID), status: api.HealthCritical}, + }, + updatedContainers: map[string]string{ + "app": ecs.HealthStatusHealthy, + config.ConsulDataplaneContainerName: ecs.HealthStatusHealthy, + }, + expectedChecksAfterUpdate: []expectedCheck{ + {serviceName: serviceName, checkID: fmt.Sprintf("%s-%s-app", serviceName, taskID), status: api.HealthPassing}, + {serviceName: serviceName, checkID: fmt.Sprintf("%s-%s-consul-dataplane", serviceName, taskID), status: api.HealthPassing}, + {serviceName: proxyServiceName, checkID: fmt.Sprintf("%s-%s-sidecar-proxy-consul-dataplane", serviceName, taskID), status: api.HealthPassing}, + }, + }, + } + + for name, tc := range cases { + tc := tc + t.Run(name, func(t *testing.T) { + env := setupSyncChecksTest(t, syncChecksTestConfig{ + service: &config.ServiceRegistration{ + Name: serviceName, + Port: servicePort, + }, + proxy: &config.AgentServiceConnectProxyConfig{ + PublicListenerPort: config.DefaultPublicListenerPort, + }, + healthSyncContainers: tc.healthSyncContainers, + }) + + // Set up initial container state + 
env.currentTaskMetaResp.Store(buildTaskMetaWithContainers(t, env.taskMetadataResponse, tc.startingContainers)) + + // First syncChecks call + statuses := env.cmd.syncChecks(env.consulClient, map[string]checkStatus{}, env.clusterARN, env.containerNames) + + // Assert expected checks after first call + assertCheckStatuses(t, env.consulClient, tc.expectedChecksBeforeUpdate, env.apiQueryOptions) + + // Update container state for second call + env.currentTaskMetaResp.Store(buildTaskMetaWithContainers(t, env.taskMetadataResponse, tc.updatedContainers)) + + // Clear log buffer before second call to isolate its log output + env.logBuffer.Reset() + + // Second syncChecks call + statuses = env.cmd.syncChecks(env.consulClient, statuses, env.clusterARN, env.containerNames) + + // Assert expected checks after second call + assertCheckStatuses(t, env.consulClient, tc.expectedChecksAfterUpdate, env.apiQueryOptions) + + // Verify exactly the expected checks were updated (no more, no less) + assertExpectedUpdates(t, env.logBuffer.String(), tc.expectedChecksBeforeUpdate, tc.expectedChecksAfterUpdate) + }) + } +} + +// TestSyncChecks_Gateway_ChangeDetection tests syncChecks change detection for gateway services +func TestSyncChecks_Gateway_ChangeDetection(t *testing.T) { + serviceName := "test-mesh-gateway" + taskID := "abcdef" + + cases := map[string]struct { + startingContainers map[string]string + expectedChecksBeforeUpdate []expectedCheck + updatedContainers map[string]string + expectedChecksAfterUpdate []expectedCheck + }{ + "no change should not update consul": { + startingContainers: map[string]string{ + config.ConsulDataplaneContainerName: ecs.HealthStatusHealthy, + }, + expectedChecksBeforeUpdate: []expectedCheck{ + {serviceName: serviceName, checkID: fmt.Sprintf("%s-%s-consul-dataplane", serviceName, taskID), status: api.HealthPassing}, + }, + updatedContainers: map[string]string{ + config.ConsulDataplaneContainerName: ecs.HealthStatusHealthy, + }, + 
expectedChecksAfterUpdate: []expectedCheck{ + {serviceName: serviceName, checkID: fmt.Sprintf("%s-%s-consul-dataplane", serviceName, taskID), status: api.HealthPassing}, + }, + }, + "healthy to missing dataplane should update": { + startingContainers: map[string]string{ + config.ConsulDataplaneContainerName: ecs.HealthStatusHealthy, + }, + expectedChecksBeforeUpdate: []expectedCheck{ + {serviceName: serviceName, checkID: fmt.Sprintf("%s-%s-consul-dataplane", serviceName, taskID), status: api.HealthPassing}, + }, + updatedContainers: map[string]string{}, + expectedChecksAfterUpdate: []expectedCheck{ + {serviceName: serviceName, checkID: fmt.Sprintf("%s-%s-consul-dataplane", serviceName, taskID), status: api.HealthCritical}, + }, + }, + } + + for name, tc := range cases { + tc := tc + t.Run(name, func(t *testing.T) { + env := setupSyncChecksTest(t, syncChecksTestConfig{ + gateway: &config.GatewayRegistration{ + Kind: api.ServiceKindMeshGateway, + Name: serviceName, + LanAddress: &config.GatewayAddress{ + Port: 12345, + }, + }, + }) + + // Set up initial container state + env.currentTaskMetaResp.Store(buildTaskMetaWithContainers(t, env.taskMetadataResponse, tc.startingContainers)) + + // First syncChecks call + statuses := env.cmd.syncChecks(env.consulClient, map[string]checkStatus{}, env.clusterARN, env.containerNames) + + // Assert expected checks after first call + assertCheckStatuses(t, env.consulClient, tc.expectedChecksBeforeUpdate, env.apiQueryOptions) + + // Update container state for second call + env.currentTaskMetaResp.Store(buildTaskMetaWithContainers(t, env.taskMetadataResponse, tc.updatedContainers)) + + // Clear log buffer before second call to isolate its log output + env.logBuffer.Reset() + + // Second syncChecks call + statuses = env.cmd.syncChecks(env.consulClient, statuses, env.clusterARN, env.containerNames) + + // Assert expected checks after second call + assertCheckStatuses(t, env.consulClient, tc.expectedChecksAfterUpdate, env.apiQueryOptions) 
+ + // Verify exactly the expected checks were updated (no more, no less) + assertExpectedUpdates(t, env.logBuffer.String(), tc.expectedChecksBeforeUpdate, tc.expectedChecksAfterUpdate) + }) + } +}