diff --git a/.pipelines/azure_pipeline_mergedbranches.yaml b/.pipelines/azure_pipeline_mergedbranches.yaml index 2e8d3cf51..2baf037ac 100644 --- a/.pipelines/azure_pipeline_mergedbranches.yaml +++ b/.pipelines/azure_pipeline_mergedbranches.yaml @@ -43,6 +43,7 @@ extends: - ES365AIMigrationTooling stages: - stage: stage + displayName: 'Build and Publish Container Images' jobs: - job: common pool: @@ -276,7 +277,8 @@ extends: export TRIVY_JAVA_DB_REPOSITORY=$PRIMARY_TRIVY_JAVA_DB_REPOSITORY # Function to run Trivy scan and handle output run_trivy_scan() { - trivy image --exit-code 1 --ignore-unfixed --no-progress --severity HIGH,CRITICAL,MEDIUM "${{ variables.repoImageName }}:$(linuxImagetag)" > trivy_output.log 2>&1 + #trivy image --exit-code 1 --ignore-unfixed --no-progress --severity HIGH,CRITICAL,MEDIUM "${{ variables.repoImageName }}:$(linuxImagetag)" > trivy_output.log 2>&1 + trivy image --ignore-unfixed --no-progress --severity HIGH,CRITICAL,MEDIUM "${{ variables.repoImageName }}:$(linuxImagetag)" > trivy_output.log 2>&1 return $? 
} # Attempt scan up to 5 times with repository fallback @@ -881,3 +883,74 @@ extends: FileDirPath: '$(Build.ArtifactStagingDirectory)' DisableRemediation: false AcceptableOutdatedSignatureInHours: 72 + + - stage: Deploy_and_Test_Images_In_Dev_Clusters + displayName: Deploy and Test Images in Dev Clusters + lockBehavior: sequential + dependsOn: + - stage + condition: | + eq(dependencies.stage.result, 'Succeeded') + variables: + # Override the helm chart's default image repository (/azuremonitor/containerinsights/ciprod) to use cidev + ImageRepositoryOverride: '/azuremonitor/containerinsights/cidev' + # Use image tags built from the previous build stage + linuxImageTagUnderTest: $[stageDependencies.stage.common.outputs['setup.linuxImagetag']] + windowsImageTagUnderTest: $[stageDependencies.stage.common.outputs['setup.windowsImageTag']] + jobs: + # TODO: remove the two temp clusters and add more clusters from the test automation framework when the tests are stable + # ============================================================ + # Cluster 1: zane-test — Deploy via Helm + # ============================================================ + - template: /.pipelines/helm-deploy-templates/ama-logs-helm-deploy.yaml@self + parameters: + clusterName: 'zane-test' + resourceGroup: 'zane-test' + region: 'westus2' + subscriptionId: $(CI_BUILD_SUB_ID) + workspaceId: $(ZANE_TEST_LA2_LAW_ID) # zane-test-la2 + amalogsLinuxImage: $(linuxImageTagUnderTest) + amalogsWindowsImage: $(windowsImageTagUnderTest) + imageRepository: $(ImageRepositoryOverride) + environment: 'CI-Agent-Dev' + azureSubscription: 'ContainerInsights_Build_Subscription_CI' + + # Cluster 1: zane-test — Run E2E Tests + - template: /.pipelines/e2e-test-templates/test-ci-image-in-aks-cluster.yml@self + parameters: + clusterName: 'zane-test' + resourceGroup: 'zane-test' + azureSubscription: 'ContainerInsights_Build_Subscription_CI' + environmentName: 'CI-Agent-Dev' + dependsOnDeployJob: 'Deploy_AmaLogs_zane_test' + 
azureClientId: $(ZANE_TEST_CLIENT_ID) + azureTenantId: $(CI_BUILD_AZURE_TENANT_ID) + teamsWebhookUri: $(TeamsWebhookUri) + + # ============================================================ + # Cluster 2: zane-test2 — Deploy via Helm + # ============================================================ + - template: /.pipelines/helm-deploy-templates/ama-logs-helm-deploy.yaml@self + parameters: + clusterName: 'zane-test2' + resourceGroup: 'zane-test' + region: 'centralus' + subscriptionId: $(CI_BUILD_SUB_ID) + workspaceId: $(ZANE_TEST_LA2_LAW_ID) # zane-test2-la2 + amalogsLinuxImage: $(linuxImageTagUnderTest) + amalogsWindowsImage: $(windowsImageTagUnderTest) + imageRepository: $(ImageRepositoryOverride) + environment: 'CI-Agent-Dev2' + azureSubscription: 'ContainerInsights_Build_Subscription_CI' + + # Cluster 2: zane-test2 — Run E2E Tests + - template: /.pipelines/e2e-test-templates/test-ci-image-in-aks-cluster.yml@self + parameters: + clusterName: 'zane-test2' + resourceGroup: 'zane-test' + azureSubscription: 'ContainerInsights_Build_Subscription_CI' + environmentName: 'CI-Agent-Dev2' + dependsOnDeployJob: 'Deploy_AmaLogs_zane_test2' + azureClientId: $(ZANE_TEST2_CLIENT_ID) + azureTenantId: $(CI_BUILD_AZURE_TENANT_ID) + teamsWebhookUri: $(TeamsWebhookUri) \ No newline at end of file diff --git a/.pipelines/ci-aks-prod-release.yaml b/.pipelines/ci-aks-prod-release.yaml index 42c961bb9..53e65799a 100644 --- a/.pipelines/ci-aks-prod-release.yaml +++ b/.pipelines/ci-aks-prod-release.yaml @@ -481,7 +481,7 @@ extends: # Stage 3: Deploy ama-logs to AKS Clusters via Helm # ============================================================================= # To add a new cluster, simply add an entry to the template list below. - # Each cluster only needs: clusterName, resourceGroup, region, subscriptionId, workspaceId, and imageTag. + # Each cluster only needs: clusterName, resourceGroup, region, subscriptionId, workspaceId, amalogsLinuxImage, and amalogsWindowsImage. 
# ============================================================================= - stage: Stage_3 displayName: Deploy ama-logs to CI AKS Prod Clusters via Helm @@ -499,7 +499,8 @@ extends: region: 'westcentralus' subscriptionId: '9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb' workspaceId: '22f38e11-4f59-480c-b4b8-2573156b6e06' # "Monitoring-Model-Cluster-WCUS" - imageTag: '$(AgentImageTagSuffix)' + amalogsLinuxImage: '$(AgentImageTagSuffix)' + amalogsWindowsImage: 'win-$(AgentImageTagSuffix)' environment: 'CI-Deploy-To-Prod-Cluster-1' # Monitoring-Model-Cluster-WEU - template: .pipelines/helm-deploy-templates/ama-logs-helm-deploy.yaml@self @@ -509,5 +510,6 @@ extends: region: 'westeurope' subscriptionId: '9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb' workspaceId: '5c269467-32a9-4468-a1d6-ec1cac551e74' # "Monitoring-Model-Cluster-WEU" - imageTag: '$(AgentImageTagSuffix)' - environment: 'CI-Deploy-To-Prod-Cluster-2' \ No newline at end of file + amalogsLinuxImage: '$(AgentImageTagSuffix)' + amalogsWindowsImage: 'win-$(AgentImageTagSuffix)' + environment: 'CI-Deploy-To-Prod-Cluster-2' diff --git a/.pipelines/e2e-test-templates/test-ci-image-in-aks-cluster.yml b/.pipelines/e2e-test-templates/test-ci-image-in-aks-cluster.yml new file mode 100644 index 000000000..39cfe0e66 --- /dev/null +++ b/.pipelines/e2e-test-templates/test-ci-image-in-aks-cluster.yml @@ -0,0 +1,115 @@ +parameters: +- name: clusterName + type: string +- name: resourceGroup + type: string +- name: azureSubscription + type: string + default: 'ContainerInsights_Build_Subscription_CI' +- name: environmentName + type: string +- name: dependsOnDeployJob + type: string + displayName: 'Name of the deploy job this test depends on (e.g., Deploy_AmaLogs_zane_test)' +- name: azureClientId + type: string +- name: azureTenantId + type: string +- name: teamsWebhookUri + type: string + default: '$(TeamsWebhookUri)' +- name: additionalTestParams + type: string + default: '' + +jobs: +- deployment: Test_${{ 
replace(parameters.clusterName, '-', '_') }} + displayName: 'Test: ${{ parameters.clusterName }}' + environment: ${{ parameters.environmentName }} + dependsOn: ${{ parameters.dependsOnDeployJob }} + pool: + name: Azure-Pipelines-CI-Test-EO + image: ci-1es-managed-ubuntu-2204 + os: linux + variables: + skipComponentGovernanceDetection: true + strategy: + runOnce: + deploy: + steps: + - checkout: self + persistCredentials: true + + - script: | + set -euo pipefail + echo "Ensuring kubectl is installed" + if ! command -v kubectl >/dev/null 2>&1; then + echo "Installing kubectl" + sudo az aks install-cli + else + echo "kubectl already installed: $(kubectl version --client --short || true)" + fi + displayName: 'Install kubectl' + + - task: AzureCLI@2 + displayName: 'Get credentials for ${{ parameters.clusterName }}' + inputs: + azureSubscription: ${{ parameters.azureSubscription }} + scriptLocation: 'inlineScript' + scriptType: 'bash' + inlineScript: 'az aks get-credentials -g ${{ parameters.resourceGroup }} -n ${{ parameters.clusterName }}' + + - task: Bash@3 + displayName: 'Wait for logs to be ingested into Log Analytics (10 min)' + inputs: + targetType: 'inline' + script: | + echo "========================================" + echo "Waiting for Log Analytics Ingestion" + echo "========================================" + echo "Cluster: ${{ parameters.clusterName }}" + echo "" + echo "Waiting 10 minutes to allow logs to be ingested..." + echo "This ensures queries will find logs from the newly deployed containers." + echo "" + + wait_time=600 + interval=60 + elapsed=0 + + while [ $elapsed -lt $wait_time ]; do + remaining=$((wait_time - elapsed)) + minutes_elapsed=$((elapsed / 60)) + minutes_remaining=$((remaining / 60)) + echo "⏳ Waiting... ($minutes_elapsed/$((wait_time / 60)) minutes elapsed, $minutes_remaining minutes remaining)" + sleep $interval + elapsed=$((elapsed + interval)) + done + + echo "" + echo "✓ Wait complete! 
Logs should now be available in Log Analytics." + echo "========================================" + + - bash: | + echo "Running tests for cluster: ${{ parameters.clusterName }}" + + chmod +x ./install-and-execute-testkube-tests.sh + ./install-and-execute-testkube-tests.sh \ + AzureClientId=${{ parameters.azureClientId }} \ + AzureTenantId=${{ parameters.azureTenantId }} \ + TeamsWebhookUri=${{ parameters.teamsWebhookUri }} \ + ${{ parameters.additionalTestParams }} + workingDirectory: $(Build.SourcesDirectory)/test/testkube/ + displayName: 'Install Testkube and run E2E tests' + + # Log completion + - bash: | + echo "=========================================" + echo "TEST COMPLETE" + echo "=========================================" + echo "Cluster: ${{ parameters.clusterName }}" + echo "Build ID: $(Build.BuildId)" + echo "✓ Testing finished for: ${{ parameters.clusterName }}" + echo "=========================================" + displayName: 'Test Completion' + condition: always() \ No newline at end of file diff --git a/.pipelines/helm-deploy-templates/ama-logs-helm-deploy.yaml b/.pipelines/helm-deploy-templates/ama-logs-helm-deploy.yaml index b585dc926..781b02090 100644 --- a/.pipelines/helm-deploy-templates/ama-logs-helm-deploy.yaml +++ b/.pipelines/helm-deploy-templates/ama-logs-helm-deploy.yaml @@ -4,10 +4,11 @@ # parameters: # clusterName: 'my-cluster' # resourceGroup: 'my-rg' -# region: 'eastus' -# subscriptionId: '9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb' +# region: e.g. 
'eastus' +# subscriptionId: 'your-subscription-id' # workspaceId: 'your-workspace-id' -# imageTag: '$(AgentImageTagSuffix)' # e.g., 3.1.32 +# amalogsLinuxImage: "ama-logs linux image tag" +# amalogsWindowsImage: "ama-logs windows image tag" # environment: 'deployment environment' parameters: @@ -23,9 +24,16 @@ parameters: - name: workspaceId type: string displayName: 'Log Analytics Workspace ID' -- name: imageTag +- name: amalogsLinuxImage type: string displayName: 'Image tag suffix (e.g., 3.1.32)' +- name: amalogsWindowsImage + type: string + displayName: 'Image tag suffix (e.g., win-3.1.32)' +- name: imageRepository + type: string + default: '/azuremonitor/containerinsights/ciprod' + displayName: 'Image repository path (e.g., /azuremonitor/containerinsights/ciprod or /azuremonitor/containerinsights/cidev)' - name: environment type: string displayName: 'Azure DevOps Environment name' @@ -34,9 +42,6 @@ parameters: - name: cloudEnvironment type: string default: 'azurepubliccloud' -- name: kubernetesVersion - type: string - default: '1.32.7' - name: azureSubscription type: string default: 'ContainerInsights_Build_Subscription_CI' @@ -46,9 +51,6 @@ parameters: - name: releaseName type: string default: 'azuremonitor-containers' -- name: helmVersion - type: string - default: '3.12.3' - name: dependsOn type: object default: [] @@ -83,7 +85,7 @@ jobs: - task: HelmInstaller@1 displayName: Install Helm inputs: - helmVersionToInstall: '${{ parameters.helmVersion }}' + helmVersionToInstall: 'latest' - task: HelmDeploy@0 displayName: 'Helm Deploy: ama-logs to ${{ parameters.clusterName }}' inputs: @@ -99,7 +101,7 @@ jobs: # TODO: When it is merged to ci_prod, the following chartPath will be updated accordingly. 
chartPath: '$(Build.SourcesDirectory)/charts/azuremonitor-containerinsights-for-prod-clusters' releaseName: '${{ parameters.releaseName }}' - overrideValues: 'global.commonGlobals.CloudEnvironment=${{ parameters.cloudEnvironment }},global.commonGlobals.Region=${{ parameters.region }},OmsAgent.aksResourceID=$(AKS_RESOURCE_ID),OmsAgent.workspaceID=${{ parameters.workspaceId }},OmsAgent.imageTagLinux=${{ parameters.imageTag }},OmsAgent.imageTagWindows=win-${{ parameters.imageTag }}' + overrideValues: 'global.commonGlobals.CloudEnvironment=${{ parameters.cloudEnvironment }},global.commonGlobals.Region=${{ parameters.region }},OmsAgent.aksResourceID=$(AKS_RESOURCE_ID),OmsAgent.workspaceID=${{ parameters.workspaceId }},OmsAgent.imageRepository=${{ parameters.imageRepository }},OmsAgent.imageTagLinux=${{ parameters.amalogsLinuxImage }},OmsAgent.imageTagWindows=${{ parameters.amalogsWindowsImage }}' waitForExecution: false arguments: '--timeout 10m --install' - task: AzureCLI@2 @@ -115,8 +117,8 @@ jobs: echo "Cluster: ${{ parameters.clusterName }}" echo "Resource Group: ${{ parameters.resourceGroup }}" echo "Region: ${{ parameters.region }}" - echo "Linux Image Tag: ${{ parameters.imageTag }}" - echo "Windows Image Tag: win-${{ parameters.imageTag }}" + echo "Linux Image Tag: ${{ parameters.amalogsLinuxImage }}" + echo "Windows Image Tag: ${{ parameters.amalogsWindowsImage }}" echo "" echo "Getting AKS credentials..." 
@@ -151,8 +153,8 @@ jobs: echo "==========================================" echo "Image Verification" echo "==========================================" - EXPECTED_LINUX_TAG="${{ parameters.imageTag }}" - EXPECTED_WINDOWS_TAG="win-${{ parameters.imageTag }}" + EXPECTED_LINUX_TAG="${{ parameters.amalogsLinuxImage }}" + EXPECTED_WINDOWS_TAG="${{ parameters.amalogsWindowsImage }}" VERIFICATION_PASSED=true echo "Expected Linux image tag: $EXPECTED_LINUX_TAG" diff --git a/charts/azuremonitor-containerinsights-for-prod-clusters/templates/ama-logs.yaml b/charts/azuremonitor-containerinsights-for-prod-clusters/templates/ama-logs.yaml index 5f7a7d864..91f3f24c1 100644 --- a/charts/azuremonitor-containerinsights-for-prod-clusters/templates/ama-logs.yaml +++ b/charts/azuremonitor-containerinsights-for-prod-clusters/templates/ama-logs.yaml @@ -533,7 +533,7 @@ spec: - NET_RAW {{- end }} - name: ama-logs - image: "{{ template "addon_mcr_repository_base" $ }}/azuremonitor/containerinsights/ciprod:{{- default $amalogsLinuxDefaultImageTag $.Values.OmsAgent.imageTagLinux -}}" + image: "{{ template "addon_mcr_repository_base" $ }}{{ $.Values.OmsAgent.imageRepository | default "/azuremonitor/containerinsights/ciprod" }}:{{- default $amalogsLinuxDefaultImageTag $.Values.OmsAgent.imageTagLinux -}}" {{- if $.Values.OmsAgent.isImagePullPolicyAlways }} imagePullPolicy: Always {{- else }} @@ -715,7 +715,7 @@ spec: {{- end }} {{- if and (not $.Values.OmsAgent.isPrometheusMetricsScrapingDisabled) $.Values.OmsAgent.isSidecarScrapingEnabled }} - name: ama-logs-prometheus - image: "{{ template "addon_mcr_repository_base" $ }}/azuremonitor/containerinsights/ciprod:{{- default $amalogsLinuxDefaultImageTag $.Values.OmsAgent.imageTagLinux -}}" + image: "{{ template "addon_mcr_repository_base" $ }}{{ $.Values.OmsAgent.imageRepository | default "/azuremonitor/containerinsights/ciprod" }}:{{- default $amalogsLinuxDefaultImageTag $.Values.OmsAgent.imageTagLinux -}}" {{- if 
$.Values.OmsAgent.isImagePullPolicyAlways }} imagePullPolicy: Always {{- else }} @@ -1084,7 +1084,7 @@ spec: - NET_RAW {{- end }} - name: ama-logs - image: "{{ template "addon_mcr_repository_base" . }}/azuremonitor/containerinsights/ciprod:{{- default $amalogsLinuxDefaultImageTag .Values.OmsAgent.imageTagLinux -}}" + image: "{{ template "addon_mcr_repository_base" . }}{{ .Values.OmsAgent.imageRepository | default "/azuremonitor/containerinsights/ciprod" }}:{{- default $amalogsLinuxDefaultImageTag .Values.OmsAgent.imageTagLinux -}}" {{- if .Values.OmsAgent.isImagePullPolicyAlways }} imagePullPolicy: Always {{- else }} @@ -1364,7 +1364,7 @@ spec: value: "3" containers: - name: ama-logs-windows - image: "{{ template "addon_mcr_repository_base" . }}/azuremonitor/containerinsights/ciprod:{{- default $amalogsWindowsDefaultImageTag .Values.OmsAgent.imageTagWindows -}}" + image: "{{ template "addon_mcr_repository_base" . }}{{ .Values.OmsAgent.imageRepository | default "/azuremonitor/containerinsights/ciprod" }}:{{- default $amalogsWindowsDefaultImageTag .Values.OmsAgent.imageTagWindows -}}" {{- if .Values.OmsAgent.isImagePullPolicyAlways }} imagePullPolicy: Always {{- else }} @@ -1777,7 +1777,7 @@ spec: - NET_ADMIN - NET_RAW - name: ama-logs - image: "{{ template "addon_mcr_repository_base" . }}/azuremonitor/containerinsights/ciprod:{{- default $amalogsLinuxDefaultImageTag .Values.OmsAgent.imageTagLinux -}}" + image: "{{ template "addon_mcr_repository_base" . 
}}{{ .Values.OmsAgent.imageRepository | default "/azuremonitor/containerinsights/ciprod" }}:{{- default $amalogsLinuxDefaultImageTag .Values.OmsAgent.imageTagLinux -}}" {{- if .Values.OmsAgent.isImagePullPolicyAlways }} imagePullPolicy: Always {{- else }} diff --git a/charts/azuremonitor-containerinsights-for-prod-clusters/values.yaml b/charts/azuremonitor-containerinsights-for-prod-clusters/values.yaml index 20e5de3f8..9ed4cb5ad 100644 --- a/charts/azuremonitor-containerinsights-for-prod-clusters/values.yaml +++ b/charts/azuremonitor-containerinsights-for-prod-clusters/values.yaml @@ -34,6 +34,7 @@ OmsAgent: workspaceKey: "" # Image configuration + imageRepository: "/azuremonitor/containerinsights/ciprod" imageTagLinux: imageTagWindows: isImagePullPolicyAlways: false diff --git a/test/ginkgo-e2e/querylogs/querylogs_test.go b/test/ginkgo-e2e/querylogs/querylogs_test.go index 122ed80a0..c0e46f393 100644 --- a/test/ginkgo-e2e/querylogs/querylogs_test.go +++ b/test/ginkgo-e2e/querylogs/querylogs_test.go @@ -20,11 +20,11 @@ var _ = Describe("When querying the logs for the table", func() { Skip("ContainerLog test skipped because GENEVA_INTEGRATION is set to 'true'") } var err error - query := table + " | where TimeGenerated > ago(15m) | summarize count()" + query := table + " | where TimeGenerated > ago(5m) | summarize count()" err = utils.QueryLogsForCount(LogsClient, AKSResourceId, query, false) // If ContainerLogV2 is configured, query ContainerLogV2 table instead of ContainerLog if err != nil && strings.Contains(table, "ContainerLog") { - query := "ContainerLogV2 | where TimeGenerated > ago(15m) | summarize count()" + query := "ContainerLogV2 | where TimeGenerated > ago(5m) | summarize count()" err = utils.QueryLogsForCount(LogsClient, AKSResourceId, query, false) } Expect(err).NotTo(HaveOccurred()) @@ -46,7 +46,7 @@ var _ = Describe("When querying the logs for the ContainerInventory", func() { func(column string) { // Skip records with ContainerState 'Waiting' to 
avoid false positives due to the container being in a waiting state. // If the pod name contains 'ama-logs', we include it to ensure we capture the ama-logs agent containers. - query := "ContainerInventory | where TimeGenerated > ago(1h) and (ContainerState !~ 'Waiting' or ContainerHostname contains 'ama-logs') | summarize countif(isempty(" + column + ") or isnull(" + column + "))" + query := "ContainerInventory | where TimeGenerated > ago(5m) and (ContainerState !~ 'Waiting' or ContainerHostname contains 'ama-logs') | summarize countif(isempty(" + column + ") or isnull(" + column + "))" err := utils.QueryLogsForCount(LogsClient, AKSResourceId, query, true) Expect(err).NotTo(HaveOccurred()) }, diff --git a/test/ginkgo-e2e/utils/query_logs_api_utils.go b/test/ginkgo-e2e/utils/query_logs_api_utils.go index 7d79c785b..397977751 100644 --- a/test/ginkgo-e2e/utils/query_logs_api_utils.go +++ b/test/ginkgo-e2e/utils/query_logs_api_utils.go @@ -131,7 +131,7 @@ func CompareResourcesInLogsAndKubeAPI(K8sClient *kubernetes.Clientset, logsClien for _, node := range nodes { resources = append(resources, node.Name) } - query = logsTable + " | where TimeGenerated > ago(15m) | distinct Computer" + query = logsTable + " | where TimeGenerated > ago(5m) | distinct Computer" } else if logsTable == "KubePodInventory" { pods, err := GetAllAgentPods(K8sClient) if err != nil { @@ -144,7 +144,7 @@ func CompareResourcesInLogsAndKubeAPI(K8sClient *kubernetes.Clientset, logsClien } resources = append(resources, pod.Name) } - query = logsTable + " | where TimeGenerated > ago(15m) | distinct Name" + query = logsTable + " | where TimeGenerated > ago(5m) | distinct Name" } return CompareResourcesHelper(logsClient, resourceID, query, resources) diff --git a/test/testkube/testkube-test-crs.yaml b/test/testkube/testkube-test-crs.yaml index f3ce2d65e..db2c60f6c 100644 --- a/test/testkube/testkube-test-crs.yaml +++ b/test/testkube/testkube-test-crs.yaml @@ -130,7 +130,7 @@ spec: content: git: uri: 
https://github.com/microsoft/Docker-Provider/ - revision: ci_prod + revision: zane/ci-agent-auto-deploy paths: - test/ginkgo-e2e steps: