diff --git a/Arcade.slnx b/Arcade.slnx index 1e9aa9aae67..923cb6e046d 100644 --- a/Arcade.slnx +++ b/Arcade.slnx @@ -5,7 +5,9 @@ + + diff --git a/Directory.Packages.props b/Directory.Packages.props index dc3da890ce4..6d8d8a4c60d 100644 --- a/Directory.Packages.props +++ b/Directory.Packages.props @@ -56,6 +56,7 @@ + diff --git a/azure-pipelines-pr.yml b/azure-pipelines-pr.yml index 7ca86bef5c4..1f08335c560 100644 --- a/azure-pipelines-pr.yml +++ b/azure-pipelines-pr.yml @@ -64,8 +64,7 @@ stages: - job: Windows_NT timeoutInMinutes: 90 pool: - name: $(DncEngPublicBuildPool) - demands: ImageOverride -equals windows.vs2026.amd64.open + vmImage: windows-latest # TODO: Testing only, revert strategy: matrix: Build_Release: @@ -225,6 +224,15 @@ stages: SYSTEM_ACCESSTOKEN: $(System.AccessToken) HelixAccessToken: '' + # Helix Job Monitor runs as a job within the Test stage so it can monitor + # Helix work items submitted by the other jobs in this stage. + - template: /eng/common/core-templates/job/helix-job-monitor.yml + parameters: + timeoutInMinutes: 60 # TODO: Increase this + # Install from the nupkg produced by the Build stage's Windows_NT Release job. 
+ toolNupkgArtifactName: Artifacts_Windows_NT_Release + toolNupkgArtifactSubPath: packages/Release/NonShipping + - stage: Test_XHarness displayName: Test XHarness SDK dependsOn: build diff --git a/eng/Version.Details.props b/eng/Version.Details.props index 6bbc63729bf..a0bf25ed1f5 100644 --- a/eng/Version.Details.props +++ b/eng/Version.Details.props @@ -34,6 +34,7 @@ This file should be imported by eng/Versions.props 10.0.3 10.0.3 10.0.3 + 10.0.3 10.0.3 10.0.3 10.0.3 @@ -82,6 +83,7 @@ This file should be imported by eng/Versions.props $(MicrosoftExtensionsFileProvidersAbstractionsPackageVersion) $(MicrosoftExtensionsFileSystemGlobbingPackageVersion) $(MicrosoftExtensionsHttpPackageVersion) + $(MicrosoftExtensionsLoggingAbstractionsPackageVersion) $(MicrosoftExtensionsLoggingConsolePackageVersion) $(SystemCompositionPackageVersion) $(SystemIOPackagingPackageVersion) diff --git a/eng/Version.Details.xml b/eng/Version.Details.xml index 6875dba638a..2a608b88458 100644 --- a/eng/Version.Details.xml +++ b/eng/Version.Details.xml @@ -90,6 +90,10 @@ + + https://github.com/dotnet/runtime + dc5fd7a8dce8309e4add8fd4bd5d8718f221b15a + https://github.com/dotnet/runtime dc5fd7a8dce8309e4add8fd4bd5d8718f221b15a diff --git a/eng/common/core-templates/job/helix-job-monitor.yml b/eng/common/core-templates/job/helix-job-monitor.yml new file mode 100644 index 00000000000..f3c7ef28555 --- /dev/null +++ b/eng/common/core-templates/job/helix-job-monitor.yml @@ -0,0 +1,254 @@ +parameters: +# Azure DevOps job identifier. +- name: jobName + type: string + default: HelixJobMonitor + +# Pool override. When empty the template selects a default azurelinux pool based on the team project. +- name: pool + type: object + default: {} + +# NuGet package id of the Helix job monitor tool. +- name: toolPackageId + type: string + default: Microsoft.DotNet.Helix.JobMonitor + +# Console command exposed by the installed tool package. 
+- name: toolCommand + type: string + default: dotnet-helix-job-monitor + +# Optional explicit tool version. Only honored when 'toolNupkgArtifactName' is set; in the +# default code path the version is taken from the consuming repo's .config/dotnet-tools.json. +- name: toolVersion + type: string + default: '' + +# Optional NuGet feed intended as an additional source when installing the tool. NOTE(review): no +# step in this template currently reads this parameter; the nupkg install path uses a generated +# NuGet.config that lists only the downloaded artifact directory, and the default path runs 'dotnet tool restore'. +- name: toolSource + type: string + default: '' + +# Base URI for the Helix service (--helix-base-uri). +- name: helixBaseUri + type: string + default: https://helix.dot.net/ + +# Helix API access token forwarded to the tool via the HELIX_ACCESSTOKEN environment variable. +- name: helixAccessToken + type: string + default: '' + +# Polling interval in seconds (--polling-interval-seconds). +- name: pollingIntervalSeconds + type: number + default: 30 + +# Maximum run time of the monitor job in minutes. Also used for --max-wait-minutes. +- name: timeoutInMinutes + type: number + default: 360 + +# Display name reported by the tool to Azure DevOps (--job-monitor-name). +- name: jobMonitorName + type: string + default: Helix Job Monitor + +# Owner segment of the source repository (e.g. 'dotnet' for 'dotnet/runtime') passed via --organization. +# Defaults to the owner segment of BUILD_REPOSITORY_NAME when empty. +- name: organization + type: string + default: '' + +# Name of the source repository (e.g. 'runtime' for 'dotnet/runtime') passed via --repository. +# Defaults to the repo segment of BUILD_REPOSITORY_NAME when empty. +- name: repository + type: string + default: '' + +# Pull request number being built (--pr-number). Defaults to SYSTEM_PULLREQUEST_PULLREQUESTNUMBER +# when empty. 
+- name: prNumber + type: string + default: '' + +# When true (default), the monitor tracks Helix jobs and pipeline jobs across every stage of the +# build. When false, the monitor only tracks jobs that belong to the same Azure DevOps stage as +# the monitor job itself (the stage name is read from $(System.StageName) at runtime). +- name: monitorAllStages + type: boolean + default: true + +# Optional dependency list for the generated job. +- name: dependsOn + type: object + default: [] + +# Optional condition for the generated job. +- name: condition + type: string + default: '' + +# Advanced: optional pipeline artifact (produced earlier in this run) that contains the tool +# nupkg. When set, the artifact is downloaded and the tool is installed from the nupkg into +# a local tool-path; this bypasses the repo's .config/dotnet-tools.json manifest and is +# primarily intended for the Arcade repository itself, where the Helix job monitor tool is +# built in the same pipeline that runs this template. +# +# When this parameter is empty (the default), the consuming repository must declare the tool +# in its .config/dotnet-tools.json manifest (alongside other local .NET tools); the template +# will check out the repo and run 'dotnet tool restore' to install the version pinned there. +- name: toolNupkgArtifactName + type: string + default: '' + +# Advanced: sub-path within the downloaded artifact where the tool nupkg is located. Defaults +# to the standard Arcade non-shipping packages location for a Release build (relative to the +# pipeline artifact root, which is itself the build's 'artifacts' directory). 
+- name: toolNupkgArtifactSubPath + type: string + default: 'packages/Release/NonShipping' + +jobs: +- job: ${{ parameters.jobName }} + displayName: Monitor Helix Jobs + timeoutInMinutes: ${{ parameters.timeoutInMinutes }} + ${{ if ne(length(parameters.dependsOn), 0) }}: + dependsOn: ${{ parameters.dependsOn }} + ${{ if ne(parameters.condition, '') }}: + condition: ${{ parameters.condition }} + pool: + ${{ if eq(variables['System.TeamProject'], 'public') }}: + name: $(DncEngPublicBuildPool) + demands: ImageOverride -equals build.azurelinux.3.amd64.open + ${{ else }}: + name: $(DncEngInternalBuildPool) + demands: ImageOverride -equals build.azurelinux.3.amd64 + steps: + - checkout: self + fetchDepth: 1 + + - ${{ if ne(parameters.toolNupkgArtifactName, '') }}: + - task: DownloadPipelineArtifact@2 + displayName: Download Helix Job Monitor artifact + inputs: + buildType: current + artifactName: ${{ parameters.toolNupkgArtifactName }} + itemPattern: '${{ parameters.toolNupkgArtifactSubPath }}/${{ parameters.toolPackageId }}.*.nupkg' + targetPath: $(Agent.TempDirectory)/helix-job-monitor-nupkg + + - bash: | + set -euo pipefail + + toolPath="$AGENT_TEMPDIRECTORY/helix-job-monitor-tool" + mkdir -p "$toolPath" + + packageId='${{ parameters.toolPackageId }}' + toolVersion='${{ parameters.toolVersion }}' + nupkgArtifactSubPath='${{ parameters.toolNupkgArtifactSubPath }}' + nupkgDir="$AGENT_TEMPDIRECTORY/helix-job-monitor-nupkg/$nupkgArtifactSubPath" + + if [ ! -d "$nupkgDir" ]; then + echo "Expected nupkg directory '$nupkgDir' was not produced by the artifact download." >&2 + exit 1 + fi + + nupkg=$(find "$nupkgDir" -maxdepth 1 -type f -name "$packageId.*.nupkg" | head -n 1) + if [ -z "$nupkg" ]; then + echo "No '$packageId.*.nupkg' found in '$nupkgDir'." >&2 + exit 1 + fi + + # Derive the version from the nupkg filename so the local package is selected + # deterministically instead of resolving against any other configured feed. 
+ nupkgBase=$(basename "$nupkg" .nupkg) + derivedVersion="${nupkgBase#${packageId}.}" + if [ -z "$toolVersion" ]; then + toolVersion="$derivedVersion" + fi + + echo "Using locally built '$packageId' version '$toolVersion' from '$nupkgDir'." + + # Create a minimal NuGet.config that only references the local nupkg directory. + # This avoids conflicts with the repo's package source mapping which blocks --add-source. + toolNugetConfig="$AGENT_TEMPDIRECTORY/helix-job-monitor-nuget.config" + printf '\n\n \n \n \n \n\n' "$nupkgDir" > "$toolNugetConfig" + + pushd "$(Build.SourcesDirectory)" > /dev/null + ./eng/common/dotnet.sh tool install \ + --tool-path "$toolPath" "$packageId" \ + --version "$toolVersion" \ + --configfile "$toolNugetConfig" + + # Locate the tool DLL so the run step can invoke it via ./eng/common/dotnet.sh exec. + toolDll=$(find "$toolPath/.store" -path '*/tools/*/any/*.deps.json' -type f | head -n 1) + toolDll="${toolDll%.deps.json}.dll" + if [ ! -f "$toolDll" ]; then + echo "Could not find tool DLL in '$toolPath/.store'." 
>&2 + exit 1 + fi + + echo "Tool DLL: $toolDll" + echo "##vso[task.setvariable variable=HelixJobMonitorDll]$toolDll" + displayName: Install Helix Job Monitor + + - ${{ else }}: + - bash: ./eng/common/dotnet.sh tool restore + displayName: Restore Helix Job Monitor + + - bash: | + set -euo pipefail + + toolArgs=( + --helix-base-uri '${{ parameters.helixBaseUri }}' + --polling-interval-seconds '${{ parameters.pollingIntervalSeconds }}' + --max-wait-minutes '${{ parameters.timeoutInMinutes }}' + --job-monitor-name '${{ parameters.jobMonitorName }}' + --attempt '$(System.JobAttempt)' + --monitor-all-stages '${{ parameters.monitorAllStages }}' + --stage-name '$(System.StageName)' + ) + + organization='${{ parameters.organization }}' + repository='${{ parameters.repository }}' + prNumber='${{ parameters.prNumber }}' + + # Fall back to Azure DevOps-provided environment variables when the caller did not + # supply organization / repository / pr-number explicitly. BUILD_REPOSITORY_NAME is + # typically 'owner/repo' for GitHub-backed builds. + if [ -z "$organization" ] || [ -z "$repository" ]; then + buildRepoName="${BUILD_REPOSITORY_NAME:-}" + if [ -n "$buildRepoName" ] && [[ "$buildRepoName" == */* ]]; then + repoOwner="${buildRepoName%%/*}" + repoName="${buildRepoName#*/}" + if [ -z "$organization" ]; then organization="$repoOwner"; fi + if [ -z "$repository" ]; then repository="$repoName"; fi + fi + fi + + if [ -z "$prNumber" ]; then + prNumber="${SYSTEM_PULLREQUEST_PULLREQUESTNUMBER:-}" + fi + + if [ -n "$organization" ]; then toolArgs+=( --organization "$organization" ); fi + if [ -n "$repository" ]; then toolArgs+=( --repository "$repository" ); fi + if [ -n "$prNumber" ]; then toolArgs+=( --pr-number "$prNumber" ); fi + + if [ -n '${{ parameters.toolNupkgArtifactName }}' ]; then + # Tool was installed from a local nupkg; run the DLL via the repo-local dotnet. 
+ export DOTNET_ROOT="$(Build.SourcesDirectory)/.dotnet" + ./eng/common/dotnet.sh exec "$(HelixJobMonitorDll)" "${toolArgs[@]}" + else + # Tool was restored from the local .config/dotnet-tools.json manifest; invoke it + # through the manifest from the repo root. + pushd "$BUILD_SOURCESDIRECTORY" > /dev/null + trap 'popd > /dev/null' EXIT + ./eng/common/dotnet.sh tool run '${{ parameters.toolCommand }}' -- "${toolArgs[@]}" + fi + displayName: Monitor Helix Jobs + env: + SYSTEM_ACCESSTOKEN: $(System.AccessToken) + HELIX_ACCESSTOKEN: ${{ parameters.helixAccessToken }} diff --git a/eng/common/core-templates/stages/helix-job-monitor.yml b/eng/common/core-templates/stages/helix-job-monitor.yml new file mode 100644 index 00000000000..67ab95248bd --- /dev/null +++ b/eng/common/core-templates/stages/helix-job-monitor.yml @@ -0,0 +1,125 @@ +parameters: +# Stage identifier. +- name: stageName + type: string + default: Helix_Job_Monitor + +# Optional list of stages this stage depends on. +- name: dependsOn + type: object + default: [] + +# Optional stage condition expression. +- name: condition + type: string + default: '' + +# Job identifier produced inside the stage. +- name: jobName + type: string + default: HelixJobMonitor + +# NuGet package id of the Helix job monitor tool. +- name: toolPackageId + type: string + default: Microsoft.DotNet.Helix.JobMonitor + +# Console command exposed by the installed tool package. +- name: toolCommand + type: string + default: dotnet-helix-job-monitor + +# Optional explicit tool version. Only honored when 'toolNupkgArtifactName' is set; otherwise .config/dotnet-tools.json pins the version. +- name: toolVersion + type: string + default: '' + +# Optional NuGet feed for installing the tool; forwarded unchanged to the job template's 'toolSource' parameter. +- name: toolSource + type: string + default: '' + +# JobMonitorOptions: --helix-base-uri. +- name: helixBaseUri + type: string + default: https://helix.dot.net/ + +# Helix API access token forwarded via the HELIX_ACCESSTOKEN environment variable. 
+- name: helixAccessToken + type: string + default: '' + +# JobMonitorOptions: --polling-interval-seconds. +- name: pollingIntervalSeconds + type: number + default: 30 + +# JobMonitorOptions: --max-wait-minutes. Also used as the monitor job's timeout. +- name: timeoutInMinutes + type: number + default: 360 + +# JobMonitorOptions: --job-monitor-name. +- name: jobMonitorName + type: string + default: Helix Job Monitor + +# JobMonitorOptions: --organization (owner segment of the source repository). +- name: organization + type: string + default: '' + +# JobMonitorOptions: --repository (name of the source repository). +- name: repository + type: string + default: '' + +# JobMonitorOptions: --pr-number. Defaults to SYSTEM_PULLREQUEST_PULLREQUESTNUMBER when empty. +- name: prNumber + type: string + default: '' + +# When true (default), the monitor tracks Helix jobs and pipeline jobs across every stage of the +# build. When false, the monitor only tracks jobs that belong to the same Azure DevOps stage as +# the monitor job itself (i.e. this stage). +- name: monitorAllStages + type: boolean + default: true + +# Advanced: optional pipeline artifact (produced earlier in this run) that contains the tool +# nupkg. When set, the artifact is downloaded and the tool is installed from that nupkg using +# a generated NuGet.config that references only the download directory. Primarily intended for +# the Arcade repository itself; other repos should leave this empty and pin the tool in .config/dotnet-tools.json. +- name: toolNupkgArtifactName + type: string + default: '' + +# Advanced: sub-path within the downloaded artifact where the tool nupkg is located. 
+- name: toolNupkgArtifactSubPath + type: string + default: 'packages/Release/NonShipping' + +stages: +- stage: ${{ parameters.stageName }} + dependsOn: ${{ parameters.dependsOn }} + ${{ if ne(parameters.condition, '') }}: + condition: ${{ parameters.condition }} + jobs: + - template: /eng/common/core-templates/job/helix-job-monitor.yml + parameters: + jobName: ${{ parameters.jobName }} + toolPackageId: ${{ parameters.toolPackageId }} + toolCommand: ${{ parameters.toolCommand }} + toolVersion: ${{ parameters.toolVersion }} + toolSource: ${{ parameters.toolSource }} + toolNupkgArtifactName: ${{ parameters.toolNupkgArtifactName }} + toolNupkgArtifactSubPath: ${{ parameters.toolNupkgArtifactSubPath }} + helixBaseUri: ${{ parameters.helixBaseUri }} + helixAccessToken: ${{ parameters.helixAccessToken }} + pollingIntervalSeconds: ${{ parameters.pollingIntervalSeconds }} + timeoutInMinutes: ${{ parameters.timeoutInMinutes }} + jobMonitorName: ${{ parameters.jobMonitorName }} + organization: ${{ parameters.organization }} + repository: ${{ parameters.repository }} + prNumber: ${{ parameters.prNumber }} + monitorAllStages: ${{ parameters.monitorAllStages }} diff --git a/eng/common/templates-official/stages/helix-job-monitor.yml b/eng/common/templates-official/stages/helix-job-monitor.yml new file mode 100644 index 00000000000..8c9cee87e37 --- /dev/null +++ b/eng/common/templates-official/stages/helix-job-monitor.yml @@ -0,0 +1,5 @@ +stages: +- template: /eng/common/core-templates/stages/helix-job-monitor.yml + parameters: + ${{ each parameter in parameters }}: + ${{ parameter.key }}: ${{ parameter.value }} diff --git a/eng/common/templates/stages/helix-job-monitor.yml b/eng/common/templates/stages/helix-job-monitor.yml new file mode 100644 index 00000000000..8c9cee87e37 --- /dev/null +++ b/eng/common/templates/stages/helix-job-monitor.yml @@ -0,0 +1,5 @@ +stages: +- template: /eng/common/core-templates/stages/helix-job-monitor.yml + parameters: + ${{ each parameter in 
parameters }}: + ${{ parameter.key }}: ${{ parameter.value }} diff --git a/src/Microsoft.DotNet.Helix/AzureDevOpsTestPublisher/AzureDevOpsResultPublisher.cs b/src/Microsoft.DotNet.Helix/AzureDevOpsTestPublisher/AzureDevOpsResultPublisher.cs new file mode 100644 index 00000000000..e2e2e1d9604 --- /dev/null +++ b/src/Microsoft.DotNet.Helix/AzureDevOpsTestPublisher/AzureDevOpsResultPublisher.cs @@ -0,0 +1,713 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.IO.Compression; +using System.Net; +using System.Net.Http.Headers; +using System.Security.Cryptography; +using System.Text; +using System.Text.Json; +using Microsoft.DotNet.Helix.AzureDevOpsTestPublisher.Model; +using Microsoft.Extensions.Logging; + +namespace Microsoft.DotNet.Helix.AzureDevOpsTestPublisher; + +public sealed class AzureDevOpsResultPublisher +{ + private const int TestListBuckets = 32; + private static readonly JsonSerializerOptions s_serializerOptions = new(JsonSerializerDefaults.Web) + { + WriteIndented = false, + }; + + private static string s_lastSendContent = string.Empty; + + private readonly AzureDevOpsReportingParameters _azdoParameters; + private readonly HttpClient _httpClient; + private readonly ILogger _logger; + + public AzureDevOpsResultPublisher( + AzureDevOpsReportingParameters azdoParameters, + ILogger logger) + { + _azdoParameters = azdoParameters; + _httpClient = CreateHttpClient(azdoParameters.AccessToken); + _logger = logger; + } + + public async Task UploadTestResultsAsync(List testResultFiles, object resultMetadata, CancellationToken cancellationToken = default) + { + var testResultReader = new LocalTestResultsReader(_logger); + + Task>[] parseTasks = [.. 
testResultFiles.Select(file => testResultReader.ReadResultFileAsync(file, cancellationToken))]; + IReadOnlyList[] parsedResults = await Task.WhenAll(parseTasks); + if (parsedResults.Length == 0) + { + _logger.LogWarning("No test results were discovered under."); + return true; + } + + IReadOnlyList aggregatedResults = new ResultAggregator().Aggregate(parsedResults); + if (aggregatedResults.Count == 0) + { + _logger.LogWarning("Test results were discovered but none could be aggregated."); + return true; + } + + await UploadTestResultsAsync(aggregatedResults, resultMetadata, cancellationToken); + return aggregatedResults.All(result => result.Result != "Failed"); // TODO: maybe there's a better way to find out if a test failed? Is this extensive enough? + } + + public async Task UploadTestResultsAsync(IEnumerable results, object resultMetadata, CancellationToken cancellationToken = default) + { + try + { + long publishedTestCount = 0; + var converted = ConvertResults(results, resultMetadata).ToList(); + foreach (List batch in Batch(converted, 1000, static t => Size(t.Converted))) + { + IReadOnlyList publishedTests = await PublishResultsAsync(batch, cancellationToken); + publishedTestCount += publishedTests.Count; + } + + _logger.LogInformation("Uploaded {Count} results", publishedTestCount); + + await SendMetadataAsync(results, cancellationToken); + } + catch (TerminalError ex) + { + await LogErrorAsync(ex, cancellationToken); + throw; + } + } + + private async Task LogErrorAsync(Exception exception, CancellationToken cancellationToken) + { + _logger.LogError(exception, "Failed to upload test results to Azure DevOps."); + /* TODO + await _eventClient.ErrorAsync( + HelixEnvironmentSettings.FromEnvironment(), + "DevOpsReportFailure", + $"Failed to upload results: {exception.Message}", + cancellationToken: cancellationToken); + */ + } + + private static async Task SendMetadataAsync( + IEnumerable allTestResults, + CancellationToken cancellationToken) + { + var 
partitionedResults = new Dictionary>(); + var resultCounts = new Dictionary(StringComparer.Ordinal); + + void ProcessResultForMetadata(AggregatedResult result) + { + resultCounts[result.Result] = resultCounts.TryGetValue(result.Result, out int count) ? count + 1 : 1; + if (!string.Equals(result.Result, "Passed", StringComparison.Ordinal)) + { + return; + } + + string name = result.Name; + string? argumentHash = null; + string partitionKey = name; + int parenthesisIndex = name.IndexOf('('); + if (parenthesisIndex >= 0) + { + string argumentList = name[(parenthesisIndex + 1)..].TrimEnd(')'); + name = name[..parenthesisIndex]; + argumentHash = Convert.ToBase64String(SHA1.HashData(Encoding.UTF8.GetBytes(argumentList))); + partitionKey = name + argumentHash; + } + + int bucket = SHA1.HashData(Encoding.UTF8.GetBytes(partitionKey))[0] % TestListBuckets; + if (!partitionedResults.TryGetValue(bucket, out List? testNames)) + { + testNames = []; + partitionedResults[bucket] = testNames; + } + + testNames.Add(new TestListRow(name, argumentHash)); + } + + void ProcessTestForMetadata(AggregatedResult result) + { + if (result.AggregationType == AggregationType.DataDriven && result.SubResults.Count > 0) + { + foreach (AggregatedResult subResult in result.SubResults) + { + ProcessTestForMetadata(subResult); + } + } + else if (result.AggregationType == AggregationType.Single) + { + ProcessResultForMetadata(result); + } + } + + foreach (AggregatedResult result in allTestResults) + { + cancellationToken.ThrowIfCancellationRequested(); + ProcessTestForMetadata(result); + } + /* + var uploadedUrls = new Dictionary(); + /* TODO + foreach ((int key, List? 
testNames) in partitionedResults) + { + byte[] csvBytes = CreateCompressedCsv(testNames); + string fileName = $"{Guid.NewGuid():N}.csv.gz"; + uploadedUrls[key] = await _uploadClient.UploadAsync(csvBytes, fileName, "application/gzip", cancellationToken); + }* / + + var dataModel = new + { + version = 2, + rerun_tests = backChannelCases, + test_lists = uploadedUrls, + partitions = TestListBuckets, + result_counts = resultCounts, + }; + + byte[] rawBytes = Encoding.UTF8.GetBytes(JsonSerializer.Serialize(dataModel, s_serializerOptions)); + byte[] compressedBytes = Compress(rawBytes); + string base64Data = Convert.ToBase64String(compressedBytes); + string fileNameBase = $"__helix_metadata_{Guid.NewGuid():N}.json.gz"; + + //await SendWithRetryAsync( + // HttpMethod.Post, + // $"{_azdoParameters.TeamProject}/_apis/test/runs/{_azdoParameters.TestRunId}/attachments?api-version=7.1-preview.1", + // new TestRunAttachmentRequest(fileNameBase, base64Data), + // cancellationToken); + + /* TODO + string metadataUrl = await _uploadClient.UploadAsync(compressedBytes, fileNameBase, "application/gzip", cancellationToken); + await _eventClient.SendAsync( + new + { + Type = "AzureDevOpsTestRunMetadata", + TestRunProject = _azdoParameters.TeamProject, + TestRunId = _azdoParameters.TestRunId, + Url = metadataUrl, + }, + cancellationToken); + */ + } + + private async Task> PublishResultsAsync( + IReadOnlyList converted, + CancellationToken cancellationToken) + { + var testCaseResults = converted.Select(static c => c.Converted).ToList(); + var originalList = converted.Select(static c => c.Aggregated).ToList(); + + using HttpResponseMessage response = await SendWithRetryAsync( + HttpMethod.Post, + $"{_azdoParameters.TeamProject}/_apis/test/runs/{_azdoParameters.TestRunId}/results?api-version=7.1-preview.6", + testCaseResults, + cancellationToken); + + IReadOnlyList publishedResults = await ReadPublishedResultsAsync(response, cancellationToken); + if (publishedResults.Count == 0) + { + 
_logger.LogWarning("The test run appears to have been closed, aborting test result uploads."); + return []; + } + + var hotPathTests = new List(); + foreach ((PublishedTestCaseResultReference First, AggregatedResult Second, PublishedTestCase Third) triplet in publishedResults.Zip(originalList, testCaseResults)) + { + PublishedTestCaseResultReference published = triplet.First; + AggregatedResult original = triplet.Second; + PublishedTestCase testCase = triplet.Third; + + if (published.Id == -1) + { + _logger.LogWarning("Azure DevOps test ID returned -1, unable to attach files."); + continue; + } + + testCase = testCase with { Id = published.Id }; + bool addedTest = false; + + void AddToHotPath() + { + if (addedTest) + { + return; + } + + addedTest = true; + hotPathTests.Add(testCase); + } + + async Task IterateSubResultsAsync( + IReadOnlyList? publishedSubResults, + IReadOnlyList originalSubResults, + long testId) + { + if (publishedSubResults is null || publishedSubResults.Count == 0) + { + if (originalSubResults.Count > 0) + { + _logger.LogError("Published results do not include sub-results, attachments lost."); + } + + return; + } + + if (original.AggregationType == AggregationType.Rerun) + { + AddToHotPath(); + } + + if (publishedSubResults.Count != originalSubResults.Count) + { + _logger.LogError("Published sub-result counts do not match uploaded attachments. 
Attachments lost."); + return; + } + + foreach ((PublishedSubResultReference publishedSubResult, AggregatedResult originalSubResult) subTriplet in publishedSubResults.Zip(originalSubResults, (publishedSubResult, originalSubResult) => (publishedSubResult, originalSubResult))) + { + foreach (TestResultAttachment attachment in subTriplet.originalSubResult.Attachments) + { + await SendAttachmentAsync(attachment, testId, subTriplet.publishedSubResult.Id, cancellationToken); + } + + await IterateSubResultsAsync(subTriplet.publishedSubResult.SubResults, subTriplet.originalSubResult.SubResults, testId); + } + } + + foreach (TestResultAttachment attachment in original.Attachments) + { + await SendAttachmentAsync(attachment, published.Id, null, cancellationToken); + } + + await IterateSubResultsAsync(published.SubResults, original.SubResults, published.Id); + } + + return hotPathTests; + } + + private async Task SendAttachmentAsync( + TestResultAttachment attachment, + long testId, + long? subResultId, + CancellationToken cancellationToken) + { + var request = new TestRunAttachmentRequest( + attachment.Name, + Convert.ToBase64String(Encoding.UTF8.GetBytes(attachment.Text))); + + string path = subResultId is long subId + ? 
$"{_azdoParameters.TeamProject}/_apis/test/runs/{_azdoParameters.TestRunId}/results/{testId}/attachments?testSubResultId={subId}&api-version=7.1-preview.1" + : $"{_azdoParameters.TeamProject}/_apis/test/runs/{_azdoParameters.TestRunId}/results/{testId}/attachments?api-version=7.1-preview.1"; + + using HttpResponseMessage response = await SendWithRetryAsync(HttpMethod.Post, path, request, cancellationToken); + _ = response; + } + + private IEnumerable ConvertResults(IEnumerable results, object resultMetadata) + { + static string GetResultGroupType(AggregationType aggregationType) + { + return aggregationType switch + { + AggregationType.Single => "None", + AggregationType.DataDriven => "dataDriven", + AggregationType.Rerun => "rerun", + _ => "None", + }; + } + + string comment = JsonSerializer.Serialize(resultMetadata) ?? string.Empty; + + PublishedSubResult ConvertToSubTest(AggregatedResult result) + { + var customFields = new List(); + if (result.IsFlaky) + { + customFields.Add(new CustomField("IsTestResultFlaky", true)); + } + + if ((result.AttemptId ?? 0) > 1) + { + customFields.Add(new CustomField("AttemptId", result.AttemptId!.Value - 1)); + } + + return new PublishedSubResult + { + Comment = comment, + CustomFields = customFields, + DisplayName = result.Name, + Outcome = result.Result, + DurationInMs = result.DurationSeconds * 1000.0, + StackTrace = result.StackTrace, + ErrorMessage = result.FailureMessage, + SubResults = result.SubResults.Count == 0 ? null : [.. 
result.SubResults.Select(ConvertToSubTest)], + ResultGroupType = GetResultGroupType(result.AggregationType), + }; + } + + ConvertedResult ConvertResult(AggregatedResult result) + { + var customFields = new List(); + if (result.IsFlaky) + { + customFields.Add(new CustomField("IsTestResultFlaky", true)); + } + + if (result.AggregationType == AggregationType.Rerun && result.SubResults.Count > 1) + { + customFields.Add(new CustomField("AttemptId", result.SubResults.Count - 1)); + } + + return new ConvertedResult( + new PublishedTestCase + { + TestCaseTitle = result.Name, + AutomatedTestName = result.Name, + AutomatedTestType = "helix", + AutomatedTestStorage = comment, // TODO: This was workitem ID + Priority = 1, + DurationInMs = result.DurationSeconds * 1000.0, + Outcome = result.Result, + State = "Completed", + Comment = comment, + StackTrace = result.StackTrace, + ErrorMessage = result.FailureMessage, + SubResults = result.SubResults.Count == 0 ? null : [.. result.SubResults.Select(ConvertToSubTest)], + ResultGroupType = GetResultGroupType(result.AggregationType), + CustomFields = customFields, + }, + result); + } + + var converted = results.Select(ConvertResult).ToList(); + foreach (ConvertedResult? result in converted) + { + foreach (ConvertedResult chunk in Chunk(result, 950)) + { + yield return chunk; + } + } + } + + private static IEnumerable Chunk(ConvertedResult test, int limit) + { + if (Size(test.Converted) <= limit) + { + yield return test; + yield break; + } + + IEnumerable zippedSubTests = (test.Converted.SubResults ?? []) + .Zip(test.Aggregated.SubResults, (converted, aggregated) => new ChunkPair(converted, aggregated)); + + foreach (List zippedBatch in Batch(zippedSubTests, limit, static pair => Size(pair.Converted))) + { + yield return new ConvertedResult( + test.Converted with { SubResults = [.. 
zippedBatch.Select(static x => x.Converted)], Id = null }, + new AggregatedResult( + test.Aggregated.AggregationType, + test.Aggregated.Name, + test.Aggregated.DurationSeconds, + test.Aggregated.Result, + [.. zippedBatch.Select(static x => x.Aggregated)], + test.Aggregated.Attachments, + test.Aggregated.FailureMessage, + test.Aggregated.StackTrace, + isFlaky: test.Aggregated.IsFlaky, + attemptId: test.Aggregated.AttemptId)); + } + } + + private static int Size(PublishedTestCase test) + { + return 1 + (test.SubResults?.Sum(Size) ?? 0); + } + + private static int Size(PublishedSubResult test) + { + return 1 + (test.SubResults?.Sum(Size) ?? 0); + } + + private static IEnumerable> Batch(IEnumerable items, int limit, Func getSize) + { + var currentBatch = new List(); + int currentSize = 0; + + foreach (T? item in items) + { + int size = getSize(item); + if (size > limit) + { + throw new InvalidOperationException("Cannot split a result larger than the batching limit."); + } + + if (currentSize + size > limit && currentBatch.Count > 0) + { + yield return currentBatch; + currentBatch = []; + currentSize = 0; + } + + currentBatch.Add(item); + currentSize += size; + } + + if (currentBatch.Count > 0) + { + yield return currentBatch; + } + } + + private static HttpClient CreateHttpClient(string? accessToken) + { + var client = new HttpClient(); + client.DefaultRequestHeaders.Accept.Add(new MediaTypeWithQualityHeaderValue("application/json")); + + if (!string.IsNullOrWhiteSpace(accessToken)) + { + string basicToken = Convert.ToBase64String(Encoding.ASCII.GetBytes($":{accessToken}")); + client.DefaultRequestHeaders.Authorization = new AuthenticationHeaderValue("Basic", basicToken); + } + + return client; + } + + private async Task SendWithRetryAsync( + HttpMethod method, + string relativePath, + object? payload, + CancellationToken cancellationToken) + { + int triesLeft = 10; + string? body = payload is null ? 
null : JsonSerializer.Serialize(payload, s_serializerOptions); + if (!string.IsNullOrEmpty(body)) + { + s_lastSendContent = body; + } + + while (true) + { + Uri baseUri = _azdoParameters.CollectionUri.AbsoluteUri.EndsWith('/') + ? _azdoParameters.CollectionUri + : new Uri(_azdoParameters.CollectionUri.AbsoluteUri + '/', UriKind.Absolute); + + using var request = new HttpRequestMessage(method, new Uri(baseUri, relativePath)); + if (body is not null) + { + request.Content = new StringContent(body, Encoding.UTF8, "application/json"); + } + + HttpResponseMessage response = await _httpClient.SendAsync(request, cancellationToken); + if (response.IsSuccessStatusCode) + { + return response; + } + + string responseBody = await response.Content.ReadAsStringAsync(cancellationToken); + if (response.StatusCode == HttpStatusCode.ServiceUnavailable && triesLeft > 0) + { + response.Dispose(); + triesLeft--; + _logger.LogWarning("Hit HTTP 503 from Azure DevOps. Waiting three seconds and trying again."); + await Task.Delay(TimeSpan.FromSeconds(3), cancellationToken); + continue; + } + + if (responseBody.Contains("It may have been deleted", StringComparison.OrdinalIgnoreCase) + || responseBody.Contains("not authorized to access this resource", StringComparison.OrdinalIgnoreCase) + || responseBody.Contains("cannot be added or updated for a test run which is in Completed state", StringComparison.OrdinalIgnoreCase) + || response.StatusCode == HttpStatusCode.Forbidden + || response.StatusCode == HttpStatusCode.Unauthorized) + { + response.Dispose(); + throw new TerminalError(responseBody); + } + + try + { + if (!string.IsNullOrWhiteSpace(s_lastSendContent)) + { + /* TODO + await _uploadClient.UploadAsync( + Encoding.UTF8.GetBytes(s_lastSendContent), + "__failed_azdo_request_content.json", + "text/plain; charset=UTF-8", + cancellationToken); + */ + } + } + catch (Exception uploadException) + { + _logger.LogError(uploadException, "Failed to upload failed request payload."); + } + + 
response.Dispose(); + throw new AzureDevOpsReportingError($"Azure DevOps request failed with status code {(int)response.StatusCode}: {responseBody}"); + } + } + + private static async Task> ReadPublishedResultsAsync( + HttpResponseMessage response, + CancellationToken cancellationToken) + { + string content = await response.Content.ReadAsStringAsync(cancellationToken); + if (string.IsNullOrWhiteSpace(content)) + { + return []; + } + + using var document = JsonDocument.Parse(content); + JsonElement root = document.RootElement; + if (root.ValueKind == JsonValueKind.Array) + { + return [.. root.EnumerateArray().Select(ParsePublishedResult)]; + } + + if (root.TryGetProperty("value", out JsonElement value) && value.ValueKind == JsonValueKind.Array) + { + return [.. value.EnumerateArray().Select(ParsePublishedResult)]; + } + + return []; + } + + private static PublishedTestCaseResultReference ParsePublishedResult(JsonElement element) + { + var subResults = new List(); + if (element.TryGetProperty("subResults", out JsonElement subResultElement) && subResultElement.ValueKind == JsonValueKind.Array) + { + subResults.AddRange(subResultElement.EnumerateArray().Select(ParsePublishedSubResult)); + } + + return new PublishedTestCaseResultReference( + element.TryGetProperty("id", out JsonElement idElement) ? idElement.GetInt64() : -1, + subResults); + } + + private static PublishedSubResultReference ParsePublishedSubResult(JsonElement element) + { + var subResults = new List(); + if (element.TryGetProperty("subResults", out JsonElement subResultElement) && subResultElement.ValueKind == JsonValueKind.Array) + { + subResults.AddRange(subResultElement.EnumerateArray().Select(ParsePublishedSubResult)); + } + + return new PublishedSubResultReference( + element.TryGetProperty("id", out JsonElement idElement) ? 
idElement.GetInt64() : -1, + subResults); + } + + private static byte[] CreateCompressedCsv(IEnumerable rows) + { + var builder = new StringBuilder(); + foreach (TestListRow row in rows) + { + builder.Append(EscapeCsv(row.TestName)); + builder.Append(','); + builder.Append(EscapeCsv(row.ArgumentHash)); + builder.AppendLine(); + } + + return Compress(Encoding.UTF8.GetBytes(builder.ToString())); + } + + private static string EscapeCsv(string? value) + { + if (string.IsNullOrEmpty(value)) + { + return string.Empty; + } + + if (!value.Contains('"') && !value.Contains(',') && !value.Contains('\n') && !value.Contains('\r')) + { + return value; + } + + return $"\"{value.Replace("\"", "\"\"")}\""; + } + + private static byte[] Compress(ReadOnlySpan rawBytes) + { + using var target = new MemoryStream(); + using (var gzip = new GZipStream(target, CompressionLevel.SmallestSize, leaveOpen: true)) + { + gzip.Write(rawBytes); + } + + return target.ToArray(); + } + + private sealed record ConvertedResult(PublishedTestCase Converted, AggregatedResult Aggregated); + + private sealed record ChunkPair(PublishedSubResult Converted, AggregatedResult Aggregated); + + private sealed record TestListRow(string TestName, string? ArgumentHash); + + private sealed record TestRunAttachmentRequest(string FileName, string Stream); + + private sealed record CustomField(string FieldName, object Value); + + private sealed record PublishedTestCase + { + public long? 
Id { get; init; } + + public string TestCaseTitle { get; init; } = string.Empty; + + public string AutomatedTestName { get; init; } = string.Empty; + + public string AutomatedTestType { get; init; } = string.Empty; + + public string AutomatedTestStorage { get; init; } = string.Empty; + + public int Priority { get; init; } + + public double DurationInMs { get; init; } + + public string Outcome { get; init; } = string.Empty; + + public string State { get; init; } = string.Empty; + + public string Comment { get; init; } = string.Empty; + + public string? StackTrace { get; init; } + + public string? ErrorMessage { get; init; } + + public List? SubResults { get; init; } + + public string ResultGroupType { get; init; } = string.Empty; + + public List? CustomFields { get; init; } + } + + private sealed record PublishedSubResult + { + public long? Id { get; init; } + + public string Comment { get; init; } = string.Empty; + + public List? CustomFields { get; init; } + + public string DisplayName { get; init; } = string.Empty; + + public string Outcome { get; init; } = string.Empty; + + public double DurationInMs { get; init; } + + public string? StackTrace { get; init; } + + public string? ErrorMessage { get; init; } + + public List? SubResults { get; init; } + + public string ResultGroupType { get; init; } = string.Empty; + } + + private sealed record PublishedTestCaseResultReference(long Id, IReadOnlyList SubResults); + + private sealed record PublishedSubResultReference(long Id, IReadOnlyList SubResults); +} diff --git a/src/Microsoft.DotNet.Helix/AzureDevOpsTestPublisher/LocalTestResultsReader.cs b/src/Microsoft.DotNet.Helix/AzureDevOpsTestPublisher/LocalTestResultsReader.cs new file mode 100644 index 00000000000..63c4833fcfc --- /dev/null +++ b/src/Microsoft.DotNet.Helix/AzureDevOpsTestPublisher/LocalTestResultsReader.cs @@ -0,0 +1,203 @@ +// Licensed to the .NET Foundation under one or more agreements. 
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Globalization;
+using System.Xml.Linq;
+using Microsoft.DotNet.Helix.AzureDevOpsTestPublisher.Model;
+using Microsoft.Extensions.Logging;
+
+namespace Microsoft.DotNet.Helix.AzureDevOpsTestPublisher;
+
+/// <summary>
+/// Parses xUnit, TRX and JUnit XML result files from disk into <see cref="TestResult"/> instances.
+/// </summary>
+// NOTE(review): the logger may originally have been ILogger<LocalTestResultsReader>; generic type
+// arguments were lost from this copy of the patch and this one cannot be recovered from the code shown.
+public sealed class LocalTestResultsReader(ILogger logger)
+{
+    private readonly ILogger _logger = logger;
+
+    /// <summary>
+    /// Returns true when the file name of <paramref name="path"/> ends (case-insensitively) with one of the
+    /// recognized result-file suffixes: <c>.trx</c>, <c>testResults.xml</c>, <c>test-results.xml</c>,
+    /// <c>test_results.xml</c>, <c>junit-results.xml</c> or <c>junitresults.xml</c>.
+    /// </summary>
+    public static bool LooksLikeTestResultFile(string path)
+    {
+        string fileName = Path.GetFileName(path);
+        return fileName.EndsWith(".trx", StringComparison.OrdinalIgnoreCase)
+            || fileName.EndsWith("testResults.xml", StringComparison.OrdinalIgnoreCase)
+            || fileName.EndsWith("test-results.xml", StringComparison.OrdinalIgnoreCase)
+            || fileName.EndsWith("test_results.xml", StringComparison.OrdinalIgnoreCase)
+            || fileName.EndsWith("junit-results.xml", StringComparison.OrdinalIgnoreCase)
+            || fileName.EndsWith("junitresults.xml", StringComparison.OrdinalIgnoreCase);
+    }
+
+    /// <summary>
+    /// Loads <paramref name="filePath"/> as XML and converts it according to its root element:
+    /// <c>assemblies</c>/<c>assembly</c> (xUnit), <c>TestRun</c> (TRX) or <c>testsuites</c>/<c>testsuite</c> (JUnit).
+    /// </summary>
+    /// <param name="filePath">Path of the result file; the name of its parent directory is used as the work item name.</param>
+    /// <param name="cancellationToken">Cancels the load; cancellation is propagated to the caller.</param>
+    /// <returns>
+    /// The parsed results. An empty list is returned for an unrecognized root element or when the file
+    /// cannot be read or parsed (a warning is logged in the latter case).
+    /// </returns>
+    public async Task<IReadOnlyList<TestResult>> ReadResultFileAsync(string filePath, CancellationToken cancellationToken = default)
+    {
+        try
+        {
+            using FileStream stream = File.OpenRead(filePath);
+            XDocument document = await XDocument.LoadAsync(stream, LoadOptions.PreserveWhitespace, cancellationToken);
+            string rootName = document.Root?.Name.LocalName ?? string.Empty;
+            string workItemName = new DirectoryInfo(Path.GetDirectoryName(filePath) ?? string.Empty).Name;
+
+            return rootName switch
+            {
+                "assemblies" or "assembly" => ReadXunitResults(document),
+                "TestRun" => ReadTrxResults(document, workItemName),
+                "testsuites" or "testsuite" => ReadJUnitResults(document, workItemName),
+                _ => [],
+            };
+        }
+        // Cancellation is not a parse failure: let it reach the caller instead of logging it and returning "no results".
+        catch (Exception ex) when (ex is not OperationCanceledException)
+        {
+            _logger.LogWarning(ex, "Failed to parse test results file '{Path}'.", filePath);
+            return [];
+        }
+    }
+
+    /// <summary>
+    /// Converts every <c>test</c> element of an xUnit document. The test's <c>output</c> element, when not blank,
+    /// becomes an <c>output.txt</c> attachment. Elements are matched by local name, ignoring XML namespaces.
+    /// </summary>
+    private static IReadOnlyList<TestResult> ReadXunitResults(XDocument document)
+    {
+        return [..
+            document.Descendants().Where(static e => e.Name.LocalName == "test").Select(static test =>
+            {
+                XElement? failure = test.Elements().FirstOrDefault(static x => x.Name.LocalName == "failure");
+                string? message = failure?.Elements().FirstOrDefault(static x => x.Name.LocalName == "message")?.Value?.Trim();
+                string? stackTrace = failure?.Elements().FirstOrDefault(static x => x.Name.LocalName == "stack-trace")?.Value?.Trim();
+                string? output = test.Elements().FirstOrDefault(static x => x.Name.LocalName == "output")?.Value?.Trim();
+                string? skipReason = test.Elements().FirstOrDefault(static x => x.Name.LocalName == "reason")?.Value?.Trim();
+
+                List<TestResultAttachment> attachments = [];
+                AddAttachmentIfNotEmpty(attachments, "output.txt", output);
+
+                string typeName = GetAttribute(test, "type") ?? string.Empty;
+                string method = GetAttribute(test, "method") ?? string.Empty;
+                // Prefer the explicit name; otherwise compose "type.method", or just the method when no type is given.
+                string name = GetAttribute(test, "name")
+                    ?? (!string.IsNullOrEmpty(typeName) && !string.IsNullOrEmpty(method) ? $"{typeName}.{method}" : method);
+
+                return new TestResult(
+                    name,
+                    "xunit",
+                    typeName,
+                    method,
+                    ParseDouble(GetAttribute(test, "time")),
+                    NormalizeOutcome(GetAttribute(test, "result")),
+                    GetAttribute(failure, "exception-type"),
+                    message,
+                    stackTrace,
+                    skipReason,
+                    attachments);
+            })];
+    }
+
+    /// <summary>
+    /// Converts every <c>testcase</c> element of a JUnit document. A <c>skipped</c> child yields "Skip", otherwise a
+    /// <c>failure</c> or <c>error</c> child yields "Fail", otherwise "Pass". <paramref name="workItemName"/> stands in
+    /// for a missing <c>classname</c> attribute.
+    /// </summary>
+    private static IReadOnlyList<TestResult> ReadJUnitResults(XDocument document, string workItemName)
+    {
+        return [..
+            document.Descendants().Where(static e => e.Name.LocalName == "testcase").Select(test =>
+            {
+                XElement? failure = test.Elements().FirstOrDefault(static x => x.Name.LocalName is "failure" or "error");
+                XElement? skipped = test.Elements().FirstOrDefault(static x => x.Name.LocalName == "skipped");
+                string? stdout = test.Elements().FirstOrDefault(static x => x.Name.LocalName == "system-out")?.Value?.Trim();
+                string? stderr = test.Elements().FirstOrDefault(static x => x.Name.LocalName == "system-err")?.Value?.Trim();
+
+                List<TestResultAttachment> attachments = [];
+                AddAttachmentIfNotEmpty(attachments, "stdout.txt", stdout);
+                AddAttachmentIfNotEmpty(attachments, "stderr.txt", stderr);
+
+                string className = GetAttribute(test, "classname") ?? workItemName;
+                string method = GetAttribute(test, "name") ?? string.Empty;
+                string name = !string.IsNullOrEmpty(className) ? $"{className}.{method}" : method;
+                string result = skipped is not null ? "Skip" : failure is not null ? "Fail" : "Pass";
+
+                return new TestResult(
+                    name,
+                    "junit",
+                    className,
+                    method,
+                    ParseDouble(GetAttribute(test, "time")),
+                    result,
+                    null,
+                    failure?.Value?.Trim(),
+                    null,
+                    skipped?.Value?.Trim(),
+                    attachments);
+            })];
+    }
+
+    /// <summary>
+    /// Converts every <c>UnitTestResult</c> element of a TRX document. Each result is joined to its <c>UnitTest</c>
+    /// definition through the <c>testId</c>/<c>id</c> attributes to find the class and method name;
+    /// <paramref name="workItemName"/> stands in for a missing class name.
+    /// </summary>
+    private static IReadOnlyList<TestResult> ReadTrxResults(XDocument document, string workItemName)
+    {
+        // ToDictionary throws on duplicate ids; ReadResultFileAsync turns that into a logged warning and an empty list.
+        Dictionary<string, XElement> unitTestsById = document
+            .Descendants()
+            .Where(static e => e.Name.LocalName == "UnitTest")
+            .Select(static unitTest => (Id: GetAttribute(unitTest, "id"), Element: unitTest))
+            .Where(static x => !string.IsNullOrEmpty(x.Id))
+            .ToDictionary(static x => x.Id!, static x => x.Element, StringComparer.OrdinalIgnoreCase);
+
+        return [..
+            document.Descendants().Where(static e => e.Name.LocalName == "UnitTestResult").Select(result =>
+            {
+                string testId = GetAttribute(result, "testId") ?? string.Empty;
+                unitTestsById.TryGetValue(testId, out XElement? unitTest);
+                XElement? testMethod = unitTest?.Descendants().FirstOrDefault(static x => x.Name.LocalName == "TestMethod");
+
+                string className = GetAttribute(testMethod, "className") ?? workItemName;
+                string method = GetAttribute(testMethod, "name") ?? GetAttribute(result, "testName") ?? string.Empty;
+                string displayName = GetAttribute(result, "testName")
+                    ?? (!string.IsNullOrEmpty(className) ? $"{className}.{method}" : method);
+
+                XElement? output = result.Descendants().FirstOrDefault(static x => x.Name.LocalName == "Output");
+                string? failureMessage = output?.Descendants().FirstOrDefault(static x => x.Name.LocalName == "Message")?.Value?.Trim();
+                string? stackTrace = output?.Descendants().FirstOrDefault(static x => x.Name.LocalName == "StackTrace")?.Value?.Trim();
+                string? stdout = output?.Descendants().FirstOrDefault(static x => x.Name.LocalName == "StdOut")?.Value?.Trim();
+                string? stderr = output?.Descendants().FirstOrDefault(static x => x.Name.LocalName == "StdErr")?.Value?.Trim();
+
+                List<TestResultAttachment> attachments = [];
+                AddAttachmentIfNotEmpty(attachments, "stdout.txt", stdout);
+                AddAttachmentIfNotEmpty(attachments, "stderr.txt", stderr);
+
+                string rawOutcome = GetAttribute(result, "outcome") ?? string.Empty;
+                string normalizedOutcome = NormalizeOutcome(rawOutcome);
+                // For skipped tests the output message doubles as the skip reason.
+                string? skipReason = string.Equals(normalizedOutcome, "Skip", StringComparison.Ordinal) ? failureMessage : null;
+
+                return new TestResult(
+                    displayName,
+                    "trx",
+                    className,
+                    method,
+                    ParseDuration(GetAttribute(result, "duration")),
+                    normalizedOutcome,
+                    null,
+                    failureMessage,
+                    stackTrace,
+                    skipReason,
+                    attachments);
+            })];
+    }
+
+    /// <summary>Returns the value of attribute <paramref name="name"/>, or null when the element or attribute is missing.</summary>
+    private static string? GetAttribute(XElement? element, string name)
+        => element?.Attribute(name)?.Value;
+
+    /// <summary>Parses an invariant-culture floating point number; returns 0 when <paramref name="value"/> is null or not a number.</summary>
+    private static double ParseDouble(string? value)
+    {
+        return double.TryParse(value, NumberStyles.Float, CultureInfo.InvariantCulture, out double result)
+            ? result
+            : 0;
+    }
+
+    /// <summary>
+    /// Parses a duration given either as a plain number of seconds or as a <see cref="TimeSpan"/> string
+    /// (for example <c>00:00:01.5</c>) and returns it in seconds; returns 0 when neither form matches.
+    /// </summary>
+    private static double ParseDuration(string? value)
+    {
+        // Try the plain number first: TimeSpan.TryParse reads a bare integer such as "12" as 12 days,
+        // which would turn a whole-second duration into days and leave the numeric path unreachable for it.
+        if (double.TryParse(value, NumberStyles.Float, CultureInfo.InvariantCulture, out double seconds))
+        {
+            return seconds;
+        }
+
+        return TimeSpan.TryParse(value, CultureInfo.InvariantCulture, out TimeSpan result)
+            ? result.TotalSeconds
+            : 0;
+    }
+
+    /// <summary>
+    /// Maps a raw outcome string (trimmed, case-insensitive) to "Pass", "Skip" or "Fail"; anything else,
+    /// including null, maps to "None".
+    /// </summary>
+    private static string NormalizeOutcome(string? value)
+    {
+        return value?.Trim().ToLowerInvariant() switch
+        {
+            "pass" or "passed" or "success" or "succeeded" => "Pass",
+            "skip" or "skipped" or "notexecuted" or "notrun" => "Skip",
+            "fail" or "failed" or "error" or "timeout" or "aborted" => "Fail",
+            _ => "None",
+        };
+    }
+
+    /// <summary>Appends a text attachment named <paramref name="name"/> unless <paramref name="text"/> is null or whitespace.</summary>
+    private static void AddAttachmentIfNotEmpty(List<TestResultAttachment> attachments, string name, string? text)
+    {
+        if (!string.IsNullOrWhiteSpace(text))
+        {
+            attachments.Add(new TestResultAttachment(name, text));
+        }
+    }
+}
diff --git a/src/Microsoft.DotNet.Helix/AzureDevOpsTestPublisher/Microsoft.DotNet.Helix.AzureDevOpsTestPublisher.csproj b/src/Microsoft.DotNet.Helix/AzureDevOpsTestPublisher/Microsoft.DotNet.Helix.AzureDevOpsTestPublisher.csproj
new file mode 100644
index 00000000000..50f940b40a3
--- /dev/null
+++ b/src/Microsoft.DotNet.Helix/AzureDevOpsTestPublisher/Microsoft.DotNet.Helix.AzureDevOpsTestPublisher.csproj
@@ -0,0 +1,13 @@
+
+
+
+ $(BundledNETCoreAppTargetFramework)
+ enable
+ enable
+
+
+
+
+
+
+
diff --git a/src/Microsoft.DotNet.Helix/AzureDevOpsTestPublisher/Model/AzureDevOpsReportingError.cs b/src/Microsoft.DotNet.Helix/AzureDevOpsTestPublisher/Model/AzureDevOpsReportingError.cs
new file mode 100644
index 00000000000..90af5a10e36
--- /dev/null
+++ b/src/Microsoft.DotNet.Helix/AzureDevOpsTestPublisher/Model/AzureDevOpsReportingError.cs
@@ -0,0 +1,8 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+namespace Microsoft.DotNet.Helix.AzureDevOpsTestPublisher.Model;
+
+/// <summary>Exception carrying the message of a failed Azure DevOps reporting operation.</summary>
+public sealed class AzureDevOpsReportingError(string message) : Exception(message)
+{
+}
diff --git a/src/Microsoft.DotNet.Helix/AzureDevOpsTestPublisher/Model/AzureDevOpsReportingParameters.cs b/src/Microsoft.DotNet.Helix/AzureDevOpsTestPublisher/Model/AzureDevOpsReportingParameters.cs
new file mode 100644
index 00000000000..a7635c97c46
--- /dev/null
+++ b/src/Microsoft.DotNet.Helix/AzureDevOpsTestPublisher/Model/AzureDevOpsReportingParameters.cs
@@ -0,0 +1,10 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+namespace Microsoft.DotNet.Helix.AzureDevOpsTestPublisher.Model;
+
+/// <summary>
+/// Immutable set of values identifying where test results are reported in Azure DevOps.
+/// </summary>
+/// <param name="CollectionUri">URI of the Azure DevOps collection.</param>
+/// <param name="TeamProject">Team project name.</param>
+/// <param name="TestRunId">Identifier of the test run.</param>
+/// <param name="AccessToken">Optional access token; defaults to null.</param>
+public sealed record AzureDevOpsReportingParameters(
+    Uri CollectionUri,
+    string TeamProject,
+    string TestRunId,
+    string? AccessToken = null);
diff --git a/src/Microsoft.DotNet.Helix/AzureDevOpsTestPublisher/Model/PackedTestReport.cs b/src/Microsoft.DotNet.Helix/AzureDevOpsTestPublisher/Model/PackedTestReport.cs
new file mode 100644
index 00000000000..1cee30425a4
--- /dev/null
+++ b/src/Microsoft.DotNet.Helix/AzureDevOpsTestPublisher/Model/PackedTestReport.cs
@@ -0,0 +1,6 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+namespace Microsoft.DotNet.Helix.AzureDevOpsTestPublisher.Model;
+
+/// <summary>Pairs a set of results with the Azure DevOps parameters they are to be reported with.</summary>
+// NOTE(review): the element type of Results was lost from this copy of the patch (generic type arguments
+// were stripped) and cannot be determined from the code shown here - restore it from the original source.
+public sealed record PackedTestReport(AzureDevOpsReportingParameters AzdoParameters, IReadOnlyList Results);
diff --git a/src/Microsoft.DotNet.Helix/AzureDevOpsTestPublisher/Model/TerminalError.cs b/src/Microsoft.DotNet.Helix/AzureDevOpsTestPublisher/Model/TerminalError.cs
new file mode 100644
index 00000000000..174d20afaee
--- /dev/null
+++ b/src/Microsoft.DotNet.Helix/AzureDevOpsTestPublisher/Model/TerminalError.cs
@@ -0,0 +1,8 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+namespace Microsoft.DotNet.Helix.AzureDevOpsTestPublisher.Model;
+
+/// <summary>
+/// Internal exception type. NOTE(review): presumably signals a failure that should not be retried - confirm against its throw sites.
+/// </summary>
+internal sealed class TerminalError(string message) : Exception(message)
+{
+}
diff --git a/src/Microsoft.DotNet.Helix/AzureDevOpsTestPublisher/Model/TestResult.cs b/src/Microsoft.DotNet.Helix/AzureDevOpsTestPublisher/Model/TestResult.cs
new file mode 100644
index 00000000000..61904efb6c6
--- /dev/null
+++ b/src/Microsoft.DotNet.Helix/AzureDevOpsTestPublisher/Model/TestResult.cs
@@ -0,0 +1,42 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+namespace Microsoft.DotNet.Helix.AzureDevOpsTestPublisher.Model;
+
+/// <summary>
+/// A single test outcome as read from a result file. Null string arguments for the non-nullable
+/// properties are normalized to the empty string, and a null attachment list to an empty list.
+/// </summary>
+/// <param name="name">Display name of the test.</param>
+/// <param name="kind">Identifier of the result format the test was read from.</param>
+/// <param name="typeName">Name of the type or class containing the test.</param>
+/// <param name="method">Name of the test method.</param>
+/// <param name="durationSeconds">Duration of the test, in seconds.</param>
+/// <param name="result">Outcome string of the test.</param>
+/// <param name="exceptionType">Type of the exception that failed the test, when known.</param>
+/// <param name="failureMessage">Failure message, when present.</param>
+/// <param name="stackTrace">Failure stack trace, when present.</param>
+/// <param name="skipReason">Reason the test was skipped, when present.</param>
+/// <param name="attachments">Text attachments captured for the test; null is treated as empty.</param>
+public sealed class TestResult(
+    string name,
+    string kind,
+    string typeName,
+    string method,
+    double durationSeconds,
+    string result,
+    string? exceptionType,
+    string? failureMessage,
+    string? stackTrace,
+    string? skipReason,
+    IReadOnlyList<TestResultAttachment>? attachments = null)
+{
+    public string Name { get; } = name ?? string.Empty;
+
+    public string Kind { get; } = kind ?? string.Empty;
+
+    public string TypeName { get; } = typeName ?? string.Empty;
+
+    public string Method { get; } = method ?? string.Empty;
+
+    public double DurationSeconds { get; } = durationSeconds;
+
+    public string Result { get; } = result ?? string.Empty;
+
+    public string? ExceptionType { get; } = exceptionType;
+
+    public string? FailureMessage { get; } = failureMessage;
+
+    public string? StackTrace { get; } = stackTrace;
+
+    public string? SkipReason { get; } = skipReason;
+
+    public IReadOnlyList<TestResultAttachment> Attachments { get; } = attachments ?? [];
+
+    /// <summary>Mutable flag, false by default; it is the only settable member of this type.</summary>
+    public bool Ignored { get; set; }
+}
diff --git a/src/Microsoft.DotNet.Helix/AzureDevOpsTestPublisher/Model/TestResultAttachment.cs b/src/Microsoft.DotNet.Helix/AzureDevOpsTestPublisher/Model/TestResultAttachment.cs
new file mode 100644
index 00000000000..d5f1522b550
--- /dev/null
+++ b/src/Microsoft.DotNet.Helix/AzureDevOpsTestPublisher/Model/TestResultAttachment.cs
@@ -0,0 +1,6 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+namespace Microsoft.DotNet.Helix.AzureDevOpsTestPublisher.Model;
+
+/// <summary>A named piece of text attached to a test result.</summary>
+public sealed record TestResultAttachment(string Name, string Text);
diff --git a/src/Microsoft.DotNet.Helix/AzureDevOpsTestPublisher/ResultAggregator.cs b/src/Microsoft.DotNet.Helix/AzureDevOpsTestPublisher/ResultAggregator.cs
new file mode 100644
index 00000000000..2526ca9d0f3
--- /dev/null
+++ b/src/Microsoft.DotNet.Helix/AzureDevOpsTestPublisher/ResultAggregator.cs
@@ -0,0 +1,333 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using Microsoft.DotNet.Helix.AzureDevOpsTestPublisher.Model;
+
+namespace Microsoft.DotNet.Helix.AzureDevOpsTestPublisher;
+
+/// <summary>How the sub-results of an <see cref="AggregatedResult"/> relate to it.</summary>
+public enum AggregationType
+{
+    Single = 0,
+    Rerun = 1,
+    DataDriven = 2,
+}
+
+/// <summary>
+/// A test outcome produced by <see cref="ResultAggregator"/>, optionally with nested sub-results.
+/// Null names/results are normalized to the empty string and null lists to empty lists.
+/// When no failure message is supplied, <see cref="FailureMessage"/> falls back to the skip reason.
+/// </summary>
+public sealed class AggregatedResult(
+    AggregationType aggregationType,
+    string name,
+    double durationSeconds,
+    string result,
+    IReadOnlyList<AggregatedResult>? subResults = null,
+    IReadOnlyList<TestResultAttachment>? attachments = null,
+    string? failureMessage = null,
+    string? stackTrace = null,
+    string? skipReason = null,
+    bool isFlaky = false,
+    int? attemptId = null)
+{
+    public AggregationType AggregationType { get; } = aggregationType;
+
+    public string Name { get; } = name ?? string.Empty;
+
+    public double DurationSeconds { get; } = durationSeconds;
+
+    public string Result { get; } = result ?? string.Empty;
+
+    public IReadOnlyList<TestResultAttachment> Attachments { get; } = attachments ?? [];
+
+    public IReadOnlyList<AggregatedResult> SubResults { get; } = subResults ?? [];
+
+    public string? FailureMessage { get; } = failureMessage ?? skipReason;
+
+    public string? StackTrace { get; } = stackTrace;
+
+    public int? AttemptId { get; } = attemptId;
+
+    public bool IsFlaky { get; } = isFlaky;
+}
+
+/// <summary>
+/// Combines the results of one or more runs of the same tests into one <see cref="AggregatedResult"/> per test.
+/// </summary>
+public sealed class ResultAggregator
+{
+    /// <summary>
+    /// Aggregates several result sets (one per run). Results are grouped by the part of the test name that
+    /// precedes the first '(' so that data-driven variants of a test are reported under one entry.
+    /// </summary>
+    /// <param name="results">One sequence of results per run; null yields an empty list.</param>
+    /// <returns>One aggregated result per grouped test name.</returns>
+    public IReadOnlyList<AggregatedResult> Aggregate(IEnumerable<IEnumerable<TestResult>>? results)
+    {
+        if (results is null)
+        {
+            return [];
+        }
+
+        // Maps a raw "Pass"/"Fail"/"Skip" result to the published outcome name; an ignored failure is "NotApplicable".
+        string GetResult(TestResult test)
+        {
+            if (test.Ignored && string.Equals(test.Result, "Fail", StringComparison.Ordinal))
+            {
+                return "NotApplicable";
+            }
+
+            return test.Result switch
+            {
+                "Pass" => "Passed",
+                "Fail" => "Failed",
+                "Skip" => "NotExecuted",
+                _ => "None",
+            };
+        }
+
+        // Strips everything from the first '(' onwards, e.g. the argument list of a data-driven test.
+        static string ParseBasicName(string name)
+        {
+            int separatorIndex = name.IndexOf('(');
+            return separatorIndex >= 0 ? name[..separatorIndex] : name;
+        }
+
+        // Wraps one raw result as a leaf result; when attemptId is given the name is prefixed with "Attempt #N - ".
+        AggregatedResult CreateResultFromTest(TestResult result, int? attemptId = null)
+        {
+            return new AggregatedResult(
+                AggregationType.Single,
+                attemptId is null ? result.Name : $"Attempt #{attemptId} - {result.Name}",
+                result.DurationSeconds,
+                GetResult(result),
+                [],
+                result.Attachments,
+                result.FailureMessage,
+                result.StackTrace,
+                result.SkipReason,
+                attemptId: attemptId);
+        }
+
+        // Outcome of a data-driven group from a single run: any non-ignored failure fails it, otherwise any pass passes it.
+        string GetDataDrivenResult(IReadOnlyList<TestResult> groupedResults)
+        {
+            if (groupedResults.Count == 0)
+            {
+                return "None";
+            }
+
+            if (groupedResults.Any(static r => !r.Ignored && r.Result == "Fail"))
+            {
+                return "Failed";
+            }
+
+            if (groupedResults.Any(static r => r.Result == "Pass"))
+            {
+                return "Passed";
+            }
+
+            return GetResult(groupedResults[0]);
+        }
+
+        // Outcome of repeated attempts: any pass wins; it is flaky when there is also a non-ignored failure.
+        (bool IsFlaky, string Outcome) GetRerunResult(IReadOnlyList<TestResult> groupedResults)
+        {
+            if (groupedResults.Count == 0)
+            {
+                return (false, "None");
+            }
+
+            bool anyPass = groupedResults.Any(static r => r.Result == "Pass");
+            bool anyFail = groupedResults.Any(static r => !r.Ignored && r.Result == "Fail");
+            bool isFlaky = anyPass && anyFail;
+
+            if (anyPass)
+            {
+                return (isFlaky, "Passed");
+            }
+
+            if (anyFail)
+            {
+                return (isFlaky, "Failed");
+            }
+
+            return (false, GetResult(groupedResults[0]));
+        }
+
+        // Builds the aggregate for one basic name; byIterationThenName holds, per run, the results sharing that name.
+        AggregatedResult ProcessNamedTest(string name, IReadOnlyList<List<TestResult>> byIterationThenName)
+        {
+            if (byIterationThenName.Count == 1)
+            {
+                List<TestResult> singleRun = byIterationThenName[0];
+                if (singleRun.Count == 1)
+                {
+                    return CreateResultFromTest(singleRun[0]);
+                }
+
+                return new AggregatedResult(
+                    AggregationType.DataDriven,
+                    name,
+                    singleRun.Sum(static r => r.DurationSeconds),
+                    GetDataDrivenResult(singleRun),
+                    [.. singleRun.Select(testResult => CreateResultFromTest(testResult))]);
+            }
+
+            bool hasDataDriven = byIterationThenName.Any(static x => x.Count > 1);
+
+            if (hasDataDriven)
+            {
+                // Regroup across runs by full test name so each data-driven variant gets its own rerun set.
+                var dataDrivenByFullName = new Dictionary<string, List<TestResult>>(StringComparer.Ordinal);
+                foreach (List<TestResult> iteration in byIterationThenName)
+                {
+                    foreach (TestResult test in iteration)
+                    {
+                        if (!dataDrivenByFullName.TryGetValue(test.Name, out List<TestResult>? list))
+                        {
+                            list = [];
+                            dataDrivenByFullName[test.Name] = list;
+                        }
+
+                        list.Add(test);
+                    }
+                }
+
+                var subResults = new List<AggregatedResult>();
+                double totalDuration = 0;
+
+                foreach (KeyValuePair<string, List<TestResult>> pair in dataDrivenByFullName)
+                {
+                    List<TestResult> dataDrivenTests = pair.Value;
+                    if (dataDrivenTests.Count == 1)
+                    {
+                        subResults.Add(CreateResultFromTest(dataDrivenTests[0]));
+                        totalDuration += dataDrivenTests[0].DurationSeconds;
+                        continue;
+                    }
+
+                    (bool isFlaky, string? aggregateResult) = GetRerunResult(dataDrivenTests);
+                    double partialDuration = dataDrivenTests.Sum(static r => r.DurationSeconds);
+                    totalDuration += partialDuration;
+                    subResults.Add(new AggregatedResult(
+                        AggregationType.Rerun,
+                        pair.Key,
+                        partialDuration,
+                        aggregateResult,
+                        [.. dataDrivenTests.Select((r, index) => CreateResultFromTest(r, index + 1))],
+                        isFlaky: isFlaky));
+                }
+
+                string aggregateOutcome = "Inconclusive";
+                // NOTE(review): a rerun set whose results are all Ignored satisfies All(...) vacuously and marks
+                // the whole group "Failed" - confirm this is intended.
+                if (dataDrivenByFullName.Values.Any(rerunSet => rerunSet.Where(static r => !r.Ignored).All(static r => r.Result == "Fail")))
+                {
+                    aggregateOutcome = "Failed";
+                }
+                else if (dataDrivenByFullName.Values.All(rerunSet => rerunSet.All(static r => r.Result == "Skip")))
+                {
+                    aggregateOutcome = "NotExecuted";
+                }
+                else if (dataDrivenByFullName.Values.All(rerunSet => rerunSet.Any(static r => r.Result == "Pass")))
+                {
+                    aggregateOutcome = "Passed";
+                }
+
+                return new AggregatedResult(
+                    AggregationType.DataDriven,
+                    name,
+                    totalDuration,
+                    aggregateOutcome,
+                    subResults);
+            }
+
+            // Plain rerun: every run contributed exactly one result for this name.
+            var reruns = byIterationThenName.Select(static run => run[0]).ToList();
+            (bool rerunIsFlaky, string? rerunOutcome) = GetRerunResult(reruns);
+            return new AggregatedResult(
+                AggregationType.Rerun,
+                name,
+                reruns.Sum(static r => r.DurationSeconds),
+                rerunOutcome,
+                [.. reruns.Select((r, index) => CreateResultFromTest(r, index + 1))],
+                failureMessage: reruns[0].FailureMessage,
+                stackTrace: reruns[0].StackTrace,
+                isFlaky: rerunIsFlaky);
+        }
+
+        // Collapses a rerun whose attempts all share one outcome into a single result built from the first attempt;
+        // other aggregates are rebuilt with their sub-results reduced recursively.
+        AggregatedResult ReduceSimpleResult(AggregatedResult result)
+        {
+            if (result.SubResults.Count == 0)
+            {
+                return result;
+            }
+
+            if (result.AggregationType == AggregationType.Rerun)
+            {
+                int distinctOutcomes = result.SubResults
+                    .Select(static r => r.Result)
+                    .Distinct(StringComparer.Ordinal)
+                    .Count();
+
+                if (distinctOutcomes == 1)
+                {
+                    AggregatedResult single = result.SubResults[0];
+                    return new AggregatedResult(
+                        AggregationType.Single,
+                        result.Name,
+                        single.DurationSeconds,
+                        single.Result,
+                        attachments: single.Attachments,
+                        failureMessage: single.FailureMessage,
+                        stackTrace: single.StackTrace);
+                }
+
+                return result;
+            }
+
+            return new AggregatedResult(
+                result.AggregationType,
+                result.Name,
+                result.DurationSeconds,
+                result.Result,
+                [.. result.SubResults.Select(ReduceSimpleResult)],
+                result.Attachments,
+                result.FailureMessage,
+                result.StackTrace,
+                isFlaky: result.IsFlaky,
+                attemptId: result.AttemptId);
+        }
+
+        // One dictionary per run: basic test name -> results of that run carrying the name.
+        var partials = new List<Dictionary<string, List<TestResult>>>();
+        foreach (IEnumerable<TestResult> resultSet in results)
+        {
+            var perAttempt = new Dictionary<string, List<TestResult>>(StringComparer.Ordinal);
+            foreach (TestResult result in resultSet)
+            {
+                string basicName = ParseBasicName(result.Name);
+                if (!perAttempt.TryGetValue(basicName, out List<TestResult>? list))
+                {
+                    list = [];
+                    perAttempt[basicName] = list;
+                }
+
+                list.Add(result);
+            }
+
+            partials.Add(perAttempt);
+        }
+
+        // NOTE(review): only the first result set is inspected; if it is empty nothing is returned even when
+        // later sets contain results - confirm this is intended.
+        if (partials.Count == 0 || partials[0].Count == 0)
+        {
+            return [];
+        }
+
+        var aggregate = new List<AggregatedResult>();
+        foreach (Dictionary<string, List<TestResult>> run in partials)
+        {
+            // Iterate over a snapshot: entries are removed from every run as they are consumed,
+            // so each name is processed exactly once.
+            foreach (KeyValuePair<string, List<TestResult>> pair in run.ToList())
+            {
+                if (!run.Remove(pair.Key, out List<TestResult>? currentSet))
+                {
+                    continue;
+                }
+
+                var fullSet = new List<List<TestResult>> { currentSet };
+                foreach (Dictionary<string, List<TestResult>> otherRun in partials)
+                {
+                    if (ReferenceEquals(otherRun, run))
+                    {
+                        continue;
+                    }
+
+                    if (otherRun.Remove(pair.Key, out List<TestResult>? otherSet))
+                    {
+                        fullSet.Add(otherSet);
+                    }
+                }
+
+                aggregate.Add(ProcessNamedTest(pair.Key, fullSet));
+            }
+        }
+
+        return [.. aggregate.Select(ReduceSimpleResult)];
+    }
+}
diff --git a/src/Microsoft.DotNet.Helix/AzureDevOpsTestPublisher/RetryHelper.cs b/src/Microsoft.DotNet.Helix/AzureDevOpsTestPublisher/RetryHelper.cs
new file mode 100644
index 00000000000..fe5b37e5772
--- /dev/null
+++ b/src/Microsoft.DotNet.Helix/AzureDevOpsTestPublisher/RetryHelper.cs
@@ -0,0 +1,28 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+namespace Microsoft.DotNet.Helix.AzureDevOpsTestPublisher;
+
+/// <summary>Helper for retrying asynchronous operations.</summary>
+public class RetryHelper
+{
+    /// <summary>
+    /// Runs <paramref name="action"/> up to five times, waiting 2, 4, 8 and 16 seconds between attempts.
+    /// </summary>
+    /// <param name="action">The operation to run.</param>
+    /// <param name="cancellationToken">Checked before every attempt and observed while waiting.</param>
+    /// <returns>The value returned by the first successful attempt.</returns>
+    /// <remarks>The exception of the fifth attempt is not caught and propagates to the caller unchanged.</remarks>
+    public static async Task<T> RetryAsync<T>(Func<Task<T>> action, CancellationToken cancellationToken)
+    {
+        Exception? last = null;
+        for (int attempt = 0; attempt < 5; attempt++)
+        {
+            cancellationToken.ThrowIfCancellationRequested();
+
+            try
+            {
+                return await action();
+            }
+            catch (Exception ex) when (attempt < 4)
+            {
+                last = ex;
+                await Task.Delay(TimeSpan.FromSeconds(Math.Pow(2, attempt + 1)), cancellationToken);
+            }
+        }
+
+        // Not expected to be reached: the last attempt either returns or throws. Kept to satisfy definite return.
+        throw last ?? new InvalidOperationException("Retry failed without capturing an exception.");
+    }
+}
diff --git a/src/Microsoft.DotNet.Helix/Client/CSharp/Job.cs b/src/Microsoft.DotNet.Helix/Client/CSharp/Job.cs
index 2cc70f5c118..348ce2546d4 100644
--- a/src/Microsoft.DotNet.Helix/Client/CSharp/Job.cs
+++ b/src/Microsoft.DotNet.Helix/Client/CSharp/Job.cs
@@ -2,6 +2,7 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 
 using System;
+using System.Net.NetworkInformation;
 using System.Threading;
 using System.Threading.Tasks;
 using Microsoft.DotNet.Helix.Client.Models;
diff --git a/src/Microsoft.DotNet.Helix/Client/CSharp/JobStatus.cs b/src/Microsoft.DotNet.Helix/Client/CSharp/JobStatus.cs
new file mode 100644
index 00000000000..0cdbf823da1
--- /dev/null
+++ b/src/Microsoft.DotNet.Helix/Client/CSharp/JobStatus.cs
@@ -0,0 +1,13 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+
+namespace Microsoft.DotNet.Helix.Client.Models
+{
+    public partial class JobStatus
+    {
+        /// <summary>
+        /// True when <c>Status</c> equals "finished" or "failed", compared case-insensitively.
+        /// </summary>
+        public bool IsCompleted => string.Equals(Status, "finished", StringComparison.OrdinalIgnoreCase)
+            || string.Equals(Status, "failed", StringComparison.OrdinalIgnoreCase);
+    }
+}
diff --git a/src/Microsoft.DotNet.Helix/Client/CSharp/generated-code/Models/JobStatus.cs b/src/Microsoft.DotNet.Helix/Client/CSharp/generated-code/Models/JobStatus.cs
new file mode 100644
index 00000000000..e5794036be5
--- /dev/null
+++ b/src/Microsoft.DotNet.Helix/Client/CSharp/generated-code/Models/JobStatus.cs
@@ -0,0 +1,16 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using Newtonsoft.Json;
+
+namespace Microsoft.DotNet.Helix.Client.Models
+{
+    public partial class JobStatus
+    {
+        [JsonProperty("JobName")]
+        public string JobName { get; set; }
+
+        [JsonProperty("Status")]
+        public string Status { get; set; }
+    }
+}
diff --git a/src/Microsoft.DotNet.Helix/JobMonitor/CompactConsoleLoggerFormatter.cs b/src/Microsoft.DotNet.Helix/JobMonitor/CompactConsoleLoggerFormatter.cs
new file mode 100644
index 00000000000..b2c3a91e654
--- /dev/null
+++ b/src/Microsoft.DotNet.Helix/JobMonitor/CompactConsoleLoggerFormatter.cs
@@ -0,0 +1,194 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.IO;
+using Microsoft.Extensions.Logging;
+using Microsoft.Extensions.Logging.Abstractions;
+using Microsoft.Extensions.Logging.Console;
+using Microsoft.Extensions.Options;
+
+#nullable enable
+namespace Microsoft.DotNet.Helix.JobMonitor;
+
+///
+/// Copied over from SimpleConsoleFormatter.
Leaves out the logger name and new line, turning +/// info: test[0] +/// Log message +/// Second line of the message +/// +/// into +/// +/// info: Log message +/// Second line of the message +/// +/// Only using SimpleConsoleFormatterOptions.SingleLine didn't help because multi-line messages +/// were put together on a single line so things like stack traces of exceptions were unreadable. +/// +/// See https://github.com/dotnet/runtime/blob/0817e748b7698bef1e812fd74c8a3558b7f86421/src/libraries/Microsoft.Extensions.Logging.Console/src/SimpleConsoleFormatter.cs +/// +public class CompactConsoleLoggerFormatter : ConsoleFormatter +{ + private const string LoglevelPadding = ": "; + private const string DefaultForegroundColor = "\x1B[39m\x1B[22m"; // reset to default foreground color + private const string DefaultBackgroundColor = "\x1B[49m"; // reset to the background color + + public const string FormatterName = "compact"; + + private readonly SimpleConsoleFormatterOptions _options; + private readonly string _messagePadding; + private readonly string _newLineWithMessagePadding; + + public CompactConsoleLoggerFormatter(IOptionsMonitor options) + : base(FormatterName) + { + _options = options.CurrentValue; + _messagePadding = new string(' ', GetLogLevelString(LogLevel.Information).Length + LoglevelPadding.Length + (_options.TimestampFormat?.Length ?? 0)); + _newLineWithMessagePadding = Environment.NewLine + _messagePadding; + } + + public override void Write(in LogEntry logEntry, IExternalScopeProvider? 
scopeProvider, TextWriter textWriter) + { + if (logEntry.Formatter == null) + { + return; + } + + var message = logEntry.Formatter(logEntry.State, logEntry.Exception); + if (logEntry.Exception == null && message == null) + { + return; + } + + LogLevel logLevel = logEntry.LogLevel; + var logLevelColors = GetLogLevelConsoleColors(logLevel); + var logLevelString = GetLogLevelString(logLevel); + + if (_options.TimestampFormat != null) + { + var timestamp = DateTimeOffset.Now.ToString(_options.TimestampFormat); + textWriter.Write(timestamp); + } + + WriteColoredMessage(textWriter, logLevelString, logLevelColors.Background, logLevelColors.Foreground); + + textWriter.Write(LoglevelPadding); + + WriteMessage(textWriter, message, false); + + // Example: + // System.InvalidOperationException + // at Namespace.Class.Function() in File:line X + if (logEntry.Exception != null) + { + // exception message + WriteMessage(textWriter, logEntry.Exception.ToString()); + } + } + + private void WriteMessage(TextWriter textWriter, string message, bool includePadding = true) + { + if (message == null) + { + return; + } + + if (includePadding) + { + textWriter.Write(_messagePadding); + } + + textWriter.WriteLine(message.Replace(Environment.NewLine, _newLineWithMessagePadding)); + } + + private static string GetLogLevelString(LogLevel logLevel) => logLevel switch + { + LogLevel.Trace => "trce", + LogLevel.Debug => "dbug", + LogLevel.Information => "info", + LogLevel.Warning => "warn", + LogLevel.Error => "fail", + LogLevel.Critical => "crit", + _ => throw new ArgumentOutOfRangeException(nameof(logLevel)) + }; + + private (ConsoleColor? Foreground, ConsoleColor? Background) GetLogLevelConsoleColors(LogLevel logLevel) + { + if (_options.ColorBehavior == LoggerColorBehavior.Disabled) + { + return (null, null); + } + + // We must explicitly set the background color if we are setting the foreground color, + // since just setting one can look bad on the users console. 
+ return logLevel switch + { + LogLevel.Trace => (ConsoleColor.Gray, ConsoleColor.Black), + LogLevel.Debug => (ConsoleColor.Gray, ConsoleColor.Black), + LogLevel.Information => (ConsoleColor.DarkGreen, ConsoleColor.Black), + LogLevel.Warning => (ConsoleColor.Yellow, ConsoleColor.Black), + LogLevel.Error => (ConsoleColor.Black, ConsoleColor.DarkRed), + LogLevel.Critical => (ConsoleColor.White, ConsoleColor.DarkRed), + _ => (null, null) + }; + } + + private static void WriteColoredMessage(TextWriter textWriter, string message, ConsoleColor? background, ConsoleColor? foreground) + { + // Order: backgroundcolor, foregroundcolor, Message, reset foregroundcolor, reset backgroundcolor + if (background.HasValue) + { + textWriter.Write(GetBackgroundColorEscapeCode(background.Value)); + } + + if (foreground.HasValue) + { + textWriter.Write(GetForegroundColorEscapeCode(foreground.Value)); + } + + textWriter.Write(message); + + if (foreground.HasValue) + { + textWriter.Write(DefaultForegroundColor); // reset to default foreground color + } + + if (background.HasValue) + { + textWriter.Write(DefaultBackgroundColor); // reset to the background color + } + } + + private static string GetForegroundColorEscapeCode(ConsoleColor color) => color switch + { + ConsoleColor.Black => "\x1B[30m", + ConsoleColor.DarkRed => "\x1B[31m", + ConsoleColor.DarkGreen => "\x1B[32m", + ConsoleColor.DarkYellow => "\x1B[33m", + ConsoleColor.DarkBlue => "\x1B[34m", + ConsoleColor.DarkMagenta => "\x1B[35m", + ConsoleColor.DarkCyan => "\x1B[36m", + ConsoleColor.Gray => "\x1B[37m", + ConsoleColor.Red => "\x1B[1m\x1B[31m", + ConsoleColor.Green => "\x1B[1m\x1B[32m", + ConsoleColor.Yellow => "\x1B[1m\x1B[33m", + ConsoleColor.Blue => "\x1B[1m\x1B[34m", + ConsoleColor.Magenta => "\x1B[1m\x1B[35m", + ConsoleColor.Cyan => "\x1B[1m\x1B[36m", + ConsoleColor.White => "\x1B[1m\x1B[37m", + _ => DefaultForegroundColor // default foreground color + }; + + private static string GetBackgroundColorEscapeCode(ConsoleColor 
color) => color switch + { + ConsoleColor.Black => "\x1B[40m", + ConsoleColor.DarkRed => "\x1B[41m", + ConsoleColor.DarkGreen => "\x1B[42m", + ConsoleColor.DarkYellow => "\x1B[43m", + ConsoleColor.DarkBlue => "\x1B[44m", + ConsoleColor.DarkMagenta => "\x1B[45m", + ConsoleColor.DarkCyan => "\x1B[46m", + ConsoleColor.Gray => "\x1B[47m", + _ => DefaultBackgroundColor // Use default background color + }; +} diff --git a/src/Microsoft.DotNet.Helix/JobMonitor/HelixJobMonitorUtilities.cs b/src/Microsoft.DotNet.Helix/JobMonitor/HelixJobMonitorUtilities.cs new file mode 100644 index 00000000000..a28e96673b0 --- /dev/null +++ b/src/Microsoft.DotNet.Helix/JobMonitor/HelixJobMonitorUtilities.cs @@ -0,0 +1,101 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Collections.Generic; +using System.Linq; +using Newtonsoft.Json; + +namespace Microsoft.DotNet.Helix.JobMonitor +{ + public sealed class AzureDevOpsTimelineRecord + { + [JsonProperty("id")] + public string Id { get; set; } + + [JsonProperty("parentId")] + public string ParentId { get; set; } + + [JsonProperty("type")] + public string Type { get; set; } + + [JsonProperty("name")] + public string Name { get; set; } + + [JsonProperty("state")] + public string State { get; set; } + + [JsonProperty("result")] + public string Result { get; set; } + } + + public static class HelixJobMonitorUtilities + { + public static bool AreNonMonitorJobsComplete(IEnumerable records, string jobMonitorName) + => GetRelevantJobRecords(records, jobMonitorName).All(IsTerminal); + + public static bool HasFailedNonMonitorJobs(IEnumerable records, string jobMonitorName) + => GetRelevantJobRecords(records, jobMonitorName).Any(r => + string.Equals(r.Result, "failed", StringComparison.OrdinalIgnoreCase) + || string.Equals(r.Result, "canceled", StringComparison.OrdinalIgnoreCase)); + + /// + /// Returns the subset of that belongs to 
the pipeline stage + /// named , including the Stage record itself and any + /// descendant records (Phases, Jobs, Tasks). When the named Stage is not present in the + /// timeline an empty list is returned. + /// + public static IReadOnlyList FilterRecordsToStage( + IEnumerable records, + string stageName) + { + if (string.IsNullOrEmpty(stageName)) + { + return (records ?? []).ToList(); + } + + var all = (records ?? []).ToList(); + var stageRoot = all.FirstOrDefault(r => + string.Equals(r.Type, "Stage", StringComparison.OrdinalIgnoreCase) + && string.Equals(r.Name, stageName, StringComparison.OrdinalIgnoreCase)); + + if (stageRoot == null) + { + return []; + } + + // Iteratively collect all descendants of the stage record by following ParentId. + var byParent = all + .Where(r => !string.IsNullOrEmpty(r.ParentId)) + .ToLookup(r => r.ParentId, StringComparer.OrdinalIgnoreCase); + + var result = new List { stageRoot }; + var queue = new Queue(); + queue.Enqueue(stageRoot.Id); + while (queue.Count > 0) + { + string parentId = queue.Dequeue(); + foreach (AzureDevOpsTimelineRecord child in byParent[parentId]) + { + result.Add(child); + if (!string.IsNullOrEmpty(child.Id)) + { + queue.Enqueue(child.Id); + } + } + } + + return result; + } + + private static IEnumerable GetRelevantJobRecords(IEnumerable records, string jobMonitorName) + { + return (records ?? 
[]) + .Where(r => string.Equals(r.Type, "Job", StringComparison.OrdinalIgnoreCase)) + .Where(r => !string.Equals(r.Name, jobMonitorName, StringComparison.OrdinalIgnoreCase)); + } + + private static bool IsTerminal(AzureDevOpsTimelineRecord record) + => string.Equals(record?.State, "completed", StringComparison.OrdinalIgnoreCase); + } +} diff --git a/src/Microsoft.DotNet.Helix/JobMonitor/Interfaces/IAzureDevOpsService.cs b/src/Microsoft.DotNet.Helix/JobMonitor/Interfaces/IAzureDevOpsService.cs new file mode 100644 index 00000000000..affcf0ea14a --- /dev/null +++ b/src/Microsoft.DotNet.Helix/JobMonitor/Interfaces/IAzureDevOpsService.cs @@ -0,0 +1,45 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Collections.Generic; +using System.Threading; +using System.Threading.Tasks; + +namespace Microsoft.DotNet.Helix.JobMonitor +{ + /// + /// Abstracts Azure DevOps REST API interactions needed by the job monitor. + /// + public interface IAzureDevOpsService + { + /// + /// Returns the build timeline records for the current build. + /// Used to determine whether non-monitor pipeline jobs have completed. + /// + Task> GetTimelineRecordsAsync(CancellationToken cancellationToken); + + /// + /// Returns the set of Helix job names that have already been processed + /// by a prior monitor invocation (identified via completed AzDO test run tags). + /// + Task> GetProcessedHelixJobNamesAsync(CancellationToken cancellationToken); + + /// + /// Creates a new test run in Azure DevOps and returns its ID. + /// If a test run with this name already exists in-progress (orphaned from a prior crash), + /// the implementation may reuse it. + /// + Task CreateTestRunAsync(string name, string helixJobName, CancellationToken cancellationToken); + + /// + /// Marks a test run as completed. 
+ /// + Task CompleteTestRunAsync(int testRunId, CancellationToken cancellationToken); + + /// + /// Uploads test results for the specified work items into an existing test run. + /// Returns true if all test results passed, false otherwise. + /// + Task UploadTestResultsAsync(int testRunId, IReadOnlyList results, CancellationToken cancellationToken); + } +} diff --git a/src/Microsoft.DotNet.Helix/JobMonitor/Interfaces/IHelixService.cs b/src/Microsoft.DotNet.Helix/JobMonitor/Interfaces/IHelixService.cs new file mode 100644 index 00000000000..1c916e40719 --- /dev/null +++ b/src/Microsoft.DotNet.Helix/JobMonitor/Interfaces/IHelixService.cs @@ -0,0 +1,35 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Collections.Generic; +using System.Threading; +using System.Threading.Tasks; +using Microsoft.DotNet.Helix.Client.Models; +using Microsoft.DotNet.Helix.JobMonitor.Models; + +namespace Microsoft.DotNet.Helix.JobMonitor +{ + /// + /// Abstracts Helix API interactions needed by the job monitor. + /// + public interface IHelixService + { + /// + /// Returns Helix jobs associated with the current build/stage. + /// + Task> GetJobsAsync(CancellationToken cancellationToken); + + /// + /// Downloads test result files for a completed Helix job's work items + /// and returns metadata about each work item's results. 
+ /// + Task> DownloadTestResultsAsync( + string jobName, + IReadOnlyCollection workItemNames, + CancellationToken cancellationToken); + + Task> ListWorkItemsAsync( + string jobName, + CancellationToken cancellationToken); + } +} diff --git a/src/Microsoft.DotNet.Helix/JobMonitor/Interfaces/IJobMonitorRunner.cs b/src/Microsoft.DotNet.Helix/JobMonitor/Interfaces/IJobMonitorRunner.cs new file mode 100644 index 00000000000..56af49a631e --- /dev/null +++ b/src/Microsoft.DotNet.Helix/JobMonitor/Interfaces/IJobMonitorRunner.cs @@ -0,0 +1,19 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Threading; +using System.Threading.Tasks; + +namespace Microsoft.DotNet.Helix.JobMonitor +{ + /// + /// Contract for the job monitor's main execution loop. + /// + public interface IJobMonitorRunner + { + /// + /// Runs the monitor loop. Returns 0 for success, 1 for failure. + /// + Task RunAsync(CancellationToken cancellationToken); + } +} diff --git a/src/Microsoft.DotNet.Helix/JobMonitor/JobMonitorOptions.cs b/src/Microsoft.DotNet.Helix/JobMonitor/JobMonitorOptions.cs new file mode 100644 index 00000000000..346b2ffacd8 --- /dev/null +++ b/src/Microsoft.DotNet.Helix/JobMonitor/JobMonitorOptions.cs @@ -0,0 +1,156 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Globalization; +using CommandLine; + +namespace Microsoft.DotNet.Helix.JobMonitor +{ + public sealed class JobMonitorOptions + { + // Helix API access token + public string HelixAccessToken { get; set; } + + /// + /// Azure DevOps build token + /// + public string SystemAccessToken { get; set; } + + public bool ShowHelp { get; private set; } + + [Option("organization", HelpText = "Organization name (e.g. 
'dotnet' for 'dotnet/runtime').")] + public string Organization { get; set; } + + [Option("repository", HelpText = "Repository name (e.g. 'runtime' for 'dotnet/runtime').")] + public string RepositoryName { get; set; } + + [Option("pr-number", HelpText = "Pull request number for the build, if applicable.")] + public int? PrNumber { get; set; } + + [Option("build-id", HelpText = "Azure DevOps build ID.")] + public string BuildId { get; set; } + + [Option("collection-uri", HelpText = "Azure DevOps collection URI.")] + public string CollectionUri { get; set; } + + [Option("team-project", HelpText = "Azure DevOps team project name.")] + public string TeamProject { get; set; } + + [Option("helix-base-uri", HelpText = "Base URI for the Helix service.")] + public string HelixBaseUri { get; set; } = "https://helix.dot.net/"; + + [Option("polling-interval-seconds", HelpText = "Polling interval in seconds.", Default = 30)] + public int PollingIntervalSeconds { get; set; } = 30; + + [Option("max-wait-minutes", HelpText = "Maximum run time in minutes.", Default = 360)] + public int MaximumWaitMinutes { get; set; } = 360; + + [Option("job-monitor-name", HelpText = "Display name of the Helix Job Monitor job in Azure DevOps.")] + public string JobMonitorName { get; set; } = "Helix Job Monitor"; + + [Option("working-directory", HelpText = "Directory used to stage downloaded test results.")] + public string WorkingDirectory { get; set; } + + [Option("attempt", HelpText = "Azure DevOps attempt number for the current job.")] + public int? Attempt { get; set; } + + [Option("monitor-all-stages", HelpText = "When true (default) the monitor tracks Helix jobs and pipeline jobs across all stages of the build. 
When false the monitor only tracks jobs that belong to the same stage as the monitor itself (see --stage-name).", Default = true)] + public bool MonitorAllStages { get; set; } = true; + + [Option("stage-name", HelpText = "Name of the Azure DevOps pipeline stage the monitor is running in. Used to scope monitoring when --monitor-all-stages is false. Defaults to the SYSTEM_STAGENAME environment variable.")] + public string StageName { get; set; } + + public static JobMonitorOptions Parse(string[] args) + { + JobMonitorOptions parsed = null; + var parser = new Parser(settings => + { + settings.CaseInsensitiveEnumValues = true; + settings.HelpWriter = Console.Out; + }); + + parser.ParseArguments(args) + .WithParsed(options => parsed = options) + .WithNotParsed(errors => + { + parsed = new JobMonitorOptions { ShowHelp = true }; + }); + + if (parsed == null || parsed.ShowHelp) + { + return parsed ?? new JobMonitorOptions { ShowHelp = true }; + } + + parsed.ApplyEnvironmentDefaults(); + parsed.Validate(); + return parsed; + } + + private void ApplyEnvironmentDefaults() + { + HelixAccessToken ??= Environment.GetEnvironmentVariable("HELIX_ACCESSTOKEN"); +#if DEBUG + SystemAccessToken ??= new Azure.Identity.DefaultAzureCredential(includeInteractiveCredentials: true) + .GetToken(new Azure.Core.TokenRequestContext(["499b84ac-1321-427f-aa17-267ca6975798/.default"])) + .Token; +#endif + CollectionUri ??= Environment.GetEnvironmentVariable("SYSTEM_TEAMFOUNDATIONCOLLECTIONURI"); + TeamProject ??= Environment.GetEnvironmentVariable("SYSTEM_TEAMPROJECT"); + BuildId ??= Environment.GetEnvironmentVariable("BUILD_BUILDID"); + SystemAccessToken ??= Environment.GetEnvironmentVariable("SYSTEM_ACCESSTOKEN"); + RepositoryName ??= Environment.GetEnvironmentVariable("BUILD_REPOSITORY_NAME"); + WorkingDirectory ??= System.IO.Path.Combine(System.IO.Path.GetTempPath(), "helix-job-monitor", BuildId ?? 
"unknown"); + PrNumber ??= GetEnvironmentInt("SYSTEM_PULLREQUEST_PULLREQUESTNUMBER"); + Attempt ??= GetEnvironmentInt("SYSTEM_JOBATTEMPT"); + StageName ??= Environment.GetEnvironmentVariable("SYSTEM_STAGENAME"); + } + + private void Validate() + { + CollectionUri = EnsureTrailingSlash(RequireValue(CollectionUri, "collection-uri", "SYSTEM_TEAMFOUNDATIONCOLLECTIONURI")); + TeamProject = RequireValue(TeamProject, "team-project", "SYSTEM_TEAMPROJECT"); + BuildId = RequireValue(BuildId, "build-id", "BUILD_BUILDID"); + SystemAccessToken = RequireValue(SystemAccessToken, "access-token", "SYSTEM_ACCESSTOKEN"); + + if (string.IsNullOrWhiteSpace(RepositoryName)) + { + throw new InvalidOperationException("A repository identifier must be provided either by argument or pipeline environment."); + } + + if (string.IsNullOrWhiteSpace(Organization)) + { + throw new InvalidOperationException("Organization must be provided either by argument or pipeline environment."); + } + + if (!MonitorAllStages && string.IsNullOrWhiteSpace(StageName)) + { + throw new InvalidOperationException("--stage-name (or the SYSTEM_STAGENAME environment variable) must be set when --monitor-all-stages is false."); + } + } + + private static string RequireValue(string value, string argumentName, string environmentName) + { + if (string.IsNullOrWhiteSpace(value)) + { + throw new InvalidOperationException($"Missing required option --{argumentName} or environment variable {environmentName}."); + } + + return value; + } + + private static int? GetEnvironmentInt(string environmentName) + { + string value = Environment.GetEnvironmentVariable(environmentName); + if (int.TryParse(value, NumberStyles.Integer, CultureInfo.InvariantCulture, out int parsed)) + { + return parsed; + } + + return null; + } + + private static string EnsureTrailingSlash(string uri) + => uri.EndsWith('/') ? 
uri : uri + '/'; + } +} diff --git a/src/Microsoft.DotNet.Helix/JobMonitor/JobMonitorRunner.cs b/src/Microsoft.DotNet.Helix/JobMonitor/JobMonitorRunner.cs new file mode 100644 index 00000000000..1b657edff23 --- /dev/null +++ b/src/Microsoft.DotNet.Helix/JobMonitor/JobMonitorRunner.cs @@ -0,0 +1,263 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Threading; +using System.Threading.Tasks; +using Microsoft.DotNet.Helix.Client.Models; +using Microsoft.DotNet.Helix.JobMonitor.Models; +using Microsoft.Extensions.Logging; + +namespace Microsoft.DotNet.Helix.JobMonitor +{ + internal sealed class JobMonitorRunner : IJobMonitorRunner, IDisposable + { + private readonly JobMonitorOptions _options; + private readonly ILogger _logger; + private readonly IAzureDevOpsService _azdo; + private readonly IHelixService _helix; + private readonly Func _delayFunc; + + /// + /// Constructor for production use with real services. + /// + public JobMonitorRunner(JobMonitorOptions options, ILogger logger) + : this(options, + logger, + new AzureDevOpsService(options, logger), + new HelixService(options, logger), + null) + { + } + + /// + /// Constructor for testing with injected services. + /// + internal JobMonitorRunner( + JobMonitorOptions options, + ILogger logger, + IAzureDevOpsService azdo, + IHelixService helix, + Func delayFunc) + { + _options = options ?? throw new ArgumentNullException(nameof(options)); + _logger = logger ?? throw new ArgumentNullException(nameof(logger)); + _azdo = azdo ?? throw new ArgumentNullException(nameof(azdo)); + _helix = helix ?? throw new ArgumentNullException(nameof(helix)); + _delayFunc = delayFunc ?? 
Task.Delay; + Directory.CreateDirectory(_options.WorkingDirectory); + } + + public Task RunAsync(CancellationToken cancellationToken) + { + return RunCoreAsync(cancellationToken); + } + + public async Task RunAsync() + { + var cancellationTokenSource = new CancellationTokenSource(); + cancellationTokenSource.CancelAfter(TimeSpan.FromMinutes(_options.MaximumWaitMinutes)); + return await RunCoreAsync(cancellationTokenSource.Token); + } + + private async Task RunCoreAsync(CancellationToken cancellationToken) + { + if (_options.MonitorAllStages || string.IsNullOrEmpty(_options.StageName)) + { + _logger.LogInformation("Monitoring Helix jobs for the pipeline"); + } + else + { + _logger.LogInformation("Monitoring Helix jobs sent from stage '{StageName}'", _options.StageName); + } + + IReadOnlySet alreadyProcessed = await _azdo.GetProcessedHelixJobNamesAsync(cancellationToken); + var processedHelixJobs = new HashSet(alreadyProcessed, StringComparer.OrdinalIgnoreCase); + IReadOnlyList latestAssociatedJobs = []; + + try + { + return await RunLoopAsync(processedHelixJobs, latestJobs => latestAssociatedJobs = latestJobs, cancellationToken); + } + catch (OperationCanceledException) + { + ReportTimeout(latestAssociatedJobs, processedHelixJobs); + return 1; + } + } + + private async Task RunLoopAsync( + HashSet processedHelixJobs, + Action> reportLatestJobs, + CancellationToken cancellationToken) + { + bool anyNonMonitorJobFailures = false; + int failedHelixJobCount = 0; + int processedHelixJobCount = 0; + int allHelixJobCount = 0; + int completedJobsCount = -1; + + while (true) + { + cancellationToken.ThrowIfCancellationRequested(); + + IReadOnlyList timelineRecords = await _azdo.GetTimelineRecordsAsync(cancellationToken); + IReadOnlyList associatedJobsWithBuild = await _helix.GetJobsAsync(cancellationToken); + + // When the monitor is scoped to a single stage, drop timeline records and Helix jobs + // that belong to other stages so they don't gate completion or contribute 
failures. + if (!_options.MonitorAllStages && !string.IsNullOrEmpty(_options.StageName)) + { + timelineRecords = HelixJobMonitorUtilities.FilterRecordsToStage(timelineRecords, _options.StageName); + associatedJobsWithBuild = + [ + ..associatedJobsWithBuild.Where(j => + string.IsNullOrEmpty(j.StageName) + || string.Equals(j.StageName, _options.StageName, StringComparison.OrdinalIgnoreCase)) + ]; + } + + reportLatestJobs(associatedJobsWithBuild); + + // Filter jobs to completed ones belonging to this build + IReadOnlyCollection completedJobs = + [ + ..associatedJobsWithBuild + .Where(j => j.IsCompleted) + .OrderBy(j => j.JobName, StringComparer.OrdinalIgnoreCase) + ]; + + if (allHelixJobCount != associatedJobsWithBuild.Count || completedJobsCount != completedJobs.Count) + { + _logger.LogInformation("{CompletedCount}/{TotalCount} Helix jobs finished", completedJobs.Count, associatedJobsWithBuild.Count); + allHelixJobCount = associatedJobsWithBuild.Count; + completedJobsCount = completedJobs.Count; + } + + foreach (HelixJobInfo job in completedJobs.Where(j => !processedHelixJobs.Contains(j.JobName))) + { + bool passed = await ProcessCompletedJobAsync(job, cancellationToken); + processedHelixJobs.Add(job.JobName); + processedHelixJobCount++; + if (!passed) + { + failedHelixJobCount++; + } + } + + anyNonMonitorJobFailures = HelixJobMonitorUtilities.HasFailedNonMonitorJobs(timelineRecords, _options.JobMonitorName); + bool allPipelineJobsComplete = HelixJobMonitorUtilities.AreNonMonitorJobsComplete(timelineRecords, _options.JobMonitorName); + bool allHelixJobsComplete = associatedJobsWithBuild.Count == 0 || associatedJobsWithBuild.All(j => j.IsCompleted); + + if (allPipelineJobsComplete && allHelixJobsComplete) + { + _logger.LogInformation("Final summary: processed {ProcessedCount} Helix job(s); {FailedCount} failed.", processedHelixJobCount, failedHelixJobCount); + if (anyNonMonitorJobFailures || failedHelixJobCount > 0) + { + if (anyNonMonitorJobFailures) + { + 
_logger.LogError("One or more non-monitor pipeline jobs failed."); + } + + if (failedHelixJobCount > 0) + { + _logger.LogError("The Helix Job Monitor detected failures in {FailedCount} Helix job(s).", failedHelixJobCount); + } + + return 1; + } + + return 0; + } + + // If all pipeline jobs are dead and Helix jobs are still running, + // those jobs are orphaned — no point waiting. + if (allPipelineJobsComplete && anyNonMonitorJobFailures && !allHelixJobsComplete) + { + _logger.LogError("All non-monitor pipeline jobs failed/canceled while Helix jobs are still running. Exiting."); + return 1; + } + + await _delayFunc(TimeSpan.FromSeconds(Math.Max(5, _options.PollingIntervalSeconds)), cancellationToken); + } + } + + private async Task ProcessCompletedJobAsync( + HelixJobInfo helixJob, + CancellationToken cancellationToken) + { + _logger.LogInformation("Processing completed job {jobName}...", helixJob.JobName); + + IReadOnlyCollection workItems = await _helix.ListWorkItemsAsync(helixJob.JobName, cancellationToken); + + int failedWorkItemCount = workItems.Count(wi => wi.ExitCode != 0 || !wi.State.Equals("Finished", StringComparison.OrdinalIgnoreCase)); + bool helixJobSuccessful = failedWorkItemCount == 0; + int sucessfulWorkItemCount = workItems.Count - failedWorkItemCount; + + int testRunId = await _azdo.CreateTestRunAsync(helixJob.TestRunName, helixJob.JobName, cancellationToken); + + try + { + IReadOnlyList downloadedFiles = await _helix.DownloadTestResultsAsync( + helixJob.JobName, + [..workItems.Select(w => w.Name)], + cancellationToken); + + if (!await _azdo.UploadTestResultsAsync(testRunId, downloadedFiles, cancellationToken)) + { + sucessfulWorkItemCount--; + failedWorkItemCount++; + helixJobSuccessful = false; + } + } + catch (Exception ex) + { + // TODO: Handle better here + _logger.LogError(ex, "Failed to upload test results for job {JobName} to Azure DevOps. 
Test run ID was {TestRunId}.", helixJob.JobName, testRunId); + return false; + } + finally + { + await _azdo.CompleteTestRunAsync(testRunId, cancellationToken); + } + + _logger.LogInformation("Job '{JobName}' completed ({PassedCount} passed, {FailedCount} failed).", helixJob.JobName, sucessfulWorkItemCount, failedWorkItemCount); + return failedWorkItemCount == 0; + } + + private void ReportTimeout( + IReadOnlyList latestAssociatedJobs, + HashSet processedHelixJobs) + { + var timeout = TimeSpan.FromMinutes(_options.MaximumWaitMinutes); + var unfinishedJobs = latestAssociatedJobs + .Where(j => !j.IsCompleted || !processedHelixJobs.Contains(j.JobName)) + .OrderBy(j => j.JobName, StringComparer.OrdinalIgnoreCase) + .ToList(); + + if (unfinishedJobs.Count == 0) + { + _logger.LogCritical("Helix Job Monitor timed out after {TimeoutMinutes} minute(s) ({Timeout}). No unfinished Helix jobs were tracked at the time of timeout.", + timeout.TotalMinutes, + timeout); + return; + } + + _logger.LogError( + "Helix Job Monitor timed out after {TimeoutMinutes} minute(s) ({Timeout}). 
{UnfinishedCount} Helix job(s) had not finished: {UnfinishedJobs}", + timeout.TotalMinutes, + timeout, + unfinishedJobs.Count, + string.Join(", ", unfinishedJobs.Select(j => $"{j.JobName} (status: {j.Status})"))); + } + + public void Dispose() + { + (_azdo as IDisposable)?.Dispose(); + (_helix as IDisposable)?.Dispose(); + } + } +} diff --git a/src/Microsoft.DotNet.Helix/JobMonitor/Microsoft.DotNet.Helix.JobMonitor.csproj b/src/Microsoft.DotNet.Helix/JobMonitor/Microsoft.DotNet.Helix.JobMonitor.csproj new file mode 100644 index 00000000000..f0dcfde2e6c --- /dev/null +++ b/src/Microsoft.DotNet.Helix/JobMonitor/Microsoft.DotNet.Helix.JobMonitor.csproj @@ -0,0 +1,29 @@ + + + + $(BundledNETCoreAppTargetFramework) + Exe + true + true + dotnet-helix-job-monitor + Standalone Helix Job Monitor tool for Azure DevOps pipelines + + + + + + + + + + + + + + + + + + + + diff --git a/src/Microsoft.DotNet.Helix/JobMonitor/Models/HelixJobInfo.cs b/src/Microsoft.DotNet.Helix/JobMonitor/Models/HelixJobInfo.cs new file mode 100644 index 00000000000..32cbb9c8927 --- /dev/null +++ b/src/Microsoft.DotNet.Helix/JobMonitor/Models/HelixJobInfo.cs @@ -0,0 +1,92 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using Microsoft.DotNet.Helix.Client.Models; +using Newtonsoft.Json.Linq; + +namespace Microsoft.DotNet.Helix.JobMonitor.Models +{ + /// + /// Represents a Helix job and its current status. + /// Decoupled from the Helix Client SDK's generated models. + /// + public sealed class HelixJobInfo + { + public HelixJobInfo(JobSummary helixJob) + { + JobName = helixJob.Name; + Status = helixJob.Finished != null ? 
"finished" : "running"; + TestRunName = GetTestRunNameFromJob(helixJob); + StageName = GetStringPropertyFromJob(helixJob, "System.StageName"); + } + + public HelixJobInfo(string jobName, string status, string testRunName = null, string stageName = null) + { + JobName = jobName ?? throw new ArgumentNullException(nameof(jobName)); + Status = status ?? throw new ArgumentNullException(nameof(status)); + TestRunName = testRunName; + StageName = stageName; + } + + public string JobName { get; } + + public string Status { get; } + + /// + /// The desired AzDO test run name for this job. May come from a Helix job property. + /// Falls back to the job name if not set. + /// + public string TestRunName { get; } + + /// + /// Name of the Azure DevOps pipeline stage that submitted this Helix job, taken from + /// the "System.StageName" property stamped onto the job by SendHelixJob. May be + /// null if the property is not present. + /// + public string StageName { get; } + + public bool IsCompleted => Status.Equals("finished", StringComparison.OrdinalIgnoreCase) + || Status.Equals("failed", StringComparison.OrdinalIgnoreCase); + + private static string GetTestRunNameFromJob(JobSummary helixJob) + { + // The Helix SDK stamps the desired Azure DevOps test run name onto the job as a + // "TestRunName" property when submitting (matching what StartAzurePipelinesTestRun + // would have used). Fall back to the Helix job name if the property is missing so + // we always produce a non-empty name. 
+ if (helixJob.Properties is JObject properties) + { + if (properties.TryGetValue("TestRunName", out JToken testRunName)) + { + string value = testRunName?.ToString(); + if (!string.IsNullOrEmpty(value)) + { + return value; + } + } + + properties.TryGetValue("System.PhaseName", out JToken phaseName); + properties.TryGetValue("System.JobName", out JToken jobName); + return $"{phaseName} {jobName} run on {helixJob.QueueId}".Trim(); + } + + return helixJob.Name; + } + + private static string GetStringPropertyFromJob(JobSummary helixJob, string propertyName) + { + if (helixJob.Properties is JObject properties + && properties.TryGetValue(propertyName, out JToken token)) + { + string value = token?.ToString(); + if (!string.IsNullOrEmpty(value)) + { + return value; + } + } + + return null; + } + } +} diff --git a/src/Microsoft.DotNet.Helix/JobMonitor/Models/HelixJobPassFail.cs b/src/Microsoft.DotNet.Helix/JobMonitor/Models/HelixJobPassFail.cs new file mode 100644 index 00000000000..59f50ca0d80 --- /dev/null +++ b/src/Microsoft.DotNet.Helix/JobMonitor/Models/HelixJobPassFail.cs @@ -0,0 +1,27 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Collections.Generic; + +namespace Microsoft.DotNet.Helix.JobMonitor.Models +{ + /// + /// Pass/fail breakdown for a completed Helix job based on work item exit codes. + /// Decoupled from the Helix Client SDK's generated models. + /// + public sealed class HelixJobPassFail + { + public HelixJobPassFail(IReadOnlyList passedWorkItems, IReadOnlyList failedWorkItems) + { + PassedWorkItems = passedWorkItems ?? Array.Empty(); + FailedWorkItems = failedWorkItems ?? 
Array.Empty(); + } + + public IReadOnlyList PassedWorkItems { get; } + + public IReadOnlyList FailedWorkItems { get; } + + public bool HasFailures => FailedWorkItems.Count > 0; + } +} diff --git a/src/Microsoft.DotNet.Helix/JobMonitor/Models/WorkItemTestResults.cs b/src/Microsoft.DotNet.Helix/JobMonitor/Models/WorkItemTestResults.cs new file mode 100644 index 00000000000..d238f3d5f21 --- /dev/null +++ b/src/Microsoft.DotNet.Helix/JobMonitor/Models/WorkItemTestResults.cs @@ -0,0 +1,9 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Collections.Generic; + +namespace Microsoft.DotNet.Helix.JobMonitor +{ + public record WorkItemTestResults(string JobName, string WorkItemName, List TestResultFiles); +} diff --git a/src/Microsoft.DotNet.Helix/JobMonitor/Program.cs b/src/Microsoft.DotNet.Helix/JobMonitor/Program.cs new file mode 100644 index 00000000000..515d9b8ac80 --- /dev/null +++ b/src/Microsoft.DotNet.Helix/JobMonitor/Program.cs @@ -0,0 +1,43 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +using System; +using System.Threading.Tasks; +using Microsoft.Extensions.Logging; +using Microsoft.Extensions.Logging.Console; + +namespace Microsoft.DotNet.Helix.JobMonitor +{ + internal static class Program + { + public static async Task Main(string[] args) + { + using ILoggerFactory loggerFactory = LoggerFactory.Create(builder => + { + builder + .SetMinimumLevel(LogLevel.Information) + .AddConsole(o => o.FormatterName = CompactConsoleLoggerFormatter.FormatterName) + .AddConsoleFormatter(); + }); + + ILogger logger = loggerFactory.CreateLogger(); + + try + { + JobMonitorOptions options = JobMonitorOptions.Parse(args); + if (options.ShowHelp) + { + return 0; + } + + using JobMonitorRunner runner = new(options, logger); + return await runner.RunAsync(); + } + catch (Exception ex) + { + logger.LogError(ex, "Helix Job Monitor terminated with an unhandled exception."); + return 1; + } + } + } +} diff --git a/src/Microsoft.DotNet.Helix/JobMonitor/Services/AzureDevOpsService.cs b/src/Microsoft.DotNet.Helix/JobMonitor/Services/AzureDevOpsService.cs new file mode 100644 index 00000000000..ad2caede982 --- /dev/null +++ b/src/Microsoft.DotNet.Helix/JobMonitor/Services/AzureDevOpsService.cs @@ -0,0 +1,168 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +using System; +using System.Collections.Generic; +using System.Globalization; +using System.Linq; +using System.Net.Http; +using System.Net.Http.Headers; +using System.Text; +using System.Threading; +using System.Threading.Tasks; +using Microsoft.DotNet.Helix.AzureDevOpsTestPublisher; +using Microsoft.DotNet.Helix.AzureDevOpsTestPublisher.Model; +using Microsoft.Extensions.Logging; +using Newtonsoft.Json; +using Newtonsoft.Json.Linq; + +namespace Microsoft.DotNet.Helix.JobMonitor +{ + internal sealed class AzureDevOpsService : IAzureDevOpsService, IDisposable + { + // Tag prefix used to identify Azure DevOps test runs created by this monitor for a + // particular Helix job. The full tag value is "MonitoredJob{helixJobName}" (no separator) and is + // attached to the test run when it is created. This lets us look up which Helix jobs + // we have already processed without encoding the Helix job name into the run name. + private const string MonitoredJobTagPrefix = "MonitoredJob"; + + private readonly JobMonitorOptions _options; + private readonly ILogger _logger; + private readonly HttpClient _azdoClient; + + public AzureDevOpsService(JobMonitorOptions options, ILogger logger) + { + _options = options; + _logger = logger; + _azdoClient = new HttpClient(); + string encodedToken = Convert.ToBase64String(Encoding.UTF8.GetBytes("unused:" + options.SystemAccessToken)); + _azdoClient.DefaultRequestHeaders.Authorization = new AuthenticationHeaderValue("Basic", encodedToken); + _azdoClient.DefaultRequestHeaders.UserAgent.ParseAdd("dotnet-helix-job-monitor"); + } + + public async Task> GetTimelineRecordsAsync(CancellationToken cancellationToken) + { + JObject data = await SendAsync(HttpMethod.Get, $"{_options.CollectionUri}{_options.TeamProject}/_apis/build/builds/{_options.BuildId}/timeline?api-version=7.1-preview.2", cancellationToken: cancellationToken); + return data?["records"]?.ToObject() ?? 
[]; + } + + public async Task> GetProcessedHelixJobNamesAsync(CancellationToken cancellationToken) + { + string buildUri = Uri.EscapeDataString($"vstfs:///Build/Build/{_options.BuildId}"); + JObject data = await SendAsync(HttpMethod.Get, $"{_options.CollectionUri}{_options.TeamProject}/_apis/test/runs?buildUri={buildUri}&api-version=7.1", cancellationToken: cancellationToken); + var processed = new HashSet(StringComparer.OrdinalIgnoreCase); + + foreach (JObject run in (data?["value"] as JArray ?? []).Cast()) + { + int? runId = run.Value("id"); + string state = run.Value("state"); + if (runId == null || !string.Equals(state, "Completed", StringComparison.OrdinalIgnoreCase)) + { + continue; + } + + string helixJobName = await GetMonitoredHelixJobNameAsync(runId.Value, cancellationToken); + if (!string.IsNullOrEmpty(helixJobName)) + { + processed.Add(helixJobName); + } + } + + return processed; + } + + private async Task GetMonitoredHelixJobNameAsync(int testRunId, CancellationToken cancellationToken) + { + JObject run = await SendAsync(HttpMethod.Get, $"{_options.CollectionUri}{_options.TeamProject}/_apis/test/runs/{testRunId}?api-version=7.1", cancellationToken: cancellationToken); + if (run?["tags"] is not JArray tags) + { + return null; + } + + foreach (JToken tag in tags) + { + string tagName = tag?.Value("name"); + if (!string.IsNullOrEmpty(tagName) && tagName.StartsWith(MonitoredJobTagPrefix, StringComparison.OrdinalIgnoreCase)) + { + return tagName.Substring(MonitoredJobTagPrefix.Length); + } + } + + return null; + } + + public async Task CreateTestRunAsync(string name, string helixJobName, CancellationToken cancellationToken) + { + JObject result = await SendAsync(HttpMethod.Post, + $"{_options.CollectionUri}{_options.TeamProject}/_apis/test/runs?api-version=5.0", + new JObject + { + ["automated"] = true, + ["build"] = new JObject { ["id"] = _options.BuildId }, + ["name"] = name, + ["state"] = "InProgress", + ["tags"] = new JArray { new JObject { ["name"] = 
MonitoredJobTagPrefix + helixJobName } }, + }, + cancellationToken: cancellationToken); + return result?["id"]?.ToObject() ?? 0; + } + + public async Task CompleteTestRunAsync(int testRunId, CancellationToken cancellationToken) + { + await SendAsync(new HttpMethod("PATCH"), + $"{_options.CollectionUri}{_options.TeamProject}/_apis/test/runs/{testRunId}?api-version=5.0", + new JObject { ["state"] = "Completed" }, + cancellationToken: cancellationToken); + } + + public async Task UploadTestResultsAsync(int testRunId, IReadOnlyList results, CancellationToken cancellationToken) + { + var publisher = new AzureDevOpsResultPublisher( + new AzureDevOpsReportingParameters( + new Uri(_options.CollectionUri, UriKind.Absolute), + _options.TeamProject, + testRunId.ToString(CultureInfo.InvariantCulture), + _options.SystemAccessToken), + _logger); + + bool allPassed = true; + foreach (WorkItemTestResults workItem in results) + { + _logger.LogInformation("Publishing test results for work item '{WorkItemName}' in job '{JobName}'...", workItem.WorkItemName, workItem.JobName); + allPassed &= await publisher.UploadTestResultsAsync( + workItem.TestResultFiles, + new + { + HelixJobId = workItem.JobName, + HelixWorkItemName = workItem.WorkItemName + }, + cancellationToken); + } + + return allPassed; + } + + private async Task SendAsync(HttpMethod method, string requestUri, JToken body = null, CancellationToken cancellationToken = default) + { + return await RetryHelper.RetryAsync(async () => + { + using var request = new HttpRequestMessage(method, requestUri); + if (body != null) + { + request.Content = new StringContent(body.ToString(Formatting.None), Encoding.UTF8, "application/json"); + } + + using HttpResponseMessage response = await _azdoClient.SendAsync(request, cancellationToken); + string content = response.Content != null ? 
await response.Content.ReadAsStringAsync(cancellationToken) : null; + if (!response.IsSuccessStatusCode) + { + throw new HttpRequestException($"Request to {requestUri} failed with {(int)response.StatusCode} {response.ReasonPhrase}. {content}"); + } + + return string.IsNullOrWhiteSpace(content) ? [] : JObject.Parse(content); + }, cancellationToken); + } + + public void Dispose() => _azdoClient.Dispose(); + } +} diff --git a/src/Microsoft.DotNet.Helix/JobMonitor/Services/HelixService.cs b/src/Microsoft.DotNet.Helix/JobMonitor/Services/HelixService.cs new file mode 100644 index 00000000000..4eb73e892d6 --- /dev/null +++ b/src/Microsoft.DotNet.Helix/JobMonitor/Services/HelixService.cs @@ -0,0 +1,179 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Collections.Generic; +using System.Collections.Immutable; +using System.IO; +using System.Linq; +using System.Threading; +using System.Threading.Tasks; +using Azure; +using Azure.Storage.Blobs; +using Microsoft.DotNet.Helix.Client; +using Microsoft.DotNet.Helix.Client.Models; +using Microsoft.DotNet.Helix.JobMonitor.Models; +using Microsoft.DotNet.Helix.AzureDevOpsTestPublisher; +using Microsoft.Extensions.Logging; +using Newtonsoft.Json.Linq; + +namespace Microsoft.DotNet.Helix.JobMonitor +{ + internal sealed class HelixService : IHelixService + { + private readonly JobMonitorOptions _options; + private readonly ILogger _logger; + private readonly IHelixApi _helixApi; + + public HelixService(JobMonitorOptions options, ILogger logger) + { + _options = options; + _logger = logger; + _helixApi = string.IsNullOrEmpty(options.HelixAccessToken) + ? ApiFactory.GetAnonymous(options.HelixBaseUri) + : ApiFactory.GetAuthenticated(options.HelixBaseUri, options.HelixAccessToken); + } + + public async Task> GetJobsAsync(CancellationToken cancellationToken) + { + // Build the Helix source filter. 
For PR builds, use the PR merge ref. + // For CI builds without a PR, use the branch-based source. + string source = _options.PrNumber.HasValue + ? $"pr/public/{_options.Organization}/{_options.RepositoryName}/refs/pull/{_options.PrNumber}/merge" + : $"official/public/{_options.Organization}/{_options.RepositoryName}"; + + IImmutableList jobs = await RetryHelper.RetryAsync( + async () => await _helixApi.Job.ListAsync(source: source), + cancellationToken); + + return + [ + ..jobs + .Where(j => j.Properties is JObject properties && properties.TryGetValue("BuildId", out JToken buildId) && buildId?.ToString() == _options.BuildId) + .Select(j => new HelixJobInfo( + j.Name, + j.Finished != null ? "finished" : "running", + GetTestRunNameFromJob(j), + GetStringPropertyFromJob(j, "System.StageName"))) + ]; + } + + public async Task> DownloadTestResultsAsync( + string jobName, + IReadOnlyCollection workItemNames, + CancellationToken cancellationToken) + { + List downloadedFiles = []; + string outputDirectory = Path.Combine(_options.WorkingDirectory, SanitizeDirName(jobName)); + Directory.CreateDirectory(outputDirectory); + + JobResultsUri resultsUri = await RetryHelper.RetryAsync(() => _helixApi.Job.ResultsAsync(jobName), cancellationToken); + + foreach (string workItemName in workItemNames) + { + IImmutableList availableFiles = await RetryHelper.RetryAsync( + () => _helixApi.WorkItem.ListFilesAsync(workItemName, jobName, false), + cancellationToken); + + availableFiles = [.. 
availableFiles.Where(f => LooksLikeTestResultFile(f.Name))]; + if (availableFiles.Count == 0) + { + continue; + } + + string workItemDirectory = Path.Combine(outputDirectory, SanitizeDirName(workItemName)); + Directory.CreateDirectory(workItemDirectory); + + List workItemFiles = []; + foreach (UploadedFile file in availableFiles) + { + string relativePath = file.Name.Replace('\\', Path.DirectorySeparatorChar).Replace('/', Path.DirectorySeparatorChar); + string destinationFile = Path.Combine(workItemDirectory, relativePath); + string directory = Path.GetDirectoryName(destinationFile); + if (!string.IsNullOrEmpty(directory)) + { + Directory.CreateDirectory(directory); + } + + try + { + BlobClient blobClient = CreateBlobClient(file.Link, resultsUri.ResultsUriRSAS); + await blobClient.DownloadToAsync(destinationFile, cancellationToken); + workItemFiles.Add(destinationFile); + } + catch (Exception ex) + { + _logger.LogWarning(ex, "Failed to download '{FileName}' for '{JobName}/{WorkItemName}'.", file.Name, jobName, workItemName); + } + } + + downloadedFiles.Add(new WorkItemTestResults(jobName, workItemName, workItemFiles)); + } + + return downloadedFiles; + } + + private static string GetTestRunNameFromJob(JobSummary helixJob) + { + if (helixJob.Properties is JObject properties + && properties.TryGetValue("TestRunName", out JToken testRunName)) + { + string value = testRunName?.ToString(); + if (!string.IsNullOrEmpty(value)) + { + return value; + } + } + + return helixJob.Name; + } + + private static string GetStringPropertyFromJob(JobSummary helixJob, string propertyName) + { + if (helixJob.Properties is JObject properties + && properties.TryGetValue(propertyName, out JToken token)) + { + string value = token?.ToString(); + if (!string.IsNullOrEmpty(value)) + { + return value; + } + } + + return null; + } + + private static BlobClient CreateBlobClient(string fileLink, string resultsSas) + { + var options = new BlobClientOptions(); + options.Retry.NetworkTimeout = 
TimeSpan.FromMinutes(5); + if (string.IsNullOrEmpty(resultsSas)) + { + return new BlobClient(new Uri(fileLink), options); + } + + string strippedUri = fileLink.Contains('?') ? fileLink.Substring(0, fileLink.LastIndexOf('?', StringComparison.Ordinal)) : fileLink; + return new BlobClient(new Uri(strippedUri), new AzureSasCredential(resultsSas), options); + } + + private static bool LooksLikeTestResultFile(string path) + => LocalTestResultsReader.LooksLikeTestResultFile(path); + + private static string SanitizeDirName(string value) + { + foreach (char invalidChar in Path.GetInvalidFileNameChars()) + { + value = value.Replace(invalidChar, '-'); + } + + return value; + } + + public async Task> ListWorkItemsAsync( + string jobName, + CancellationToken cancellationToken) + { + return await RetryHelper.RetryAsync(() => _helixApi.WorkItem.ListAsync(jobName), cancellationToken); + } + } +} diff --git a/src/Microsoft.DotNet.Helix/Sdk.Tests/Microsoft.DotNet.Helix.Sdk.Tests/Fakes/FakeAzureDevOpsService.cs b/src/Microsoft.DotNet.Helix/Sdk.Tests/Microsoft.DotNet.Helix.Sdk.Tests/Fakes/FakeAzureDevOpsService.cs new file mode 100644 index 00000000000..d3af095ba4b --- /dev/null +++ b/src/Microsoft.DotNet.Helix/Sdk.Tests/Microsoft.DotNet.Helix.Sdk.Tests/Fakes/FakeAzureDevOpsService.cs @@ -0,0 +1,116 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Threading; +using System.Threading.Tasks; +using Microsoft.DotNet.Helix.JobMonitor; + +namespace Microsoft.DotNet.Helix.Sdk.Tests.Fakes +{ + internal sealed class FakeAzureDevOpsService : IAzureDevOpsService + { + private readonly List _timelineSnapshots = []; + private readonly HashSet _previouslyProcessedJobs = new(StringComparer.OrdinalIgnoreCase); + private readonly Dictionary _inProgressRunsByJobName = new(StringComparer.OrdinalIgnoreCase); + private int _currentTimelineIndex; + private int _nextTestRunId; + + // Observable state for test assertions + public List CreatedTestRuns { get; } = []; + public List CompletedTestRunIds { get; } = []; + public Dictionary> UploadedResultsByRunId { get; } = []; + public List UploadedJobNames { get; } = []; + public int CreateTestRunCallCount { get; private set; } + + // Configuration + public FakeAzureDevOpsService AddTimelineSnapshot(AzureDevOpsTimelineRecord[] records) + { + _timelineSnapshots.Add(records); + return this; + } + + public FakeAzureDevOpsService WithPreviouslyProcessedJob(string jobName) + { + _previouslyProcessedJobs.Add(jobName); + return this; + } + + public void AdvanceTimeline() + { + if (_currentTimelineIndex < _timelineSnapshots.Count - 1) + { + _currentTimelineIndex++; + } + } + + // IAzureDevOpsService implementation + public Task> GetTimelineRecordsAsync(CancellationToken cancellationToken) + { + if (_timelineSnapshots.Count == 0) + { + return Task.FromResult>(Array.Empty()); + } + + AzureDevOpsTimelineRecord[] snapshot = _timelineSnapshots[Math.Min(_currentTimelineIndex, _timelineSnapshots.Count - 1)]; + return Task.FromResult>(snapshot); + } + + public Task> GetProcessedHelixJobNamesAsync(CancellationToken cancellationToken) + { + var result = new HashSet(_previouslyProcessedJobs, StringComparer.OrdinalIgnoreCase); + return Task.FromResult>(result); + } + + public Task CreateTestRunAsync(string name, string 
helixJobName, CancellationToken cancellationToken) + { + CreateTestRunCallCount++; + + // Idempotent: if a run for this helix job is in-progress, reuse it + if (_inProgressRunsByJobName.TryGetValue(helixJobName, out int existingId)) + { + return Task.FromResult(existingId); + } + + int id = Interlocked.Increment(ref _nextTestRunId); + CreatedTestRuns.Add(name); + _inProgressRunsByJobName[helixJobName] = id; + return Task.FromResult(id); + } + + public Task CompleteTestRunAsync(int testRunId, CancellationToken cancellationToken) + { + CompletedTestRunIds.Add(testRunId); + + string keyToRemove = null; + foreach (var kvp in _inProgressRunsByJobName) + { + if (kvp.Value == testRunId) { keyToRemove = kvp.Key; break; } + } + + if (keyToRemove != null) _inProgressRunsByJobName.Remove(keyToRemove); + return Task.CompletedTask; + } + + public Task UploadTestResultsAsync(int testRunId, IReadOnlyList results, CancellationToken cancellationToken) + { + if (!UploadedResultsByRunId.TryGetValue(testRunId, out List existing)) + { + existing = []; + UploadedResultsByRunId[testRunId] = existing; + } + + existing.AddRange(results); + + foreach (string jobName in results.Select(r => r.JobName).Distinct(StringComparer.OrdinalIgnoreCase)) + { + UploadedJobNames.Add(jobName); + _previouslyProcessedJobs.Add(jobName); + } + + return Task.FromResult(true); + } + } +} diff --git a/src/Microsoft.DotNet.Helix/Sdk.Tests/Microsoft.DotNet.Helix.Sdk.Tests/Fakes/FakeHelixService.cs b/src/Microsoft.DotNet.Helix/Sdk.Tests/Microsoft.DotNet.Helix.Sdk.Tests/Fakes/FakeHelixService.cs new file mode 100644 index 00000000000..b6a4a51103f --- /dev/null +++ b/src/Microsoft.DotNet.Helix/Sdk.Tests/Microsoft.DotNet.Helix.Sdk.Tests/Fakes/FakeHelixService.cs @@ -0,0 +1,104 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Threading; +using System.Threading.Tasks; +using Microsoft.DotNet.Helix.Client.Models; +using Microsoft.DotNet.Helix.JobMonitor; +using Microsoft.DotNet.Helix.JobMonitor.Models; + +namespace Microsoft.DotNet.Helix.Sdk.Tests.Fakes +{ + internal sealed class FakeHelixService : IHelixService + { + private readonly List _snapshots = []; + private readonly HashSet _downloadFailureJobs = new(StringComparer.OrdinalIgnoreCase); + private int _currentSnapshotIndex; + + public FakeHelixService AddSnapshot( + HelixJobInfo[] jobs, + Dictionary passFailByJob = null, + Dictionary> testResultsByJob = null) + { + _snapshots.Add(new HelixSnapshot( + jobs, + passFailByJob ?? new Dictionary(StringComparer.OrdinalIgnoreCase), + testResultsByJob ?? new Dictionary>(StringComparer.OrdinalIgnoreCase))); + return this; + } + + public FakeHelixService FailDownloadForJob(string jobName) { _downloadFailureJobs.Add(jobName); return this; } + public void ClearDownloadFailures() { _downloadFailureJobs.Clear(); } + + public void AdvanceSnapshot() + { + if (_currentSnapshotIndex < _snapshots.Count - 1) _currentSnapshotIndex++; + } + + private HelixSnapshot CurrentSnapshot => _snapshots[Math.Min(_currentSnapshotIndex, _snapshots.Count - 1)]; + + public Task> GetJobsAsync(CancellationToken cancellationToken) + { + if (_snapshots.Count == 0) + { + return Task.FromResult>([]); + } + + return Task.FromResult>(CurrentSnapshot.Jobs); + } + + public Task> DownloadTestResultsAsync( + string jobName, IReadOnlyCollection workItemNames, CancellationToken cancellationToken) + { + if (_downloadFailureJobs.Contains(jobName)) + { + throw new InvalidOperationException($"Injected download failure for Helix job '{jobName}'."); + } + + if (CurrentSnapshot.TestResultsByJob.TryGetValue(jobName, out List explicitResults)) + { + return Task.FromResult>(explicitResults); + } + + workItemNames = workItemNames + 
.Distinct(StringComparer.OrdinalIgnoreCase) + .DefaultIfEmpty($"{jobName}-synthetic") + .ToList(); + + IReadOnlyList generated = workItemNames + .Select(wi => new WorkItemTestResults(jobName, wi, [])) + .ToList(); + + return Task.FromResult(generated); + } + + public Task> ListWorkItemsAsync( + string jobName, + CancellationToken _) + { + var items = new List(); + + foreach (string w in CurrentSnapshot.PassFailByJob[jobName].PassedWorkItems) + { + var wi = new WorkItemSummary($"{jobName}/{w}", jobName, w, "Finished") { ExitCode = 0 }; + items.Add(wi); + } + + foreach (string w in CurrentSnapshot.PassFailByJob[jobName].FailedWorkItems) + { + var wi = new WorkItemSummary($"{jobName}/{w}", jobName, w, "Finished") { ExitCode = 1 }; + items.Add(wi); + } + + return Task.FromResult>(items); + } + + private sealed record HelixSnapshot( + HelixJobInfo[] Jobs, + Dictionary PassFailByJob, + Dictionary> TestResultsByJob); + } +} diff --git a/src/Microsoft.DotNet.Helix/Sdk.Tests/Microsoft.DotNet.Helix.Sdk.Tests/HelixJobMonitorUtilitiesTests.cs b/src/Microsoft.DotNet.Helix/Sdk.Tests/Microsoft.DotNet.Helix.Sdk.Tests/HelixJobMonitorUtilitiesTests.cs new file mode 100644 index 00000000000..6169f4ff65a --- /dev/null +++ b/src/Microsoft.DotNet.Helix/Sdk.Tests/Microsoft.DotNet.Helix.Sdk.Tests/HelixJobMonitorUtilitiesTests.cs @@ -0,0 +1,35 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +using Microsoft.DotNet.Helix.JobMonitor; +using Xunit; + +namespace Microsoft.DotNet.Helix.Sdk.Tests +{ + public class HelixJobMonitorUtilitiesTests + { + [Fact] + public void AreNonMonitorJobsComplete_IgnoresMonitorRecord() + { + var records = new[] + { + new AzureDevOpsTimelineRecord { Type = "Job", Name = "Build Linux", State = "completed", Result = "succeeded" }, + new AzureDevOpsTimelineRecord { Type = "Job", Name = "Helix Job Monitor", State = "inProgress", Result = null }, + }; + + Assert.True(HelixJobMonitorUtilities.AreNonMonitorJobsComplete(records, "Helix Job Monitor")); + } + + [Fact] + public void HasFailedNonMonitorJobs_DetectsFailures() + { + var records = new[] + { + new AzureDevOpsTimelineRecord { Type = "Job", Name = "Build Linux", State = "completed", Result = "failed" }, + new AzureDevOpsTimelineRecord { Type = "Job", Name = "Helix Job Monitor", State = "inProgress", Result = null }, + }; + + Assert.True(HelixJobMonitorUtilities.HasFailedNonMonitorJobs(records, "Helix Job Monitor")); + } + } +} diff --git a/src/Microsoft.DotNet.Helix/Sdk.Tests/Microsoft.DotNet.Helix.Sdk.Tests/JobMonitorRunnerTests.cs b/src/Microsoft.DotNet.Helix/Sdk.Tests/Microsoft.DotNet.Helix.Sdk.Tests/JobMonitorRunnerTests.cs new file mode 100644 index 00000000000..c152d40eb55 --- /dev/null +++ b/src/Microsoft.DotNet.Helix/Sdk.Tests/Microsoft.DotNet.Helix.Sdk.Tests/JobMonitorRunnerTests.cs @@ -0,0 +1,351 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Threading; +using System.Threading.Tasks; +using Microsoft.DotNet.Helix.JobMonitor; +using Microsoft.DotNet.Helix.JobMonitor.Models; +using Microsoft.DotNet.Helix.Sdk.Tests.Fakes; +using Microsoft.DotNet.Helix.Sdk.Tests.ScenarioHelpers; +using Microsoft.Extensions.Logging.Abstractions; +using Xunit; +using static Microsoft.DotNet.Helix.Sdk.Tests.ScenarioHelpers.ScenarioHelpers; + +namespace Microsoft.DotNet.Helix.Sdk.Tests +{ + [Collection("NonParallel")] + public class JobMonitorRunnerTests + { + // ----------------------------------------------------------------------- + // Happy Path + // ----------------------------------------------------------------------- + + [Fact] + public async Task AllJobsPassOnFirstPoll_ExitZero_OneTestRunPerJob() + { + var (azdo, helix, runner, delayCount) = CreateScenario( + timelineSnapshots: [[PipelineJob("Build Linux", "completed", "succeeded"), MonitorJob()]], + helixSnapshots: [(jobs: [HelixJob("job-linux", "finished")], passFail: Dict(("job-linux", PassFail(passed: ["wi-1"]))))]); + + int exitCode = await runner.RunAsync(CancellationToken.None); + + Assert.Equal(0, exitCode); + Assert.Equal(0, delayCount()); + Assert.Single(azdo.CreatedTestRuns); + Assert.Single(azdo.CompletedTestRunIds); + Assert.Equal(["job-linux"], azdo.UploadedJobNames); + } + + [Fact] + public async Task MultipleJobsAcrossMultiplePolls_ProcessesEachOnce() + { + var (azdo, helix, runner, delayCount) = CreateScenario( + timelineSnapshots: + [ + [PipelineJob("Build Linux", "inProgress"), PipelineJob("Build Win", "inProgress"), MonitorJob()], + [PipelineJob("Build Linux", "completed", "succeeded"), PipelineJob("Build Win", "completed", "succeeded"), MonitorJob()], + [PipelineJob("Build Linux", "completed", "succeeded"), PipelineJob("Build Win", "completed", "succeeded"), MonitorJob()], + ], + helixSnapshots: + [ + (jobs: [HelixJob("job-linux", "running")], passFail: 
EmptyPassFail()), + (jobs: [HelixJob("job-linux", "finished"), HelixJob("job-win", "running")], passFail: Dict(("job-linux", PassFail(passed: ["linux-wi"])))), + (jobs: [HelixJob("job-linux", "finished"), HelixJob("job-win", "finished")], passFail: Dict(("job-linux", PassFail(passed: ["linux-wi"])), ("job-win", PassFail(passed: ["win-wi"])))), + ]); + + int exitCode = await runner.RunAsync(CancellationToken.None); + + Assert.Equal(0, exitCode); + Assert.Equal(2, delayCount()); + Assert.Equal(2, azdo.CreatedTestRuns.Count); + Assert.Equal(2, azdo.CompletedTestRunIds.Count); + Assert.Equal(["job-linux", "job-win"], azdo.UploadedJobNames); + } + + [Fact] + public async Task StageCompletesWithNoHelixJobs_ExitZero_NoTestRuns() + { + var (azdo, _, runner, _) = CreateScenario( + timelineSnapshots: [[PipelineJob("Build Linux", "completed", "succeeded"), MonitorJob()]], + helixSnapshots: [(jobs: Array.Empty(), passFail: EmptyPassFail())]); + + Assert.Equal(0, await runner.RunAsync(CancellationToken.None)); + Assert.Empty(azdo.CreatedTestRuns); + Assert.Empty(azdo.UploadedJobNames); + } + + // ----------------------------------------------------------------------- + // Failure Scenarios + // ----------------------------------------------------------------------- + + [Fact] + public async Task PipelineJobFailsBeforeHelixSubmission_ExitOne_NoTestRuns() + { + var (azdo, _, runner, _) = CreateScenario( + timelineSnapshots: [[PipelineJob("Build Linux", "completed", "failed"), MonitorJob()]], + helixSnapshots: [(jobs: Array.Empty(), passFail: EmptyPassFail())]); + + Assert.Equal(1, await runner.RunAsync(CancellationToken.None)); + Assert.Empty(azdo.CreatedTestRuns); + } + + [Fact] + public async Task PipelineJobCanceled_ExitOne() + { + var (_, _, runner, _) = CreateScenario( + timelineSnapshots: [[PipelineJob("Build Linux", "completed", "canceled"), MonitorJob()]], + helixSnapshots: [(jobs: Array.Empty(), passFail: EmptyPassFail())]); + + Assert.Equal(1, await 
runner.RunAsync(CancellationToken.None)); + } + + [Fact] + public async Task HelixJobFails_ExitOne_ResultsStillUploaded() + { + var (azdo, _, runner, _) = CreateScenario( + timelineSnapshots: [[PipelineJob("Build Linux", "completed", "succeeded"), MonitorJob()]], + helixSnapshots: [(jobs: [HelixJob("job-linux", "finished")], passFail: Dict(("job-linux", PassFail(failed: ["wi-1"]))))]); + + Assert.Equal(1, await runner.RunAsync(CancellationToken.None)); + Assert.Equal(["job-linux"], azdo.UploadedJobNames); + Assert.Single(azdo.CompletedTestRunIds); + } + + [Fact] + public async Task AllHelixWorkItemsFail_ExitOne_ResultsUploaded() + { + var (azdo, _, runner, _) = CreateScenario( + timelineSnapshots: [[PipelineJob("Build Linux", "completed", "succeeded"), MonitorJob()]], + helixSnapshots: [(jobs: [HelixJob("job-linux", "finished")], passFail: Dict(("job-linux", PassFail(failed: ["wi-1", "wi-2"]))))]); + + Assert.Equal(1, await runner.RunAsync(CancellationToken.None)); + Assert.Equal(["job-linux"], azdo.UploadedJobNames); + } + + [Fact] + public async Task MultipleHelixJobsAllFail_ExitOne_AllResultsUploaded() + { + var (azdo, _, runner, _) = CreateScenario( + timelineSnapshots: [[PipelineJob("Build Linux", "completed", "succeeded"), PipelineJob("Build Win", "completed", "succeeded"), MonitorJob()]], + helixSnapshots: [(jobs: [HelixJob("job-linux", "finished"), HelixJob("job-win", "finished")], passFail: Dict(("job-linux", PassFail(failed: ["linux-wi"])), ("job-win", PassFail(failed: ["win-wi"]))))]); + + Assert.Equal(1, await runner.RunAsync(CancellationToken.None)); + Assert.Equal(2, azdo.UploadedJobNames.Count); + } + + [Fact] + public async Task PipelineFailsButHelixResultsStillUploaded() + { + var (azdo, _, runner, _) = CreateScenario( + timelineSnapshots: [[PipelineJob("Build Linux", "completed", "failed"), MonitorJob()]], + helixSnapshots: [(jobs: [HelixJob("job-linux", "finished")], passFail: Dict(("job-linux", PassFail(passed: ["wi-1"]))))]); + + 
Assert.Equal(1, await runner.RunAsync(CancellationToken.None)); + Assert.Equal(["job-linux"], azdo.UploadedJobNames); + } + + // ----------------------------------------------------------------------- + // Retry / Rerun Scenarios + // ----------------------------------------------------------------------- + + [Fact] + public async Task MonitorRetry_SkipsPreviouslyProcessed() + { + var azdo = new FakeAzureDevOpsService().WithPreviouslyProcessedJob("job-linux"); + var helix = new FakeHelixService(); + ConfigureSnapshots(azdo, helix, + [[PipelineJob("Build Linux", "completed", "succeeded"), PipelineJob("Build Win", "completed", "succeeded"), MonitorJob()]], + [(jobs: [HelixJob("job-linux", "finished"), HelixJob("job-win", "finished")], passFail: Dict(("job-linux", PassFail(passed: ["linux-wi"])), ("job-win", PassFail(passed: ["win-wi"]))))]); + var runner = CreateRunner(azdo, helix); + + Assert.Equal(0, await runner.RunAsync(CancellationToken.None)); + Assert.Single(azdo.CreatedTestRuns); + Assert.Equal(["job-win"], azdo.UploadedJobNames); + } + + [Fact] + public async Task MonitorRetry_ProcessesReplacementDelta() + { + var azdo = new FakeAzureDevOpsService().WithPreviouslyProcessedJob("job-linux-attempt1"); + var helix = new FakeHelixService(); + ConfigureSnapshots(azdo, helix, + [[PipelineJob("Build Linux (retry)", "completed", "succeeded"), MonitorJob()]], + [(jobs: [HelixJob("job-linux-attempt1", "finished"), HelixJob("job-linux-attempt2", "finished")], passFail: Dict(("job-linux-attempt2", PassFail(passed: ["wi-2"]))))]); + var runner = CreateRunner(azdo, helix); + + Assert.Equal(0, await runner.RunAsync(CancellationToken.None)); + Assert.Equal(["job-linux-attempt2"], azdo.UploadedJobNames); + } + + [Fact] + public async Task StageRerun_NewJobsQueuedAlongsideOld_WaitsForNew() + { + var azdo = new FakeAzureDevOpsService().WithPreviouslyProcessedJob("job-linux-v1").WithPreviouslyProcessedJob("job-win-v1"); + var helix = new FakeHelixService(); + 
ConfigureSnapshots(azdo, helix, + [ + [PipelineJob("Build Linux", "completed", "succeeded"), PipelineJob("Build Win", "inProgress"), MonitorJob()], + [PipelineJob("Build Linux", "completed", "succeeded"), PipelineJob("Build Win", "completed", "succeeded"), MonitorJob()], + ], + [ + (jobs: [HelixJob("job-linux-v1", "finished"), HelixJob("job-linux-v2", "running")], passFail: EmptyPassFail()), + (jobs: [HelixJob("job-linux-v1", "finished"), HelixJob("job-linux-v2", "finished"), HelixJob("job-win-v2", "finished")], passFail: Dict(("job-linux-v2", PassFail(passed: ["linux-wi"])), ("job-win-v2", PassFail(passed: ["win-wi"])))), + ]); + int delayCount = 0; + var runner = new JobMonitorRunner(DefaultOptions(), NullLogger.Instance, azdo, helix, (_, ct) => { delayCount++; AdvanceFakes(azdo, helix); return Task.CompletedTask; }); + + Assert.Equal(0, await runner.RunAsync(CancellationToken.None)); + Assert.Equal(1, delayCount); + Assert.Equal(["job-linux-v2", "job-win-v2"], azdo.UploadedJobNames); + } + + [Fact] + public async Task MultipleRetries_SkipsAllPriorGenerations() + { + var azdo = new FakeAzureDevOpsService() + .WithPreviouslyProcessedJob("job-linux-attempt1").WithPreviouslyProcessedJob("job-linux-attempt2") + .WithPreviouslyProcessedJob("job-win-attempt1").WithPreviouslyProcessedJob("job-win-attempt2"); + var helix = new FakeHelixService(); + ConfigureSnapshots(azdo, helix, + [[PipelineJob("Build Linux", "completed", "succeeded"), PipelineJob("Build Win", "completed", "succeeded"), MonitorJob()]], + [(jobs: [HelixJob("job-linux-attempt1", "finished"), HelixJob("job-linux-attempt2", "finished"), HelixJob("job-linux-attempt3", "finished"), HelixJob("job-win-attempt1", "finished"), HelixJob("job-win-attempt2", "finished"), HelixJob("job-win-attempt3", "finished")], passFail: Dict(("job-linux-attempt3", PassFail(passed: ["linux-wi"])), ("job-win-attempt3", PassFail(passed: ["win-wi"]))))]); + var runner = CreateRunner(azdo, helix); + + Assert.Equal(0, await 
runner.RunAsync(CancellationToken.None)); + Assert.Equal(["job-linux-attempt3", "job-win-attempt3"], azdo.UploadedJobNames); + } + + [Fact] + public async Task RetryWithFailedSubsetResubmitted_OnlyNewJobProcessed() + { + var azdo = new FakeAzureDevOpsService().WithPreviouslyProcessedJob("job-linux-attempt1"); + var helix = new FakeHelixService(); + ConfigureSnapshots(azdo, helix, + [[PipelineJob("Build Linux (retry)", "completed", "succeeded"), MonitorJob()]], + [(jobs: [HelixJob("job-linux-attempt1", "finished"), HelixJob("job-linux-attempt2", "finished")], passFail: Dict(("job-linux-attempt2", PassFail(passed: ["wi-2"]))))]); + var runner = CreateRunner(azdo, helix); + + Assert.Equal(0, await runner.RunAsync(CancellationToken.None)); + Assert.Equal(["job-linux-attempt2"], azdo.UploadedJobNames); + } + + // ----------------------------------------------------------------------- + // Edge Cases + // ----------------------------------------------------------------------- + + [Fact] + public async Task MonitorTimesOut_ReturnsOne() + { + // The runner catches OperationCanceledException and returns 1. 
+ var azdo = new FakeAzureDevOpsService(); + var helix = new FakeHelixService(); + ConfigureSnapshots(azdo, helix, + [[PipelineJob("Build Linux", "inProgress"), MonitorJob()]], + [(jobs: [HelixJob("job-linux", "running")], passFail: EmptyPassFail())]); + + using var cts = new CancellationTokenSource(); + cts.Cancel(); + var runner = CreateRunner(azdo, helix); + Assert.Equal(1, await runner.RunAsync(cts.Token)); + } + + [Fact] + public async Task AllPipelineJobsFailWhileHelixStillRunning_ExitsImmediately() + { + var (azdo, _, runner, delayCount) = CreateScenario( + timelineSnapshots: [[PipelineJob("Build Linux", "completed", "failed"), MonitorJob()]], + helixSnapshots: [(jobs: [HelixJob("job-linux", "running")], passFail: EmptyPassFail())]); + + Assert.Equal(1, await runner.RunAsync(CancellationToken.None)); + Assert.Equal(0, delayCount()); + } + + [Fact] + public async Task DownloadFailure_TestRunStillCompleted() + { + // The runner always completes the test run (finally block), even on download failure. 
+ var azdo = new FakeAzureDevOpsService(); + var helix = new FakeHelixService(); + ConfigureSnapshots(azdo, helix, + [[PipelineJob("Build Linux", "completed", "succeeded"), PipelineJob("Build Win", "completed", "succeeded"), MonitorJob()]], + [(jobs: [HelixJob("job-linux", "finished"), HelixJob("job-win", "finished")], passFail: Dict(("job-linux", PassFail(passed: ["linux-wi"])), ("job-win", PassFail(passed: ["win-wi"]))))]); + + helix.FailDownloadForJob("job-win"); + var runner = CreateRunner(azdo, helix); + Assert.Equal(1, await runner.RunAsync(CancellationToken.None)); + + Assert.Equal(["job-linux"], azdo.UploadedJobNames); + // Both test runs completed (finally block ensures CompleteTestRunAsync is called) + Assert.Equal(2, azdo.CompletedTestRunIds.Count); + } + + [Fact] + public async Task AllPipelineJobsCanceled_ExitOne_NoUploads() + { + var (azdo, _, runner, _) = CreateScenario( + timelineSnapshots: [[PipelineJob("Build Linux", "completed", "canceled"), PipelineJob("Build Win", "completed", "canceled"), MonitorJob()]], + helixSnapshots: [(jobs: Array.Empty(), passFail: EmptyPassFail())]); + + Assert.Equal(1, await runner.RunAsync(CancellationToken.None)); + Assert.Empty(azdo.UploadedJobNames); + } + + // ----------------------------------------------------------------------- + // Helpers + // ----------------------------------------------------------------------- + + private static JobMonitorOptions DefaultOptions() => new() + { + BuildId = "123", + CollectionUri = "https://dev.azure.com/dnceng/", + JobMonitorName = DefaultMonitorName, + MaximumWaitMinutes = 1, + PollingIntervalSeconds = 0, + Organization = "dotnet", + RepositoryName = "arcade", + PrNumber = 99999, + SystemAccessToken = "token", + TeamProject = "public", + WorkingDirectory = System.IO.Path.Combine(System.IO.Path.GetTempPath(), "job-monitor-test"), + }; + + private static readonly Func NoDelay = (_, _) => Task.CompletedTask; + private static Dictionary EmptyPassFail() => 
new(StringComparer.OrdinalIgnoreCase); + + private static Dictionary Dict(params (string jobName, HelixJobPassFail pf)[] entries) + { + var dict = new Dictionary(StringComparer.OrdinalIgnoreCase); + foreach (var (jobName, pf) in entries) dict[jobName] = pf; + return dict; + } + + private static void AdvanceFakes(FakeAzureDevOpsService azdo, FakeHelixService helix) { azdo.AdvanceTimeline(); helix.AdvanceSnapshot(); } + + private static void ConfigureSnapshots( + FakeAzureDevOpsService azdo, FakeHelixService helix, + AzureDevOpsTimelineRecord[][] timelineSnapshots, + (HelixJobInfo[] jobs, Dictionary passFail)[] helixSnapshots) + { + foreach (var timeline in timelineSnapshots) azdo.AddTimelineSnapshot(timeline); + foreach (var (jobs, passFail) in helixSnapshots) helix.AddSnapshot(jobs, passFail); + } + + private static JobMonitorRunner CreateRunner(FakeAzureDevOpsService azdo, FakeHelixService helix, Func delayFunc = null) + => new(DefaultOptions(), NullLogger.Instance, azdo, helix, delayFunc ?? 
NoDelay); + + private static (FakeAzureDevOpsService azdo, FakeHelixService helix, JobMonitorRunner runner, Func delayCount) CreateScenario( + AzureDevOpsTimelineRecord[][] timelineSnapshots, + (HelixJobInfo[] jobs, Dictionary passFail)[] helixSnapshots) + { + var azdo = new FakeAzureDevOpsService(); + var helix = new FakeHelixService(); + ConfigureSnapshots(azdo, helix, timelineSnapshots, helixSnapshots); + int delays = 0; + var runner = new JobMonitorRunner(DefaultOptions(), NullLogger.Instance, azdo, helix, (_, ct) => { delays++; AdvanceFakes(azdo, helix); return Task.CompletedTask; }); + return (azdo, helix, runner, () => delays); + } + } +} diff --git a/src/Microsoft.DotNet.Helix/Sdk.Tests/Microsoft.DotNet.Helix.Sdk.Tests/LocalTestResultsReaderTests.cs b/src/Microsoft.DotNet.Helix/Sdk.Tests/Microsoft.DotNet.Helix.Sdk.Tests/LocalTestResultsReaderTests.cs new file mode 100644 index 00000000000..f63fb8e567b --- /dev/null +++ b/src/Microsoft.DotNet.Helix/Sdk.Tests/Microsoft.DotNet.Helix.Sdk.Tests/LocalTestResultsReaderTests.cs @@ -0,0 +1,94 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +using System; +using System.Collections.Generic; +using System.IO; +using System.Threading.Tasks; +using Microsoft.DotNet.Helix.AzureDevOpsTestPublisher; +using Microsoft.DotNet.Helix.AzureDevOpsTestPublisher.Model; +using Microsoft.Extensions.Logging; +using Microsoft.Extensions.Logging.Abstractions; +using Xunit; + +namespace Microsoft.DotNet.Helix.Sdk.Tests +{ + public class LocalTestResultsReaderTests + { + [Fact] + public async Task LocalTestResultsReader_ReadsXunitFileFromDownloadedResults() + { + string tempDirectory = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString("N")); + string workItemDirectory = Path.Combine(tempDirectory, "work-item"); + Directory.CreateDirectory(workItemDirectory); + + try + { + File.WriteAllText( + Path.Combine(workItemDirectory, "testResults.xml"), + """ + + + + + + + + """); + + var reader = new LocalTestResultsReader(NullLoggerFactory.Instance.CreateLogger()); + string filePath = Path.Combine(workItemDirectory, "testResults.xml"); + IReadOnlyList resultSets = await reader.ReadResultFileAsync(filePath); + IReadOnlyList aggregate = new ResultAggregator().Aggregate([resultSets]); + AggregatedResult result = Assert.Single(aggregate); + + Assert.Equal("Sample.Tests.Passes", result.Name); + Assert.Equal("Passed", result.Result); + } + finally + { + Directory.Delete(tempDirectory, recursive: true); + } + } + + [Fact] + public async Task LocalTestResultsReader_CombinesPackedAndXmlResultsAcrossWorkItems() + { + string tempDirectory = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString("N")); + string packedDirectory = Path.Combine(tempDirectory, "packed-item"); + string xmlDirectory = Path.Combine(tempDirectory, "xml-item"); + Directory.CreateDirectory(packedDirectory); + Directory.CreateDirectory(xmlDirectory); + string originalDirectory = Environment.CurrentDirectory; + + try + { + Environment.CurrentDirectory = packedDirectory; + string filePath = Path.Combine(xmlDirectory, "testResults.xml"); + + File.WriteAllText( + 
filePath, + """ + + + + + + + + """); + + IReadOnlyList resultSets = await new LocalTestResultsReader(NullLoggerFactory.Instance.CreateLogger()).ReadResultFileAsync(filePath); + IReadOnlyList aggregate = new ResultAggregator().Aggregate([resultSets]); + + Assert.Single(aggregate); + Assert.Contains(aggregate, static x => x.Name == "Xml.Tests.Passes"); + } + finally + { + Environment.CurrentDirectory = originalDirectory; + Directory.Delete(tempDirectory, recursive: true); + } + } + } +} diff --git a/src/Microsoft.DotNet.Helix/Sdk.Tests/Microsoft.DotNet.Helix.Sdk.Tests/Microsoft.DotNet.Helix.Sdk.Tests.csproj b/src/Microsoft.DotNet.Helix/Sdk.Tests/Microsoft.DotNet.Helix.Sdk.Tests/Microsoft.DotNet.Helix.Sdk.Tests.csproj index 7fd8f7231cf..9a9eab6b519 100644 --- a/src/Microsoft.DotNet.Helix/Sdk.Tests/Microsoft.DotNet.Helix.Sdk.Tests/Microsoft.DotNet.Helix.Sdk.Tests.csproj +++ b/src/Microsoft.DotNet.Helix/Sdk.Tests/Microsoft.DotNet.Helix.Sdk.Tests/Microsoft.DotNet.Helix.Sdk.Tests.csproj @@ -21,7 +21,9 @@ + + diff --git a/src/Microsoft.DotNet.Helix/Sdk.Tests/Microsoft.DotNet.Helix.Sdk.Tests/NonParallelTestCollection.cs b/src/Microsoft.DotNet.Helix/Sdk.Tests/Microsoft.DotNet.Helix.Sdk.Tests/NonParallelTestCollection.cs new file mode 100644 index 00000000000..dba3679849f --- /dev/null +++ b/src/Microsoft.DotNet.Helix/Sdk.Tests/Microsoft.DotNet.Helix.Sdk.Tests/NonParallelTestCollection.cs @@ -0,0 +1,10 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +using Xunit; + +namespace Microsoft.DotNet.Helix.Sdk.Tests +{ + [CollectionDefinition("NonParallel", DisableParallelization = true)] + public class NonParallelTestCollection { } +} diff --git a/src/Microsoft.DotNet.Helix/Sdk.Tests/Microsoft.DotNet.Helix.Sdk.Tests/ScenarioHelpers/ScenarioHelpers.cs b/src/Microsoft.DotNet.Helix/Sdk.Tests/Microsoft.DotNet.Helix.Sdk.Tests/ScenarioHelpers/ScenarioHelpers.cs new file mode 100644 index 00000000000..f9856ee6170 --- /dev/null +++ b/src/Microsoft.DotNet.Helix/Sdk.Tests/Microsoft.DotNet.Helix.Sdk.Tests/ScenarioHelpers/ScenarioHelpers.cs @@ -0,0 +1,25 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using Microsoft.DotNet.Helix.JobMonitor; +using Microsoft.DotNet.Helix.JobMonitor.Models; + +namespace Microsoft.DotNet.Helix.Sdk.Tests.ScenarioHelpers +{ + internal static class ScenarioHelpers + { + public const string DefaultMonitorName = "Helix Job Monitor"; + + public static AzureDevOpsTimelineRecord PipelineJob(string name, string state, string result = null) + => new() { Type = "Job", Name = name, State = state, Result = result }; + + public static AzureDevOpsTimelineRecord MonitorJob(string name = DefaultMonitorName) + => new() { Type = "Job", Name = name, State = "inProgress" }; + + public static HelixJobInfo HelixJob(string jobName, string status) + => new(jobName, status); + + public static HelixJobPassFail PassFail(string[] passed = null, string[] failed = null) + => new(passed ?? [], failed ?? 
[]); + } +} diff --git a/src/Microsoft.DotNet.Helix/Sdk/Readme.md b/src/Microsoft.DotNet.Helix/Sdk/Readme.md index 6472e55cc28..a4cf9b4abb1 100644 --- a/src/Microsoft.DotNet.Helix/Sdk/Readme.md +++ b/src/Microsoft.DotNet.Helix/Sdk/Readme.md @@ -44,6 +44,65 @@ env: SYSTEM_ACCESSTOKEN: $(System.AccessToken) # We need to set this env var to publish helix results to Azure DevOps ``` +### Helix Job Monitor for Azure DevOps + +If you want to decouple Helix test execution from the build agents that submit the work, use the Helix Job Monitor. + +The job monitor is a lightweight dedicated pipeline job that: + +- polls Azure DevOps for pipeline state, +- polls Helix for jobs associated with the current build, +- downloads test result artifacts from completed Helix jobs, +- publishes results to Azure DevOps incrementally, +- returns a final green or red status once all non-monitor jobs and Helix jobs have completed. + +This allows the original build jobs to stop waiting on Helix execution while still preserving test visibility and pass/fail behavior in the pipeline. + +The job is added with the template at [/eng/common/core-templates/job/helix-job-monitor.yml](/eng/common/core-templates/job/helix-job-monitor.yml). + +Example: + +```yaml +jobs: +- template: /eng/common/core-templates/job/helix-job-monitor.yml@self + parameters: + jobName: HelixJobMonitor + displayName: Helix Job Monitor + pollingIntervalSeconds: 30 + timeoutInMinutes: 360 +``` + +Useful parameters: + +- `helixBaseUri`: base URI for the Helix service. Defaults to `https://helix.dot.net/`. +- `helixAccessToken`: optional token for authenticated Helix access on internal builds. +- `pollingIntervalSeconds`: how often the job monitor checks for new completed jobs. +- `timeoutInMinutes`: overall timeout for the job monitor. +- `jobMonitorName`: name used to identify and exclude the Helix Job Monitor job in the Azure DevOps timeline. 
+ +Behavior notes: + +- The job monitor uses its own `SYSTEM_ACCESSTOKEN`, so it does not depend on the shorter-lived token from the job that originally submitted the Helix work. +- If parseable xUnit, JUnit, or TRX result files are available, those are uploaded. +- If no result files are found, the job monitor creates synthetic work-item pass/fail results so that failures are still visible in Azure DevOps. +- The job monitor is safe to rerun because it checks for already-completed test runs and only processes new results. + +#### Opting in from a Helix project + +Pair the monitor job with the `EnableHelixJobMonitor` MSBuild property in the Helix `.proj` that +calls `SendHelixJob`: + +```xml + + true + +``` + +When set, the Helix SDK will submit Helix jobs and exit immediately without waiting for completion. +The Helix Job Monitor will be responsible for tracking the jobs to completion and publishing results to Azure DevOps, so no other changes are needed to the Helix project file itself. +You must, however, add the `helix-job-monitor.yml` template to your pipeline (see the example above) so the +results are still published to Azure DevOps. + Furthermore, when you need to make changes to Helix SDK, there's a way to run it locally with ease to test your changes in a tighter dev loop than having to have to wait for the full PR build. The repository contains E2E tests that utilize the Helix SDK to send test Helix jobs. diff --git a/src/Microsoft.DotNet.Helix/Sdk/tools/Microsoft.DotNet.Helix.Sdk.MonoQueue.targets b/src/Microsoft.DotNet.Helix/Sdk/tools/Microsoft.DotNet.Helix.Sdk.MonoQueue.targets index 90456cf2394..948a777624b 100644 --- a/src/Microsoft.DotNet.Helix/Sdk/tools/Microsoft.DotNet.Helix.Sdk.MonoQueue.targets +++ b/src/Microsoft.DotNet.Helix/Sdk/tools/Microsoft.DotNet.Helix.Sdk.MonoQueue.targets @@ -27,11 +27,43 @@ + + + + $(HelixPostCommands); + find . 
-maxdepth 5 \( -iname '*.trx' -o -iname 'testResults.xml' -o -iname 'test-results.xml' -o -iname 'test_results.xml' -o -iname 'junit-results.xml' -o -iname 'junitresults.xml' \) -print0 | xargs -0 -I{} cp -f {} "$HELIX_WORKITEM_UPLOAD_ROOT/" || true + + + + + $(HelixPostCommands); + powershell -NoProfile -NonInteractive -Command "Get-ChildItem -Path . -Recurse -File -Depth 5 -Include *.trx,testResults.xml,test-results.xml,test_results.xml,junit-results.xml,junitresults.xml -ErrorAction SilentlyContinue | Copy-Item -Destination $env:HELIX_WORKITEM_UPLOAD_ROOT -Force -ErrorAction SilentlyContinue" + + + + + + + <_HelixJobMonitorTestRunName>$(TestRunNamePrefix)$(HelixTargetQueue)$(TestRunNameSuffix) + + + + diff --git a/src/Microsoft.DotNet.Helix/Sdk/tools/Microsoft.DotNet.Helix.Sdk.MultiQueue.targets b/src/Microsoft.DotNet.Helix/Sdk/tools/Microsoft.DotNet.Helix.Sdk.MultiQueue.targets index a6a14c7df53..45f4c54b7c7 100644 --- a/src/Microsoft.DotNet.Helix/Sdk/tools/Microsoft.DotNet.Helix.Sdk.MultiQueue.targets +++ b/src/Microsoft.DotNet.Helix/Sdk/tools/Microsoft.DotNet.Helix.Sdk.MultiQueue.targets @@ -40,6 +40,15 @@ false + + + false + + Helix + + + false + + diff --git a/src/Microsoft.DotNet.Helix/Sdk/tools/azure-pipelines/AzurePipelines.props b/src/Microsoft.DotNet.Helix/Sdk/tools/azure-pipelines/AzurePipelines.props index e25086445d4..58b134f9447 100644 --- a/src/Microsoft.DotNet.Helix/Sdk/tools/azure-pipelines/AzurePipelines.props +++ b/src/Microsoft.DotNet.Helix/Sdk/tools/azure-pipelines/AzurePipelines.props @@ -6,6 +6,15 @@ false + + + false + + true diff --git a/tests/UnitTests.proj b/tests/UnitTests.proj index 75d73434482..39530e1087b 100644 --- a/tests/UnitTests.proj +++ b/tests/UnitTests.proj @@ -19,6 +19,11 @@ $(AGENT_JOBNAME) run on 300 + + + true diff --git a/tests/XHarness.Tests.Common.props b/tests/XHarness.Tests.Common.props index 4e36863a3c4..515bc50bb44 100644 --- a/tests/XHarness.Tests.Common.props +++ b/tests/XHarness.Tests.Common.props @@ -18,6 
+18,7 @@ true true https://helix.dot.net + true