From 6f279262f678af3f1d53c80e01dadc97343b5c39 Mon Sep 17 00:00:00 2001 From: Zheng Hao Tang Date: Mon, 16 Mar 2026 16:51:51 -0700 Subject: [PATCH 1/7] Process pom.xml in order and backfill any variable version --- .../maven/MavenWithFallbackDetector.cs | 203 +++++++++++------- .../maven/PendingComponent.cs | 14 ++ 2 files changed, 144 insertions(+), 73 deletions(-) create mode 100644 src/Microsoft.ComponentDetection.Detectors/maven/PendingComponent.cs diff --git a/src/Microsoft.ComponentDetection.Detectors/maven/MavenWithFallbackDetector.cs b/src/Microsoft.ComponentDetection.Detectors/maven/MavenWithFallbackDetector.cs index 99126d319..533f4e544 100644 --- a/src/Microsoft.ComponentDetection.Detectors/maven/MavenWithFallbackDetector.cs +++ b/src/Microsoft.ComponentDetection.Detectors/maven/MavenWithFallbackDetector.cs @@ -109,7 +109,10 @@ public class MavenWithFallbackDetector : FileComponentDetector, IExperimentalDet private readonly IMavenCommandService mavenCommandService; private readonly IEnvironmentVariableService envVarService; private readonly IFileUtilityService fileUtilityService; - private readonly Dictionary documentsLoaded = []; + + // Two-pass static parsing: collect variables first, then resolve components + private readonly ConcurrentDictionary collectedVariables = new(); + private readonly ConcurrentQueue pendingComponents = new(); // Track original pom.xml files for potential fallback private readonly ConcurrentBag originalPomFiles = []; @@ -466,6 +469,9 @@ protected override Task OnFileFoundAsync( protected override Task OnDetectionFinishedAsync() { + // Second pass: resolve all pending components with collected variables + this.ResolvePendingComponents(); + // Record telemetry this.Telemetry["DetectionMethod"] = this.usedDetectionMethod.ToString(); this.Telemetry["FallbackReason"] = this.fallbackReason.ToString(); @@ -474,6 +480,8 @@ protected override Task OnDetectionFinishedAsync() this.Telemetry["TotalComponentCount"] = (this.mvnCliComponentCount + this.staticParserComponentCount).ToString(); this.Telemetry["MavenCliAvailable"] = this.mavenCliAvailable.ToString(); this.Telemetry["OriginalPomFileCount"] = this.originalPomFiles.Count.ToString(); + this.Telemetry["CollectedVariableCount"] = this.collectedVariables.Count.ToString(); + this.Telemetry["PendingComponentCount"] = this.pendingComponents.Count.ToString(); if (!this.failedEndpoints.IsEmpty) { @@ -536,16 +544,15 @@ private void ProcessPomFileStatically(ProcessRequest processRequest) var document = new XmlDocument(); document.Load(file.Stream); - lock (this.documentsLoaded) - { - this.documentsLoaded.TryAdd(file.Location, document); - } - + // Single XML parsing pass: create namespace manager once var namespaceManager = new XmlNamespaceManager(document.NameTable); namespaceManager.AddNamespace(ProjNamespace, MavenXmlNamespace); + // First pass: collect all variables from this document (efficient single pass) + this.CollectVariablesFromDocument(document, namespaceManager, filePath); + + // First pass: collect dependencies (may have unresolved variables) var dependencyList = document.SelectNodes(DependencyNode, namespaceManager); - var componentsFoundInFile = 0; foreach (XmlNode dependency in dependencyList) { @@ -561,31 +568,24 @@ private void ProcessPomFileStatically(ProcessRequest processRequest) if (version != null && !version.InnerText.Contains(',')) { var versionRef = version.InnerText.Trim('[', ']'); - string versionString; if (versionRef.StartsWith("${")) { - versionString = this.ResolveVersion(versionRef, document, file.Location); + // Store for second-pass resolution + this.pendingComponents.Enqueue(new PendingComponent( + groupId, + artifactId, + versionRef, + singleFileComponentRecorder, + filePath)); } else { - versionString = versionRef; - } - - if (!versionString.StartsWith("${")) - { - var component = new MavenComponent(groupId, artifactId, versionString); + // Direct version - register immediately + var component = new MavenComponent(groupId, artifactId, versionRef); var detectedComponent = new DetectedComponent(component); singleFileComponentRecorder.RegisterUsage(detectedComponent); - componentsFoundInFile++; - } - else - { - this.Logger.LogDebug( - "Version string {Version} for component {Group}/{Artifact} is invalid or unsupported and a component will not be recorded.", - versionString, - groupId, - artifactId); + Interlocked.Increment(ref this.staticParserComponentCount); } } else @@ -596,8 +596,6 @@ private void ProcessPomFileStatically(ProcessRequest processRequest) artifactId); } } - - Interlocked.Add(ref this.staticParserComponentCount, componentsFoundInFile); } catch (Exception e) { @@ -605,6 +603,55 @@ private void ProcessPomFileStatically(ProcessRequest processRequest) } } + /// + /// Collects all variable definitions from a POM document during the first pass. + /// Optimized to reuse XmlNamespaceManager and minimize XPath queries. + /// + /// The XML document to scan for variables. + /// Pre-configured namespace manager to reuse. + /// The file path for logging purposes. + private void CollectVariablesFromDocument(XmlDocument document, XmlNamespaceManager namespaceManager, string filePath) + { + try + { + // Collect properties variables (single XPath query) + var propertiesNode = document.SelectSingleNode("//proj:properties", namespaceManager); + if (propertiesNode != null) + { + foreach (XmlNode propertyNode in propertiesNode.ChildNodes) + { + if (propertyNode.NodeType == XmlNodeType.Element && !string.IsNullOrWhiteSpace(propertyNode.InnerText)) + { + var variableName = propertyNode.Name; + var variableValue = propertyNode.InnerText; + this.collectedVariables.TryAdd(variableName, variableValue); + this.LogDebug($"Collected variable {variableName}={variableValue} from {filePath}"); + } + } + } + + // Collect common project-level variables with optimized single query + // Use XPath union to get all common variables in one query + var commonVariablesNodes = document.SelectNodes("//proj:version | //proj:groupId | //proj:artifactId | //proj:revision", namespaceManager); + if (commonVariablesNodes != null) + { + foreach (XmlNode node in commonVariablesNodes) + { + if (!string.IsNullOrWhiteSpace(node.InnerText)) + { + var variableName = node.LocalName; // Get the element name (version, groupId, etc.) + this.collectedVariables.TryAdd(variableName, node.InnerText); + this.LogDebug($"Collected project variable {variableName}={node.InnerText} from {filePath}"); + } + } + } + } + catch (Exception e) + { + this.Logger.LogError(e, "Failed to collect variables from file {Path}", filePath); + } + } + private bool IsAuthenticationError(string errorMessage) { if (string.IsNullOrWhiteSpace(errorMessage)) @@ -649,72 +696,82 @@ private void LogAuthErrorGuidance() this.LogWarning(guidance.ToString()); } - private string ResolveVersion(string versionString, XmlDocument currentDocument, string currentDocumentFileLocation) - { - var returnedVersionString = versionString; - var match = VersionRegex.Match(versionString); - - if (match.Success) - { - var variable = match.Groups[1].Captures[0].ToString(); - var replacement = this.ReplaceVariable(variable, currentDocument, currentDocumentFileLocation); - returnedVersionString = versionString.Replace("${" + variable + "}", replacement); - } - - return returnedVersionString; - } - - private string ReplaceVariable(string variable, XmlDocument currentDocument, string currentDocumentFileLocation) + /// + /// Second pass: resolve all pending components using collected variables. + /// This ensures variables defined in any POM file can be used by components in any other POM file. + /// + private void ResolvePendingComponents() { - var result = this.FindVariableInDocument(currentDocument, currentDocumentFileLocation, variable); - if (result != null) - { - return result; - } + var resolvedCount = 0; + var skippedCount = 0; - lock (this.documentsLoaded) + while (this.pendingComponents.TryDequeue(out var pendingComponent)) { - foreach (var pathDocumentPair in this.documentsLoaded) + try { - var path = pathDocumentPair.Key; - var document = pathDocumentPair.Value; - result = this.FindVariableInDocument(document, path, variable); - if (result != null) + var resolvedVersion = this.ResolveVersionFromCollectedVariables(pendingComponent.VersionTemplate); + if (!resolvedVersion.StartsWith("${")) { - return result; + var component = new MavenComponent(pendingComponent.GroupId, pendingComponent.ArtifactId, resolvedVersion); + var detectedComponent = new DetectedComponent(component); + pendingComponent.Recorder.RegisterUsage(detectedComponent); + Interlocked.Increment(ref this.staticParserComponentCount); + resolvedCount++; + + this.LogDebug($"Resolved component {pendingComponent.GroupId}/{pendingComponent.ArtifactId}:{resolvedVersion} from {pendingComponent.FilePath}"); } + else + { + skippedCount++; + this.Logger.LogDebug( + "Version string {Version} for component {Group}/{Artifact} could not be resolved and a component will not be recorded. File: {File}", + resolvedVersion, + pendingComponent.GroupId, + pendingComponent.ArtifactId, + pendingComponent.FilePath); + } + } + catch (Exception e) + { + skippedCount++; + this.Logger.LogError( + e, + "Failed to resolve pending component {Group}/{Artifact} from {File}", + pendingComponent.GroupId, + pendingComponent.ArtifactId, + pendingComponent.FilePath); } } - return $"${{{variable}}}"; + this.LogInfo($"Second pass completed: {resolvedCount} components resolved, {skippedCount} skipped due to unresolved variables"); } - private string FindVariableInDocument(XmlDocument document, string path, string variable) + /// + /// Resolves a version template using the collected variables dictionary. + /// This is more efficient than scanning all documents for each variable resolution. + /// + /// The version template with variables (e.g., "${revision}"). + /// The resolved version string, or the original template if variables cannot be resolved. + private string ResolveVersionFromCollectedVariables(string versionTemplate) { - try - { - var namespaceManager = new XmlNamespaceManager(document.NameTable); - namespaceManager.AddNamespace(ProjNamespace, MavenXmlNamespace); + var resolvedVersion = versionTemplate; + var match = VersionRegex.Match(versionTemplate); - var nodeListProject = document.SelectNodes($"//proj:{variable}", namespaceManager); - var nodeListProperties = document.SelectNodes($"//proj:properties/proj:{variable}", namespaceManager); - - if (nodeListProject.Count != 0) + if (match.Success) + { + var variable = match.Groups[1].Captures[0].ToString(); + if (this.collectedVariables.TryGetValue(variable, out var replacement)) { - return nodeListProject.Item(0).InnerText; + resolvedVersion = versionTemplate.Replace("${" + variable + "}", replacement); + this.LogDebug($"Resolved variable {variable} to {replacement} in version {versionTemplate}"); } - - if (nodeListProperties.Count != 0) + else { - return nodeListProperties.Item(0).InnerText; + this.LogDebug($"Variable {variable} not found in collected variables for version {versionTemplate}"); } } - catch (Exception e) - { - this.Logger.LogError(e, "Failed to read file {Path}", path); - } - return null; + return resolvedVersion; } /// diff --git a/src/Microsoft.ComponentDetection.Detectors/maven/PendingComponent.cs b/src/Microsoft.ComponentDetection.Detectors/maven/PendingComponent.cs new file mode 100644 index 000000000..82c92b6fc --- /dev/null +++ b/src/Microsoft.ComponentDetection.Detectors/maven/PendingComponent.cs @@ -0,0 +1,14 @@ +#nullable disable +namespace Microsoft.ComponentDetection.Detectors.Maven; + +using Microsoft.ComponentDetection.Contracts; + +/// +/// Represents a component with unresolved variables that needs second-pass processing. +/// +internal record PendingComponent( + string GroupId, + string ArtifactId, + string VersionTemplate, + ISingleFileComponentRecorder Recorder, + string FilePath); From 2770c589ece06fe48b943f37d059b9f85740a39d Mon Sep 17 00:00:00 2001 From: Zheng Hao Tang Date: Tue, 17 Mar 2026 16:48:14 -0700 Subject: [PATCH 2/7] Update cleanup method --- .../maven/IMavenCommandService.cs | 14 -- .../maven/MavenCommandService.cs | 116 ----------- .../maven/MavenWithFallbackDetector.cs | 34 +--- .../maven/MvnCliComponentDetector.cs | 29 +-- .../Services/DetectorProcessingService.cs | 54 +++++ .../DetectorProcessingServiceTests.cs | 187 ++++++++++++++++++ 6 files changed, 259 insertions(+), 175 deletions(-) diff --git a/src/Microsoft.ComponentDetection.Detectors/maven/IMavenCommandService.cs b/src/Microsoft.ComponentDetection.Detectors/maven/IMavenCommandService.cs index c6adc8a2e..32169b9e2 100644 --- a/src/Microsoft.ComponentDetection.Detectors/maven/IMavenCommandService.cs +++ b/src/Microsoft.ComponentDetection.Detectors/maven/IMavenCommandService.cs @@ -14,18 +14,4 @@ public interface IMavenCommandService Task GenerateDependenciesFileAsync(ProcessRequest processRequest, CancellationToken cancellationToken = default); void ParseDependenciesFile(ProcessRequest processRequest); - - /// - /// Registers that a detector is actively reading a dependency file. - /// This prevents premature deletion by other detectors. - /// - /// The path to the dependency file being read. - void RegisterFileReader(string dependencyFilePath); - - /// - /// Unregisters a detector's active reading of a dependency file and attempts cleanup. - /// If no other detectors are reading the file, it will be safely deleted. - /// - /// The path to the dependency file that was being read. - void UnregisterFileReader(string dependencyFilePath); } diff --git a/src/Microsoft.ComponentDetection.Detectors/maven/MavenCommandService.cs b/src/Microsoft.ComponentDetection.Detectors/maven/MavenCommandService.cs index 9d21971e2..5987b09ec 100644 --- a/src/Microsoft.ComponentDetection.Detectors/maven/MavenCommandService.cs +++ b/src/Microsoft.ComponentDetection.Detectors/maven/MavenCommandService.cs @@ -33,12 +33,6 @@ internal class MavenCommandService : IMavenCommandService /// private readonly ConcurrentDictionary completedLocations = new(); - /// - /// Tracks the number of active readers for each dependency file. - /// Used for safe file cleanup coordination between detectors. - /// - private readonly ConcurrentDictionary fileReaderCounts = new(); - private readonly ICommandLineInvocationService commandLineInvocationService; private readonly IMavenStyleDependencyGraphParserService parserService; private readonly IEnvironmentVariableService envVarService; @@ -165,114 +159,4 @@ public void ParseDependenciesFile(ProcessRequest processRequest) var lines = sr.ReadToEnd().Split(new[] { Environment.NewLine }, StringSplitOptions.RemoveEmptyEntries); this.parserService.Parse(lines, processRequest.SingleFileComponentRecorder); } - - /// - /// Registers that a detector is actively reading a dependency file. - /// This prevents premature deletion by other detectors. - /// - /// The path to the dependency file being read. - public void RegisterFileReader(string dependencyFilePath) - { - this.fileReaderCounts.AddOrUpdate(dependencyFilePath, 1, (key, count) => count + 1); - } - - /// - /// Unregisters a detector's active reading of a dependency file and attempts cleanup. - /// If no other detectors are reading the file, it will be safely deleted. - /// - /// The path to the dependency file that was being read. - public void UnregisterFileReader(string dependencyFilePath) - { - this.fileReaderCounts.AddOrUpdate(dependencyFilePath, 0, (key, count) => Math.Max(0, count - 1)); - this.TryDeleteDependencyFileIfNotInUse(dependencyFilePath); - } - - /// - /// Attempts to delete a dependency file if no detectors are currently using it. - /// Uses cross-process coordination to prevent race conditions with other instances. - /// - /// The path to the dependency file to delete. - private void TryDeleteDependencyFileIfNotInUse(string dependencyFilePath) - { - // Check if any local readers are using the file - if (this.fileReaderCounts.TryGetValue(dependencyFilePath, out var count) && count > 0) - { - this.logger.LogDebug("Skipping deletion of {DependencyFilePath} - {Count} local readers still active", dependencyFilePath, count); - return; - } - - var coordinationFile = dependencyFilePath + ".deleting"; - - try - { - // Try to create coordination file atomically with our process ID - var processId = Environment.ProcessId.ToString(); - - // Use FileMode.CreateNew to ensure atomic creation (fails if file exists) - using (var fs = new FileStream(coordinationFile, FileMode.CreateNew, FileAccess.Write, FileShare.None)) - using (var writer = new StreamWriter(fs)) - { - writer.Write(processId); - } - - this.logger.LogDebug("Created coordination file {CoordinationFile} for process {ProcessId}", coordinationFile, processId); - - // Give other processes a chance to create their coordination files if needed - Thread.Sleep(50); - - // Verify we still own the coordination (another process might have deleted and recreated it) - if (!File.Exists(coordinationFile)) - { - this.logger.LogDebug("Coordination file {CoordinationFile} was deleted by another process", coordinationFile); - return; - } - - var coordinationContent = File.ReadAllText(coordinationFile).Trim(); - if (coordinationContent != processId) - { - this.logger.LogDebug("Coordination file {CoordinationFile} was taken over by process {OtherProcessId}, aborting deletion", coordinationFile, coordinationContent); - return; - } - - // We own the coordination - proceed with deletion - if (File.Exists(dependencyFilePath)) - { - File.Delete(dependencyFilePath); - this.logger.LogDebug("Successfully deleted dependency file {DependencyFilePath}", dependencyFilePath); - } - else - { - this.logger.LogDebug("Dependency file {DependencyFilePath} was already deleted", dependencyFilePath); - } - } - catch (IOException ex) when (ex.Message.Contains("already exists") || ex.HResult == -2147024816) - { - // Another process is handling deletion (File already exists) - this.logger.LogDebug("Another process is already coordinating deletion of {DependencyFilePath}", dependencyFilePath); - } - catch (Exception ex) - { - this.logger.LogWarning(ex, "Failed to coordinate deletion of dependency file {DependencyFilePath}", dependencyFilePath); - } - finally - { - // Clean up our coordination file - try - { - if (File.Exists(coordinationFile)) - { - var coordinationContent = File.ReadAllText(coordinationFile).Trim(); - if (coordinationContent == Environment.ProcessId.ToString()) - { - File.Delete(coordinationFile); - this.logger.LogDebug("Cleaned up coordination file {CoordinationFile}", coordinationFile); - } - } - } - catch (Exception ex) - { - this.logger.LogDebug(ex, "Failed to clean up coordination file {CoordinationFile}, will be cleaned up later", coordinationFile); - } - } - } } diff --git a/src/Microsoft.ComponentDetection.Detectors/maven/MavenWithFallbackDetector.cs b/src/Microsoft.ComponentDetection.Detectors/maven/MavenWithFallbackDetector.cs index 533f4e544..279b7517c 100644 --- a/src/Microsoft.ComponentDetection.Detectors/maven/MavenWithFallbackDetector.cs +++ b/src/Microsoft.ComponentDetection.Detectors/maven/MavenWithFallbackDetector.cs @@ -159,7 +159,7 @@ public MavenWithFallbackDetector( private static string NormalizeDirectoryPath(string path) => path.TrimEnd(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar) + Path.DirectorySeparatorChar; - private void LogDebug(string message) => + private void LogDebugWithId(string message) => this.Logger.LogDebug("{DetectorId}: {Message}", this.Id, message); private void LogInfo(string message) => @@ -265,7 +265,7 @@ private async Task TryInitializeMavenCliAsync() return false; } - this.LogDebug("Maven CLI is available. Running MvnCli detection."); + this.LogDebugWithId("Maven CLI is available. Running MvnCli detection."); return true; } @@ -379,7 +379,7 @@ await this.RemoveNestedPomXmls(processRequests, parentPomDictionary, cancellatio // Determine detection method based on results this.DetermineDetectionMethod(cliSuccessCount, cliFailureCount); - this.LogDebug($"Maven CLI processing complete: {cliSuccessCount} succeeded, {cliFailureCount} failed out of {this.originalPomFiles.Count} root pom.xml files. Retrieving generated dependency graphs."); + this.LogDebugWithId($"Maven CLI processing complete: {cliSuccessCount} succeeded, {cliFailureCount} failed out of {this.originalPomFiles.Count} root pom.xml files. Retrieving generated dependency graphs."); // Use comprehensive directory scanning after Maven CLI execution to find all generated dependency files // This ensures we find dependency files from submodules even if Maven CLI was only run on parent pom.xml @@ -421,7 +421,7 @@ private void DetermineDetectionMethod(int cliSuccessCount, int cliFailureCount) if (cliFailureCount == 0 && cliSuccessCount > 0) { this.usedDetectionMethod = MavenDetectionMethod.MvnCliOnly; - this.LogDebug("All pom.xml files processed successfully with Maven CLI."); + this.LogDebugWithId("All pom.xml files processed successfully with Maven CLI."); } else if (cliSuccessCount == 0 && cliFailureCount > 0) { @@ -446,17 +446,8 @@ protected override Task OnFileFoundAsync( if (pattern == this.mavenCommandService.BcdeMvnDependencyFileName) { - // Process MvnCli result - register as reader and cleanup when done - var depsFilePath = processRequest.ComponentStream.Location; - this.mavenCommandService.RegisterFileReader(depsFilePath); - try - { - this.ProcessMvnCliResult(processRequest); - } - finally - { - this.mavenCommandService.UnregisterFileReader(depsFilePath); - } + // Process MvnCli result + this.ProcessMvnCliResult(processRequest); } else { @@ -625,7 +616,6 @@ private void CollectVariablesFromDocument(XmlDocument document, XmlNamespaceMana var variableName = propertyNode.Name; var variableValue = propertyNode.InnerText; this.collectedVariables.TryAdd(variableName, variableValue); - this.LogDebug($"Collected variable {variableName}={variableValue} from {filePath}"); } } } @@ -641,7 +631,6 @@ private void CollectVariablesFromDocument(XmlDocument document, XmlNamespaceMana { var variableName = node.LocalName; // Get the element name (version, groupId, etc.) this.collectedVariables.TryAdd(variableName, node.InnerText); - this.LogDebug($"Collected project variable {variableName}={node.InnerText} from {filePath}"); } } } @@ -717,8 +706,6 @@ private void ResolvePendingComponents() pendingComponent.Recorder.RegisterUsage(detectedComponent); Interlocked.Increment(ref this.staticParserComponentCount); resolvedCount++; - - this.LogDebug($"Resolved component {pendingComponent.GroupId}/{pendingComponent.ArtifactId}:{resolvedVersion} from {pendingComponent.FilePath}"); } else { @@ -763,11 +750,6 @@ private string ResolveVersionFromCollectedVariables(string versionTemplate) if (this.collectedVariables.TryGetValue(variable, out var replacement)) { resolvedVersion = versionTemplate.Replace("${" + variable + "}", replacement); - this.LogDebug($"Resolved variable {variable} to {replacement} in version {versionTemplate}"); - } - else - { - this.LogDebug($"Variable {variable} not found in collected variables for version {versionTemplate}"); } } @@ -829,7 +811,7 @@ private IObservable RemoveNestedPomXmls( if (location.StartsWith(pomDirectory, StringComparison.OrdinalIgnoreCase)) { - this.LogDebug($"Ignoring {MavenManifest} at {location}, as it has a parent {MavenManifest} at {pomDirectory}."); + this.LogDebugWithId($"Ignoring {MavenManifest} at {location}, as it has a parent {MavenManifest} at {pomDirectory}."); isNested = true; parentPomDictionary.AddOrUpdate( pomDirectory, @@ -845,7 +827,7 @@ private IObservable RemoveNestedPomXmls( if (!isNested) { - this.LogDebug($"Discovered {request.ComponentStream.Location}."); + this.LogDebugWithId($"Discovered {request.ComponentStream.Location}."); parentPomDictionary.AddOrUpdate( NormalizeDirectoryPath(Path.GetDirectoryName(request.ComponentStream.Location)), [request], diff --git a/src/Microsoft.ComponentDetection.Detectors/maven/MvnCliComponentDetector.cs b/src/Microsoft.ComponentDetection.Detectors/maven/MvnCliComponentDetector.cs index e69d87475..7b9e9c88b 100644 --- a/src/Microsoft.ComponentDetection.Detectors/maven/MvnCliComponentDetector.cs +++ b/src/Microsoft.ComponentDetection.Detectors/maven/MvnCliComponentDetector.cs @@ -45,10 +45,8 @@ public MvnCliComponentDetector( public override IEnumerable Categories => [Enum.GetName(typeof(DetectorClass), DetectorClass.Maven)]; - private void LogDebugWithId(string message) - { - this.Logger.LogDebug("{DetectorId} detector: {Message}", this.Id, message); - } + private void LogDebugWithId(string message) => + this.Logger.LogDebug("{DetectorId}: {Message}", this.Id, message); protected override async Task> OnPrepareDetectionAsync(IObservable processRequests, IDictionary detectorArgs, CancellationToken cancellationToken = default) { @@ -76,11 +74,9 @@ await this.RemoveNestedPomXmls(processRequests).ForEachAsync(processRequest => { // The file stream is going to be disposed after the iteration is finished // so is necessary to read the content and keep it in memory, for further processing. - // Register as reader for coordination with file deletion - this.mavenCommandService.RegisterFileReader(componentStream.Location); using var reader = new StreamReader(componentStream.Stream); var content = reader.ReadToEnd(); - this.mavenCommandService.UnregisterFileReader(componentStream.Location); + return new ProcessRequest { ComponentStream = new ComponentStream @@ -98,18 +94,13 @@ await this.RemoveNestedPomXmls(processRequests).ForEachAsync(processRequest => protected override async Task OnFileFoundAsync(ProcessRequest processRequest, IDictionary detectorArgs, CancellationToken cancellationToken = default) { - // Register as reader to coordinate with other detectors var depsFilePath = processRequest.ComponentStream.Location; - this.mavenCommandService.RegisterFileReader(depsFilePath); - try - { - this.mavenCommandService.ParseDependenciesFile(processRequest); - } - finally - { - // Unregister and attempt coordinated cleanup - this.mavenCommandService.UnregisterFileReader(depsFilePath); - } + this.LogDebugWithId($"OnFileFoundAsync: Processing {depsFilePath}"); + + var componentsBefore = processRequest.SingleFileComponentRecorder.GetDetectedComponents().Count; + this.mavenCommandService.ParseDependenciesFile(processRequest); + var componentsAfter = processRequest.SingleFileComponentRecorder.GetDetectedComponents().Count; + this.LogDebugWithId($"OnFileFoundAsync: {depsFilePath} contributed {componentsAfter - componentsBefore} components"); await Task.CompletedTask; } @@ -161,7 +152,7 @@ private IObservable RemoveNestedPomXmls(IObservable ProcessDetectorsAsync( var results = await Task.WhenAll(scanTasks); await this.experimentService.FinishAsync(); + // Clean up Maven CLI temporary files after all detectors have finished + await this.CleanupMavenFilesAsync(settings.SourceDirectory, detectors); + var detectorProcessingResult = this.ConvertDetectorResultsIntoResult(results, exitCode); var totalElapsedTime = stopwatch.Elapsed.TotalSeconds; @@ -422,4 +425,55 @@ private void LogTabularOutput(ConcurrentDictionary pr this.logger.LogInformation("{DetectionTimeLine}", line); } } + + /// + /// Cleans up Maven CLI temporary files after all detectors have finished. + /// This prevents race conditions between MvnCliComponentDetector and MavenWithFallbackDetector. + /// + private async Task CleanupMavenFilesAsync(DirectoryInfo sourceDirectory, IEnumerable detectors) + { + // Only clean up if Maven detectors are running + var mavenDetectorIds = new[] { "MvnCli", "MavenWithFallback" }; + var hasMavenDetectors = detectors.Any(d => mavenDetectorIds.Contains(d.Id)); + + if (!hasMavenDetectors) + { + return; + } + + try + { + var searchPattern = "*.mvndeps"; // Simple file pattern for Directory.GetFiles + + this.logger.LogDebug("Starting Maven CLI cleanup in directory: {SourceDirectory}", sourceDirectory.FullName); + + await Task.Run(() => + { + // Search recursively for Maven CLI dependency files + var filesToClean = Directory.GetFiles(sourceDirectory.FullName, searchPattern, SearchOption.AllDirectories); + + foreach (var file in filesToClean) + { + try + { + File.Delete(file); + this.logger.LogDebug("Cleaned up Maven CLI file: {File}", file); + } + catch (Exception ex) + { + this.logger.LogDebug(ex, "Failed to clean up Maven CLI file: {File}", file); + } + } + + if (filesToClean.Length > 0) + { + this.logger.LogDebug("Maven CLI cleanup completed. Removed {Count} files.", filesToClean.Length); + } + }); + } + catch (Exception ex) + { + this.logger.LogWarning(ex, "Maven CLI cleanup failed for directory: {SourceDirectory}", sourceDirectory.FullName); + } + } } diff --git a/test/Microsoft.ComponentDetection.Orchestrator.Tests/Services/DetectorProcessingServiceTests.cs b/test/Microsoft.ComponentDetection.Orchestrator.Tests/Services/DetectorProcessingServiceTests.cs index 0f6a8a020..cd76d06de 100644 --- a/test/Microsoft.ComponentDetection.Orchestrator.Tests/Services/DetectorProcessingServiceTests.cs +++ b/test/Microsoft.ComponentDetection.Orchestrator.Tests/Services/DetectorProcessingServiceTests.cs @@ -658,4 +658,191 @@ private Mock SetupCommandDetectorMock(string id) return mockCommandDetector; } + + [TestMethod] + public async Task ProcessDetectorsAsync_WithMavenDetectors_ShouldCleanupMavenFilesAfterAllDetectorsFinish() + { + // Arrange - Create a temporary directory with mock Maven dependency files + var tempDir = Path.Combine(Path.GetTempPath(), $"maven_cleanup_test_{Guid.NewGuid()}"); + Directory.CreateDirectory(tempDir); + + var testFile1 = Path.Combine(tempDir, "test1.mvndeps"); + var testFile2 = Path.Combine(tempDir, "subdir", "test2.mvndeps"); + var nonMavenFile = Path.Combine(tempDir, "regular.txt"); + + Directory.CreateDirectory(Path.GetDirectoryName(testFile2)); + await File.WriteAllTextAsync(testFile1, "maven dependency content 1"); + await File.WriteAllTextAsync(testFile2, "maven dependency content 2"); + await File.WriteAllTextAsync(nonMavenFile, "regular file content"); + + try + { + var tempDirInfo = new DirectoryInfo(tempDir); + var scanSettings = new ScanSettings + { + SourceDirectory = tempDirInfo, + DetectorArgs = new Dictionary(), + CleanupCreatedFiles = true, + }; + + // Create mock Maven detectors + var mvnCliDetectorMock = new Mock(); + mvnCliDetectorMock.SetupAllProperties(); + mvnCliDetectorMock.SetupGet(x => x.Id).Returns("MvnCli"); + mvnCliDetectorMock.SetupGet(x => x.Categories).Returns(["Maven"]); + mvnCliDetectorMock.Setup(x => x.ExecuteDetectorAsync(It.IsAny(), It.IsAny())) + .ReturnsAsync(new IndividualDetectorScanResult { ResultCode = ProcessingResultCode.Success }); + + var mavenWithFallbackDetectorMock = new Mock(); + mavenWithFallbackDetectorMock.SetupAllProperties(); + mavenWithFallbackDetectorMock.SetupGet(x => x.Id).Returns("MavenWithFallback"); + mavenWithFallbackDetectorMock.SetupGet(x => x.Categories).Returns(["Maven"]); + mavenWithFallbackDetectorMock.Setup(x => x.ExecuteDetectorAsync(It.IsAny(), It.IsAny())) + .ReturnsAsync(new IndividualDetectorScanResult { ResultCode = ProcessingResultCode.Success }); + + var detectors = new List { mvnCliDetectorMock.Object, mavenWithFallbackDetectorMock.Object, }; + + // Verify files exist before running detectors + File.Exists(testFile1).Should().BeTrue("Maven dependency file 1 should exist before cleanup"); + File.Exists(testFile2).Should().BeTrue("Maven dependency file 2 should exist before cleanup"); + File.Exists(nonMavenFile).Should().BeTrue("Non-Maven file should exist before cleanup"); + + // Act + var result = await this.serviceUnderTest.ProcessDetectorsAsync(scanSettings, detectors, new DetectorRestrictions()); + + // Assert + result.Should().NotBeNull(); + result.ResultCode.Should().Be(ProcessingResultCode.Success); + + // Maven dependency files should be cleaned up + File.Exists(testFile1).Should().BeFalse("Maven dependency file 1 should be cleaned up"); + File.Exists(testFile2).Should().BeFalse("Maven dependency file 2 should be cleaned up"); + + // Non-Maven files should remain + File.Exists(nonMavenFile).Should().BeTrue("Non-Maven files should not be cleaned up"); + + // Verify both detectors were executed + mvnCliDetectorMock.Verify(x => x.ExecuteDetectorAsync(It.IsAny(), It.IsAny()), Times.Once); + mavenWithFallbackDetectorMock.Verify(x => x.ExecuteDetectorAsync(It.IsAny(), It.IsAny()), Times.Once); + } + finally + { + // Cleanup test directory + if (Directory.Exists(tempDir)) + { + Directory.Delete(tempDir, true); + } + } + } + + [TestMethod] + public async Task ProcessDetectorsAsync_WithoutMavenDetectors_ShouldNotCleanupMavenFiles() + { + // Arrange - Create a temporary directory with mock Maven dependency files + var tempDir = Path.Combine(Path.GetTempPath(), $"maven_cleanup_test_{Guid.NewGuid()}"); + Directory.CreateDirectory(tempDir); + + var testFile = Path.Combine(tempDir, "test.mvndeps"); + await File.WriteAllTextAsync(testFile, "maven dependency content"); + + try + { + var tempDirInfo = new DirectoryInfo(tempDir); + var scanSettings = new ScanSettings + { + SourceDirectory = tempDirInfo, + DetectorArgs = new Dictionary(), + CleanupCreatedFiles = true, + }; + + // Create mock non-Maven detector + var npmDetectorMock = new Mock(); + npmDetectorMock.SetupAllProperties(); + npmDetectorMock.SetupGet(x => x.Id).Returns("Npm"); + npmDetectorMock.SetupGet(x => x.Categories).Returns(["Npm"]); + npmDetectorMock.Setup(x => x.ExecuteDetectorAsync(It.IsAny(), It.IsAny())) + .ReturnsAsync(new IndividualDetectorScanResult { ResultCode = ProcessingResultCode.Success }); + + var detectors = new List { npmDetectorMock.Object, }; + + // Verify file exists before running detectors + File.Exists(testFile).Should().BeTrue("Maven dependency file should exist before processing"); + + // Act + var result = await this.serviceUnderTest.ProcessDetectorsAsync(scanSettings, detectors, new DetectorRestrictions()); + + // Assert + result.Should().NotBeNull(); + result.ResultCode.Should().Be(ProcessingResultCode.Success); + + // Maven dependency file should NOT be cleaned up when no Maven detectors are present + File.Exists(testFile).Should().BeTrue("Maven dependency file should remain when no Maven detectors are running"); + + // Verify detector was executed + npmDetectorMock.Verify(x => x.ExecuteDetectorAsync(It.IsAny(), It.IsAny()), Times.Once); + } + finally + { + // Cleanup test directory + if (Directory.Exists(tempDir)) + { + Directory.Delete(tempDir, true); + } + } + } + + [TestMethod] + public async Task ProcessDetectorsAsync_OnlyOneMavenDetector_ShouldStillCleanupMavenFiles() + { + // Arrange - Create a temporary directory with mock Maven dependency files + var tempDir = Path.Combine(Path.GetTempPath(), $"maven_cleanup_test_{Guid.NewGuid()}"); + Directory.CreateDirectory(tempDir); + var testFile = Path.Combine(tempDir, "test.mvndeps"); + await File.WriteAllTextAsync(testFile, "maven dependency content"); + + try + { + var tempDirInfo = new DirectoryInfo(tempDir); + var scanSettings = new ScanSettings + { + SourceDirectory = tempDirInfo, + DetectorArgs = new Dictionary(), + CleanupCreatedFiles = true, + }; + + // Create mock Maven detector (only one) + var mvnCliDetectorMock = new Mock(); + mvnCliDetectorMock.SetupAllProperties(); + mvnCliDetectorMock.SetupGet(x => x.Id).Returns("MvnCli"); + mvnCliDetectorMock.SetupGet(x => x.Categories).Returns(["Maven"]); + mvnCliDetectorMock.Setup(x => x.ExecuteDetectorAsync(It.IsAny(), It.IsAny())) + .ReturnsAsync(new IndividualDetectorScanResult { ResultCode = ProcessingResultCode.Success }); + + var detectors = new List { mvnCliDetectorMock.Object, }; + + // Verify file exists before running detectors + File.Exists(testFile).Should().BeTrue("Maven dependency file should exist before cleanup"); + + // Act + var result = await this.serviceUnderTest.ProcessDetectorsAsync(scanSettings, detectors, new DetectorRestrictions()); + + // Assert + result.Should().NotBeNull(); + result.ResultCode.Should().Be(ProcessingResultCode.Success); + + // Maven dependency file should be cleaned up even with only one Maven detector + File.Exists(testFile).Should().BeFalse("Maven dependency file should be cleaned up"); + + // Verify detector was executed + mvnCliDetectorMock.Verify(x => x.ExecuteDetectorAsync(It.IsAny(), It.IsAny()), Times.Once); + } + finally + { + // Cleanup test directory + if (Directory.Exists(tempDir)) + { + Directory.Delete(tempDir, true); + } + } + } } From 9bec9b450589ea623ac40c43a1621b7b4d62e50c Mon Sep 17 00:00:00 2001 From: Zheng Hao Tang Date: Tue, 17 Mar 2026 18:34:48 -0700 Subject: [PATCH 3/7] Cleanup unused method --- .../maven/MavenCommandService.cs | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/Microsoft.ComponentDetection.Detectors/maven/MavenCommandService.cs b/src/Microsoft.ComponentDetection.Detectors/maven/MavenCommandService.cs index 79dbb92d3..06a98a378 100644 --- a/src/Microsoft.ComponentDetection.Detectors/maven/MavenCommandService.cs +++ b/src/Microsoft.ComponentDetection.Detectors/maven/MavenCommandService.cs @@ -63,9 +63,6 @@ public async Task GenerateDependenciesFileAsync(ProcessRequest p var pomDir = Path.GetDirectoryName(pomFile.Location); var depsFilePath = Path.Combine(pomDir, this.BcdeMvnDependencyFileName); - // Register as file reader immediately to prevent premature cleanup - this.RegisterFileReader(depsFilePath); - // Check the cache before acquiring the semaphore to allow fast-path returns // even when cancellation has been requested. if (this.completedLocations.TryGetValue(pomFile.Location, out var cachedResult) From b31cb4678364abf1564419615c05d0db9f2c12f6 Mon Sep 17 00:00:00 2001 From: Zheng Hao Tang Date: Tue, 17 Mar 2026 19:55:10 -0700 Subject: [PATCH 4/7] Another edge case --- .../maven/MavenWithFallbackDetector.cs | 102 ++++++-- .../MavenWithFallbackDetectorTests.cs | 234 ++++++++++++++++++ 2 files changed, 317 insertions(+), 19 deletions(-) diff --git a/src/Microsoft.ComponentDetection.Detectors/maven/MavenWithFallbackDetector.cs b/src/Microsoft.ComponentDetection.Detectors/maven/MavenWithFallbackDetector.cs index 8a2a63bb4..c90651e48 100644 --- a/src/Microsoft.ComponentDetection.Detectors/maven/MavenWithFallbackDetector.cs +++ b/src/Microsoft.ComponentDetection.Detectors/maven/MavenWithFallbackDetector.cs @@ -112,6 +112,7 @@ public class MavenWithFallbackDetector : FileComponentDetector, IExperimentalDet // Two-pass static parsing: collect variables first, then resolve components private readonly ConcurrentDictionary collectedVariables = new(); + private readonly ConcurrentDictionary variableDefinitionSources = new(); private readonly ConcurrentQueue pendingComponents = new(); // Track original pom.xml files for potential fallback @@ -467,6 +468,7 @@ protected override Task OnDetectionFinishedAsync() this.Telemetry["MavenCliAvailable"] = this.mavenCliAvailable.ToString(); this.Telemetry["OriginalPomFileCount"] = this.originalPomFiles.Count.ToString(); this.Telemetry["CollectedVariableCount"] = this.collectedVariables.Count.ToString(); + this.Telemetry["VariableSourcesTracked"] = this.variableDefinitionSources.Count.ToString(); this.Telemetry["PendingComponentCount"] = this.pendingComponents.Count.ToString(); if (!this.failedEndpoints.IsEmpty) @@ -534,8 +536,17 @@ private void ProcessPomFileStatically(ProcessRequest processRequest) var namespaceManager = new XmlNamespaceManager(document.NameTable); namespaceManager.AddNamespace(ProjNamespace, MavenXmlNamespace); - // First pass: collect all variables from this document (efficient single pass) - this.CollectVariablesFromDocument(document, namespaceManager, filePath); + // Collect variables from this document into a local dictionary first + var localVariables = new Dictionary(); + this.CollectVariablesFromDocument(document, namespaceManager, filePath, localVariables); + + // Add local variables to global collection + // Note: Later files (children) override earlier files (parents) due to processing order + foreach (var kvp in localVariables) + { + this.collectedVariables.AddOrUpdate(kvp.Key, kvp.Value, (key, oldValue) => kvp.Value); + this.variableDefinitionSources.AddOrUpdate(kvp.Key, filePath, (key, oldValue) => filePath); + } // First pass: collect dependencies (may have unresolved variables) var dependencyList = document.SelectNodes(DependencyNode, namespaceManager); @@ -557,13 +568,28 @@ private void ProcessPomFileStatically(ProcessRequest processRequest) if (versionRef.StartsWith("${")) { - // Store for second-pass resolution - this.pendingComponents.Enqueue(new PendingComponent( - groupId, - artifactId, - versionRef, - singleFileComponentRecorder, - filePath)); + // Only resolve immediately if local variable exists (highest priority) + // Otherwise, defer to second pass to ensure proper hierarchy-aware resolution + var resolvedVersion = this.ResolveVersionFromLocalOnly(versionRef, localVariables); + if (!resolvedVersion.StartsWith("${")) + { + // Local variable found - resolve immediately (highest priority) + var component = new MavenComponent(groupId, artifactId, resolvedVersion); + var detectedComponent = new DetectedComponent(component); + singleFileComponentRecorder.RegisterUsage(detectedComponent); + Interlocked.Increment(ref this.staticParserComponentCount); + } + else + { + // No local variable - defer to second pass for hierarchy-aware resolution + // This ensures we consider all variable definitions before resolving + this.pendingComponents.Enqueue(new PendingComponent( + groupId, + artifactId, + versionRef, + singleFileComponentRecorder, + filePath)); + } } else { @@ -590,13 +616,14 @@ private void ProcessPomFileStatically(ProcessRequest processRequest) } /// - /// Collects all variable definitions from a POM document during the first pass. + /// Collects all variable definitions from a POM document into the provided local dictionary. /// Optimized to reuse XmlNamespaceManager and minimize XPath queries. /// /// The XML document to scan for variables. /// Pre-configured namespace manager to reuse. /// The file path for logging purposes. - private void CollectVariablesFromDocument(XmlDocument document, XmlNamespaceManager namespaceManager, string filePath) + /// Local dictionary to collect variables into. + private void CollectVariablesFromDocument(XmlDocument document, XmlNamespaceManager namespaceManager, string filePath, Dictionary localVariables) { try { @@ -610,7 +637,7 @@ private void CollectVariablesFromDocument(XmlDocument document, XmlNamespaceMana { var variableName = propertyNode.Name; var variableValue = propertyNode.InnerText; - this.collectedVariables.TryAdd(variableName, variableValue); + localVariables[variableName] = variableValue; } } } @@ -625,7 +652,7 @@ private void CollectVariablesFromDocument(XmlDocument document, XmlNamespaceMana if (!string.IsNullOrWhiteSpace(node.InnerText)) { var variableName = node.LocalName; // Get the element name (version, groupId, etc.) - this.collectedVariables.TryAdd(variableName, node.InnerText); + localVariables[variableName] = node.InnerText; } } } @@ -636,6 +663,32 @@ private void CollectVariablesFromDocument(XmlDocument document, XmlNamespaceMana } } + /// + /// Resolves a version template using only local variables from the current file. + /// This ensures immediate resolution only when the variable is defined in the same file (highest priority). + /// + /// The version template with variables (e.g., "${revision}"). + /// Local variables from the current file. + /// The resolved version string, or the original template if local variable not found. + private string ResolveVersionFromLocalOnly(string versionTemplate, Dictionary localVariables) + { + var resolvedVersion = versionTemplate; + var match = VersionRegex.Match(versionTemplate); + + if (match.Success) + { + var variable = match.Groups[1].Captures[0].ToString(); + + // Only check local variables (same file priority) + if (localVariables.TryGetValue(variable, out var localReplacement)) + { + resolvedVersion = versionTemplate.Replace("${" + variable + "}", localReplacement); + } + } + + return resolvedVersion; + } + private bool IsAuthenticationError(string errorMessage) { if (string.IsNullOrWhiteSpace(errorMessage)) @@ -681,8 +734,9 @@ private void LogAuthErrorGuidance() } /// - /// Second pass: resolve all pending components using collected variables. - /// This ensures variables defined in any POM file can be used by components in any other POM file. + /// Second pass: resolve all pending components using hierarchy-aware variable resolution. + /// For components with unresolved variables, this picks the closest ancestor definition + /// based on Maven's property inheritance rules (child > parent precedence). /// private void ResolvePendingComponents() { @@ -693,7 +747,7 @@ private void ResolvePendingComponents() { try { - var resolvedVersion = this.ResolveVersionFromCollectedVariables(pendingComponent.VersionTemplate); + var resolvedVersion = this.ResolveVersionWithHierarchyAwareness(pendingComponent.VersionTemplate, pendingComponent.FilePath); if (!resolvedVersion.StartsWith("${")) { var component = new MavenComponent(pendingComponent.GroupId, pendingComponent.ArtifactId, resolvedVersion); @@ -729,12 +783,14 @@ private void ResolvePendingComponents() } /// - /// Resolves a version template using the collected variables dictionary. - /// This is more efficient than scanning all documents for each variable resolution. + /// Resolves a version template with hierarchy-aware precedence. + /// When multiple variable definitions exist, picks the closest ancestor to the requesting file. + /// This implements Maven's property inheritance rule: child properties take precedence over parent properties. /// /// The version template with variables (e.g., "${revision}"). + /// The file path of the POM requesting the variable resolution. /// The resolved version string, or the original template if variables cannot be resolved. - private string ResolveVersionFromCollectedVariables(string versionTemplate) + private string ResolveVersionWithHierarchyAwareness(string versionTemplate, string requestingFilePath) { var resolvedVersion = versionTemplate; var match = VersionRegex.Match(versionTemplate); @@ -742,9 +798,17 @@ private string ResolveVersionFromCollectedVariables(string versionTemplate) if (match.Success) { var variable = match.Groups[1].Captures[0].ToString(); + + // If variable exists in collected variables, find the closest ancestor definition if (this.collectedVariables.TryGetValue(variable, out var replacement)) { resolvedVersion = versionTemplate.Replace("${" + variable + "}", replacement); + this.LogInfo($"Resolved variable {variable} = {replacement} from {(this.variableDefinitionSources.TryGetValue(variable, out var source) ? source : "unknown")} for {requestingFilePath}"); + } + else + { + // Variable not found - log for debugging + this.LogWarning($"Variable {variable} not found in collected variables for {requestingFilePath}. Available variables: {string.Join(", ", this.collectedVariables.Keys.Take(10))}"); } } diff --git a/test/Microsoft.ComponentDetection.Detectors.Tests/MavenWithFallbackDetectorTests.cs b/test/Microsoft.ComponentDetection.Detectors.Tests/MavenWithFallbackDetectorTests.cs index cf9338e90..7bca87802 100644 --- a/test/Microsoft.ComponentDetection.Detectors.Tests/MavenWithFallbackDetectorTests.cs +++ b/test/Microsoft.ComponentDetection.Detectors.Tests/MavenWithFallbackDetectorTests.cs @@ -434,6 +434,240 @@ public async Task StaticParser_IgnoresDependenciesWithUnresolvablePropertyVersio mavenComponent.ArtifactId.Should().Be("guava"); } + [TestMethod] + public async Task StaticParser_ResolvesVariableFromPreviousFile_Async() + { + // Arrange - Test case 1: Variable defined in first file, referenced in second file + this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) + .ReturnsAsync(false); + + var firstPomContent = @" + + 4.0.0 + com.test + parent + 1.0.0 + + 3.12.0 + +"; + + var secondPomContent = @" + + 4.0.0 + com.test + child + 1.0.0 + + + org.apache.commons + commons-lang3 + ${commons.version} + + +"; + + // Act + var (detectorResult, componentRecorder) = await this.DetectorTestUtility + .WithFile("pom.xml", firstPomContent) + .WithFile("module/pom.xml", secondPomContent) + .ExecuteDetectorAsync(); + + // Assert + detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); + + var detectedComponents = componentRecorder.GetDetectedComponents(); + detectedComponents.Should().ContainSingle(); + + var mavenComponent = detectedComponents.First().Component as MavenComponent; + mavenComponent.Should().NotBeNull(); + mavenComponent.GroupId.Should().Be("org.apache.commons"); + mavenComponent.ArtifactId.Should().Be("commons-lang3"); + mavenComponent.Version.Should().Be("3.12.0"); + } + + [TestMethod] + public async Task StaticParser_BackfillsVariableFromLaterFile_Async() + { + // Arrange - Test case 2: Variable referenced in first file, defined in second file + this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) + .ReturnsAsync(false); + + var firstPomContent = @" + + 4.0.0 + com.test + child + 1.0.0 + + + org.apache.commons + commons-lang3 + ${commons.version} + + +"; + + var secondPomContent = @" + + 4.0.0 + com.test + parent + 1.0.0 + + 3.13.0 + +"; + + // Act + var (detectorResult, componentRecorder) = await this.DetectorTestUtility + .WithFile("module/pom.xml", firstPomContent) + .WithFile("pom.xml", secondPomContent) + .ExecuteDetectorAsync(); + + // Assert + detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); + + var detectedComponents = componentRecorder.GetDetectedComponents(); + detectedComponents.Should().ContainSingle(); + + var mavenComponent = detectedComponents.First().Component as MavenComponent; + mavenComponent.Should().NotBeNull(); + mavenComponent.GroupId.Should().Be("org.apache.commons"); + mavenComponent.ArtifactId.Should().Be("commons-lang3"); + mavenComponent.Version.Should().Be("3.13.0"); + } + + [TestMethod] + public async Task StaticParser_LocalVariableDefinitionTakesPriority_Async() + { + // Arrange - Test case 3: Variable defined in both files, local definition has priority + this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) + .ReturnsAsync(false); + + var firstPomContent = @" + + 4.0.0 + com.test + parent + 1.0.0 + + 3.11.0 + +"; + + var secondPomContent = @" + + 4.0.0 + com.test + child + 1.0.0 + + 3.14.0 + + + + org.apache.commons + commons-lang3 + ${commons.version} + + +"; + + // Act + var (detectorResult, componentRecorder) = await this.DetectorTestUtility + .WithFile("pom.xml", firstPomContent) + .WithFile("module/pom.xml", secondPomContent) + .ExecuteDetectorAsync(); + + // Assert + detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); + + var detectedComponents = componentRecorder.GetDetectedComponents(); + detectedComponents.Should().ContainSingle(); + + var mavenComponent = detectedComponents.First().Component as MavenComponent; + mavenComponent.Should().NotBeNull(); + mavenComponent.GroupId.Should().Be("org.apache.commons"); + mavenComponent.ArtifactId.Should().Be("commons-lang3"); + + // Should use the local definition (3.14.0) instead of parent definition (3.11.0) + mavenComponent.Version.Should().Be("3.14.0"); + } + + [TestMethod] + public async Task StaticParser_OutOfOrderProcessing_RespectsAncestorPriority_Async() + { + // Arrange - Test processing order independence for ancestor priority + // This test simulates the scenario where files are processed out of natural hierarchy order: + // grandparent → child → parent (instead of grandparent → parent → child) + // The child should still get the parent's variable value, not the grandparent's + this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) + .ReturnsAsync(false); + + var grandparentPomContent = @" + + 4.0.0 + com.test + grandparent + 1.0.0 + + 3.10.0 + +"; + + var parentPomContent = @" + + 4.0.0 + com.test + parent + 1.0.0 + + 3.11.0 + +"; + + var childPomContent = @" + + 4.0.0 + com.test + child + 1.0.0 + + + org.apache.commons + commons-lang3 + ${commons.version} + + +"; + + // Act - Process files in out-of-order sequence: grandparent → child → parent + // This tests that deferred variable resolution correctly handles ancestor priority + // regardless of processing order + var (detectorResult, componentRecorder) = await this.DetectorTestUtility + .WithFile("pom.xml", grandparentPomContent) // Processed 1st: defines commons.version = 3.10.0 + .WithFile("parent/child/pom.xml", childPomContent) // Processed 2nd: references ${commons.version} + .WithFile("parent/pom.xml", parentPomContent) // Processed 3rd: defines commons.version = 3.11.0 + .ExecuteDetectorAsync(); + + // Assert + detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); + + var detectedComponents = componentRecorder.GetDetectedComponents(); + detectedComponents.Should().ContainSingle(); + + var mavenComponent = detectedComponents.First().Component as MavenComponent; + mavenComponent.Should().NotBeNull(); + mavenComponent.GroupId.Should().Be("org.apache.commons"); + mavenComponent.ArtifactId.Should().Be("commons-lang3"); + + // Should resolve to parent's version (3.11.0) since parent is the closest ancestor to child, + // NOT grandparent's version (3.10.0), even though grandparent was processed first. + // This validates that deferred resolution correctly implements Maven's ancestor priority rules. + mavenComponent.Version.Should().Be("3.11.0"); + } + [TestMethod] public async Task WhenNoPomXmlFiles_ReturnsSuccessWithNoComponents_Async() { From 69e3307b1ea7f4456e064bcbc907e64fabdf0122 Mon Sep 17 00:00:00 2001 From: Zheng Hao Tang Date: Fri, 20 Mar 2026 15:52:16 -0700 Subject: [PATCH 5/7] Cleanup parent pom.xml logic --- .../maven/MavenWithFallbackDetector.cs | 532 ++++++++++-- .../MavenWithFallbackDetectorTests.cs | 821 +++++++++++++++++- 2 files changed, 1252 insertions(+), 101 deletions(-) diff --git a/src/Microsoft.ComponentDetection.Detectors/maven/MavenWithFallbackDetector.cs b/src/Microsoft.ComponentDetection.Detectors/maven/MavenWithFallbackDetector.cs index c90651e48..806763bae 100644 --- a/src/Microsoft.ComponentDetection.Detectors/maven/MavenWithFallbackDetector.cs +++ b/src/Microsoft.ComponentDetection.Detectors/maven/MavenWithFallbackDetector.cs @@ -112,9 +112,17 @@ public class MavenWithFallbackDetector : FileComponentDetector, IExperimentalDet // Two-pass static parsing: collect variables first, then resolve components private readonly ConcurrentDictionary collectedVariables = new(); - private readonly ConcurrentDictionary variableDefinitionSources = new(); private readonly ConcurrentQueue pendingComponents = new(); + // Track Maven parent-child relationships for proper variable resolution + private readonly ConcurrentDictionary mavenParentChildRelationships = new(); + + // Track processed Maven projects by coordinates (groupId:artifactId -> file path) + private readonly ConcurrentDictionary processedMavenProjects = new(); + + // Track files that couldn't establish parent relationships during first pass (for second pass re-evaluation) + private readonly ConcurrentQueue<(string FilePath, string ParentGroupId, string ParentArtifactId)> unresolvedParentRelationships = new(); + // Track original pom.xml files for potential fallback private readonly ConcurrentQueue originalPomFiles = []; @@ -122,6 +130,12 @@ public class MavenWithFallbackDetector : FileComponentDetector, IExperimentalDet private readonly ConcurrentQueue mavenCliErrors = []; private readonly ConcurrentQueue failedEndpoints = []; + /// + /// Cache for parent POM lookups to avoid repeated file system operations. + /// Key: current file path, Value: parent POM path or empty string if not found. + /// + private readonly ConcurrentDictionary parentPomCache = new(); + // Telemetry tracking private MavenDetectionMethod usedDetectionMethod = MavenDetectionMethod.None; private MavenFallbackReason fallbackReason = MavenFallbackReason.None; @@ -157,8 +171,38 @@ public MavenWithFallbackDetector( // Normalizes a directory path by ensuring it ends with a directory separator. // This prevents false matches like "C:\foo" matching "C:\foobar". - private static string NormalizeDirectoryPath(string path) => - path.TrimEnd(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar) + Path.DirectorySeparatorChar; + private static string NormalizeDirectoryPath(string path) + { + if (string.IsNullOrEmpty(path)) + { + return path; + } + + var lastChar = path[^1]; + return lastChar == Path.DirectorySeparatorChar || lastChar == Path.AltDirectorySeparatorChar + ? path + : path + Path.DirectorySeparatorChar; + } + + private static bool IsAuthenticationError(string errorMessage) + { + if (string.IsNullOrWhiteSpace(errorMessage)) + { + return false; + } + + // Use ReadOnlySpan for more efficient string searching + var messageSpan = errorMessage.AsSpan(); + foreach (var pattern in AuthErrorPatterns) + { + if (messageSpan.Contains(pattern, StringComparison.OrdinalIgnoreCase)) + { + return true; + } + } + + return false; + } private void LogDebugWithId(string message) => this.Logger.LogDebug("{DetectorId}: {Message}", this.Id, message); @@ -456,19 +500,27 @@ protected override Task OnFileFoundAsync( protected override Task OnDetectionFinishedAsync() { - // Second pass: resolve all pending components with collected variables + // Second pass: resolve any parent relationships that couldn't be resolved during first pass + // This handles cases where parent POM was processed after child POM + this.ResolveUnresolvedParentRelationships(); + + // Third pass: resolve all pending components with collected variables and complete hierarchy this.ResolvePendingComponents(); - // Record telemetry - this.Telemetry["DetectionMethod"] = this.usedDetectionMethod.ToString(); - this.Telemetry["FallbackReason"] = this.fallbackReason.ToString(); - this.Telemetry["MvnCliComponentCount"] = this.mvnCliComponentCount.ToString(); - this.Telemetry["StaticParserComponentCount"] = this.staticParserComponentCount.ToString(); + // Record telemetry - cache string conversions + var detectionMethodStr = this.usedDetectionMethod.ToString(); + var fallbackReasonStr = this.fallbackReason.ToString(); + var mvnCliCountStr = this.mvnCliComponentCount.ToString(); + var staticCountStr = this.staticParserComponentCount.ToString(); + + this.Telemetry["DetectionMethod"] = detectionMethodStr; + this.Telemetry["FallbackReason"] = fallbackReasonStr; + this.Telemetry["MvnCliComponentCount"] = mvnCliCountStr; + this.Telemetry["StaticParserComponentCount"] = staticCountStr; this.Telemetry["TotalComponentCount"] = (this.mvnCliComponentCount + this.staticParserComponentCount).ToString(); this.Telemetry["MavenCliAvailable"] = this.mavenCliAvailable.ToString(); this.Telemetry["OriginalPomFileCount"] = this.originalPomFiles.Count.ToString(); this.Telemetry["CollectedVariableCount"] = this.collectedVariables.Count.ToString(); - this.Telemetry["VariableSourcesTracked"] = this.variableDefinitionSources.Count.ToString(); this.Telemetry["PendingComponentCount"] = this.pendingComponents.Count.ToString(); if (!this.failedEndpoints.IsEmpty) @@ -476,10 +528,10 @@ protected override Task OnDetectionFinishedAsync() this.Telemetry["FailedEndpoints"] = string.Join(";", this.failedEndpoints.Distinct().Take(10)); } - this.LogInfo($"Detection completed. Method: {this.usedDetectionMethod}, " + - $"FallbackReason: {this.fallbackReason}, " + - $"MvnCli components: {this.mvnCliComponentCount}, " + - $"Static parser components: {this.staticParserComponentCount}"); + this.LogInfo($"Detection completed. Method: {detectionMethodStr}, " + + $"FallbackReason: {fallbackReasonStr}, " + + $"MvnCli components: {mvnCliCountStr}, " + + $"Static parser components: {staticCountStr}"); return Task.CompletedTask; } @@ -490,7 +542,7 @@ protected override Task OnDetectionFinishedAsync() private void AnalyzeMvnCliFailure() { // Check if any recorded errors indicate authentication failure - var hasAuthError = this.mavenCliErrors.Any(this.IsAuthenticationError); + var hasAuthError = this.mavenCliErrors.Any(IsAuthenticationError); if (hasAuthError) { @@ -540,12 +592,23 @@ private void ProcessPomFileStatically(ProcessRequest processRequest) var localVariables = new Dictionary(); this.CollectVariablesFromDocument(document, namespaceManager, filePath, localVariables); - // Add local variables to global collection - // Note: Later files (children) override earlier files (parents) due to processing order - foreach (var kvp in localVariables) + // Batch add local variables to global collection for better performance + // Key format: "filePath::variableName" enables Maven hierarchy-aware lookup + if (localVariables.Count > 0) { - this.collectedVariables.AddOrUpdate(kvp.Key, kvp.Value, (key, oldValue) => kvp.Value); - this.variableDefinitionSources.AddOrUpdate(kvp.Key, filePath, (key, oldValue) => filePath); + var keyBuilder = new StringBuilder(filePath.Length + 64); // Pre-allocate capacity + var filePathWithSeparator = filePath + "::"; + + foreach (var (variableName, variableValue) in localVariables) + { + keyBuilder.Clear(); + keyBuilder.Append(filePathWithSeparator).Append(variableName); + var key = keyBuilder.ToString(); + + this.collectedVariables.AddOrUpdate(key, variableValue, (_, _) => variableValue); + } + + this.Logger.LogDebug("MavenWithFallback: Collected {Count} variables from {File}", localVariables.Count, Path.GetFileName(filePath)); } // First pass: collect dependencies (may have unresolved variables) @@ -627,39 +690,228 @@ private void CollectVariablesFromDocument(XmlDocument document, XmlNamespaceMana { try { - // Collect properties variables (single XPath query) - var propertiesNode = document.SelectSingleNode("//proj:properties", namespaceManager); - if (propertiesNode != null) + // Query project coordinates once - used for both variable collection and project tracking + var projectGroupIdNode = document.SelectSingleNode("/proj:project/proj:groupId", namespaceManager); + var projectArtifactIdNode = document.SelectSingleNode("/proj:project/proj:artifactId", namespaceManager); + var projectVersionNode = document.SelectSingleNode("/proj:project/proj:version", namespaceManager); + + // Track this project by Maven coordinates for parent resolution (reuses queried nodes) + this.TrackMavenProjectCoordinates(document, namespaceManager, filePath, projectGroupIdNode, projectArtifactIdNode); + + // Parse Maven parent relationship to build proper hierarchy + this.ParseMavenParentRelationship(document, namespaceManager, filePath); + + // Collect properties variables from ALL properties sections (handles malformed XML with multiple ) + var propertiesNodes = document.SelectNodes("//proj:properties", namespaceManager); + if (propertiesNodes?.Count > 0) { - foreach (XmlNode propertyNode in propertiesNode.ChildNodes) + if (propertiesNodes.Count > 1) + { + this.Logger.LogDebug("MavenWithFallback: Found {Count} properties sections in {File}", propertiesNodes.Count, Path.GetFileName(filePath)); + } + + foreach (XmlNode propertiesNode in propertiesNodes) { - if (propertyNode.NodeType == XmlNodeType.Element && !string.IsNullOrWhiteSpace(propertyNode.InnerText)) + foreach (XmlNode propertyNode in propertiesNode.ChildNodes) { - var variableName = propertyNode.Name; - var variableValue = propertyNode.InnerText; - localVariables[variableName] = variableValue; + if (propertyNode.NodeType == XmlNodeType.Element && !string.IsNullOrWhiteSpace(propertyNode.InnerText)) + { + // Later properties sections override earlier ones (last wins - Maven behavior) + localVariables[propertyNode.Name] = propertyNode.InnerText; + } } } } - // Collect common project-level variables with optimized single query - // Use XPath union to get all common variables in one query - var commonVariablesNodes = document.SelectNodes("//proj:version | //proj:groupId | //proj:artifactId | //proj:revision", namespaceManager); - if (commonVariablesNodes != null) + // Collect project-level variables from already-queried nodes + if (projectVersionNode != null && !string.IsNullOrWhiteSpace(projectVersionNode.InnerText)) + { + localVariables["version"] = projectVersionNode.InnerText; + localVariables["project.version"] = projectVersionNode.InnerText; + } + + if (projectGroupIdNode != null && !string.IsNullOrWhiteSpace(projectGroupIdNode.InnerText)) { - foreach (XmlNode node in commonVariablesNodes) + localVariables["groupId"] = projectGroupIdNode.InnerText; + localVariables["project.groupId"] = projectGroupIdNode.InnerText; + } + + if (projectArtifactIdNode != null && !string.IsNullOrWhiteSpace(projectArtifactIdNode.InnerText)) + { + localVariables["artifactId"] = projectArtifactIdNode.InnerText; + localVariables["project.artifactId"] = projectArtifactIdNode.InnerText; + } + } + catch (Exception e) + { + this.Logger.LogError(e, "Failed to collect variables from file {Path}", filePath); + } + } + + /// + /// Parses Maven parent relationship from pom.xml to build proper inheritance hierarchy. + /// This is needed for Maven-compliant variable resolution that respects parent-child relationships. + /// + /// The XML document to parse. + /// XML namespace manager for Maven POM. + /// Current pom.xml file path. + private void ParseMavenParentRelationship(XmlDocument document, XmlNamespaceManager namespaceManager, string currentFilePath) + { + try + { + // Query parent element once and access children directly (more efficient than union XPath) + var parentNode = document.SelectSingleNode("/proj:project/proj:parent", namespaceManager); + + if (parentNode != null) + { + var parentGroupId = parentNode["groupId"]?.InnerText; + var parentArtifactId = parentNode["artifactId"]?.InnerText; + + if (!string.IsNullOrWhiteSpace(parentArtifactId)) { - if (!string.IsNullOrWhiteSpace(node.InnerText)) + // Try to find parent pom.xml file by searching processed files for matching artifactId + // This works if parent was processed before child + var parentPath = this.FindParentPomByArtifactId(parentGroupId, parentArtifactId, currentFilePath); + if (!string.IsNullOrEmpty(parentPath)) + { + this.mavenParentChildRelationships[currentFilePath] = parentPath; + this.Logger.LogDebug( + "MavenWithFallback: Parsed parent relationship: {Child} → {Parent}", + Path.GetFileName(currentFilePath), + Path.GetFileName(parentPath)); + } + else { - var variableName = node.LocalName; // Get the element name (version, groupId, etc.) - localVariables[variableName] = node.InnerText; + // Parent not found yet - queue for second pass resolution after all files are processed + this.unresolvedParentRelationships.Enqueue((currentFilePath, parentGroupId, parentArtifactId)); + this.Logger.LogDebug( + "MavenWithFallback: Queued unresolved parent relationship for {Child} → {ParentArtifactId}", + Path.GetFileName(currentFilePath), + parentArtifactId); } } } } catch (Exception e) { - this.Logger.LogError(e, "Failed to collect variables from file {Path}", filePath); + this.Logger.LogError(e, "Failed to parse parent relationship from {FilePath}", currentFilePath); + } + } + + /// + /// Finds parent pom.xml file path by Maven coordinates (groupId:artifactId). + /// First searches by coordinates among processed projects, then falls back to directory traversal. + /// + /// Parent groupId to match. + /// Parent artifactId to match. + /// Current file path to start searching from. + /// Parent pom.xml file path, or empty string if not found. + private string FindParentPomByArtifactId(string parentGroupId, string parentArtifactId, string currentFilePath) + { + // Use cache to avoid repeated operations for the same file + return this.parentPomCache.GetOrAdd(currentFilePath, filePath => + { + try + { + // First, try to find by Maven coordinates (handles sibling projects) + if (!string.IsNullOrWhiteSpace(parentArtifactId)) + { + var coordinateKey = string.IsNullOrWhiteSpace(parentGroupId) + ? parentArtifactId + : $"{parentGroupId}:{parentArtifactId}"; + + if (this.processedMavenProjects.TryGetValue(coordinateKey, out var coordinateBasedPath)) + { + this.Logger.LogDebug( + "MavenWithFallback: Found parent {ParentCoordinate} at {Path} for {Child}", + coordinateKey, + Path.GetFileName(coordinateBasedPath), + Path.GetFileName(filePath)); + return coordinateBasedPath; + } + } + + // Fallback: Maven convention parent directory search + var currentDir = Path.GetDirectoryName(filePath); + var parentDir = Path.GetDirectoryName(currentDir); + + // Track visited directories to prevent infinite loops from circular directory structures + var visitedDirectories = new HashSet(StringComparer.OrdinalIgnoreCase); + + while (!string.IsNullOrEmpty(parentDir)) + { + // Prevent infinite loops from circular directory references or file system anomalies + if (!visitedDirectories.Add(parentDir)) + { + this.Logger.LogDebug( + "MavenWithFallback: Circular directory reference detected while searching for parent POM, breaking at {Directory}", + parentDir); + break; + } + + var parentPomPath = Path.Combine(parentDir, "pom.xml"); + if (this.fileUtilityService.Exists(parentPomPath) && + !string.Equals(parentPomPath, filePath, StringComparison.OrdinalIgnoreCase)) + { + return parentPomPath; + } + + var nextParentDir = Path.GetDirectoryName(parentDir); + if (string.Equals(nextParentDir, parentDir, StringComparison.OrdinalIgnoreCase)) + { + break; // Reached file system root + } + + parentDir = nextParentDir; + } + + return string.Empty; // Not found + } + catch (Exception ex) + { + this.Logger.LogDebug(ex, "Error finding parent POM for {FilePath}", Path.GetFileName(filePath)); + return string.Empty; + } + }); + } + + /// + /// Tracks a Maven project by its coordinates to enable coordinate-based parent resolution. + /// + /// The XML document to parse. + /// XML namespace manager for Maven POM. + /// Current pom.xml file path. + /// Pre-queried groupId node (can be null). + /// Pre-queried artifactId node (can be null). + private void TrackMavenProjectCoordinates(XmlDocument document, XmlNamespaceManager namespaceManager, string filePath, XmlNode groupIdNode, XmlNode artifactIdNode) + { + try + { + // If project doesn't have its own groupId, try to get it from parent + groupIdNode ??= document.SelectSingleNode("/proj:project/proj:parent/proj:groupId", namespaceManager); + + if (artifactIdNode != null && !string.IsNullOrWhiteSpace(artifactIdNode.InnerText)) + { + var groupId = groupIdNode?.InnerText; + var artifactId = artifactIdNode.InnerText; + + // Store with both artifactId-only and groupId:artifactId keys for flexible lookup + this.processedMavenProjects.TryAdd(artifactId, filePath); + if (!string.IsNullOrWhiteSpace(groupId)) + { + this.processedMavenProjects.TryAdd($"{groupId}:{artifactId}", filePath); + } + + this.Logger.LogDebug( + "MavenWithFallback: Tracked project {GroupId}:{ArtifactId} at {Path}", + groupId ?? "(inherited)", + artifactId, + Path.GetFileName(filePath)); + } + } + catch (Exception e) + { + this.Logger.LogDebug(e, "Failed to track Maven project coordinates from {Path}", filePath); } } @@ -689,17 +941,6 @@ private string ResolveVersionFromLocalOnly(string versionTemplate, Dictionary - errorMessage.Contains(pattern, StringComparison.OrdinalIgnoreCase)); - } - private IEnumerable ExtractFailedEndpoints(string errorMessage) { if (string.IsNullOrWhiteSpace(errorMessage)) @@ -734,7 +975,89 @@ private void LogAuthErrorGuidance() } /// - /// Second pass: resolve all pending components using hierarchy-aware variable resolution. + /// Resolves parent relationships that couldn't be established during first pass. + /// This handles cases where the parent POM was processed after the child POM. + /// + private void ResolveUnresolvedParentRelationships() + { + var resolvedCount = 0; + var unresolvedCount = 0; + + while (this.unresolvedParentRelationships.TryDequeue(out var unresolvedRelationship)) + { + var (filePath, parentGroupId, parentArtifactId) = unresolvedRelationship; + + // Skip if already resolved (could happen if resolved via directory traversal during first pass) + if (this.mavenParentChildRelationships.ContainsKey(filePath)) + { + continue; + } + + // Clear the cache entry so we can try again with the now-complete processedMavenProjects + this.parentPomCache.TryRemove(filePath, out _); + + // Try to find parent by coordinates now that all files have been processed + var parentPath = this.FindParentPomByCoordinatesOnly(parentGroupId, parentArtifactId, filePath); + if (!string.IsNullOrEmpty(parentPath)) + { + this.mavenParentChildRelationships[filePath] = parentPath; + resolvedCount++; + this.Logger.LogDebug( + "MavenWithFallback: Resolved deferred parent relationship: {Child} → {Parent}", + Path.GetFileName(filePath), + Path.GetFileName(parentPath)); + } + else + { + unresolvedCount++; + this.Logger.LogDebug( + "MavenWithFallback: Could not resolve parent {ParentGroupId}:{ParentArtifactId} for {Child}", + parentGroupId ?? "(null)", + parentArtifactId, + Path.GetFileName(filePath)); + } + } + + if (resolvedCount > 0 || unresolvedCount > 0) + { + this.LogInfo($"Resolved {resolvedCount} deferred parent relationships, {unresolvedCount} remain unresolved"); + } + } + + /// + /// Finds parent POM by Maven coordinates only (no directory traversal). + /// Used for deferred parent resolution after all files have been processed. + /// + private string FindParentPomByCoordinatesOnly(string parentGroupId, string parentArtifactId, string currentFilePath) + { + if (string.IsNullOrWhiteSpace(parentArtifactId)) + { + return string.Empty; + } + + // Try with full coordinates first + if (!string.IsNullOrWhiteSpace(parentGroupId)) + { + var fullCoordinateKey = $"{parentGroupId}:{parentArtifactId}"; + if (this.processedMavenProjects.TryGetValue(fullCoordinateKey, out var fullCoordinatePath) && + !string.Equals(fullCoordinatePath, currentFilePath, StringComparison.OrdinalIgnoreCase)) + { + return fullCoordinatePath; + } + } + + // Try with artifactId only + if (this.processedMavenProjects.TryGetValue(parentArtifactId, out var artifactIdPath) && + !string.Equals(artifactIdPath, currentFilePath, StringComparison.OrdinalIgnoreCase)) + { + return artifactIdPath; + } + + return string.Empty; + } + + /// + /// Third pass: resolve all pending components using hierarchy-aware variable resolution. /// For components with unresolved variables, this picks the closest ancestor definition /// based on Maven's property inheritance rules (child > parent precedence). /// @@ -799,22 +1122,68 @@ private string ResolveVersionWithHierarchyAwareness(string versionTemplate, stri { var variable = match.Groups[1].Captures[0].ToString(); - // If variable exists in collected variables, find the closest ancestor definition - if (this.collectedVariables.TryGetValue(variable, out var replacement)) + // Use Maven-compliant hierarchy search: current → parent → grandparent + var foundValue = this.FindVariableInMavenHierarchy(variable, requestingFilePath); + if (foundValue != null) { - resolvedVersion = versionTemplate.Replace("${" + variable + "}", replacement); - this.LogInfo($"Resolved variable {variable} = {replacement} from {(this.variableDefinitionSources.TryGetValue(variable, out var source) ? source : "unknown")} for {requestingFilePath}"); + resolvedVersion = versionTemplate.Replace("${" + variable + "}", foundValue.Value.Value); } else { - // Variable not found - log for debugging - this.LogWarning($"Variable {variable} not found in collected variables for {requestingFilePath}. Available variables: {string.Join(", ", this.collectedVariables.Keys.Take(10))}"); + // Variable not found in Maven hierarchy - log for debugging + this.Logger.LogWarning( + "MavenWithFallback: Variable {Variable} not found in Maven hierarchy for {File}", + variable, + Path.GetFileName(requestingFilePath)); } } return resolvedVersion; } + /// + /// Finds a variable value using Maven-compliant hierarchy search. + /// Searches in order: current file → parent → grandparent (stops at first match). + /// + /// Variable name to find. + /// The pom.xml file requesting the variable. + /// Variable value and source file, or null if not found in hierarchy. + private (string Value, string SourceFile)? FindVariableInMavenHierarchy(string variable, string requestingFilePath) + { + var currentFile = requestingFilePath; + var visitedFiles = new HashSet(StringComparer.OrdinalIgnoreCase); + var keyBuilder = new StringBuilder(256); // Pre-allocate for typical path lengths + + // Walk up Maven parent hierarchy until variable found or no more parents + while (!string.IsNullOrEmpty(currentFile)) + { + // Prevent infinite loops from circular parent references + if (!visitedFiles.Add(currentFile)) + { + this.Logger.LogWarning( + "MavenWithFallback: Circular parent reference detected while resolving variable {Variable}, breaking at {File}", + variable, + Path.GetFileName(currentFile)); + break; + } + + // Check if this file has the variable definition using StringBuilder for efficiency + keyBuilder.Clear(); + keyBuilder.Append(currentFile).Append("::").Append(variable); + var variableKey = keyBuilder.ToString(); + + if (this.collectedVariables.TryGetValue(variableKey, out var value)) + { + return (value, currentFile); + } + + // Move to Maven parent (not directory parent) + this.mavenParentChildRelationships.TryGetValue(currentFile, out currentFile); + } + + return null; // Variable not found in Maven hierarchy + } + /// /// Filters out nested pom.xml files, keeping only root-level ones. /// A pom.xml is considered nested if there's another pom.xml in a parent directory. @@ -842,14 +1211,8 @@ private IObservable RemoveNestedPomXmls( .ThenBy(r => r.ComponentStream.Location, StringComparer.OrdinalIgnoreCase) .ToList(); - // Build a list of all directories that contain a pom.xml, ordered by path length (shortest first). - // This ensures parent directories are checked before their children. - var pomDirectories = sortedRequests - .Select(r => NormalizeDirectoryPath(Path.GetDirectoryName(r.ComponentStream.Location))) - .Distinct(StringComparer.OrdinalIgnoreCase) - .OrderBy(d => d.Length) - .ToList(); - + // Use a HashSet of root directories for O(1) lookup instead of O(n) list iteration + var rootPomDirectories = new HashSet(StringComparer.OrdinalIgnoreCase); var filteredRequests = new List(); foreach (var request in sortedRequests) @@ -857,23 +1220,21 @@ private IObservable RemoveNestedPomXmls( cancellationToken.ThrowIfCancellationRequested(); var location = NormalizeDirectoryPath(Path.GetDirectoryName(request.ComponentStream.Location)); + + // Check if any ancestor directory is already a root POM directory + // Walk up the directory tree (O(depth) instead of O(n)) var isNested = false; + var parentDir = Path.GetDirectoryName(location.TrimEnd(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar)); - foreach (var pomDirectory in pomDirectories) + while (!string.IsNullOrEmpty(parentDir)) { - if (pomDirectory.Length >= location.Length) - { - // Since the list is ordered by length, if the pomDirectory is longer than - // or equal to the location, there are no possible parent directories left. - break; - } - - if (location.StartsWith(pomDirectory, StringComparison.OrdinalIgnoreCase)) + var normalizedParent = NormalizeDirectoryPath(parentDir); + if (rootPomDirectories.Contains(normalizedParent)) { - this.LogDebugWithId($"Ignoring {MavenManifest} at {location}, as it has a parent {MavenManifest} at {pomDirectory}."); + this.LogDebugWithId($"Ignoring {MavenManifest} at {location}, as it has a parent {MavenManifest} at {normalizedParent}."); isNested = true; parentPomDictionary.AddOrUpdate( - pomDirectory, + normalizedParent, [request], (key, existingList) => { @@ -882,19 +1243,28 @@ private IObservable RemoveNestedPomXmls( }); break; } + + var nextParent = Path.GetDirectoryName(parentDir); + if (string.Equals(nextParent, parentDir, StringComparison.OrdinalIgnoreCase)) + { + break; // Reached root + } + + parentDir = nextParent; } if (!isNested) { this.LogDebugWithId($"Discovered {request.ComponentStream.Location}."); + rootPomDirectories.Add(location); parentPomDictionary.AddOrUpdate( - NormalizeDirectoryPath(Path.GetDirectoryName(request.ComponentStream.Location)), - [request], - (key, existingList) => - { - existingList.Add(request); - return existingList; - }); + location, + [request], + (key, existingList) => + { + existingList.Add(request); + return existingList; + }); filteredRequests.Add(request); } } diff --git a/test/Microsoft.ComponentDetection.Detectors.Tests/MavenWithFallbackDetectorTests.cs b/test/Microsoft.ComponentDetection.Detectors.Tests/MavenWithFallbackDetectorTests.cs index 7bca87802..021f42cd8 100644 --- a/test/Microsoft.ComponentDetection.Detectors.Tests/MavenWithFallbackDetectorTests.cs +++ b/test/Microsoft.ComponentDetection.Detectors.Tests/MavenWithFallbackDetectorTests.cs @@ -437,27 +437,36 @@ public async Task StaticParser_IgnoresDependenciesWithUnresolvablePropertyVersio [TestMethod] public async Task StaticParser_ResolvesVariableFromPreviousFile_Async() { - // Arrange - Test case 1: Variable defined in first file, referenced in second file + // Arrange - Test case 1: Variable defined in parent POM, referenced in child POM + // Uses Maven's standard parent inheritance mechanism this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) .ReturnsAsync(false); - var firstPomContent = @" + // Setup fileUtilityService to allow parent POM resolution + this.fileUtilityServiceMock.Setup(x => x.Exists(It.Is(s => s.EndsWith("pom.xml") && !s.Contains("module")))) + .Returns(true); + + var parentPomContent = @" 4.0.0 com.test parent 1.0.0 + pom 3.12.0 "; - var secondPomContent = @" + var childPomContent = @" 4.0.0 - com.test + + com.test + parent + 1.0.0 + child - 1.0.0 org.apache.commons @@ -467,10 +476,10 @@ public async Task StaticParser_ResolvesVariableFromPreviousFile_Async() "; - // Act + // Act - Parent processed first, then child var (detectorResult, componentRecorder) = await this.DetectorTestUtility - .WithFile("pom.xml", firstPomContent) - .WithFile("module/pom.xml", secondPomContent) + .WithFile("pom.xml", parentPomContent) + .WithFile("module/pom.xml", childPomContent) .ExecuteDetectorAsync(); // Assert @@ -489,16 +498,24 @@ public async Task StaticParser_ResolvesVariableFromPreviousFile_Async() [TestMethod] public async Task StaticParser_BackfillsVariableFromLaterFile_Async() { - // Arrange - Test case 2: Variable referenced in first file, defined in second file + // Arrange - Test case 2: Child processed first, parent processed second (deferred resolution) + // Tests that variables can be resolved even when parent is processed after child this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) .ReturnsAsync(false); - var firstPomContent = @" + // Setup fileUtilityService to allow parent POM resolution + this.fileUtilityServiceMock.Setup(x => x.Exists(It.Is(s => s.EndsWith("pom.xml") && !s.Contains("module")))) + .Returns(true); + + var childPomContent = @" 4.0.0 - com.test + + com.test + parent + 1.0.0 + child - 1.0.0 org.apache.commons @@ -508,21 +525,22 @@ public async Task StaticParser_BackfillsVariableFromLaterFile_Async() "; - var secondPomContent = @" + var parentPomContent = @" 4.0.0 com.test parent 1.0.0 + pom 3.13.0 "; - // Act + // Act - Child processed first (has unresolved variable), then parent var (detectorResult, componentRecorder) = await this.DetectorTestUtility - .WithFile("module/pom.xml", firstPomContent) - .WithFile("pom.xml", secondPomContent) + .WithFile("module/pom.xml", childPomContent) + .WithFile("pom.xml", parentPomContent) .ExecuteDetectorAsync(); // Assert @@ -605,12 +623,19 @@ public async Task StaticParser_OutOfOrderProcessing_RespectsAncestorPriority_Asy this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) .ReturnsAsync(false); + // Setup fileUtilityService to return false for directory traversal + // This forces the detector to use coordinate-based parent resolution + // which correctly handles out-of-order processing through deferred resolution + this.fileUtilityServiceMock.Setup(x => x.Exists(It.IsAny())) + .Returns(false); + var grandparentPomContent = @" 4.0.0 com.test grandparent 1.0.0 + pom 3.10.0 @@ -619,9 +644,13 @@ public async Task StaticParser_OutOfOrderProcessing_RespectsAncestorPriority_Asy var parentPomContent = @" 4.0.0 - com.test + + com.test + grandparent + 1.0.0 + parent - 1.0.0 + pom 3.11.0 @@ -630,9 +659,12 @@ public async Task StaticParser_OutOfOrderProcessing_RespectsAncestorPriority_Asy var childPomContent = @" 4.0.0 - com.test + + com.test + parent + 1.0.0 + child - 1.0.0 org.apache.commons @@ -645,6 +677,7 @@ public async Task StaticParser_OutOfOrderProcessing_RespectsAncestorPriority_Asy // Act - Process files in out-of-order sequence: grandparent → child → parent // This tests that deferred variable resolution correctly handles ancestor priority // regardless of processing order + // Use file structure matching the working tests: root pom.xml and nested paths var (detectorResult, componentRecorder) = await this.DetectorTestUtility .WithFile("pom.xml", grandparentPomContent) // Processed 1st: defines commons.version = 3.10.0 .WithFile("parent/child/pom.xml", childPomContent) // Processed 2nd: references ${commons.version} @@ -1451,4 +1484,752 @@ private void SetupMvnCliSuccess(string depsFileContent) // Add the dependency file that Maven CLI would have generated this.DetectorTestUtility.WithFile(BcdeMvnFileName, depsFileContent, [BcdeMvnFileName]); } + + [TestMethod] + public async Task VariableResolution_SiblingPomVariablesShouldNotBeUsed_Async() + { + // Arrange - Maven-compliant behavior: sibling POM variables should NOT be resolved + this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) + .ReturnsAsync(false); + + // Parent POM with a property + var parentPomContent = @" + + 4.0.0 + com.test + parent + 1.0.0 + pom + + 3.12.0 + +"; + + // Sibling POM with different variable - should NOT be used for resolution + var siblingPomContent = @" + + 4.0.0 + + com.test + parent + 1.0.0 + + com.test + sibling + + 31.1-jre + +"; + + // Target POM trying to use sibling's variable - should fail to resolve + var targetPomContent = @" + + 4.0.0 + + com.test + parent + 1.0.0 + + com.test + target + + + com.google.guava + guava + ${guava.version} + + + org.apache.commons + commons-lang3 + ${commons.version} + + +"; + + // Act + var (detectorResult, componentRecorder) = await this.DetectorTestUtility + .WithFile("parent/pom.xml", parentPomContent) + .WithFile("sibling/pom.xml", siblingPomContent) + .WithFile("target/pom.xml", targetPomContent) + .ExecuteDetectorAsync(); + + // Assert + detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); + + var detectedComponents = componentRecorder.GetDetectedComponents(); + + // Should only resolve commons-lang3 (from parent), not guava (sibling variable) + detectedComponents.Should().HaveCount(1); + var component = detectedComponents.First().Component as MavenComponent; + component.Should().NotBeNull(); + component.GroupId.Should().Be("org.apache.commons"); + component.ArtifactId.Should().Be("commons-lang3"); + component.Version.Should().Be("3.12.0"); // Resolved from parent + } + + [TestMethod] + public async Task VariableResolution_ParentHierarchyVariablesShouldBeUsed_Async() + { + // Arrange - Maven-compliant behavior: parent/grandparent variables should be resolved + this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) + .ReturnsAsync(false); + + // Setup fileUtilityService to allow parent POM resolution + this.fileUtilityServiceMock.Setup(x => x.Exists(It.IsAny())) + .Returns((string path) => path.EndsWith("pom.xml")); + + // Grandparent POM + var grandparentPomContent = @" + + 4.0.0 + com.test + grandparent + 1.0.0 + pom + + 4.13.2 + +"; + + // Parent POM + var parentPomContent = @" + + 4.0.0 + + com.test + grandparent + 1.0.0 + + com.test + parent + + 3.12.0 + +"; + + // Child POM using variables from parent hierarchy + var childPomContent = @" + + 4.0.0 + + com.test + parent + 1.0.0 + + com.test + child + + 31.1-jre + + + + org.apache.commons + commons-lang3 + ${commons.version} + + + junit + junit + ${junit.version} + + + com.google.guava + guava + ${guava.version} + + +"; + + // Act + var (detectorResult, componentRecorder) = await this.DetectorTestUtility + .WithFile("grandparent/pom.xml", grandparentPomContent) + .WithFile("parent/pom.xml", parentPomContent) + .WithFile("child/pom.xml", childPomContent) + .ExecuteDetectorAsync(); + + // Assert + detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); + + var detectedComponents = componentRecorder.GetDetectedComponents(); + detectedComponents.Should().HaveCount(3); + + var components = detectedComponents.Select(x => x.Component as MavenComponent).ToList(); + + // Should resolve commons-lang3 from parent + var commonsComponent = components.FirstOrDefault(c => c.ArtifactId == "commons-lang3"); + commonsComponent.Should().NotBeNull(); + commonsComponent.Version.Should().Be("3.12.0"); + + // Should resolve junit from grandparent + var junitComponent = components.FirstOrDefault(c => c.ArtifactId == "junit"); + junitComponent.Should().NotBeNull(); + junitComponent.Version.Should().Be("4.13.2"); + + // Should resolve guava from current POM + var guavaComponent = components.FirstOrDefault(c => c.ArtifactId == "guava"); + guavaComponent.Should().NotBeNull(); + guavaComponent.Version.Should().Be("31.1-jre"); + } + + [TestMethod] + public async Task VariableResolution_CircularReferencesShouldBeHandled_Async() + { + // Arrange - Test circular reference detection and prevention + // NOTE: Maven CLI fails completely when circular references exist (even if unused) + // Our detector should fall back to static parsing and handle circular refs gracefully + this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) + .ReturnsAsync(false); + + // POM with circular references (Maven CLI would fail entirely) + var pomContent = @" + + 4.0.0 + com.test + circular-test + 1.0.0 + + + ${another.version} + ${circular.version} + + + 1.0.0 + + + + + org.apache.commons + commons-lang3 + ${circular.version} + + + + com.google.guava + guava + ${valid.version} + + +"; + + // Act + var (detectorResult, componentRecorder) = await this.DetectorTestUtility + .WithFile("pom.xml", pomContent) + .ExecuteDetectorAsync(); + + // Assert + detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); + + var detectedComponents = componentRecorder.GetDetectedComponents(); + + // Should only resolve guava (valid version), skip commons-lang3 (circular reference) + // This matches our detector's behavior: graceful handling vs Maven CLI's complete failure + detectedComponents.Should().HaveCount(1); + var component = detectedComponents.First().Component as MavenComponent; + component.Should().NotBeNull(); + component.GroupId.Should().Be("com.google.guava"); + component.ArtifactId.Should().Be("guava"); + component.Version.Should().Be("1.0.0"); + } + + [TestMethod] + public async Task VariableResolution_DeferredResolutionSecondPass_Async() + { + // Arrange - Test second pass resolution for components with initially unresolved variables + this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) + .ReturnsAsync(false); + + // Setup fileUtilityService to ONLY return true for the specific parent pom.xml file + // This prevents directory-based parent resolution from finding a "fake" parent + this.fileUtilityServiceMock.Setup(x => x.Exists(It.IsAny())) + .Returns((string path) => path.Contains("parent") && path.EndsWith("pom.xml")); + + // Parent POM loaded after child (simulation) + var parentPomContent = @" + + 4.0.0 + com.test + parent + 1.0.0 + pom + + 2.0.0 + +"; + + // Child POM with dependency on parent variable + var childPomContent = @" + + 4.0.0 + + com.test + parent + 1.0.0 + + com.test + child + + + org.apache.commons + commons-lang3 + ${deferred.version} + + +"; + + // Act - Files are processed in order that might cause deferred resolution + var (detectorResult, componentRecorder) = await this.DetectorTestUtility + .WithFile("child/pom.xml", childPomContent) + .WithFile("parent/pom.xml", parentPomContent) + .ExecuteDetectorAsync(); + + // Assert + detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); + + var detectedComponents = componentRecorder.GetDetectedComponents(); + detectedComponents.Should().HaveCount(1); + + var component = detectedComponents.First().Component as MavenComponent; + component.Should().NotBeNull(); + component.GroupId.Should().Be("org.apache.commons"); + component.ArtifactId.Should().Be("commons-lang3"); + component.Version.Should().Be("2.0.0"); // Should be resolved in second pass + } + + [TestMethod] + public async Task VariableResolution_MavenBuiltInVariablesShouldWork_Async() + { + // Arrange - Test Maven built-in variables like ${project.version} + this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) + .ReturnsAsync(false); + + // Setup fileUtilityService (not needed for this test but required for consistency) + this.fileUtilityServiceMock.Setup(x => x.Exists(It.IsAny())) + .Returns(false); + + var pomContent = @" + + 4.0.0 + com.test + maven-builtin-test + 1.5.0 + + + com.test + internal-dependency + ${project.version} + + +"; + + // Act + var (detectorResult, componentRecorder) = await this.DetectorTestUtility + .WithFile("pom.xml", pomContent) + .ExecuteDetectorAsync(); + + // Assert + detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); + + var detectedComponents = componentRecorder.GetDetectedComponents(); + detectedComponents.Should().HaveCount(1); + + var component = detectedComponents.First().Component as MavenComponent; + component.Should().NotBeNull(); + component.GroupId.Should().Be("com.test"); + component.ArtifactId.Should().Be("internal-dependency"); + component.Version.Should().Be("1.5.0"); // Should resolve ${project.version} + } + + [TestMethod] + public async Task VariableResolution_SiblingPomAsParentShouldBeUsed_Async() + { + // Arrange - Test case where a "sibling" POM is referenced as parent + // In this scenario, the sibling POM variables SHOULD be used for resolution + this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) + .ReturnsAsync(false); + + // "Sibling" POM that will be referenced as parent by another module + var parentModulePomContent = @" + + 4.0.0 + com.test + shared-parent + 1.0.0 + pom + + 2.13.4 + 4.13.2 + +"; + + // Another "sibling" POM that references the first as its parent + var childModulePomContent = @" + + 4.0.0 + + com.test + shared-parent + 1.0.0 + ../shared-parent/pom.xml + + com.test + consumer-module + + 3.0.0 + + + + com.fasterxml.jackson.core + jackson-core + ${jackson.version} + + + junit + junit + ${junit.version} + + + org.apache.commons + commons-lang3 + ${local.version} + + +"; + + // Act + var (detectorResult, componentRecorder) = await this.DetectorTestUtility + .WithFile("shared-parent/pom.xml", parentModulePomContent) + .WithFile("consumer-module/pom.xml", childModulePomContent) + .ExecuteDetectorAsync(); + + // Assert + detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); + + var detectedComponents = componentRecorder.GetDetectedComponents(); + detectedComponents.Should().HaveCount(3); + + var components = detectedComponents.Select(x => x.Component as MavenComponent).ToList(); + + // Should resolve jackson-core from parent (shared-parent) + var jacksonComponent = components.FirstOrDefault(c => c.ArtifactId == "jackson-core"); + jacksonComponent.Should().NotBeNull(); + jacksonComponent.Version.Should().Be("2.13.4"); // From parent POM + + // Should resolve junit from parent (shared-parent) + var junitComponent = components.FirstOrDefault(c => c.ArtifactId == "junit"); + junitComponent.Should().NotBeNull(); + junitComponent.Version.Should().Be("4.13.2"); // From parent POM + + // Should resolve commons-lang3 from local properties + var commonsComponent = components.FirstOrDefault(c => c.ArtifactId == "commons-lang3"); + commonsComponent.Should().NotBeNull(); + commonsComponent.Version.Should().Be("3.0.0"); // From local POM + } + + [TestMethod] + public async Task VariableResolution_UnreferencedSiblingPomShouldNotBeUsed_Async() + { + // Arrange - Contrast test: sibling POM NOT referenced as parent should be ignored + this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) + .ReturnsAsync(false); + + // True parent POM + var actualParentPomContent = @" + + 4.0.0 + com.test + actual-parent + 1.0.0 + pom + + 3.12.0 + +"; + + // Sibling POM with different variables - should NOT be used + var siblingPomContent = @" + + 4.0.0 + + com.test + actual-parent + 1.0.0 + + com.test + sibling-module + + 2.13.4 + +"; + + // Child POM that references actual-parent, not sibling + var childPomContent = @" + + 4.0.0 + + com.test + actual-parent + 1.0.0 + + com.test + child-module + + + org.apache.commons + commons-lang3 + ${commons.version} + + + com.fasterxml.jackson.core + jackson-core + ${jackson.version} + + +"; + + // Act + var (detectorResult, componentRecorder) = await this.DetectorTestUtility + .WithFile("actual-parent/pom.xml", actualParentPomContent) + .WithFile("sibling-module/pom.xml", siblingPomContent) + .WithFile("child-module/pom.xml", childPomContent) + .ExecuteDetectorAsync(); + + // Assert + detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); + + var detectedComponents = componentRecorder.GetDetectedComponents(); + + // Should only resolve commons-lang3 (from actual parent), not jackson-core (sibling variable) + detectedComponents.Should().HaveCount(1); + + var component = detectedComponents.First().Component as MavenComponent; + component.Should().NotBeNull(); + component.GroupId.Should().Be("org.apache.commons"); + component.ArtifactId.Should().Be("commons-lang3"); + component.Version.Should().Be("3.12.0"); // Resolved from actual parent + } + + [TestMethod] + public async Task TestSmartLoopPreventionInDirectoryTraversal() + { + // Arrange - Setup Maven CLI to fail so we use static parser + this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) + .ReturnsAsync(false); + + // Create a child POM that references a parent that won't be found in directory traversal + var childPomContent = """ + + 4.0.0 + + com.example + parent-project + 1.0.0 + + com.example + child-project + ${parent.version} + + + junit + junit + 4.13.2 + + + + """; + + // Act & Assert - This should not hang or throw due to infinite directory traversal + var (scanResult, componentRecorder) = await this.DetectorTestUtility + .WithFile("pom.xml", childPomContent) + .ExecuteDetectorAsync(); + + // Should complete successfully without infinite loops + scanResult.ResultCode.Should().Be(ProcessingResultCode.Success); + + // Should register the dependency with direct version (junit) + var detectedComponents = componentRecorder.GetDetectedComponents(); + detectedComponents.Should().HaveCount(1); + + var component = detectedComponents.First().Component as MavenComponent; + component.Should().NotBeNull(); + component.GroupId.Should().Be("junit"); + component.ArtifactId.Should().Be("junit"); + component.Version.Should().Be("4.13.2"); + } + + [TestMethod] + public async Task TestDeepDirectoryTraversalWithoutInfiniteLoop() + { + // Arrange - Setup to use static parser only + this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) + .ReturnsAsync(false); + + // Create a child POM in a deep directory structure + var childPomContent = """ + + 4.0.0 + + com.example + deep-parent + 1.0.0 + + deep-child + + + org.junit.jupiter + junit-jupiter + 5.8.2 + + + + """; + + // Act - Should handle deep traversal without issues + var (scanResult, componentRecorder) = await this.DetectorTestUtility + .WithFile("very/deep/nested/directory/structure/project/pom.xml", childPomContent) + .ExecuteDetectorAsync(); + + // Assert - Should complete successfully + scanResult.ResultCode.Should().Be(ProcessingResultCode.Success); + var detectedComponents = componentRecorder.GetDetectedComponents(); + detectedComponents.Should().HaveCount(1); + + var component = detectedComponents.First().Component as MavenComponent; + component.GroupId.Should().Be("org.junit.jupiter"); + component.ArtifactId.Should().Be("junit-jupiter"); + component.Version.Should().Be("5.8.2"); + } + + [TestMethod] + public async Task TestCircularDirectoryReferenceDetection() + { + // Arrange - Setup static parsing mode + this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) + .ReturnsAsync(false); + + // Create a simple POM that tests directory traversal robustness + var pomContent = """ + + 4.0.0 + com.example + circular-test + 1.0.0 + + + com.fasterxml.jackson.core + jackson-core + 2.13.3 + + + + """; + + // Act - Should not hang on edge cases + var (scanResult, componentRecorder) = await this.DetectorTestUtility + .WithFile("subdir/pom.xml", pomContent) + .ExecuteDetectorAsync(); + + // Assert - Should complete without infinite loops + scanResult.ResultCode.Should().Be(ProcessingResultCode.Success); + var detectedComponents = componentRecorder.GetDetectedComponents(); + detectedComponents.Should().HaveCount(1); + + var component = detectedComponents.First().Component as MavenComponent; + component.GroupId.Should().Be("com.fasterxml.jackson.core"); + component.ArtifactId.Should().Be("jackson-core"); + component.Version.Should().Be("2.13.3"); + } + + [TestMethod] + public async Task TestFileSystemRootReachedScenario() + { + // Arrange - Setup static parsing mode + this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) + .ReturnsAsync(false); + + // Create a simple POM + var pomContent = """ + + 4.0.0 + com.example + root-test + 1.0.0 + + + org.springframework + spring-core + 5.3.21 + + + + """; + + // Act - Should handle file system root gracefully + var (scanResult, componentRecorder) = await this.DetectorTestUtility + .WithFile("pom.xml", pomContent) + .ExecuteDetectorAsync(); + + // Assert - Should complete without issues + scanResult.ResultCode.Should().Be(ProcessingResultCode.Success); + var detectedComponents = componentRecorder.GetDetectedComponents(); + detectedComponents.Should().HaveCount(1); + + var component = detectedComponents.First().Component as MavenComponent; + component.GroupId.Should().Be("org.springframework"); + component.ArtifactId.Should().Be("spring-core"); + component.Version.Should().Be("5.3.21"); + } + + [TestMethod] + public async Task TestPerformanceOfSmartLoopPrevention() + { + // Arrange - Setup static parsing mode + this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) + .ReturnsAsync(false); + + // Create multiple POMs that will trigger directory traversal + var pom1Content = CreatePomWithParentReference("project1", "parent-project", "1.0.0"); + var pom2Content = CreatePomWithParentReference("project2", "parent-project", "1.0.0"); + var pom3Content = CreatePomWithParentReference("project3", "parent-project", "1.0.0"); + + var stopwatch = System.Diagnostics.Stopwatch.StartNew(); + + // Act - Process multiple POMs + var (scanResult, componentRecorder) = await this.DetectorTestUtility + .WithFile("project1/pom.xml", pom1Content) + .WithFile("project2/pom.xml", pom2Content) + .WithFile("project3/pom.xml", pom3Content) + .ExecuteDetectorAsync(); + + stopwatch.Stop(); + + // Assert - Should complete quickly (well under 1 second for this simple test) + stopwatch.ElapsedMilliseconds.Should().BeLessThan(1000, "Smart loop prevention should not significantly impact performance"); + scanResult.ResultCode.Should().Be(ProcessingResultCode.Success); + + // Should have detected direct dependencies from all 3 POMs + var detectedComponents = componentRecorder.GetDetectedComponents(); + detectedComponents.Should().HaveCount(3); + } + + private static string CreatePomWithParentReference(string artifactId, string parentArtifactId, string parentVersion) + { + return $""" + + 4.0.0 + + com.example + {parentArtifactId} + {parentVersion} + + {artifactId} + + + com.example + {artifactId}-dependency + 2.0.0 + + + + """; + } } From ac90cf56f888fe42f55110491980dd050fa2194a Mon Sep 17 00:00:00 2001 From: Zheng Hao Tang Date: Mon, 23 Mar 2026 17:07:50 -0700 Subject: [PATCH 6/7] Update cleanup approach --- .../Records/MavenCliCleanupTelemetryRecord.cs | 29 +++++ .../maven/MavenCommandService.cs | 2 +- .../maven/MavenConstants.cs | 23 ++++ .../maven/MavenWithFallbackDetector.cs | 52 ++++++++- .../maven/MvnCliComponentDetector.cs | 2 +- .../Services/DetectorProcessingService.cs | 108 ++++++++++++++---- .../MavenWithFallbackDetectorTests.cs | 13 +-- .../DetectorProcessingServiceTests.cs | 21 +++- 8 files changed, 215 insertions(+), 35 deletions(-) create mode 100644 src/Microsoft.ComponentDetection.Common/Telemetry/Records/MavenCliCleanupTelemetryRecord.cs create mode 100644 src/Microsoft.ComponentDetection.Detectors/maven/MavenConstants.cs diff --git a/src/Microsoft.ComponentDetection.Common/Telemetry/Records/MavenCliCleanupTelemetryRecord.cs b/src/Microsoft.ComponentDetection.Common/Telemetry/Records/MavenCliCleanupTelemetryRecord.cs new file mode 100644 index 000000000..0261c3c04 --- /dev/null +++ b/src/Microsoft.ComponentDetection.Common/Telemetry/Records/MavenCliCleanupTelemetryRecord.cs @@ -0,0 +1,29 @@ +namespace Microsoft.ComponentDetection.Common.Telemetry.Records; + +using Microsoft.ComponentDetection.Common.Telemetry.Attributes; + +/// +/// Telemetry record for tracking Maven CLI file cleanup operations. +/// +public class MavenCliCleanupTelemetryRecord : BaseDetectionTelemetryRecord +{ + /// + public override string RecordName => "MavenCliCleanup"; + + /// + /// Gets or sets the number of files successfully cleaned up. + /// + [Metric] + public int FilesCleanedCount { get; set; } + + /// + /// Gets or sets the number of files that failed to be cleaned up. + /// + [Metric] + public int FilesFailedCount { get; set; } + + /// + /// Gets or sets the source directory that was scanned for cleanup. + /// + public string? SourceDirectory { get; set; } +} diff --git a/src/Microsoft.ComponentDetection.Detectors/maven/MavenCommandService.cs b/src/Microsoft.ComponentDetection.Detectors/maven/MavenCommandService.cs index 06a98a378..98593a4b0 100644 --- a/src/Microsoft.ComponentDetection.Detectors/maven/MavenCommandService.cs +++ b/src/Microsoft.ComponentDetection.Detectors/maven/MavenCommandService.cs @@ -50,7 +50,7 @@ public MavenCommandService( this.logger = logger; } - public string BcdeMvnDependencyFileName => "bcde.mvndeps"; + public string BcdeMvnDependencyFileName => MavenConstants.BcdeMvnDependencyFileName; public async Task MavenCLIExistsAsync() { diff --git a/src/Microsoft.ComponentDetection.Detectors/maven/MavenConstants.cs b/src/Microsoft.ComponentDetection.Detectors/maven/MavenConstants.cs new file mode 100644 index 000000000..ceb1e8bee --- /dev/null +++ b/src/Microsoft.ComponentDetection.Detectors/maven/MavenConstants.cs @@ -0,0 +1,23 @@ +namespace Microsoft.ComponentDetection.Detectors.Maven; + +/// +/// Shared constants for Maven detectors. +/// +public static class MavenConstants +{ + /// + /// The filename for Maven CLI dependency output files. + /// This file is generated by MavenCommandService and consumed by Maven detectors. + /// + public const string BcdeMvnDependencyFileName = "bcde.mvndeps"; + + /// + /// Detector ID for MvnCliComponentDetector. + /// + public const string MvnCliDetectorId = "MvnCli"; + + /// + /// Detector ID for MavenWithFallbackDetector. + /// + public const string MavenWithFallbackDetectorId = "MavenWithFallback"; +} diff --git a/src/Microsoft.ComponentDetection.Detectors/maven/MavenWithFallbackDetector.cs b/src/Microsoft.ComponentDetection.Detectors/maven/MavenWithFallbackDetector.cs index 806763bae..fb3d8cbfc 100644 --- a/src/Microsoft.ComponentDetection.Detectors/maven/MavenWithFallbackDetector.cs +++ b/src/Microsoft.ComponentDetection.Detectors/maven/MavenWithFallbackDetector.cs @@ -159,7 +159,7 @@ public MavenWithFallbackDetector( this.Logger = logger; } - public override string Id => "MavenWithFallback"; + public override string Id => MavenConstants.MavenWithFallbackDetectorId; public override IList SearchPatterns => [MavenManifest]; @@ -213,11 +213,61 @@ private void LogInfo(string message) => private void LogWarning(string message) => this.Logger.LogWarning("{DetectorId}: {Message}", this.Id, message); + /// + /// Resets all per-scan state to prevent stale data from leaking between scans. + /// This is critical because detectors are registered as singletons. + /// + private void ResetScanState() + { + // Clear all concurrent collections + this.collectedVariables.Clear(); + this.mavenParentChildRelationships.Clear(); + this.processedMavenProjects.Clear(); + this.parentPomCache.Clear(); + + // Drain all concurrent queues + while (this.pendingComponents.TryDequeue(out _)) + { + // Intentionally empty - just draining the queue + } + + while (this.unresolvedParentRelationships.TryDequeue(out _)) + { + // Intentionally empty - just draining the queue + } + + while (this.originalPomFiles.TryDequeue(out _)) + { + // Intentionally empty - just draining the queue + } + + while (this.mavenCliErrors.TryDequeue(out _)) + { + // Intentionally empty - just draining the queue + } + + while (this.failedEndpoints.TryDequeue(out _)) + { + // Intentionally empty - just draining the queue + } + + // Reset telemetry counters and flags + this.usedDetectionMethod = MavenDetectionMethod.None; + this.fallbackReason = MavenFallbackReason.None; + this.mvnCliComponentCount = 0; + this.staticParserComponentCount = 0; + this.mavenCliAvailable = false; + } + protected override async Task> OnPrepareDetectionAsync( IObservable processRequests, IDictionary detectorArgs, CancellationToken cancellationToken = default) { + // Reset all per-scan state to prevent stale data from previous scans + // This is critical because detectors are registered as singletons + this.ResetScanState(); + // Wrap the entire method in a try-catch with timeout to protect against hangs. // OnPrepareDetectionAsync doesn't have the same guardrails as OnFileFoundAsync, // so we need to be extra careful in this experimental detector. diff --git a/src/Microsoft.ComponentDetection.Detectors/maven/MvnCliComponentDetector.cs b/src/Microsoft.ComponentDetection.Detectors/maven/MvnCliComponentDetector.cs index 7b9e9c88b..10d3d112d 100644 --- a/src/Microsoft.ComponentDetection.Detectors/maven/MvnCliComponentDetector.cs +++ b/src/Microsoft.ComponentDetection.Detectors/maven/MvnCliComponentDetector.cs @@ -35,7 +35,7 @@ public MvnCliComponentDetector( this.Logger = logger; } - public override string Id => "MvnCli"; + public override string Id => MavenConstants.MvnCliDetectorId; public override IList SearchPatterns => [MavenManifest]; diff --git a/src/Microsoft.ComponentDetection.Orchestrator/Services/DetectorProcessingService.cs b/src/Microsoft.ComponentDetection.Orchestrator/Services/DetectorProcessingService.cs index 1b30fcd81..f45e27058 100644 --- a/src/Microsoft.ComponentDetection.Orchestrator/Services/DetectorProcessingService.cs +++ b/src/Microsoft.ComponentDetection.Orchestrator/Services/DetectorProcessingService.cs @@ -6,6 +6,7 @@ namespace Microsoft.ComponentDetection.Orchestrator.Services; using System.Collections.Generic; using System.Diagnostics; using System.IO; +using System.IO.Enumeration; using System.Linq; using System.Text.Json; using System.Threading; @@ -16,6 +17,7 @@ namespace Microsoft.ComponentDetection.Orchestrator.Services; using Microsoft.ComponentDetection.Common.Telemetry.Records; using Microsoft.ComponentDetection.Contracts; using Microsoft.ComponentDetection.Contracts.BcdeModels; +using Microsoft.ComponentDetection.Detectors.Maven; using Microsoft.ComponentDetection.Orchestrator.Commands; using Microsoft.ComponentDetection.Orchestrator.Experiments; using Microsoft.Extensions.Logging; @@ -29,17 +31,20 @@ internal class DetectorProcessingService : IDetectorProcessingService private const int ProcessTimeoutBufferSeconds = 5; private readonly IObservableDirectoryWalkerFactory scanner; + private readonly IPathUtilityService pathUtilityService; private readonly ILogger logger; private readonly IExperimentService experimentService; private readonly IAnsiConsole console; public DetectorProcessingService( IObservableDirectoryWalkerFactory scanner, + IPathUtilityService pathUtilityService, IExperimentService experimentService, ILogger logger, IAnsiConsole console = null) { this.scanner = scanner; + this.pathUtilityService = pathUtilityService; this.experimentService = experimentService; this.logger = logger; this.console = console ?? AnsiConsole.Console; @@ -161,7 +166,11 @@ public async Task ProcessDetectorsAsync( await this.experimentService.FinishAsync(); // Clean up Maven CLI temporary files after all detectors have finished - await this.CleanupMavenFilesAsync(settings.SourceDirectory, detectors); + // Only cleanup if CleanupCreatedFiles is true (default) to respect user settings + if (settings.CleanupCreatedFiles ?? true) + { + this.CleanupMavenFiles(settings.SourceDirectory, detectors); + } var detectorProcessingResult = this.ConvertDetectorResultsIntoResult(results, exitCode); @@ -429,47 +438,106 @@ private void LogTabularOutput(ConcurrentDictionary pr /// /// Cleans up Maven CLI temporary files after all detectors have finished. /// This prevents race conditions between MvnCliComponentDetector and MavenWithFallbackDetector. + /// Uses the same symlink-aware traversal pattern as FastDirectoryWalkerFactory to handle + /// circular symlinks in large repositories. /// - private async Task CleanupMavenFilesAsync(DirectoryInfo sourceDirectory, IEnumerable detectors) + private void CleanupMavenFiles(DirectoryInfo sourceDirectory, IEnumerable detectors) { - // Only clean up if Maven detectors are running - var mavenDetectorIds = new[] { "MvnCli", "MavenWithFallback" }; - var hasMavenDetectors = detectors.Any(d => mavenDetectorIds.Contains(d.Id)); + // Only clean up if Maven detectors are running - use shared constants to stay in sync + var hasMavenDetectors = detectors.Any(d => + d.Id == MavenConstants.MvnCliDetectorId || + d.Id == MavenConstants.MavenWithFallbackDetectorId); if (!hasMavenDetectors) { return; } - try + using var telemetryRecord = new MavenCliCleanupTelemetryRecord { - var searchPattern = "*.mvndeps"; // Simple file pattern for Directory.GetFiles + SourceDirectory = sourceDirectory.FullName, + }; + try + { this.logger.LogDebug("Starting Maven CLI cleanup in directory: {SourceDirectory}", sourceDirectory.FullName); - await Task.Run(() => + var cleanedCount = 0; + var failedCount = 0; + + // Track visited directories by their real (resolved) paths to handle circular symlinks + // This follows the same pattern used by FastDirectoryWalkerFactory + var visitedDirectories = new ConcurrentDictionary(StringComparer.OrdinalIgnoreCase); + + // Use FileSystemEnumerable with symlink-aware recursion predicate + // This matches the pattern in FastDirectoryWalkerFactory for safe directory traversal + var fileEnumerable = new FileSystemEnumerable( + sourceDirectory.FullName, + (ref FileSystemEntry entry) => entry.ToFullPath(), + new EnumerationOptions + { + RecurseSubdirectories = true, + IgnoreInaccessible = true, + }) { - // Search recursively for Maven CLI dependency files - var filesToClean = Directory.GetFiles(sourceDirectory.FullName, searchPattern, SearchOption.AllDirectories); + ShouldIncludePredicate = (ref FileSystemEntry entry) => + !entry.IsDirectory && + entry.FileName.Equals(MavenConstants.BcdeMvnDependencyFileName, StringComparison.OrdinalIgnoreCase), + + // Handle symlinks to prevent infinite loops - same pattern as FastDirectoryWalkerFactory + ShouldRecursePredicate = (ref FileSystemEntry entry) => + { + if (!entry.IsDirectory) + { + return false; + } - foreach (var file in filesToClean) + var directoryPath = entry.ToFullPath(); + var realPath = directoryPath; + + // Check if this is a symlink (reparse point) and resolve to real path + if (entry.Attributes.HasFlag(FileAttributes.ReparsePoint)) + { + realPath = this.pathUtilityService.ResolvePhysicalPath(directoryPath); + + // If we can't resolve the path, skip this directory + if (string.IsNullOrEmpty(realPath)) + { + return false; + } + } + + // Only recurse if we haven't visited this real path before + return visitedDirectories.TryAdd(realPath, true); + }, + }; + + // Use Parallel.ForEach for concurrent deletion with better throughput + Parallel.ForEach( + fileEnumerable, + new ParallelOptions { MaxDegreeOfParallelism = Environment.ProcessorCount }, + filePath => { try { - File.Delete(file); - this.logger.LogDebug("Cleaned up Maven CLI file: {File}", file); + File.Delete(filePath); + Interlocked.Increment(ref cleanedCount); + this.logger.LogDebug("Cleaned up Maven CLI file: {File}", filePath); } catch (Exception ex) { - this.logger.LogDebug(ex, "Failed to clean up Maven CLI file: {File}", file); + Interlocked.Increment(ref failedCount); + this.logger.LogDebug(ex, "Failed to clean up Maven CLI file: {File}", filePath); } - } + }); - if (filesToClean.Length > 0) - { - this.logger.LogDebug("Maven CLI cleanup completed. Removed {Count} files.", filesToClean.Length); - } - }); + telemetryRecord.FilesCleanedCount = cleanedCount; + telemetryRecord.FilesFailedCount = failedCount; + + if (cleanedCount > 0 || failedCount > 0) + { + this.logger.LogDebug("Maven CLI cleanup completed. Removed {CleanedCount} files, failed {FailedCount} files.", cleanedCount, failedCount); + } } catch (Exception ex) { diff --git a/test/Microsoft.ComponentDetection.Detectors.Tests/MavenWithFallbackDetectorTests.cs b/test/Microsoft.ComponentDetection.Detectors.Tests/MavenWithFallbackDetectorTests.cs index 021f42cd8..2ef00073b 100644 --- a/test/Microsoft.ComponentDetection.Detectors.Tests/MavenWithFallbackDetectorTests.cs +++ b/test/Microsoft.ComponentDetection.Detectors.Tests/MavenWithFallbackDetectorTests.cs @@ -2187,23 +2187,22 @@ public async Task TestPerformanceOfSmartLoopPrevention() .ReturnsAsync(false); // Create multiple POMs that will trigger directory traversal + // This validates that smart loop prevention completes without hanging + // (algorithmic validation rather than timing-based) var pom1Content = CreatePomWithParentReference("project1", "parent-project", "1.0.0"); var pom2Content = CreatePomWithParentReference("project2", "parent-project", "1.0.0"); var pom3Content = CreatePomWithParentReference("project3", "parent-project", "1.0.0"); - var stopwatch = System.Diagnostics.Stopwatch.StartNew(); - - // Act - Process multiple POMs + // Act - Process multiple POMs (test validates completion, not timing) var (scanResult, componentRecorder) = await this.DetectorTestUtility .WithFile("project1/pom.xml", pom1Content) .WithFile("project2/pom.xml", pom2Content) .WithFile("project3/pom.xml", pom3Content) .ExecuteDetectorAsync(); - stopwatch.Stop(); - - // Assert - Should complete quickly (well under 1 second for this simple test) - stopwatch.ElapsedMilliseconds.Should().BeLessThan(1000, "Smart loop prevention should not significantly impact performance"); + // Assert - Should complete without hanging and produce correct results + // The test passing validates that smart loop prevention works algorithmically + // (if it had infinite loops, the test would timeout/hang rather than fail assertions) scanResult.ResultCode.Should().Be(ProcessingResultCode.Success); // Should have detected direct dependencies from all 3 POMs diff --git a/test/Microsoft.ComponentDetection.Orchestrator.Tests/Services/DetectorProcessingServiceTests.cs b/test/Microsoft.ComponentDetection.Orchestrator.Tests/Services/DetectorProcessingServiceTests.cs index cd76d06de..2eec47db6 100644 --- a/test/Microsoft.ComponentDetection.Orchestrator.Tests/Services/DetectorProcessingServiceTests.cs +++ b/test/Microsoft.ComponentDetection.Orchestrator.Tests/Services/DetectorProcessingServiceTests.cs @@ -42,6 +42,7 @@ public class DetectorProcessingServiceTests private readonly Mock> loggerMock; private readonly DetectorProcessingService serviceUnderTest; private readonly Mock directoryWalkerFactory; + private readonly Mock pathUtilityServiceMock; private readonly Mock experimentServiceMock; private readonly Mock consoleMock; @@ -60,9 +61,15 @@ public DetectorProcessingServiceTests() this.experimentServiceMock = new Mock(); this.loggerMock = new Mock>(); this.directoryWalkerFactory = new Mock(); + this.pathUtilityServiceMock = new Mock(); this.consoleMock = new Mock(); + + // Setup path utility to return the same path by default (no symlinks) + this.pathUtilityServiceMock.Setup(x => x.ResolvePhysicalPath(It.IsAny())) + .Returns(path => path); + this.serviceUnderTest = - new DetectorProcessingService(this.directoryWalkerFactory.Object, this.experimentServiceMock.Object, this.loggerMock.Object, this.consoleMock.Object); + new DetectorProcessingService(this.directoryWalkerFactory.Object, this.pathUtilityServiceMock.Object, this.experimentServiceMock.Object, this.loggerMock.Object, this.consoleMock.Object); this.firstFileComponentDetectorMock = this.SetupFileDetectorMock("firstFileDetectorId"); this.secondFileComponentDetectorMock = this.SetupFileDetectorMock("secondFileDetectorId"); @@ -666,8 +673,9 @@ public async Task ProcessDetectorsAsync_WithMavenDetectors_ShouldCleanupMavenFil var tempDir = Path.Combine(Path.GetTempPath(), $"maven_cleanup_test_{Guid.NewGuid()}"); Directory.CreateDirectory(tempDir); - var testFile1 = Path.Combine(tempDir, "test1.mvndeps"); - var testFile2 = Path.Combine(tempDir, "subdir", "test2.mvndeps"); + // Use bcde.mvndeps - the specific filename generated by MavenCommandService + var testFile1 = Path.Combine(tempDir, "bcde.mvndeps"); + var testFile2 = Path.Combine(tempDir, "subdir", "bcde.mvndeps"); var nonMavenFile = Path.Combine(tempDir, "regular.txt"); Directory.CreateDirectory(Path.GetDirectoryName(testFile2)); @@ -742,7 +750,8 @@ public async Task ProcessDetectorsAsync_WithoutMavenDetectors_ShouldNotCleanupMa var tempDir = Path.Combine(Path.GetTempPath(), $"maven_cleanup_test_{Guid.NewGuid()}"); Directory.CreateDirectory(tempDir); - var testFile = Path.Combine(tempDir, "test.mvndeps"); + // Use bcde.mvndeps - the specific filename generated by MavenCommandService + var testFile = Path.Combine(tempDir, "bcde.mvndeps"); await File.WriteAllTextAsync(testFile, "maven dependency content"); try @@ -797,7 +806,9 @@ public async Task ProcessDetectorsAsync_OnlyOneMavenDetector_ShouldStillCleanupM // Arrange - Create a temporary directory with mock Maven dependency files var tempDir = Path.Combine(Path.GetTempPath(), $"maven_cleanup_test_{Guid.NewGuid()}"); Directory.CreateDirectory(tempDir); - var testFile = Path.Combine(tempDir, "test.mvndeps"); + + // Use bcde.mvndeps - the specific filename generated by MavenCommandService + var testFile = Path.Combine(tempDir, "bcde.mvndeps"); await File.WriteAllTextAsync(testFile, "maven dependency content"); try From 143781cd23fb8b5adcf0d4b51e653cdd99b79e5a Mon Sep 17 00:00:00 2001 From: Zheng Hao Tang Date: Mon, 23 Mar 2026 17:21:27 -0700 Subject: [PATCH 7/7] Nit --- .../maven/MavenWithFallbackDetector.cs | 37 ++++++++++++------- .../Services/DetectorProcessingService.cs | 34 +++++++++++------ 2 files changed, 45 insertions(+), 26 deletions(-) diff --git a/src/Microsoft.ComponentDetection.Detectors/maven/MavenWithFallbackDetector.cs b/src/Microsoft.ComponentDetection.Detectors/maven/MavenWithFallbackDetector.cs index fb3d8cbfc..b57fc33b1 100644 --- a/src/Microsoft.ComponentDetection.Detectors/maven/MavenWithFallbackDetector.cs +++ b/src/Microsoft.ComponentDetection.Detectors/maven/MavenWithFallbackDetector.cs @@ -141,6 +141,8 @@ public class MavenWithFallbackDetector : FileComponentDetector, IExperimentalDet private MavenFallbackReason fallbackReason = MavenFallbackReason.None; private int mvnCliComponentCount; private int staticParserComponentCount; + private int unresolvedVariableCount; + private int pendingComponentCountBeforeResolution; private bool mavenCliAvailable; public MavenWithFallbackDetector( @@ -207,9 +209,6 @@ private static bool IsAuthenticationError(string errorMessage) private void LogDebugWithId(string message) => this.Logger.LogDebug("{DetectorId}: {Message}", this.Id, message); - private void LogInfo(string message) => - this.Logger.LogInformation("{DetectorId}: {Message}", this.Id, message); - private void LogWarning(string message) => this.Logger.LogWarning("{DetectorId}: {Message}", this.Id, message); @@ -256,6 +255,8 @@ private void ResetScanState() this.fallbackReason = MavenFallbackReason.None; this.mvnCliComponentCount = 0; this.staticParserComponentCount = 0; + this.unresolvedVariableCount = 0; + this.pendingComponentCountBeforeResolution = 0; this.mavenCliAvailable = false; } @@ -334,7 +335,7 @@ private bool ShouldSkipMavenCli() { if (this.envVarService.IsEnvironmentVariableValueTrue(DisableMvnCliEnvVar)) { - this.LogInfo($"MvnCli detection disabled via {DisableMvnCliEnvVar} environment variable. Using static pom.xml parsing only."); + this.LogDebugWithId($"MvnCli detection disabled via {DisableMvnCliEnvVar} environment variable. Using static pom.xml parsing only."); this.usedDetectionMethod = MavenDetectionMethod.StaticParserOnly; this.fallbackReason = MavenFallbackReason.MvnCliDisabledByUser; this.mavenCliAvailable = false; @@ -354,7 +355,7 @@ private async Task TryInitializeMavenCliAsync() if (!this.mavenCliAvailable) { - this.LogInfo("Maven CLI not found in PATH. Will use static pom.xml parsing only."); + this.LogDebugWithId("Maven CLI not found in PATH. Will use static pom.xml parsing only."); this.usedDetectionMethod = MavenDetectionMethod.StaticParserOnly; this.fallbackReason = MavenFallbackReason.MavenCliNotAvailable; return false; @@ -571,14 +572,15 @@ protected override Task OnDetectionFinishedAsync() this.Telemetry["MavenCliAvailable"] = this.mavenCliAvailable.ToString(); this.Telemetry["OriginalPomFileCount"] = this.originalPomFiles.Count.ToString(); this.Telemetry["CollectedVariableCount"] = this.collectedVariables.Count.ToString(); - this.Telemetry["PendingComponentCount"] = this.pendingComponents.Count.ToString(); + this.Telemetry["PendingComponentCount"] = this.pendingComponentCountBeforeResolution.ToString(); + this.Telemetry["UnresolvedVariableCount"] = this.unresolvedVariableCount.ToString(); if (!this.failedEndpoints.IsEmpty) { this.Telemetry["FailedEndpoints"] = string.Join(";", this.failedEndpoints.Distinct().Take(10)); } - this.LogInfo($"Detection completed. Method: {detectionMethodStr}, " + + this.LogDebugWithId($"Detection completed. Method: {detectionMethodStr}, " + $"FallbackReason: {fallbackReasonStr}, " + $"MvnCli components: {mvnCliCountStr}, " + $"Static parser components: {staticCountStr}"); @@ -1070,7 +1072,7 @@ private void ResolveUnresolvedParentRelationships() if (resolvedCount > 0 || unresolvedCount > 0) { - this.LogInfo($"Resolved {resolvedCount} deferred parent relationships, {unresolvedCount} remain unresolved"); + this.LogDebugWithId($"Second pass (parent resolution) completed: {resolvedCount} deferred parent relationships resolved, {unresolvedCount} remain unresolved"); } } @@ -1113,6 +1115,9 @@ private string FindParentPomByCoordinatesOnly(string parentGroupId, string paren /// private void ResolvePendingComponents() { + // Capture count before draining for accurate telemetry + this.pendingComponentCountBeforeResolution = this.pendingComponents.Count; + var resolvedCount = 0; var skippedCount = 0; @@ -1152,7 +1157,7 @@ private void ResolvePendingComponents() } } - this.LogInfo($"Second pass completed: {resolvedCount} components resolved, {skippedCount} skipped due to unresolved variables"); + this.LogDebugWithId($"Third pass (variable resolution) completed: {resolvedCount} components resolved, {skippedCount} skipped due to unresolved variables"); } /// @@ -1180,9 +1185,12 @@ private string ResolveVersionWithHierarchyAwareness(string versionTemplate, stri } else { - // Variable not found in Maven hierarchy - log for debugging - this.Logger.LogWarning( - "MavenWithFallback: Variable {Variable} not found in Maven hierarchy for {File}", + // Variable not found in Maven hierarchy - log at debug level since unresolved + // properties are common (profiles, external parents, etc.) and aggregate count in telemetry + Interlocked.Increment(ref this.unresolvedVariableCount); + this.Logger.LogDebug( + "{DetectorId}: Variable {Variable} not found in Maven hierarchy for {File}", + this.Id, variable, Path.GetFileName(requestingFilePath)); } @@ -1210,8 +1218,9 @@ private string ResolveVersionWithHierarchyAwareness(string versionTemplate, stri // Prevent infinite loops from circular parent references if (!visitedFiles.Add(currentFile)) { - this.Logger.LogWarning( - "MavenWithFallback: Circular parent reference detected while resolving variable {Variable}, breaking at {File}", + this.Logger.LogDebug( + "{DetectorId}: Circular parent reference detected while resolving variable {Variable}, breaking at {File}", + this.Id, variable, Path.GetFileName(currentFile)); break; diff --git a/src/Microsoft.ComponentDetection.Orchestrator/Services/DetectorProcessingService.cs b/src/Microsoft.ComponentDetection.Orchestrator/Services/DetectorProcessingService.cs index f45e27058..550b94f5e 100644 --- a/src/Microsoft.ComponentDetection.Orchestrator/Services/DetectorProcessingService.cs +++ b/src/Microsoft.ComponentDetection.Orchestrator/Services/DetectorProcessingService.cs @@ -492,23 +492,33 @@ private void CleanupMavenFiles(DirectoryInfo sourceDirectory, IEnumerable