diff --git a/src/Microsoft.ComponentDetection.Common/Telemetry/Records/MavenCliCleanupTelemetryRecord.cs b/src/Microsoft.ComponentDetection.Common/Telemetry/Records/MavenCliCleanupTelemetryRecord.cs new file mode 100644 index 000000000..0261c3c04 --- /dev/null +++ b/src/Microsoft.ComponentDetection.Common/Telemetry/Records/MavenCliCleanupTelemetryRecord.cs @@ -0,0 +1,29 @@ +namespace Microsoft.ComponentDetection.Common.Telemetry.Records; + +using Microsoft.ComponentDetection.Common.Telemetry.Attributes; + +/// +/// Telemetry record for tracking Maven CLI file cleanup operations. +/// +public class MavenCliCleanupTelemetryRecord : BaseDetectionTelemetryRecord +{ + /// + public override string RecordName => "MavenCliCleanup"; + + /// + /// Gets or sets the number of files successfully cleaned up. + /// + [Metric] + public int FilesCleanedCount { get; set; } + + /// + /// Gets or sets the number of files that failed to be cleaned up. + /// + [Metric] + public int FilesFailedCount { get; set; } + + /// + /// Gets or sets the source directory that was scanned for cleanup. + /// + public string? SourceDirectory { get; set; } +} diff --git a/src/Microsoft.ComponentDetection.Detectors/maven/IMavenCommandService.cs b/src/Microsoft.ComponentDetection.Detectors/maven/IMavenCommandService.cs index 309cbf40e..32169b9e2 100644 --- a/src/Microsoft.ComponentDetection.Detectors/maven/IMavenCommandService.cs +++ b/src/Microsoft.ComponentDetection.Detectors/maven/IMavenCommandService.cs @@ -14,19 +14,4 @@ public interface IMavenCommandService Task GenerateDependenciesFileAsync(ProcessRequest processRequest, CancellationToken cancellationToken = default); void ParseDependenciesFile(ProcessRequest processRequest); - - /// - /// Registers that a detector is actively reading a dependency file. - /// This prevents premature deletion by other detectors. - /// - /// The path to the dependency file being read. - void RegisterFileReader(string dependencyFilePath); - - /// - /// Unregisters a detector's active reading of a dependency file and attempts cleanup. - /// If no other detectors are reading the file, it will be safely deleted. - /// - /// The path to the dependency file that was being read. - /// The identifier of the detector unregistering the file reader. - void UnregisterFileReader(string dependencyFilePath, string detectorId = null); } diff --git a/src/Microsoft.ComponentDetection.Detectors/maven/MavenCommandService.cs b/src/Microsoft.ComponentDetection.Detectors/maven/MavenCommandService.cs index 66dd17d0b..98593a4b0 100644 --- a/src/Microsoft.ComponentDetection.Detectors/maven/MavenCommandService.cs +++ b/src/Microsoft.ComponentDetection.Detectors/maven/MavenCommandService.cs @@ -33,12 +33,6 @@ internal class MavenCommandService : IMavenCommandService /// private readonly ConcurrentDictionary completedLocations = new(); - /// - /// Tracks the number of active readers for each dependency file. - /// Used for safe file cleanup coordination between detectors. - /// - private readonly ConcurrentDictionary fileReaderCounts = new(); - private readonly ICommandLineInvocationService commandLineInvocationService; private readonly IMavenStyleDependencyGraphParserService parserService; private readonly IEnvironmentVariableService envVarService; @@ -56,7 +50,7 @@ public MavenCommandService( this.logger = logger; } - public string BcdeMvnDependencyFileName => "bcde.mvndeps"; + public string BcdeMvnDependencyFileName => MavenConstants.BcdeMvnDependencyFileName; public async Task MavenCLIExistsAsync() { @@ -69,9 +63,6 @@ public async Task GenerateDependenciesFileAsync(ProcessRequest p var pomDir = Path.GetDirectoryName(pomFile.Location); var depsFilePath = Path.Combine(pomDir, this.BcdeMvnDependencyFileName); - // Register as file reader immediately to prevent premature cleanup - this.RegisterFileReader(depsFilePath); - // Check the cache before acquiring the semaphore to allow fast-path returns // even when cancellation has been requested. if (this.completedLocations.TryGetValue(pomFile.Location, out var cachedResult) @@ -169,68 +160,4 @@ public void ParseDependenciesFile(ProcessRequest processRequest) var lines = sr.ReadToEnd().Split(new[] { Environment.NewLine }, StringSplitOptions.RemoveEmptyEntries); this.parserService.Parse(lines, processRequest.SingleFileComponentRecorder); } - - /// - /// Registers that a detector is actively reading a dependency file. - /// This prevents premature deletion by other detectors. - /// - /// The path to the dependency file being read. - public void RegisterFileReader(string dependencyFilePath) - { - this.fileReaderCounts.AddOrUpdate(dependencyFilePath, 1, (key, count) => count + 1); - this.logger.LogDebug( - "Registered file reader for {DependencyFilePath}, count: {Count}", - dependencyFilePath, - this.fileReaderCounts[dependencyFilePath]); - } - - /// - /// Unregisters a detector's active reading of a dependency file and attempts cleanup. - /// If no other detectors are reading the file, it will be safely deleted. - /// - /// The path to the dependency file that was being read. - /// The identifier of the detector unregistering the file reader. - public void UnregisterFileReader(string dependencyFilePath, string detectorId = null) - { - var newCount = this.fileReaderCounts.AddOrUpdate(dependencyFilePath, 0, (key, count) => Math.Max(0, count - 1)); - this.logger.LogDebug( - "{DetectorId}: Unregistered file reader for {DependencyFilePath}, count: {Count}", - detectorId ?? "Unknown", - dependencyFilePath, - newCount); - - // If no readers remain, attempt cleanup - if (newCount == 0) - { - this.TryDeleteDependencyFileIfNotInUse(dependencyFilePath, detectorId); - } - } - - /// - /// Attempts to delete a dependency file if no detectors are currently using it. - /// - /// The path to the dependency file to delete. - /// The identifier of the detector requesting the deletion. - private void TryDeleteDependencyFileIfNotInUse(string dependencyFilePath, string detectorId = null) - { - var detector = detectorId ?? "Unknown"; - - // Safe to delete - no readers are using the file (count was already verified to be 0) - try - { - if (File.Exists(dependencyFilePath)) - { - File.Delete(dependencyFilePath); - this.logger.LogDebug("{DetectorId}: Successfully deleted dependency file {DependencyFilePath}", detector, dependencyFilePath); - } - else - { - this.logger.LogDebug("{DetectorId}: Dependency file {DependencyFilePath} was already deleted", detector, dependencyFilePath); - } - } - catch (Exception ex) - { - this.logger.LogWarning(ex, "{DetectorId}: Failed to delete dependency file {DependencyFilePath}", detector, dependencyFilePath); - } - } } diff --git a/src/Microsoft.ComponentDetection.Detectors/maven/MavenConstants.cs b/src/Microsoft.ComponentDetection.Detectors/maven/MavenConstants.cs new file mode 100644 index 000000000..ceb1e8bee --- /dev/null +++ b/src/Microsoft.ComponentDetection.Detectors/maven/MavenConstants.cs @@ -0,0 +1,23 @@ +namespace Microsoft.ComponentDetection.Detectors.Maven; + +/// +/// Shared constants for Maven detectors. +/// +public static class MavenConstants +{ + /// + /// The filename for Maven CLI dependency output files. + /// This file is generated by MavenCommandService and consumed by Maven detectors. + /// + public const string BcdeMvnDependencyFileName = "bcde.mvndeps"; + + /// + /// Detector ID for MvnCliComponentDetector. + /// + public const string MvnCliDetectorId = "MvnCli"; + + /// + /// Detector ID for MavenWithFallbackDetector. + /// + public const string MavenWithFallbackDetectorId = "MavenWithFallback"; +} diff --git a/src/Microsoft.ComponentDetection.Detectors/maven/MavenWithFallbackDetector.cs b/src/Microsoft.ComponentDetection.Detectors/maven/MavenWithFallbackDetector.cs index f458727a0..b57fc33b1 100644 --- a/src/Microsoft.ComponentDetection.Detectors/maven/MavenWithFallbackDetector.cs +++ b/src/Microsoft.ComponentDetection.Detectors/maven/MavenWithFallbackDetector.cs @@ -109,7 +109,19 @@ public class MavenWithFallbackDetector : FileComponentDetector, IExperimentalDet private readonly IMavenCommandService mavenCommandService; private readonly IEnvironmentVariableService envVarService; private readonly IFileUtilityService fileUtilityService; - private readonly Dictionary documentsLoaded = []; + + // Two-pass static parsing: collect variables first, then resolve components + private readonly ConcurrentDictionary collectedVariables = new(); + private readonly ConcurrentQueue pendingComponents = new(); + + // Track Maven parent-child relationships for proper variable resolution + private readonly ConcurrentDictionary mavenParentChildRelationships = new(); + + // Track processed Maven projects by coordinates (groupId:artifactId -> file path) + private readonly ConcurrentDictionary processedMavenProjects = new(); + + // Track files that couldn't establish parent relationships during first pass (for second pass re-evaluation) + private readonly ConcurrentQueue<(string FilePath, string ParentGroupId, string ParentArtifactId)> unresolvedParentRelationships = new(); // Track original pom.xml files for potential fallback private readonly ConcurrentQueue originalPomFiles = []; @@ -118,11 +130,19 @@ public class MavenWithFallbackDetector : FileComponentDetector, IExperimentalDet private readonly ConcurrentQueue mavenCliErrors = []; private readonly ConcurrentQueue failedEndpoints = []; + /// + /// Cache for parent POM lookups to avoid repeated file system operations. + /// Key: current file path, Value: parent POM path or empty string if not found. + /// + private readonly ConcurrentDictionary parentPomCache = new(); + // Telemetry tracking private MavenDetectionMethod usedDetectionMethod = MavenDetectionMethod.None; private MavenFallbackReason fallbackReason = MavenFallbackReason.None; private int mvnCliComponentCount; private int staticParserComponentCount; + private int unresolvedVariableCount; + private int pendingComponentCountBeforeResolution; private bool mavenCliAvailable; public MavenWithFallbackDetector( @@ -141,7 +161,7 @@ public MavenWithFallbackDetector( this.Logger = logger; } - public override string Id => "MavenWithFallback"; + public override string Id => MavenConstants.MavenWithFallbackDetectorId; public override IList SearchPatterns => [MavenManifest]; @@ -153,23 +173,102 @@ public MavenWithFallbackDetector( // Normalizes a directory path by ensuring it ends with a directory separator. // This prevents false matches like "C:\foo" matching "C:\foobar". - private static string NormalizeDirectoryPath(string path) => - path.TrimEnd(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar) + Path.DirectorySeparatorChar; + private static string NormalizeDirectoryPath(string path) + { + if (string.IsNullOrEmpty(path)) + { + return path; + } - private void LogDebug(string message) => - this.Logger.LogDebug("{DetectorId}: {Message}", this.Id, message); + var lastChar = path[^1]; + return lastChar == Path.DirectorySeparatorChar || lastChar == Path.AltDirectorySeparatorChar + ? path + : path + Path.DirectorySeparatorChar; + } - private void LogInfo(string message) => - this.Logger.LogInformation("{DetectorId}: {Message}", this.Id, message); + private static bool IsAuthenticationError(string errorMessage) + { + if (string.IsNullOrWhiteSpace(errorMessage)) + { + return false; + } + + // Use ReadOnlySpan for more efficient string searching + var messageSpan = errorMessage.AsSpan(); + foreach (var pattern in AuthErrorPatterns) + { + if (messageSpan.Contains(pattern, StringComparison.OrdinalIgnoreCase)) + { + return true; + } + } + + return false; + } + + private void LogDebugWithId(string message) => + this.Logger.LogDebug("{DetectorId}: {Message}", this.Id, message); private void LogWarning(string message) => this.Logger.LogWarning("{DetectorId}: {Message}", this.Id, message); + /// + /// Resets all per-scan state to prevent stale data from leaking between scans. + /// This is critical because detectors are registered as singletons. + /// + private void ResetScanState() + { + // Clear all concurrent collections + this.collectedVariables.Clear(); + this.mavenParentChildRelationships.Clear(); + this.processedMavenProjects.Clear(); + this.parentPomCache.Clear(); + + // Drain all concurrent queues + while (this.pendingComponents.TryDequeue(out _)) + { + // Intentionally empty - just draining the queue + } + + while (this.unresolvedParentRelationships.TryDequeue(out _)) + { + // Intentionally empty - just draining the queue + } + + while (this.originalPomFiles.TryDequeue(out _)) + { + // Intentionally empty - just draining the queue + } + + while (this.mavenCliErrors.TryDequeue(out _)) + { + // Intentionally empty - just draining the queue + } + + while (this.failedEndpoints.TryDequeue(out _)) + { + // Intentionally empty - just draining the queue + } + + // Reset telemetry counters and flags + this.usedDetectionMethod = MavenDetectionMethod.None; + this.fallbackReason = MavenFallbackReason.None; + this.mvnCliComponentCount = 0; + this.staticParserComponentCount = 0; + this.unresolvedVariableCount = 0; + this.pendingComponentCountBeforeResolution = 0; + this.mavenCliAvailable = false; + } + protected override async Task> OnPrepareDetectionAsync( IObservable processRequests, IDictionary detectorArgs, CancellationToken cancellationToken = default) { + // Reset all per-scan state to prevent stale data from previous scans + // This is critical because detectors are registered as singletons + this.ResetScanState(); + // Wrap the entire method in a try-catch with timeout to protect against hangs. // OnPrepareDetectionAsync doesn't have the same guardrails as OnFileFoundAsync, // so we need to be extra careful in this experimental detector. @@ -236,7 +335,7 @@ private bool ShouldSkipMavenCli() { if (this.envVarService.IsEnvironmentVariableValueTrue(DisableMvnCliEnvVar)) { - this.LogInfo($"MvnCli detection disabled via {DisableMvnCliEnvVar} environment variable. Using static pom.xml parsing only."); + this.LogDebugWithId($"MvnCli detection disabled via {DisableMvnCliEnvVar} environment variable. Using static pom.xml parsing only."); this.usedDetectionMethod = MavenDetectionMethod.StaticParserOnly; this.fallbackReason = MavenFallbackReason.MvnCliDisabledByUser; this.mavenCliAvailable = false; @@ -256,13 +355,13 @@ private async Task TryInitializeMavenCliAsync() if (!this.mavenCliAvailable) { - this.LogInfo("Maven CLI not found in PATH. Will use static pom.xml parsing only."); + this.LogDebugWithId("Maven CLI not found in PATH. Will use static pom.xml parsing only."); this.usedDetectionMethod = MavenDetectionMethod.StaticParserOnly; this.fallbackReason = MavenFallbackReason.MavenCliNotAvailable; return false; } - this.LogDebug("Maven CLI is available. Running MvnCli detection."); + this.LogDebugWithId("Maven CLI is available. Running MvnCli detection."); return true; } @@ -377,7 +476,7 @@ await this.RemoveNestedPomXmls(processRequests, parentPomDictionary, cancellatio // Determine detection method based on results this.DetermineDetectionMethod(cliSuccessCount, cliFailureCount); - this.LogDebug($"Maven CLI processing complete: {cliSuccessCount} succeeded, {cliFailureCount} failed out of {this.originalPomFiles.Count} root pom.xml files. Retrieving generated dependency graphs."); + this.LogDebugWithId($"Maven CLI processing complete: {cliSuccessCount} succeeded, {cliFailureCount} failed out of {this.originalPomFiles.Count} root pom.xml files. Retrieving generated dependency graphs."); // Use comprehensive directory scanning after Maven CLI execution to find all generated dependency files // This ensures we find dependency files from submodules even if Maven CLI was only run on parent pom.xml @@ -419,7 +518,7 @@ private void DetermineDetectionMethod(int cliSuccessCount, int cliFailureCount) if (cliFailureCount == 0 && cliSuccessCount > 0) { this.usedDetectionMethod = MavenDetectionMethod.MvnCliOnly; - this.LogDebug("All pom.xml files processed successfully with Maven CLI."); + this.LogDebugWithId("All pom.xml files processed successfully with Maven CLI."); } else if (cliFailureCount > 0) { @@ -438,17 +537,8 @@ protected override Task OnFileFoundAsync( if (pattern == this.mavenCommandService.BcdeMvnDependencyFileName) { - // Process MvnCli result - file reader already registered at generation step - var depsFilePath = processRequest.ComponentStream.Location; - try - { - this.ProcessMvnCliResult(processRequest); - } - finally - { - // Unregister file reader after processing to allow cleanup - this.mavenCommandService.UnregisterFileReader(depsFilePath, this.Id); - } + // Process MvnCli result + this.ProcessMvnCliResult(processRequest); } else { @@ -461,24 +551,39 @@ protected override Task OnFileFoundAsync( protected override Task OnDetectionFinishedAsync() { - // Record telemetry - this.Telemetry["DetectionMethod"] = this.usedDetectionMethod.ToString(); - this.Telemetry["FallbackReason"] = this.fallbackReason.ToString(); - this.Telemetry["MvnCliComponentCount"] = this.mvnCliComponentCount.ToString(); - this.Telemetry["StaticParserComponentCount"] = this.staticParserComponentCount.ToString(); + // Second pass: resolve any parent relationships that couldn't be resolved during first pass + // This handles cases where parent POM was processed after child POM + this.ResolveUnresolvedParentRelationships(); + + // Third pass: resolve all pending components with collected variables and complete hierarchy + this.ResolvePendingComponents(); + + // Record telemetry - cache string conversions + var detectionMethodStr = this.usedDetectionMethod.ToString(); + var fallbackReasonStr = this.fallbackReason.ToString(); + var mvnCliCountStr = this.mvnCliComponentCount.ToString(); + var staticCountStr = this.staticParserComponentCount.ToString(); + + this.Telemetry["DetectionMethod"] = detectionMethodStr; + this.Telemetry["FallbackReason"] = fallbackReasonStr; + this.Telemetry["MvnCliComponentCount"] = mvnCliCountStr; + this.Telemetry["StaticParserComponentCount"] = staticCountStr; this.Telemetry["TotalComponentCount"] = (this.mvnCliComponentCount + this.staticParserComponentCount).ToString(); this.Telemetry["MavenCliAvailable"] = this.mavenCliAvailable.ToString(); this.Telemetry["OriginalPomFileCount"] = this.originalPomFiles.Count.ToString(); + this.Telemetry["CollectedVariableCount"] = this.collectedVariables.Count.ToString(); + this.Telemetry["PendingComponentCount"] = this.pendingComponentCountBeforeResolution.ToString(); + this.Telemetry["UnresolvedVariableCount"] = this.unresolvedVariableCount.ToString(); if (!this.failedEndpoints.IsEmpty) { this.Telemetry["FailedEndpoints"] = string.Join(";", this.failedEndpoints.Distinct().Take(10)); } - this.LogInfo($"Detection completed. Method: {this.usedDetectionMethod}, " + - $"FallbackReason: {this.fallbackReason}, " + - $"MvnCli components: {this.mvnCliComponentCount}, " + - $"Static parser components: {this.staticParserComponentCount}"); + this.LogDebugWithId($"Detection completed. Method: {detectionMethodStr}, " + + $"FallbackReason: {fallbackReasonStr}, " + + $"MvnCli components: {mvnCliCountStr}, " + + $"Static parser components: {staticCountStr}"); return Task.CompletedTask; } @@ -489,7 +594,7 @@ protected override Task OnDetectionFinishedAsync() private void AnalyzeMvnCliFailure() { // Check if any recorded errors indicate authentication failure - var hasAuthError = this.mavenCliErrors.Any(this.IsAuthenticationError); + var hasAuthError = this.mavenCliErrors.Any(IsAuthenticationError); if (hasAuthError) { @@ -531,16 +636,35 @@ private void ProcessPomFileStatically(ProcessRequest processRequest) var document = new XmlDocument(); document.Load(file.Stream); - lock (this.documentsLoaded) - { - this.documentsLoaded.TryAdd(file.Location, document); - } - + // Single XML parsing pass: create namespace manager once var namespaceManager = new XmlNamespaceManager(document.NameTable); namespaceManager.AddNamespace(ProjNamespace, MavenXmlNamespace); + // Collect variables from this document into a local dictionary first + var localVariables = new Dictionary(); + this.CollectVariablesFromDocument(document, namespaceManager, filePath, localVariables); + + // Batch add local variables to global collection for better performance + // Key format: "filePath::variableName" enables Maven hierarchy-aware lookup + if (localVariables.Count > 0) + { + var keyBuilder = new StringBuilder(filePath.Length + 64); // Pre-allocate capacity + var filePathWithSeparator = filePath + "::"; + + foreach (var (variableName, variableValue) in localVariables) + { + keyBuilder.Clear(); + keyBuilder.Append(filePathWithSeparator).Append(variableName); + var key = keyBuilder.ToString(); + + this.collectedVariables.AddOrUpdate(key, variableValue, (_, _) => variableValue); + } + + this.Logger.LogDebug("MavenWithFallback: Collected {Count} variables from {File}", localVariables.Count, Path.GetFileName(filePath)); + } + + // First pass: collect dependencies (may have unresolved variables) var dependencyList = document.SelectNodes(DependencyNode, namespaceManager); - var componentsFoundInFile = 0; foreach (XmlNode dependency in dependencyList) { @@ -556,31 +680,39 @@ private void ProcessPomFileStatically(ProcessRequest processRequest) if (version != null && !version.InnerText.Contains(',')) { var versionRef = version.InnerText.Trim('[', ']'); - string versionString; if (versionRef.StartsWith("${")) { - versionString = this.ResolveVersion(versionRef, document, file.Location); + // Only resolve immediately if local variable exists (highest priority) + // Otherwise, defer to second pass to ensure proper hierarchy-aware resolution + var resolvedVersion = this.ResolveVersionFromLocalOnly(versionRef, localVariables); + if (!resolvedVersion.StartsWith("${")) + { + // Local variable found - resolve immediately (highest priority) + var component = new MavenComponent(groupId, artifactId, resolvedVersion); + var detectedComponent = new DetectedComponent(component); + singleFileComponentRecorder.RegisterUsage(detectedComponent); + Interlocked.Increment(ref this.staticParserComponentCount); + } + else + { + // No local variable - defer to second pass for hierarchy-aware resolution + // This ensures we consider all variable definitions before resolving + this.pendingComponents.Enqueue(new PendingComponent( + groupId, + artifactId, + versionRef, + singleFileComponentRecorder, + filePath)); + } } else { - versionString = versionRef; - } - - if (!versionString.StartsWith("${")) - { - var component = new MavenComponent(groupId, artifactId, versionString); + // Direct version - register immediately + var component = new MavenComponent(groupId, artifactId, versionRef); var detectedComponent = new DetectedComponent(component); singleFileComponentRecorder.RegisterUsage(detectedComponent); - componentsFoundInFile++; - } - else - { - this.Logger.LogDebug( - "Version string {Version} for component {Group}/{Artifact} is invalid or unsupported and a component will not be recorded.", - versionString, - groupId, - artifactId); + Interlocked.Increment(ref this.staticParserComponentCount); } } else @@ -591,8 +723,6 @@ private void ProcessPomFileStatically(ProcessRequest processRequest) artifactId); } } - - Interlocked.Add(ref this.staticParserComponentCount, componentsFoundInFile); } catch (Exception e) { @@ -600,15 +730,267 @@ private void ProcessPomFileStatically(ProcessRequest processRequest) } } - private bool IsAuthenticationError(string errorMessage) + /// + /// Collects all variable definitions from a POM document into the provided local dictionary. + /// Optimized to reuse XmlNamespaceManager and minimize XPath queries. + /// + /// The XML document to scan for variables. + /// Pre-configured namespace manager to reuse. + /// The file path for logging purposes. + /// Local dictionary to collect variables into. + private void CollectVariablesFromDocument(XmlDocument document, XmlNamespaceManager namespaceManager, string filePath, Dictionary localVariables) { - if (string.IsNullOrWhiteSpace(errorMessage)) + try { - return false; + // Query project coordinates once - used for both variable collection and project tracking + var projectGroupIdNode = document.SelectSingleNode("/proj:project/proj:groupId", namespaceManager); + var projectArtifactIdNode = document.SelectSingleNode("/proj:project/proj:artifactId", namespaceManager); + var projectVersionNode = document.SelectSingleNode("/proj:project/proj:version", namespaceManager); + + // Track this project by Maven coordinates for parent resolution (reuses queried nodes) + this.TrackMavenProjectCoordinates(document, namespaceManager, filePath, projectGroupIdNode, projectArtifactIdNode); + + // Parse Maven parent relationship to build proper hierarchy + this.ParseMavenParentRelationship(document, namespaceManager, filePath); + + // Collect properties variables from ALL properties sections (handles malformed XML with multiple ) + var propertiesNodes = document.SelectNodes("//proj:properties", namespaceManager); + if (propertiesNodes?.Count > 0) + { + if (propertiesNodes.Count > 1) + { + this.Logger.LogDebug("MavenWithFallback: Found {Count} properties sections in {File}", propertiesNodes.Count, Path.GetFileName(filePath)); + } + + foreach (XmlNode propertiesNode in propertiesNodes) + { + foreach (XmlNode propertyNode in propertiesNode.ChildNodes) + { + if (propertyNode.NodeType == XmlNodeType.Element && !string.IsNullOrWhiteSpace(propertyNode.InnerText)) + { + // Later properties sections override earlier ones (last wins - Maven behavior) + localVariables[propertyNode.Name] = propertyNode.InnerText; + } + } + } + } + + // Collect project-level variables from already-queried nodes + if (projectVersionNode != null && !string.IsNullOrWhiteSpace(projectVersionNode.InnerText)) + { + localVariables["version"] = projectVersionNode.InnerText; + localVariables["project.version"] = projectVersionNode.InnerText; + } + + if (projectGroupIdNode != null && !string.IsNullOrWhiteSpace(projectGroupIdNode.InnerText)) + { + localVariables["groupId"] = projectGroupIdNode.InnerText; + localVariables["project.groupId"] = projectGroupIdNode.InnerText; + } + + if (projectArtifactIdNode != null && !string.IsNullOrWhiteSpace(projectArtifactIdNode.InnerText)) + { + localVariables["artifactId"] = projectArtifactIdNode.InnerText; + localVariables["project.artifactId"] = projectArtifactIdNode.InnerText; + } } + catch (Exception e) + { + this.Logger.LogError(e, "Failed to collect variables from file {Path}", filePath); + } + } + + /// + /// Parses Maven parent relationship from pom.xml to build proper inheritance hierarchy. + /// This is needed for Maven-compliant variable resolution that respects parent-child relationships. + /// + /// The XML document to parse. + /// XML namespace manager for Maven POM. + /// Current pom.xml file path. + private void ParseMavenParentRelationship(XmlDocument document, XmlNamespaceManager namespaceManager, string currentFilePath) + { + try + { + // Query parent element once and access children directly (more efficient than union XPath) + var parentNode = document.SelectSingleNode("/proj:project/proj:parent", namespaceManager); + + if (parentNode != null) + { + var parentGroupId = parentNode["groupId"]?.InnerText; + var parentArtifactId = parentNode["artifactId"]?.InnerText; - return AuthErrorPatterns.Any(pattern => - errorMessage.Contains(pattern, StringComparison.OrdinalIgnoreCase)); + if (!string.IsNullOrWhiteSpace(parentArtifactId)) + { + // Try to find parent pom.xml file by searching processed files for matching artifactId + // This works if parent was processed before child + var parentPath = this.FindParentPomByArtifactId(parentGroupId, parentArtifactId, currentFilePath); + if (!string.IsNullOrEmpty(parentPath)) + { + this.mavenParentChildRelationships[currentFilePath] = parentPath; + this.Logger.LogDebug( + "MavenWithFallback: Parsed parent relationship: {Child} → {Parent}", + Path.GetFileName(currentFilePath), + Path.GetFileName(parentPath)); + } + else + { + // Parent not found yet - queue for second pass resolution after all files are processed + this.unresolvedParentRelationships.Enqueue((currentFilePath, parentGroupId, parentArtifactId)); + this.Logger.LogDebug( + "MavenWithFallback: Queued unresolved parent relationship for {Child} → {ParentArtifactId}", + Path.GetFileName(currentFilePath), + parentArtifactId); + } + } + } + } + catch (Exception e) + { + this.Logger.LogError(e, "Failed to parse parent relationship from {FilePath}", currentFilePath); + } + } + + /// + /// Finds parent pom.xml file path by Maven coordinates (groupId:artifactId). + /// First searches by coordinates among processed projects, then falls back to directory traversal. + /// + /// Parent groupId to match. + /// Parent artifactId to match. + /// Current file path to start searching from. + /// Parent pom.xml file path, or empty string if not found. + private string FindParentPomByArtifactId(string parentGroupId, string parentArtifactId, string currentFilePath) + { + // Use cache to avoid repeated operations for the same file + return this.parentPomCache.GetOrAdd(currentFilePath, filePath => + { + try + { + // First, try to find by Maven coordinates (handles sibling projects) + if (!string.IsNullOrWhiteSpace(parentArtifactId)) + { + var coordinateKey = string.IsNullOrWhiteSpace(parentGroupId) + ? parentArtifactId + : $"{parentGroupId}:{parentArtifactId}"; + + if (this.processedMavenProjects.TryGetValue(coordinateKey, out var coordinateBasedPath)) + { + this.Logger.LogDebug( + "MavenWithFallback: Found parent {ParentCoordinate} at {Path} for {Child}", + coordinateKey, + Path.GetFileName(coordinateBasedPath), + Path.GetFileName(filePath)); + return coordinateBasedPath; + } + } + + // Fallback: Maven convention parent directory search + var currentDir = Path.GetDirectoryName(filePath); + var parentDir = Path.GetDirectoryName(currentDir); + + // Track visited directories to prevent infinite loops from circular directory structures + var visitedDirectories = new HashSet(StringComparer.OrdinalIgnoreCase); + + while (!string.IsNullOrEmpty(parentDir)) + { + // Prevent infinite loops from circular directory references or file system anomalies + if (!visitedDirectories.Add(parentDir)) + { + this.Logger.LogDebug( + "MavenWithFallback: Circular directory reference detected while searching for parent POM, breaking at {Directory}", + parentDir); + break; + } + + var parentPomPath = Path.Combine(parentDir, "pom.xml"); + if (this.fileUtilityService.Exists(parentPomPath) && + !string.Equals(parentPomPath, filePath, StringComparison.OrdinalIgnoreCase)) + { + return parentPomPath; + } + + var nextParentDir = Path.GetDirectoryName(parentDir); + if (string.Equals(nextParentDir, parentDir, StringComparison.OrdinalIgnoreCase)) + { + break; // Reached file system root + } + + parentDir = nextParentDir; + } + + return string.Empty; // Not found + } + catch (Exception ex) + { + this.Logger.LogDebug(ex, "Error finding parent POM for {FilePath}", Path.GetFileName(filePath)); + return string.Empty; + } + }); + } + + /// + /// Tracks a Maven project by its coordinates to enable coordinate-based parent resolution. + /// + /// The XML document to parse. + /// XML namespace manager for Maven POM. + /// Current pom.xml file path. + /// Pre-queried groupId node (can be null). + /// Pre-queried artifactId node (can be null). + private void TrackMavenProjectCoordinates(XmlDocument document, XmlNamespaceManager namespaceManager, string filePath, XmlNode groupIdNode, XmlNode artifactIdNode) + { + try + { + // If project doesn't have its own groupId, try to get it from parent + groupIdNode ??= document.SelectSingleNode("/proj:project/proj:parent/proj:groupId", namespaceManager); + + if (artifactIdNode != null && !string.IsNullOrWhiteSpace(artifactIdNode.InnerText)) + { + var groupId = groupIdNode?.InnerText; + var artifactId = artifactIdNode.InnerText; + + // Store with both artifactId-only and groupId:artifactId keys for flexible lookup + this.processedMavenProjects.TryAdd(artifactId, filePath); + if (!string.IsNullOrWhiteSpace(groupId)) + { + this.processedMavenProjects.TryAdd($"{groupId}:{artifactId}", filePath); + } + + this.Logger.LogDebug( + "MavenWithFallback: Tracked project {GroupId}:{ArtifactId} at {Path}", + groupId ?? "(inherited)", + artifactId, + Path.GetFileName(filePath)); + } + } + catch (Exception e) + { + this.Logger.LogDebug(e, "Failed to track Maven project coordinates from {Path}", filePath); + } + } + + /// + /// Resolves a version template using only local variables from the current file. + /// This ensures immediate resolution only when the variable is defined in the same file (highest priority). + /// + /// The version template with variables (e.g., "${revision}"). + /// Local variables from the current file. + /// The resolved version string, or the original template if local variable not found. + private string ResolveVersionFromLocalOnly(string versionTemplate, Dictionary localVariables) + { + var resolvedVersion = versionTemplate; + var match = VersionRegex.Match(versionTemplate); + + if (match.Success) + { + var variable = match.Groups[1].Captures[0].ToString(); + + // Only check local variables (same file priority) + if (localVariables.TryGetValue(variable, out var localReplacement)) + { + resolvedVersion = versionTemplate.Replace("${" + variable + "}", localReplacement); + } + } + + return resolvedVersion; } private IEnumerable ExtractFailedEndpoints(string errorMessage) @@ -644,72 +1026,221 @@ private void LogAuthErrorGuidance() this.LogWarning(guidance.ToString()); } - private string ResolveVersion(string versionString, XmlDocument currentDocument, string currentDocumentFileLocation) + /// + /// Resolves parent relationships that couldn't be established during first pass. + /// This handles cases where the parent POM was processed after the child POM. + /// + private void ResolveUnresolvedParentRelationships() { - var returnedVersionString = versionString; - var match = VersionRegex.Match(versionString); + var resolvedCount = 0; + var unresolvedCount = 0; - if (match.Success) + while (this.unresolvedParentRelationships.TryDequeue(out var unresolvedRelationship)) { - var variable = match.Groups[1].Captures[0].ToString(); - var replacement = this.ReplaceVariable(variable, currentDocument, currentDocumentFileLocation); - returnedVersionString = versionString.Replace("${" + variable + "}", replacement); + var (filePath, parentGroupId, parentArtifactId) = unresolvedRelationship; + + // Skip if already resolved (could happen if resolved via directory traversal during first pass) + if (this.mavenParentChildRelationships.ContainsKey(filePath)) + { + continue; + } + + // Clear the cache entry so we can try again with the now-complete processedMavenProjects + this.parentPomCache.TryRemove(filePath, out _); + + // Try to find parent by coordinates now that all files have been processed + var parentPath = this.FindParentPomByCoordinatesOnly(parentGroupId, parentArtifactId, filePath); + if (!string.IsNullOrEmpty(parentPath)) + { + this.mavenParentChildRelationships[filePath] = parentPath; + resolvedCount++; + this.Logger.LogDebug( + "MavenWithFallback: Resolved deferred parent relationship: {Child} → {Parent}", + Path.GetFileName(filePath), + Path.GetFileName(parentPath)); + } + else + { + unresolvedCount++; + this.Logger.LogDebug( + "MavenWithFallback: Could not resolve parent {ParentGroupId}:{ParentArtifactId} for {Child}", + parentGroupId ?? "(null)", + parentArtifactId, + Path.GetFileName(filePath)); + } } - return returnedVersionString; + if (resolvedCount > 0 || unresolvedCount > 0) + { + this.LogDebugWithId($"Second pass (parent resolution) completed: {resolvedCount} deferred parent relationships resolved, {unresolvedCount} remain unresolved"); + } } - private string ReplaceVariable(string variable, XmlDocument currentDocument, string currentDocumentFileLocation) + /// + /// Finds parent POM by Maven coordinates only (no directory traversal). + /// Used for deferred parent resolution after all files have been processed. + /// + private string FindParentPomByCoordinatesOnly(string parentGroupId, string parentArtifactId, string currentFilePath) { - var result = this.FindVariableInDocument(currentDocument, currentDocumentFileLocation, variable); - if (result != null) + if (string.IsNullOrWhiteSpace(parentArtifactId)) { - return result; + return string.Empty; } - lock (this.documentsLoaded) + // Try with full coordinates first + if (!string.IsNullOrWhiteSpace(parentGroupId)) { - foreach (var pathDocumentPair in this.documentsLoaded) + var fullCoordinateKey = $"{parentGroupId}:{parentArtifactId}"; + if (this.processedMavenProjects.TryGetValue(fullCoordinateKey, out var fullCoordinatePath) && + !string.Equals(fullCoordinatePath, currentFilePath, StringComparison.OrdinalIgnoreCase)) { - var path = pathDocumentPair.Key; - var document = pathDocumentPair.Value; - result = this.FindVariableInDocument(document, path, variable); - if (result != null) + return fullCoordinatePath; + } + } + + // Try with artifactId only + if (this.processedMavenProjects.TryGetValue(parentArtifactId, out var artifactIdPath) && + !string.Equals(artifactIdPath, currentFilePath, StringComparison.OrdinalIgnoreCase)) + { + return artifactIdPath; + } + + return string.Empty; + } + + /// + /// Third pass: resolve all pending components using hierarchy-aware variable resolution. + /// For components with unresolved variables, this picks the closest ancestor definition + /// based on Maven's property inheritance rules (child > parent precedence). + /// + private void ResolvePendingComponents() + { + // Capture count before draining for accurate telemetry + this.pendingComponentCountBeforeResolution = this.pendingComponents.Count; + + var resolvedCount = 0; + var skippedCount = 0; + + while (this.pendingComponents.TryDequeue(out var pendingComponent)) + { + try + { + var resolvedVersion = this.ResolveVersionWithHierarchyAwareness(pendingComponent.VersionTemplate, pendingComponent.FilePath); + if (!resolvedVersion.StartsWith("${")) { - return result; + var component = new MavenComponent(pendingComponent.GroupId, pendingComponent.ArtifactId, resolvedVersion); + var detectedComponent = new DetectedComponent(component); + pendingComponent.Recorder.RegisterUsage(detectedComponent); + Interlocked.Increment(ref this.staticParserComponentCount); + resolvedCount++; } + else + { + skippedCount++; + this.Logger.LogDebug( + "Version string {Version} for component {Group}/{Artifact} could not be resolved and a component will not be recorded. File: {File}", + resolvedVersion, + pendingComponent.GroupId, + pendingComponent.ArtifactId, + pendingComponent.FilePath); + } + } + catch (Exception e) + { + skippedCount++; + this.Logger.LogError( + e, + "Failed to resolve pending component {Group}/{Artifact} from {File}", + pendingComponent.GroupId, + pendingComponent.ArtifactId, + pendingComponent.FilePath); } } - return $"${{{variable}}}"; + this.LogDebugWithId($"Third pass (variable resolution) completed: {resolvedCount} components resolved, {skippedCount} skipped due to unresolved variables"); } - private string FindVariableInDocument(XmlDocument document, string path, string variable) + /// + /// Resolves a version template with hierarchy-aware precedence. + /// When multiple variable definitions exist, picks the closest ancestor to the requesting file. + /// This implements Maven's property inheritance rule: child properties take precedence over parent properties. + /// + /// The version template with variables (e.g., "${revision}"). + /// The file path of the POM requesting the variable resolution. + /// The resolved version string, or the original template if variables cannot be resolved. + private string ResolveVersionWithHierarchyAwareness(string versionTemplate, string requestingFilePath) { - try - { - var namespaceManager = new XmlNamespaceManager(document.NameTable); - namespaceManager.AddNamespace(ProjNamespace, MavenXmlNamespace); + var resolvedVersion = versionTemplate; + var match = VersionRegex.Match(versionTemplate); - var nodeListProject = document.SelectNodes($"//proj:{variable}", namespaceManager); - var nodeListProperties = document.SelectNodes($"//proj:properties/proj:{variable}", namespaceManager); + if (match.Success) + { + var variable = match.Groups[1].Captures[0].ToString(); - if (nodeListProject.Count != 0) + // Use Maven-compliant hierarchy search: current → parent → grandparent + var foundValue = this.FindVariableInMavenHierarchy(variable, requestingFilePath); + if (foundValue != null) { - return nodeListProject.Item(0).InnerText; + resolvedVersion = versionTemplate.Replace("${" + variable + "}", foundValue.Value.Value); } - - if (nodeListProperties.Count != 0) + else { - return nodeListProperties.Item(0).InnerText; + // Variable not found in Maven hierarchy - log at debug level since unresolved + // properties are common (profiles, external parents, etc.) and aggregate count in telemetry + Interlocked.Increment(ref this.unresolvedVariableCount); + this.Logger.LogDebug( + "{DetectorId}: Variable {Variable} not found in Maven hierarchy for {File}", + this.Id, + variable, + Path.GetFileName(requestingFilePath)); } } - catch (Exception e) + + return resolvedVersion; + } + + /// + /// Finds a variable value using Maven-compliant hierarchy search. + /// Searches in order: current file → parent → grandparent (stops at first match). + /// + /// Variable name to find. + /// The pom.xml file requesting the variable. + /// Variable value and source file, or null if not found in hierarchy. + private (string Value, string SourceFile)? FindVariableInMavenHierarchy(string variable, string requestingFilePath) + { + var currentFile = requestingFilePath; + var visitedFiles = new HashSet(StringComparer.OrdinalIgnoreCase); + var keyBuilder = new StringBuilder(256); // Pre-allocate for typical path lengths + + // Walk up Maven parent hierarchy until variable found or no more parents + while (!string.IsNullOrEmpty(currentFile)) { - this.Logger.LogError(e, "Failed to read file {Path}", path); + // Prevent infinite loops from circular parent references + if (!visitedFiles.Add(currentFile)) + { + this.Logger.LogDebug( + "{DetectorId}: Circular parent reference detected while resolving variable {Variable}, breaking at {File}", + this.Id, + variable, + Path.GetFileName(currentFile)); + break; + } + + // Check if this file has the variable definition using StringBuilder for efficiency + keyBuilder.Clear(); + keyBuilder.Append(currentFile).Append("::").Append(variable); + var variableKey = keyBuilder.ToString(); + + if (this.collectedVariables.TryGetValue(variableKey, out var value)) + { + return (value, currentFile); + } + + // Move to Maven parent (not directory parent) + this.mavenParentChildRelationships.TryGetValue(currentFile, out currentFile); } - return null; + return null; // Variable not found in Maven hierarchy } /// @@ -739,14 +1270,8 @@ private IObservable RemoveNestedPomXmls( .ThenBy(r => r.ComponentStream.Location, StringComparer.OrdinalIgnoreCase) .ToList(); - // Build a list of all directories that contain a pom.xml, ordered by path length (shortest first). - // This ensures parent directories are checked before their children. - var pomDirectories = sortedRequests - .Select(r => NormalizeDirectoryPath(Path.GetDirectoryName(r.ComponentStream.Location))) - .Distinct(StringComparer.OrdinalIgnoreCase) - .OrderBy(d => d.Length) - .ToList(); - + // Use a HashSet of root directories for O(1) lookup instead of O(n) list iteration + var rootPomDirectories = new HashSet(StringComparer.OrdinalIgnoreCase); var filteredRequests = new List(); foreach (var request in sortedRequests) @@ -754,23 +1279,21 @@ private IObservable RemoveNestedPomXmls( cancellationToken.ThrowIfCancellationRequested(); var location = NormalizeDirectoryPath(Path.GetDirectoryName(request.ComponentStream.Location)); + + // Check if any ancestor directory is already a root POM directory + // Walk up the directory tree (O(depth) instead of O(n)) var isNested = false; + var parentDir = Path.GetDirectoryName(location.TrimEnd(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar)); - foreach (var pomDirectory in pomDirectories) + while (!string.IsNullOrEmpty(parentDir)) { - if (pomDirectory.Length >= location.Length) + var normalizedParent = NormalizeDirectoryPath(parentDir); + if (rootPomDirectories.Contains(normalizedParent)) { - // Since the list is ordered by length, if the pomDirectory is longer than - // or equal to the location, there are no possible parent directories left. - break; - } - - if (location.StartsWith(pomDirectory, StringComparison.OrdinalIgnoreCase)) - { - this.LogDebug($"Ignoring {MavenManifest} at {location}, as it has a parent {MavenManifest} at {pomDirectory}."); + this.LogDebugWithId($"Ignoring {MavenManifest} at {location}, as it has a parent {MavenManifest} at {normalizedParent}."); isNested = true; parentPomDictionary.AddOrUpdate( - pomDirectory, + normalizedParent, [request], (key, existingList) => { @@ -779,19 +1302,28 @@ private IObservable RemoveNestedPomXmls( }); break; } + + var nextParent = Path.GetDirectoryName(parentDir); + if (string.Equals(nextParent, parentDir, StringComparison.OrdinalIgnoreCase)) + { + break; // Reached root + } + + parentDir = nextParent; } if (!isNested) { - this.LogDebug($"Discovered {request.ComponentStream.Location}."); + this.LogDebugWithId($"Discovered {request.ComponentStream.Location}."); + rootPomDirectories.Add(location); parentPomDictionary.AddOrUpdate( - NormalizeDirectoryPath(Path.GetDirectoryName(request.ComponentStream.Location)), - [request], - (key, existingList) => - { - existingList.Add(request); - return existingList; - }); + location, + [request], + (key, existingList) => + { + existingList.Add(request); + return existingList; + }); filteredRequests.Add(request); } } diff --git a/src/Microsoft.ComponentDetection.Detectors/maven/MvnCliComponentDetector.cs b/src/Microsoft.ComponentDetection.Detectors/maven/MvnCliComponentDetector.cs index fefc11f28..10d3d112d 100644 --- a/src/Microsoft.ComponentDetection.Detectors/maven/MvnCliComponentDetector.cs +++ b/src/Microsoft.ComponentDetection.Detectors/maven/MvnCliComponentDetector.cs @@ -35,7 +35,7 @@ public MvnCliComponentDetector( this.Logger = logger; } - public override string Id => "MvnCli"; + public override string Id => MavenConstants.MvnCliDetectorId; public override IList SearchPatterns => [MavenManifest]; @@ -45,10 +45,8 @@ public MvnCliComponentDetector( public override IEnumerable Categories => [Enum.GetName(typeof(DetectorClass), DetectorClass.Maven)]; - private void LogDebugWithId(string message) - { - this.Logger.LogDebug("{DetectorId} detector: {Message}", this.Id, message); - } + private void LogDebugWithId(string message) => + this.Logger.LogDebug("{DetectorId}: {Message}", this.Id, message); protected override async Task> OnPrepareDetectionAsync(IObservable processRequests, IDictionary detectorArgs, CancellationToken cancellationToken = default) { @@ -76,7 +74,6 @@ await this.RemoveNestedPomXmls(processRequests).ForEachAsync(processRequest => { // The file stream is going to be disposed after the iteration is finished // so is necessary to read the content and keep it in memory, for further processing. - // File reader registration is now handled in GenerateDependenciesFileAsync using var reader = new StreamReader(componentStream.Stream); var content = reader.ReadToEnd(); @@ -97,17 +94,13 @@ await this.RemoveNestedPomXmls(processRequests).ForEachAsync(processRequest => protected override async Task OnFileFoundAsync(ProcessRequest processRequest, IDictionary detectorArgs, CancellationToken cancellationToken = default) { - // File reader already registered at generation step, just process and unregister var depsFilePath = processRequest.ComponentStream.Location; - try - { - this.mavenCommandService.ParseDependenciesFile(processRequest); - } - finally - { - // Cleanup coordination with other detectors - this.mavenCommandService.UnregisterFileReader(depsFilePath, this.Id); - } + this.LogDebugWithId($"OnFileFoundAsync: Processing {depsFilePath}"); + + var componentsBefore = processRequest.SingleFileComponentRecorder.GetDetectedComponents().Count; + this.mavenCommandService.ParseDependenciesFile(processRequest); + var componentsAfter = processRequest.SingleFileComponentRecorder.GetDetectedComponents().Count; + this.LogDebugWithId($"OnFileFoundAsync: {depsFilePath} contributed {componentsAfter - componentsBefore} components"); await Task.CompletedTask; } @@ -159,7 +152,7 @@ private IObservable RemoveNestedPomXmls(IObservable +/// Represents a component with unresolved variables that needs second-pass processing. +/// +internal record PendingComponent( + string GroupId, + string ArtifactId, + string VersionTemplate, + ISingleFileComponentRecorder Recorder, + string FilePath); diff --git a/src/Microsoft.ComponentDetection.Orchestrator/Services/DetectorProcessingService.cs b/src/Microsoft.ComponentDetection.Orchestrator/Services/DetectorProcessingService.cs index d3a7d9fe4..550b94f5e 100644 --- a/src/Microsoft.ComponentDetection.Orchestrator/Services/DetectorProcessingService.cs +++ b/src/Microsoft.ComponentDetection.Orchestrator/Services/DetectorProcessingService.cs @@ -6,6 +6,7 @@ namespace Microsoft.ComponentDetection.Orchestrator.Services; using System.Collections.Generic; using System.Diagnostics; using System.IO; +using System.IO.Enumeration; using System.Linq; using System.Text.Json; using System.Threading; @@ -16,6 +17,7 @@ namespace Microsoft.ComponentDetection.Orchestrator.Services; using Microsoft.ComponentDetection.Common.Telemetry.Records; using Microsoft.ComponentDetection.Contracts; using Microsoft.ComponentDetection.Contracts.BcdeModels; +using Microsoft.ComponentDetection.Detectors.Maven; using Microsoft.ComponentDetection.Orchestrator.Commands; using Microsoft.ComponentDetection.Orchestrator.Experiments; using Microsoft.Extensions.Logging; @@ -29,17 +31,20 @@ internal class DetectorProcessingService : IDetectorProcessingService private const int ProcessTimeoutBufferSeconds = 5; private readonly IObservableDirectoryWalkerFactory scanner; + private readonly IPathUtilityService pathUtilityService; private readonly ILogger logger; private readonly IExperimentService experimentService; private readonly IAnsiConsole console; public DetectorProcessingService( IObservableDirectoryWalkerFactory scanner, + IPathUtilityService pathUtilityService, IExperimentService experimentService, ILogger logger, IAnsiConsole console = null) { this.scanner = scanner; + this.pathUtilityService = pathUtilityService; this.experimentService = experimentService; this.logger = logger; this.console = console ?? AnsiConsole.Console; @@ -160,6 +165,13 @@ public async Task ProcessDetectorsAsync( var results = await Task.WhenAll(scanTasks); await this.experimentService.FinishAsync(); + // Clean up Maven CLI temporary files after all detectors have finished + // Only cleanup if CleanupCreatedFiles is true (default) to respect user settings + if (settings.CleanupCreatedFiles ?? true) + { + this.CleanupMavenFiles(settings.SourceDirectory, detectors); + } + var detectorProcessingResult = this.ConvertDetectorResultsIntoResult(results, exitCode); var totalElapsedTime = stopwatch.Elapsed.TotalSeconds; @@ -422,4 +434,124 @@ private void LogTabularOutput(ConcurrentDictionary pr this.logger.LogInformation("{DetectionTimeLine}", line); } } + + /// + /// Cleans up Maven CLI temporary files after all detectors have finished. + /// This prevents race conditions between MvnCliComponentDetector and MavenWithFallbackDetector. + /// Uses the same symlink-aware traversal pattern as FastDirectoryWalkerFactory to handle + /// circular symlinks in large repositories. + /// + private void CleanupMavenFiles(DirectoryInfo sourceDirectory, IEnumerable detectors) + { + // Only clean up if Maven detectors are running - use shared constants to stay in sync + var hasMavenDetectors = detectors.Any(d => + d.Id == MavenConstants.MvnCliDetectorId || + d.Id == MavenConstants.MavenWithFallbackDetectorId); + + if (!hasMavenDetectors) + { + return; + } + + using var telemetryRecord = new MavenCliCleanupTelemetryRecord + { + SourceDirectory = sourceDirectory.FullName, + }; + + try + { + this.logger.LogDebug("Starting Maven CLI cleanup in directory: {SourceDirectory}", sourceDirectory.FullName); + + var cleanedCount = 0; + var failedCount = 0; + + // Track visited directories by their real (resolved) paths to handle circular symlinks + // This follows the same pattern used by FastDirectoryWalkerFactory + var visitedDirectories = new ConcurrentDictionary(StringComparer.OrdinalIgnoreCase); + + // Use FileSystemEnumerable with symlink-aware recursion predicate + // This matches the pattern in FastDirectoryWalkerFactory for safe directory traversal + var fileEnumerable = new FileSystemEnumerable( + sourceDirectory.FullName, + (ref FileSystemEntry entry) => entry.ToFullPath(), + new EnumerationOptions + { + RecurseSubdirectories = true, + IgnoreInaccessible = true, + }) + { + ShouldIncludePredicate = (ref FileSystemEntry entry) => + !entry.IsDirectory && + entry.FileName.Equals(MavenConstants.BcdeMvnDependencyFileName, StringComparison.OrdinalIgnoreCase), + + // Handle symlinks to prevent infinite loops - same pattern as FastDirectoryWalkerFactory + ShouldRecursePredicate = (ref FileSystemEntry entry) => + { + if (!entry.IsDirectory) + { + return false; + } + + try + { + var directoryPath = entry.ToFullPath(); + var realPath = directoryPath; + + // Check if this is a symlink (reparse point) and resolve to real path + if (entry.Attributes.HasFlag(FileAttributes.ReparsePoint)) + { + realPath = this.pathUtilityService.ResolvePhysicalPath(directoryPath); + + // If we can't resolve the path, skip this directory + if (string.IsNullOrEmpty(realPath)) + { + return false; + } + } + + // Only recurse if we haven't visited this real path before + return visitedDirectories.TryAdd(realPath, true); + } + catch (Exception ex) + { + // If symlink resolution fails (broken/inaccessible symlink), skip this directory + // and continue cleanup to avoid aborting the entire enumeration + this.logger.LogDebug(ex, "Skipping directory due to symlink resolution failure: {Directory}", entry.ToFullPath()); + return false; + } + }, + }; + + // Use Parallel.ForEach for concurrent deletion with better throughput + Parallel.ForEach( + fileEnumerable, + new ParallelOptions { MaxDegreeOfParallelism = Environment.ProcessorCount }, + filePath => + { + try + { + File.Delete(filePath); + Interlocked.Increment(ref cleanedCount); + this.logger.LogDebug("Cleaned up Maven CLI file: {File}", filePath); + } + catch (Exception ex) + { + Interlocked.Increment(ref failedCount); + this.logger.LogDebug(ex, "Failed to clean up Maven CLI file: {File}", filePath); + } + }); + + telemetryRecord.FilesCleanedCount = cleanedCount; + telemetryRecord.FilesFailedCount = failedCount; + + if (cleanedCount > 0 || failedCount > 0) + { + this.logger.LogDebug("Maven CLI cleanup completed. Removed {CleanedCount} files, failed {FailedCount} files.", cleanedCount, failedCount); + } + } + catch (Exception ex) + { + this.logger.LogWarning(ex, "Maven CLI cleanup failed for directory: {SourceDirectory}", sourceDirectory.FullName); + } + } } diff --git a/test/Microsoft.ComponentDetection.Detectors.Tests/MavenWithFallbackDetectorTests.cs b/test/Microsoft.ComponentDetection.Detectors.Tests/MavenWithFallbackDetectorTests.cs index cf9338e90..2ef00073b 100644 --- a/test/Microsoft.ComponentDetection.Detectors.Tests/MavenWithFallbackDetectorTests.cs +++ b/test/Microsoft.ComponentDetection.Detectors.Tests/MavenWithFallbackDetectorTests.cs @@ -434,6 +434,273 @@ public async Task StaticParser_IgnoresDependenciesWithUnresolvablePropertyVersio mavenComponent.ArtifactId.Should().Be("guava"); } + [TestMethod] + public async Task StaticParser_ResolvesVariableFromPreviousFile_Async() + { + // Arrange - Test case 1: Variable defined in parent POM, referenced in child POM + // Uses Maven's standard parent inheritance mechanism + this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) + .ReturnsAsync(false); + + // Setup fileUtilityService to allow parent POM resolution + this.fileUtilityServiceMock.Setup(x => x.Exists(It.Is(s => s.EndsWith("pom.xml") && !s.Contains("module")))) + .Returns(true); + + var parentPomContent = @" + + 4.0.0 + com.test + parent + 1.0.0 + pom + + 3.12.0 + +"; + + var childPomContent = @" + + 4.0.0 + + com.test + parent + 1.0.0 + + child + + + org.apache.commons + commons-lang3 + ${commons.version} + + +"; + + // Act - Parent processed first, then child + var (detectorResult, componentRecorder) = await this.DetectorTestUtility + .WithFile("pom.xml", parentPomContent) + .WithFile("module/pom.xml", childPomContent) + .ExecuteDetectorAsync(); + + // Assert + detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); + + var detectedComponents = componentRecorder.GetDetectedComponents(); + detectedComponents.Should().ContainSingle(); + + var mavenComponent = detectedComponents.First().Component as MavenComponent; + mavenComponent.Should().NotBeNull(); + mavenComponent.GroupId.Should().Be("org.apache.commons"); + mavenComponent.ArtifactId.Should().Be("commons-lang3"); + mavenComponent.Version.Should().Be("3.12.0"); + } + + [TestMethod] + public async Task StaticParser_BackfillsVariableFromLaterFile_Async() + { + // Arrange - Test case 2: Child processed first, parent processed second (deferred resolution) + // Tests that variables can be resolved even when parent is processed after child + this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) + .ReturnsAsync(false); + + // Setup fileUtilityService to allow parent POM resolution + this.fileUtilityServiceMock.Setup(x => x.Exists(It.Is(s => s.EndsWith("pom.xml") && !s.Contains("module")))) + .Returns(true); + + var childPomContent = @" + + 4.0.0 + + com.test + parent + 1.0.0 + + child + + + org.apache.commons + commons-lang3 + ${commons.version} + + +"; + + var parentPomContent = @" + + 4.0.0 + com.test + parent + 1.0.0 + pom + + 3.13.0 + +"; + + // Act - Child processed first (has unresolved variable), then parent + var (detectorResult, componentRecorder) = await this.DetectorTestUtility + .WithFile("module/pom.xml", childPomContent) + .WithFile("pom.xml", parentPomContent) + .ExecuteDetectorAsync(); + + // Assert + detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); + + var detectedComponents = componentRecorder.GetDetectedComponents(); + detectedComponents.Should().ContainSingle(); + + var mavenComponent = detectedComponents.First().Component as MavenComponent; + mavenComponent.Should().NotBeNull(); + mavenComponent.GroupId.Should().Be("org.apache.commons"); + mavenComponent.ArtifactId.Should().Be("commons-lang3"); + mavenComponent.Version.Should().Be("3.13.0"); + } + + [TestMethod] + public async Task StaticParser_LocalVariableDefinitionTakesPriority_Async() + { + // Arrange - Test case 3: Variable defined in both files, local definition has priority + this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) + .ReturnsAsync(false); + + var firstPomContent = @" + + 4.0.0 + com.test + parent + 1.0.0 + + 3.11.0 + +"; + + var secondPomContent = @" + + 4.0.0 + com.test + child + 1.0.0 + + 3.14.0 + + + + org.apache.commons + commons-lang3 + ${commons.version} + + +"; + + // Act + var (detectorResult, componentRecorder) = await this.DetectorTestUtility + .WithFile("pom.xml", firstPomContent) + .WithFile("module/pom.xml", secondPomContent) + .ExecuteDetectorAsync(); + + // Assert + detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); + + var detectedComponents = componentRecorder.GetDetectedComponents(); + detectedComponents.Should().ContainSingle(); + + var mavenComponent = detectedComponents.First().Component as MavenComponent; + mavenComponent.Should().NotBeNull(); + mavenComponent.GroupId.Should().Be("org.apache.commons"); + mavenComponent.ArtifactId.Should().Be("commons-lang3"); + + // Should use the local definition (3.14.0) instead of parent definition (3.11.0) + mavenComponent.Version.Should().Be("3.14.0"); + } + + [TestMethod] + public async Task StaticParser_OutOfOrderProcessing_RespectsAncestorPriority_Async() + { + // Arrange - Test processing order independence for ancestor priority + // This test simulates the scenario where files are processed out of natural hierarchy order: + // grandparent → child → parent (instead of grandparent → parent → child) + // The child should still get the parent's variable value, not the grandparent's + this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) + .ReturnsAsync(false); + + // Setup fileUtilityService to return false for directory traversal + // This forces the detector to use coordinate-based parent resolution + // which correctly handles out-of-order processing through deferred resolution + this.fileUtilityServiceMock.Setup(x => x.Exists(It.IsAny())) + .Returns(false); + + var grandparentPomContent = @" + + 4.0.0 + com.test + grandparent + 1.0.0 + pom + + 3.10.0 + +"; + + var parentPomContent = @" + + 4.0.0 + + com.test + grandparent + 1.0.0 + + parent + pom + + 3.11.0 + +"; + + var childPomContent = @" + + 4.0.0 + + com.test + parent + 1.0.0 + + child + + + org.apache.commons + commons-lang3 + ${commons.version} + + +"; + + // Act - Process files in out-of-order sequence: grandparent → child → parent + // This tests that deferred variable resolution correctly handles ancestor priority + // regardless of processing order + // Use file structure matching the working tests: root pom.xml and nested paths + var (detectorResult, componentRecorder) = await this.DetectorTestUtility + .WithFile("pom.xml", grandparentPomContent) // Processed 1st: defines commons.version = 3.10.0 + .WithFile("parent/child/pom.xml", childPomContent) // Processed 2nd: references ${commons.version} + .WithFile("parent/pom.xml", parentPomContent) // Processed 3rd: defines commons.version = 3.11.0 + .ExecuteDetectorAsync(); + + // Assert + detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); + + var detectedComponents = componentRecorder.GetDetectedComponents(); + detectedComponents.Should().ContainSingle(); + + var mavenComponent = detectedComponents.First().Component as MavenComponent; + mavenComponent.Should().NotBeNull(); + mavenComponent.GroupId.Should().Be("org.apache.commons"); + mavenComponent.ArtifactId.Should().Be("commons-lang3"); + + // Should resolve to parent's version (3.11.0) since parent is the closest ancestor to child, + // NOT grandparent's version (3.10.0), even though grandparent was processed first. + // This validates that deferred resolution correctly implements Maven's ancestor priority rules. + mavenComponent.Version.Should().Be("3.11.0"); + } + [TestMethod] public async Task WhenNoPomXmlFiles_ReturnsSuccessWithNoComponents_Async() { @@ -1217,4 +1484,751 @@ private void SetupMvnCliSuccess(string depsFileContent) // Add the dependency file that Maven CLI would have generated this.DetectorTestUtility.WithFile(BcdeMvnFileName, depsFileContent, [BcdeMvnFileName]); } + + [TestMethod] + public async Task VariableResolution_SiblingPomVariablesShouldNotBeUsed_Async() + { + // Arrange - Maven-compliant behavior: sibling POM variables should NOT be resolved + this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) + .ReturnsAsync(false); + + // Parent POM with a property + var parentPomContent = @" + + 4.0.0 + com.test + parent + 1.0.0 + pom + + 3.12.0 + +"; + + // Sibling POM with different variable - should NOT be used for resolution + var siblingPomContent = @" + + 4.0.0 + + com.test + parent + 1.0.0 + + com.test + sibling + + 31.1-jre + +"; + + // Target POM trying to use sibling's variable - should fail to resolve + var targetPomContent = @" + + 4.0.0 + + com.test + parent + 1.0.0 + + com.test + target + + + com.google.guava + guava + ${guava.version} + + + org.apache.commons + commons-lang3 + ${commons.version} + + +"; + + // Act + var (detectorResult, componentRecorder) = await this.DetectorTestUtility + .WithFile("parent/pom.xml", parentPomContent) + .WithFile("sibling/pom.xml", siblingPomContent) + .WithFile("target/pom.xml", targetPomContent) + .ExecuteDetectorAsync(); + + // Assert + detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); + + var detectedComponents = componentRecorder.GetDetectedComponents(); + + // Should only resolve commons-lang3 (from parent), not guava (sibling variable) + detectedComponents.Should().HaveCount(1); + var component = detectedComponents.First().Component as MavenComponent; + component.Should().NotBeNull(); + component.GroupId.Should().Be("org.apache.commons"); + component.ArtifactId.Should().Be("commons-lang3"); + component.Version.Should().Be("3.12.0"); // Resolved from parent + } + + [TestMethod] + public async Task VariableResolution_ParentHierarchyVariablesShouldBeUsed_Async() + { + // Arrange - Maven-compliant behavior: parent/grandparent variables should be resolved + this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) + .ReturnsAsync(false); + + // Setup fileUtilityService to allow parent POM resolution + this.fileUtilityServiceMock.Setup(x => x.Exists(It.IsAny())) + .Returns((string path) => path.EndsWith("pom.xml")); + + // Grandparent POM + var grandparentPomContent = @" + + 4.0.0 + com.test + grandparent + 1.0.0 + pom + + 4.13.2 + +"; + + // Parent POM + var parentPomContent = @" + + 4.0.0 + + com.test + grandparent + 1.0.0 + + com.test + parent + + 3.12.0 + +"; + + // Child POM using variables from parent hierarchy + var childPomContent = @" + + 4.0.0 + + com.test + parent + 1.0.0 + + com.test + child + + 31.1-jre + + + + org.apache.commons + commons-lang3 + ${commons.version} + + + junit + junit + ${junit.version} + + + com.google.guava + guava + ${guava.version} + + +"; + + // Act + var (detectorResult, componentRecorder) = await this.DetectorTestUtility + .WithFile("grandparent/pom.xml", grandparentPomContent) + .WithFile("parent/pom.xml", parentPomContent) + .WithFile("child/pom.xml", childPomContent) + .ExecuteDetectorAsync(); + + // Assert + detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); + + var detectedComponents = componentRecorder.GetDetectedComponents(); + detectedComponents.Should().HaveCount(3); + + var components = detectedComponents.Select(x => x.Component as MavenComponent).ToList(); + + // Should resolve commons-lang3 from parent + var commonsComponent = components.FirstOrDefault(c => c.ArtifactId == "commons-lang3"); + commonsComponent.Should().NotBeNull(); + commonsComponent.Version.Should().Be("3.12.0"); + + // Should resolve junit from grandparent + var junitComponent = components.FirstOrDefault(c => c.ArtifactId == "junit"); + junitComponent.Should().NotBeNull(); + junitComponent.Version.Should().Be("4.13.2"); + + // Should resolve guava from current POM + var guavaComponent = components.FirstOrDefault(c => c.ArtifactId == "guava"); + guavaComponent.Should().NotBeNull(); + guavaComponent.Version.Should().Be("31.1-jre"); + } + + [TestMethod] + public async Task VariableResolution_CircularReferencesShouldBeHandled_Async() + { + // Arrange - Test circular reference detection and prevention + // NOTE: Maven CLI fails completely when circular references exist (even if unused) + // Our detector should fall back to static parsing and handle circular refs gracefully + this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) + .ReturnsAsync(false); + + // POM with circular references (Maven CLI would fail entirely) + var pomContent = @" + + 4.0.0 + com.test + circular-test + 1.0.0 + + + ${another.version} + ${circular.version} + + + 1.0.0 + + + + + org.apache.commons + commons-lang3 + ${circular.version} + + + + com.google.guava + guava + ${valid.version} + + +"; + + // Act + var (detectorResult, componentRecorder) = await this.DetectorTestUtility + .WithFile("pom.xml", pomContent) + .ExecuteDetectorAsync(); + + // Assert + detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); + + var detectedComponents = componentRecorder.GetDetectedComponents(); + + // Should only resolve guava (valid version), skip commons-lang3 (circular reference) + // This matches our detector's behavior: graceful handling vs Maven CLI's complete failure + detectedComponents.Should().HaveCount(1); + var component = detectedComponents.First().Component as MavenComponent; + component.Should().NotBeNull(); + component.GroupId.Should().Be("com.google.guava"); + component.ArtifactId.Should().Be("guava"); + component.Version.Should().Be("1.0.0"); + } + + [TestMethod] + public async Task VariableResolution_DeferredResolutionSecondPass_Async() + { + // Arrange - Test second pass resolution for components with initially unresolved variables + this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) + .ReturnsAsync(false); + + // Setup fileUtilityService to ONLY return true for the specific parent pom.xml file + // This prevents directory-based parent resolution from finding a "fake" parent + this.fileUtilityServiceMock.Setup(x => x.Exists(It.IsAny())) + .Returns((string path) => path.Contains("parent") && path.EndsWith("pom.xml")); + + // Parent POM loaded after child (simulation) + var parentPomContent = @" + + 4.0.0 + com.test + parent + 1.0.0 + pom + + 2.0.0 + +"; + + // Child POM with dependency on parent variable + var childPomContent = @" + + 4.0.0 + + com.test + parent + 1.0.0 + + com.test + child + + + org.apache.commons + commons-lang3 + ${deferred.version} + + +"; + + // Act - Files are processed in order that might cause deferred resolution + var (detectorResult, componentRecorder) = await this.DetectorTestUtility + .WithFile("child/pom.xml", childPomContent) + .WithFile("parent/pom.xml", parentPomContent) + .ExecuteDetectorAsync(); + + // Assert + detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); + + var detectedComponents = componentRecorder.GetDetectedComponents(); + detectedComponents.Should().HaveCount(1); + + var component = detectedComponents.First().Component as MavenComponent; + component.Should().NotBeNull(); + component.GroupId.Should().Be("org.apache.commons"); + component.ArtifactId.Should().Be("commons-lang3"); + component.Version.Should().Be("2.0.0"); // Should be resolved in second pass + } + + [TestMethod] + public async Task VariableResolution_MavenBuiltInVariablesShouldWork_Async() + { + // Arrange - Test Maven built-in variables like ${project.version} + this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) + .ReturnsAsync(false); + + // Setup fileUtilityService (not needed for this test but required for consistency) + this.fileUtilityServiceMock.Setup(x => x.Exists(It.IsAny())) + .Returns(false); + + var pomContent = @" + + 4.0.0 + com.test + maven-builtin-test + 1.5.0 + + + com.test + internal-dependency + ${project.version} + + +"; + + // Act + var (detectorResult, componentRecorder) = await this.DetectorTestUtility + .WithFile("pom.xml", pomContent) + .ExecuteDetectorAsync(); + + // Assert + detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); + + var detectedComponents = componentRecorder.GetDetectedComponents(); + detectedComponents.Should().HaveCount(1); + + var component = detectedComponents.First().Component as MavenComponent; + component.Should().NotBeNull(); + component.GroupId.Should().Be("com.test"); + component.ArtifactId.Should().Be("internal-dependency"); + component.Version.Should().Be("1.5.0"); // Should resolve ${project.version} + } + + [TestMethod] + public async Task VariableResolution_SiblingPomAsParentShouldBeUsed_Async() + { + // Arrange - Test case where a "sibling" POM is referenced as parent + // In this scenario, the sibling POM variables SHOULD be used for resolution + this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) + .ReturnsAsync(false); + + // "Sibling" POM that will be referenced as parent by another module + var parentModulePomContent = @" + + 4.0.0 + com.test + shared-parent + 1.0.0 + pom + + 2.13.4 + 4.13.2 + +"; + + // Another "sibling" POM that references the first as its parent + var childModulePomContent = @" + + 4.0.0 + + com.test + shared-parent + 1.0.0 + ../shared-parent/pom.xml + + com.test + consumer-module + + 3.0.0 + + + + com.fasterxml.jackson.core + jackson-core + ${jackson.version} + + + junit + junit + ${junit.version} + + + org.apache.commons + commons-lang3 + ${local.version} + + +"; + + // Act + var (detectorResult, componentRecorder) = await this.DetectorTestUtility + .WithFile("shared-parent/pom.xml", parentModulePomContent) + .WithFile("consumer-module/pom.xml", childModulePomContent) + .ExecuteDetectorAsync(); + + // Assert + detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); + + var detectedComponents = componentRecorder.GetDetectedComponents(); + detectedComponents.Should().HaveCount(3); + + var components = detectedComponents.Select(x => x.Component as MavenComponent).ToList(); + + // Should resolve jackson-core from parent (shared-parent) + var jacksonComponent = components.FirstOrDefault(c => c.ArtifactId == "jackson-core"); + jacksonComponent.Should().NotBeNull(); + jacksonComponent.Version.Should().Be("2.13.4"); // From parent POM + + // Should resolve junit from parent (shared-parent) + var junitComponent = components.FirstOrDefault(c => c.ArtifactId == "junit"); + junitComponent.Should().NotBeNull(); + junitComponent.Version.Should().Be("4.13.2"); // From parent POM + + // Should resolve commons-lang3 from local properties + var commonsComponent = components.FirstOrDefault(c => c.ArtifactId == "commons-lang3"); + commonsComponent.Should().NotBeNull(); + commonsComponent.Version.Should().Be("3.0.0"); // From local POM + } + + [TestMethod] + public async Task VariableResolution_UnreferencedSiblingPomShouldNotBeUsed_Async() + { + // Arrange - Contrast test: sibling POM NOT referenced as parent should be ignored + this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) + .ReturnsAsync(false); + + // True parent POM + var actualParentPomContent = @" + + 4.0.0 + com.test + actual-parent + 1.0.0 + pom + + 3.12.0 + +"; + + // Sibling POM with different variables - should NOT be used + var siblingPomContent = @" + + 4.0.0 + + com.test + actual-parent + 1.0.0 + + com.test + sibling-module + + 2.13.4 + +"; + + // Child POM that references actual-parent, not sibling + var childPomContent = @" + + 4.0.0 + + com.test + actual-parent + 1.0.0 + + com.test + child-module + + + org.apache.commons + commons-lang3 + ${commons.version} + + + com.fasterxml.jackson.core + jackson-core + ${jackson.version} + + +"; + + // Act + var (detectorResult, componentRecorder) = await this.DetectorTestUtility + .WithFile("actual-parent/pom.xml", actualParentPomContent) + .WithFile("sibling-module/pom.xml", siblingPomContent) + .WithFile("child-module/pom.xml", childPomContent) + .ExecuteDetectorAsync(); + + // Assert + detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); + + var detectedComponents = componentRecorder.GetDetectedComponents(); + + // Should only resolve commons-lang3 (from actual parent), not jackson-core (sibling variable) + detectedComponents.Should().HaveCount(1); + + var component = detectedComponents.First().Component as MavenComponent; + component.Should().NotBeNull(); + component.GroupId.Should().Be("org.apache.commons"); + component.ArtifactId.Should().Be("commons-lang3"); + component.Version.Should().Be("3.12.0"); // Resolved from actual parent + } + + [TestMethod] + public async Task TestSmartLoopPreventionInDirectoryTraversal() + { + // Arrange - Setup Maven CLI to fail so we use static parser + this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) + .ReturnsAsync(false); + + // Create a child POM that references a parent that won't be found in directory traversal + var childPomContent = """ + + 4.0.0 + + com.example + parent-project + 1.0.0 + + com.example + child-project + ${parent.version} + + + junit + junit + 4.13.2 + + + + """; + + // Act & Assert - This should not hang or throw due to infinite directory traversal + var (scanResult, componentRecorder) = await this.DetectorTestUtility + .WithFile("pom.xml", childPomContent) + .ExecuteDetectorAsync(); + + // Should complete successfully without infinite loops + scanResult.ResultCode.Should().Be(ProcessingResultCode.Success); + + // Should register the dependency with direct version (junit) + var detectedComponents = componentRecorder.GetDetectedComponents(); + detectedComponents.Should().HaveCount(1); + + var component = detectedComponents.First().Component as MavenComponent; + component.Should().NotBeNull(); + component.GroupId.Should().Be("junit"); + component.ArtifactId.Should().Be("junit"); + component.Version.Should().Be("4.13.2"); + } + + [TestMethod] + public async Task TestDeepDirectoryTraversalWithoutInfiniteLoop() + { + // Arrange - Setup to use static parser only + this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) + .ReturnsAsync(false); + + // Create a child POM in a deep directory structure + var childPomContent = """ + + 4.0.0 + + com.example + deep-parent + 1.0.0 + + deep-child + + + org.junit.jupiter + junit-jupiter + 5.8.2 + + + + """; + + // Act - Should handle deep traversal without issues + var (scanResult, componentRecorder) = await this.DetectorTestUtility + .WithFile("very/deep/nested/directory/structure/project/pom.xml", childPomContent) + .ExecuteDetectorAsync(); + + // Assert - Should complete successfully + scanResult.ResultCode.Should().Be(ProcessingResultCode.Success); + var detectedComponents = componentRecorder.GetDetectedComponents(); + detectedComponents.Should().HaveCount(1); + + var component = detectedComponents.First().Component as MavenComponent; + component.GroupId.Should().Be("org.junit.jupiter"); + component.ArtifactId.Should().Be("junit-jupiter"); + component.Version.Should().Be("5.8.2"); + } + + [TestMethod] + public async Task TestCircularDirectoryReferenceDetection() + { + // Arrange - Setup static parsing mode + this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) + .ReturnsAsync(false); + + // Create a simple POM that tests directory traversal robustness + var pomContent = """ + + 4.0.0 + com.example + circular-test + 1.0.0 + + + com.fasterxml.jackson.core + jackson-core + 2.13.3 + + + + """; + + // Act - Should not hang on edge cases + var (scanResult, componentRecorder) = await this.DetectorTestUtility + .WithFile("subdir/pom.xml", pomContent) + .ExecuteDetectorAsync(); + + // Assert - Should complete without infinite loops + scanResult.ResultCode.Should().Be(ProcessingResultCode.Success); + var detectedComponents = componentRecorder.GetDetectedComponents(); + detectedComponents.Should().HaveCount(1); + + var component = detectedComponents.First().Component as MavenComponent; + component.GroupId.Should().Be("com.fasterxml.jackson.core"); + component.ArtifactId.Should().Be("jackson-core"); + component.Version.Should().Be("2.13.3"); + } + + [TestMethod] + public async Task TestFileSystemRootReachedScenario() + { + // Arrange - Setup static parsing mode + this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) + .ReturnsAsync(false); + + // Create a simple POM + var pomContent = """ + + 4.0.0 + com.example + root-test + 1.0.0 + + + org.springframework + spring-core + 5.3.21 + + + + """; + + // Act - Should handle file system root gracefully + var (scanResult, componentRecorder) = await this.DetectorTestUtility + .WithFile("pom.xml", pomContent) + .ExecuteDetectorAsync(); + + // Assert - Should complete without issues + scanResult.ResultCode.Should().Be(ProcessingResultCode.Success); + var detectedComponents = componentRecorder.GetDetectedComponents(); + detectedComponents.Should().HaveCount(1); + + var component = detectedComponents.First().Component as MavenComponent; + component.GroupId.Should().Be("org.springframework"); + component.ArtifactId.Should().Be("spring-core"); + component.Version.Should().Be("5.3.21"); + } + + [TestMethod] + public async Task TestPerformanceOfSmartLoopPrevention() + { + // Arrange - Setup static parsing mode + this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) + .ReturnsAsync(false); + + // Create multiple POMs that will trigger directory traversal + // This validates that smart loop prevention completes without hanging + // (algorithmic validation rather than timing-based) + var pom1Content = CreatePomWithParentReference("project1", "parent-project", "1.0.0"); + var pom2Content = CreatePomWithParentReference("project2", "parent-project", "1.0.0"); + var pom3Content = CreatePomWithParentReference("project3", "parent-project", "1.0.0"); + + // Act - Process multiple POMs (test validates completion, not timing) + var (scanResult, componentRecorder) = await this.DetectorTestUtility + .WithFile("project1/pom.xml", pom1Content) + .WithFile("project2/pom.xml", pom2Content) + .WithFile("project3/pom.xml", pom3Content) + .ExecuteDetectorAsync(); + + // Assert - Should complete without hanging and produce correct results + // The test passing validates that smart loop prevention works algorithmically + // (if it had infinite loops, the test would timeout/hang rather than fail assertions) + scanResult.ResultCode.Should().Be(ProcessingResultCode.Success); + + // Should have detected direct dependencies from all 3 POMs + var detectedComponents = componentRecorder.GetDetectedComponents(); + detectedComponents.Should().HaveCount(3); + } + + private static string CreatePomWithParentReference(string artifactId, string parentArtifactId, string parentVersion) + { + return $""" + + 4.0.0 + + com.example + {parentArtifactId} + {parentVersion} + + {artifactId} + + + com.example + {artifactId}-dependency + 2.0.0 + + + + """; + } } diff --git a/test/Microsoft.ComponentDetection.Orchestrator.Tests/Services/DetectorProcessingServiceTests.cs b/test/Microsoft.ComponentDetection.Orchestrator.Tests/Services/DetectorProcessingServiceTests.cs index 0f6a8a020..2eec47db6 100644 --- a/test/Microsoft.ComponentDetection.Orchestrator.Tests/Services/DetectorProcessingServiceTests.cs +++ b/test/Microsoft.ComponentDetection.Orchestrator.Tests/Services/DetectorProcessingServiceTests.cs @@ -42,6 +42,7 @@ public class DetectorProcessingServiceTests private readonly Mock> loggerMock; private readonly DetectorProcessingService serviceUnderTest; private readonly Mock directoryWalkerFactory; + private readonly Mock pathUtilityServiceMock; private readonly Mock experimentServiceMock; private readonly Mock consoleMock; @@ -60,9 +61,15 @@ public DetectorProcessingServiceTests() this.experimentServiceMock = new Mock(); this.loggerMock = new Mock>(); this.directoryWalkerFactory = new Mock(); + this.pathUtilityServiceMock = new Mock(); this.consoleMock = new Mock(); + + // Setup path utility to return the same path by default (no symlinks) + this.pathUtilityServiceMock.Setup(x => x.ResolvePhysicalPath(It.IsAny())) + .Returns(path => path); + this.serviceUnderTest = - new DetectorProcessingService(this.directoryWalkerFactory.Object, this.experimentServiceMock.Object, this.loggerMock.Object, this.consoleMock.Object); + new DetectorProcessingService(this.directoryWalkerFactory.Object, this.pathUtilityServiceMock.Object, this.experimentServiceMock.Object, this.loggerMock.Object, this.consoleMock.Object); this.firstFileComponentDetectorMock = this.SetupFileDetectorMock("firstFileDetectorId"); this.secondFileComponentDetectorMock = this.SetupFileDetectorMock("secondFileDetectorId"); @@ -658,4 +665,195 @@ private Mock SetupCommandDetectorMock(string id) return mockCommandDetector; } + + [TestMethod] + public async Task ProcessDetectorsAsync_WithMavenDetectors_ShouldCleanupMavenFilesAfterAllDetectorsFinish() + { + // Arrange - Create a temporary directory with mock Maven dependency files + var tempDir = Path.Combine(Path.GetTempPath(), $"maven_cleanup_test_{Guid.NewGuid()}"); + Directory.CreateDirectory(tempDir); + + // Use bcde.mvndeps - the specific filename generated by MavenCommandService + var testFile1 = Path.Combine(tempDir, "bcde.mvndeps"); + var testFile2 = Path.Combine(tempDir, "subdir", "bcde.mvndeps"); + var nonMavenFile = Path.Combine(tempDir, "regular.txt"); + + Directory.CreateDirectory(Path.GetDirectoryName(testFile2)); + await File.WriteAllTextAsync(testFile1, "maven dependency content 1"); + await File.WriteAllTextAsync(testFile2, "maven dependency content 2"); + await File.WriteAllTextAsync(nonMavenFile, "regular file content"); + + try + { + var tempDirInfo = new DirectoryInfo(tempDir); + var scanSettings = new ScanSettings + { + SourceDirectory = tempDirInfo, + DetectorArgs = new Dictionary(), + CleanupCreatedFiles = true, + }; + + // Create mock Maven detectors + var mvnCliDetectorMock = new Mock(); + mvnCliDetectorMock.SetupAllProperties(); + mvnCliDetectorMock.SetupGet(x => x.Id).Returns("MvnCli"); + mvnCliDetectorMock.SetupGet(x => x.Categories).Returns(["Maven"]); + mvnCliDetectorMock.Setup(x => x.ExecuteDetectorAsync(It.IsAny(), It.IsAny())) + .ReturnsAsync(new IndividualDetectorScanResult { ResultCode = ProcessingResultCode.Success }); + + var mavenWithFallbackDetectorMock = new Mock(); + mavenWithFallbackDetectorMock.SetupAllProperties(); + mavenWithFallbackDetectorMock.SetupGet(x => x.Id).Returns("MavenWithFallback"); + mavenWithFallbackDetectorMock.SetupGet(x => x.Categories).Returns(["Maven"]); + mavenWithFallbackDetectorMock.Setup(x => x.ExecuteDetectorAsync(It.IsAny(), It.IsAny())) + .ReturnsAsync(new IndividualDetectorScanResult { ResultCode = ProcessingResultCode.Success }); + + var detectors = new List { mvnCliDetectorMock.Object, mavenWithFallbackDetectorMock.Object, }; + + // Verify files exist before running detectors + File.Exists(testFile1).Should().BeTrue("Maven dependency file 1 should exist before cleanup"); + File.Exists(testFile2).Should().BeTrue("Maven dependency file 2 should exist before cleanup"); + File.Exists(nonMavenFile).Should().BeTrue("Non-Maven file should exist before cleanup"); + + // Act + var result = await this.serviceUnderTest.ProcessDetectorsAsync(scanSettings, detectors, new DetectorRestrictions()); + + // Assert + result.Should().NotBeNull(); + result.ResultCode.Should().Be(ProcessingResultCode.Success); + + // Maven dependency files should be cleaned up + File.Exists(testFile1).Should().BeFalse("Maven dependency file 1 should be cleaned up"); + File.Exists(testFile2).Should().BeFalse("Maven dependency file 2 should be cleaned up"); + + // Non-Maven files should remain + File.Exists(nonMavenFile).Should().BeTrue("Non-Maven files should not be cleaned up"); + + // Verify both detectors were executed + mvnCliDetectorMock.Verify(x => x.ExecuteDetectorAsync(It.IsAny(), It.IsAny()), Times.Once); + mavenWithFallbackDetectorMock.Verify(x => x.ExecuteDetectorAsync(It.IsAny(), It.IsAny()), Times.Once); + } + finally + { + // Cleanup test directory + if (Directory.Exists(tempDir)) + { + Directory.Delete(tempDir, true); + } + } + } + + [TestMethod] + public async Task ProcessDetectorsAsync_WithoutMavenDetectors_ShouldNotCleanupMavenFiles() + { + // Arrange - Create a temporary directory with mock Maven dependency files + var tempDir = Path.Combine(Path.GetTempPath(), $"maven_cleanup_test_{Guid.NewGuid()}"); + Directory.CreateDirectory(tempDir); + + // Use bcde.mvndeps - the specific filename generated by MavenCommandService + var testFile = Path.Combine(tempDir, "bcde.mvndeps"); + await File.WriteAllTextAsync(testFile, "maven dependency content"); + + try + { + var tempDirInfo = new DirectoryInfo(tempDir); + var scanSettings = new ScanSettings + { + SourceDirectory = tempDirInfo, + DetectorArgs = new Dictionary(), + CleanupCreatedFiles = true, + }; + + // Create mock non-Maven detector + var npmDetectorMock = new Mock(); + npmDetectorMock.SetupAllProperties(); + npmDetectorMock.SetupGet(x => x.Id).Returns("Npm"); + npmDetectorMock.SetupGet(x => x.Categories).Returns(["Npm"]); + npmDetectorMock.Setup(x => x.ExecuteDetectorAsync(It.IsAny(), It.IsAny())) + .ReturnsAsync(new IndividualDetectorScanResult { ResultCode = ProcessingResultCode.Success }); + + var detectors = new List { npmDetectorMock.Object, }; + + // Verify file exists before running detectors + File.Exists(testFile).Should().BeTrue("Maven dependency file should exist before processing"); + + // Act + var result = await this.serviceUnderTest.ProcessDetectorsAsync(scanSettings, detectors, new DetectorRestrictions()); + + // Assert + result.Should().NotBeNull(); + result.ResultCode.Should().Be(ProcessingResultCode.Success); + + // Maven dependency file should NOT be cleaned up when no Maven detectors are present + File.Exists(testFile).Should().BeTrue("Maven dependency file should remain when no Maven detectors are running"); + + // Verify detector was executed + npmDetectorMock.Verify(x => x.ExecuteDetectorAsync(It.IsAny(), It.IsAny()), Times.Once); + } + finally + { + // Cleanup test directory + if (Directory.Exists(tempDir)) + { + Directory.Delete(tempDir, true); + } + } + } + + [TestMethod] + public async Task ProcessDetectorsAsync_OnlyOneMavenDetector_ShouldStillCleanupMavenFiles() + { + // Arrange - Create a temporary directory with mock Maven dependency files + var tempDir = Path.Combine(Path.GetTempPath(), $"maven_cleanup_test_{Guid.NewGuid()}"); + Directory.CreateDirectory(tempDir); + + // Use bcde.mvndeps - the specific filename generated by MavenCommandService + var testFile = Path.Combine(tempDir, "bcde.mvndeps"); + await File.WriteAllTextAsync(testFile, "maven dependency content"); + + try + { + var tempDirInfo = new DirectoryInfo(tempDir); + var scanSettings = new ScanSettings + { + SourceDirectory = tempDirInfo, + DetectorArgs = new Dictionary(), + CleanupCreatedFiles = true, + }; + + // Create mock Maven detector (only one) + var mvnCliDetectorMock = new Mock(); + mvnCliDetectorMock.SetupAllProperties(); + mvnCliDetectorMock.SetupGet(x => x.Id).Returns("MvnCli"); + mvnCliDetectorMock.SetupGet(x => x.Categories).Returns(["Maven"]); + mvnCliDetectorMock.Setup(x => x.ExecuteDetectorAsync(It.IsAny(), It.IsAny())) + .ReturnsAsync(new IndividualDetectorScanResult { ResultCode = ProcessingResultCode.Success }); + + var detectors = new List { mvnCliDetectorMock.Object, }; + + // Verify file exists before running detectors + File.Exists(testFile).Should().BeTrue("Maven dependency file should exist before cleanup"); + + // Act + var result = await this.serviceUnderTest.ProcessDetectorsAsync(scanSettings, detectors, new DetectorRestrictions()); + + // Assert + result.Should().NotBeNull(); + result.ResultCode.Should().Be(ProcessingResultCode.Success); + + // Maven dependency file should be cleaned up even with only one Maven detector + File.Exists(testFile).Should().BeFalse("Maven dependency file should be cleaned up"); + + // Verify detector was executed + mvnCliDetectorMock.Verify(x => x.ExecuteDetectorAsync(It.IsAny(), It.IsAny()), Times.Once); + } + finally + { + // Cleanup test directory + if (Directory.Exists(tempDir)) + { + Directory.Delete(tempDir, true); + } + } + } }