diff --git a/server/sonar-ce-task-projectanalysis/src/main/java/org/sonar/ce/task/projectanalysis/source/GitDiffFinder.java b/server/sonar-ce-task-projectanalysis/src/main/java/org/sonar/ce/task/projectanalysis/source/GitDiffFinder.java
new file mode 100644
index 000000000000..3055fa4db851
--- /dev/null
+++ b/server/sonar-ce-task-projectanalysis/src/main/java/org/sonar/ce/task/projectanalysis/source/GitDiffFinder.java
@@ -0,0 +1,237 @@
+/*
+ * SonarQube
+ * Copyright (C) 2009-2024 SonarSource SA
+ * mailto:info AT sonarsource DOT com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+package org.sonar.ce.task.projectanalysis.source;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Computes line-level mapping between the previous (DB) and current (report) version of a file
+ * by shelling out to {@code git diff --no-index --histogram} and parsing its unified-diff output.
+ *
+ * Returned array is indexed by current-file line (0-based): matchingLineArray[i] = the previous-file
+ * line number (1-based) that current line i+1 corresponds to, or 0 if the current line was added.
+ */
+class GitDiffFinder {
+
+  private static final Logger LOG = LoggerFactory.getLogger(GitDiffFinder.class);
+
+  // Matches a unified-diff hunk header, e.g. "@@ -12,5 +14,7 @@"
+  //   group 1 = previous-file start line
+  //   group 2 = previous-file line count (optional; defaults to 1 when omitted)
+  //   group 3 = current-file start line
+  //   group 4 = current-file line count (optional; defaults to 1 when omitted)
+  private static final Pattern HUNK_HEADER_PATTERN = Pattern.compile(
+    "^@@ -(\\d+)(?:,(\\d+))? \\+(\\d+)(?:,(\\d+))? @@");
+
+  /**
+   * Maps every line of the current version to its counterpart in the previous version.
+   *
+   * @param previousVersionLines lines of the previous (DB) version
+   * @param currentVersionLines lines of the current (report) version
+   * @return array with one entry per current line: the 1-based previous line number, or 0 when unmatched
+   * @throws IOException if the temp files cannot be written, or git cannot be started or exits with an error
+   * @throws InterruptedException if the thread is interrupted while waiting for git
+   */
+  int[] findMatchingLines(List<String> previousVersionLines, List<String> currentVersionLines)
+    throws IOException, InterruptedException {
+
+    int totalReportLines = currentVersionLines.size();
+    int totalDbLines = previousVersionLines.size();
+
+    int[] matchingLineArray = new int[totalReportLines];
+
+    // Nothing can match when either side is empty: every entry stays 0.
+    if (previousVersionLines.isEmpty() || currentVersionLines.isEmpty()) {
+      return matchingLineArray;
+    }
+
+    Path diffWorkingDirectory = null;
+    try {
+      diffWorkingDirectory = Files.createTempDirectory("sonar-histogram-diff-");
+      Path previousVersionFile = diffWorkingDirectory.resolve("previous.txt");
+      Path currentVersionFile = diffWorkingDirectory.resolve("current.txt");
+
+      Files.write(previousVersionFile, previousVersionLines, StandardCharsets.UTF_8);
+      Files.write(currentVersionFile, currentVersionLines, StandardCharsets.UTF_8);
+
+      executeDiffAndParseOutput(previousVersionFile, currentVersionFile, matchingLineArray, totalDbLines,
+        totalReportLines);
+
+    } finally {
+      deleteTempFilesAndDirectory(diffWorkingDirectory);
+    }
+
+    return matchingLineArray;
+  }
+
+  /**
+   * Runs {@code git diff --no-index --histogram} on the two files and feeds its output to the parser.
+   * The child process is always destroyed before returning, including when parsing fails or the
+   * thread is interrupted.
+   */
+  private void executeDiffAndParseOutput(Path previousVersionFile, Path currentVersionFile, int[] matchingLineArray,
+    int totalDbLines, int totalReportLines) throws IOException, InterruptedException {
+
+    // --text: content containing NUL bytes would otherwise be reported as "Binary files ... differ"
+    // with no hunks, which the parser below would read as "every line unchanged".
+    // --no-ext-diff: a user-configured external diff driver would not emit unified-diff output.
+    ProcessBuilder gitDiffProcess = new ProcessBuilder("git", "diff", "--no-index", "--no-color", "--no-ext-diff",
+      "--text", "--histogram",
+      previousVersionFile.toAbsolutePath().toString(), currentVersionFile.toAbsolutePath().toString());
+    gitDiffProcess.redirectError(ProcessBuilder.Redirect.DISCARD);
+
+    Process runningProcess = gitDiffProcess.start();
+    try {
+      try (BufferedReader diffOutputReader = new BufferedReader(
+        new InputStreamReader(runningProcess.getInputStream(), StandardCharsets.UTF_8))) {
+        parseUnifiedDiffOutput(diffOutputReader, matchingLineArray, totalDbLines, totalReportLines);
+      }
+
+      // git diff --no-index exits with 0 (identical) or 1 (differences found); anything else is an error
+      int processExitCode = runningProcess.waitFor();
+      if (processExitCode != 0 && processExitCode != 1) {
+        throw new IOException("git diff --no-index failed with exit code " + processExitCode);
+      }
+    } finally {
+      // no action is taken once the process has exited; otherwise this prevents leaking a running git process
+      runningProcess.destroyForcibly();
+    }
+  }
+
+  /**
+   * Walks the unified-diff output and records, for every unchanged current line, its previous line number.
+   * Lines outside hunks are paired one-to-one; inside a hunk only context lines (prefix ' ') are paired,
+   * while '+' and '-' lines just advance the current or previous position respectively.
+   */
+  private static void parseUnifiedDiffOutput(BufferedReader diffOutputReader, int[] matchingLineArray,
+    int totalDbLines, int totalReportLines) throws IOException {
+
+    int currentDbLinePosition = 1;
+    int currentReportLinePosition = 1;
+
+    String rawDiffLine;
+    boolean insideHunk = false;
+
+    int hunkStartInPreviousFile = 0;
+    int hunkLengthInPreviousFile = 0;
+    int hunkStartInCurrentFile = 0;
+    int hunkLengthInCurrentFile = 0;
+
+    while ((rawDiffLine = diffOutputReader.readLine()) != null) {
+      Matcher hunkHeaderMatcher = HUNK_HEADER_PATTERN.matcher(rawDiffLine);
+
+      if (hunkHeaderMatcher.find()) {
+        if (insideHunk) {
+          // jump to the first line after the hunk that just ended, as declared by its header
+          currentDbLinePosition = hunkStartInPreviousFile + hunkLengthInPreviousFile;
+          currentReportLinePosition = hunkStartInCurrentFile + hunkLengthInCurrentFile;
+        }
+
+        hunkStartInPreviousFile = Integer.parseInt(hunkHeaderMatcher.group(1));
+        hunkLengthInPreviousFile =
+          hunkHeaderMatcher.group(2) != null ? Integer.parseInt(hunkHeaderMatcher.group(2)) : 1;
+        hunkStartInCurrentFile = Integer.parseInt(hunkHeaderMatcher.group(3));
+        hunkLengthInCurrentFile =
+          hunkHeaderMatcher.group(4) != null ? Integer.parseInt(hunkHeaderMatcher.group(4)) : 1;
+
+        fillIdenticalLinesBetweenHunks(matchingLineArray, currentDbLinePosition, currentReportLinePosition,
+          hunkStartInPreviousFile, hunkStartInCurrentFile);
+
+        currentDbLinePosition = hunkStartInPreviousFile;
+        currentReportLinePosition = hunkStartInCurrentFile;
+        insideHunk = true;
+
+      } else if (insideHunk && !rawDiffLine.isEmpty()) {
+        char lineTypePrefix = rawDiffLine.charAt(0);
+
+        if (lineTypePrefix == ' ') {
+          int reportArrayIndex = currentReportLinePosition - 1;
+          if (reportArrayIndex >= 0 && reportArrayIndex < matchingLineArray.length) {
+            matchingLineArray[reportArrayIndex] = currentDbLinePosition;
+          }
+          currentDbLinePosition++;
+          currentReportLinePosition++;
+
+        } else if (lineTypePrefix == '+') {
+          currentReportLinePosition++;
+
+        } else if (lineTypePrefix == '-') {
+          currentDbLinePosition++;
+        }
+      }
+    }
+
+    if (insideHunk) {
+      currentDbLinePosition = hunkStartInPreviousFile + hunkLengthInPreviousFile;
+      currentReportLinePosition = hunkStartInCurrentFile + hunkLengthInCurrentFile;
+    }
+    // everything after the last hunk (or the whole file when no hunk was seen) is paired one-to-one
+    fillIdenticalLinesBetweenHunks(matchingLineArray, currentDbLinePosition, currentReportLinePosition,
+      totalDbLines + 1, totalReportLines + 1);
+  }
+
+  /**
+   * Pairs previous and current lines one-to-one, starting at the given 1-based positions and stopping
+   * as soon as either (exclusive) upper bound is reached.
+   */
+  private static void fillIdenticalLinesBetweenHunks(int[] matchingLineArray, int fromDbLine, int fromReportLine,
+    int untilDbLine, int untilReportLine) {
+
+    int dbLinePointer = fromDbLine;
+    int reportLinePointer = fromReportLine;
+
+    while (dbLinePointer < untilDbLine && reportLinePointer < untilReportLine) {
+      int reportArrayIndex = reportLinePointer - 1;
+      if (reportArrayIndex >= 0 && reportArrayIndex < matchingLineArray.length) {
+        matchingLineArray[reportArrayIndex] = dbLinePointer;
+      }
+      dbLinePointer++;
+      reportLinePointer++;
+    }
+  }
+
+  /**
+   * Best-effort removal of the temp files and their directory; a failure is logged, never thrown.
+   */
+  private static void deleteTempFilesAndDirectory(Path diffWorkingDirectory) {
+    if (diffWorkingDirectory == null) {
+      return;
+    }
+    try {
+      Files.deleteIfExists(diffWorkingDirectory.resolve("previous.txt"));
+      Files.deleteIfExists(diffWorkingDirectory.resolve("current.txt"));
+      Files.deleteIfExists(diffWorkingDirectory);
+    } catch (IOException cleanupException) {
+      LOG.warn("Git Diff Temp file cleanup failed for directory '{}'. "
+        + "Reason: {}. Non-fatal — OS will reclaim on reboot.", diffWorkingDirectory,
+        cleanupException.getMessage());
+    }
+  }
+}
diff --git a/server/sonar-ce-task-projectanalysis/src/main/java/org/sonar/ce/task/projectanalysis/source/SourceLinesDiffImpl.java b/server/sonar-ce-task-projectanalysis/src/main/java/org/sonar/ce/task/projectanalysis/source/SourceLinesDiffImpl.java
index 2d1d6c54e630..0b2d9721fc77 100644
--- a/server/sonar-ce-task-projectanalysis/src/main/java/org/sonar/ce/task/projectanalysis/source/SourceLinesDiffImpl.java
+++ b/server/sonar-ce-task-projectanalysis/src/main/java/org/sonar/ce/task/projectanalysis/source/SourceLinesDiffImpl.java
@@ -19,52 +19,131 @@
  */
 package org.sonar.ce.task.projectanalysis.source;
 
+import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
 import java.util.Optional;
+import org.apache.commons.lang3.StringUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.sonar.api.config.Configuration;
 import org.sonar.ce.task.projectanalysis.analysis.AnalysisMetadataHolder;
 import org.sonar.ce.task.projectanalysis.component.Component;
+import org.sonar.ce.task.projectanalysis.component.ConfigurationRepository;
 import org.sonar.ce.task.projectanalysis.period.NewCodeReferenceBranchComponentUuids;
 import org.sonar.ce.task.projectanalysis.component.ReferenceBranchComponentUuids;
 import org.sonar.ce.task.projectanalysis.filemove.MovedFilesRepository;
 import org.sonar.ce.task.projectanalysis.period.PeriodHolder;
+import org.sonar.core.util.CloseableIterator;
 import org.sonar.db.DbClient;
 import org.sonar.db.DbSession;
 import org.sonar.db.newcodeperiod.NewCodePeriodType;
+import org.sonar.db.protobuf.DbFileSources;
 import org.sonar.db.source.FileSourceDao;
+import org.sonar.db.source.FileSourceDto;
 
 public class SourceLinesDiffImpl implements SourceLinesDiff {
 
+  private static final Logger LOG = LoggerFactory.getLogger(SourceLinesDiffImpl.class);
+  private static final String KEY_CODESCAN_GITCLI_ENABLED = "codescan.gitcli.enabled";
+  private static final String KEY_SFMETA_FILE_SUFFIXES = "sonar.sfmeta.file.suffixes";
+
   private final DbClient dbClient;
   private final FileSourceDao fileSourceDao;
   private final SourceLinesHashRepository sourceLinesHash;
+  private final SourceLinesRepository sourceLinesRepository;
   private final ReferenceBranchComponentUuids referenceBranchComponentUuids;
   private final MovedFilesRepository movedFilesRepository;
   private final AnalysisMetadataHolder analysisMetadataHolder;
   private final PeriodHolder periodHolder;
   private final NewCodeReferenceBranchComponentUuids newCodeReferenceBranchComponentUuids;
+  private final ConfigurationRepository configurationRepository;
 
-  public SourceLinesDiffImpl(DbClient dbClient, FileSourceDao fileSourceDao, SourceLinesHashRepository sourceLinesHash, ReferenceBranchComponentUuids referenceBranchComponentUuids,
+  public SourceLinesDiffImpl(DbClient dbClient, FileSourceDao fileSourceDao, SourceLinesHashRepository sourceLinesHash,
+    SourceLinesRepository sourceLinesRepository, ReferenceBranchComponentUuids referenceBranchComponentUuids,
     MovedFilesRepository movedFilesRepository, AnalysisMetadataHolder analysisMetadataHolder, PeriodHolder periodHolder,
-    NewCodeReferenceBranchComponentUuids newCodeReferenceBranchComponentUuids) {
+    NewCodeReferenceBranchComponentUuids newCodeReferenceBranchComponentUuids, ConfigurationRepository configurationRepository) {
     this.dbClient = dbClient;
     this.fileSourceDao = fileSourceDao;
     this.sourceLinesHash = sourceLinesHash;
+    this.sourceLinesRepository = sourceLinesRepository;
     this.referenceBranchComponentUuids = referenceBranchComponentUuids;
     this.movedFilesRepository = movedFilesRepository;
     this.analysisMetadataHolder = analysisMetadataHolder;
     this.periodHolder = periodHolder;
     this.newCodeReferenceBranchComponentUuids = newCodeReferenceBranchComponentUuids;
+    this.configurationRepository = configurationRepository;
   }
 
   @Override
   public int[] computeMatchingLines(Component component) {
+    if (isGitCliEnabled() && isSalesforceMetadataFile(component)) {
+      return computeWithHistogramDiff(component);
+    }
+    return computeWithMyersDiff(component);
+  }
+
+  private int[] computeWithMyersDiff(Component component) {
+    LOG.debug("Diff started for {} using Myers algorithm", component.getKey());
     List<String> database = getDBLines(component);
     List<String> report = getReportLines(component);
 
     return new SourceLinesDiffFinder().findMatchingLines(database, report);
   }
 
+  private int[] computeWithHistogramDiff(Component component) {
+    LOG.debug("Diff started for {} using Histogram (git-cli) algorithm", component.getKey());
+    try {
+      List<String> dbSourceLines = getDBSourceContent(component);
+      List<String> reportSourceLines = getReportSourceContent(component);
+
+      if (dbSourceLines.isEmpty() && reportSourceLines.isEmpty()) {
+        return new int[0];
+      }
+
+      if (dbSourceLines.isEmpty()) {
+        return new int[reportSourceLines.size()];
+      }
+
+      return new GitDiffFinder().findMatchingLines(dbSourceLines, reportSourceLines);
+
+    } catch (InterruptedException e) {
+      Thread.currentThread().interrupt();
+      LOG.warn("git-cli diff interrupted for {}, falling back to Myers", component.getKey());
+      return computeWithMyersDiff(component);
+    } catch (IOException e) {
+      LOG.warn("git-cli diff failed for {}, falling back to Myers", component.getKey(), e);
+      return computeWithMyersDiff(component);
+    }
+  }
+
+  private boolean isGitCliEnabled() {
+    Configuration config = configurationRepository.getConfiguration();
+    return config.getBoolean(KEY_CODESCAN_GITCLI_ENABLED).orElse(false);
+  }
+
+  private boolean isSalesforceMetadataFile(Component component) {
+    String fileName = component.getName();
+    if (StringUtils.isBlank(fileName)) {
+      return false;
+    }
+    String[] suffixes = configurationRepository.getConfiguration().getStringArray(KEY_SFMETA_FILE_SUFFIXES);
+    if (suffixes == null) {
+      return false;
+    }
+    for (String suffix : suffixes) {
+      if (StringUtils.isBlank(suffix)) {
+        continue;
+      }
+      String normalizedSuffix = suffix.startsWith(".") ? suffix : "." + suffix;
+      if (fileName.endsWith(normalizedSuffix)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
   private List<String> getDBLines(Component component) {
     try (DbSession dbSession = dbClient.openSession(false)) {
       String uuid;
@@ -93,4 +172,49 @@ private List<String> getReportLines(Component component) {
     return sourceLinesHash.getLineHashesMatchingDBVersion(component);
   }
 
+  private List<String> getDBSourceContent(Component component) {
+    try (DbSession dbSession = dbClient.openSession(false)) {
+      String uuid;
+      if (analysisMetadataHolder.isPullRequest()) {
+        uuid = referenceBranchComponentUuids.getComponentUuid(component.getKey());
+      } else if (periodHolder.hasPeriod() && periodHolder.getPeriod().getMode().equals(NewCodePeriodType.REFERENCE_BRANCH.name())) {
+        uuid = newCodeReferenceBranchComponentUuids.getComponentUuid(component.getKey());
+      } else {
+        Optional<MovedFilesRepository.OriginalFile> originalFile = movedFilesRepository.getOriginalFile(component);
+        uuid = originalFile.map(MovedFilesRepository.OriginalFile::uuid).orElse(component.getUuid());
+      }
+
+      if (uuid == null) {
+        return Collections.emptyList();
+      }
+
+      FileSourceDto dto = fileSourceDao.selectByFileUuid(dbSession, uuid);
+      if (dto == null) {
+        return Collections.emptyList();
+      }
+
+      DbFileSources.Data sourceData = dto.getSourceData();
+      if (sourceData == null) {
+        return Collections.emptyList();
+      }
+
+      List<DbFileSources.Line> lines = sourceData.getLinesList();
+      List<String> result = new ArrayList<>(lines.size());
+      for (DbFileSources.Line line : lines) {
+        result.add(line.hasSource() ? line.getSource() : "");
+      }
+      return result;
+    }
+  }
+
+  private List<String> getReportSourceContent(Component component) {
+    List<String> lines = new ArrayList<>();
+    try (CloseableIterator<String> iterator = sourceLinesRepository.readLines(component)) {
+      while (iterator.hasNext()) {
+        lines.add(iterator.next());
+      }
+    }
+    return lines;
+  }
+
 }