Skip to content
Open
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
/*
* SonarQube
* Copyright (C) 2009-2024 SonarSource SA
* mailto:info AT sonarsource DOT com
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 3 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package org.sonar.ce.task.projectanalysis.source;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* Computes line-level mapping between the previous (DB) and current (report) version of a file
* by shelling out to {@code git diff --no-index --histogram} and parsing its unified-diff output.
*
* Returned array is indexed by current-file line (0-based): matchingLineArray[i] = the previous-file
* line number (1-based) that current line i+1 corresponds to, or 0 if the current line was added.
*/
class GitDiffFinder {

private static final Logger LOG = LoggerFactory.getLogger(GitDiffFinder.class);

// Matches a unified-diff hunk header, e.g. "@@ -12,5 +14,7 @@"
// group 1 = previous-file start line
// group 2 = previous-file line count (optional; defaults to 1 when omitted)
// group 3 = current-file start line
// group 4 = current-file line count (optional; defaults to 1 when omitted)
private static final Pattern HUNK_HEADER_PATTERN = Pattern.compile(
"^@@ -(\\d+)(?:,(\\d+))? \\+(\\d+)(?:,(\\d+))? @@");

/**
 * Maps every line of the current version to its counterpart in the previous version.
 *
 * <p>Both versions are written to a freshly created temporary directory, diffed with the git
 * command line, and the temporary files are removed again whether or not the diff succeeded.
 *
 * @param previousVersionLines lines of the previous version of the file
 * @param currentVersionLines lines of the current version of the file
 * @return an array with one slot per current line; slot {@code i} holds the 1-based previous-file
 *         line matched to current line {@code i + 1}, or 0 when there is no match. When either
 *         input is empty the array is returned with all slots at 0.
 * @throws IOException if the temporary files cannot be written or the diff fails
 * @throws InterruptedException if the thread is interrupted while waiting for the diff process
 */
int[] findMatchingLines(List<String> previousVersionLines, List<String> currentVersionLines)
  throws IOException, InterruptedException {

  int reportLineCount = currentVersionLines.size();
  int dbLineCount = previousVersionLines.size();
  int[] lineMapping = new int[reportLineCount];

  boolean nothingToCompare = previousVersionLines.isEmpty() || currentVersionLines.isEmpty();
  if (!nothingToCompare) {
    // Stays null if directory creation itself fails; the cleanup helper tolerates that.
    Path scratchDirectory = null;
    try {
      scratchDirectory = Files.createTempDirectory("sonar-histogram-diff-");
      Path before = scratchDirectory.resolve("previous.txt");
      Path after = scratchDirectory.resolve("current.txt");

      Files.write(before, previousVersionLines, StandardCharsets.UTF_8);
      Files.write(after, currentVersionLines, StandardCharsets.UTF_8);

      executeDiffAndParseOutput(before, after, lineMapping, dbLineCount, reportLineCount);
    } finally {
      deleteTempFilesAndDirectory(scratchDirectory);
    }
  }

  return lineMapping;
}

/**
 * Runs {@code git diff --no-index --histogram} on the two files and parses its standard output
 * into {@code matchingLineArray}.
 *
 * <p>The command line pins the options that would otherwise be taken from the user's git
 * configuration and change the shape of the output: {@code --no-ext-diff} (no external diff
 * driver), {@code --text} (never fall back to "Binary files differ", which contains no hunks and
 * would be read as "nothing changed") and {@code --unified=3} (fixed amount of context).
 *
 * @param previousVersionFile file holding the previous version
 * @param currentVersionFile file holding the current version
 * @param matchingLineArray mapping array to fill, one slot per current-file line
 * @param totalDbLines number of lines in the previous version
 * @param totalReportLines number of lines in the current version
 * @throws IOException if git cannot be started, its output cannot be read, or it exits with a
 *         status greater than 1
 * @throws InterruptedException if the thread is interrupted while waiting for git to exit
 */
private void executeDiffAndParseOutput(Path previousVersionFile, Path currentVersionFile, int[] matchingLineArray,
  int totalDbLines, int totalReportLines) throws IOException, InterruptedException {

  ProcessBuilder gitDiffProcess = new ProcessBuilder("git", "diff", "--no-index", "--no-color", "--histogram",
    "--no-ext-diff", "--text", "--unified=3",
    previousVersionFile.toAbsolutePath().toString(), currentVersionFile.toAbsolutePath().toString());
  // stderr is never read, so it must not be left as a pipe that could fill up and block git.
  gitDiffProcess.redirectError(ProcessBuilder.Redirect.DISCARD);

  Process runningProcess = gitDiffProcess.start();
  try {
    try (BufferedReader diffOutputReader = new BufferedReader(
      new InputStreamReader(runningProcess.getInputStream(), StandardCharsets.UTF_8))) {
      parseUnifiedDiffOutput(diffOutputReader, matchingLineArray, totalDbLines, totalReportLines);
    }

    // With --no-index, exit status 0 means "identical" and 1 means "different"; anything above is an error.
    int processExitCode = runningProcess.waitFor();
    if (processExitCode > 1) {
      throw new IOException("git diff --no-index failed with exit code " + processExitCode);
    }
  } finally {
    // Do not leave a git process behind when parsing failed or the wait was interrupted.
    // This is a no-op when the process has already exited.
    runningProcess.destroy();
  }
}

/**
 * Reads a unified diff and records, for every current-file line that is unchanged, the
 * previous-file line it corresponds to.
 *
 * <p>Lines outside of hunks are identical in both files and are mapped one-to-one. Inside a hunk,
 * context lines are mapped, added lines keep their default of 0, and removed lines only advance
 * the previous-file position. Anything read before the first hunk header (file headers) is ignored.
 *
 * <p>Two format details are handled explicitly:
 * <ul>
 *   <li>A range with a count of 0 (for example {@code -3,0}) names the line <em>before</em> the
 *   empty range, so the range itself begins one line later.</li>
 *   <li>A completely empty line inside a hunk is an empty context line whose leading space was
 *   dropped (git does this when {@code diff.suppressBlankEmpty} is enabled).</li>
 * </ul>
 *
 * @param diffOutputReader reader positioned at the start of the diff output
 * @param matchingLineArray mapping array to fill; index {@code i} describes current line {@code i + 1}
 * @param totalDbLines number of lines in the previous file
 * @param totalReportLines number of lines in the current file
 * @throws IOException if reading from {@code diffOutputReader} fails
 */
private static void parseUnifiedDiffOutput(BufferedReader diffOutputReader, int[] matchingLineArray,
  int totalDbLines, int totalReportLines) throws IOException {

  // First line of each file (1-based) that lies after the last hunk seen so far.
  int resumeDbLine = 1;
  int resumeReportLine = 1;

  // Line numbers the next hunk body line refers to on each side.
  int dbCursor = 0;
  int reportCursor = 0;

  // Body lines still expected for the current hunk on each side; both are 0 outside of a hunk.
  int dbLinesLeftInHunk = 0;
  int reportLinesLeftInHunk = 0;

  String rawDiffLine;
  while ((rawDiffLine = diffOutputReader.readLine()) != null) {
    Matcher hunkHeaderMatcher = HUNK_HEADER_PATTERN.matcher(rawDiffLine);

    if (hunkHeaderMatcher.find()) {
      int hunkLengthInPreviousFile =
        hunkHeaderMatcher.group(2) != null ? Integer.parseInt(hunkHeaderMatcher.group(2)) : 1;
      int hunkLengthInCurrentFile =
        hunkHeaderMatcher.group(4) != null ? Integer.parseInt(hunkHeaderMatcher.group(4)) : 1;

      // An empty range is reported with the number of the line preceding it.
      int hunkStartInPreviousFile =
        Integer.parseInt(hunkHeaderMatcher.group(1)) + (hunkLengthInPreviousFile == 0 ? 1 : 0);
      int hunkStartInCurrentFile =
        Integer.parseInt(hunkHeaderMatcher.group(3)) + (hunkLengthInCurrentFile == 0 ? 1 : 0);

      // Everything between the end of the previous hunk and the start of this one is unchanged.
      fillIdenticalLinesBetweenHunks(matchingLineArray, resumeDbLine, resumeReportLine,
        hunkStartInPreviousFile, hunkStartInCurrentFile);

      dbCursor = hunkStartInPreviousFile;
      reportCursor = hunkStartInCurrentFile;
      dbLinesLeftInHunk = hunkLengthInPreviousFile;
      reportLinesLeftInHunk = hunkLengthInCurrentFile;

      // The header states where the hunk ends, independently of how its body is parsed.
      resumeDbLine = hunkStartInPreviousFile + hunkLengthInPreviousFile;
      resumeReportLine = hunkStartInCurrentFile + hunkLengthInCurrentFile;
      continue;
    }

    if (dbLinesLeftInHunk == 0 && reportLinesLeftInHunk == 0) {
      // File headers before the first hunk, or stray output after a complete hunk.
      continue;
    }

    char lineTypePrefix = rawDiffLine.isEmpty() ? ' ' : rawDiffLine.charAt(0);

    if (lineTypePrefix == ' ' && dbLinesLeftInHunk > 0 && reportLinesLeftInHunk > 0) {
      int reportArrayIndex = reportCursor - 1;
      if (reportArrayIndex >= 0 && reportArrayIndex < matchingLineArray.length) {
        matchingLineArray[reportArrayIndex] = dbCursor;
      }
      dbCursor++;
      reportCursor++;
      dbLinesLeftInHunk--;
      reportLinesLeftInHunk--;

    } else if (lineTypePrefix == '+' && reportLinesLeftInHunk > 0) {
      reportCursor++;
      reportLinesLeftInHunk--;

    } else if (lineTypePrefix == '-' && dbLinesLeftInHunk > 0) {
      dbCursor++;
      dbLinesLeftInHunk--;
    }
    // Any other line (e.g. "\ No newline at end of file") belongs to neither file.
  }

  // Lines after the last hunk (or all lines, when there was no hunk) are unchanged.
  fillIdenticalLinesBetweenHunks(matchingLineArray, resumeDbLine, resumeReportLine,
    totalDbLines + 1, totalReportLines + 1);
}

/**
 * Maps a run of unchanged lines one-to-one, walking both files in lock step.
 *
 * <p>The walk starts at {@code fromDbLine} / {@code fromReportLine} and stops as soon as either
 * side reaches its exclusive upper bound. Report lines that fall outside the array are skipped
 * without stopping the walk.
 *
 * @param matchingLineArray mapping array to fill; index {@code i} describes current line {@code i + 1}
 * @param fromDbLine first previous-file line of the run (1-based, inclusive)
 * @param fromReportLine first current-file line of the run (1-based, inclusive)
 * @param untilDbLine previous-file line at which the run ends (exclusive)
 * @param untilReportLine current-file line at which the run ends (exclusive)
 */
private static void fillIdenticalLinesBetweenHunks(int[] matchingLineArray, int fromDbLine, int fromReportLine,
  int untilDbLine, int untilReportLine) {

  for (int dbLine = fromDbLine, reportLine = fromReportLine;
    dbLine < untilDbLine && reportLine < untilReportLine;
    dbLine++, reportLine++) {

    int slot = reportLine - 1;
    if (slot < 0 || slot >= matchingLineArray.length) {
      continue;
    }
    matchingLineArray[slot] = dbLine;
  }
}

/**
 * Removes the two temporary input files and then the directory that contained them.
 *
 * <p>Every deletion is attempted even if an earlier one failed, so that a problem with one file
 * does not leave the other file on disk. Failures are not propagated: the first one is logged as
 * a warning (later ones are attached to it as suppressed exceptions).
 *
 * @param diffWorkingDirectory the temporary directory, or {@code null} if it was never created,
 *        in which case nothing is done
 */
private static void deleteTempFilesAndDirectory(Path diffWorkingDirectory) {
  if (diffWorkingDirectory == null) {
    return;
  }

  // Files first: the directory can only be removed once it is empty.
  Path[] deletionOrder = {
    diffWorkingDirectory.resolve("previous.txt"),
    diffWorkingDirectory.resolve("current.txt"),
    diffWorkingDirectory
  };

  IOException cleanupException = null;
  for (Path target : deletionOrder) {
    try {
      Files.deleteIfExists(target);
    } catch (IOException deletionFailure) {
      if (cleanupException == null) {
        cleanupException = deletionFailure;
      } else {
        cleanupException.addSuppressed(deletionFailure);
      }
    }
  }

  if (cleanupException != null) {
    LOG.warn("Git Diff Temp file cleanup failed for directory '{}'. "
      + "Reason: {}. Non-fatal — OS will reclaim on reboot.", diffWorkingDirectory,
      cleanupException.getMessage());
  }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,52 +19,131 @@
*/
package org.sonar.ce.task.projectanalysis.source;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Optional;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.sonar.api.config.Configuration;
import org.sonar.ce.task.projectanalysis.analysis.AnalysisMetadataHolder;
import org.sonar.ce.task.projectanalysis.component.Component;
import org.sonar.ce.task.projectanalysis.component.ConfigurationRepository;
import org.sonar.ce.task.projectanalysis.period.NewCodeReferenceBranchComponentUuids;
import org.sonar.ce.task.projectanalysis.component.ReferenceBranchComponentUuids;
import org.sonar.ce.task.projectanalysis.filemove.MovedFilesRepository;
import org.sonar.ce.task.projectanalysis.period.PeriodHolder;
import org.sonar.core.util.CloseableIterator;
import org.sonar.db.DbClient;
import org.sonar.db.DbSession;
import org.sonar.db.newcodeperiod.NewCodePeriodType;
import org.sonar.db.protobuf.DbFileSources;
import org.sonar.db.source.FileSourceDao;
import org.sonar.db.source.FileSourceDto;

public class SourceLinesDiffImpl implements SourceLinesDiff {

private static final Logger LOG = LoggerFactory.getLogger(SourceLinesDiffImpl.class);
// Boolean setting (treated as false when absent): when true, files matching one of the configured
// suffixes are diffed with GitDiffFinder instead of SourceLinesDiffFinder.
private static final String KEY_CODESCAN_GITCLI_ENABLED = "codescan.gitcli.enabled";
// String-array setting listing the file-name suffixes eligible for the git-cli diff; a suffix may
// be written with or without its leading dot.
private static final String KEY_SFMETA_FILE_SUFFIXES = "sonar.sfmeta.file.suffixes";

// Collaborators supplied through the constructor.
private final DbClient dbClient;
private final FileSourceDao fileSourceDao;
// Provides the line hashes of the file as found in the analysis report.
private final SourceLinesHashRepository sourceLinesHash;
// Provides the raw source lines of the file as found in the analysis report.
private final SourceLinesRepository sourceLinesRepository;
// Resolves the component UUID to compare against when analysing a pull request.
private final ReferenceBranchComponentUuids referenceBranchComponentUuids;
// Gives the original file of a component that was moved, if any.
private final MovedFilesRepository movedFilesRepository;
private final AnalysisMetadataHolder analysisMetadataHolder;
private final PeriodHolder periodHolder;
// Resolves the component UUID to compare against when the period mode is REFERENCE_BRANCH.
private final NewCodeReferenceBranchComponentUuids newCodeReferenceBranchComponentUuids;
// Source of the settings read through the two keys above.
private final ConfigurationRepository configurationRepository;

/**
 * Creates the diff service with all of its collaborators; each argument is stored as-is in the
 * field of the same name.
 *
 * @param dbClient used to open database sessions
 * @param fileSourceDao used to load the stored source of a file
 * @param sourceLinesHash provider of the report-side line hashes
 * @param sourceLinesRepository provider of the report-side source lines
 * @param referenceBranchComponentUuids UUID lookup used for pull requests
 * @param movedFilesRepository lookup of the original file of a moved component
 * @param analysisMetadataHolder tells whether the analysis is a pull request
 * @param periodHolder gives access to the period and its mode
 * @param newCodeReferenceBranchComponentUuids UUID lookup used when the period mode is REFERENCE_BRANCH
 * @param configurationRepository source of the settings that select the diff algorithm
 */
public SourceLinesDiffImpl(DbClient dbClient, FileSourceDao fileSourceDao, SourceLinesHashRepository sourceLinesHash,
  SourceLinesRepository sourceLinesRepository, ReferenceBranchComponentUuids referenceBranchComponentUuids,
  MovedFilesRepository movedFilesRepository, AnalysisMetadataHolder analysisMetadataHolder, PeriodHolder periodHolder,
  NewCodeReferenceBranchComponentUuids newCodeReferenceBranchComponentUuids, ConfigurationRepository configurationRepository) {
  this.dbClient = dbClient;
  this.fileSourceDao = fileSourceDao;
  this.sourceLinesHash = sourceLinesHash;
  this.sourceLinesRepository = sourceLinesRepository;
  this.referenceBranchComponentUuids = referenceBranchComponentUuids;
  this.movedFilesRepository = movedFilesRepository;
  this.analysisMetadataHolder = analysisMetadataHolder;
  this.periodHolder = periodHolder;
  this.newCodeReferenceBranchComponentUuids = newCodeReferenceBranchComponentUuids;
  this.configurationRepository = configurationRepository;
}

/**
 * Computes the line mapping for a component, choosing the algorithm from configuration: the
 * git-cli histogram diff when it is enabled and the file name has one of the configured
 * suffixes, the Myers diff otherwise.
 */
@Override
public int[] computeMatchingLines(Component component) {
  // The suffix check is only evaluated when the feature flag is on.
  boolean useGitCli = isGitCliEnabled() && isSalesforceMetadataFile(component);
  return useGitCli ? computeWithHistogramDiff(component) : computeWithMyersDiff(component);
}

/**
 * Computes the line mapping with {@code SourceLinesDiffFinder}, comparing what
 * {@code getDBLines} returns with what {@code getReportLines} returns for the component.
 */
private int[] computeWithMyersDiff(Component component) {
  LOG.info("Diff started for {} using Myers algorithm", component.getKey());

  List<String> storedLines = getDBLines(component);
  List<String> incomingLines = getReportLines(component);

  SourceLinesDiffFinder finder = new SourceLinesDiffFinder();
  return finder.findMatchingLines(storedLines, incomingLines);
}

/**
 * Computes the line mapping with {@code GitDiffFinder} on the raw source content of the stored
 * and the reported version of the file.
 *
 * <p>When nothing is stored for the file, every reported line is returned as unmatched (0). If
 * the git-based diff fails or is interrupted, a warning is logged and the Myers diff is used
 * instead; on interruption the thread's interrupt flag is restored first.
 */
private int[] computeWithHistogramDiff(Component component) {
  LOG.info("Diff started for {} using Histogram (git-cli) algorithm", component.getKey());

  int[] mapping;
  try {
    List<String> storedContent = getDBSourceContent(component);
    List<String> reportedContent = getReportSourceContent(component);

    if (storedContent.isEmpty()) {
      // Also covers "both empty": the array then simply has length 0.
      mapping = new int[reportedContent.size()];
    } else {
      mapping = new GitDiffFinder().findMatchingLines(storedContent, reportedContent);
    }
  } catch (InterruptedException e) {
    Thread.currentThread().interrupt();
    LOG.warn("git-cli diff interrupted for {}, falling back to Myers", component.getKey());
    mapping = computeWithMyersDiff(component);
  } catch (IOException e) {
    LOG.warn("git-cli diff failed for {}, falling back to Myers", component.getKey(), e);
    mapping = computeWithMyersDiff(component);
  }
  return mapping;
}

/**
 * Tells whether the git-cli diff is switched on in the current configuration; an absent setting
 * counts as disabled.
 */
private boolean isGitCliEnabled() {
  return configurationRepository.getConfiguration()
    .getBoolean(KEY_CODESCAN_GITCLI_ENABLED)
    .orElse(false);
}

/**
 * Tells whether the component's name ends with one of the suffixes configured under
 * {@code sonar.sfmeta.file.suffixes}.
 *
 * <p>Blank suffixes are ignored and a suffix written without a leading dot is matched as if it
 * had one. A blank component name or a missing suffix list never matches.
 */
private boolean isSalesforceMetadataFile(Component component) {
  String fileName = component.getName();
  if (StringUtils.isBlank(fileName)) {
    return false;
  }

  String[] configuredSuffixes = configurationRepository.getConfiguration().getStringArray(KEY_SFMETA_FILE_SUFFIXES);
  if (configuredSuffixes == null) {
    return false;
  }

  for (String configured : configuredSuffixes) {
    if (StringUtils.isNotBlank(configured)) {
      String dotted = configured.startsWith(".") ? configured : "." + configured;
      if (fileName.endsWith(dotted)) {
        return true;
      }
    }
  }
  return false;
}

private List<String> getDBLines(Component component) {
try (DbSession dbSession = dbClient.openSession(false)) {
String uuid;
Expand Down Expand Up @@ -93,4 +172,49 @@ private List<String> getReportLines(Component component) {
return sourceLinesHash.getLineHashesMatchingDBVersion(component);
}

/**
 * Loads the stored source text of the file the component has to be compared with.
 *
 * <p>The file is identified by: the reference-branch UUID for a pull request; the new-code
 * reference-branch UUID when the period mode is {@code REFERENCE_BRANCH}; otherwise the UUID of
 * the original file if the component was moved, or the component's own UUID.
 *
 * @return one entry per stored line (an empty string for a line without source), or an empty
 *         list when no UUID, no stored file source or no source data is available
 */
private List<String> getDBSourceContent(Component component) {
  try (DbSession dbSession = dbClient.openSession(false)) {
    String sourceUuid;
    if (analysisMetadataHolder.isPullRequest()) {
      sourceUuid = referenceBranchComponentUuids.getComponentUuid(component.getKey());
    } else if (periodHolder.hasPeriod() && periodHolder.getPeriod().getMode().equals(NewCodePeriodType.REFERENCE_BRANCH.name())) {
      sourceUuid = newCodeReferenceBranchComponentUuids.getComponentUuid(component.getKey());
    } else {
      sourceUuid = movedFilesRepository.getOriginalFile(component)
        .map(MovedFilesRepository.OriginalFile::uuid)
        .orElse(component.getUuid());
    }

    // Each step is only taken when the previous one produced something.
    FileSourceDto storedSource = sourceUuid == null ? null : fileSourceDao.selectByFileUuid(dbSession, sourceUuid);
    DbFileSources.Data storedData = storedSource == null ? null : storedSource.getSourceData();
    if (storedData == null) {
      return Collections.emptyList();
    }

    List<DbFileSources.Line> storedLines = storedData.getLinesList();
    List<String> content = new ArrayList<>(storedLines.size());
    for (DbFileSources.Line storedLine : storedLines) {
      if (storedLine.hasSource()) {
        content.add(storedLine.getSource());
      } else {
        content.add("");
      }
    }
    return content;
  }
}

/**
 * Reads all source lines of the component from the source lines repository into a list, closing
 * the underlying iterator afterwards.
 */
private List<String> getReportSourceContent(Component component) {
  List<String> content = new ArrayList<>();
  try (CloseableIterator<String> reportLines = sourceLinesRepository.readLines(component)) {
    reportLines.forEachRemaining(content::add);
  }
  return content;
}

}