From 6fa49a0dcfa1e7863d662ee4470b7c2cc5e14cf5 Mon Sep 17 00:00:00 2001 From: Dhanush Terala Date: Mon, 11 May 2026 03:07:08 +0530 Subject: [PATCH 01/11] CD-7509 Update GitScmProvider to use native Git --- .../org/sonar/scm/git/GitScmProvider.java | 63 ++++++++++++++++++- 1 file changed, 61 insertions(+), 2 deletions(-) diff --git a/sonar-scanner-engine/src/main/java/org/sonar/scm/git/GitScmProvider.java b/sonar-scanner-engine/src/main/java/org/sonar/scm/git/GitScmProvider.java index 80b42a45bc58..bd3a1aeabefa 100644 --- a/sonar-scanner-engine/src/main/java/org/sonar/scm/git/GitScmProvider.java +++ b/sonar-scanner-engine/src/main/java/org/sonar/scm/git/GitScmProvider.java @@ -21,8 +21,11 @@ import com.google.common.annotations.VisibleForTesting; import java.io.BufferedOutputStream; +import java.io.BufferedReader; import java.io.File; import java.io.IOException; +import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; import java.nio.file.Path; import java.time.Instant; import java.util.Arrays; @@ -65,6 +68,7 @@ import org.eclipse.jgit.treewalk.filter.TreeFilter; import org.sonar.api.batch.scm.BlameCommand; import org.sonar.api.batch.scm.ScmProvider; +import org.sonar.api.config.Configuration; import org.sonar.api.notifications.AnalysisWarnings; import org.sonar.api.utils.MessageException; import org.sonar.api.utils.System2; @@ -88,20 +92,25 @@ public class GitScmProvider extends ScmProvider { private static final String NO_MERGE_BASE_FOUND_MESSAGE = "No merge base found between HEAD and %s"; @VisibleForTesting static final String SCM_INTEGRATION_DOCUMENTATION_SUFFIX = "/analyzing-source-code/scm-integration/"; + + private static final String KEY_CODESCAN_GITCLI_ENABLED = "codescan.gitcli.enabled"; + private final BlameCommand blameCommand; private final AnalysisWarnings analysisWarnings; private final GitIgnoreCommand gitIgnoreCommand; private final System2 system2; private final DocumentationLinkGenerator documentationLinkGenerator; + private final Configuration configuration; public GitScmProvider(CompositeBlameCommand blameCommand, AnalysisWarnings analysisWarnings, GitIgnoreCommand gitIgnoreCommand, System2 system2, - DocumentationLinkGenerator documentationLinkGenerator) { + DocumentationLinkGenerator documentationLinkGenerator, Configuration configuration) { this.blameCommand = blameCommand; this.analysisWarnings = analysisWarnings; this.gitIgnoreCommand = gitIgnoreCommand; this.system2 = system2; this.documentationLinkGenerator = documentationLinkGenerator; + this.configuration = configuration; } @Override @@ -125,6 +134,10 @@ public BlameCommand blameCommand() { return this.blameCommand; } + private boolean useNativeGitDiff() { + return configuration.getBoolean(KEY_CODESCAN_GITCLI_ENABLED).orElse(false); + } + @CheckForNull @Override public Set branchChangedFiles(String targetBranchName, Path rootBaseDir) { @@ -246,8 +259,15 @@ public Map> branchChangedLinesWithFileMovementDetection(Strin Map> changedLines = new HashMap<>(); + boolean nativeGit = useNativeGitDiff(); + LOG.info("Using {} for changed-line detection", nativeGit ? "native Git CLI" : "JGit"); + for (Map.Entry entry : changedFiles.entrySet()) { - collectChangedLines(repo, mergeBaseCommit.get(), changedLines, entry.getKey(), entry.getValue()); + if (nativeGit) { + collectChangedLinesWithNativeGit(repo, mergeBaseCommit.get(), changedLines, entry.getKey()); + } else { + collectChangedLines(repo, mergeBaseCommit.get(), changedLines, entry.getKey(), entry.getValue()); + } } return changedLines; @@ -299,6 +319,45 @@ private void collectChangedLines(Repository repo, RevCommit mergeBaseCommit, Map } } + private void collectChangedLinesWithNativeGit(Repository repo, RevCommit mergeBase, + Map> changedLines, Path file) throws IOException, InterruptedException { + + Path workTree = repo.getWorkTree().toPath(); + String relPath = toGitPath(workTree.relativize(file).toString()); + + ProcessBuilder pb = new ProcessBuilder("git", "diff", "--no-color", "--diff-algorithm=histogram", + mergeBase.getName(), "--", relPath); + + pb.directory(repo.getWorkTree()); + pb.redirectError(ProcessBuilder.Redirect.DISCARD); + + Process process = pb.start(); + ChangedLinesComputer computer = new ChangedLinesComputer(); + + try (BufferedReader br = new BufferedReader( + new InputStreamReader(process.getInputStream(), StandardCharsets.UTF_8))) { + String line; + while ((line = br.readLine()) != null) { + for (char c : line.toCharArray()) { + computer.receiver().write(c); + } + computer.receiver().write('\n'); + } + } + + int exitCode = process.waitFor(); + + if (exitCode != 0) { + LOG.warn("git diff failed with exit code {} for {}", exitCode, relPath); + return; + } + + Set lines = computer.changedLines(); + if (!lines.isEmpty()) { + changedLines.put(file, lines); + } + } + @Override @CheckForNull public Instant forkDate(String referenceBranchName, Path projectBaseDir) { From a8de20ffba4cde4aaba03b499195638f4c16f9c9 Mon Sep 17 00:00:00 2001 From: Dhanush Terala Date: Mon, 11 May 2026 05:10:24 +0530 Subject: [PATCH 02/11] CD-7509 Updated git diff algo --- .../source/SourceLinesDiffImpl.java | 141 ++++++++++++++++-- 1 file changed, 126 insertions(+), 15 deletions(-) diff --git a/server/sonar-ce-task-projectanalysis/src/main/java/org/sonar/ce/task/projectanalysis/source/SourceLinesDiffImpl.java b/server/sonar-ce-task-projectanalysis/src/main/java/org/sonar/ce/task/projectanalysis/source/SourceLinesDiffImpl.java index 2d1d6c54e630..f37f60ad74d1 100644 --- a/server/sonar-ce-task-projectanalysis/src/main/java/org/sonar/ce/task/projectanalysis/source/SourceLinesDiffImpl.java +++ b/server/sonar-ce-task-projectanalysis/src/main/java/org/sonar/ce/task/projectanalysis/source/SourceLinesDiffImpl.java @@ -19,64 +19,176 @@ */ package org.sonar.ce.task.projectanalysis.source; +import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.Optional; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.sonar.api.config.Configuration; import org.sonar.ce.task.projectanalysis.analysis.AnalysisMetadataHolder; import org.sonar.ce.task.projectanalysis.component.Component; +import org.sonar.ce.task.projectanalysis.component.ConfigurationRepository; import org.sonar.ce.task.projectanalysis.period.NewCodeReferenceBranchComponentUuids; import org.sonar.ce.task.projectanalysis.component.ReferenceBranchComponentUuids; import org.sonar.ce.task.projectanalysis.filemove.MovedFilesRepository; import org.sonar.ce.task.projectanalysis.period.PeriodHolder; +import org.sonar.core.util.CloseableIterator; import org.sonar.db.DbClient; import org.sonar.db.DbSession; import org.sonar.db.newcodeperiod.NewCodePeriodType; +import org.sonar.db.protobuf.DbFileSources; import org.sonar.db.source.FileSourceDao; +import org.sonar.db.source.FileSourceDto; public class SourceLinesDiffImpl implements SourceLinesDiff { + private static final Logger LOG = LoggerFactory.getLogger(SourceLinesDiffImpl.class); + private static final String KEY_HISTOGRAM_DIFF_ENABLED = "codescan.ce.histogramDiff.enabled"; + private final DbClient dbClient; private final FileSourceDao fileSourceDao; private final SourceLinesHashRepository sourceLinesHash; + private final SourceLinesRepository sourceLinesRepository; private final ReferenceBranchComponentUuids referenceBranchComponentUuids; private final MovedFilesRepository movedFilesRepository; private final AnalysisMetadataHolder analysisMetadataHolder; private final PeriodHolder periodHolder; private final NewCodeReferenceBranchComponentUuids newCodeReferenceBranchComponentUuids; + private final ConfigurationRepository configurationRepository; - public SourceLinesDiffImpl(DbClient dbClient, FileSourceDao fileSourceDao, SourceLinesHashRepository sourceLinesHash, ReferenceBranchComponentUuids referenceBranchComponentUuids, + public SourceLinesDiffImpl(DbClient dbClient, FileSourceDao fileSourceDao, SourceLinesHashRepository sourceLinesHash, + SourceLinesRepository sourceLinesRepository, ReferenceBranchComponentUuids referenceBranchComponentUuids, MovedFilesRepository movedFilesRepository, AnalysisMetadataHolder analysisMetadataHolder, PeriodHolder periodHolder, - NewCodeReferenceBranchComponentUuids newCodeReferenceBranchComponentUuids) { + NewCodeReferenceBranchComponentUuids newCodeReferenceBranchComponentUuids, ConfigurationRepository configurationRepository) { this.dbClient = dbClient; this.fileSourceDao = fileSourceDao; this.sourceLinesHash = sourceLinesHash; + this.sourceLinesRepository = sourceLinesRepository; this.referenceBranchComponentUuids = referenceBranchComponentUuids; this.movedFilesRepository = movedFilesRepository; this.analysisMetadataHolder = analysisMetadataHolder; this.periodHolder = periodHolder; this.newCodeReferenceBranchComponentUuids = newCodeReferenceBranchComponentUuids; + this.configurationRepository = configurationRepository; } @Override public int[] computeMatchingLines(Component component) { - List database = getDBLines(component); - List report = getReportLines(component); + boolean useHistogram = useHistogramDiff(); + LOG.info("[SOURCELINES-DIFF] computeMatchingLines called for component={}, useHistogramDiff={}", component.getKey(), useHistogram); + if (useHistogram) { + return computeWithHistogramDiff(component); + } + LOG.info("[SOURCELINES-DIFF] Using MYERS diff for component={}", component.getKey()); + return computeWithMyersDiff(component); + } + private int[] computeWithMyersDiff(Component component) { + List database = getDBLineHashes(component); + List report = getReportLineHashes(component); return new SourceLinesDiffFinder().findMatchingLines(database, report); } - private List getDBLines(Component component) { + private int[] computeWithHistogramDiff(Component component) { + try { + List dbSourceLines = getDBSourceContent(component); + List reportSourceLines = getReportSourceContent(component); + + LOG.info("[HISTOGRAM-DIFF] component={}, dbLines={}, reportLines={}", + component.getKey(), dbSourceLines.size(), reportSourceLines.size()); + + if (dbSourceLines.isEmpty() && reportSourceLines.isEmpty()) { + LOG.info("[HISTOGRAM-DIFF] Both empty, returning empty array for {}", component.getKey()); + return new int[0]; + } + + if (dbSourceLines.isEmpty()) { + LOG.info("[HISTOGRAM-DIFF] DB empty, all lines are new for {}", component.getKey()); + return new int[reportSourceLines.size()]; + } + + int[] result = new GitHistogramDiffFinder().findMatchingLines(dbSourceLines, reportSourceLines); + + // Log which lines are new (value == 0) + StringBuilder newLines = new StringBuilder(); + for (int i = 0; i < result.length; i++) { + if (result[i] == 0) { + if (newLines.length() > 0) newLines.append(","); + newLines.append(i + 1); + } + } + LOG.info("[HISTOGRAM-DIFF] SUCCESS for {}. New lines (match=0): [{}]", component.getKey(), newLines); + + return result; + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + LOG.warn("[HISTOGRAM-DIFF] Interrupted for {}, falling back to Myers", component.getKey()); + return computeWithMyersDiff(component); + } catch (Exception e) { + LOG.warn("[HISTOGRAM-DIFF] FAILED for {}, falling back to Myers. Error: {}", component.getKey(), e.getMessage(), e); + return computeWithMyersDiff(component); + } + } + + private boolean useHistogramDiff() { + Configuration config = configurationRepository.getConfiguration(); + Optional configValue = config.getBoolean(KEY_HISTOGRAM_DIFF_ENABLED); + boolean result = configValue.orElse(true); + LOG.info("[HISTOGRAM-DIFF] Toggle check: key={}, configValue={}, resolved={}", KEY_HISTOGRAM_DIFF_ENABLED, configValue, result); + return result; + } + + private List getDBSourceContent(Component component) { try (DbSession dbSession = dbClient.openSession(false)) { - String uuid; - if (analysisMetadataHolder.isPullRequest()) { - uuid = referenceBranchComponentUuids.getComponentUuid(component.getKey()); - } else if (periodHolder.hasPeriod() && periodHolder.getPeriod().getMode().equals(NewCodePeriodType.REFERENCE_BRANCH.name())) { - uuid = newCodeReferenceBranchComponentUuids.getComponentUuid(component.getKey()); - } else { - Optional originalFile = movedFilesRepository.getOriginalFile(component); - uuid = originalFile.map(MovedFilesRepository.OriginalFile::uuid).orElse(component.getUuid()); + String uuid = resolveDbFileUuid(component); + if (uuid == null) { + return Collections.emptyList(); } + FileSourceDto dto = fileSourceDao.selectByFileUuid(dbSession, uuid); + if (dto == null) { + return Collections.emptyList(); + } + + DbFileSources.Data sourceData = dto.getSourceData(); + if (sourceData == null) { + return Collections.emptyList(); + } + + List lines = sourceData.getLinesList(); + List result = new ArrayList<>(lines.size()); + for (DbFileSources.Line line : lines) { + result.add(line.hasSource() ? line.getSource() : ""); + } + return result; + } + } + + private List getReportSourceContent(Component component) { + List lines = new ArrayList<>(); + try (CloseableIterator iterator = sourceLinesRepository.readLines(component)) { + while (iterator.hasNext()) { + lines.add(iterator.next()); + } + } + return lines; + } + + private String resolveDbFileUuid(Component component) { + if (analysisMetadataHolder.isPullRequest()) { + return referenceBranchComponentUuids.getComponentUuid(component.getKey()); + } else if (periodHolder.hasPeriod() && periodHolder.getPeriod().getMode().equals(NewCodePeriodType.REFERENCE_BRANCH.name())) { + return newCodeReferenceBranchComponentUuids.getComponentUuid(component.getKey()); + } else { + Optional originalFile = movedFilesRepository.getOriginalFile(component); + return originalFile.map(MovedFilesRepository.OriginalFile::uuid).orElse(component.getUuid()); + } + } + + private List getDBLineHashes(Component component) { + try (DbSession dbSession = dbClient.openSession(false)) { + String uuid = resolveDbFileUuid(component); if (uuid == null) { return Collections.emptyList(); } @@ -89,8 +201,7 @@ private List getDBLines(Component component) { } } - private List getReportLines(Component component) { + private List getReportLineHashes(Component component) { return sourceLinesHash.getLineHashesMatchingDBVersion(component); } - } From 54f91ded5fb39201fc1c28a28ae62d865f5152cd Mon Sep 17 00:00:00 2001 From: Dhanush Terala Date: Mon, 11 May 2026 05:17:06 +0530 Subject: [PATCH 03/11] CD-7509 Updated git diff algo --- .../source/GitHistogramDiffFinder.java | 181 ++++++++++++++++++ 1 file changed, 181 insertions(+) create mode 100644 server/sonar-ce-task-projectanalysis/src/main/java/org/sonar/ce/task/projectanalysis/source/GitHistogramDiffFinder.java diff --git a/server/sonar-ce-task-projectanalysis/src/main/java/org/sonar/ce/task/projectanalysis/source/GitHistogramDiffFinder.java b/server/sonar-ce-task-projectanalysis/src/main/java/org/sonar/ce/task/projectanalysis/source/GitHistogramDiffFinder.java new file mode 100644 index 000000000000..b3051282c637 --- /dev/null +++ b/server/sonar-ce-task-projectanalysis/src/main/java/org/sonar/ce/task/projectanalysis/source/GitHistogramDiffFinder.java @@ -0,0 +1,181 @@ +/* + * SonarQube + * Copyright (C) 2009-2024 SonarSource SA + * mailto:info AT sonarsource DOT com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ +package org.sonar.ce.task.projectanalysis.source; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * Uses native git's histogram diff algorithm via {@code git diff --no-index --histogram} + * to compute matching lines between two file versions. This produces better results than + * Myers algorithm on files with repeated structures (XML, JSON, etc.). + * + * Throws on failure — caller is responsible for fallback. + */ +class GitHistogramDiffFinder { + + private static final Pattern HUNK_HEADER = Pattern.compile("^@@ -(\\d+)(?:,(\\d+))? \\+(\\d+)(?:,(\\d+))? @@"); + + /** + * Computes matching lines between the DB version (left/old) and the report version (right/new) + * using git's histogram algorithm. + * + * @param dbLines actual source lines from the reference branch (DB) + * @param reportLines actual source lines from the current analysis (report) + * @return int[] where index i corresponds to report line (i+1), value is the matching DB line number (1-based) or 0 if new + * @throws IOException if git process cannot be started or temp files cannot be written + * @throws InterruptedException if the thread is interrupted while waiting for git + */ + int[] findMatchingLines(List dbLines, List reportLines) throws IOException, InterruptedException { + int[] result = new int[reportLines.size()]; + + if (dbLines.isEmpty() || reportLines.isEmpty()) { + return result; + } + + Path tempDir = null; + try { + tempDir = Files.createTempDirectory("sonar-histogram-diff-"); + Path oldFile = tempDir.resolve("old.txt"); + Path newFile = tempDir.resolve("new.txt"); + + Files.write(oldFile, dbLines, StandardCharsets.UTF_8); + Files.write(newFile, reportLines, StandardCharsets.UTF_8); + + runDiffAndParse(oldFile, newFile, result, dbLines.size(), reportLines.size()); + } finally { + cleanupTempDir(tempDir); + } + + return result; + } + + private void runDiffAndParse(Path oldFile, Path newFile, int[] result, int dbLineCount, int reportLineCount) + throws IOException, InterruptedException { + + ProcessBuilder pb = new ProcessBuilder( + "git", "diff", "--no-index", "--no-color", "--histogram", + oldFile.toAbsolutePath().toString(), + newFile.toAbsolutePath().toString() + ); + pb.redirectError(ProcessBuilder.Redirect.DISCARD); + + Process process = pb.start(); + + try (BufferedReader reader = new BufferedReader( + new InputStreamReader(process.getInputStream(), StandardCharsets.UTF_8))) { + parseUnifiedDiff(reader, result, dbLineCount, reportLineCount); + } + + int exitCode = process.waitFor(); + // git diff --no-index: 0 = no differences, 1 = differences found, >1 = error + if (exitCode > 1) { + throw new IOException("git diff --no-index failed with exit code " + exitCode); + } + } + + private static void parseUnifiedDiff(BufferedReader reader, int[] result, int dbLineCount, int reportLineCount) + throws IOException { + + int oldPos = 1; + int newPos = 1; + + String line; + boolean inHunk = false; + int hunkOldStart = 0; + int hunkOldCount = 0; + int hunkNewStart = 0; + int hunkNewCount = 0; + + while ((line = reader.readLine()) != null) { + Matcher m = HUNK_HEADER.matcher(line); + if (m.find()) { + if (inHunk) { + oldPos = hunkOldStart + hunkOldCount; + newPos = hunkNewStart + hunkNewCount; + } + + hunkOldStart = Integer.parseInt(m.group(1)); + hunkOldCount = m.group(2) != null ? Integer.parseInt(m.group(2)) : 1; + hunkNewStart = Integer.parseInt(m.group(3)); + hunkNewCount = m.group(4) != null ? Integer.parseInt(m.group(4)) : 1; + + // Fill matches for lines between previous hunk end and this hunk start + fillMatches(result, oldPos, newPos, hunkOldStart, hunkNewStart); + oldPos = hunkOldStart; + newPos = hunkNewStart; + inHunk = true; + + } else if (inHunk && !line.isEmpty()) { + char firstChar = line.charAt(0); + if (firstChar == ' ') { + int reportIdx = newPos - 1; + if (reportIdx >= 0 && reportIdx < result.length) { + result[reportIdx] = oldPos; + } + oldPos++; + newPos++; + } else if (firstChar == '+') { + newPos++; + } else if (firstChar == '-') { + oldPos++; + } + } + } + + // After last hunk, fill remaining matching lines to end of file + if (inHunk) { + oldPos = hunkOldStart + hunkOldCount; + newPos = hunkNewStart + hunkNewCount; + } + fillMatches(result, oldPos, newPos, dbLineCount + 1, reportLineCount + 1); + } + + private static void fillMatches(int[] result, int currentOld, int currentNew, int targetOld, int targetNew) { + while (currentOld < targetOld && currentNew < targetNew) { + int reportIdx = currentNew - 1; + if (reportIdx >= 0 && reportIdx < result.length) { + result[reportIdx] = currentOld; + } + currentOld++; + currentNew++; + } + } + + private static void cleanupTempDir(Path tempDir) { + if (tempDir == null) { + return; + } + try { + Files.deleteIfExists(tempDir.resolve("old.txt")); + Files.deleteIfExists(tempDir.resolve("new.txt")); + Files.deleteIfExists(tempDir); + } catch (IOException e) { + // best-effort cleanup + } + } +} From ff3a251859da2cc8f32072774a71dc3244068c62 Mon Sep 17 00:00:00 2001 From: Dhanush Terala Date: Wed, 13 May 2026 11:19:52 +0530 Subject: [PATCH 04/11] CD-7509 Updated GitDiffFinder to use GitHistogramDiff --- .../source/SourceLinesDiffImpl.java | 132 +++++++++--------- 1 file changed, 65 insertions(+), 67 deletions(-) diff --git a/server/sonar-ce-task-projectanalysis/src/main/java/org/sonar/ce/task/projectanalysis/source/SourceLinesDiffImpl.java b/server/sonar-ce-task-projectanalysis/src/main/java/org/sonar/ce/task/projectanalysis/source/SourceLinesDiffImpl.java index f37f60ad74d1..0c1d1438bc02 100644 --- a/server/sonar-ce-task-projectanalysis/src/main/java/org/sonar/ce/task/projectanalysis/source/SourceLinesDiffImpl.java +++ b/server/sonar-ce-task-projectanalysis/src/main/java/org/sonar/ce/task/projectanalysis/source/SourceLinesDiffImpl.java @@ -44,11 +44,12 @@ public class SourceLinesDiffImpl implements SourceLinesDiff { private static final Logger LOG = LoggerFactory.getLogger(SourceLinesDiffImpl.class); - private static final String KEY_HISTOGRAM_DIFF_ENABLED = "codescan.ce.histogramDiff.enabled"; + private static final String KEY_CODESCAN_GITCLI_ENABLED = "codescan.gitcli.enabled"; + private static final String KEY_SFMETA_FILE_SUFFIXES = "sonar.sfmeta.file.suffixes"; private final DbClient dbClient; private final FileSourceDao fileSourceDao; - private final SourceLinesHashRepository sourceLinesHash; + private final SourceLinesHashRepository sourceLinesHashRepository; private final SourceLinesRepository sourceLinesRepository; private final ReferenceBranchComponentUuids referenceBranchComponentUuids; private final MovedFilesRepository movedFilesRepository; @@ -57,13 +58,13 @@ public class SourceLinesDiffImpl implements SourceLinesDiff { private final NewCodeReferenceBranchComponentUuids newCodeReferenceBranchComponentUuids; private final ConfigurationRepository configurationRepository; - public SourceLinesDiffImpl(DbClient dbClient, FileSourceDao fileSourceDao, SourceLinesHashRepository sourceLinesHash, + public SourceLinesDiffImpl(DbClient dbClient, FileSourceDao fileSourceDao, SourceLinesHashRepository sourceLinesHashRepository, SourceLinesRepository sourceLinesRepository, ReferenceBranchComponentUuids referenceBranchComponentUuids, MovedFilesRepository movedFilesRepository, AnalysisMetadataHolder analysisMetadataHolder, PeriodHolder periodHolder, NewCodeReferenceBranchComponentUuids newCodeReferenceBranchComponentUuids, ConfigurationRepository configurationRepository) { this.dbClient = dbClient; this.fileSourceDao = fileSourceDao; - this.sourceLinesHash = sourceLinesHash; + this.sourceLinesHashRepository = sourceLinesHashRepository; this.sourceLinesRepository = sourceLinesRepository; this.referenceBranchComponentUuids = referenceBranchComponentUuids; this.movedFilesRepository = movedFilesRepository; @@ -75,104 +76,101 @@ public SourceLinesDiffImpl(DbClient dbClient, FileSourceDao fileSourceDao, Sourc @Override public int[] computeMatchingLines(Component component) { - boolean useHistogram = useHistogramDiff(); - LOG.info("[SOURCELINES-DIFF] computeMatchingLines called for component={}, useHistogramDiff={}", component.getKey(), useHistogram); - if (useHistogram) { + if (isGitCliEnabled() && isSalesforceMetadataFile(component)) { return computeWithHistogramDiff(component); } - LOG.info("[SOURCELINES-DIFF] Using MYERS diff for component={}", component.getKey()); return computeWithMyersDiff(component); } private int[] computeWithMyersDiff(Component component) { - List database = getDBLineHashes(component); - List report = getReportLineHashes(component); + List database = getPreviousVersionLineHashes(component); + List report = getCurrentVersionLineHashes(component); return new SourceLinesDiffFinder().findMatchingLines(database, report); } private int[] computeWithHistogramDiff(Component component) { try { - List dbSourceLines = getDBSourceContent(component); - List reportSourceLines = getReportSourceContent(component); + List previousVersionSourceLines = getPreviousVersionSourceContent(component); + List currentVersionSourceLines = getCurrentVersionSourceContent(component); - LOG.info("[HISTOGRAM-DIFF] component={}, dbLines={}, reportLines={}", - component.getKey(), dbSourceLines.size(), reportSourceLines.size()); - - if (dbSourceLines.isEmpty() && reportSourceLines.isEmpty()) { - LOG.info("[HISTOGRAM-DIFF] Both empty, returning empty array for {}", component.getKey()); + if (previousVersionSourceLines.isEmpty() && currentVersionSourceLines.isEmpty()) { return new int[0]; } - if (dbSourceLines.isEmpty()) { - LOG.info("[HISTOGRAM-DIFF] DB empty, all lines are new for {}", component.getKey()); - return new int[reportSourceLines.size()]; + if (previousVersionSourceLines.isEmpty()) { + return new int[currentVersionSourceLines.size()]; } - int[] result = new GitHistogramDiffFinder().findMatchingLines(dbSourceLines, reportSourceLines); - - // Log which lines are new (value == 0) - StringBuilder newLines = new StringBuilder(); - for (int i = 0; i < result.length; i++) { - if (result[i] == 0) { - if (newLines.length() > 0) newLines.append(","); - newLines.append(i + 1); - } - } - LOG.info("[HISTOGRAM-DIFF] SUCCESS for {}. New lines (match=0): [{}]", component.getKey(), newLines); + return new GitDiffFinder().findMatchingLines(previousVersionSourceLines, currentVersionSourceLines); - return result; - } catch (InterruptedException e) { + } catch (InterruptedException interruptedException) { Thread.currentThread().interrupt(); LOG.warn("[HISTOGRAM-DIFF] Interrupted for {}, falling back to Myers", component.getKey()); return computeWithMyersDiff(component); - } catch (Exception e) { - LOG.warn("[HISTOGRAM-DIFF] FAILED for {}, falling back to Myers. Error: {}", component.getKey(), e.getMessage(), e); + } catch (Exception histogramDiffException) { + LOG.warn("[HISTOGRAM-DIFF] Failed for {}, falling back to Myers. Error: {}", + component.getKey(), histogramDiffException.getMessage(), histogramDiffException); return computeWithMyersDiff(component); } } - private boolean useHistogramDiff() { - Configuration config = configurationRepository.getConfiguration(); - Optional configValue = config.getBoolean(KEY_HISTOGRAM_DIFF_ENABLED); - boolean result = configValue.orElse(true); - LOG.info("[HISTOGRAM-DIFF] Toggle check: key={}, configValue={}, resolved={}", KEY_HISTOGRAM_DIFF_ENABLED, configValue, result); - return result; + private boolean isGitCliEnabled() { + Configuration projectConfiguration = configurationRepository.getConfiguration(); + return projectConfiguration.getBoolean(KEY_CODESCAN_GITCLI_ENABLED).orElse(false); + } + + private boolean isSalesforceMetadataFile(Component component) { + String fileName = component.getName(); + if (fileName == null || fileName.isEmpty()) { + return false; + } + String[] suffixes = configurationRepository.getConfiguration().getStringArray(KEY_SFMETA_FILE_SUFFIXES); + for (String suffix : suffixes) { + if (suffix == null || suffix.isEmpty()) { + continue; + } + String normalized = suffix.startsWith(".") ? suffix : "." + suffix; + if (fileName.endsWith(normalized)) { + return true; + } + } + return false; } - private List getDBSourceContent(Component component) { + private List getPreviousVersionSourceContent(Component component) { try (DbSession dbSession = dbClient.openSession(false)) { - String uuid = resolveDbFileUuid(component); - if (uuid == null) { + String previousFileUuid = resolveDbFileUuid(component); + if (previousFileUuid == null) { return Collections.emptyList(); } - FileSourceDto dto = fileSourceDao.selectByFileUuid(dbSession, uuid); - if (dto == null) { + FileSourceDto fileSourceDto = fileSourceDao.selectByFileUuid(dbSession, previousFileUuid); + if (fileSourceDto == null) { return Collections.emptyList(); } - DbFileSources.Data sourceData = dto.getSourceData(); - if (sourceData == null) { + DbFileSources.Data protobufSourceData = fileSourceDto.getSourceData(); + if (protobufSourceData == null) { return Collections.emptyList(); } - List lines = sourceData.getLinesList(); - List result = new ArrayList<>(lines.size()); - for (DbFileSources.Line line : lines) { - result.add(line.hasSource() ? line.getSource() : ""); + List protobufLines = protobufSourceData.getLinesList(); + List sourceLines = new ArrayList<>(protobufLines.size()); + for (DbFileSources.Line protobufLine : protobufLines) { + sourceLines.add(protobufLine.hasSource() ? protobufLine.getSource() : ""); } - return result; + return sourceLines; } } - private List getReportSourceContent(Component component) { - List lines = new ArrayList<>(); - try (CloseableIterator iterator = sourceLinesRepository.readLines(component)) { - while (iterator.hasNext()) { - lines.add(iterator.next()); + private List getCurrentVersionSourceContent(Component component) { + List sourceLines = new ArrayList<>(); + try (CloseableIterator lineIterator = sourceLinesRepository.readLines(component)) { + while (lineIterator.hasNext()) { + sourceLines.add(lineIterator.next()); } } - return lines; + return sourceLines; } private String resolveDbFileUuid(Component component) { @@ -186,22 +184,22 @@ private String resolveDbFileUuid(Component component) { } } - private List getDBLineHashes(Component component) { + private List getPreviousVersionLineHashes(Component component) { try (DbSession dbSession = dbClient.openSession(false)) { - String uuid = resolveDbFileUuid(component); - if (uuid == null) { + String previousFileUuid = resolveDbFileUuid(component); + if (previousFileUuid == null) { return Collections.emptyList(); } - List database = fileSourceDao.selectLineHashes(dbSession, uuid); - if (database == null) { + List lineHashes = fileSourceDao.selectLineHashes(dbSession, previousFileUuid); + if (lineHashes == null) { return Collections.emptyList(); } - return database; + return lineHashes; } } - private List getReportLineHashes(Component component) { - return sourceLinesHash.getLineHashesMatchingDBVersion(component); + private List getCurrentVersionLineHashes(Component component) { + return sourceLinesHashRepository.getLineHashesMatchingDBVersion(component); } } From 439f96509d915888d448a336bea3e46571e061bd Mon Sep 17 00:00:00 2001 From: Dhanush Terala Date: Wed, 13 May 2026 11:20:08 +0530 Subject: [PATCH 05/11] CD-7509 Updated GitDiffFinder to use GitHistogramDiff --- .../source/GitHistogramDiffFinder.java | 181 ------------------ 1 file changed, 181 deletions(-) diff --git a/server/sonar-ce-task-projectanalysis/src/main/java/org/sonar/ce/task/projectanalysis/source/GitHistogramDiffFinder.java b/server/sonar-ce-task-projectanalysis/src/main/java/org/sonar/ce/task/projectanalysis/source/GitHistogramDiffFinder.java index b3051282c637..e69de29bb2d1 100644 --- a/server/sonar-ce-task-projectanalysis/src/main/java/org/sonar/ce/task/projectanalysis/source/GitHistogramDiffFinder.java +++ b/server/sonar-ce-task-projectanalysis/src/main/java/org/sonar/ce/task/projectanalysis/source/GitHistogramDiffFinder.java @@ -1,181 +0,0 @@ -/* - * SonarQube - * Copyright (C) 2009-2024 SonarSource SA - * mailto:info AT sonarsource DOT com - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 3 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ -package org.sonar.ce.task.projectanalysis.source; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStreamReader; -import java.nio.charset.StandardCharsets; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.List; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -/** - * Uses native git's histogram diff algorithm via {@code git diff --no-index --histogram} - * to compute matching lines between two file versions. This produces better results than - * Myers algorithm on files with repeated structures (XML, JSON, etc.). - * - * Throws on failure — caller is responsible for fallback. - */ -class GitHistogramDiffFinder { - - private static final Pattern HUNK_HEADER = Pattern.compile("^@@ -(\\d+)(?:,(\\d+))? \\+(\\d+)(?:,(\\d+))? @@"); - - /** - * Computes matching lines between the DB version (left/old) and the report version (right/new) - * using git's histogram algorithm. - * - * @param dbLines actual source lines from the reference branch (DB) - * @param reportLines actual source lines from the current analysis (report) - * @return int[] where index i corresponds to report line (i+1), value is the matching DB line number (1-based) or 0 if new - * @throws IOException if git process cannot be started or temp files cannot be written - * @throws InterruptedException if the thread is interrupted while waiting for git - */ - int[] findMatchingLines(List dbLines, List reportLines) throws IOException, InterruptedException { - int[] result = new int[reportLines.size()]; - - if (dbLines.isEmpty() || reportLines.isEmpty()) { - return result; - } - - Path tempDir = null; - try { - tempDir = Files.createTempDirectory("sonar-histogram-diff-"); - Path oldFile = tempDir.resolve("old.txt"); - Path newFile = tempDir.resolve("new.txt"); - - Files.write(oldFile, dbLines, StandardCharsets.UTF_8); - Files.write(newFile, reportLines, StandardCharsets.UTF_8); - - runDiffAndParse(oldFile, newFile, result, dbLines.size(), reportLines.size()); - } finally { - cleanupTempDir(tempDir); - } - - return result; - } - - private void runDiffAndParse(Path oldFile, Path newFile, int[] result, int dbLineCount, int reportLineCount) - throws IOException, InterruptedException { - - ProcessBuilder pb = new ProcessBuilder( - "git", "diff", "--no-index", "--no-color", "--histogram", - oldFile.toAbsolutePath().toString(), - newFile.toAbsolutePath().toString() - ); - pb.redirectError(ProcessBuilder.Redirect.DISCARD); - - Process process = pb.start(); - - try (BufferedReader reader = new BufferedReader( - new InputStreamReader(process.getInputStream(), StandardCharsets.UTF_8))) { - parseUnifiedDiff(reader, result, dbLineCount, reportLineCount); - } - - int exitCode = process.waitFor(); - // git diff --no-index: 0 = no differences, 1 = differences found, >1 = error - if (exitCode > 1) { - throw new IOException("git diff --no-index failed with exit code " + exitCode); - } - } - - private static void parseUnifiedDiff(BufferedReader reader, int[] result, int dbLineCount, int reportLineCount) - throws IOException { - - int oldPos = 1; - int newPos = 1; - - String line; - boolean inHunk = false; - int hunkOldStart = 0; - int hunkOldCount = 0; - int hunkNewStart = 0; - int hunkNewCount = 0; - - while ((line = reader.readLine()) != null) { - Matcher m = HUNK_HEADER.matcher(line); - if (m.find()) { - if (inHunk) { - oldPos = hunkOldStart + hunkOldCount; - newPos = hunkNewStart + hunkNewCount; - } - - hunkOldStart = Integer.parseInt(m.group(1)); - hunkOldCount = m.group(2) != null ? Integer.parseInt(m.group(2)) : 1; - hunkNewStart = Integer.parseInt(m.group(3)); - hunkNewCount = m.group(4) != null ? Integer.parseInt(m.group(4)) : 1; - - // Fill matches for lines between previous hunk end and this hunk start - fillMatches(result, oldPos, newPos, hunkOldStart, hunkNewStart); - oldPos = hunkOldStart; - newPos = hunkNewStart; - inHunk = true; - - } else if (inHunk && !line.isEmpty()) { - char firstChar = line.charAt(0); - if (firstChar == ' ') { - int reportIdx = newPos - 1; - if (reportIdx >= 0 && reportIdx < result.length) { - result[reportIdx] = oldPos; - } - oldPos++; - newPos++; - } else if (firstChar == '+') { - newPos++; - } else if (firstChar == '-') { - oldPos++; - } - } - } - - // After last hunk, fill remaining matching lines to end of file - if (inHunk) { - oldPos = hunkOldStart + hunkOldCount; - newPos = hunkNewStart + hunkNewCount; - } - fillMatches(result, oldPos, newPos, dbLineCount + 1, reportLineCount + 1); - } - - private static void fillMatches(int[] result, int currentOld, int currentNew, int targetOld, int targetNew) { - while (currentOld < targetOld && currentNew < targetNew) { - int reportIdx = currentNew - 1; - if (reportIdx >= 0 && reportIdx < result.length) { - result[reportIdx] = currentOld; - } - currentOld++; - currentNew++; - } - } - - private static void cleanupTempDir(Path tempDir) { - if (tempDir == null) { - return; - } - try { - Files.deleteIfExists(tempDir.resolve("old.txt")); - Files.deleteIfExists(tempDir.resolve("new.txt")); - Files.deleteIfExists(tempDir); - } catch (IOException e) { - // best-effort cleanup - } - } -} From 779d7d1e7433e2939b923abc782d7072d9746069 Mon Sep 17 00:00:00 2001 From: Dhanush Terala Date: Thu, 14 May 2026 02:42:09 +0530 Subject: [PATCH 06/11] CD-7509 Updated SourceLinesDiffImpl --- .../source/SourceLinesDiffImpl.java | 142 +++++++++--------- 1 file changed, 75 insertions(+), 67 deletions(-) diff --git a/server/sonar-ce-task-projectanalysis/src/main/java/org/sonar/ce/task/projectanalysis/source/SourceLinesDiffImpl.java b/server/sonar-ce-task-projectanalysis/src/main/java/org/sonar/ce/task/projectanalysis/source/SourceLinesDiffImpl.java index 0c1d1438bc02..55d66d211a26 100644 --- a/server/sonar-ce-task-projectanalysis/src/main/java/org/sonar/ce/task/projectanalysis/source/SourceLinesDiffImpl.java +++ b/server/sonar-ce-task-projectanalysis/src/main/java/org/sonar/ce/task/projectanalysis/source/SourceLinesDiffImpl.java @@ -19,12 +19,12 @@ */ package org.sonar.ce.task.projectanalysis.source; +import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.Optional; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import org.apache.commons.lang3.StringUtils; import org.sonar.api.config.Configuration; import org.sonar.ce.task.projectanalysis.analysis.AnalysisMetadataHolder; import org.sonar.ce.task.projectanalysis.component.Component; @@ -43,13 +43,12 @@ public class SourceLinesDiffImpl implements SourceLinesDiff { - private static final Logger LOG = LoggerFactory.getLogger(SourceLinesDiffImpl.class); private static final String KEY_CODESCAN_GITCLI_ENABLED = "codescan.gitcli.enabled"; private static final String KEY_SFMETA_FILE_SUFFIXES = "sonar.sfmeta.file.suffixes"; private final DbClient dbClient; private final FileSourceDao fileSourceDao; - private final SourceLinesHashRepository sourceLinesHashRepository; + private final SourceLinesHashRepository sourceLinesHash; private final SourceLinesRepository sourceLinesRepository; private final ReferenceBranchComponentUuids referenceBranchComponentUuids; private final MovedFilesRepository movedFilesRepository; @@ -58,13 +57,13 @@ public class SourceLinesDiffImpl implements SourceLinesDiff { private final NewCodeReferenceBranchComponentUuids newCodeReferenceBranchComponentUuids; private final ConfigurationRepository configurationRepository; - public SourceLinesDiffImpl(DbClient dbClient, FileSourceDao fileSourceDao, SourceLinesHashRepository sourceLinesHashRepository, + public SourceLinesDiffImpl(DbClient dbClient, FileSourceDao fileSourceDao, SourceLinesHashRepository sourceLinesHash, SourceLinesRepository sourceLinesRepository, ReferenceBranchComponentUuids referenceBranchComponentUuids, MovedFilesRepository movedFilesRepository, AnalysisMetadataHolder analysisMetadataHolder, PeriodHolder periodHolder, NewCodeReferenceBranchComponentUuids newCodeReferenceBranchComponentUuids, ConfigurationRepository configurationRepository) { this.dbClient = dbClient; this.fileSourceDao = fileSourceDao; - this.sourceLinesHashRepository = sourceLinesHashRepository; + this.sourceLinesHash = sourceLinesHash; this.sourceLinesRepository = sourceLinesRepository; this.referenceBranchComponentUuids = referenceBranchComponentUuids; this.movedFilesRepository = movedFilesRepository; @@ -83,123 +82,132 @@ public int[] computeMatchingLines(Component component) { } private int[] computeWithMyersDiff(Component component) { - List database = getPreviousVersionLineHashes(component); - List report = getCurrentVersionLineHashes(component); + List database = getDBLines(component); + List report = getReportLines(component); + return new SourceLinesDiffFinder().findMatchingLines(database, report); } private int[] computeWithHistogramDiff(Component component) { try { - List previousVersionSourceLines = getPreviousVersionSourceContent(component); - List currentVersionSourceLines = getCurrentVersionSourceContent(component); + List dbSourceLines = getDBSourceContent(component); + List reportSourceLines = getReportSourceContent(component); - if (previousVersionSourceLines.isEmpty() && currentVersionSourceLines.isEmpty()) { + if (dbSourceLines.isEmpty() && reportSourceLines.isEmpty()) { return new int[0]; } - if (previousVersionSourceLines.isEmpty()) { - return new int[currentVersionSourceLines.size()]; + if (dbSourceLines.isEmpty()) { + return new int[reportSourceLines.size()]; } - return new GitDiffFinder().findMatchingLines(previousVersionSourceLines, currentVersionSourceLines); + return new GitDiffFinder().findMatchingLines(dbSourceLines, reportSourceLines); - } catch (InterruptedException interruptedException) { + } catch (InterruptedException e) { Thread.currentThread().interrupt(); - LOG.warn("[HISTOGRAM-DIFF] Interrupted for {}, falling back to Myers", component.getKey()); return computeWithMyersDiff(component); - } catch (Exception histogramDiffException) { - LOG.warn("[HISTOGRAM-DIFF] Failed for {}, falling back to Myers. Error: {}", - component.getKey(), histogramDiffException.getMessage(), histogramDiffException); + } catch (IOException e) { return computeWithMyersDiff(component); } } private boolean isGitCliEnabled() { - Configuration projectConfiguration = configurationRepository.getConfiguration(); - return projectConfiguration.getBoolean(KEY_CODESCAN_GITCLI_ENABLED).orElse(false); + Configuration config = configurationRepository.getConfiguration(); + return config.getBoolean(KEY_CODESCAN_GITCLI_ENABLED).orElse(false); } private boolean isSalesforceMetadataFile(Component component) { String fileName = component.getName(); - if (fileName == null || fileName.isEmpty()) { + if (StringUtils.isBlank(fileName)) { return false; } String[] suffixes = configurationRepository.getConfiguration().getStringArray(KEY_SFMETA_FILE_SUFFIXES); + if (suffixes == null) { + return false; + } for (String suffix : suffixes) { - if (suffix == null || suffix.isEmpty()) { + if (StringUtils.isBlank(suffix)) { continue; } - String normalized = suffix.startsWith(".") ? suffix : "." + suffix; - if (fileName.endsWith(normalized)) { + String normalizedSuffix = suffix.startsWith(".") ? suffix : "." + suffix; + if (fileName.endsWith(normalizedSuffix)) { return true; } } return false; } - private List getPreviousVersionSourceContent(Component component) { + private List getDBLines(Component component) { try (DbSession dbSession = dbClient.openSession(false)) { - String previousFileUuid = resolveDbFileUuid(component); - if (previousFileUuid == null) { - return Collections.emptyList(); + String uuid; + if (analysisMetadataHolder.isPullRequest()) { + uuid = referenceBranchComponentUuids.getComponentUuid(component.getKey()); + } else if (periodHolder.hasPeriod() && periodHolder.getPeriod().getMode().equals(NewCodePeriodType.REFERENCE_BRANCH.name())) { + uuid = newCodeReferenceBranchComponentUuids.getComponentUuid(component.getKey()); + } else { + Optional originalFile = movedFilesRepository.getOriginalFile(component); + uuid = originalFile.map(MovedFilesRepository.OriginalFile::uuid).orElse(component.getUuid()); } - FileSourceDto fileSourceDto = fileSourceDao.selectByFileUuid(dbSession, previousFileUuid); - if (fileSourceDto == null) { + if (uuid == null) { return Collections.emptyList(); } - DbFileSources.Data protobufSourceData = fileSourceDto.getSourceData(); - if (protobufSourceData == null) { + List database = fileSourceDao.selectLineHashes(dbSession, uuid); + if (database == null) { return Collections.emptyList(); } - - List protobufLines = protobufSourceData.getLinesList(); - List sourceLines = new ArrayList<>(protobufLines.size()); - for (DbFileSources.Line protobufLine : protobufLines) { - sourceLines.add(protobufLine.hasSource() ? protobufLine.getSource() : ""); - } - return sourceLines; + return database; } } - private List getCurrentVersionSourceContent(Component component) { - List sourceLines = new ArrayList<>(); - try (CloseableIterator lineIterator = sourceLinesRepository.readLines(component)) { - while (lineIterator.hasNext()) { - sourceLines.add(lineIterator.next()); - } - } - return sourceLines; - } - - private String resolveDbFileUuid(Component component) { - if (analysisMetadataHolder.isPullRequest()) { - return referenceBranchComponentUuids.getComponentUuid(component.getKey()); - } else if (periodHolder.hasPeriod() && periodHolder.getPeriod().getMode().equals(NewCodePeriodType.REFERENCE_BRANCH.name())) { - return newCodeReferenceBranchComponentUuids.getComponentUuid(component.getKey()); - } else { - Optional originalFile = movedFilesRepository.getOriginalFile(component); - return originalFile.map(MovedFilesRepository.OriginalFile::uuid).orElse(component.getUuid()); - } + private List getReportLines(Component component) { + return sourceLinesHash.getLineHashesMatchingDBVersion(component); } - private List getPreviousVersionLineHashes(Component component) { + private List getDBSourceContent(Component component) { try (DbSession dbSession = dbClient.openSession(false)) { - String previousFileUuid = resolveDbFileUuid(component); - if (previousFileUuid == null) { + String uuid; + if (analysisMetadataHolder.isPullRequest()) { + uuid = referenceBranchComponentUuids.getComponentUuid(component.getKey()); + } else if (periodHolder.hasPeriod() && periodHolder.getPeriod().getMode().equals(NewCodePeriodType.REFERENCE_BRANCH.name())) { + uuid = newCodeReferenceBranchComponentUuids.getComponentUuid(component.getKey()); + } else { + Optional originalFile = movedFilesRepository.getOriginalFile(component); + uuid = originalFile.map(MovedFilesRepository.OriginalFile::uuid).orElse(component.getUuid()); + } + + if (uuid == null) { + return Collections.emptyList(); + } + + FileSourceDto dto = fileSourceDao.selectByFileUuid(dbSession, uuid); + if (dto == null) { return Collections.emptyList(); } - List lineHashes = fileSourceDao.selectLineHashes(dbSession, previousFileUuid); - if (lineHashes == null) { + DbFileSources.Data sourceData = dto.getSourceData(); + if (sourceData == null) { return Collections.emptyList(); } - return lineHashes; + + List lines = sourceData.getLinesList(); + List result = new ArrayList<>(lines.size()); + for (DbFileSources.Line line : lines) { + result.add(line.hasSource() ? line.getSource() : ""); + } + return result; } } - private List getCurrentVersionLineHashes(Component component) { - return sourceLinesHashRepository.getLineHashesMatchingDBVersion(component); + private List getReportSourceContent(Component component) { + List lines = new ArrayList<>(); + try (CloseableIterator iterator = sourceLinesRepository.readLines(component)) { + while (iterator.hasNext()) { + lines.add(iterator.next()); + } + } + return lines; } + } From a0b67b1bbcdd5666535ad07ef1eddf6baee0fad9 Mon Sep 17 00:00:00 2001 From: Dhanush Terala Date: Thu, 14 May 2026 02:44:20 +0530 Subject: [PATCH 07/11] CD-7509 Updated GitDiffFinder --- .../projectanalysis/source/GitDiffFinder.java | 187 ++++++++++++++++++ .../source/GitHistogramDiffFinder.java | 0 2 files changed, 187 insertions(+) create mode 100644 server/sonar-ce-task-projectanalysis/src/main/java/org/sonar/ce/task/projectanalysis/source/GitDiffFinder.java delete mode 100644 server/sonar-ce-task-projectanalysis/src/main/java/org/sonar/ce/task/projectanalysis/source/GitHistogramDiffFinder.java diff --git a/server/sonar-ce-task-projectanalysis/src/main/java/org/sonar/ce/task/projectanalysis/source/GitDiffFinder.java b/server/sonar-ce-task-projectanalysis/src/main/java/org/sonar/ce/task/projectanalysis/source/GitDiffFinder.java new file mode 100644 index 000000000000..7c20005efb10 --- /dev/null +++ b/server/sonar-ce-task-projectanalysis/src/main/java/org/sonar/ce/task/projectanalysis/source/GitDiffFinder.java @@ -0,0 +1,187 @@ +/* + * SonarQube + * Copyright (C) 2009-2024 SonarSource SA + * mailto:info AT sonarsource DOT com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ +package org.sonar.ce.task.projectanalysis.source; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +class GitDiffFinder { + + private static final Logger LOG = LoggerFactory.getLogger(GitDiffFinder.class); + + private static final Pattern HUNK_HEADER_PATTERN = Pattern.compile( + "^@@ -(\\d+)(?:,(\\d+))? \\+(\\d+)(?:,(\\d+))? @@"); + + int[] findMatchingLines(List previousVersionLines, List currentVersionLines) + throws IOException, InterruptedException { + + int totalReportLines = currentVersionLines.size(); + int totalDbLines = previousVersionLines.size(); + + int[] matchingLineArray = new int[totalReportLines]; + + if (previousVersionLines.isEmpty() || currentVersionLines.isEmpty()) { + return matchingLineArray; + } + + Path diffWorkingDirectory = null; + try { + diffWorkingDirectory = Files.createTempDirectory("sonar-histogram-diff-"); + Path previousVersionFile = diffWorkingDirectory.resolve("previous.txt"); + Path currentVersionFile = diffWorkingDirectory.resolve("current.txt"); + + Files.write(previousVersionFile, previousVersionLines, StandardCharsets.UTF_8); + Files.write(currentVersionFile, currentVersionLines, StandardCharsets.UTF_8); + + executeDiffAndParseOutput(previousVersionFile, currentVersionFile, matchingLineArray, totalDbLines, + totalReportLines); + + } finally { + deleteTempFilesAndDirectory(diffWorkingDirectory); + } + + return matchingLineArray; + } + + private void executeDiffAndParseOutput(Path previousVersionFile, Path currentVersionFile, int[] matchingLineArray, + int totalDbLines, int totalReportLines) throws IOException, InterruptedException { + + ProcessBuilder gitDiffProcess = new ProcessBuilder("git", "diff", "--no-index", "--no-color", "--histogram", + previousVersionFile.toAbsolutePath().toString(), currentVersionFile.toAbsolutePath().toString()); + gitDiffProcess.redirectError(ProcessBuilder.Redirect.DISCARD); + + Process runningProcess = gitDiffProcess.start(); + + try (BufferedReader diffOutputReader = new BufferedReader( + new InputStreamReader(runningProcess.getInputStream(), StandardCharsets.UTF_8))) { + parseUnifiedDiffOutput(diffOutputReader, matchingLineArray, totalDbLines, totalReportLines); + } + + int processExitCode = runningProcess.waitFor(); + if (processExitCode > 1) { + throw new IOException("git diff --no-index failed with exit code " + processExitCode); + } + } + + private static void parseUnifiedDiffOutput(BufferedReader diffOutputReader, int[] matchingLineArray, + int totalDbLines, int totalReportLines) throws IOException { + + int currentDbLinePosition = 1; + int currentReportLinePosition = 1; + + String rawDiffLine; + boolean insideHunk = false; + + int hunkStartInPreviousFile = 0; + int hunkLengthInPreviousFile = 0; + int hunkStartInCurrentFile = 0; + int hunkLengthInCurrentFile = 0; + + while ((rawDiffLine = diffOutputReader.readLine()) != null) { + Matcher hunkHeaderMatcher = HUNK_HEADER_PATTERN.matcher(rawDiffLine); + + if (hunkHeaderMatcher.find()) { + if (insideHunk) { + currentDbLinePosition = hunkStartInPreviousFile + hunkLengthInPreviousFile; + currentReportLinePosition = hunkStartInCurrentFile + hunkLengthInCurrentFile; + } + + hunkStartInPreviousFile = Integer.parseInt(hunkHeaderMatcher.group(1)); + hunkLengthInPreviousFile = + hunkHeaderMatcher.group(2) != null ? Integer.parseInt(hunkHeaderMatcher.group(2)) : 1; + hunkStartInCurrentFile = Integer.parseInt(hunkHeaderMatcher.group(3)); + hunkLengthInCurrentFile = + hunkHeaderMatcher.group(4) != null ? Integer.parseInt(hunkHeaderMatcher.group(4)) : 1; + + fillIdenticalLinesBetweenHunks(matchingLineArray, currentDbLinePosition, currentReportLinePosition, + hunkStartInPreviousFile, hunkStartInCurrentFile); + + currentDbLinePosition = hunkStartInPreviousFile; + currentReportLinePosition = hunkStartInCurrentFile; + insideHunk = true; + + } else if (insideHunk && !rawDiffLine.isEmpty()) { + char lineTypePrefix = rawDiffLine.charAt(0); + + if (lineTypePrefix == ' ') { + int reportArrayIndex = currentReportLinePosition - 1; + if (reportArrayIndex >= 0 && reportArrayIndex < matchingLineArray.length) { + matchingLineArray[reportArrayIndex] = currentDbLinePosition; + } + currentDbLinePosition++; + currentReportLinePosition++; + + } else if (lineTypePrefix == '+') { + currentReportLinePosition++; + + } else if (lineTypePrefix == '-') { + currentDbLinePosition++; + } + } + } + + if (insideHunk) { + currentDbLinePosition = hunkStartInPreviousFile + hunkLengthInPreviousFile; + currentReportLinePosition = hunkStartInCurrentFile + hunkLengthInCurrentFile; + } + fillIdenticalLinesBetweenHunks(matchingLineArray, currentDbLinePosition, currentReportLinePosition, + totalDbLines + 1, totalReportLines + 1); + } + + private static void fillIdenticalLinesBetweenHunks(int[] matchingLineArray, int fromDbLine, int fromReportLine, + int untilDbLine, int untilReportLine) { + + int dbLinePointer = fromDbLine; + int reportLinePointer = fromReportLine; + + while (dbLinePointer < untilDbLine && reportLinePointer < untilReportLine) { + int reportArrayIndex = reportLinePointer - 1; + if (reportArrayIndex >= 0 && reportArrayIndex < matchingLineArray.length) { + matchingLineArray[reportArrayIndex] = dbLinePointer; + } + dbLinePointer++; + reportLinePointer++; + } + } + + private static void deleteTempFilesAndDirectory(Path diffWorkingDirectory) { + if (diffWorkingDirectory == null) { + return; + } + try { + Files.deleteIfExists(diffWorkingDirectory.resolve("previous.txt")); + Files.deleteIfExists(diffWorkingDirectory.resolve("current.txt")); + Files.deleteIfExists(diffWorkingDirectory); + } catch (IOException cleanupException) { + LOG.warn("Git Diff Temp file cleanup failed for directory '{}'. " + + "Reason: {}. Non-fatal — OS will reclaim on reboot.", diffWorkingDirectory, + cleanupException.getMessage()); + } + } +} diff --git a/server/sonar-ce-task-projectanalysis/src/main/java/org/sonar/ce/task/projectanalysis/source/GitHistogramDiffFinder.java b/server/sonar-ce-task-projectanalysis/src/main/java/org/sonar/ce/task/projectanalysis/source/GitHistogramDiffFinder.java deleted file mode 100644 index e69de29bb2d1..000000000000 From d5f8bec412ca95b54786e2e0880b9394f9aea7a0 Mon Sep 17 00:00:00 2001 From: Dhanush Terala Date: Thu, 14 May 2026 02:46:28 +0530 Subject: [PATCH 08/11] Revert "CD-7509 Update GitScmProvider to use native Git" This reverts commit 6fa49a0dcfa1e7863d662ee4470b7c2cc5e14cf5. --- .../org/sonar/scm/git/GitScmProvider.java | 63 +------------------ 1 file changed, 2 insertions(+), 61 deletions(-) diff --git a/sonar-scanner-engine/src/main/java/org/sonar/scm/git/GitScmProvider.java b/sonar-scanner-engine/src/main/java/org/sonar/scm/git/GitScmProvider.java index bd3a1aeabefa..80b42a45bc58 100644 --- a/sonar-scanner-engine/src/main/java/org/sonar/scm/git/GitScmProvider.java +++ b/sonar-scanner-engine/src/main/java/org/sonar/scm/git/GitScmProvider.java @@ -21,11 +21,8 @@ import com.google.common.annotations.VisibleForTesting; import java.io.BufferedOutputStream; -import java.io.BufferedReader; import java.io.File; import java.io.IOException; -import java.io.InputStreamReader; -import java.nio.charset.StandardCharsets; import java.nio.file.Path; import java.time.Instant; import java.util.Arrays; @@ -68,7 +65,6 @@ import org.eclipse.jgit.treewalk.filter.TreeFilter; import org.sonar.api.batch.scm.BlameCommand; import org.sonar.api.batch.scm.ScmProvider; -import org.sonar.api.config.Configuration; import org.sonar.api.notifications.AnalysisWarnings; import org.sonar.api.utils.MessageException; import org.sonar.api.utils.System2; @@ -92,25 +88,20 @@ public class GitScmProvider extends ScmProvider { private static final String NO_MERGE_BASE_FOUND_MESSAGE = "No merge base found between HEAD and %s"; @VisibleForTesting static final String SCM_INTEGRATION_DOCUMENTATION_SUFFIX = "/analyzing-source-code/scm-integration/"; - - private static final String KEY_CODESCAN_GITCLI_ENABLED = "codescan.gitcli.enabled"; - private final BlameCommand blameCommand; private final AnalysisWarnings analysisWarnings; private final GitIgnoreCommand gitIgnoreCommand; private final System2 system2; private final DocumentationLinkGenerator documentationLinkGenerator; - private final Configuration configuration; public GitScmProvider(CompositeBlameCommand blameCommand, AnalysisWarnings analysisWarnings, GitIgnoreCommand gitIgnoreCommand, System2 system2, - DocumentationLinkGenerator documentationLinkGenerator, Configuration configuration) { + DocumentationLinkGenerator documentationLinkGenerator) { this.blameCommand = blameCommand; this.analysisWarnings = analysisWarnings; this.gitIgnoreCommand = gitIgnoreCommand; this.system2 = system2; this.documentationLinkGenerator = documentationLinkGenerator; - this.configuration = configuration; } @Override @@ -134,10 +125,6 @@ public BlameCommand blameCommand() { return this.blameCommand; } - private boolean useNativeGitDiff() { - return configuration.getBoolean(KEY_CODESCAN_GITCLI_ENABLED).orElse(false); - } - @CheckForNull @Override public Set branchChangedFiles(String targetBranchName, Path rootBaseDir) { @@ -259,15 +246,8 @@ public Map> branchChangedLinesWithFileMovementDetection(Strin Map> changedLines = new HashMap<>(); - boolean nativeGit = useNativeGitDiff(); - LOG.info("Using {} for changed-line detection", nativeGit ? "native Git CLI" : "JGit"); - for (Map.Entry entry : changedFiles.entrySet()) { - if (nativeGit) { - collectChangedLinesWithNativeGit(repo, mergeBaseCommit.get(), changedLines, entry.getKey()); - } else { - collectChangedLines(repo, mergeBaseCommit.get(), changedLines, entry.getKey(), entry.getValue()); - } + collectChangedLines(repo, mergeBaseCommit.get(), changedLines, entry.getKey(), entry.getValue()); } return changedLines; @@ -319,45 +299,6 @@ private void collectChangedLines(Repository repo, RevCommit mergeBaseCommit, Map } } - private void collectChangedLinesWithNativeGit(Repository repo, RevCommit mergeBase, - Map> changedLines, Path file) throws IOException, InterruptedException { - - Path workTree = repo.getWorkTree().toPath(); - String relPath = toGitPath(workTree.relativize(file).toString()); - - ProcessBuilder pb = new ProcessBuilder("git", "diff", "--no-color", "--diff-algorithm=histogram", - mergeBase.getName(), "--", relPath); - - pb.directory(repo.getWorkTree()); - pb.redirectError(ProcessBuilder.Redirect.DISCARD); - - Process process = pb.start(); - ChangedLinesComputer computer = new ChangedLinesComputer(); - - try (BufferedReader br = new BufferedReader( - new InputStreamReader(process.getInputStream(), StandardCharsets.UTF_8))) { - String line; - while ((line = br.readLine()) != null) { - for (char c : line.toCharArray()) { - computer.receiver().write(c); - } - computer.receiver().write('\n'); - } - } - - int exitCode = process.waitFor(); - - if (exitCode != 0) { - LOG.warn("git diff failed with exit code {} for {}", exitCode, relPath); - return; - } - - Set lines = computer.changedLines(); - if (!lines.isEmpty()) { - changedLines.put(file, lines); - } - } - @Override @CheckForNull public Instant forkDate(String referenceBranchName, Path projectBaseDir) { From 941ca8d7405741f6dff5e49db5cb66a7a7bbc2e8 Mon Sep 17 00:00:00 2001 From: Dhanush Terala Date: Thu, 14 May 2026 02:51:00 +0530 Subject: [PATCH 09/11] CD-7509 added warn logs --- .../ce/task/projectanalysis/source/SourceLinesDiffImpl.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/server/sonar-ce-task-projectanalysis/src/main/java/org/sonar/ce/task/projectanalysis/source/SourceLinesDiffImpl.java b/server/sonar-ce-task-projectanalysis/src/main/java/org/sonar/ce/task/projectanalysis/source/SourceLinesDiffImpl.java index 55d66d211a26..203d9aa1fe5f 100644 --- a/server/sonar-ce-task-projectanalysis/src/main/java/org/sonar/ce/task/projectanalysis/source/SourceLinesDiffImpl.java +++ b/server/sonar-ce-task-projectanalysis/src/main/java/org/sonar/ce/task/projectanalysis/source/SourceLinesDiffImpl.java @@ -25,6 +25,8 @@ import java.util.List; import java.util.Optional; import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.sonar.api.config.Configuration; import org.sonar.ce.task.projectanalysis.analysis.AnalysisMetadataHolder; import org.sonar.ce.task.projectanalysis.component.Component; @@ -43,6 +45,7 @@ public class SourceLinesDiffImpl implements SourceLinesDiff { + private static final Logger LOG = LoggerFactory.getLogger(SourceLinesDiffImpl.class); private static final String KEY_CODESCAN_GITCLI_ENABLED = "codescan.gitcli.enabled"; private static final String KEY_SFMETA_FILE_SUFFIXES = "sonar.sfmeta.file.suffixes"; @@ -105,8 +108,10 @@ private int[] computeWithHistogramDiff(Component component) { } catch (InterruptedException e) { Thread.currentThread().interrupt(); + LOG.warn("git-cli diff interrupted for {}, falling back to Myers", component.getKey()); return computeWithMyersDiff(component); } catch (IOException e) { + LOG.warn("git-cli diff failed for {}, falling back to Myers", component.getKey(), e); return computeWithMyersDiff(component); } } From 7af47ff6261abb8a61fd01767fbf6cb3b19e5798 Mon Sep 17 00:00:00 2001 From: Dhanush Terala Date: Thu, 14 May 2026 02:53:09 +0530 Subject: [PATCH 10/11] CD-7509 added logs --- .../ce/task/projectanalysis/source/SourceLinesDiffImpl.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/server/sonar-ce-task-projectanalysis/src/main/java/org/sonar/ce/task/projectanalysis/source/SourceLinesDiffImpl.java b/server/sonar-ce-task-projectanalysis/src/main/java/org/sonar/ce/task/projectanalysis/source/SourceLinesDiffImpl.java index 203d9aa1fe5f..0b2d9721fc77 100644 --- a/server/sonar-ce-task-projectanalysis/src/main/java/org/sonar/ce/task/projectanalysis/source/SourceLinesDiffImpl.java +++ b/server/sonar-ce-task-projectanalysis/src/main/java/org/sonar/ce/task/projectanalysis/source/SourceLinesDiffImpl.java @@ -85,6 +85,7 @@ public int[] computeMatchingLines(Component component) { } private int[] computeWithMyersDiff(Component component) { + LOG.info("Diff started for {} using Myers algorithm", component.getKey()); List database = getDBLines(component); List report = getReportLines(component); @@ -92,6 +93,7 @@ private int[] computeWithMyersDiff(Component component) { } private int[] computeWithHistogramDiff(Component component) { + LOG.info("Diff started for {} using Histogram (git-cli) algorithm", component.getKey()); try { List dbSourceLines = getDBSourceContent(component); List reportSourceLines = getReportSourceContent(component); From 7d1939eec537297b9cdb73e1a94a1b5fe83c643f Mon Sep 17 00:00:00 2001 From: Dhanush Terala Date: Fri, 15 May 2026 17:53:07 +0530 Subject: [PATCH 11/11] CD-7509 Added Comments to GitDiffFinder --- .../task/projectanalysis/source/GitDiffFinder.java | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/server/sonar-ce-task-projectanalysis/src/main/java/org/sonar/ce/task/projectanalysis/source/GitDiffFinder.java b/server/sonar-ce-task-projectanalysis/src/main/java/org/sonar/ce/task/projectanalysis/source/GitDiffFinder.java index 7c20005efb10..3055fa4db851 100644 --- a/server/sonar-ce-task-projectanalysis/src/main/java/org/sonar/ce/task/projectanalysis/source/GitDiffFinder.java +++ b/server/sonar-ce-task-projectanalysis/src/main/java/org/sonar/ce/task/projectanalysis/source/GitDiffFinder.java @@ -31,10 +31,22 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +/** + * Computes line-level mapping between the previous (DB) and current (report) version of a file + * by shelling out to {@code git diff --no-index --histogram} and parsing its unified-diff output. + * + * Returned array is indexed by current-file line (0-based): matchingLineArray[i] = the previous-file + * line number (1-based) that current line i+1 corresponds to, or 0 if the current line was added. + */ class GitDiffFinder { private static final Logger LOG = LoggerFactory.getLogger(GitDiffFinder.class); + // Matches a unified-diff hunk header, e.g. "@@ -12,5 +14,7 @@" + // group 1 = previous-file start line + // group 2 = previous-file line count (optional; defaults to 1 when omitted) + // group 3 = current-file start line + // group 4 = current-file line count (optional; defaults to 1 when omitted) private static final Pattern HUNK_HEADER_PATTERN = Pattern.compile( "^@@ -(\\d+)(?:,(\\d+))? \\+(\\d+)(?:,(\\d+))? @@");