From 8fb9e4e86b4794d39afba2d62413571cbc04a744 Mon Sep 17 00:00:00 2001 From: Ismael Martinez Ramos Date: Thu, 4 Jun 2026 12:50:06 +0100 Subject: [PATCH 01/14] feat: persistent inline comments to prevent cross-run duplicates Adds an opt-in config flag, persistent_inline_comments (default false). When enabled, the GitHub and GitLab providers fingerprint each inline comment and embed a hidden HTML marker in the posted body. On later runs the existing comment bodies are scanned for those markers, so a suggestion already posted is skipped instead of re-posted. This removes the duplicate inline comments that accumulate when the agent runs repeatedly on the same PR/MR (reported in particular on GitLab). Two fingerprints are matched with OR semantics: a body fingerprint over (file, line, normalised text) and a code fingerprint over the first ` ```suggestion ` block. The OR-match catches a re-emitted finding whether the model rephrases the prose or slightly changes the proposed code. The marker-scan store needs no external infrastructure; a database/cache backend could implement the same load/seen/add interface. Default behaviour is unchanged when the flag is off. Implements the feature requested in #2037. --- docs/docs/tools/improve.md | 13 ++ pr_agent/algo/inline_comment_dedup.py | 152 +++++++++++++ pr_agent/git_providers/github_provider.py | 26 +++ pr_agent/git_providers/gitlab_provider.py | 20 ++ pr_agent/settings/configuration.toml | 4 + tests/unittest/test_inline_comment_dedup.py | 226 ++++++++++++++++++++ 6 files changed, 441 insertions(+) create mode 100644 pr_agent/algo/inline_comment_dedup.py create mode 100644 tests/unittest/test_inline_comment_dedup.py diff --git a/docs/docs/tools/improve.md b/docs/docs/tools/improve.md index 11f3b9cf9b..8177969d04 100644 --- a/docs/docs/tools/improve.md +++ b/docs/docs/tools/improve.md @@ -212,6 +212,19 @@ dual_publishing_score_threshold = x Where x represents the minimum score threshold (>=) for suggestions to be presented as committable PR comments in addition to the table. Default is -1 (disabled). +### Persistent inline comments + +By default, PR-Agent re-posts identical inline code comments on every run, which clutters the discussion, particularly on GitLab. The persistent inline comments feature prevents this by skipping the re-posting of comments that are already present from an earlier run. This is achieved by embedding a hidden HTML-comment marker with a short fingerprint in each posted comment, allowing PR-Agent to scan existing comment bodies on later runs to identify and skip duplicates. + +Two fingerprints are used and matched with OR logic: one over the comment text (file, line, normalised text) and one over the proposed code block when present. This approach catches a re-emitted finding even when the model rephrases the prose or slightly changes the code. The feature is opt-in and off by default, and is implemented for the GitHub and GitLab providers; other providers are unaffected. + +To enable it, use the following setting: + +```toml +[config] +persistent_inline_comments = true +``` + ### Self-review `Platforms supported: GitHub, GitLab` diff --git a/pr_agent/algo/inline_comment_dedup.py b/pr_agent/algo/inline_comment_dedup.py new file mode 100644 index 0000000000..cf266b1c13 --- /dev/null +++ b/pr_agent/algo/inline_comment_dedup.py @@ -0,0 +1,152 @@ +"""Cross-run deduplication of inline (line-anchored) comments. + +Implements the feature requested in issue #2037: when the agent runs more +than once on the same PR/MR, it re-posts identical inline suggestions on +every run, cluttering the discussion (observed in particular on GitLab). +This module fingerprints each inline comment and embeds the fingerprint as +an HTML-comment marker in the posted body. On later runs the existing +comment bodies are scanned for those markers to rebuild the set of +already-posted fingerprints, and any suggestion whose fingerprint is already +present is skipped. + +Two fingerprints are computed per comment and matched with OR semantics: + +- Body fingerprint: SHA-256 over (relevant_file, anchor line, normalised + first 80 characters of the body). The category/importance tag and the + ``**Suggestion:**`` lead are stripped and whitespace is collapsed first. +- Code fingerprint: SHA-256 over (relevant_file, anchor line, normalised + contents of the first ```suggestion fenced block). Returns None when the + body has no suggestion block, in which case matching falls back to the + body fingerprint alone. + +The OR-match catches both "same prose, different code" and "same code, +different prose" re-emissions of the same defect, which are the two ways an +LLM tends to restate a finding across runs. + +The feature is opt-in via ``config.persistent_inline_comments`` (default +false) and is wired into the GitHub and GitLab providers. The marker-scan +store needs no external infrastructure; a different backend (database, +cache) could populate the same load/seen/add interface. +""" + +from __future__ import annotations + +import hashlib +import re +from typing import Iterator, Optional + +from pr_agent.log import get_logger + +BODY_MARKER_RE = re.compile(r"") +CODE_MARKER_RE = re.compile(r"") + +_LEAD_RE = re.compile(r"^\*\*Suggestion:\*\*\s*", re.IGNORECASE) +_TAG_RE = re.compile(r"\[(?:[a-z _]+),\s*importance:\s*\d+\]", re.IGNORECASE) +_WS_RE = re.compile(r"\s+") +_CODE_BLOCK_RE = re.compile(r"```suggestion[^\n]*\n(.*?)```", re.DOTALL) + + +def body_fingerprint(relevant_file: str, target_line_no, body: str) -> str: + normalised = _LEAD_RE.sub("", body or "") + normalised = _TAG_RE.sub("", normalised) + normalised = _WS_RE.sub(" ", normalised).strip()[:80].lower() + key = f"{relevant_file}|{target_line_no}|{normalised}" + return hashlib.sha256(key.encode("utf-8")).hexdigest()[:12] + + +def code_fingerprint(relevant_file: str, target_line_no, body: str) -> Optional[str]: + m = _CODE_BLOCK_RE.search(body or "") + if not m: + return None + code = _WS_RE.sub(" ", m.group(1)).strip().lower() + if not code: + return None + key = f"{relevant_file}|{target_line_no}|code|{code}" + return hashlib.sha256(key.encode("utf-8")).hexdigest()[:12] + + +def build_markers(body_fp: str, code_fp: Optional[str]) -> str: + markers = [f""] + if code_fp is not None: + markers.append(f"") + return "\n".join(markers) + + +def inline_comment_line(comment: dict): + """Best-effort anchor line for a GitHub inline-comment dict.""" + for key in ("line", "position", "start_line"): + if comment.get(key) is not None: + return comment[key] + return None + + +def iter_existing_inline_comment_bodies(git_provider) -> Iterator[str]: + """Yield the body of every existing comment on the current PR/MR. + + Dispatch is by provider class name so this module needs no provider + import. Unsupported providers raise NotImplementedError, which the store + treats as "cannot dedup here" and degrades to within-run dedup only. + """ + provider_name = type(git_provider).__name__ + if provider_name == "GithubProvider": + for comment in git_provider.pr.get_comments(): + yield getattr(comment, "body", "") or "" + elif provider_name == "GitLabProvider": + for discussion in git_provider.mr.discussions.list(get_all=True): + attrs = getattr(discussion, "attributes", None) or {} + for note in attrs.get("notes", []) or []: + if isinstance(note, dict): + yield note.get("body", "") or "" + else: + raise NotImplementedError( + f"inline-comment dedup not implemented for {provider_name}" + ) + + +class InlineCommentStore: + """Set of already-posted inline-comment fingerprints for one PR/MR. + + The existing comment bodies are scanned lazily on first lookup and the + seen-set is held in memory for the rest of the run. A failure to list + existing comments degrades to within-run dedup only and never raises + into the publish path. + """ + + def __init__(self, git_provider): + self._git_provider = git_provider + self._keys: set = set() + self._loaded = False + + def load(self) -> set: + if self._loaded: + return self._keys + try: + for body in iter_existing_inline_comment_bodies(self._git_provider): + for marker_re in (BODY_MARKER_RE, CODE_MARKER_RE): + for match in marker_re.finditer(body or ""): + self._keys.add(match.group(1)) + except Exception as e: + get_logger().info( + f"Persistent inline comments: could not load existing comments, " + f"within-run dedup only. error={e}" + ) + self._loaded = True + return self._keys + + def seen(self, fingerprint: Optional[str]) -> bool: + if fingerprint is None: + return False + return fingerprint in self.load() + + def add(self, fingerprint: Optional[str]) -> None: + if fingerprint is not None: + self._keys.add(fingerprint) + + +def get_inline_comment_store(git_provider) -> InlineCommentStore: + """Return the per-provider store, creating and caching it on first use.""" + store = getattr(git_provider, "_inline_comment_store", None) + if store is None: + store = InlineCommentStore(git_provider) + git_provider._inline_comment_store = store + return store diff --git a/pr_agent/git_providers/github_provider.py b/pr_agent/git_providers/github_provider.py index 6416486d84..c2996c9b12 100644 --- a/pr_agent/git_providers/github_provider.py +++ b/pr_agent/git_providers/github_provider.py @@ -17,6 +17,9 @@ from ..algo.file_filter import filter_ignored from ..algo.git_patch_processing import extract_hunk_headers +from ..algo.inline_comment_dedup import (body_fingerprint, build_markers, + code_fingerprint, get_inline_comment_store, + inline_comment_line) from ..algo.language_handler import is_valid_file from ..algo.types import EDIT_TYPE from ..algo.utils import (PRReviewHeader, Range, clip_tokens, @@ -412,6 +415,29 @@ def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_ return dict(body=body, path=path, position=position) if subject_type == "LINE" else {} def publish_inline_comments(self, comments: list[dict], disable_fallback: bool = False): + if get_settings().get("config.persistent_inline_comments", False): + store = get_inline_comment_store(self) + deduped = [] + for comment in comments: + if not comment: + deduped.append(comment) + continue + path = comment.get("path", "") + line = inline_comment_line(comment) + body = comment.get("body", "") + body_fp = body_fingerprint(path, line, body) + code_fp = code_fingerprint(path, line, body) + if store.seen(body_fp) or store.seen(code_fp): + continue + marked = dict(comment) + marked["body"] = f"{body}\n\n{build_markers(body_fp, code_fp)}" + deduped.append(marked) + store.add(body_fp) + store.add(code_fp) + if not any(deduped): + get_logger().info("Persistent inline comments: all suggestions already posted; nothing to publish") + return + comments = deduped try: # publish all comments in a single message self.pr.create_review(commit=self.last_commit_id, comments=comments) diff --git a/pr_agent/git_providers/gitlab_provider.py b/pr_agent/git_providers/gitlab_provider.py index b3f54920d0..23f6458339 100644 --- a/pr_agent/git_providers/gitlab_provider.py +++ b/pr_agent/git_providers/gitlab_provider.py @@ -19,6 +19,8 @@ find_line_number_of_relevant_line_in_file, load_large_diff) from ..config_loader import get_settings +from ..algo.inline_comment_dedup import (body_fingerprint, build_markers, + code_fingerprint, get_inline_comment_store) from ..log import get_logger from .git_provider import MAX_FILES_ALLOWED_FULL, GitProvider @@ -559,6 +561,16 @@ def send_inline_comment(self, body: str, edit_type: str, found: bool, relevant_f if not found: get_logger().info(f"Could not find position for {relevant_file} {relevant_line_in_file}") else: + store = None + body_fp = code_fp = None + if get_settings().get("config.persistent_inline_comments", False): + store = get_inline_comment_store(self) + body_fp = body_fingerprint(relevant_file, target_line_no, body) + code_fp = code_fingerprint(relevant_file, target_line_no, body) + if store.seen(body_fp) or store.seen(code_fp): + get_logger().info(f"Persistent inline comments: skipping duplicate inline comment on {relevant_file}:{target_line_no}") + return + body = f"{body}\n\n{build_markers(body_fp, code_fp)}" # in order to have exact sha's we have to find correct diff for this change diff = self.get_relevant_diff(relevant_file, relevant_line_in_file) if diff is None: @@ -578,6 +590,9 @@ def send_inline_comment(self, body: str, edit_type: str, found: bool, relevant_f get_logger().debug(f"Creating comment in MR {self.id_mr} with body {body} and position {pos_obj}") try: self.mr.discussions.create({'body': body, 'position': pos_obj}) + if store is not None: + store.add(body_fp) + store.add(code_fp) except Exception as e: try: # fallback - create a general note on the file in the MR @@ -617,6 +632,8 @@ def send_inline_comment(self, body: str, edit_type: str, found: bool, relevant_f diff_code = f"\n\n```diff\n{patch.rstrip()}\n```" body_fallback += diff_code + if store is not None: + body_fallback = f"{body_fallback}\n\n{build_markers(body_fp, code_fp)}" # Create a general note on the file in the MR self.mr.notes.create({ 'body': body_fallback, @@ -629,6 +646,9 @@ def send_inline_comment(self, body: str, edit_type: str, found: bool, relevant_f } }) get_logger().debug(f"Created fallback comment in MR {self.id_mr} with position {pos_obj}") + if store is not None: + store.add(body_fp) + store.add(code_fp) # get_logger().debug( # f"Failed to create comment in MR {self.id_mr} with position {pos_obj} (probably not a '+' line)") diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index f4d63a73f2..942fde001f 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -44,6 +44,10 @@ cli_mode=false output_relevant_configurations=false large_patch_policy = "clip" # "clip", "skip" duplicate_prompt_examples = false +# Persistent inline comments (issue #2037): when true, the GitHub and GitLab providers +# fingerprint each inline comment, embed the fingerprint as an HTML marker, and skip +# re-posting suggestions already present on the PR/MR across runs. +persistent_inline_comments = false # seed seed=-1 # set positive value to fix the seed (and ensure temperature=0) temperature=0.2 diff --git a/tests/unittest/test_inline_comment_dedup.py b/tests/unittest/test_inline_comment_dedup.py new file mode 100644 index 0000000000..9cf3e9b2fa --- /dev/null +++ b/tests/unittest/test_inline_comment_dedup.py @@ -0,0 +1,226 @@ +from unittest.mock import MagicMock, patch + +# Import a provider module first: it pulls in pr_agent.log via its transitive +# imports before config load, avoiding the partially-initialised-module +# circular import that triggers when pr_agent.log is the first pr_agent import. +from pr_agent.git_providers.github_provider import GithubProvider +from pr_agent.git_providers.gitlab_provider import GitLabProvider +from pr_agent.algo import inline_comment_dedup as d + + +# --------------------------------------------------------------------------- # +# fingerprints + markers +# --------------------------------------------------------------------------- # +def test_body_fingerprint_strips_lead_and_tag(): + a = d.body_fingerprint("f.py", 10, "**Suggestion:** Do the thing [possible issue, importance: 7]") + b = d.body_fingerprint("f.py", 10, "Do the thing") + assert a == b + assert len(a) == 12 + + +def test_body_fingerprint_varies_by_file_and_line(): + assert d.body_fingerprint("f.py", 10, "x") != d.body_fingerprint("g.py", 10, "x") + assert d.body_fingerprint("f.py", 10, "x") != d.body_fingerprint("f.py", 11, "x") + + +def test_code_fingerprint_none_without_block(): + assert d.code_fingerprint("f.py", 1, "no code here") is None + assert d.code_fingerprint("f.py", 1, "```suggestion\n\n```") is None # empty block + + +def test_code_fingerprint_whitespace_insensitive(): + fp1 = d.code_fingerprint("f.py", 1, "prose\n```suggestion\nfoo = 1\n```\n") + fp2 = d.code_fingerprint("f.py", 1, "different prose\n```suggestion\n foo = 1 \n```") + assert fp1 == fp2 and len(fp1) == 12 + + +def test_build_markers(): + assert d.build_markers("aaaaaaaaaaaa", None) == "" + out = d.build_markers("aaaaaaaaaaaa", "bbbbbbbbbbbb") + assert "" in out + assert "" in out + + +def test_inline_comment_line_prefers_line(): + assert d.inline_comment_line({"line": 5, "position": 9}) == 5 + assert d.inline_comment_line({"position": 9}) == 9 + assert d.inline_comment_line({}) is None + + +# --------------------------------------------------------------------------- # +# store +# --------------------------------------------------------------------------- # +class _GHComment: + def __init__(self, body): + self.body = body + + +def _gh_provider(existing_bodies): + """Real GithubProvider instance with only the attributes the dedup path touches.""" + p = GithubProvider.__new__(GithubProvider) + p.pr = MagicMock() + p.pr.get_comments.return_value = [_GHComment(b) for b in existing_bodies] + p.last_commit_id = "deadbeef" + return p + + +def test_store_scans_both_marker_forms(): + fp_body = d.body_fingerprint("a.py", 1, "alpha") + fp_code = d.code_fingerprint("a.py", 1, "p\n```suggestion\nx = 1\n```") + prov = _gh_provider([ + f"alpha\n\n", + f"beta\n\n", + ]) + store = d.InlineCommentStore(prov) + assert store.seen(fp_body) + assert store.seen(fp_code) + assert not store.seen("ffffffffffff") + assert store.seen(None) is False + + +def test_store_load_failure_degrades_to_empty(): + prov = _gh_provider([]) + prov.pr.get_comments.side_effect = RuntimeError("api down") + store = d.InlineCommentStore(prov) + assert store.load() == set() # must not raise + + +def test_iter_unsupported_provider_raises(): + class FooProvider: + pass + try: + list(d.iter_existing_inline_comment_bodies(FooProvider())) + assert False, "expected NotImplementedError" + except NotImplementedError: + pass + + +# --------------------------------------------------------------------------- # +# GitHub provider integration +# --------------------------------------------------------------------------- # +def _patch_flag(value): + gs = patch("pr_agent.git_providers.github_provider.get_settings") + m = gs.start() + m.return_value.get.side_effect = lambda k, default=None: value if k == "config.persistent_inline_comments" else default + return gs + + +def test_github_filters_seen_and_marks_new(): + seen_fp = d.body_fingerprint("a.py", 10, "old body") + p = _gh_provider([f"old body\n\n"]) + gs = _patch_flag(True) + try: + p.publish_inline_comments([ + {"path": "a.py", "line": 10, "body": "old body"}, # duplicate -> dropped + {"path": "b.py", "line": 20, "body": "new body"}, # new -> kept + marker + ]) + finally: + gs.stop() + published = p.pr.create_review.call_args.kwargs["comments"] + assert len(published) == 1 + assert published[0]["path"] == "b.py" + assert ""]) + gs = _patch_flag(True) + try: + p.publish_inline_comments([{"path": "a.py", "line": 10, "body": "old body"}]) + finally: + gs.stop() + p.pr.create_review.assert_not_called() + + +def test_github_within_batch_duplicate_dropped(): + p = _gh_provider([]) + gs = _patch_flag(True) + try: + p.publish_inline_comments([ + {"path": "a.py", "line": 10, "body": "same finding"}, + {"path": "a.py", "line": 10, "body": "same finding"}, + ]) + finally: + gs.stop() + published = p.pr.create_review.call_args.kwargs["comments"] + assert len(published) == 1 + + +def test_github_flag_off_publishes_unmarked(): + p = _gh_provider([]) + gs = _patch_flag(False) + try: + p.publish_inline_comments([{"path": "a.py", "line": 10, "body": "x"}]) + finally: + gs.stop() + published = p.pr.create_review.call_args.kwargs["comments"] + assert len(published) == 1 + assert "pr-agent-dedup" not in published[0]["body"] + + +# --------------------------------------------------------------------------- # +# GitLab provider integration +# --------------------------------------------------------------------------- # +class _FakeDiff: + base_commit_sha = "base" + start_commit_sha = "start" + head_commit_sha = "head" + + +class _FakeTargetFile: + filename = "a.py" + old_filename = "a.py" + + +def _gl_provider(existing_bodies): + p = GitLabProvider.__new__(GitLabProvider) + p.id_mr = 1 + p.mr = MagicMock() + # existing discussions feed the store's marker scan + discs = [] + for b in existing_bodies: + disc = MagicMock() + disc.attributes = {"notes": [{"body": b}]} + discs.append(disc) + p.mr.discussions.list.return_value = discs + p.get_relevant_diff = MagicMock(return_value=_FakeDiff()) + return p + + +def _send(p, body): + p.send_inline_comment( + body=body, edit_type="addition", found=True, + relevant_file="a.py", relevant_line_in_file="+x = 1", + source_line_no=10, target_file=_FakeTargetFile(), target_line_no=10, + original_suggestion=None, + ) + + +def test_gitlab_posts_new_with_marker_and_skips_duplicate(): + p = _gl_provider([]) + gs = patch("pr_agent.git_providers.gitlab_provider.get_settings") + m = gs.start() + m.return_value.get.side_effect = lambda k, default=None: True if k == "config.persistent_inline_comments" else default + try: + _send(p, "**Suggestion:** fix it [possible issue, importance: 7]") + first = p.mr.discussions.create.call_args.args[0] + assert "") _LEAD_RE = re.compile(r"^\*\*Suggestion:\*\*\s*", re.IGNORECASE) -_TAG_RE = re.compile(r"\[(?:[a-z _]+),\s*importance:\s*\d+\]", re.IGNORECASE) +_TAG_RE = re.compile(r"\[[^\]]+?,\s*importance:\s*\d+\]", re.IGNORECASE) _WS_RE = re.compile(r"\s+") _CODE_BLOCK_RE = re.compile(r"```suggestion[^\n]*\n(.*?)```", re.DOTALL) diff --git a/pr_agent/git_providers/gitlab_provider.py b/pr_agent/git_providers/gitlab_provider.py index 23f6458339..e4dcd63f8d 100644 --- a/pr_agent/git_providers/gitlab_provider.py +++ b/pr_agent/git_providers/gitlab_provider.py @@ -14,13 +14,13 @@ from ..algo.file_filter import filter_ignored from ..algo.git_patch_processing import decode_if_bytes +from ..algo.inline_comment_dedup import (body_fingerprint, build_markers, + code_fingerprint, get_inline_comment_store) from ..algo.language_handler import is_valid_file from ..algo.utils import (clip_tokens, find_line_number_of_relevant_line_in_file, load_large_diff) from ..config_loader import get_settings -from ..algo.inline_comment_dedup import (body_fingerprint, build_markers, - code_fingerprint, get_inline_comment_store) from ..log import get_logger from .git_provider import MAX_FILES_ALLOWED_FULL, GitProvider diff --git a/tests/unittest/test_inline_comment_dedup.py b/tests/unittest/test_inline_comment_dedup.py index 9cf3e9b2fa..9006e06657 100644 --- a/tests/unittest/test_inline_comment_dedup.py +++ b/tests/unittest/test_inline_comment_dedup.py @@ -224,3 +224,86 @@ def test_gitlab_flag_off_posts_unmarked(): assert "pr-agent-dedup" not in body finally: gs.stop() + + +# --------------------------------------------------------------------------- # +# regression cases added after review +# --------------------------------------------------------------------------- # +def test_body_fingerprint_strips_non_standard_label(): + # hyphen/digit/capitalised labels must still be stripped so the body + # fingerprint is stable across runs (review finding: tag regex too narrow) + a = d.body_fingerprint("f.py", 10, "**Suggestion:** Do the thing [best-practice, importance: 3]") + b = d.body_fingerprint("f.py", 10, "Do the thing") + assert a == b + + +def test_github_code_fingerprint_or_match_across_runs(): + # existing comment carries ONLY a code marker; a new comment with different + # prose but the same suggestion block must be dropped via the code fp even + # though its body fingerprint differs. + code_fp = d.code_fingerprint("a.py", 10, "p\n```suggestion\nx = 1\n```") + p = _gh_provider([f"earlier wording\n\n"]) + gs = _patch_flag(True) + try: + p.publish_inline_comments([ + {"path": "a.py", "line": 10, "body": "totally different wording\n```suggestion\nx = 1\n```"}, + ]) + finally: + gs.stop() + p.pr.create_review.assert_not_called() + + +def test_store_unsupported_provider_degrades(): + class FooProvider: + pass + store = d.InlineCommentStore(FooProvider()) + assert store.load() == set() + assert store.seen("abcabcabcabc") is False + + +def test_gitlab_skips_when_existing_discussion_has_marker(): + body = "**Suggestion:** already here [possible issue, importance: 7]" + seen_fp = d.body_fingerprint("a.py", 10, body) + p = _gl_provider([f"already here\n\n"]) + gs = patch("pr_agent.git_providers.gitlab_provider.get_settings") + m = gs.start() + m.return_value.get.side_effect = lambda k, default=None: True if k == "config.persistent_inline_comments" else default + try: + _send(p, body) + p.mr.discussions.create.assert_not_called() + finally: + gs.stop() + + +def test_gitlab_fallback_note_carries_marker_and_records(): + p = _gl_provider([]) + p.mr.discussions.create.side_effect = RuntimeError("position rejected") + p.get_line_link = MagicMock(return_value="http://link") + original = { + "relevant_lines_start": 10, "relevant_lines_end": 11, + "existing_code": "a = 1", "improved_code": "a = 2", + "suggestion_content": "fix it", "label": "possible issue", "score": 7, + } + + def _send_fb(): + p.send_inline_comment( + body="**Suggestion:** fix it [possible issue, importance: 7]", + edit_type="addition", found=True, relevant_file="a.py", + relevant_line_in_file="+a = 2", source_line_no=10, + target_file=_FakeTargetFile(), target_line_no=10, + original_suggestion=original, + ) + + gs = patch("pr_agent.git_providers.gitlab_provider.get_settings") + m = gs.start() + m.return_value.get.side_effect = lambda k, default=None: True if k == "config.persistent_inline_comments" else default + try: + _send_fb() + assert p.mr.notes.create.called + note_body = p.mr.notes.create.call_args.args[0]["body"] + assert "") CODE_MARKER_RE = re.compile(r"") @@ -126,6 +124,7 @@ def load(self) -> set: for match in marker_re.finditer(body or ""): self._keys.add(match.group(1)) except Exception as e: + from pr_agent.log import get_logger get_logger().info( f"Persistent inline comments: could not load existing comments, " f"within-run dedup only. error={e}" diff --git a/pr_agent/git_providers/github_provider.py b/pr_agent/git_providers/github_provider.py index c2996c9b12..cf45030c25 100644 --- a/pr_agent/git_providers/github_provider.py +++ b/pr_agent/git_providers/github_provider.py @@ -18,8 +18,8 @@ from ..algo.file_filter import filter_ignored from ..algo.git_patch_processing import extract_hunk_headers from ..algo.inline_comment_dedup import (body_fingerprint, build_markers, - code_fingerprint, get_inline_comment_store, - inline_comment_line) + code_fingerprint, + get_inline_comment_store) from ..algo.language_handler import is_valid_file from ..algo.types import EDIT_TYPE from ..algo.utils import (PRReviewHeader, Range, clip_tokens, @@ -415,25 +415,36 @@ def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_ return dict(body=body, path=path, position=position) if subject_type == "LINE" else {} def publish_inline_comments(self, comments: list[dict], disable_fallback: bool = False): - if get_settings().get("config.persistent_inline_comments", False): + store = None + pending_fingerprints = [] + # Dedup only on the top-level call. A fallback re-publish passes + # disable_fallback=True; it must not re-filter, or it could drop a + # comment that has not actually been posted yet. + if not disable_fallback and get_settings().get("config.persistent_inline_comments", False): store = get_inline_comment_store(self) + local_seen = set() deduped = [] for comment in comments: if not comment: deduped.append(comment) continue path = comment.get("path", "") - line = inline_comment_line(comment) body = comment.get("body", "") - body_fp = body_fingerprint(path, line, body) - code_fp = code_fingerprint(path, line, body) - if store.seen(body_fp) or store.seen(code_fp): + # GitHub committable comments are anchored by diff position, which + # shifts as the PR gains commits; anchor the fingerprint on the file + # path and comment content instead so it stays stable across runs. + body_fp = body_fingerprint(path, None, body) + code_fp = code_fingerprint(path, None, body) + if (store.seen(body_fp) or store.seen(code_fp) + or body_fp in local_seen or (code_fp and code_fp in local_seen)): continue marked = dict(comment) marked["body"] = f"{body}\n\n{build_markers(body_fp, code_fp)}" deduped.append(marked) - store.add(body_fp) - store.add(code_fp) + local_seen.add(body_fp) + if code_fp: + local_seen.add(code_fp) + pending_fingerprints.append((body_fp, code_fp)) if not any(deduped): get_logger().info("Persistent inline comments: all suggestions already posted; nothing to publish") return @@ -455,6 +466,13 @@ def publish_inline_comments(self, comments: list[dict], disable_fallback: bool = get_logger().error(f"Failed to publish inline code comments fallback, error: {e}") raise e + # Record fingerprints only after a publish path has run without raising, + # so a failed publish does not block a retry of the same comment this run. + if store is not None: + for body_fp, code_fp in pending_fingerprints: + store.add(body_fp) + store.add(code_fp) + def get_review_thread_comments(self, comment_id: int) -> list[dict]: """ Retrieves all comments in the same thread as the given comment. diff --git a/pr_agent/git_providers/gitlab_provider.py b/pr_agent/git_providers/gitlab_provider.py index e4dcd63f8d..2ec1cf8494 100644 --- a/pr_agent/git_providers/gitlab_provider.py +++ b/pr_agent/git_providers/gitlab_provider.py @@ -15,7 +15,8 @@ from ..algo.file_filter import filter_ignored from ..algo.git_patch_processing import decode_if_bytes from ..algo.inline_comment_dedup import (body_fingerprint, build_markers, - code_fingerprint, get_inline_comment_store) + code_fingerprint, + get_inline_comment_store) from ..algo.language_handler import is_valid_file from ..algo.utils import (clip_tokens, find_line_number_of_relevant_line_in_file, diff --git a/tests/unittest/test_inline_comment_dedup.py b/tests/unittest/test_inline_comment_dedup.py index 9006e06657..af598035d8 100644 --- a/tests/unittest/test_inline_comment_dedup.py +++ b/tests/unittest/test_inline_comment_dedup.py @@ -1,11 +1,8 @@ from unittest.mock import MagicMock, patch -# Import a provider module first: it pulls in pr_agent.log via its transitive -# imports before config load, avoiding the partially-initialised-module -# circular import that triggers when pr_agent.log is the first pr_agent import. +from pr_agent.algo import inline_comment_dedup as d from pr_agent.git_providers.github_provider import GithubProvider from pr_agent.git_providers.gitlab_provider import GitLabProvider -from pr_agent.algo import inline_comment_dedup as d # --------------------------------------------------------------------------- # @@ -106,7 +103,7 @@ def _patch_flag(value): def test_github_filters_seen_and_marks_new(): - seen_fp = d.body_fingerprint("a.py", 10, "old body") + seen_fp = d.body_fingerprint("a.py", None, "old body") p = _gh_provider([f"old body\n\n"]) gs = _patch_flag(True) try: @@ -123,7 +120,7 @@ def test_github_filters_seen_and_marks_new(): def test_github_all_duplicates_skips_publish(): - seen_fp = d.body_fingerprint("a.py", 10, "old body") + seen_fp = d.body_fingerprint("a.py", None, "old body") p = _gh_provider([f"old body\n\n"]) gs = _patch_flag(True) try: @@ -241,7 +238,7 @@ def test_github_code_fingerprint_or_match_across_runs(): # existing comment carries ONLY a code marker; a new comment with different # prose but the same suggestion block must be dropped via the code fp even # though its body fingerprint differs. - code_fp = d.code_fingerprint("a.py", 10, "p\n```suggestion\nx = 1\n```") + code_fp = d.code_fingerprint("a.py", None, "p\n```suggestion\nx = 1\n```") p = _gh_provider([f"earlier wording\n\n"]) gs = _patch_flag(True) try: From f30ccba10beedacb72af3bcaa24032d7a522b5fc Mon Sep 17 00:00:00 2001 From: Ismael Martinez Ramos Date: Thu, 4 Jun 2026 13:53:42 +0100 Subject: [PATCH 04/14] fix: scan GitLab notes so fallback-comment dedup markers persist across runs The committable-suggestion fallback posts via mr.notes.create, which may not surface as a discussion. InlineCommentStore now also scans mr.notes.list, so a fallback comment's marker is found on later runs and not re-posted. Adds a regression test. --- pr_agent/algo/inline_comment_dedup.py | 5 +++++ tests/unittest/test_inline_comment_dedup.py | 20 ++++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/pr_agent/algo/inline_comment_dedup.py b/pr_agent/algo/inline_comment_dedup.py index 4b56e0259d..07f4356ae5 100644 --- a/pr_agent/algo/inline_comment_dedup.py +++ b/pr_agent/algo/inline_comment_dedup.py @@ -95,6 +95,11 @@ def iter_existing_inline_comment_bodies(git_provider) -> Iterator[str]: for note in attrs.get("notes", []) or []: if isinstance(note, dict): yield note.get("body", "") or "" + # The committable-suggestion fallback posts via mr.notes.create, which + # may not surface as a discussion; scan plain notes too so their markers + # are seen on later runs. + for note in git_provider.mr.notes.list(get_all=True): + yield getattr(note, "body", "") or "" else: raise NotImplementedError( f"inline-comment dedup not implemented for {provider_name}" diff --git a/tests/unittest/test_inline_comment_dedup.py b/tests/unittest/test_inline_comment_dedup.py index af598035d8..8e3ba621a1 100644 --- a/tests/unittest/test_inline_comment_dedup.py +++ b/tests/unittest/test_inline_comment_dedup.py @@ -181,6 +181,7 @@ def _gl_provider(existing_bodies): disc.attributes = {"notes": [{"body": b}]} discs.append(disc) p.mr.discussions.list.return_value = discs + p.mr.notes.list.return_value = [] p.get_relevant_diff = MagicMock(return_value=_FakeDiff()) return p @@ -304,3 +305,22 @@ def _send_fb(): assert p.mr.notes.create.call_count == 1 finally: gs.stop() + + +def test_gitlab_skips_when_fallback_note_has_marker(): + # a prior run posted via the general-note fallback (mr.notes.create); its + # marker must be found by scanning notes, not only discussions. + body = "**Suggestion:** from fallback [possible issue, importance: 7]" + seen_fp = d.body_fingerprint("a.py", 10, body) + p = _gl_provider([]) + note = MagicMock() + note.body = f"from fallback\n\n" + p.mr.notes.list.return_value = [note] + gs = patch("pr_agent.git_providers.gitlab_provider.get_settings") + m = gs.start() + m.return_value.get.side_effect = lambda k, default=None: True if k == "config.persistent_inline_comments" else default + try: + _send(p, body) + p.mr.discussions.create.assert_not_called() + finally: + gs.stop() From 424387a5996a47eaaf527cddcc2252b61f0d0b7c Mon Sep 17 00:00:00 2001 From: Ismael Martinez Ramos Date: Thu, 4 Jun 2026 14:05:06 +0100 Subject: [PATCH 05/14] fix: round 2 review - GitLab deletion anchor, clip-safe markers, accurate log - GitLab fingerprints anchor on the line the comment is attached to (source line for deletions, target line otherwise), so deletion comments dedup against their real anchor instead of always the target line. - Dedup markers are appended via a clip-aware helper, so the marker survives when max_comment_chars would otherwise truncate the body and drop it. - The all-duplicates log only fires when a comment was actually skipped as a duplicate, not when the deduped batch was merely empty. - Wrap the long log f-string and overlong test lines. --- pr_agent/algo/inline_comment_dedup.py | 11 ++++++ pr_agent/git_providers/github_provider.py | 11 +++--- pr_agent/git_providers/gitlab_provider.py | 20 +++++++---- tests/unittest/test_inline_comment_dedup.py | 38 +++++++++++++++++++-- 4 files changed, 68 insertions(+), 12 deletions(-) diff --git a/pr_agent/algo/inline_comment_dedup.py b/pr_agent/algo/inline_comment_dedup.py index 07f4356ae5..0ed71df098 100644 --- a/pr_agent/algo/inline_comment_dedup.py +++ b/pr_agent/algo/inline_comment_dedup.py @@ -70,6 +70,17 @@ def build_markers(body_fp: str, code_fp: Optional[str]) -> str: return "\n".join(markers) +def body_with_markers(body: str, body_fp: str, code_fp: "Optional[str]", + max_chars: "Optional[int]" = None) -> str: + """Append the dedup marker(s) to a comment body. If max_chars is given and + body + markers would exceed it, the body is clipped (never the markers) so + the fingerprint marker always survives for the next run's scan.""" + suffix = f"\n\n{build_markers(body_fp, code_fp)}" + if max_chars and len(body) + len(suffix) > max_chars: + body = body[: max(0, max_chars - len(suffix))] + return f"{body}{suffix}" + + def inline_comment_line(comment: dict): """Best-effort anchor line for a GitHub inline-comment dict.""" for key in ("line", "position", "start_line"): diff --git a/pr_agent/git_providers/github_provider.py b/pr_agent/git_providers/github_provider.py index cf45030c25..64d9f11106 100644 --- a/pr_agent/git_providers/github_provider.py +++ b/pr_agent/git_providers/github_provider.py @@ -17,7 +17,7 @@ from ..algo.file_filter import filter_ignored from ..algo.git_patch_processing import extract_hunk_headers -from ..algo.inline_comment_dedup import (body_fingerprint, build_markers, +from ..algo.inline_comment_dedup import (body_fingerprint, body_with_markers, code_fingerprint, get_inline_comment_store) from ..algo.language_handler import is_valid_file @@ -424,6 +424,7 @@ def publish_inline_comments(self, comments: list[dict], disable_fallback: bool = store = get_inline_comment_store(self) local_seen = set() deduped = [] + skipped = 0 for comment in comments: if not comment: deduped.append(comment) @@ -437,16 +438,18 @@ def publish_inline_comments(self, comments: list[dict], disable_fallback: bool = code_fp = code_fingerprint(path, None, body) if (store.seen(body_fp) or store.seen(code_fp) or body_fp in local_seen or (code_fp and code_fp in local_seen)): + skipped += 1 continue marked = dict(comment) - marked["body"] = f"{body}\n\n{build_markers(body_fp, code_fp)}" + marked["body"] = body_with_markers( + body, body_fp, code_fp, getattr(self, "max_comment_chars", None)) deduped.append(marked) local_seen.add(body_fp) if code_fp: local_seen.add(code_fp) pending_fingerprints.append((body_fp, code_fp)) - if not any(deduped): - get_logger().info("Persistent inline comments: all suggestions already posted; nothing to publish") + if skipped and not any(deduped): + get_logger().info(f"Persistent inline comments: all {skipped} suggestion(s) already posted; nothing to publish") return comments = deduped try: diff --git a/pr_agent/git_providers/gitlab_provider.py b/pr_agent/git_providers/gitlab_provider.py index 2ec1cf8494..9f17fbed6a 100644 --- a/pr_agent/git_providers/gitlab_provider.py +++ b/pr_agent/git_providers/gitlab_provider.py @@ -14,7 +14,7 @@ from ..algo.file_filter import filter_ignored from ..algo.git_patch_processing import decode_if_bytes -from ..algo.inline_comment_dedup import (body_fingerprint, build_markers, +from ..algo.inline_comment_dedup import (body_fingerprint, body_with_markers, code_fingerprint, get_inline_comment_store) from ..algo.language_handler import is_valid_file @@ -566,12 +566,19 @@ def send_inline_comment(self, body: str, edit_type: str, found: bool, relevant_f body_fp = code_fp = None if get_settings().get("config.persistent_inline_comments", False): store = get_inline_comment_store(self) - body_fp = body_fingerprint(relevant_file, target_line_no, body) - code_fp = code_fingerprint(relevant_file, target_line_no, body) + # Anchor the fingerprint on the line the comment is actually + # attached to: deletions anchor on the old line (source), all + # other edits on the new line (target). + anchor_line = source_line_no if edit_type == "deletion" else target_line_no + body_fp = body_fingerprint(relevant_file, anchor_line, body) + code_fp = code_fingerprint(relevant_file, anchor_line, body) if store.seen(body_fp) or store.seen(code_fp): - get_logger().info(f"Persistent inline comments: skipping duplicate inline comment on {relevant_file}:{target_line_no}") + get_logger().info( + f"Persistent inline comments: skipping duplicate inline " + f"comment on {relevant_file}:{anchor_line}") return - body = f"{body}\n\n{build_markers(body_fp, code_fp)}" + body = body_with_markers( + body, body_fp, code_fp, getattr(self, "max_comment_chars", None)) # in order to have exact sha's we have to find correct diff for this change diff = self.get_relevant_diff(relevant_file, relevant_line_in_file) if diff is None: @@ -634,7 +641,8 @@ def send_inline_comment(self, body: str, edit_type: str, found: bool, relevant_f body_fallback += diff_code if store is not None: - body_fallback = f"{body_fallback}\n\n{build_markers(body_fp, code_fp)}" + body_fallback = body_with_markers( + body_fallback, body_fp, code_fp, getattr(self, "max_comment_chars", None)) # Create a general note on the file in the MR self.mr.notes.create({ 'body': body_fallback, diff --git a/tests/unittest/test_inline_comment_dedup.py b/tests/unittest/test_inline_comment_dedup.py index 8e3ba621a1..63256de6d2 100644 --- a/tests/unittest/test_inline_comment_dedup.py +++ b/tests/unittest/test_inline_comment_dedup.py @@ -186,9 +186,9 @@ def _gl_provider(existing_bodies): return p -def _send(p, body): +def _send(p, body, edit_type="addition"): p.send_inline_comment( - body=body, edit_type="addition", found=True, + body=body, edit_type=edit_type, found=True, relevant_file="a.py", relevant_line_in_file="+x = 1", source_line_no=10, target_file=_FakeTargetFile(), target_line_no=10, original_suggestion=None, @@ -324,3 +324,37 @@ def test_gitlab_skips_when_fallback_note_has_marker(): p.mr.discussions.create.assert_not_called() finally: gs.stop() + + +def _flag_on_gitlab(): + gs = patch("pr_agent.git_providers.gitlab_provider.get_settings") + m = gs.start() + m.return_value.get.side_effect = lambda k, default=None: True if k == "config.persistent_inline_comments" else default + return gs + + +def test_gitlab_deletion_anchored_on_source_line(): + # deletions anchor on the old (source) line; two deletions sharing a + # target line but different source lines must not collide. + p = _gl_provider([]) + gs = _flag_on_gitlab() + try: + _send(p, "**Suggestion:** drop it [possible issue, importance: 7]", edit_type="deletion") + first = p.mr.discussions.create.call_args.args[0] + expected = d.body_fingerprint("a.py", 10, "**Suggestion:** drop it [possible issue, importance: 7]") + assert f"" in first["body"] + finally: + gs.stop() + + +def test_gitlab_marker_survives_when_body_clipped(): + p = _gl_provider([]) + p.max_comment_chars = 60 + gs = _flag_on_gitlab() + try: + _send(p, "x" * 500) + posted = p.mr.discussions.create.call_args.args[0]["body"] + assert ""]) gs = patch("pr_agent.git_providers.gitlab_provider.get_settings") m = gs.start() - m.return_value.get.side_effect = lambda k, default=None: True if k == "config.persistent_inline_comments" else default + m.return_value.get.side_effect = _flag_side_effect(True) try: _send(p, body) p.mr.discussions.create.assert_not_called() @@ -294,7 +302,7 @@ def _send_fb(): gs = patch("pr_agent.git_providers.gitlab_provider.get_settings") m = gs.start() - m.return_value.get.side_effect = lambda k, default=None: True if k == "config.persistent_inline_comments" else default + m.return_value.get.side_effect = _flag_side_effect(True) try: _send_fb() assert p.mr.notes.create.called @@ -318,7 +326,7 @@ def test_gitlab_skips_when_fallback_note_has_marker(): p.mr.notes.list.return_value = [note] gs = patch("pr_agent.git_providers.gitlab_provider.get_settings") m = gs.start() - m.return_value.get.side_effect = lambda k, default=None: True if k == "config.persistent_inline_comments" else default + m.return_value.get.side_effect = _flag_side_effect(True) try: _send(p, body) p.mr.discussions.create.assert_not_called() @@ -329,7 +337,7 @@ def test_gitlab_skips_when_fallback_note_has_marker(): def _flag_on_gitlab(): gs = patch("pr_agent.git_providers.gitlab_provider.get_settings") m = gs.start() - m.return_value.get.side_effect = lambda k, default=None: True if k == "config.persistent_inline_comments" else default + m.return_value.get.side_effect = _flag_side_effect(True) return gs From 194244f17d4f95445b12c53c8c12a307289ca8d7 Mon Sep 17 00:00:00 2001 From: Ismael Martinez Ramos Date: Thu, 4 Jun 2026 14:11:50 +0100 Subject: [PATCH 07/14] test: wrap remaining long lines in dedup tests --- tests/unittest/test_inline_comment_dedup.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/tests/unittest/test_inline_comment_dedup.py b/tests/unittest/test_inline_comment_dedup.py index a4d842a250..4c5671ce10 100644 --- a/tests/unittest/test_inline_comment_dedup.py +++ b/tests/unittest/test_inline_comment_dedup.py @@ -17,7 +17,9 @@ def _get(key, default=None): # fingerprints + markers # --------------------------------------------------------------------------- # def test_body_fingerprint_strips_lead_and_tag(): - a = d.body_fingerprint("f.py", 10, "**Suggestion:** Do the thing [possible issue, importance: 7]") + a = d.body_fingerprint( + "f.py", 10, + "**Suggestion:** Do the thing [possible issue, importance: 7]") b = d.body_fingerprint("f.py", 10, "Do the thing") assert a == b assert len(a) == 12 @@ -238,7 +240,9 @@ def test_gitlab_flag_off_posts_unmarked(): def test_body_fingerprint_strips_non_standard_label(): # hyphen/digit/capitalised labels must still be stripped so the body # fingerprint is stable across runs (review finding: tag regex too narrow) - a = d.body_fingerprint("f.py", 10, "**Suggestion:** Do the thing [best-practice, importance: 3]") + a = d.body_fingerprint( + "f.py", 10, + "**Suggestion:** Do the thing [best-practice, importance: 3]") b = d.body_fingerprint("f.py", 10, "Do the thing") assert a == b @@ -252,7 +256,8 @@ def test_github_code_fingerprint_or_match_across_runs(): gs = _patch_flag(True) try: p.publish_inline_comments([ - {"path": "a.py", "line": 10, "body": "totally different wording\n```suggestion\nx = 1\n```"}, + {"path": "a.py", "line": 10, + "body": "totally different wording\n```suggestion\nx = 1\n```"}, ]) finally: gs.stop() @@ -349,7 +354,9 @@ def test_gitlab_deletion_anchored_on_source_line(): try: _send(p, "**Suggestion:** drop it [possible issue, importance: 7]", edit_type="deletion") first = p.mr.discussions.create.call_args.args[0] - expected = d.body_fingerprint("a.py", 10, "**Suggestion:** drop it [possible issue, importance: 7]") + expected = d.body_fingerprint( + "a.py", 10, + "**Suggestion:** drop it [possible issue, importance: 7]") assert f"" in first["body"] finally: gs.stop() From a0869e03365c71247c9985065dcff3724f7596db Mon Sep 17 00:00:00 2001 From: Ismael Martinez Ramos Date: Thu, 4 Jun 2026 14:47:31 +0100 Subject: [PATCH 08/14] fix: GitHub fallback re-publish keeps a dedup marker; wrap log; drop trailing ws - The invalid-comment fixer strips the ```suggestion block, which also dropped the dedup markers appended after it; the fallback then re-published via disable_fallback=True with dedup skipped, so those comments duplicated on later runs. Dedup now separates filtering from marking: the fallback path is not filtered (a not-yet-posted comment must not be skipped) but is still marked and recorded, and a body that already carries a marker is left as is. - Wrap the all-duplicates info log to stay within the line-length limit. - Remove trailing whitespace on the blank lines around the record block. --- pr_agent/git_providers/github_provider.py | 31 +++++++++++++-------- tests/unittest/test_inline_comment_dedup.py | 19 +++++++++++++ 2 files changed, 38 insertions(+), 12 deletions(-) diff --git a/pr_agent/git_providers/github_provider.py b/pr_agent/git_providers/github_provider.py index 64d9f11106..84df192700 100644 --- a/pr_agent/git_providers/github_provider.py +++ b/pr_agent/git_providers/github_provider.py @@ -417,10 +417,7 @@ def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_ def publish_inline_comments(self, comments: list[dict], disable_fallback: bool = False): store = None pending_fingerprints = [] - # Dedup only on the top-level call. A fallback re-publish passes - # disable_fallback=True; it must not re-filter, or it could drop a - # comment that has not actually been posted yet. - if not disable_fallback and get_settings().get("config.persistent_inline_comments", False): + if get_settings().get("config.persistent_inline_comments", False): store = get_inline_comment_store(self) local_seen = set() deduped = [] @@ -436,20 +433,30 @@ def publish_inline_comments(self, comments: list[dict], disable_fallback: bool = # path and comment content instead so it stays stable across runs. body_fp = body_fingerprint(path, None, body) code_fp = code_fingerprint(path, None, body) - if (store.seen(body_fp) or store.seen(code_fp) + # A fallback re-publish (disable_fallback=True) is for a comment + # that has not been posted yet, so do not filter it; only the + # top-level call drops duplicates. The fallback still gets marked + # and recorded below so it dedups on later runs. + if not disable_fallback and ( + store.seen(body_fp) or store.seen(code_fp) or body_fp in local_seen or (code_fp and code_fp in local_seen)): skipped += 1 continue - marked = dict(comment) - marked["body"] = body_with_markers( - body, body_fp, code_fp, getattr(self, "max_comment_chars", None)) + if "") + assert not d.has_marker("a comment that merely mentions "]) + + def _replace_with_diff(suggestions): + transformed = suggestions[0].copy() + transformed["body"] = "**Suggestion:** fix it\n
\n```diff\n-x = 0\n+x = 1\n```\n
" + return [transformed] + + p.validate_comments_inside_hunks = MagicMock(side_effect=_replace_with_diff) + gs = _patch_flag(True) + try: + assert p.publish_code_suggestions([{ + "body": original_body, + "relevant_file": "a.py", + "relevant_lines_start": 10, + "relevant_lines_end": 10, + }]) + finally: + gs.stop() + + p.pr.create_review.assert_not_called() + + +def test_github_does_not_send_pre_transform_fingerprint_to_api(): + original_body = "**Suggestion:** fix it\n```suggestion\nx = 1\n```" + p = _gh_provider([]) + + def _replace_with_diff(suggestions): + transformed = suggestions[0].copy() + transformed["body"] = "**Suggestion:** fix it\n
\n```diff\n-x = 0\n+x = 1\n```\n
" + return [transformed] + + p.validate_comments_inside_hunks = MagicMock(side_effect=_replace_with_diff) + gs = _patch_flag(True) + try: + assert p.publish_code_suggestions([{ + "body": original_body, + "relevant_file": "a.py", + "relevant_lines_start": 10, + "relevant_lines_end": 10, + }]) + finally: + gs.stop() + + published = p.pr.create_review.call_args.kwargs["comments"] + assert "_dedup_code_fp" not in published[0] + assert "