diff --git a/docs/docs/tools/improve.md b/docs/docs/tools/improve.md index 11f3b9cf9b..8177969d04 100644 --- a/docs/docs/tools/improve.md +++ b/docs/docs/tools/improve.md @@ -212,6 +212,19 @@ dual_publishing_score_threshold = x Where x represents the minimum score threshold (>=) for suggestions to be presented as committable PR comments in addition to the table. Default is -1 (disabled). +### Persistent inline comments + +By default, PR-Agent re-posts identical inline code comments on every run, which clutters the discussion, particularly on GitLab. The persistent inline comments feature prevents this by skipping the re-posting of comments that are already present from an earlier run. This is achieved by embedding a hidden HTML-comment marker with a short fingerprint in each posted comment, allowing PR-Agent to scan existing comment bodies on later runs to identify and skip duplicates. + +Two fingerprints are used and matched with OR logic: one over the comment text (file, line, normalised text) and one over the proposed code block when present. This approach catches a re-emitted finding even when the model rephrases the prose or slightly changes the code. The feature is opt-in and off by default, and is implemented for the GitHub and GitLab providers; other providers are unaffected. + +To enable it, use the following setting: + +```toml +[config] +persistent_inline_comments = true +``` + ### Self-review `Platforms supported: GitHub, GitLab` diff --git a/pr_agent/algo/inline_comment_dedup.py b/pr_agent/algo/inline_comment_dedup.py new file mode 100644 index 0000000000..c54364e4a7 --- /dev/null +++ b/pr_agent/algo/inline_comment_dedup.py @@ -0,0 +1,183 @@ +"""Cross-run deduplication of inline (line-anchored) comments. + +Implements the feature requested in issue #2037: when the agent runs more +than once on the same PR/MR, it re-posts identical inline suggestions on +every run, cluttering the discussion (observed in particular on GitLab). +This module fingerprints each inline comment and embeds the fingerprint as +an HTML-comment marker in the posted body. On later runs the existing +comment bodies are scanned for those markers to rebuild the set of +already-posted fingerprints, and any suggestion whose fingerprint is already +present is skipped. + +Two fingerprints are computed per comment and matched with OR semantics: + +- Body fingerprint: SHA-256 over (relevant_file, anchor line, normalised + first 80 characters of the body). The category/importance tag and the + ``**Suggestion:**`` lead are stripped and whitespace is collapsed first. +- Code fingerprint: SHA-256 over (relevant_file, anchor line, normalised + contents of the first ```suggestion fenced block). Returns None when the + body has no suggestion block, in which case matching falls back to the + body fingerprint alone. + +The OR-match catches both "same prose, different code" and "same code, +different prose" re-emissions of the same defect, which are the two ways an +LLM tends to restate a finding across runs. + +The feature is opt-in via ``config.persistent_inline_comments`` (default +false) and is wired into the GitHub and GitLab providers. The marker-scan +store needs no external infrastructure; a different backend (database, +cache) could populate the same load/seen/add interface. +""" + +from __future__ import annotations + +import hashlib +import re +from typing import Iterator, Optional + +BODY_MARKER_RE = re.compile(r"") +CODE_MARKER_RE = re.compile(r"") + +_LEAD_RE = re.compile(r"^\*\*Suggestion:\*\*\s*", re.IGNORECASE) +_TAG_RE = re.compile(r"\[[^\]]+?,\s*importance:\s*\d+\]", re.IGNORECASE) +_WS_RE = re.compile(r"\s+") +_CODE_BLOCK_RE = re.compile(r"```suggestion[^\n]*\n(.*?)```", re.DOTALL) + + +def has_marker(body: str) -> bool: + """True only if the body carries a well-formed dedup marker (12-hex), + so incidental text mentioning the marker syntax is not mistaken for one.""" + return bool(BODY_MARKER_RE.search(body or "") or CODE_MARKER_RE.search(body or "")) + + +def _strip_markers(body: str) -> str: + """Remove embedded dedup markers so a pre-marked body fingerprints the + same as its original (markers are appended after marking).""" + body = BODY_MARKER_RE.sub("", body or "") + body = CODE_MARKER_RE.sub("", body) + return body + + +def body_fingerprint(relevant_file: str, target_line_no, body: str) -> str: + normalised = _LEAD_RE.sub("", _strip_markers(body)) + normalised = _TAG_RE.sub("", normalised) + normalised = _WS_RE.sub(" ", normalised).strip()[:80].lower() + key = f"{relevant_file}|{target_line_no}|{normalised}" + return hashlib.sha256(key.encode("utf-8")).hexdigest()[:12] + + +def code_fingerprint(relevant_file: str, target_line_no, body: str) -> Optional[str]: + m = _CODE_BLOCK_RE.search(_strip_markers(body)) + if not m: + return None + # Do not lower-case: code is case-sensitive, so case-only differences + # must produce distinct fingerprints. + code = _WS_RE.sub(" ", m.group(1)).strip() + if not code: + return None + key = f"{relevant_file}|{target_line_no}|code|{code}" + return hashlib.sha256(key.encode("utf-8")).hexdigest()[:12] + + +def build_markers(body_fp: str, code_fp: Optional[str]) -> str: + markers = [f""] + if code_fp is not None: + markers.append(f"") + return "\n".join(markers) + + +def body_with_markers(body: str, body_fp: str, code_fp: "Optional[str]", + max_chars: "Optional[int]" = None) -> str: + """Append the dedup marker(s) to a comment body. If max_chars is given and + body + markers would exceed it, the body is clipped (never the markers) so + the fingerprint marker always survives for the next run's scan.""" + suffix = f"\n\n{build_markers(body_fp, code_fp)}" + if max_chars and len(body) + len(suffix) > max_chars: + body = body[: max(0, max_chars - len(suffix))] + return f"{body}{suffix}" + + +def inline_comment_line(comment: dict): + """Best-effort anchor line for a GitHub inline-comment dict.""" + for key in ("line", "position", "start_line"): + if comment.get(key) is not None: + return comment[key] + return None + + +def iter_existing_inline_comment_bodies(git_provider) -> Iterator[str]: + """Yield the body of every existing comment on the current PR/MR. + + Dispatch is by provider class name so this module needs no provider + import. Unsupported providers raise NotImplementedError, which the store + treats as "cannot dedup here" and degrades to within-run dedup only. + """ + provider_name = type(git_provider).__name__ + if provider_name == "GithubProvider": + for comment in git_provider.pr.get_comments(): + yield getattr(comment, "body", "") or "" + elif provider_name == "GitLabProvider": + for discussion in git_provider.mr.discussions.list(get_all=True): + attrs = getattr(discussion, "attributes", None) or {} + for note in attrs.get("notes", []) or []: + if isinstance(note, dict): + yield note.get("body", "") or "" + # The committable-suggestion fallback posts via mr.notes.create, which + # may not surface as a discussion; scan plain notes too so their markers + # are seen on later runs. + for note in git_provider.mr.notes.list(get_all=True): + yield getattr(note, "body", "") or "" + else: + raise NotImplementedError( + f"inline-comment dedup not implemented for {provider_name}" + ) + + +class InlineCommentStore: + """Set of already-posted inline-comment fingerprints for one PR/MR. + + The existing comment bodies are scanned lazily on first lookup and the + seen-set is held in memory for the rest of the run. A failure to list + existing comments degrades to within-run dedup only and never raises + into the publish path. + """ + + def __init__(self, git_provider): + self._git_provider = git_provider + self._keys: set = set() + self._loaded = False + + def load(self) -> set: + if self._loaded: + return self._keys + try: + for body in iter_existing_inline_comment_bodies(self._git_provider): + for marker_re in (BODY_MARKER_RE, CODE_MARKER_RE): + for match in marker_re.finditer(body or ""): + self._keys.add(match.group(1)) + except Exception as e: + from pr_agent.log import get_logger + get_logger().info( + f"Persistent inline comments: could not load existing comments, " + f"within-run dedup only. error={e}" + ) + self._loaded = True + return self._keys + + def seen(self, fingerprint: Optional[str]) -> bool: + if fingerprint is None: + return False + return fingerprint in self.load() + + def add(self, fingerprint: Optional[str]) -> None: + if fingerprint is not None: + self._keys.add(fingerprint) + + +def get_inline_comment_store(git_provider) -> InlineCommentStore: + """Return the per-provider store, creating and caching it on first use.""" + store = getattr(git_provider, "_inline_comment_store", None) + if store is None: + store = InlineCommentStore(git_provider) + git_provider._inline_comment_store = store + return store diff --git a/pr_agent/git_providers/github_provider.py b/pr_agent/git_providers/github_provider.py index 30e36be79b..2fd5e1e21b 100644 --- a/pr_agent/git_providers/github_provider.py +++ b/pr_agent/git_providers/github_provider.py @@ -17,6 +17,9 @@ from ..algo.file_filter import filter_ignored from ..algo.git_patch_processing import extract_hunk_headers +from ..algo.inline_comment_dedup import (body_fingerprint, body_with_markers, + code_fingerprint, + get_inline_comment_store, has_marker) from ..algo.language_handler import is_valid_file from ..algo.types import EDIT_TYPE from ..algo.utils import (PRReviewHeader, Range, clip_tokens, @@ -416,9 +419,70 @@ def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_ return dict(body=body, path=path, position=position) if subject_type == "LINE" else {} def publish_inline_comments(self, comments: list[dict], disable_fallback: bool = False): + store = None + pending_fingerprints = [] + dedup_code_fp_key = "_dedup_code_fp" + if get_settings().get("config.persistent_inline_comments", False): + store = get_inline_comment_store(self) + local_seen = set() + deduped = [] + skipped = 0 + for comment in comments: + if not comment: + deduped.append(comment) + continue + path = comment.get("path", "") + body = comment.get("body", "") + # GitHub committable comments are anchored by diff position, which + # shifts as the PR gains commits; anchor the fingerprint on the file + # path and comment content instead so it stays stable across runs. + body_fp = body_fingerprint(path, None, body) + pre_transform_code_fp = comment.get(dedup_code_fp_key) + code_fp = pre_transform_code_fp or code_fingerprint(path, None, body) + # A fallback re-publish (disable_fallback=True) is for a comment + # that has not been posted yet, so do not filter it; only the + # top-level call drops duplicates. The fallback still gets marked + # and recorded below so it dedups on later runs. + if not disable_fallback and ( + store.seen(body_fp) or store.seen(code_fp) + or body_fp in local_seen or (code_fp and code_fp in local_seen)): + skipped += 1 + continue + marked = dict(comment) + marked.pop(dedup_code_fp_key, None) + if has_marker(body): + pass # already carries a marker from the first pass + else: + marked["body"] = body_with_markers( + body, body_fp, code_fp, getattr(self, "max_comment_chars", None)) + deduped.append(marked) + local_seen.add(body_fp) + if code_fp: + local_seen.add(code_fp) + pending_fingerprints.append((body_fp, code_fp)) + if skipped and not any(deduped): + get_logger().info( + f"Persistent inline comments: all {skipped} suggestion(s) " + f"already posted; nothing to publish") + return + comments = deduped + else: + comments = [ + {key: value for key, value in comment.items() if key != dedup_code_fp_key} + if comment else comment + for comment in comments + ] try: # publish all comments in a single message self.pr.create_review(commit=self.last_commit_id, comments=comments) + # The whole batch posted; record its fingerprints so the rest of this + # run dedups against them. Cross-run dedup relies on the markers in the + # posted bodies, so comments the fallback below drops stay unrecorded + # and can be retried on a later run. + if store is not None: + for body_fp, code_fp in pending_fingerprints: + store.add(body_fp) + store.add(code_fp) except Exception as e: get_logger().info(f"Initially failed to publish inline comments as committable") @@ -431,8 +495,8 @@ def publish_inline_comments(self, comments: list[dict], disable_fallback: bool = self._publish_inline_comments_fallback_with_verification(comments) except Exception as e: get_logger().error(f"Failed to publish inline code comments fallback, error: {e}") - raise e - + raise + def get_review_thread_comments(self, comment_id: int) -> list[dict]: """ Retrieves all comments in the same thread as the given comment. @@ -558,7 +622,11 @@ def publish_code_suggestions(self, code_suggestions: list) -> bool: """ post_parameters_list = [] - code_suggestions_validated = self.validate_comments_inside_hunks(code_suggestions) + code_suggestions_with_fingerprints = copy.deepcopy(code_suggestions) + for suggestion in code_suggestions_with_fingerprints: + suggestion["_dedup_code_fp"] = code_fingerprint( + suggestion.get("relevant_file", ""), None, suggestion.get("body", "")) + code_suggestions_validated = self.validate_comments_inside_hunks(code_suggestions_with_fingerprints) for suggestion in code_suggestions_validated: body = suggestion['body'] @@ -584,6 +652,7 @@ def publish_code_suggestions(self, code_suggestions: list) -> bool: "line": relevant_lines_end, "start_line": relevant_lines_start, "start_side": "RIGHT", + "_dedup_code_fp": suggestion.get("_dedup_code_fp"), } else: # API is different for single line comments post_parameters = { @@ -591,6 +660,7 @@ def publish_code_suggestions(self, code_suggestions: list) -> bool: "path": relevant_file, "line": relevant_lines_start, "side": "RIGHT", + "_dedup_code_fp": suggestion.get("_dedup_code_fp"), } post_parameters_list.append(post_parameters) diff --git a/pr_agent/git_providers/gitlab_provider.py b/pr_agent/git_providers/gitlab_provider.py index b3f54920d0..9f17fbed6a 100644 --- a/pr_agent/git_providers/gitlab_provider.py +++ b/pr_agent/git_providers/gitlab_provider.py @@ -14,6 +14,9 @@ from ..algo.file_filter import filter_ignored from ..algo.git_patch_processing import decode_if_bytes +from ..algo.inline_comment_dedup import (body_fingerprint, body_with_markers, + code_fingerprint, + get_inline_comment_store) from ..algo.language_handler import is_valid_file from ..algo.utils import (clip_tokens, find_line_number_of_relevant_line_in_file, @@ -559,6 +562,23 @@ def send_inline_comment(self, body: str, edit_type: str, found: bool, relevant_f if not found: get_logger().info(f"Could not find position for {relevant_file} {relevant_line_in_file}") else: + store = None + body_fp = code_fp = None + if get_settings().get("config.persistent_inline_comments", False): + store = get_inline_comment_store(self) + # Anchor the fingerprint on the line the comment is actually + # attached to: deletions anchor on the old line (source), all + # other edits on the new line (target). + anchor_line = source_line_no if edit_type == "deletion" else target_line_no + body_fp = body_fingerprint(relevant_file, anchor_line, body) + code_fp = code_fingerprint(relevant_file, anchor_line, body) + if store.seen(body_fp) or store.seen(code_fp): + get_logger().info( + f"Persistent inline comments: skipping duplicate inline " + f"comment on {relevant_file}:{anchor_line}") + return + body = body_with_markers( + body, body_fp, code_fp, getattr(self, "max_comment_chars", None)) # in order to have exact sha's we have to find correct diff for this change diff = self.get_relevant_diff(relevant_file, relevant_line_in_file) if diff is None: @@ -578,6 +598,9 @@ def send_inline_comment(self, body: str, edit_type: str, found: bool, relevant_f get_logger().debug(f"Creating comment in MR {self.id_mr} with body {body} and position {pos_obj}") try: self.mr.discussions.create({'body': body, 'position': pos_obj}) + if store is not None: + store.add(body_fp) + store.add(code_fp) except Exception as e: try: # fallback - create a general note on the file in the MR @@ -617,6 +640,9 @@ def send_inline_comment(self, body: str, edit_type: str, found: bool, relevant_f diff_code = f"\n\n```diff\n{patch.rstrip()}\n```" body_fallback += diff_code + if store is not None: + body_fallback = body_with_markers( + body_fallback, body_fp, code_fp, getattr(self, "max_comment_chars", None)) # Create a general note on the file in the MR self.mr.notes.create({ 'body': body_fallback, @@ -629,6 +655,9 @@ def send_inline_comment(self, body: str, edit_type: str, found: bool, relevant_f } }) get_logger().debug(f"Created fallback comment in MR {self.id_mr} with position {pos_obj}") + if store is not None: + store.add(body_fp) + store.add(code_fp) # get_logger().debug( # f"Failed to create comment in MR {self.id_mr} with position {pos_obj} (probably not a '+' line)") diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index 675f5dad9e..7c23a2ee8c 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -45,6 +45,10 @@ cli_mode=false output_relevant_configurations=false large_patch_policy = "clip" # "clip", "skip" duplicate_prompt_examples = false +# Persistent inline comments (issue #2037): when true, the GitHub and GitLab providers +# fingerprint each inline comment, embed the fingerprint as an HTML marker, and skip +# re-posting suggestions already present on the PR/MR across runs. +persistent_inline_comments = false # seed seed=-1 # set positive value to fix the seed (and ensure temperature=0) temperature=0.2 diff --git a/tests/unittest/test_inline_comment_dedup.py b/tests/unittest/test_inline_comment_dedup.py new file mode 100644 index 0000000000..454322e85e --- /dev/null +++ b/tests/unittest/test_inline_comment_dedup.py @@ -0,0 +1,484 @@ +from unittest.mock import MagicMock, patch + +from pr_agent.algo import inline_comment_dedup as d +from pr_agent.git_providers.github_provider import GithubProvider +from pr_agent.git_providers.gitlab_provider import GitLabProvider + +_FLAG = "config.persistent_inline_comments" + + +def _flag_side_effect(value=True): + def _get(key, default=None): + return value if key == _FLAG else default + return _get + + +# --------------------------------------------------------------------------- # +# fingerprints + markers +# --------------------------------------------------------------------------- # +def test_body_fingerprint_strips_lead_and_tag(): + a = d.body_fingerprint( + "f.py", 10, + "**Suggestion:** Do the thing [possible issue, importance: 7]") + b = d.body_fingerprint("f.py", 10, "Do the thing") + assert a == b + assert len(a) == 12 + + +def test_body_fingerprint_varies_by_file_and_line(): + assert d.body_fingerprint("f.py", 10, "x") != d.body_fingerprint("g.py", 10, "x") + assert d.body_fingerprint("f.py", 10, "x") != d.body_fingerprint("f.py", 11, "x") + + +def test_code_fingerprint_none_without_block(): + assert d.code_fingerprint("f.py", 1, "no code here") is None + assert d.code_fingerprint("f.py", 1, "```suggestion\n\n```") is None # empty block + + +def test_code_fingerprint_whitespace_insensitive(): + fp1 = d.code_fingerprint("f.py", 1, "prose\n```suggestion\nfoo = 1\n```\n") + fp2 = d.code_fingerprint("f.py", 1, "different prose\n```suggestion\n foo = 1 \n```") + assert fp1 == fp2 and len(fp1) == 12 + + +def test_build_markers(): + assert d.build_markers("aaaaaaaaaaaa", None) == "" + out = d.build_markers("aaaaaaaaaaaa", "bbbbbbbbbbbb") + assert "" in out + assert "" in out + + +def test_inline_comment_line_prefers_line(): + assert d.inline_comment_line({"line": 5, "position": 9}) == 5 + assert d.inline_comment_line({"position": 9}) == 9 + assert d.inline_comment_line({}) is None + + +# --------------------------------------------------------------------------- # +# store +# --------------------------------------------------------------------------- # +class _GHComment: + def __init__(self, body): + self.body = body + + +def _gh_provider(existing_bodies): + """Real GithubProvider instance with only the attributes the dedup path touches.""" + p = GithubProvider.__new__(GithubProvider) + p.pr = MagicMock() + p.pr.get_comments.return_value = [_GHComment(b) for b in existing_bodies] + p.last_commit_id = "deadbeef" + return p + + +def test_store_scans_both_marker_forms(): + fp_body = d.body_fingerprint("a.py", 1, "alpha") + fp_code = d.code_fingerprint("a.py", 1, "p\n```suggestion\nx = 1\n```") + prov = _gh_provider([ + f"alpha\n\n", + f"beta\n\n", + ]) + store = d.InlineCommentStore(prov) + assert store.seen(fp_body) + assert store.seen(fp_code) + assert not store.seen("ffffffffffff") + assert store.seen(None) is False + + +def test_store_load_failure_degrades_to_empty(): + prov = _gh_provider([]) + prov.pr.get_comments.side_effect = RuntimeError("api down") + store = d.InlineCommentStore(prov) + assert store.load() == set() # must not raise + + +def test_iter_unsupported_provider_raises(): + class FooProvider: + pass + try: + list(d.iter_existing_inline_comment_bodies(FooProvider())) + assert False, "expected NotImplementedError" + except NotImplementedError: + pass + + +# --------------------------------------------------------------------------- # +# GitHub provider integration +# --------------------------------------------------------------------------- # +def _patch_flag(value): + gs = patch("pr_agent.git_providers.github_provider.get_settings") + m = gs.start() + m.return_value.get.side_effect = _flag_side_effect(value) + return gs + + +def test_github_filters_seen_and_marks_new(): + seen_fp = d.body_fingerprint("a.py", None, "old body") + p = _gh_provider([f"old body\n\n"]) + gs = _patch_flag(True) + try: + p.publish_inline_comments([ + {"path": "a.py", "line": 10, "body": "old body"}, # duplicate -> dropped + {"path": "b.py", "line": 20, "body": "new body"}, # new -> kept + marker + ]) + finally: + gs.stop() + published = p.pr.create_review.call_args.kwargs["comments"] + assert len(published) == 1 + assert published[0]["path"] == "b.py" + assert ""]) + gs = _patch_flag(True) + try: + p.publish_inline_comments([{"path": "a.py", "line": 10, "body": "old body"}]) + finally: + gs.stop() + p.pr.create_review.assert_not_called() + + +def test_github_within_batch_duplicate_dropped(): + p = _gh_provider([]) + gs = _patch_flag(True) + try: + p.publish_inline_comments([ + {"path": "a.py", "line": 10, "body": "same finding"}, + {"path": "a.py", "line": 10, "body": "same finding"}, + ]) + finally: + gs.stop() + published = p.pr.create_review.call_args.kwargs["comments"] + assert len(published) == 1 + + +def test_github_flag_off_publishes_unmarked(): + p = _gh_provider([]) + gs = _patch_flag(False) + try: + p.publish_inline_comments([{"path": "a.py", "line": 10, "body": "x"}]) + finally: + gs.stop() + published = p.pr.create_review.call_args.kwargs["comments"] + assert len(published) == 1 + assert "pr-agent-dedup" not in published[0]["body"] + + +# --------------------------------------------------------------------------- # +# GitLab provider integration +# --------------------------------------------------------------------------- # +class _FakeDiff: + base_commit_sha = "base" + start_commit_sha = "start" + head_commit_sha = "head" + + +class _FakeTargetFile: + filename = "a.py" + old_filename = "a.py" + + +def _gl_provider(existing_bodies): + p = GitLabProvider.__new__(GitLabProvider) + p.id_mr = 1 + p.mr = MagicMock() + # existing discussions feed the store's marker scan + discs = [] + for b in existing_bodies: + disc = MagicMock() + disc.attributes = {"notes": [{"body": b}]} + discs.append(disc) + p.mr.discussions.list.return_value = discs + p.mr.notes.list.return_value = [] + p.get_relevant_diff = MagicMock(return_value=_FakeDiff()) + return p + + +def _send(p, body, edit_type="addition"): + p.send_inline_comment( + body=body, edit_type=edit_type, found=True, + relevant_file="a.py", relevant_line_in_file="+x = 1", + source_line_no=10, target_file=_FakeTargetFile(), target_line_no=10, + original_suggestion=None, + ) + + +def test_gitlab_posts_new_with_marker_and_skips_duplicate(): + p = _gl_provider([]) + gs = patch("pr_agent.git_providers.gitlab_provider.get_settings") + m = gs.start() + m.return_value.get.side_effect = _flag_side_effect(True) + try: + _send(p, "**Suggestion:** fix it [possible issue, importance: 7]") + first = p.mr.discussions.create.call_args.args[0] + assert ""]) + gs = _patch_flag(True) + try: + p.publish_inline_comments([ + {"path": "a.py", "line": 10, + "body": "totally different wording\n```suggestion\nx = 1\n```"}, + ]) + finally: + gs.stop() + p.pr.create_review.assert_not_called() + + +def test_github_preserves_code_fingerprint_when_hunk_validation_replaces_suggestion_block(): + original_body = "**Suggestion:** fix it\n```suggestion\nx = 1\n```" + code_fp = d.code_fingerprint("a.py", None, original_body) + p = _gh_provider([f"earlier wording\n\n"]) + + def _replace_with_diff(suggestions): + transformed = suggestions[0].copy() + transformed["body"] = "**Suggestion:** fix it\n
\n```diff\n-x = 0\n+x = 1\n```\n
" + return [transformed] + + p.validate_comments_inside_hunks = MagicMock(side_effect=_replace_with_diff) + gs = _patch_flag(True) + try: + assert p.publish_code_suggestions([{ + "body": original_body, + "relevant_file": "a.py", + "relevant_lines_start": 10, + "relevant_lines_end": 10, + }]) + finally: + gs.stop() + + p.pr.create_review.assert_not_called() + + +def test_github_does_not_send_pre_transform_fingerprint_to_api(): + original_body = "**Suggestion:** fix it\n```suggestion\nx = 1\n```" + p = _gh_provider([]) + + def _replace_with_diff(suggestions): + transformed = suggestions[0].copy() + transformed["body"] = "**Suggestion:** fix it\n
\n```diff\n-x = 0\n+x = 1\n```\n
" + return [transformed] + + p.validate_comments_inside_hunks = MagicMock(side_effect=_replace_with_diff) + gs = _patch_flag(True) + try: + assert p.publish_code_suggestions([{ + "body": original_body, + "relevant_file": "a.py", + "relevant_lines_start": 10, + "relevant_lines_end": 10, + }]) + finally: + gs.stop() + + published = p.pr.create_review.call_args.kwargs["comments"] + assert "_dedup_code_fp" not in published[0] + assert ""]) + gs = patch("pr_agent.git_providers.gitlab_provider.get_settings") + m = gs.start() + m.return_value.get.side_effect = _flag_side_effect(True) + try: + _send(p, body) + p.mr.discussions.create.assert_not_called() + finally: + gs.stop() + + +def test_gitlab_fallback_note_carries_marker_and_records(): + p = _gl_provider([]) + p.mr.discussions.create.side_effect = RuntimeError("position rejected") + p.get_line_link = MagicMock(return_value="http://link") + original = { + "relevant_lines_start": 10, "relevant_lines_end": 11, + "existing_code": "a = 1", "improved_code": "a = 2", + "suggestion_content": "fix it", "label": "possible issue", "score": 7, + } + + def _send_fb(): + p.send_inline_comment( + body="**Suggestion:** fix it [possible issue, importance: 7]", + edit_type="addition", found=True, relevant_file="a.py", + relevant_line_in_file="+a = 2", source_line_no=10, + target_file=_FakeTargetFile(), target_line_no=10, + original_suggestion=original, + ) + + gs = patch("pr_agent.git_providers.gitlab_provider.get_settings") + m = gs.start() + m.return_value.get.side_effect = _flag_side_effect(True) + try: + _send_fb() + assert p.mr.notes.create.called + note_body = p.mr.notes.create.call_args.args[0]["body"] + assert "" + p.mr.notes.list.return_value = [note] + gs = patch("pr_agent.git_providers.gitlab_provider.get_settings") + m = gs.start() + m.return_value.get.side_effect = _flag_side_effect(True) + try: + _send(p, body) + p.mr.discussions.create.assert_not_called() + finally: + gs.stop() + + +def _flag_on_gitlab(): + gs = patch("pr_agent.git_providers.gitlab_provider.get_settings") + m = gs.start() + m.return_value.get.side_effect = _flag_side_effect(True) + return gs + + +def test_gitlab_deletion_anchored_on_source_line(): + # deletions anchor on the old (source) line; two deletions sharing a + # target line but different source lines must not collide. + p = _gl_provider([]) + gs = _flag_on_gitlab() + try: + _send(p, "**Suggestion:** drop it [possible issue, importance: 7]", edit_type="deletion") + first = p.mr.discussions.create.call_args.args[0] + expected = d.body_fingerprint( + "a.py", 10, + "**Suggestion:** drop it [possible issue, importance: 7]") + assert f"" in first["body"] + finally: + gs.stop() + + +def test_gitlab_marker_survives_when_body_clipped(): + p = _gl_provider([]) + p.max_comment_chars = 60 + gs = _flag_on_gitlab() + try: + _send(p, "x" * 500) + posted = p.mr.discussions.create.call_args.args[0]["body"] + assert "") + assert not d.has_marker("a comment that merely mentions