From b47f2fef24d2b31200cc67e777ccff30d0c073df Mon Sep 17 00:00:00 2001
From: Chris Dell <2529187+dellch@users.noreply.github.com>
Date: Tue, 2 Jun 2026 15:34:21 -0700
Subject: [PATCH 1/2] test: add failing tests for deterministic PR-description
 ticket selection

extract_ticket_links_from_pr_description() accumulates issue URLs in a set and
caps via list(set)[:3], so when a PR description references more than 3 issues
the selected subset is arbitrary and varies between runs.

These tests assert first-seen ordering and a deterministic capped subset. The
cap test fails against the current set-based implementation on every hash seed
(verified across PYTHONHASHSEED 0-11); the fix follows in the next commit.

Co-Authored-By: Claude
---
 .../test_extract_issue_from_branch.py         | 41 ++++++++++++++++++-
 1 file changed, 40 insertions(+), 1 deletion(-)

diff --git a/tests/unittest/test_extract_issue_from_branch.py b/tests/unittest/test_extract_issue_from_branch.py
index 6e957ba3f8..e8a240448c 100644
--- a/tests/unittest/test_extract_issue_from_branch.py
+++ b/tests/unittest/test_extract_issue_from_branch.py
@@ -1,6 +1,12 @@
 import pytest
 
-from pr_agent.tools.ticket_pr_compliance_check import extract_ticket_links_from_branch_name
+from pr_agent.tools.ticket_pr_compliance_check import (
+    extract_ticket_links_from_branch_name,
+    extract_ticket_links_from_pr_description,
+)
+
+# The PR-description extractor caps results at 3 (hardcoded in the function).
+MAX_TICKETS = 3
 
 
 class TestExtractTicketsLinkFromBranchName:
@@ -110,3 +116,36 @@ def test_multiple_matches_deduplicated(self):
             "https://github.com/org/repo/issues/1",
             "https://github.com/org/repo/issues/2",
         }
+
+
+class TestExtractTicketLinksFromPrDescription:
+    """GitHub issue extraction from the PR description."""
+
+    def test_preserves_first_seen_order(self):
+        """Issues are returned in first-seen order, de-duplicated.
+
+        Note: this documents the intended ordered behaviour. It does not reliably fail
+        against the old set-based code, because set iteration order is randomised per
+        process (PYTHONHASHSEED) and may coincidentally match insertion order for a
+        small input. test_cap_selects_deterministic_first_seen_subset is the reliable
+        regression guard (see its note)."""
+        desc = "Fixes #3, relates to #1, also #3 again and #2"
+        result = extract_ticket_links_from_pr_description(desc, "org/repo", "https://github.com")
+        assert result == [
+            "https://github.com/org/repo/issues/3",
+            "https://github.com/org/repo/issues/1",
+            "https://github.com/org/repo/issues/2",
+        ]
+
+    def test_cap_selects_deterministic_first_seen_subset(self):
+        """When more than MAX_TICKETS issues are present, the first MAX_TICKETS in
+        first-seen order are kept (not an arbitrary subset from a set).
+
+        This is the reliable regression guard for the bug: with > MAX_TICKETS issues,
+        the old code sliced list(set)[:MAX_TICKETS], so it returned an arbitrary subset
+        that (essentially) never equals the first-seen subset, on any hash seed."""
+        nums = list(range(1, MAX_TICKETS + 4))
+        desc = " ".join(f"#{n}" for n in nums)
+        result = extract_ticket_links_from_pr_description(desc, "org/repo", "https://github.com")
+        expected = [f"https://github.com/org/repo/issues/{n}" for n in nums[:MAX_TICKETS]]
+        assert result == expected

From 03979aa26d2a8a093090172c33707ab96a45fb14 Mon Sep 17 00:00:00 2001
From: Chris Dell <2529187+dellch@users.noreply.github.com>
Date: Tue, 2 Jun 2026 15:35:07 -0700
Subject: [PATCH 2/2] fix: make GitHub PR-description ticket selection
 deterministic

Track issue URLs in first-seen order while de-duplicating (a seen set plus an
ordered list), then apply the 3-ticket cap by slicing the ordered list, instead
of accumulating in a set and slicing list(set)[:3]. Selection is now stable and
predictable across runs. Behaviour is unchanged for 3 or fewer issues.

Makes the tests added in the previous commit pass.

Co-Authored-By: Claude
---
 pr_agent/tools/ticket_pr_compliance_check.py | 21 ++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/pr_agent/tools/ticket_pr_compliance_check.py b/pr_agent/tools/ticket_pr_compliance_check.py
index 6d25d76b19..b323986b12 100644
--- a/pr_agent/tools/ticket_pr_compliance_check.py
+++ b/pr_agent/tools/ticket_pr_compliance_check.py
@@ -39,31 +39,40 @@ def extract_ticket_links_from_pr_description(pr_description, repo_path, base_url
     """
     Extract all ticket links from PR description
     """
-    github_tickets = set()
+    # Preserve first-seen order while de-duplicating, so the cap below selects a
+    # deterministic subset (a plain set would slice an arbitrary, run-varying one).
+    seen = set()
+    github_tickets = []
+
+    def _add(url):
+        if url not in seen:
+            seen.add(url)
+            github_tickets.append(url)
+
     try:
         # Use the updated pattern to find matches
         matches = GITHUB_TICKET_PATTERN.findall(pr_description)
 
         for match in matches:
             if match[0]:  # Full URL match
-                github_tickets.add(match[0])
+                _add(match[0])
             elif match[1]:  # Shorthand notation match: owner/repo#issue_number
                 owner, repo, issue_number = match[2], match[3], match[4]
-                github_tickets.add(f'{base_url_html.strip("/")}/{owner}/{repo}/issues/{issue_number}')
+                _add(f"{base_url_html.strip('/')}/{owner}/{repo}/issues/{issue_number}")
             else:  # #123 format
                 issue_number = match[5][1:]  # remove #
                 if issue_number.isdigit() and len(issue_number) < 5 and repo_path:
-                    github_tickets.add(f'{base_url_html.strip("/")}/{repo_path}/issues/{issue_number}')
+                    _add(f"{base_url_html.strip('/')}/{repo_path}/issues/{issue_number}")
 
         if len(github_tickets) > 3:
             get_logger().info(f"Too many tickets found in PR description: {len(github_tickets)}")
             # Limit the number of tickets to 3
-            github_tickets = set(list(github_tickets)[:3])
+            github_tickets = github_tickets[:3]
     except Exception as e:
         get_logger().error(f"Error extracting tickets error= {e}",
                            artifact={"traceback": traceback.format_exc()})
 
-    return list(github_tickets)
+    return github_tickets
 
 def extract_ticket_links_from_branch_name(branch_name, repo_path, base_url_html="https://github.com"):
     """