The-PR-Agent · dellch · Jun 2, 2026 · Jun 2, 2026
diff --git a/pr_agent/tools/ticket_pr_compliance_check.py b/pr_agent/tools/ticket_pr_compliance_check.py
@@ -39,31 +39,40 @@ def extract_ticket_links_from_pr_description(pr_description, repo_path, base_url
     """
     Extract all ticket links from PR description
     """
-    github_tickets = set()
+    # Preserve first-seen order while de-duplicating, so the cap below selects a
+    # deterministic subset (a plain set would slice an arbitrary, run-varying one).
+    seen = set()
+    github_tickets = []
+
+    def _add(url):
+        if url not in seen:
+            seen.add(url)
+            github_tickets.append(url)
+
     try:
         # Use the updated pattern to find matches
         matches = GITHUB_TICKET_PATTERN.findall(pr_description)
 
         for match in matches:
             if match[0]:  # Full URL match
-                github_tickets.add(match[0])
+                _add(match[0])
             elif match[1]:  # Shorthand notation match: owner/repo#issue_number
                 owner, repo, issue_number = match[2], match[3], match[4]
-                github_tickets.add(f'{base_url_html.strip("/")}/{owner}/{repo}/issues/{issue_number}')
+                _add(f"{base_url_html.strip('/')}/{owner}/{repo}/issues/{issue_number}")
             else:  # #123 format
                 issue_number = match[5][1:]  # remove #
                 if issue_number.isdigit() and len(issue_number) < 5 and repo_path:
-                    github_tickets.add(f'{base_url_html.strip("/")}/{repo_path}/issues/{issue_number}')
+                    _add(f"{base_url_html.strip('/')}/{repo_path}/issues/{issue_number}")
 
         if len(github_tickets) > 3:
             get_logger().info(f"Too many tickets found in PR description: {len(github_tickets)}")
             # Limit the number of tickets to 3
-            github_tickets = set(list(github_tickets)[:3])
+            github_tickets = github_tickets[:3]
     except Exception as e:
         get_logger().error(f"Error extracting tickets error= {e}",
                            artifact={"traceback": traceback.format_exc()})
 
-    return list(github_tickets)
+    return github_tickets
 
 def extract_ticket_links_from_branch_name(branch_name, repo_path, base_url_html="https://github.com"):
     """

diff --git a/tests/unittest/test_extract_issue_from_branch.py b/tests/unittest/test_extract_issue_from_branch.py
@@ -1,6 +1,12 @@
 import pytest
 
-from pr_agent.tools.ticket_pr_compliance_check import extract_ticket_links_from_branch_name
+from pr_agent.tools.ticket_pr_compliance_check import (
+    extract_ticket_links_from_branch_name,
+    extract_ticket_links_from_pr_description,
+)
+
+# The PR-description extractor caps results at 3 (hardcoded in the function).
+MAX_TICKETS = 3
 
 
 class TestExtractTicketsLinkFromBranchName:
@@ -110,3 +116,36 @@ def test_multiple_matches_deduplicated(self):
             "https://github.com/org/repo/issues/1",
             "https://github.com/org/repo/issues/2",
         }
+
+
+class TestExtractTicketLinksFromPrDescription:
+    """GitHub issue extraction from the PR description."""
+
+    def test_preserves_first_seen_order(self):
+        """Issues are returned in first-seen order, de-duplicated.
+
+        Note: this documents the intended ordered behaviour. It does not reliably fail
+        against the old set-based code, because set iteration order is randomised per
+        process (PYTHONHASHSEED) and may coincidentally match insertion order for a
+        small input. test_cap_selects_deterministic_first_seen_subset is the reliable
+        regression guard (see its note)."""
+        desc = "Fixes #3, relates to #1, also #3 again and #2"
+        result = extract_ticket_links_from_pr_description(desc, "org/repo", "https://github.com")
+        assert result == [
+            "https://github.com/org/repo/issues/3",
+            "https://github.com/org/repo/issues/1",
+            "https://github.com/org/repo/issues/2",
+        ]
+
+    def test_cap_selects_deterministic_first_seen_subset(self):
+        """When more than MAX_TICKETS issues are present, the first MAX_TICKETS in
+        first-seen order are kept (not an arbitrary subset from a set).
+
+        This is the reliable regression guard for the bug: with > MAX_TICKETS issues,
+        the old code sliced list(set)[:MAX_TICKETS], so it returned an arbitrary subset
+        that (essentially) never equals the first-seen subset, on any hash seed."""
+        nums = list(range(1, MAX_TICKETS + 4))
+        desc = " ".join(f"#{n}" for n in nums)
+        result = extract_ticket_links_from_pr_description(desc, "org/repo", "https://github.com")
+        expected = [f"https://github.com/org/repo/issues/{n}" for n in nums[:MAX_TICKETS]]
+        assert result == expected