diff --git a/pr_agent/tools/ticket_pr_compliance_check.py b/pr_agent/tools/ticket_pr_compliance_check.py index 6d25d76b19..37d4268152 100644 --- a/pr_agent/tools/ticket_pr_compliance_check.py +++ b/pr_agent/tools/ticket_pr_compliance_check.py @@ -39,31 +39,40 @@ def extract_ticket_links_from_pr_description(pr_description, repo_path, base_url """ Extract all ticket links from PR description """ - github_tickets = set() + # Preserve first-seen order while de-duplicating, so the MAX_TICKETS cap below + # selects a deterministic subset (a plain set would slice an arbitrary one). + seen = set() + github_tickets = [] + + def _add(url): + if url not in seen: + seen.add(url) + github_tickets.append(url) + try: # Use the updated pattern to find matches matches = GITHUB_TICKET_PATTERN.findall(pr_description) for match in matches: if match[0]: # Full URL match - github_tickets.add(match[0]) + _add(match[0]) elif match[1]: # Shorthand notation match: owner/repo#issue_number owner, repo, issue_number = match[2], match[3], match[4] - github_tickets.add(f'{base_url_html.strip("/")}/{owner}/{repo}/issues/{issue_number}') + _add(f'{base_url_html.strip("/")}/{owner}/{repo}/issues/{issue_number}') else: # #123 format issue_number = match[5][1:] # remove # if issue_number.isdigit() and len(issue_number) < 5 and repo_path: - github_tickets.add(f'{base_url_html.strip("/")}/{repo_path}/issues/{issue_number}') + _add(f'{base_url_html.strip("/")}/{repo_path}/issues/{issue_number}') if len(github_tickets) > 3: get_logger().info(f"Too many tickets found in PR description: {len(github_tickets)}") # Limit the number of tickets to 3 - github_tickets = set(list(github_tickets)[:3]) + github_tickets = github_tickets[:3] except Exception as e: get_logger().error(f"Error extracting tickets error= {e}", artifact={"traceback": traceback.format_exc()}) - return list(github_tickets) + return github_tickets def extract_ticket_links_from_branch_name(branch_name, repo_path, base_url_html="https://github.com"): """ diff --git a/tests/unittest/test_extract_issue_from_branch.py b/tests/unittest/test_extract_issue_from_branch.py index 6e957ba3f8..0f09ab1118 100644 --- a/tests/unittest/test_extract_issue_from_branch.py +++ b/tests/unittest/test_extract_issue_from_branch.py @@ -1,6 +1,12 @@ import pytest -from pr_agent.tools.ticket_pr_compliance_check import extract_ticket_links_from_branch_name +from pr_agent.tools.ticket_pr_compliance_check import ( + extract_ticket_links_from_branch_name, + extract_ticket_links_from_pr_description, +) + +# The PR-description extractor caps results at 3 (hardcoded in the function). +MAX_TICKETS = 3 class TestExtractTicketsLinkFromBranchName: @@ -110,3 +116,26 @@ def test_multiple_matches_deduplicated(self): "https://github.com/org/repo/issues/1", "https://github.com/org/repo/issues/2", } + + +class TestExtractTicketLinksFromPrDescription: + """GitHub issue extraction from the PR description.""" + + def test_preserves_first_seen_order(self): + """Issues are returned in first-seen order, de-duplicated.""" + desc = "Fixes #3, relates to #1, also #3 again and #2" + result = extract_ticket_links_from_pr_description(desc, "org/repo", "https://github.com") + assert result == [ + "https://github.com/org/repo/issues/3", + "https://github.com/org/repo/issues/1", + "https://github.com/org/repo/issues/2", + ] + + def test_cap_selects_deterministic_first_seen_subset(self): + """When more than MAX_TICKETS issues are present, the first MAX_TICKETS in + first-seen order are kept (not an arbitrary subset from a set).""" + nums = list(range(1, MAX_TICKETS + 4)) + desc = " ".join(f"#{n}" for n in nums) + result = extract_ticket_links_from_pr_description(desc, "org/repo", "https://github.com") + expected = [f"https://github.com/org/repo/issues/{n}" for n in nums[:MAX_TICKETS]] + assert result == expected