Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 15 additions & 6 deletions pr_agent/tools/ticket_pr_compliance_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,31 +39,40 @@ def extract_ticket_links_from_pr_description(pr_description, repo_path, base_url
"""
Extract all ticket links from PR description
"""
github_tickets = set()
# Preserve first-seen order while de-duplicating, so the cap below selects a
# deterministic subset (a plain set would slice an arbitrary, run-varying one).
seen = set()
github_tickets = []

def _add(url):
if url not in seen:
seen.add(url)
github_tickets.append(url)

try:
# Use the updated pattern to find matches
matches = GITHUB_TICKET_PATTERN.findall(pr_description)

for match in matches:
if match[0]: # Full URL match
github_tickets.add(match[0])
_add(match[0])
elif match[1]: # Shorthand notation match: owner/repo#issue_number
owner, repo, issue_number = match[2], match[3], match[4]
github_tickets.add(f'{base_url_html.strip("/")}/{owner}/{repo}/issues/{issue_number}')
_add(f"{base_url_html.strip('/')}/{owner}/{repo}/issues/{issue_number}")
else: # #123 format
issue_number = match[5][1:] # remove #
if issue_number.isdigit() and len(issue_number) < 5 and repo_path:
github_tickets.add(f'{base_url_html.strip("/")}/{repo_path}/issues/{issue_number}')
_add(f"{base_url_html.strip('/')}/{repo_path}/issues/{issue_number}")

if len(github_tickets) > 3:
get_logger().info(f"Too many tickets found in PR description: {len(github_tickets)}")
# Limit the number of tickets to 3
github_tickets = set(list(github_tickets)[:3])
github_tickets = github_tickets[:3]
except Exception as e:
get_logger().error(f"Error extracting tickets error= {e}",
artifact={"traceback": traceback.format_exc()})

return list(github_tickets)
return github_tickets

def extract_ticket_links_from_branch_name(branch_name, repo_path, base_url_html="https://github.com"):
"""
Expand Down
41 changes: 40 additions & 1 deletion tests/unittest/test_extract_issue_from_branch.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
import pytest

from pr_agent.tools.ticket_pr_compliance_check import extract_ticket_links_from_branch_name
from pr_agent.tools.ticket_pr_compliance_check import (
extract_ticket_links_from_branch_name,
extract_ticket_links_from_pr_description,
)

# The PR-description extractor caps results at 3 (hardcoded in the function).
MAX_TICKETS = 3


class TestExtractTicketsLinkFromBranchName:
Expand Down Expand Up @@ -110,3 +116,36 @@ def test_multiple_matches_deduplicated(self):
"https://github.com/org/repo/issues/1",
"https://github.com/org/repo/issues/2",
}


class TestExtractTicketLinksFromPrDescription:
"""GitHub issue extraction from the PR description."""

def test_preserves_first_seen_order(self):
"""Issues are returned in first-seen order, de-duplicated.

Note: this documents the intended ordered behaviour. It does not reliably fail
against the old set-based code, because set iteration order is randomised per
process (PYTHONHASHSEED) and may coincidentally match insertion order for a
small input. test_cap_selects_deterministic_first_seen_subset is the reliable
regression guard (see its note)."""
desc = "Fixes #3, relates to #1, also #3 again and #2"
result = extract_ticket_links_from_pr_description(desc, "org/repo", "https://github.com")
assert result == [
"https://github.com/org/repo/issues/3",
"https://github.com/org/repo/issues/1",
"https://github.com/org/repo/issues/2",
]

def test_cap_selects_deterministic_first_seen_subset(self):
"""When more than MAX_TICKETS issues are present, the first MAX_TICKETS in
first-seen order are kept (not an arbitrary subset from a set).

This is the reliable regression guard for the bug: with > MAX_TICKETS issues,
the old code sliced list(set)[:MAX_TICKETS], so it returned an arbitrary subset
that (essentially) never equals the first-seen subset, on any hash seed."""
nums = list(range(1, MAX_TICKETS + 4))
desc = " ".join(f"#{n}" for n in nums)
result = extract_ticket_links_from_pr_description(desc, "org/repo", "https://github.com")
expected = [f"https://github.com/org/repo/issues/{n}" for n in nums[:MAX_TICKETS]]
assert result == expected
Loading