diff --git a/docs/docs/core-abilities/fetching_ticket_context.md b/docs/docs/core-abilities/fetching_ticket_context.md index de06ba2172..739da3be15 100644 --- a/docs/docs/core-abilities/fetching_ticket_context.md +++ b/docs/docs/core-abilities/fetching_ticket_context.md @@ -1,9 +1,10 @@ # Fetching Ticket Context for PRs -`Supported Git Platforms: GitHub, GitLab, Bitbucket` +`Supported Git Platforms: GitHub, GitLab, Bitbucket, Azure DevOps` -!!! note "Branch-name issue linking: GitHub only (for now)" - Extracting issue links from the **branch name** (and the optional `branch_issue_regex` setting) is currently implemented for **GitHub only**. Support for GitLab, Bitbucket, and other platforms is planned for a later release. The GitHub flow was the most relevant to implement first; other providers will follow. +!!! note "Branch-name linking: Jira keys on all providers; numeric GitHub issues on GitHub only" + **Jira** ticket keys (e.g. `ABC-123`) are extracted from the branch name on **every git provider**. + Extracting **numeric GitHub issue** links from the branch name (and the optional `branch_issue_regex` setting) is currently implemented for **GitHub only**; support for other providers is planned for a later release. ## Overview @@ -94,7 +95,11 @@ Since PR-Agent is integrated with GitHub, it doesn't require any additional conf ## Jira Integration -We support both Jira Cloud and Jira Server/Data Center. +Only **Jira Cloud** is supported. The base URL is derived from a validated site name +(`jira_site` → `https://.atlassian.net`) rather than taken as a free-form URL, so +the configured destination is always an Atlassian Cloud host. Jira Server / Data Center +(self-hosted) uses a free-form host and is not supported yet; it can be added once +base-URL handling for the self-hosted case is settled. ### Jira Cloud @@ -116,205 +121,37 @@ You can create an API token from your Atlassian account: ```toml [jira] -jira_api_token = "YOUR_API_TOKEN" +jira_site = "" # the "" in https://.atlassian.net (e.g. "mycompany") jira_api_email = "YOUR_EMAIL" +jira_api_token = "YOUR_API_TOKEN" ``` -### Jira Data Center/Server - -#### Using Basic Authentication for Jira Data Center/Server - -You can use your Jira username and password to authenticate with Jira Data Center/Server. - -In your Configuration file/Environment variables/Secrets file, add the following lines: - -```toml -jira_api_email = "your_username" -jira_api_token = "your_password" -``` - -(Note that indeed the 'jira_api_email' field is used for the username, and the 'jira_api_token' field is used for the user password.) - -##### Validating Basic authentication via Python script - -If you are facing issues retrieving tickets in PR-Agent with Basic auth, you can validate the flow using a Python script. -This following steps will help you check if the basic auth is working correctly, and if you can access the Jira ticket details: - -1. run `pip install jira==3.8.0` - -2. run the following Python script (after replacing the placeholders with your actual values): - -???- example "Script to validate basic auth" - - ```python - from jira import JIRA - - - if __name__ == "__main__": - try: - # Jira server URL - server = "https://..." - # Basic auth - username = "..." - password = "..." - # Jira ticket code (e.g. "PROJ-123") - ticket_id = "..." - - print("Initializing JiraServerTicketProvider with JIRA server") - # Initialize JIRA client - jira = JIRA( - server=server, - basic_auth=(username, password), - timeout=30 - ) - if jira: - print(f"JIRA client initialized successfully") - else: - print("Error initializing JIRA client") - - # Fetch ticket details - ticket = jira.issue(ticket_id) - print(f"Ticket title: {ticket.fields.summary}") - - except Exception as e: - print(f"Error fetching JIRA ticket details: {e}") - ``` +`jira_site` is your Jira Cloud site name — the part before `.atlassian.net` (for +`https://mycompany.atlassian.net`, the site is `mycompany`). PR-Agent builds the base URL +as `https://.atlassian.net`; it does not accept a full URL, so configuration +cannot redirect the authenticated request to another host. Store `jira_api_email` and +`jira_api_token` as secrets (environment variables or the secrets file), not in +repository-committed configuration. -#### Using a Personal Access Token (PAT) for Jira Data Center/Server +#### Acceptance criteria / requirements (optional) -1. Create a [Personal Access Token (PAT)](https://confluence.atlassian.com/enterprise/using-personal-access-tokens-1026032365.html) in your Jira account -2. In your Configuration file/Environment variables/Secrets file, add the following lines: +To include a ticket's acceptance criteria in the analysis, set `jira_requirements_field` +to the id of the custom field that holds it. The field id is specific to your Jira +instance (for example `customfield_10127`); leave it empty to skip requirements. ```toml [jira] -jira_base_url = "YOUR_JIRA_BASE_URL" # e.g. https://jira.example.com -jira_api_token = "YOUR_API_TOKEN" +jira_requirements_field = "customfield_10127" ``` -##### Validating PAT token via Python script - -If you are facing issues retrieving tickets in PR-Agent with PAT token, you can validate the flow using a Python script. -This following steps will help you check if the token is working correctly, and if you can access the Jira ticket details: - -1. run `pip install jira==3.8.0` - -2. run the following Python script (after replacing the placeholders with your actual values): - -??? example- "Script to validate PAT token" - - ```python - from jira import JIRA - - - if __name__ == "__main__": - try: - # Jira server URL - server = "https://..." - # Jira PAT token - token_auth = "..." - # Jira ticket code (e.g. "PROJ-123") - ticket_id = "..." - - print("Initializing JiraServerTicketProvider with JIRA server") - # Initialize JIRA client - jira = JIRA( - server=server, - token_auth=token_auth, - timeout=30 - ) - if jira: - print(f"JIRA client initialized successfully") - else: - print("Error initializing JIRA client") - - # Fetch ticket details - ticket = jira.issue(ticket_id) - print(f"Ticket title: {ticket.fields.summary}") - - except Exception as e: - print(f"Error fetching JIRA ticket details: {e}") - ``` - - -### Multi-JIRA Server Configuration - -PR-Agent supports connecting to multiple JIRA servers using different authentication methods. - -=== "Email/Token (Basic Auth)" - - Configure multiple servers using Email/Token authentication: - - - `jira_servers`: List of JIRA server URLs - - `jira_api_token`: List of API tokens (for Cloud) or passwords (for Data Center) - - `jira_api_email`: List of emails (for Cloud) or usernames (for Data Center) - - `jira_base_url`: Default server for ticket IDs like `PROJ-123`, Each repository can configure (local config file) its own `jira_base_url` to choose which server to use by default. - - **Example Configuration:** - ```toml - [jira] - # Server URLs - jira_servers = ["https://company.atlassian.net", "https://datacenter.jira.com"] - - # API tokens/passwords - jira_api_token = ["cloud_api_token_here", "datacenter_password"] - - # Emails/usernames (both required) - jira_api_email = ["user@company.com", "datacenter_username"] - - # Default server for ticket IDs - jira_base_url = "https://company.atlassian.net" - ``` - -=== "PAT Auth" - - Configure multiple servers using Personal Access Token authentication: - - - `jira_servers`: List of JIRA server URLs - - `jira_api_token`: List of PAT tokens - - `jira_api_email`: Not needed (can be omitted or left empty) - - `jira_base_url`: Default server for ticket IDs like `PROJ-123`, Each repository can configure (local config file) its own `jira_base_url` to choose which server to use by default. - - **Example Configuration:** - ```toml - [jira] - # Server URLs - jira_servers = ["https://server1.jira.com", "https://server2.jira.com"] - - # PAT tokens only - jira_api_token = ["pat_token_1", "pat_token_2"] - - # Default server for ticket IDs - jira_base_url = "https://server1.jira.com" - ``` - - **Mixed Authentication (Email/Token + PAT):** - ```toml - [jira] - jira_servers = ["https://company.atlassian.net", "https://server.jira.com"] - jira_api_token = ["cloud_api_token", "server_pat_token"] - jira_api_email = ["user@company.com", ""] # Empty for PAT - ``` - - - - ### How to link a PR to a Jira ticket To integrate with Jira, you can link your PR to a ticket using either of these methods: **Method 1: Description Reference:** -Include a ticket reference in your PR description, using either the complete URL format `https://.atlassian.net/browse/ISSUE-123` or the shortened ticket ID `ISSUE-123` (without prefix or suffix for the shortened ID). +Include a ticket reference in your PR description, using either the complete URL format `https://.atlassian.net/browse/ISSUE-123` or the shortened ticket ID `ISSUE-123` (without prefix or suffix for the shortened ID). **Method 2: Branch Name Detection:** Name your branch with the ticket ID as a prefix (e.g., `ISSUE-123-feature-description` or `ISSUE-123/feature-description`). - -!!! note "Jira Base URL" - For shortened ticket IDs or branch detection (method 2 for JIRA cloud), you must configure the Jira base URL in your configuration file under the [jira] section: - - ```toml - [jira] - jira_base_url = "https://.atlassian.net" - ``` - Where `` is your Jira organization identifier (e.g., `mycompany` for `https://mycompany.atlassian.net`). diff --git a/pr_agent/settings/.secrets_template.toml b/pr_agent/settings/.secrets_template.toml index e0d8e11d10..67968518ee 100644 --- a/pr_agent/settings/.secrets_template.toml +++ b/pr_agent/settings/.secrets_template.toml @@ -109,6 +109,12 @@ url = "" org = "" pat = "" +[jira] +# For fetching ticket context from Jira Cloud (only Jira Cloud is supported) +jira_site = "" # the "" in https://.atlassian.net (e.g. "your-org") +jira_api_email = "" # Atlassian account email +jira_api_token = "" # Atlassian API token + [azure_devops_server] # For Azure devops Server basic auth - configured in the webhook creation # Optional, uncomment if you want to use Azure devops webhooks. Value assigned when you create the webhook diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index f4d63a73f2..19e2c89dcb 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -318,6 +318,19 @@ pr_commands = [ "/improve --pr_code_suggestions.commitable_code_suggestions=true", ] +[jira] +# Jira Cloud ticket context. Only Jira Cloud is supported; the base URL is built as +# https://.atlassian.net from the site name below, so repo configuration +# cannot redirect the authenticated request to another host. +# Credentials: fill and place in .secrets.toml, or set via environment variables +# (e.g. jira__jira_api_token). +# jira_site = "your-org" # the "" in https://.atlassian.net +# jira_api_email = "..." # Atlassian account email +# jira_api_token = "..." # Atlassian API token +# Custom field id holding acceptance criteria / requirements, mapped to the ticket +# "requirements" section. Instance-specific (e.g. "customfield_10127"); empty disables it. +jira_requirements_field = "" + [litellm] # use_client = false # drop_params = false diff --git a/pr_agent/tools/ticket_pr_compliance_check.py b/pr_agent/tools/ticket_pr_compliance_check.py index 6d25d76b19..613f23ba5b 100644 --- a/pr_agent/tools/ticket_pr_compliance_check.py +++ b/pr_agent/tools/ticket_pr_compliance_check.py @@ -1,6 +1,8 @@ import re import traceback +from atlassian import Jira + from pr_agent.config_loader import get_settings from pr_agent.git_providers import GithubProvider from pr_agent.git_providers import AzureDevopsProvider @@ -13,16 +15,59 @@ # Option A: issue number at start of branch or after /, followed by - or end (e.g. feature/1-test-issue, 123-fix) BRANCH_ISSUE_PATTERN = re.compile(r"(?:^|/)(\d{1,6})(?=-|$)") +# Max number of tickets to analyse per PR, and max characters of ticket body to keep. +MAX_TICKETS = 3 +MAX_TICKET_CHARACTERS = 10000 + +# Jira REST API version. Pinned to "2" because extract_jira_tickets() reads description +# and custom fields as plain strings. v2 returns them that way; v3 returns ADF JSON dicts +# that would need separate parsing. The atlassian-python-api client defaults to "2" today, +# but pinning keeps the contract stable across dependency upgrades. +JIRA_API_VERSION = "2" + +# Jira Cloud site name (the "" in https://.atlassian.net). Only Jira Cloud is +# supported: the base URL is built from this name rather than taken as a free-form URL, so +# repo-controlled config cannot redirect the authenticated request (and the token) to an +# arbitrary host. The name must be a single DNS label (letters, digits, hyphens) so it +# cannot contain '.', '/', ':', '@' etc. that would let it escape *.atlassian.net. +JIRA_SITE_PATTERN = re.compile(r"^[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?$", re.IGNORECASE) + + +def _jira_cloud_base_url(): + """ + Return the Jira Cloud base URL built from the configured site name, or None if the + site is missing or invalid. Building the URL from a validated site name (rather than + accepting a free-form base URL from settings) ensures the authenticated request can + only ever go to https://.atlassian.net, even if settings were overridden by + untrusted repo configuration. + """ + site = get_settings().get("JIRA.JIRA_SITE", None) + if not site: + return None + site = str(site).strip() + if not JIRA_SITE_PATTERN.match(site): + get_logger().warning( + f"Invalid jira_site '{site}'; expected a Jira Cloud site name like 'mycompany' " + f"(the '' in https://.atlassian.net). Skipping Jira ticket lookup.") + return None + return f"https://{site}.atlassian.net" + + def find_jira_tickets(text): - # Regular expression patterns for JIRA tickets + # Regular expression patterns for JIRA tickets. Matching is case-insensitive so + # lowercased branch names (e.g. bugfix/abc-123-description) are detected; keys are + # normalized to upper case to match Jira's canonical form. patterns = [ r'\b[A-Z]{2,10}-\d{1,7}\b', # Standard JIRA ticket format (e.g., PROJ-123) r'(?:https?://[^\s/]+/browse/)?([A-Z]{2,10}-\d{1,7})\b' # JIRA URL or just the ticket ] - tickets = set() + # Preserve first-seen order while de-duplicating, so the MAX_TICKETS cap applied + # later is deterministic across runs (a plain set would fetch arbitrary tickets). + seen = set() + tickets = [] for pattern in patterns: - matches = re.findall(pattern, text) + matches = re.findall(pattern, text, flags=re.IGNORECASE) for match in matches: if isinstance(match, tuple): # If it's a tuple (from the URL pattern), take the last non-empty group @@ -30,9 +75,159 @@ def find_jira_tickets(text): else: ticket = match if ticket: - tickets.add(ticket) + ticket = ticket.upper() + if ticket not in seen: + seen.add(ticket) + tickets.append(ticket) + + return tickets + + +def _get_jira_client(): + """ + Build a Jira Cloud client from the [jira] settings. Returns None if Jira is not + configured. Only Jira Cloud is supported: the base URL is derived from a validated + site name (jira_site) so untrusted repo configuration cannot redirect the + authenticated request to an arbitrary host. Cloud authenticates with the account + email + API token. The REST API version is pinned via JIRA_API_VERSION (see its + definition for why). + """ + site = get_settings().get("JIRA.JIRA_SITE", None) + api_email = get_settings().get("JIRA.JIRA_API_EMAIL", None) + api_token = get_settings().get("JIRA.JIRA_API_TOKEN", None) + base_url = _jira_cloud_base_url() # None if site is missing or invalid (already warned if invalid) + if not (base_url and api_email and api_token): + # Warn when Jira is partially configured: some [jira] value is set but the required + # site + email + token are incomplete, which is likely a mistake. Stay silent when + # nothing is set (Jira simply not in use), and don't double-warn when the site was + # set but invalid (_jira_cloud_base_url already warned about that). + site_set_but_invalid = site and not base_url + if any([site, api_email, api_token]) and not site_set_but_invalid: + missing = [ + name for name, value in ( + ("jira_site", site), + ("jira_api_email", api_email), + ("jira_api_token", api_token), + ) if not value + ] + get_logger().warning( + f"Jira is partially configured; skipping Jira ticket lookup. Missing: {', '.join(missing)}") + return None + try: + return Jira(url=base_url, username=api_email, password=api_token, api_version=JIRA_API_VERSION) + except Exception as e: + get_logger().error(f"Failed to initialize Jira client: {e}", + artifact={"traceback": traceback.format_exc()}) + return None + + +def extract_jira_tickets(text, max_characters=MAX_TICKET_CHARACTERS): + """ + Find Jira ticket keys in the given text and fetch their content. Returns a list of + ticket dicts in the same shape used by the rest of the ticket-analysis flow. Returns + an empty list when no keys are found or when Jira is not configured. + """ + # Look for keys before building a client: most PRs have none, and building the + # client first would do needless work (and log a noisy init failure if Jira is + # misconfigured) even when there is nothing to fetch. + keys = find_jira_tickets(text or "") + if not keys: + return [] + + jira_client = _get_jira_client() + if jira_client is None: + return [] + + base_url = _jira_cloud_base_url() or "" + # Custom field that holds acceptance criteria / requirements. The field id is + # instance-specific (e.g. "customfield_10127"), so it must be configured; empty + # means no requirements are extracted. + requirements_field = get_settings().get("JIRA.JIRA_REQUIREMENTS_FIELD", "") or "" + if len(keys) > MAX_TICKETS: + get_logger().info(f"Too many Jira tickets found: {len(keys)}; limiting to {MAX_TICKETS}") + keys = keys[:MAX_TICKETS] + + tickets_content = [] + for key in keys: + try: + issue = jira_client.issue(key) + except Exception as e: + get_logger().warning(f"Failed to fetch Jira ticket {key}: {e}") + continue + if not issue: + continue - return list(tickets) + fields = issue.get("fields", {}) or {} + # The client is pinned to REST v2 (see _get_jira_client), which returns + # description and rich-text custom fields as plain wiki-markup strings. The + # isinstance guards below defend against anything non-string (e.g. v3 ADF dicts). + body = fields.get("description") or "" + if not isinstance(body, str): + body = "" + if len(body) > max_characters: + body = body[:max_characters] + "..." + + requirements = "" + if requirements_field: + requirements = fields.get(requirements_field) or "" + if not isinstance(requirements, str): + requirements = "" + if len(requirements) > max_characters: + requirements = requirements[:max_characters] + "..." + + labels = fields.get("labels", []) or [] + tickets_content.append({ + "ticket_id": key, + "ticket_url": f"{base_url}/browse/{key}" if base_url else "", + "title": fields.get("summary", ""), + "body": body, + "requirements": requirements, + "labels": ", ".join(labels), + }) + return tickets_content + + +def _get_pr_title(git_provider) -> str: + """Return the PR/MR title across providers (GitHub/Bitbucket use .pr, GitLab .mr).""" + for attr in ("pr", "mr"): + obj = getattr(git_provider, attr, None) + title = getattr(obj, "title", None) + if title: + return title + return "" + + +def add_jira_tickets(git_provider, tickets_content): + """ + Provider-agnostic Jira lookup. Scans the PR title, description and branch name for + Jira ticket keys and appends any found tickets to tickets_content (de-duplicated by + ticket_url). No-op when Jira is not configured. Works for any git provider, since it + only relies on get_user_description() and get_pr_branch(). + + MAX_TICKETS is the overall per-PR cap, so any provider-native tickets already in + tickets_content count against it: Jira tickets are appended only until the combined + total reaches MAX_TICKETS, keeping the existing tickets first. + """ + try: + if len(tickets_content) >= MAX_TICKETS: + return tickets_content + jira_context = "\n".join(filter(None, [ + _get_pr_title(git_provider), + git_provider.get_user_description() or "", + git_provider.get_pr_branch() or "", + ])) + existing_urls = {t.get("ticket_url") for t in tickets_content} + for jira_ticket in extract_jira_tickets(jira_context, MAX_TICKET_CHARACTERS): + if len(tickets_content) >= MAX_TICKETS: + get_logger().info( + f"Reached the per-PR cap of {MAX_TICKETS} tickets; skipping remaining Jira tickets") + break + if jira_ticket.get("ticket_url") not in existing_urls: + tickets_content.append(jira_ticket) + except Exception as e: + get_logger().error(f"Error extracting Jira tickets: {e}", + artifact={"traceback": traceback.format_exc()}) + return tickets_content def extract_ticket_links_from_pr_description(pr_description, repo_path, base_url_html='https://github.com'): @@ -55,10 +250,10 @@ def extract_ticket_links_from_pr_description(pr_description, repo_path, base_url if issue_number.isdigit() and len(issue_number) < 5 and repo_path: github_tickets.add(f'{base_url_html.strip("/")}/{repo_path}/issues/{issue_number}') - if len(github_tickets) > 3: + if len(github_tickets) > MAX_TICKETS: get_logger().info(f"Too many tickets found in PR description: {len(github_tickets)}") - # Limit the number of tickets to 3 - github_tickets = set(list(github_tickets)[:3]) + # Limit the number of tickets + github_tickets = set(list(github_tickets)[:MAX_TICKETS]) except Exception as e: get_logger().error(f"Error extracting tickets error= {e}", artifact={"traceback": traceback.format_exc()}) @@ -106,8 +301,9 @@ def extract_ticket_links_from_branch_name(branch_name, repo_path, base_url_html= async def extract_tickets(git_provider): - MAX_TICKET_CHARACTERS = 10000 try: + tickets_content = [] + if isinstance(git_provider, GithubProvider): user_description = git_provider.get_user_description() description_tickets = extract_ticket_links_from_pr_description( @@ -123,12 +319,11 @@ async def extract_tickets(git_provider): if link not in seen: seen.add(link) merged.append(link) - if len(merged) > 3: + if len(merged) > MAX_TICKETS: get_logger().info(f"Too many tickets (description + branch): {len(merged)}") - tickets = merged[:3] + tickets = merged[:MAX_TICKETS] else: tickets = merged - tickets_content = [] if tickets: @@ -188,24 +383,29 @@ async def extract_tickets(git_provider): 'sub_issues': sub_issues_content # Store sub-issues content }) - return tickets_content - elif isinstance(git_provider, AzureDevopsProvider): tickets_info = git_provider.get_linked_work_items() - tickets_content = [] for ticket in tickets_info: try: ticket_body_str = ticket.get("body", "") if len(ticket_body_str) > MAX_TICKET_CHARACTERS: ticket_body_str = ticket_body_str[:MAX_TICKET_CHARACTERS] + "..." + # Cap acceptance criteria like the body, so a large work-item field + # can't push an unbounded blob into the review prompt. + requirements_str = ticket.get("acceptance_criteria", "") or "" + if not isinstance(requirements_str, str): + requirements_str = "" + if len(requirements_str) > MAX_TICKET_CHARACTERS: + requirements_str = requirements_str[:MAX_TICKET_CHARACTERS] + "..." + tickets_content.append( { "ticket_id": ticket.get("id"), "ticket_url": ticket.get("url"), "title": ticket.get("title"), "body": ticket_body_str, - "requirements": ticket.get("acceptance_criteria", ""), + "requirements": requirements_str, "labels": ", ".join(ticket.get("labels", [])), } ) @@ -214,7 +414,13 @@ async def extract_tickets(git_provider): f"Error processing Azure DevOps ticket: {e}", artifact={"traceback": traceback.format_exc()}, ) - return tickets_content + + # Provider-agnostic Jira lookup. Tickets are often referenced by key in the PR + # title, description or branch name rather than via a provider-native link, so + # this runs for every provider and is a no-op when Jira is not configured. + add_jira_tickets(git_provider, tickets_content) + + return tickets_content except Exception as e: get_logger().error(f"Error extracting tickets error= {e}", diff --git a/tests/unittest/test_jira_ticket_extraction.py b/tests/unittest/test_jira_ticket_extraction.py new file mode 100644 index 0000000000..e151d38666 --- /dev/null +++ b/tests/unittest/test_jira_ticket_extraction.py @@ -0,0 +1,434 @@ +from unittest.mock import MagicMock, patch +from urllib.parse import urlparse + +import pytest + +from pr_agent.config_loader import get_settings +from pr_agent.git_providers import AzureDevopsProvider +from pr_agent.tools.ticket_pr_compliance_check import ( + MAX_TICKET_CHARACTERS, + MAX_TICKETS, + _get_jira_client, + _jira_cloud_base_url, + _get_pr_title, + add_jira_tickets, + extract_jira_tickets, + extract_tickets, + find_jira_tickets, +) + +# Keys the tests mutate via get_settings().set(...). Snapshot and restore them around +# every test so values (e.g. JIRA_REQUIREMENTS_FIELD) don't leak between tests. +_JIRA_KEYS = ( + "JIRA.JIRA_SITE", + "JIRA.JIRA_API_EMAIL", + "JIRA.JIRA_API_TOKEN", + "JIRA.JIRA_REQUIREMENTS_FIELD", +) + + +@pytest.fixture(autouse=True) +def restore_jira_settings(): + saved = {key: get_settings().get(key, None) for key in _JIRA_KEYS} + yield + for key, value in saved.items(): + get_settings().set(key, value) + + +class TestFindJiraTickets: + """Jira key extraction from arbitrary text (PR title, description, branch name).""" + + def test_uppercase_key_with_prefix(self): + """feature/ABC-123-description -> ABC-123""" + assert find_jira_tickets("feature/ABC-123-description-of-branch") == ["ABC-123"] + + def test_lowercase_key_with_prefix_normalized(self): + """bugfix/abc-123-description -> ABC-123 (case-insensitive, normalized to upper)""" + assert find_jira_tickets("bugfix/abc-123-description-of-branch") == ["ABC-123"] + + def test_mixed_case_key_normalized(self): + """Abc-123 -> ABC-123""" + assert find_jira_tickets("Abc-123-fix") == ["ABC-123"] + + def test_arbitrary_prefix_segment(self): + """Any prefix segment, not just feature/bugfix.""" + assert find_jira_tickets("chore/PROJ-45-cleanup") == ["PROJ-45"] + assert find_jira_tickets("hotfix/proj-45-cleanup") == ["PROJ-45"] + + def test_key_at_start_no_prefix(self): + """ABC-123-fix -> ABC-123""" + assert find_jira_tickets("ABC-123-fix") == ["ABC-123"] + + def test_key_anywhere_in_branch(self): + """Key embedded in the middle of a branch name is still found.""" + assert find_jira_tickets("release/v1.2.3-ABC-9-final") == ["ABC-9"] + + def test_key_in_description_text(self): + """Key mentioned in free text.""" + assert find_jira_tickets("This implements ABC-123 as discussed") == ["ABC-123"] + + def test_browse_url(self): + """Full Jira browse URL -> key.""" + assert find_jira_tickets( + "see https://acme.atlassian.net/browse/ABC-123 for details" + ) == ["ABC-123"] + + def test_no_ticket(self): + """Branch with no key -> []""" + assert find_jira_tickets("feature/no-ticket-here") == [] + assert find_jira_tickets("") == [] + + def test_multiple_tickets_deduped_in_order(self): + """Distinct keys are returned de-duplicated and case-normalized, in first-seen + order. Order must be stable so the later MAX_TICKETS cap is deterministic.""" + result = find_jira_tickets("ABC-1 and DEF-2, again ABC-1 and abc-1") + assert result == ["ABC-1", "DEF-2"] + + def test_order_preserved_across_patterns(self): + """First-seen order holds even when keys arrive via different patterns (plain + key vs. browse URL).""" + result = find_jira_tickets( + "GHI-3 first, then https://acme.atlassian.net/browse/ABC-1, then DEF-2" + ) + assert result == ["GHI-3", "ABC-1", "DEF-2"] + + +class TestExtractJiraTickets: + """End-to-end extraction: find keys, fetch via the Jira client, map to ticket dicts.""" + + def _configure_jira(self): + get_settings().set("JIRA.JIRA_SITE", "acme") + get_settings().set("JIRA.JIRA_API_EMAIL", "me@acme.com") + get_settings().set("JIRA.JIRA_API_TOKEN", "token123") + + def _disable_jira(self): + get_settings().set("JIRA.JIRA_SITE", "") + get_settings().set("JIRA.JIRA_API_EMAIL", "") + get_settings().set("JIRA.JIRA_API_TOKEN", "") + + def _fake_client(self, fields=None): + fields = fields or {"summary": "Title", "description": "Body", "labels": ["a"]} + client = MagicMock() + client.issue.return_value = {"fields": fields} + return client + + def test_returns_empty_when_not_configured(self): + self._disable_jira() + assert extract_jira_tickets("bugfix/abc-123-x") == [] + + def test_no_client_built_when_no_keys(self): + """No Jira keys in the text -> return early without constructing a client, so a + keyless PR pays no client-init cost (or noisy init-failure log).""" + self._configure_jira() + with patch("pr_agent.tools.ticket_pr_compliance_check.Jira") as jira_cls: + result = extract_jira_tickets("nothing ticket-like here") + assert result == [] + jira_cls.assert_not_called() + + def test_fetches_lowercase_branch_key(self): + """The whole point: a lowercased branch key is detected and fetched as upper.""" + self._configure_jira() + client = self._fake_client() + with patch("pr_agent.tools.ticket_pr_compliance_check.Jira", return_value=client): + result = extract_jira_tickets("bugfix/abc-123-description-of-branch") + client.issue.assert_called_once_with("ABC-123") + assert len(result) == 1 + assert result[0]["ticket_id"] == "ABC-123" + assert result[0]["ticket_url"] == "https://acme.atlassian.net/browse/ABC-123" + assert result[0]["title"] == "Title" + assert result[0]["labels"] == "a" + + def test_requirements_field_populated_when_configured(self): + """When jira_requirements_field is set, that custom field maps to requirements.""" + self._configure_jira() + get_settings().set("JIRA.JIRA_REQUIREMENTS_FIELD", "customfield_10127") + client = MagicMock() + client.issue.return_value = {"fields": { + "summary": "T", "description": "B", "labels": [], + "customfield_10127": "Acceptance criteria text", + }} + with patch("pr_agent.tools.ticket_pr_compliance_check.Jira", return_value=client): + result = extract_jira_tickets("ABC-1") + assert result[0]["requirements"] == "Acceptance criteria text" + + def test_requirements_truncated_like_body(self): + """A large requirements custom field is capped the same way the body is, so it + can't push an unbounded blob into the review prompt.""" + self._configure_jira() + get_settings().set("JIRA.JIRA_REQUIREMENTS_FIELD", "customfield_10127") + client = MagicMock() + client.issue.return_value = {"fields": { + "summary": "T", "description": "B", "labels": [], + "customfield_10127": "x" * 50, + }} + with patch("pr_agent.tools.ticket_pr_compliance_check.Jira", return_value=client): + result = extract_jira_tickets("ABC-1", max_characters=10) + assert result[0]["requirements"] == "x" * 10 + "..." + + def test_requirements_empty_when_field_not_configured(self): + """With no requirements field configured, requirements stays empty.""" + self._configure_jira() + get_settings().set("JIRA.JIRA_REQUIREMENTS_FIELD", "") + client = self._fake_client() + with patch("pr_agent.tools.ticket_pr_compliance_check.Jira", return_value=client): + result = extract_jira_tickets("ABC-1") + assert result[0]["requirements"] == "" + + def test_checks_all_tickets_when_multiple(self): + """When several distinct keys are present, each is fetched.""" + self._configure_jira() + client = self._fake_client() + with patch("pr_agent.tools.ticket_pr_compliance_check.Jira", return_value=client): + result = extract_jira_tickets("ABC-1 DEF-2 GHI-3") + fetched = {c.args[0] for c in client.issue.call_args_list} + assert fetched == {"ABC-1", "DEF-2", "GHI-3"} + assert len(result) == 3 + + def test_caps_candidate_keys_at_three(self): + """No more than three candidate keys are fetched (matches the GitHub branch).""" + self._configure_jira() + client = self._fake_client() + with patch("pr_agent.tools.ticket_pr_compliance_check.Jira", return_value=client): + result = extract_jira_tickets("ABC-1 ABC-2 ABC-3 ABC-4 ABC-5") + assert client.issue.call_count == 3 + assert len(result) == 3 + + def test_skips_ticket_on_fetch_error(self): + """A failed fetch for one key does not abort the others.""" + self._configure_jira() + client = MagicMock() + client.issue.side_effect = [ + Exception("404 not found"), + {"fields": {"summary": "Second", "description": "B", "labels": []}}, + ] + with patch("pr_agent.tools.ticket_pr_compliance_check.Jira", return_value=client): + result = extract_jira_tickets("ABC-1 ABC-2") + assert len(result) == 1 + assert result[0]["title"] == "Second" + + def test_nonexistent_keys_are_skipped(self): + """Key-like noise (utf-8, sha-1) that does not resolve in Jira is skipped, + leaving only the real ticket.""" + self._configure_jira() + + def fake_issue(key): + if key == "ABC-123": + return {"fields": {"summary": "Real", "description": "Body", "labels": []}} + raise Exception("404 not found") + + client = MagicMock() + client.issue.side_effect = fake_issue + with patch("pr_agent.tools.ticket_pr_compliance_check.Jira", return_value=client): + result = extract_jira_tickets("abc-123 utf-8") + assert [t["ticket_id"] for t in result] == ["ABC-123"] + + +class TestGetJiraClient: + """Cloud client construction: site-name -> base URL, email+token auth, v2 pin.""" + + def _set(self, site=None, email=None, token=None): + get_settings().set("JIRA.JIRA_SITE", site if site is not None else "") + get_settings().set("JIRA.JIRA_API_EMAIL", email if email is not None else "") + get_settings().set("JIRA.JIRA_API_TOKEN", token if token is not None else "") + + def test_cloud_builds_url_from_site_and_pins_v2(self): + """site name -> https://.atlassian.net, email/token basic auth, v2 pinned.""" + self._set(site="acme", email="me@acme.com", token="token123") + with patch("pr_agent.tools.ticket_pr_compliance_check.Jira") as jira_cls: + _get_jira_client() + jira_cls.assert_called_once_with( + url="https://acme.atlassian.net", username="me@acme.com", + password="token123", api_version="2", + ) + + def test_returns_none_when_not_configured(self): + self._set() + assert _get_jira_client() is None + + def test_returns_none_when_email_missing(self): + """Cloud requires email; site + token alone is incomplete.""" + self._set(site="acme", token="token123") + with patch("pr_agent.tools.ticket_pr_compliance_check.Jira") as jira_cls: + assert _get_jira_client() is None + jira_cls.assert_not_called() + + def test_no_warning_when_nothing_configured(self): + """Jira simply not in use -> return None silently, no misconfiguration warning.""" + self._set() + with patch("pr_agent.tools.ticket_pr_compliance_check.get_logger") as get_log: + assert _get_jira_client() is None + get_log.return_value.warning.assert_not_called() + + def test_warns_when_partially_configured(self): + """Some [jira] value set but the required site + email + token are incomplete + -> warn (likely a misconfiguration) and return None.""" + self._set(email="me@acme.com", token="token123") # site missing + with patch("pr_agent.tools.ticket_pr_compliance_check.get_logger") as get_log: + assert _get_jira_client() is None + get_log.return_value.warning.assert_called_once() + msg = get_log.return_value.warning.call_args.args[0] + assert "jira_site" in msg + assert "jira_api_token" not in msg # the one that IS set is not listed as missing + + +class TestJiraSiteInjection: + """jira_site is validated so repo-controlled config can't redirect the authenticated + request (and token) off *.atlassian.net. Building the URL from the site name means a + malicious value cannot express a different host.""" + + # Values a malicious repo config might inject to try to escape *.atlassian.net. + INJECTION_ATTEMPTS = [ + "evil.website.com?", # query separator + "evil.com#", # fragment + "evil.com/", # path separator + "evil.com", # dotted host + "evil.com:443", # port + "x@evil.com", # userinfo + "a.evil.com", # subdomain prefix + "../../evil", # path traversal + "evil%2ecom", # encoded dot + "evil_underscore", # underscore not allowed in DNS label + "evil .com", # internal whitespace + "", # empty + ] + + @pytest.mark.parametrize("bad_site", INJECTION_ATTEMPTS) + def test_injection_attempt_rejected(self, bad_site): + get_settings().set("JIRA.JIRA_SITE", bad_site) + get_settings().set("JIRA.JIRA_API_EMAIL", "me@acme.com") + get_settings().set("JIRA.JIRA_API_TOKEN", "token123") + # No client is built, and no base URL is produced for an invalid site. + with patch("pr_agent.tools.ticket_pr_compliance_check.Jira") as jira_cls: + assert _get_jira_client() is None + jira_cls.assert_not_called() + assert _jira_cloud_base_url() is None + + def test_valid_site_only_ever_targets_atlassian_net(self): + """Any accepted site name resolves to a host under .atlassian.net.""" + for good in ("acme", "my-org", "a1b2", "x"): + get_settings().set("JIRA.JIRA_SITE", good) + url = _jira_cloud_base_url() + assert url == f"https://{good}.atlassian.net" + assert urlparse(url).hostname.endswith(".atlassian.net") + + +class TestGetPrTitle: + """Provider-agnostic title access (GitHub/Bitbucket use .pr, GitLab uses .mr).""" + + def test_reads_pr_title(self): + gp = MagicMock(spec=["pr"]) + gp.pr = MagicMock(title="From PR object") + assert _get_pr_title(gp) == "From PR object" + + def test_reads_mr_title_when_no_pr(self): + """GitLab stores the merge request as .mr, not .pr.""" + gp = MagicMock(spec=["mr"]) + gp.mr = MagicMock(title="From MR object") + assert _get_pr_title(gp) == "From MR object" + + def test_returns_empty_when_no_title(self): + gp = MagicMock(spec=[]) + assert _get_pr_title(gp) == "" + + +class TestAddJiraTickets: + """Provider-agnostic Jira append used by extract_tickets for every provider.""" + + def _provider(self, title="", description="", branch=""): + gp = MagicMock(spec=["pr", "get_user_description", "get_pr_branch"]) + gp.pr = MagicMock(title=title) + gp.get_user_description.return_value = description + gp.get_pr_branch.return_value = branch + return gp + + def _configure_jira(self): + get_settings().set("JIRA.JIRA_SITE", "acme") + get_settings().set("JIRA.JIRA_API_EMAIL", "me@acme.com") + get_settings().set("JIRA.JIRA_API_TOKEN", "token123") + get_settings().set("JIRA.JIRA_REQUIREMENTS_FIELD", "") + + def test_appends_ticket_from_any_provider(self): + """Works off get_user_description + get_pr_branch, so it is provider-neutral.""" + self._configure_jira() + client = MagicMock() + client.issue.return_value = {"fields": {"summary": "T", "description": "B", "labels": []}} + gp = self._provider(branch="feature/ABC-123-x") + out = [] + with patch("pr_agent.tools.ticket_pr_compliance_check.Jira", return_value=client): + add_jira_tickets(gp, out) + assert [t["ticket_id"] for t in out] == ["ABC-123"] + + def test_dedupes_against_existing_tickets(self): + """A Jira ticket already present (same url) is not added twice.""" + self._configure_jira() + client = MagicMock() + client.issue.return_value = {"fields": {"summary": "T", "description": "B", "labels": []}} + gp = self._provider(title="ABC-123") + existing = [{"ticket_url": "https://acme.atlassian.net/browse/ABC-123"}] + with patch("pr_agent.tools.ticket_pr_compliance_check.Jira", return_value=client): + add_jira_tickets(gp, existing) + assert len(existing) == 1 + + def test_noop_when_jira_not_configured(self): + get_settings().set("JIRA.JIRA_SITE", "") + get_settings().set("JIRA.JIRA_API_TOKEN", "") + gp = self._provider(branch="feature/ABC-123-x") + out = [] + add_jira_tickets(gp, out) + assert out == [] + + def test_respects_overall_cap_with_existing_tickets(self): + """MAX_TICKETS is the combined per-PR cap: provider-native tickets already in + tickets_content count against it, and Jira is appended only up to the cap.""" + self._configure_jira() + client = MagicMock() + client.issue.return_value = {"fields": {"summary": "T", "description": "B", "labels": []}} + # Pre-fill with (MAX_TICKETS - 1) provider-native tickets, then offer several Jira keys. + existing = [{"ticket_url": f"https://example/issues/{i}"} for i in range(MAX_TICKETS - 1)] + gp = self._provider(description="ABC-1 DEF-2 GHI-3 JKL-4") + with patch("pr_agent.tools.ticket_pr_compliance_check.Jira", return_value=client): + add_jira_tickets(gp, existing) + assert len(existing) == MAX_TICKETS # only one Jira ticket was appended + + def test_skips_jira_entirely_when_cap_already_reached(self): + """When provider-native tickets already fill the cap, no Jira client is built.""" + self._configure_jira() + existing = [{"ticket_url": f"https://example/issues/{i}"} for i in range(MAX_TICKETS)] + gp = self._provider(description="ABC-1 DEF-2") + with patch("pr_agent.tools.ticket_pr_compliance_check.Jira") as jira_cls: + add_jira_tickets(gp, existing) + jira_cls.assert_not_called() + assert len(existing) == MAX_TICKETS + + +class TestAzureRequirementsTruncation: + """The Azure DevOps branch caps acceptance criteria like the body (no unbounded blob).""" + + @pytest.mark.asyncio + async def test_acceptance_criteria_truncated(self): + gp = AzureDevopsProvider.__new__(AzureDevopsProvider) # bypass __init__/network + gp.get_linked_work_items = MagicMock(return_value=[{ + "id": 1, + "url": "https://dev.azure.com/org/proj/_workitems/edit/1", + "title": "Work item", + "body": "short body", + "acceptance_criteria": "x" * (MAX_TICKET_CHARACTERS + 50), + "labels": [], + }]) + # Isolate the Azure branch: keep the provider-agnostic Jira step a no-op. + with patch("pr_agent.tools.ticket_pr_compliance_check.add_jira_tickets", + side_effect=lambda gp, tc: tc): + result = await extract_tickets(gp) + assert result[0]["requirements"] == "x" * MAX_TICKET_CHARACTERS + "..." + + @pytest.mark.asyncio + async def test_non_string_acceptance_criteria_becomes_empty(self): + gp = AzureDevopsProvider.__new__(AzureDevopsProvider) + gp.get_linked_work_items = MagicMock(return_value=[{ + "id": 2, "url": "u", "title": "t", "body": "b", + "acceptance_criteria": {"unexpected": "dict"}, "labels": [], + }]) + with patch("pr_agent.tools.ticket_pr_compliance_check.add_jira_tickets", + side_effect=lambda gp, tc: tc): + result = await extract_tickets(gp) + assert result[0]["requirements"] == ""