From 1323461d25a650033c55a3a6bb49533f441fac4c Mon Sep 17 00:00:00 2001 From: Evans Castonguay Date: Sat, 10 Jan 2026 10:57:41 -0500 Subject: [PATCH 01/11] feat: add jira issue provider support (cherry picked from commit bfc288d0bd7e8277c7dc8f7033e6526c8cc308e6) --- deploy/values/oci-dev.yaml | 30 ++ pr_agent/algo/ticket_utils.py | 16 + pr_agent/issue_providers/__init__.py | 16 + pr_agent/issue_providers/base.py | 45 ++ .../issue_providers/github_issue_provider.py | 21 + .../issue_providers/gitlab_issue_provider.py | 20 + .../issue_providers/jira_issue_provider.py | 155 ++++++ pr_agent/issue_providers/resolver.py | 36 ++ pr_agent/settings/configuration.toml | 39 ++ pr_agent/tools/pr_similar_issue.py | 484 +++++++++++++----- pr_agent/tools/ticket_pr_compliance_check.py | 21 - .../unittest/test_issue_provider_resolver.py | 10 + tests/unittest/test_jira_issue_provider.py | 58 +++ tests/unittest/test_similar_issue_helpers.py | 49 ++ 14 files changed, 858 insertions(+), 142 deletions(-) create mode 100644 deploy/values/oci-dev.yaml create mode 100644 pr_agent/algo/ticket_utils.py create mode 100644 pr_agent/issue_providers/__init__.py create mode 100644 pr_agent/issue_providers/base.py create mode 100644 pr_agent/issue_providers/github_issue_provider.py create mode 100644 pr_agent/issue_providers/gitlab_issue_provider.py create mode 100644 pr_agent/issue_providers/jira_issue_provider.py create mode 100644 pr_agent/issue_providers/resolver.py create mode 100644 tests/unittest/test_issue_provider_resolver.py create mode 100644 tests/unittest/test_jira_issue_provider.py create mode 100644 tests/unittest/test_similar_issue_helpers.py diff --git a/deploy/values/oci-dev.yaml b/deploy/values/oci-dev.yaml new file mode 100644 index 0000000000..d47bd54241 --- /dev/null +++ b/deploy/values/oci-dev.yaml @@ -0,0 +1,30 @@ +fqdn: + prefix: "dev" + suffix: "na.onecloud.hosting.cerence.net" + +gateway: "istio-system/gateway-na-onecloud-hosting-cerence-net" + +image: + tag: "dev" + +extraEnv: + - name: NOTIFICATIONS__NOTIFY_ON_REVIEW_PLUS + value: "true" + - name: CONFIG__ISSUE_PROVIDER + value: "jira" + - name: JIRA__BASE_URL + value: "https://cerence.atlassian.net" + - name: JIRA__ISSUE_PROJECTS + value: "XUITXTSRV" + +prSimilarIssue: + vectorDb: "qdrant" + embeddingBaseUrl: "https://callm-api-embedding.int.na.oc.cerence.net/v1/embeddings" + embeddingModel: "intfloat/multilingual-e5-large" + embeddingDim: 1024 + embeddingMaxTokens: 10000 + +qdrant: + enabled: true + persistence: + enabled: false diff --git a/pr_agent/algo/ticket_utils.py b/pr_agent/algo/ticket_utils.py new file mode 100644 index 0000000000..1787df4d73 --- /dev/null +++ b/pr_agent/algo/ticket_utils.py @@ -0,0 +1,16 @@ +import re +from typing import List + +JIRA_KEY_PATTERN = re.compile(r"(?:https?://[^\s/]+/browse/)?([A-Z][A-Z0-9]+-\d{1,7})", re.IGNORECASE) + + +def find_jira_keys(text: str) -> List[str]: + if not text: + return [] + matches = JIRA_KEY_PATTERN.findall(text) + keys = [] + for match in matches: + key = match.upper() + if key not in keys: + keys.append(key) + return keys diff --git a/pr_agent/issue_providers/__init__.py b/pr_agent/issue_providers/__init__.py new file mode 100644 index 0000000000..e9a8e1d9ca --- /dev/null +++ b/pr_agent/issue_providers/__init__.py @@ -0,0 +1,16 @@ +from pr_agent.issue_providers.base import Issue, IssueComment, IssueProvider +from pr_agent.issue_providers.github_issue_provider import GithubIssueProvider +from pr_agent.issue_providers.gitlab_issue_provider import GitlabIssueProvider +from pr_agent.issue_providers.jira_issue_provider import JiraIssueProvider +from pr_agent.issue_providers.resolver import get_issue_provider, resolve_issue_provider_name + +__all__ = [ + "Issue", + "IssueComment", + "IssueProvider", + "GithubIssueProvider", + "GitlabIssueProvider", + "JiraIssueProvider", + "get_issue_provider", + "resolve_issue_provider_name", +] diff --git a/pr_agent/issue_providers/base.py b/pr_agent/issue_providers/base.py new file mode 100644 index 0000000000..aa941075c0 --- /dev/null +++ b/pr_agent/issue_providers/base.py @@ -0,0 +1,45 @@ +from __future__ import annotations + +from abc import ABC, abstractmethod +from dataclasses import dataclass, field +from typing import Iterable, List, Optional + + +@dataclass +class IssueComment: + body: str + url: str = "" + id: Optional[str] = None + author: Optional[str] = None + + +@dataclass +class Issue: + key: str + title: str + description: str = "" + url: str = "" + created_at: Optional[str] = None + author: Optional[dict] = None + comments: List[IssueComment] = field(default_factory=list) + + @property + def body(self) -> str: + return self.description + + @property + def web_url(self) -> str: + return self.url + + +class IssueProvider(ABC): + @abstractmethod + def list_issues(self, project_path: Optional[str] = None, state: str = "all") -> Iterable: + raise NotImplementedError + + @abstractmethod + def get_issue(self, issue_id: str, project_path: Optional[str] = None): + raise NotImplementedError + + def get_issue_comments(self, issue) -> List[IssueComment]: + return [] diff --git a/pr_agent/issue_providers/github_issue_provider.py b/pr_agent/issue_providers/github_issue_provider.py new file mode 100644 index 0000000000..b083d1328f --- /dev/null +++ b/pr_agent/issue_providers/github_issue_provider.py @@ -0,0 +1,21 @@ +from __future__ import annotations + +from typing import Optional + +from pr_agent.issue_providers.base import IssueProvider + + +class GithubIssueProvider(IssueProvider): + def __init__(self, git_provider, repo_obj): + self.git_provider = git_provider + self.repo_obj = repo_obj + + def list_issues(self, project_path: Optional[str] = None, state: str = "all"): + return self.repo_obj.get_issues(state=state) + + def get_issue(self, issue_id, project_path: Optional[str] = None): + issue_number = int(issue_id) + return self.repo_obj.get_issue(issue_number) + + def get_issue_comments(self, issue): + return list(issue.get_comments()) diff --git a/pr_agent/issue_providers/gitlab_issue_provider.py b/pr_agent/issue_providers/gitlab_issue_provider.py new file mode 100644 index 0000000000..183f7b9f6c --- /dev/null +++ b/pr_agent/issue_providers/gitlab_issue_provider.py @@ -0,0 +1,20 @@ +from __future__ import annotations + +from typing import Optional + +from pr_agent.issue_providers.base import IssueProvider + + +class GitlabIssueProvider(IssueProvider): + def __init__(self, git_provider): + self.git_provider = git_provider + + def list_issues(self, project_path: Optional[str] = None, state: str = "all"): + return self.git_provider.list_issues(project_path, state=state) + + def get_issue(self, issue_id, project_path: Optional[str] = None): + issue_iid = int(issue_id) + return self.git_provider.get_issue(issue_iid, project_path) + + def get_issue_comments(self, issue): + return self.git_provider.get_issue_comments(issue) diff --git a/pr_agent/issue_providers/jira_issue_provider.py b/pr_agent/issue_providers/jira_issue_provider.py new file mode 100644 index 0000000000..9c4688e728 --- /dev/null +++ b/pr_agent/issue_providers/jira_issue_provider.py @@ -0,0 +1,155 @@ +from __future__ import annotations + +import base64 +import json +import urllib.parse +import urllib.request +from typing import Iterable, List, Optional + +from pr_agent.config_loader import get_settings +from pr_agent.issue_providers.base import Issue, IssueProvider +from pr_agent.log import get_logger + + +class JiraIssueProvider(IssueProvider): + def __init__(self, settings=None, project_path: Optional[str] = None, timeout_seconds: int = 15): + settings = settings or get_settings() + jira_settings = _get_section(settings, "JIRA") + self.base_url = (jira_settings.get("BASE_URL") or "").rstrip("/") + self.api_email = jira_settings.get("API_EMAIL") or "" + self.api_token = jira_settings.get("API_TOKEN") or "" + self.api_version = jira_settings.get("API_VERSION", 2) + self.issue_jql = (jira_settings.get("ISSUE_JQL") or "").strip() + self.issue_projects = _normalize_list(jira_settings.get("ISSUE_PROJECTS", [])) + self.issue_project_map = jira_settings.get("ISSUE_PROJECT_MAP", {}) or {} + self.issue_max_results = int(jira_settings.get("ISSUE_MAX_RESULTS") or 200) + self.valid_project_keys = set(_normalize_list(jira_settings.get("VALID_PROJECT_KEYS", []))) + self.project_path = project_path + self.timeout_seconds = timeout_seconds + + def is_configured(self) -> bool: + return bool(self.base_url and self.api_email and self.api_token) + + def list_issues(self, project_path: Optional[str] = None, state: str = "all") -> Iterable[Issue]: + jql = self._build_jql(project_path or self.project_path) + if not jql: + get_logger().warning("Jira issue provider has no JQL or project keys; skipping issue listing.") + return [] + data = self._request_json( + "search", + { + "jql": jql, + "maxResults": self.issue_max_results, + "fields": "summary,description,created,reporter", + }, + ) + issues = data.get("issues", []) if isinstance(data, dict) else [] + return [self._issue_from_payload(item) for item in issues] + + def get_issue(self, issue_id: str, project_path: Optional[str] = None) -> Optional[Issue]: + issue_key = (issue_id or "").strip().upper() + if not issue_key: + return None + data = self._request_json( + f"issue/{urllib.parse.quote(issue_key)}", + {"fields": "summary,description,created,reporter"}, + ) + if not data: + return None + return self._issue_from_payload(data) + + def _build_jql(self, project_path: Optional[str]) -> str: + if self.issue_jql: + return self.issue_jql + project_keys = self._resolve_project_keys(project_path) + if not project_keys: + return "" + return f"project in ({', '.join(project_keys)}) order by created DESC" + + def _resolve_project_keys(self, project_path: Optional[str]) -> List[str]: + project_map = _normalize_project_map(self.issue_project_map) + keys = [] + if project_path and project_path in project_map: + keys = project_map[project_path] + elif self.issue_projects: + keys = self.issue_projects + if self.valid_project_keys: + keys = [key for key in keys if key in self.valid_project_keys] + return keys + + def _request_json(self, path: str, params: dict) -> dict: + if not self.is_configured(): + get_logger().warning("Jira client is not configured; skipping issue fetch") + return {} + query = urllib.parse.urlencode(params) + url = f"{self.base_url}/rest/api/{self.api_version}/{path}" + if query: + url = f"{url}?{query}" + auth_token = base64.b64encode(f"{self.api_email}:{self.api_token}".encode("utf-8")).decode("utf-8") + request = urllib.request.Request(url) + request.add_header("Authorization", f"Basic {auth_token}") + request.add_header("Accept", "application/json") + try: + with urllib.request.urlopen(request, timeout=self.timeout_seconds) as response: + payload = response.read().decode("utf-8") + return json.loads(payload) + except Exception as exc: + get_logger().warning("Failed to fetch Jira issues", artifact={"error": str(exc), "url": url}) + return {} + + def _issue_from_payload(self, issue: dict) -> Issue: + fields = issue.get("fields", {}) if isinstance(issue, dict) else {} + key = issue.get("key", "UNKNOWN") + summary = fields.get("summary") or "" + description = JiraIssueProvider._normalize_description(fields.get("description")) + created_at = fields.get("created") or "" + reporter = fields.get("reporter") or {} + author = {"username": reporter.get("displayName") or reporter.get("name") or reporter.get("emailAddress") or ""} + return Issue( + key=key, + title=summary, + description=description, + url=f"{self.base_url}/browse/{key}" if self.base_url else "", + created_at=created_at, + author=author, + ) + + @staticmethod + def _normalize_description(description: object) -> str: + if description is None: + return "" + if isinstance(description, str): + return description + try: + return str(description) + except Exception: + return "" + + +def _get_section(settings, key: str) -> dict: + if settings is None: + return {} + if hasattr(settings, "get"): + return settings.get(key, {}) or {} + return settings.get(key, {}) if isinstance(settings, dict) else {} + + +def _normalize_list(value: object) -> List[str]: + if not value: + return [] + if isinstance(value, str): + return [item.strip().upper() for item in value.split(",") if item.strip()] + return [str(item).strip().upper() for item in value if str(item).strip()] + + +def _normalize_project_map(value: object) -> dict: + if not value: + return {} + try: + mapping = dict(value) + except Exception: + return {} + normalized = {} + for project_path, keys in mapping.items(): + normalized[project_path] = _normalize_list(keys) + return normalized diff --git a/pr_agent/issue_providers/resolver.py b/pr_agent/issue_providers/resolver.py new file mode 100644 index 0000000000..3bf3b03d63 --- /dev/null +++ b/pr_agent/issue_providers/resolver.py @@ -0,0 +1,36 @@ +from __future__ import annotations + +from typing import Optional + +from pr_agent.config_loader import get_settings +from pr_agent.issue_providers.github_issue_provider import GithubIssueProvider +from pr_agent.issue_providers.gitlab_issue_provider import GitlabIssueProvider +from pr_agent.issue_providers.jira_issue_provider import JiraIssueProvider + + +def resolve_issue_provider_name(config_value: Optional[str], git_provider_name: Optional[str]) -> str: + value = (config_value or "auto").strip().lower() + if value == "auto": + return (git_provider_name or "gitlab").strip().lower() + return value + + +def get_issue_provider( + provider_name: Optional[str], + git_provider=None, + repo_obj=None, + project_path: Optional[str] = None, + settings=None, +): + resolved = resolve_issue_provider_name(provider_name, getattr(git_provider, "provider_name", None)) + if resolved == "jira": + return JiraIssueProvider(settings=settings or get_settings(), project_path=project_path) + if resolved == "github": + if repo_obj is None: + raise ValueError("GithubIssueProvider requires repo_obj") + return GithubIssueProvider(git_provider, repo_obj) + if resolved == "gitlab": + if git_provider is None: + raise ValueError("GitlabIssueProvider requires git_provider") + return GitlabIssueProvider(git_provider) + raise ValueError(f"Unsupported issue provider '{resolved}'") diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index dd2c3864e4..1f2c20fd6a 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -12,6 +12,7 @@ fallback_models=["o4-mini"] #model_weak="gpt-4o" # optional, a weaker model to use for some easier tasks # CLI git_provider="github" +issue_provider="auto" # "auto", "github", "gitlab", "jira" publish_output=true publish_output_progress=true verbosity_level=0 # 0,1,2 @@ -171,6 +172,39 @@ publish_post_process_suggestion_impact=true wiki_page_accepted_suggestions=true allow_thumbs_up_down=false +[review_plus] # /review_plus # +enable_review = true +enable_improve = true +enable_ask = true + +[ticket_summary] # /summarize_tickets # +max_diff_chars = 15000 +description_preview_chars = 300 +enable_code_changes_summary = true + +[ticket] +provider = "jira" +base_url = "" +api_email = "" +api_token = "" +api_version = 2 +valid_project_keys = [] + +[jira] +base_url = "" +api_email = "" +api_token = "" +api_version = 2 +valid_project_keys = [] +issue_projects = [] +issue_jql = "" +issue_project_map = {} +issue_max_results = 200 + +[notifications] +teams_webhook_url = "" +notify_on_review_plus = false + [pr_custom_prompt] # /custom_prompt # prompt = """\ The code suggestions should focus only on the following: @@ -350,6 +384,11 @@ skip_comments = false force_update_dataset = false max_issues_to_scan = 500 vectordb = "pinecone" # options: "pinecone", "lancedb", "qdrant" +embedding_base_url = "" # OpenAI-compatible embeddings endpoint (optional) +embedding_model = "text-embedding-ada-002" +embedding_api_key = "" # Optional if the embeddings endpoint is unauthenticated +embedding_dim = 1536 +embedding_max_tokens = 8000 [pr_find_similar_component] class_name = "" diff --git a/pr_agent/tools/pr_similar_issue.py b/pr_agent/tools/pr_similar_issue.py index 7a97d85ba7..3813956700 100644 --- a/pr_agent/tools/pr_similar_issue.py +++ b/pr_agent/tools/pr_similar_issue.py @@ -1,35 +1,108 @@ import time from enum import Enum +import re from typing import List +from urllib.parse import urlparse import openai from pydantic import BaseModel, Field -from pr_agent.algo import MAX_TOKENS +from pr_agent.algo.ticket_utils import find_jira_keys from pr_agent.algo.token_handler import TokenHandler -from pr_agent.algo.utils import get_max_tokens from pr_agent.config_loader import get_settings -from pr_agent.git_providers import get_git_provider +from pr_agent.git_providers import get_git_provider_with_context +from pr_agent.issue_providers import get_issue_provider, resolve_issue_provider_name from pr_agent.log import get_logger +from pr_agent.tools.embedding_client import EmbeddingClient -MODEL = "text-embedding-ada-002" +DEFAULT_EMBEDDING_MODEL = "text-embedding-ada-002" class PRSimilarIssue: def __init__(self, issue_url: str, ai_handler, args: list = None): - if get_settings().config.git_provider != "github": - raise Exception("Only github is supported for similar issue tool") + self.issue_url = issue_url + self.resource_url = issue_url.split('=')[-1] if issue_url else "" + self.provider_name = get_settings().config.git_provider + self.issue_provider_name = resolve_issue_provider_name( + get_settings().get("CONFIG.ISSUE_PROVIDER", "auto"), + self.provider_name, + ) + self.supported = self.provider_name in ("github", "gitlab") + self.git_provider = get_git_provider_with_context(self.resource_url) + if not self.supported: + return self.cli_mode = get_settings().CONFIG.CLI_MODE self.max_issues_to_scan = get_settings().pr_similar_issue.max_issues_to_scan - self.issue_url = issue_url - self.git_provider = get_git_provider()() - repo_name, issue_number = self.git_provider._parse_issue_url(issue_url.split('=')[-1]) - self.git_provider.repo = repo_name - self.git_provider.repo_obj = self.git_provider.github_client.get_repo(repo_name) self.token_handler = TokenHandler() - repo_obj = self.git_provider.repo_obj - repo_name_for_index = self.repo_name_for_index = repo_obj.full_name.lower().replace('/', '-').replace('_/', '-') + self.embedding_model = get_settings().pr_similar_issue.get("embedding_model", DEFAULT_EMBEDDING_MODEL) + self.embedding_base_url = get_settings().pr_similar_issue.get("embedding_base_url", "") + self.embedding_api_key = get_settings().pr_similar_issue.get("embedding_api_key", "") + self.embedding_dim = get_settings().pr_similar_issue.get("embedding_dim", 1536) + self.embedding_max_tokens = get_settings().pr_similar_issue.get("embedding_max_tokens", 8000) + self.embedding_client = None + if self.embedding_base_url: + self.embedding_client = EmbeddingClient( + self.embedding_base_url, + self.embedding_model, + api_key=self.embedding_api_key or None, + ) + self.repo_obj = None + self.issue_iid = None + self.project_path = None + self.issue_context = False + self.output_target = None + self.issue_provider = None + self.jira_keys = [] + if self.provider_name == "github": + repo_name, _ = self.git_provider._parse_issue_url(self.resource_url) + self.git_provider.repo = repo_name + self.repo_obj = self.git_provider.github_client.get_repo(repo_name) + self.git_provider.repo_obj = self.repo_obj + repo_name_for_index = self.repo_obj.full_name + else: + if self.issue_provider_name != "jira" and self._is_issue_url(self.resource_url): + self.issue_context = True + self.project_path, self.issue_iid = self.git_provider._parse_issue_url(self.resource_url) + self.repo_obj = self.git_provider._get_project(self.project_path) + if self.repo_obj is None: + raise Exception(f"GitLab project not found: {self.project_path}") + self.git_provider.id_project = self.project_path + self.git_provider.repo_obj = self.repo_obj + else: + self.issue_context = False + if not getattr(self.git_provider, "mr", None): + raise Exception("GitLab merge request context is required for /similar_issue") + self.output_target = self.git_provider.mr + self.project_path = self.git_provider.id_project + self.repo_obj = self.git_provider.gl.projects.get(self.project_path) + self.git_provider.repo_obj = self.repo_obj + if self.issue_provider_name == "jira": + self.jira_keys = find_jira_keys(self.resource_url) + if not self.jira_keys: + self.jira_keys = self._extract_jira_keys_from_mr(self.git_provider.mr) + if self.jira_keys: + self.issue_context = True + self.issue_iid = self.jira_keys[0] + else: + issue_iid = self._extract_issue_iid_from_text(self._build_query_from_mr(self.git_provider.mr)) + if issue_iid: + try: + self._get_issue_by_number(issue_iid) + self.issue_context = True + self.issue_iid = issue_iid + except Exception: + get_logger().debug("Issue reference not found or inaccessible; falling back to MR context.") + repo_name_for_index = getattr(self.repo_obj, "path_with_namespace", self.project_path) + + repo_name_for_index = repo_name_for_index.lower().replace('/', '-').replace('_/', '-') + self.repo_name_for_index = repo_name_for_index + self.issue_provider = get_issue_provider( + self.issue_provider_name, + git_provider=self.git_provider, + repo_obj=self.repo_obj, + project_path=self.project_path, + ) index_name = self.index_name = "codium-ai-pr-agent-issues" if get_settings().pr_similar_issue.vectordb == "pinecone": @@ -45,9 +118,17 @@ def __init__(self, issue_url: str, ai_handler, args: list = None): environment = get_settings().pinecone.environment except Exception: if not self.cli_mode: - repo_name, original_issue_number = self.git_provider._parse_issue_url(self.issue_url.split('=')[-1]) - issue_main = self.git_provider.repo_obj.get_issue(original_issue_number) - issue_main.create_comment("Please set pinecone api key and environment in secrets file") + try: + if self.provider_name == "github": + _, issue_number = self.git_provider._parse_issue_url(self.resource_url) + issue_main = self._get_issue_by_number(issue_number) + elif self.issue_context and self.issue_provider_name != "jira": + issue_main = self._get_issue_by_number(self.issue_iid) + else: + issue_main = self.git_provider.mr + self._publish_output(issue_main, "Please set pinecone api key and environment in secrets file") + except Exception: + get_logger().warning("Failed to publish pinecone credential message.") raise Exception("Please set pinecone api key and environment in secrets file") # check if index exists, and if repo is already indexed @@ -77,16 +158,16 @@ def __init__(self, issue_url: str, ai_handler, args: list = None): get_logger().info('Indexing the entire repo...') get_logger().info('Getting issues...') - issues = list(repo_obj.get_issues(state='all')) + issues = list(self._iter_issues()) get_logger().info('Done') self._update_index_with_issues(issues, repo_name_for_index, upsert=upsert) else: # update index if needed pinecone_index = pinecone.Index(index_name=index_name) issues_to_update = [] - issues_paginated_list = repo_obj.get_issues(state='all') + issues_paginated_list = self._iter_issues() counter = 1 for issue in issues_paginated_list: - if issue.pull_request: + if getattr(issue, "pull_request", None): continue issue_str, comments, number = self._process_issue(issue) issue_key = f"issue_{number}" @@ -142,16 +223,16 @@ def __init__(self, issue_url: str, ai_handler, args: list = None): get_logger().info('Indexing the entire repo...') get_logger().info('Getting issues...') - issues = list(repo_obj.get_issues(state='all')) + issues = list(self._iter_issues()) get_logger().info('Done') self._update_table_with_issues(issues, repo_name_for_index, ingest=ingest) else: # update table if needed issues_to_update = [] - issues_paginated_list = repo_obj.get_issues(state='all') + issues_paginated_list = self._iter_issues() counter = 1 for issue in issues_paginated_list: - if issue.pull_request: + if getattr(issue, "pull_request", None): continue issue_str, comments, number = self._process_issue(issue) issue_key = f"issue_{number}" @@ -183,17 +264,22 @@ def __init__(self, issue_url: str, ai_handler, args: list = None): except Exception: raise Exception("Please install qdrant-client to use qdrant as vectordb") - api_key = None - url = None - try: - api_key = get_settings().qdrant.api_key - url = get_settings().qdrant.url - except Exception: + api_key = get_settings().get("QDRANT.API_KEY", None) + url = get_settings().get("QDRANT.URL", None) + if not url: if not self.cli_mode: - repo_name, original_issue_number = self.git_provider._parse_issue_url(self.issue_url.split('=')[-1]) - issue_main = self.git_provider.repo_obj.get_issue(original_issue_number) - issue_main.create_comment("Please set qdrant url and api key in secrets file") - raise Exception("Please set qdrant url and api key in secrets file") + try: + if self.provider_name == "github": + _, issue_number = self.git_provider._parse_issue_url(self.resource_url) + issue_main = self._get_issue_by_number(issue_number) + elif self.issue_context and self.issue_provider_name != "jira": + issue_main = self._get_issue_by_number(self.issue_iid) + else: + issue_main = self.git_provider.mr + self._publish_output(issue_main, "Please set qdrant url in secrets file") + except Exception: + get_logger().warning("Failed to publish qdrant credential message.") + raise Exception("Please set qdrant url in secrets file") self.qdrant = qdrant_client.QdrantClient(url=url, api_key=api_key) @@ -205,10 +291,30 @@ def __init__(self, issue_url: str, ai_handler, args: list = None): ingest = False self.qdrant.create_collection( collection_name=self.index_name, - vectors_config=VectorParams(size=1536, distance=Distance.COSINE), + vectors_config=VectorParams(size=self.embedding_dim, distance=Distance.COSINE), ) else: - if get_settings().pr_similar_issue.force_update_dataset: + existing_dim = self._get_qdrant_vector_size() + if existing_dim and existing_dim != self.embedding_dim: + if get_settings().pr_similar_issue.force_update_dataset: + get_logger().warning( + "Qdrant collection dimension mismatch (existing=%s, expected=%s); recreating.", + existing_dim, + self.embedding_dim, + ) + self.qdrant.delete_collection(self.index_name) + self.qdrant.create_collection( + collection_name=self.index_name, + vectors_config=VectorParams(size=self.embedding_dim, distance=Distance.COSINE), + ) + run_from_scratch = True + ingest = False + else: + raise Exception( + f"Qdrant collection '{self.index_name}' has dimension {existing_dim}, " + f"expected {self.embedding_dim}. Set pr_similar_issue.force_update_dataset=true to rebuild." + ) + elif get_settings().pr_similar_issue.force_update_dataset: ingest = True else: response = self.qdrant.count( @@ -223,15 +329,15 @@ def __init__(self, issue_url: str, ai_handler, args: list = None): if run_from_scratch or ingest: get_logger().info('Indexing the entire repo...') get_logger().info('Getting issues...') - issues = list(repo_obj.get_issues(state='all')) + issues = list(self._iter_issues()) get_logger().info('Done') self._update_qdrant_with_issues(issues, repo_name_for_index, ingest=ingest) else: issues_to_update = [] - issues_paginated_list = repo_obj.get_issues(state='all') + issues_paginated_list = self._iter_issues() counter = 1 for issue in issues_paginated_list: - if issue.pull_request: + if getattr(issue, "pull_request", None): continue issue_str, comments, number = self._process_issue(issue) issue_key = f"issue_{number}" @@ -257,16 +363,49 @@ def __init__(self, issue_url: str, ai_handler, args: list = None): async def run(self): - get_logger().info('Getting issue...') - repo_name, original_issue_number = self.git_provider._parse_issue_url(self.issue_url.split('=')[-1]) - issue_main = self.git_provider.repo_obj.get_issue(original_issue_number) - issue_str, comments, number = self._process_issue(issue_main) - openai.api_key = get_settings().openai.key + if not self.supported: + message = "The /similar_issue tool is currently supported only for GitHub and GitLab." + if get_settings().config.publish_output and hasattr(self.git_provider, "publish_comment"): + try: + self.git_provider.publish_comment(message) + except Exception: + get_logger().warning("Failed to publish unsupported-provider message.") + return "" + + original_issue_number = None + get_logger().info('Preparing query...') + if self.issue_provider_name == "jira" and self.jira_keys: + issue_texts = [] + for key in self.jira_keys: + issue = self._get_issue_by_number(key) + if not issue: + continue + issue_str, _, _ = self._process_issue(issue) + issue_texts.append(issue_str) + if issue_texts: + original_issue_number = str(self.jira_keys[0]) + query_text = "\n\n".join(issue_texts) + issue_main = self.git_provider.mr + else: + issue_main = self.git_provider.mr + query_text = self._build_query_from_mr(issue_main) + elif self.provider_name == "github": + _, original_issue_number = self.git_provider._parse_issue_url(self.resource_url) + issue_main = self._get_issue_by_number(original_issue_number) + issue_str, _, _ = self._process_issue(issue_main) + query_text = issue_str + else: + if self.issue_context: + issue_main = self._get_issue_by_number(self.issue_iid) + issue_str, _, original_issue_number = self._process_issue(issue_main) + query_text = issue_str + else: + issue_main = self.git_provider.mr + query_text = self._build_query_from_mr(issue_main) get_logger().info('Done') get_logger().info('Querying...') - res = openai.Embedding.create(input=[issue_str], engine=MODEL) - embeds = [record['embedding'] for record in res['data']] + embeds = self._embed_texts([query_text]) relevant_issues_number_list = [] relevant_comment_number_list = [] @@ -284,13 +423,12 @@ async def run(self): if 'example_issue_' in r["id"]: continue - try: - issue_number = int(r["id"].split('.')[0].split('_')[-1]) - except: + issue_number = r["id"].split(".", 1)[0].split("_", 1)[-1] + if not issue_number: get_logger().debug(f"Failed to parse issue number from {r['id']}") continue - if original_issue_number == issue_number: + if original_issue_number and str(original_issue_number) == str(issue_number): continue if issue_number not in relevant_issues_number_list: relevant_issues_number_list.append(issue_number) @@ -309,13 +447,12 @@ async def run(self): if 'example_issue_' in r["id"]: continue - try: - issue_number = int(r["id"].split('.')[0].split('_')[-1]) - except: + issue_number = r["id"].split(".", 1)[0].split("_", 1)[-1] + if not issue_number: get_logger().debug(f"Failed to parse issue number from {r['id']}") continue - if original_issue_number == issue_number: + if original_issue_number and str(original_issue_number) == str(issue_number): continue if issue_number not in relevant_issues_number_list: relevant_issues_number_list.append(issue_number) @@ -341,12 +478,11 @@ async def run(self): rid = r.payload.get("id", "") if 'example_issue_' in rid: continue - try: - issue_number = int(rid.split('.')[0].split('_')[-1]) - except Exception: + issue_number = rid.split(".", 1)[0].split("_", 1)[-1] + if not issue_number: get_logger().debug(f"Failed to parse issue number from {rid}") continue - if original_issue_number == issue_number: + if original_issue_number and str(original_issue_number) == str(issue_number): continue if issue_number not in relevant_issues_number_list: relevant_issues_number_list.append(issue_number) @@ -361,28 +497,170 @@ async def run(self): similar_issues_str = "### Similar Issues\n___\n\n" for i, issue_number_similar in enumerate(relevant_issues_number_list): - issue = self.git_provider.repo_obj.get_issue(issue_number_similar) - title = issue.title - url = issue.html_url + issue = self._get_issue_by_number(issue_number_similar) + title = self._get_issue_title(issue) + url = getattr(issue, "html_url", None) or getattr(issue, "web_url", None) if relevant_comment_number_list[i] != -1: - url = list(issue.get_comments())[relevant_comment_number_list[i]].html_url + url = self._get_issue_comment_url(issue, relevant_comment_number_list[i]) similar_issues_str += f"{i + 1}. **[{title}]({url})** (score={score_list[i]})\n\n" if get_settings().config.publish_output: - response = issue_main.create_comment(similar_issues_str) + target = self.output_target or issue_main + self._publish_output(target, similar_issues_str) get_logger().info(similar_issues_str) get_logger().info('Done') + def _embed_texts(self, list_to_encode: list[str]) -> list[list[float]]: + if not list_to_encode: + return [] + + if self.embedding_client: + return self.embedding_client.embed(list_to_encode) + + openai.api_key = get_settings().openai.key + res = openai.Embedding.create(input=list_to_encode, engine=self.embedding_model) + return [record['embedding'] for record in res['data']] + + def _embed_texts_with_fallback(self, list_to_encode: list[str]) -> list[list[float]]: + try: + return self._embed_texts(list_to_encode) + except Exception: + get_logger().error('Failed to embed entire list, embedding one by one...') + embeds = [] + for text in list_to_encode: + try: + embeds.append(self._embed_texts([text])[0]) + except Exception: + embeds.append([0] * self.embedding_dim) + return embeds + + def _get_qdrant_vector_size(self) -> int | None: + try: + info = self.qdrant.get_collection(self.index_name) + vectors = info.config.params.vectors + if hasattr(vectors, "size"): + return vectors.size + if isinstance(vectors, dict): + if "size" in vectors: + return vectors.get("size") + default_vec = vectors.get("default") + if hasattr(default_vec, "size"): + return default_vec.size + except Exception: + return None + return None + def _process_issue(self, issue): - header = issue.title - body = issue.body - number = issue.number + header = self._get_issue_title(issue) + body = self._get_issue_body(issue) + number = self._get_issue_number(issue) if get_settings().pr_similar_issue.skip_comments: comments = [] else: - comments = list(issue.get_comments()) + comments = self._get_issue_comments(issue) issue_str = f"Issue Header: \"{header}\"\n\nIssue Body:\n{body}" return issue_str, comments, number + def _iter_issues(self): + return self.issue_provider.list_issues(self.project_path, state="all") + + def _get_issue_by_number(self, issue_number): + return self.issue_provider.get_issue(issue_number, self.project_path) + + def _publish_output(self, target, message: str): + if self.provider_name == "github": + return target.create_comment(message) + return target.notes.create({"body": message}) + + def _is_issue_url(self, url: str) -> bool: + try: + path = urlparse(url).path + except Exception: + return False + return "/issues/" in path + + def _build_query_from_mr(self, mr) -> str: + title = getattr(mr, "title", "") or "" + description = getattr(mr, "description", "") or "" + if description: + return f"MR Title: \"{title}\"\n\nMR Description:\n{description}" + return f"MR Title: \"{title}\"" + + def _extract_jira_keys_from_mr(self, mr) -> list: + title = getattr(mr, "title", "") or "" + description = getattr(mr, "description", "") or "" + branch_name = "" + commit_messages = "" + try: + branch_name = self.git_provider.get_pr_branch() or "" + except Exception: + branch_name = "" + try: + commit_messages = self.git_provider.get_commit_messages() or "" + except Exception: + commit_messages = "" + text = "\n".join([title, description, branch_name, commit_messages]) + return find_jira_keys(text) + + def _extract_issue_iid_from_text(self, text: str): + if not text: + return None + match = re.search(r"#(\d+)", text) + if not match: + return None + try: + return int(match.group(1)) + except ValueError: + return None + + def _get_issue_title(self, issue) -> str: + return getattr(issue, "title", "") or "" + + def _get_issue_body(self, issue) -> str: + body = getattr(issue, "body", None) + if body is None: + body = getattr(issue, "description", "") + return body or "" + + def _get_issue_number(self, issue) -> int: + for attr in ("iid", "number", "id", "key"): + value = getattr(issue, attr, None) + if value is not None: + if isinstance(value, int): + return value + value_str = str(value) + if value_str.isdigit(): + return int(value_str) + return value_str + raise ValueError("Issue number is missing") + + def _get_issue_username(self, issue) -> str: + user = getattr(issue, "user", None) + if user and getattr(user, "login", None): + return user.login + author = getattr(issue, "author", None) + if isinstance(author, dict): + return author.get("username") or author.get("name") or "@unknown" + if author and getattr(author, "username", None): + return author.username + return "@unknown" + + def _get_issue_comments(self, issue): + comments = getattr(issue, "comments", None) + if comments is not None: + return comments + return self.issue_provider.get_issue_comments(issue) + + def _get_issue_comment_url(self, issue, comment_index: int) -> str: + comments = self._get_issue_comments(issue) + if comment_index < 0 or comment_index >= len(comments): + return getattr(issue, "web_url", None) or getattr(issue, "html_url", None) or "" + comment = comments[comment_index] + comment_url = getattr(comment, "html_url", None) or getattr(comment, "web_url", None) or getattr(comment, "url", None) + if comment_url: + return comment_url + issue_url = getattr(issue, "web_url", None) or getattr(issue, "html_url", None) or "" + return issue_url + def _update_index_with_issues(self, issues_list, repo_name_for_index, upsert=False): get_logger().info('Processing issues...') corpus = Corpus() @@ -395,7 +673,7 @@ def _update_index_with_issues(self, issues_list, repo_name_for_index, upsert=Fal counter = 0 for issue in issues_list: - if issue.pull_request: + if getattr(issue, "pull_request", None): continue counter += 1 @@ -407,10 +685,10 @@ def _update_index_with_issues(self, issues_list, repo_name_for_index, upsert=Fal issue_str, comments, number = self._process_issue(issue) issue_key = f"issue_{number}" - username = issue.user.login + username = self._get_issue_username(issue) created_at = str(issue.created_at) if len(issue_str) < 8000 or \ - self.token_handler.count_tokens(issue_str) < get_max_tokens(MODEL): # fast reject first + self.token_handler.count_tokens(issue_str) < self.embedding_max_tokens: # fast reject first issue_record = Record( id=issue_key + "." + "issue", text=issue_str, @@ -428,7 +706,7 @@ def _update_index_with_issues(self, issues_list, repo_name_for_index, upsert=Fal continue if len(comment_body) < 8000 or \ - self.token_handler.count_tokens(comment_body) < MAX_TOKENS[MODEL]: + self.token_handler.count_tokens(comment_body) < self.embedding_max_tokens: comment_record = Record( id=issue_key + ".comment_" + str(j + 1), text=comment_body, @@ -442,20 +720,8 @@ def _update_index_with_issues(self, issues_list, repo_name_for_index, upsert=Fal get_logger().info('Done') get_logger().info('Embedding...') - openai.api_key = get_settings().openai.key list_to_encode = list(df["text"].values) - try: - res = openai.Embedding.create(input=list_to_encode, engine=MODEL) - embeds = [record['embedding'] for record in res['data']] - except: - embeds = [] - get_logger().error('Failed to embed entire list, embedding one by one...') - for i, text in enumerate(list_to_encode): - try: - res = openai.Embedding.create(input=[text], engine=MODEL) - embeds.append(res['data'][0]['embedding']) - except: - embeds.append([0] * 1536) + embeds = self._embed_texts_with_fallback(list_to_encode) df["values"] = embeds meta = DatasetMetadata.empty() meta.dense_model.dimension = len(embeds[0]) @@ -491,7 +757,7 @@ def _update_table_with_issues(self, issues_list, repo_name_for_index, ingest=Fal counter = 0 for issue in issues_list: - if issue.pull_request: + if getattr(issue, "pull_request", None): continue counter += 1 @@ -503,10 +769,10 @@ def _update_table_with_issues(self, issues_list, repo_name_for_index, ingest=Fal issue_str, comments, number = self._process_issue(issue) issue_key = f"issue_{number}" - username = issue.user.login + username = self._get_issue_username(issue) created_at = str(issue.created_at) if len(issue_str) < 8000 or \ - self.token_handler.count_tokens(issue_str) < get_max_tokens(MODEL): # fast reject first + self.token_handler.count_tokens(issue_str) < self.embedding_max_tokens: # fast reject first issue_record = Record( id=issue_key + "." + "issue", text=issue_str, @@ -524,7 +790,7 @@ def _update_table_with_issues(self, issues_list, repo_name_for_index, ingest=Fal continue if len(comment_body) < 8000 or \ - self.token_handler.count_tokens(comment_body) < MAX_TOKENS[MODEL]: + self.token_handler.count_tokens(comment_body) < self.embedding_max_tokens: comment_record = Record( id=issue_key + ".comment_" + str(j + 1), text=comment_body, @@ -538,20 +804,8 @@ def _update_table_with_issues(self, issues_list, repo_name_for_index, ingest=Fal get_logger().info('Done') get_logger().info('Embedding...') - openai.api_key = get_settings().openai.key list_to_encode = list(df["text"].values) - try: - res = openai.Embedding.create(input=list_to_encode, engine=MODEL) - embeds = [record['embedding'] for record in res['data']] - except: - embeds = [] - get_logger().error('Failed to embed entire list, embedding one by one...') - for i, text in enumerate(list_to_encode): - try: - res = openai.Embedding.create(input=[text], engine=MODEL) - embeds.append(res['data'][0]['embedding']) - except: - embeds.append([0] * 1536) + embeds = self._embed_texts_with_fallback(list_to_encode) df["vector"] = embeds get_logger().info('Done') @@ -573,7 +827,6 @@ def _update_qdrant_with_issues(self, issues_list, repo_name_for_index, ingest=Fa try: import uuid - import pandas as pd from qdrant_client.models import PointStruct except Exception: raise @@ -589,7 +842,7 @@ def _update_qdrant_with_issues(self, issues_list, repo_name_for_index, ingest=Fa counter = 0 for issue in issues_list: - if issue.pull_request: + if getattr(issue, "pull_request", None): continue counter += 1 @@ -601,10 +854,10 @@ def _update_qdrant_with_issues(self, issues_list, repo_name_for_index, ingest=Fa issue_str, comments, number = self._process_issue(issue) issue_key = f"issue_{number}" - username = issue.user.login + username = self._get_issue_username(issue) created_at = str(issue.created_at) if len(issue_str) < 8000 or \ - self.token_handler.count_tokens(issue_str) < get_max_tokens(MODEL): + self.token_handler.count_tokens(issue_str) < self.embedding_max_tokens: issue_record = Record( id=issue_key + "." + "issue", text=issue_str, @@ -622,7 +875,7 @@ def _update_qdrant_with_issues(self, issues_list, repo_name_for_index, ingest=Fa continue if len(comment_body) < 8000 or \ - self.token_handler.count_tokens(comment_body) < MAX_TOKENS[MODEL]: + self.token_handler.count_tokens(comment_body) < self.embedding_max_tokens: comment_record = Record( id=issue_key + ".comment_" + str(j + 1), text=comment_body, @@ -633,30 +886,19 @@ def _update_qdrant_with_issues(self, issues_list, repo_name_for_index, ingest=Fa ) corpus.append(comment_record) - df = pd.DataFrame(corpus.dict()["documents"]) + documents = corpus.dict()["documents"] get_logger().info('Done') get_logger().info('Embedding...') - openai.api_key = get_settings().openai.key - list_to_encode = list(df["text"].values) - try: - res = openai.Embedding.create(input=list_to_encode, engine=MODEL) - embeds = [record['embedding'] for record in res['data']] - except Exception: - embeds = [] - get_logger().error('Failed to embed entire list, embedding one by one...') - for i, text in enumerate(list_to_encode): - try: - res = openai.Embedding.create(input=[text], engine=MODEL) - embeds.append(res['data'][0]['embedding']) - except Exception: - embeds.append([0] * 1536) - df["vector"] = embeds + list_to_encode = [doc["text"] for doc in documents] + embeds = self._embed_texts_with_fallback(list_to_encode) + for doc, vector in zip(documents, embeds): + doc["vector"] = vector get_logger().info('Done') get_logger().info('Upserting into Qdrant...') points = [] - for row in df.to_dict(orient="records"): + for row in documents: points.append( PointStruct(id=uuid.uuid5(uuid.NAMESPACE_DNS, row["id"]).hex, vector=row["vector"], payload={"id": row["id"], "text": row["text"], "metadata": row["metadata"]}) ) diff --git a/pr_agent/tools/ticket_pr_compliance_check.py b/pr_agent/tools/ticket_pr_compliance_check.py index 523e21f921..30bba1b775 100644 --- a/pr_agent/tools/ticket_pr_compliance_check.py +++ b/pr_agent/tools/ticket_pr_compliance_check.py @@ -11,27 +11,6 @@ r'(https://github[^/]+/[^/]+/[^/]+/issues/\d+)|(\b(\w+)/(\w+)#(\d+)\b)|(#\d+)' ) -def find_jira_tickets(text): - # Regular expression patterns for JIRA tickets - patterns = [ - r'\b[A-Z]{2,10}-\d{1,7}\b', # Standard JIRA ticket format (e.g., PROJ-123) - r'(?:https?://[^\s/]+/browse/)?([A-Z]{2,10}-\d{1,7})\b' # JIRA URL or just the ticket - ] - - tickets = set() - for pattern in patterns: - matches = re.findall(pattern, text) - for match in matches: - if isinstance(match, tuple): - # If it's a tuple (from the URL pattern), take the last non-empty group - ticket = next((m for m in reversed(match) if m), None) - else: - ticket = match - if ticket: - tickets.add(ticket) - - return list(tickets) - def extract_ticket_links_from_pr_description(pr_description, repo_path, base_url_html='https://github.com'): """ diff --git a/tests/unittest/test_issue_provider_resolver.py b/tests/unittest/test_issue_provider_resolver.py new file mode 100644 index 0000000000..1652f9071c --- /dev/null +++ b/tests/unittest/test_issue_provider_resolver.py @@ -0,0 +1,10 @@ +from pr_agent.issue_providers.resolver import resolve_issue_provider_name + + +def test_resolve_issue_provider_defaults_to_git_provider(): + assert resolve_issue_provider_name("auto", "gitlab") == "gitlab" + assert resolve_issue_provider_name("auto", "github") == "github" + + +def test_resolve_issue_provider_explicit_choice(): + assert resolve_issue_provider_name("jira", "gitlab") == "jira" diff --git a/tests/unittest/test_jira_issue_provider.py b/tests/unittest/test_jira_issue_provider.py new file mode 100644 index 0000000000..2ddde6bcc2 --- /dev/null +++ b/tests/unittest/test_jira_issue_provider.py @@ -0,0 +1,58 @@ +import json +from unittest.mock import MagicMock, patch + +from pr_agent.issue_providers.jira_issue_provider import JiraIssueProvider + + +def _mock_response(payload: dict): + response = MagicMock() + response.read.return_value = json.dumps(payload).encode("utf-8") + response.__enter__.return_value = response + return response + + +def test_build_jql_prefers_explicit(): + provider = JiraIssueProvider(settings={"JIRA": {"ISSUE_JQL": "project = ABC"}}, project_path="org/repo") + assert provider._build_jql("org/repo") == "project = ABC" + + +def test_build_jql_uses_project_map(): + provider = JiraIssueProvider( + settings={"JIRA": {"ISSUE_PROJECT_MAP": {"org/repo": ["ABC", "DEF"]}}}, + project_path="org/repo", + ) + assert provider._build_jql("org/repo") == "project in (ABC, DEF) order by created DESC" + + +def test_list_issues_parses_payload(): + payload = { + "issues": [ + { + "key": "ABC-1", + "id": "10001", + "fields": { + "summary": "Test issue", + "description": "Body text", + "created": "2025-01-01T00:00:00.000+0000", + "reporter": {"displayName": "Alice"}, + }, + } + ] + } + provider = JiraIssueProvider( + settings={ + "JIRA": { + "BASE_URL": "https://jira.example.com", + "API_EMAIL": "user@example.com", + "API_TOKEN": "token", + "ISSUE_JQL": "project = ABC", + } + }, + project_path="org/repo", + ) + with patch("pr_agent.issue_providers.jira_issue_provider.urllib.request.urlopen", return_value=_mock_response(payload)): + issues = list(provider.list_issues()) + assert len(issues) == 1 + assert issues[0].key == "ABC-1" + assert issues[0].title == "Test issue" + assert issues[0].description == "Body text" diff --git a/tests/unittest/test_similar_issue_helpers.py b/tests/unittest/test_similar_issue_helpers.py new file mode 100644 index 0000000000..08c70e77b3 --- /dev/null +++ b/tests/unittest/test_similar_issue_helpers.py @@ -0,0 +1,49 @@ +from unittest.mock import MagicMock + +from pr_agent.algo.ticket_utils import find_jira_keys +from pr_agent.tools.pr_similar_issue import PRSimilarIssue + + +def test_build_query_from_mr_includes_title_and_description(): + tool = PRSimilarIssue.__new__(PRSimilarIssue) + mr = MagicMock() + mr.title = "Sample MR" + mr.description = "Some description" + + query = tool._build_query_from_mr(mr) + + assert query == 'MR Title: "Sample MR"\n\nMR Description:\nSome description' + + +def test_get_issue_number_prefers_iid(): + tool = PRSimilarIssue.__new__(PRSimilarIssue) + issue = MagicMock() + issue.iid = "42" + + assert tool._get_issue_number(issue) == 42 + + +def test_get_qdrant_vector_size_from_object(): + tool = PRSimilarIssue.__new__(PRSimilarIssue) + tool.index_name = "issues" + tool.qdrant = MagicMock() + + vectors = MagicMock() + vectors.size = 1024 + info = MagicMock() + info.config.params.vectors = vectors + tool.qdrant.get_collection.return_value = info + + assert tool._get_qdrant_vector_size() == 1024 + + +def test_extract_issue_iid_from_text(): + tool = PRSimilarIssue.__new__(PRSimilarIssue) + + assert tool._extract_issue_iid_from_text("Relates to #12 and #3") == 12 + assert tool._extract_issue_iid_from_text("No references here") is None + + +def test_find_jira_keys_extracts_unique(): + keys = find_jira_keys("Fixes ABC-123 and https://jira.example.com/browse/ABC-123") + assert keys == ["ABC-123"] From e43c2fe94bbbb32d3dc6d243069d68b809718b3e Mon Sep 17 00:00:00 2001 From: Evans Castonguay Date: Sat, 10 Jan 2026 11:05:36 -0500 Subject: [PATCH 02/11] fix: isolate jira similar issue index (cherry picked from commit 5c0ed614536b1ef7e118716f2b567d8b9fe1e87f) --- pr_agent/tools/pr_similar_issue.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pr_agent/tools/pr_similar_issue.py b/pr_agent/tools/pr_similar_issue.py index 3813956700..b38f2cbefe 100644 --- a/pr_agent/tools/pr_similar_issue.py +++ b/pr_agent/tools/pr_similar_issue.py @@ -96,6 +96,8 @@ def __init__(self, issue_url: str, ai_handler, args: list = None): repo_name_for_index = getattr(self.repo_obj, "path_with_namespace", self.project_path) repo_name_for_index = repo_name_for_index.lower().replace('/', '-').replace('_/', '-') + if self.issue_provider_name == "jira": + repo_name_for_index = f"{repo_name_for_index}-jira" self.repo_name_for_index = repo_name_for_index self.issue_provider = get_issue_provider( self.issue_provider_name, From 905e62dc12ca1f5f1100926c8e067b3187789d6b Mon Sep 17 00:00:00 2001 From: Evans Castonguay Date: Sat, 10 Jan 2026 11:10:48 -0500 Subject: [PATCH 03/11] fix: add jira search v3 fallback (cherry picked from commit bbaec74feeafb31cc5794c0a82332792fd8bccb6) --- .../issue_providers/jira_issue_provider.py | 23 +++++++++++-------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/pr_agent/issue_providers/jira_issue_provider.py b/pr_agent/issue_providers/jira_issue_provider.py index 9c4688e728..75d0d5452a 100644 --- a/pr_agent/issue_providers/jira_issue_provider.py +++ b/pr_agent/issue_providers/jira_issue_provider.py @@ -35,15 +35,16 @@ def list_issues(self, project_path: Optional[str] = None, state: str = "all") -> if not jql: get_logger().warning("Jira issue provider has no JQL or project keys; skipping issue listing.") return [] - data = self._request_json( - "search", - { - "jql": jql, - "maxResults": self.issue_max_results, - "fields": "summary,description,created,reporter", - }, - ) + params = { + "jql": jql, + "maxResults": self.issue_max_results, + "fields": "summary,description,created,reporter", + } + data = self._request_json("search", params, api_version=self.api_version) issues = data.get("issues", []) if isinstance(data, dict) else [] + if not issues and self.api_version < 3: + data = self._request_json("search/jql", params, api_version=3) + issues = data.get("issues", []) if isinstance(data, dict) else [] return [self._issue_from_payload(item) for item in issues] def get_issue(self, issue_id: str, project_path: Optional[str] = None) -> Optional[Issue]: @@ -53,6 +54,7 @@ def get_issue(self, issue_id: str, project_path: Optional[str] = None) -> Option data = self._request_json( f"issue/{urllib.parse.quote(issue_key)}", {"fields": "summary,description,created,reporter"}, + api_version=self.api_version, ) if not data: return None @@ -77,12 +79,13 @@ def _resolve_project_keys(self, project_path: Optional[str]) -> List[str]: keys = [key for key in keys if key in self.valid_project_keys] return keys - def _request_json(self, path: str, params: dict) -> dict: + def _request_json(self, path: str, params: dict, api_version: Optional[int] = None) -> dict: if not self.is_configured(): get_logger().warning("Jira client is not configured; skipping issue fetch") return {} query = urllib.parse.urlencode(params) - url = f"{self.base_url}/rest/api/{self.api_version}/{path}" + version = api_version or self.api_version + url = f"{self.base_url}/rest/api/{version}/{path}" if query: url = f"{url}?{query}" auth_token = base64.b64encode(f"{self.api_email}:{self.api_token}".encode("utf-8")).decode("utf-8") From c899e2ca3f1eb5544aabafac51c6f0d20e43f3fc Mon Sep 17 00:00:00 2001 From: Evans Castonguay Date: Sat, 10 Jan 2026 11:50:33 -0500 Subject: [PATCH 04/11] fix: harden issue provider resolution and jira ticket compliance (cherry picked from commit f90435f36a800a71529bf8e4fa7917f2375510c0) --- deploy/values/oci-int.yaml | 30 +++++++ .../core-abilities/fetching_ticket_context.md | 10 +++ docs/docs/tools/similar_issues.md | 72 +++++++++++++++- .../issue_providers/jira_issue_provider.py | 9 +- pr_agent/issue_providers/resolver.py | 23 +++++- pr_agent/tools/ticket_pr_compliance_check.py | 82 ++++++++++++++++++- .../unittest/test_issue_provider_resolver.py | 7 ++ .../test_ticket_pr_compliance_check.py | 55 +++++++++++++ 8 files changed, 279 insertions(+), 9 deletions(-) create mode 100644 deploy/values/oci-int.yaml create mode 100644 tests/unittest/test_ticket_pr_compliance_check.py diff --git a/deploy/values/oci-int.yaml b/deploy/values/oci-int.yaml new file mode 100644 index 0000000000..67d7b0d2d6 --- /dev/null +++ b/deploy/values/oci-int.yaml @@ -0,0 +1,30 @@ +fqdn: + prefix: "int" + suffix: "na.oc.cerence.net" + +gateway: "istio-system/gateway-int-na-oc-cerence-net" + +image: + tag: "dev" + +extraEnv: + - name: NOTIFICATIONS__NOTIFY_ON_REVIEW_PLUS + value: "true" + - name: CONFIG__ISSUE_PROVIDER + value: "jira" + - name: JIRA__BASE_URL + value: "https://cerence.atlassian.net" + - name: JIRA__ISSUE_PROJECTS + value: "XUITXTSRV" + +prSimilarIssue: + vectorDb: "qdrant" + embeddingBaseUrl: "https://callm-api-embedding.int.na.oc.cerence.net/v1/embeddings" + embeddingModel: "intfloat/multilingual-e5-large" + embeddingDim: 1024 + embeddingMaxTokens: 10000 + +qdrant: + enabled: true + persistence: + enabled: false diff --git a/docs/docs/core-abilities/fetching_ticket_context.md b/docs/docs/core-abilities/fetching_ticket_context.md index 09093ddf5b..0383944bec 100644 --- a/docs/docs/core-abilities/fetching_ticket_context.md +++ b/docs/docs/core-abilities/fetching_ticket_context.md @@ -164,6 +164,16 @@ jira_api_token = "YOUR_API_TOKEN" jira_api_email = "YOUR_EMAIL" ``` +To use Jira as the issue provider for ticket compliance (and `/similar_issue`), enable it explicitly: + +```toml +[config] +issue_provider = "jira" + +[jira] +issue_projects = ["ABC"] # or issue_jql = "project = ABC order by created DESC" +``` + ### Jira Data Center/Server [//]: # () diff --git a/docs/docs/tools/similar_issues.md b/docs/docs/tools/similar_issues.md index a2ce8f7e92..e50103db87 100644 --- a/docs/docs/tools/similar_issues.md +++ b/docs/docs/tools/similar_issues.md @@ -1,7 +1,7 @@ ## Overview -The similar issue tool retrieves the most similar issues to the current issue. -It can be invoked manually by commenting on any PR: +The similar issue tool retrieves the most similar issues to the current issue or MR context. +It can be invoked manually by commenting on any PR/MR: ``` /similar_issue @@ -15,8 +15,69 @@ It can be invoked manually by commenting on any PR: ![similar_issue](https://codium.ai/images/pr_agent/similar_issue.png){width=768} +### GitLab example (MR comment) + +Comment on an MR: + +``` +/similar_issue +``` + +Example output posted to the MR: + +``` +### Similar Issues +___ + +1. **[Add retry logic for HTTP client](https://gitlab.example.com/org/repo/-/issues/1)** (score=0.91) +2. **[Cache embeddings for faster review](https://gitlab.example.com/org/repo/-/issues/3)** (score=0.89) +``` + Note that to perform retrieval, the `similar_issue` tool indexes all the repo previous issues (once). +## Indexing lifecycle and scope + +### What is indexed +- Issues and (optionally) issue comments only. MRs are not indexed. +- Each vector includes `repo`, `username`, `created_at`, and `level` (issue or comment). + +### When indexing happens +- On demand, the first time `/similar_issue` is called for a repo. +- A per-repo marker record is stored to avoid re-indexing the same repo. +- On later runs, only new issues are appended (based on issue IDs). + +### Query scope +- One shared collection is used, but queries always filter to the current repo. +- GitLab: the query text comes from MR title + description. If the MR text includes `#`, that GitLab issue is used as the query source, but the output still posts on the MR. + +```mermaid +flowchart TD + A[Comment /similar_issue on MR] --> B{Repo indexed?} + B -- No --> C[Fetch repo issues + comments] + C --> D[Embed + upsert vectors to vector DB] + B -- Yes --> E[Check for new issues] + E --> F{New issues?} + F -- Yes --> D + F -- No --> G[Build query] + D --> G[Build query] + G --> H[Query vector DB (filter by repo)] + H --> I[Post Similar Issues on MR] +``` + +## Embedding configuration + +The tool uses an OpenAI-compatible embeddings endpoint. Configure it in `configuration.toml` (or via env vars): + +``` +[pr_similar_issue] +embedding_base_url = "https://your-embeddings-host/v1/embeddings" +embedding_model = "intfloat/multilingual-e5-large" +embedding_dim = 1024 +embedding_max_tokens = 10000 +``` + +If the embedding endpoint requires auth, set `PR_SIMILAR_ISSUE__EMBEDDING_API_KEY` as an environment variable. + ### Selecting a Vector Database Configure your preferred database by changing the `pr_similar_issue` parameter in `configuration.toml` file. @@ -59,13 +120,18 @@ vectordb = "qdrant" ``` You can get a free managed Qdrant instance from [Qdrant Cloud](https://cloud.qdrant.io/). +Ensure the Qdrant collection dimension matches `embedding_dim`. If you change models, set +`pr_similar_issue.force_update_dataset=true` to rebuild the collection. ## How to use - To invoke the 'similar issue' tool from **CLI**, run: `python3 cli.py --issue_url=... similar_issue` -- To invoke the 'similar' issue tool via online usage, [comment](https://github.com/Codium-ai/pr-agent/issues/178#issuecomment-1716934893) on a PR: +- To invoke the 'similar issue' tool via online usage, [comment](https://github.com/Codium-ai/pr-agent/issues/178#issuecomment-1716934893) on a PR/MR: `/similar_issue` +- GitLab: if run from an MR comment, the query uses the MR title + description. If the MR text includes an issue reference (e.g., `#123`), that issue is used as the query source, but the output is still posted on the MR. If run from CLI with `--issue_url`, the query uses that issue. +- Jira: set `issue_provider="jira"` and configure `[jira]` with either `issue_projects` (or `issue_project_map`) or `issue_jql`. When enabled, `/similar_issue` indexes Jira issues instead of GitLab/GitHub issues. If the MR text includes Jira keys (e.g., `ABC-123`), those tickets are used as the query source; otherwise it uses the MR title + description. + - You can also enable the 'similar issue' tool to run automatically when a new issue is opened, by adding it to the [pr_commands list in the github_app section](https://github.com/Codium-ai/pr-agent/blob/main/pr_agent/settings/configuration.toml#L66) diff --git a/pr_agent/issue_providers/jira_issue_provider.py b/pr_agent/issue_providers/jira_issue_provider.py index 75d0d5452a..43c539306e 100644 --- a/pr_agent/issue_providers/jira_issue_provider.py +++ b/pr_agent/issue_providers/jira_issue_provider.py @@ -18,7 +18,7 @@ def __init__(self, settings=None, project_path: Optional[str] = None, timeout_se self.base_url = (jira_settings.get("BASE_URL") or "").rstrip("/") self.api_email = jira_settings.get("API_EMAIL") or "" self.api_token = jira_settings.get("API_TOKEN") or "" - self.api_version = jira_settings.get("API_VERSION", 2) + self.api_version = _coerce_int(jira_settings.get("API_VERSION", 2), default=2) self.issue_jql = (jira_settings.get("ISSUE_JQL") or "").strip() self.issue_projects = _normalize_list(jira_settings.get("ISSUE_PROJECTS", [])) self.issue_project_map = jira_settings.get("ISSUE_PROJECT_MAP", {}) or {} @@ -145,6 +145,13 @@ def _normalize_list(value: object) -> List[str]: return [str(item).strip().upper() for item in value if str(item).strip()] +def _coerce_int(value: object, default: int) -> int: + try: + return int(value) + except (TypeError, ValueError): + return default + + def _normalize_project_map(value: object) -> dict: if not value: return {} diff --git a/pr_agent/issue_providers/resolver.py b/pr_agent/issue_providers/resolver.py index 3bf3b03d63..e6db277969 100644 --- a/pr_agent/issue_providers/resolver.py +++ b/pr_agent/issue_providers/resolver.py @@ -8,10 +8,27 @@ from pr_agent.issue_providers.jira_issue_provider import JiraIssueProvider -def resolve_issue_provider_name(config_value: Optional[str], git_provider_name: Optional[str]) -> str: - value = (config_value or "auto").strip().lower() +def _normalize_provider_name(value: Optional[object]) -> Optional[str]: + if value is None: + return None + if callable(value): + try: + value = value() + except Exception: + return None + if value is None: + return None + if isinstance(value, str): + value = value.strip() + else: + value = str(value).strip() + return value.lower() if value else None + + +def resolve_issue_provider_name(config_value: Optional[str], git_provider_name: Optional[object]) -> str: + value = _normalize_provider_name(config_value) or "auto" if value == "auto": - return (git_provider_name or "gitlab").strip().lower() + return _normalize_provider_name(git_provider_name) or "gitlab" return value diff --git a/pr_agent/tools/ticket_pr_compliance_check.py b/pr_agent/tools/ticket_pr_compliance_check.py index 30bba1b775..abddd0a4dd 100644 --- a/pr_agent/tools/ticket_pr_compliance_check.py +++ b/pr_agent/tools/ticket_pr_compliance_check.py @@ -1,9 +1,10 @@ import re import traceback +from pr_agent.algo.ticket_utils import find_jira_keys from pr_agent.config_loader import get_settings -from pr_agent.git_providers import GithubProvider -from pr_agent.git_providers import AzureDevopsProvider +from pr_agent.git_providers import AzureDevopsProvider, GithubProvider +from pr_agent.issue_providers import get_issue_provider, resolve_issue_provider_name from pr_agent.log import get_logger # Compile the regex pattern once, outside the function @@ -12,6 +13,48 @@ ) +def _get_pr_title(git_provider) -> str: + for attr in ("mr", "pr"): + pr_obj = getattr(git_provider, attr, None) + title = getattr(pr_obj, "title", None) + if title: + return title + return "" + + +def _build_jira_context_text(git_provider) -> str: + parts = [] + try: + title = _get_pr_title(git_provider) + if title: + parts.append(title) + except Exception: + pass + try: + description = git_provider.get_user_description() or "" + if description: + parts.append(description) + except Exception: + pass + try: + branch = git_provider.get_pr_branch() or "" + if branch: + parts.append(branch) + except Exception: + pass + try: + commit_messages = git_provider.get_commit_messages() or "" + if commit_messages: + parts.append(commit_messages) + except Exception: + pass + return "\n".join(parts) + + +def _resolve_issue_provider_project_path(git_provider) -> str | None: + return getattr(git_provider, "id_project", None) or getattr(git_provider, "repo", None) + + def extract_ticket_links_from_pr_description(pr_description, repo_path, base_url_html='https://github.com'): """ Extract all ticket links from PR description @@ -46,6 +89,41 @@ def extract_ticket_links_from_pr_description(pr_description, repo_path, base_url async def extract_tickets(git_provider): MAX_TICKET_CHARACTERS = 10000 try: + issue_provider_name = resolve_issue_provider_name( + get_settings().get("CONFIG.ISSUE_PROVIDER", "auto"), + get_settings().config.git_provider, + ) + if issue_provider_name == "jira": + jira_context = _build_jira_context_text(git_provider) + jira_keys = find_jira_keys(jira_context) + if len(jira_keys) > 3: + get_logger().info(f"Too many Jira keys found in PR context: {len(jira_keys)}") + jira_keys = jira_keys[:3] + tickets_content = [] + if jira_keys: + project_path = _resolve_issue_provider_project_path(git_provider) + issue_provider = get_issue_provider("jira", project_path=project_path) + for jira_key in jira_keys: + try: + issue_main = issue_provider.get_issue(jira_key, project_path) + except Exception as e: + get_logger().warning(f"Failed to fetch Jira issue {jira_key}: {e}") + continue + if not issue_main: + continue + issue_body_str = issue_main.body or "" + if len(issue_body_str) > MAX_TICKET_CHARACTERS: + issue_body_str = issue_body_str[:MAX_TICKET_CHARACTERS] + "..." + tickets_content.append({ + "ticket_id": issue_main.key, + "ticket_url": issue_main.url, + "title": issue_main.title, + "body": issue_body_str, + "labels": "", + "sub_issues": [], + }) + return tickets_content + if isinstance(git_provider, GithubProvider): user_description = git_provider.get_user_description() tickets = extract_ticket_links_from_pr_description(user_description, git_provider.repo, git_provider.base_url_html) diff --git a/tests/unittest/test_issue_provider_resolver.py b/tests/unittest/test_issue_provider_resolver.py index 1652f9071c..4ecc15e9dc 100644 --- a/tests/unittest/test_issue_provider_resolver.py +++ b/tests/unittest/test_issue_provider_resolver.py @@ -8,3 +8,10 @@ def test_resolve_issue_provider_defaults_to_git_provider(): def test_resolve_issue_provider_explicit_choice(): assert resolve_issue_provider_name("jira", "gitlab") == "jira" + + +def test_resolve_issue_provider_handles_callable_git_provider(): + def provider_name(): + return "CodeCommit" + + assert resolve_issue_provider_name("auto", provider_name) == "codecommit" diff --git a/tests/unittest/test_ticket_pr_compliance_check.py b/tests/unittest/test_ticket_pr_compliance_check.py new file mode 100644 index 0000000000..6cdcaeaeab --- /dev/null +++ b/tests/unittest/test_ticket_pr_compliance_check.py @@ -0,0 +1,55 @@ +import types + +import pytest + +from pr_agent.issue_providers.base import Issue +from pr_agent.tools import ticket_pr_compliance_check + + +class DummyGitProvider: + def get_user_description(self): + return "Implements ABC-1 and follow-up work for ABC-2." + + def get_pr_branch(self): + return "feature/ABC-2-add-tests" + + def get_commit_messages(self): + return "Refs ABC-1" + + +class DummyJiraProvider: + def __init__(self, issues): + self._issues = issues + + def get_issue(self, issue_id, project_path=None): + return self._issues.get(issue_id) + + +@pytest.mark.asyncio +async def test_extract_tickets_uses_jira_provider(monkeypatch): + issue_one = Issue( + key="ABC-1", + title="Issue one", + description="Body one", + url="https://jira.example.com/browse/ABC-1", + ) + issue_two = Issue( + key="ABC-2", + title="Issue two", + description="Body two", + url="https://jira.example.com/browse/ABC-2", + ) + dummy_provider = DummyJiraProvider({"ABC-1": issue_one, "ABC-2": issue_two}) + dummy_settings = types.SimpleNamespace( + config=types.SimpleNamespace(git_provider="gitlab"), + get=lambda key, default=None: "jira" if key == "CONFIG.ISSUE_PROVIDER" else default, + ) + + monkeypatch.setattr(ticket_pr_compliance_check, "get_issue_provider", lambda *args, **kwargs: dummy_provider) + monkeypatch.setattr(ticket_pr_compliance_check, "get_settings", lambda: dummy_settings) + + tickets = await ticket_pr_compliance_check.extract_tickets(DummyGitProvider()) + + assert [ticket["ticket_id"] for ticket in tickets] == ["ABC-1", "ABC-2"] + assert tickets[0]["ticket_url"] == "https://jira.example.com/browse/ABC-1" + assert tickets[1]["title"] == "Issue two" From 3dc4c29ee8b54995822b2194fd57c041b9664594 Mon Sep 17 00:00:00 2001 From: Evans Castonguay Date: Sat, 10 Jan 2026 12:47:17 -0500 Subject: [PATCH 05/11] feat: improve jira issue parity and ticket extraction (cherry picked from commit 2317af75758cca9c37f3916b2fbef9b27374a7c1) --- pr_agent/issue_providers/base.py | 1 + .../issue_providers/jira_issue_provider.py | 44 +++++++++++- pr_agent/tools/ticket_pr_compliance_check.py | 70 ++++++++++++++----- tests/unittest/test_jira_issue_provider.py | 36 ++++++++++ .../test_ticket_pr_compliance_check.py | 58 +++++++++++++++ 5 files changed, 190 insertions(+), 19 deletions(-) diff --git a/pr_agent/issue_providers/base.py b/pr_agent/issue_providers/base.py index aa941075c0..9ce2fabc9c 100644 --- a/pr_agent/issue_providers/base.py +++ b/pr_agent/issue_providers/base.py @@ -22,6 +22,7 @@ class Issue: created_at: Optional[str] = None author: Optional[dict] = None comments: List[IssueComment] = field(default_factory=list) + labels: List[str] = field(default_factory=list) @property def body(self) -> str: diff --git a/pr_agent/issue_providers/jira_issue_provider.py b/pr_agent/issue_providers/jira_issue_provider.py index 43c539306e..d7a03e97be 100644 --- a/pr_agent/issue_providers/jira_issue_provider.py +++ b/pr_agent/issue_providers/jira_issue_provider.py @@ -7,7 +7,7 @@ from typing import Iterable, List, Optional from pr_agent.config_loader import get_settings -from pr_agent.issue_providers.base import Issue, IssueProvider +from pr_agent.issue_providers.base import Issue, IssueComment, IssueProvider from pr_agent.log import get_logger @@ -38,7 +38,7 @@ def list_issues(self, project_path: Optional[str] = None, state: str = "all") -> params = { "jql": jql, "maxResults": self.issue_max_results, - "fields": "summary,description,created,reporter", + "fields": "summary,description,created,reporter,labels,subtasks", } data = self._request_json("search", params, api_version=self.api_version) issues = data.get("issues", []) if isinstance(data, dict) else [] @@ -53,7 +53,7 @@ def get_issue(self, issue_id: str, project_path: Optional[str] = None) -> Option return None data = self._request_json( f"issue/{urllib.parse.quote(issue_key)}", - {"fields": "summary,description,created,reporter"}, + {"fields": "summary,description,created,reporter,labels,subtasks"}, api_version=self.api_version, ) if not data: @@ -108,6 +108,8 @@ def _issue_from_payload(self, issue: dict) -> Issue: created_at = fields.get("created") or "" reporter = fields.get("reporter") or {} author = {"username": reporter.get("displayName") or reporter.get("name") or reporter.get("emailAddress") or ""} + labels = fields.get("labels") or [] + labels = labels if isinstance(labels, list) else [] return Issue( key=key, title=summary, @@ -115,8 +117,44 @@ def _issue_from_payload(self, issue: dict) -> Issue: url=f"{self.base_url}/browse/{key}" if self.base_url else "", created_at=created_at, author=author, + labels=labels, ) + def get_issue_comments(self, issue) -> List[IssueComment]: + issue_key = getattr(issue, "key", None) or getattr(issue, "id", None) + if not issue_key: + return [] + data = self._request_json( + f"issue/{urllib.parse.quote(str(issue_key))}/comment", + {}, + api_version=self.api_version, + ) + comments = data.get("comments", []) if isinstance(data, dict) else [] + results = [] + for comment in comments: + body = comment.get("body") or "" + if not body: + continue + author_obj = comment.get("author") or {} + author = "" + if isinstance(author_obj, dict): + author = author_obj.get("displayName") or author_obj.get("name") or author_obj.get("emailAddress") or "" + cid = str(comment.get("id") or "") + results.append( + IssueComment( + body=body, + url=self._build_comment_url(issue_key, cid), + id=cid, + author=author, + ) + ) + return results + + def _build_comment_url(self, issue_key: str, comment_id: str) -> str: + if not self.base_url or not issue_key or not comment_id: + return "" + return f"{self.base_url}/browse/{issue_key}?focusedCommentId={comment_id}" + @staticmethod def _normalize_description(description: object) -> str: if description is None: diff --git a/pr_agent/tools/ticket_pr_compliance_check.py b/pr_agent/tools/ticket_pr_compliance_check.py index abddd0a4dd..685522883c 100644 --- a/pr_agent/tools/ticket_pr_compliance_check.py +++ b/pr_agent/tools/ticket_pr_compliance_check.py @@ -8,8 +8,8 @@ from pr_agent.log import get_logger # Compile the regex pattern once, outside the function -GITHUB_TICKET_PATTERN = re.compile( - r'(https://github[^/]+/[^/]+/[^/]+/issues/\d+)|(\b(\w+)/(\w+)#(\d+)\b)|(#\d+)' +ISSUE_LINK_PATTERN = re.compile( + r'(https?://[^\s]+/(?:[^/]+/){2,3}(?:-|)issues/\d+)|(\b(\w+)/(\w+)#(\d+)\b)|(#[0-9]+)' ) @@ -59,40 +59,48 @@ def extract_ticket_links_from_pr_description(pr_description, repo_path, base_url """ Extract all ticket links from PR description """ - github_tickets = set() + ticket_links = set() try: # Use the updated pattern to find matches - matches = GITHUB_TICKET_PATTERN.findall(pr_description) + matches = ISSUE_LINK_PATTERN.findall(pr_description) for match in matches: if match[0]: # Full URL match - github_tickets.add(match[0]) + ticket_links.add(match[0]) elif match[1]: # Shorthand notation match: owner/repo#issue_number owner, repo, issue_number = match[2], match[3], match[4] - github_tickets.add(f'{base_url_html.strip("/")}/{owner}/{repo}/issues/{issue_number}') + ticket_links.add(f'{base_url_html.strip("/")}/{owner}/{repo}/issues/{issue_number}') else: # #123 format issue_number = match[5][1:] # remove # if issue_number.isdigit() and len(issue_number) < 5 and repo_path: - github_tickets.add(f'{base_url_html.strip("/")}/{repo_path}/issues/{issue_number}') + ticket_links.add(f'{base_url_html.strip("/")}/{repo_path}/issues/{issue_number}') - if len(github_tickets) > 3: - get_logger().info(f"Too many tickets found in PR description: {len(github_tickets)}") - # Limit the number of tickets to 3 - github_tickets = set(list(github_tickets)[:3]) + if len(ticket_links) > 3: + get_logger().info(f"Too many tickets found in PR description: {len(ticket_links)}") + # Limit the number of tickets to 3 + ticket_links = set(list(ticket_links)[:3]) except Exception as e: get_logger().error(f"Error extracting tickets error= {e}", artifact={"traceback": traceback.format_exc()}) - return list(github_tickets) + return list(ticket_links) async def extract_tickets(git_provider): MAX_TICKET_CHARACTERS = 10000 try: + git_provider_name = getattr(git_provider, "provider_name", None) + if callable(git_provider_name): + try: + git_provider_name = git_provider_name() + except Exception: + git_provider_name = None issue_provider_name = resolve_issue_provider_name( get_settings().get("CONFIG.ISSUE_PROVIDER", "auto"), - get_settings().config.git_provider, + git_provider_name or get_settings().config.git_provider, ) + project_path = _resolve_issue_provider_project_path(git_provider) + if issue_provider_name == "jira": jira_context = _build_jira_context_text(git_provider) jira_keys = find_jira_keys(jira_context) @@ -101,7 +109,6 @@ async def extract_tickets(git_provider): jira_keys = jira_keys[:3] tickets_content = [] if jira_keys: - project_path = _resolve_issue_provider_project_path(git_provider) issue_provider = get_issue_provider("jira", project_path=project_path) for jira_key in jira_keys: try: @@ -119,11 +126,42 @@ async def extract_tickets(git_provider): "ticket_url": issue_main.url, "title": issue_main.title, "body": issue_body_str, - "labels": "", + "labels": ", ".join(issue_main.labels) if hasattr(issue_main, "labels") else "", "sub_issues": [], }) return tickets_content + if issue_provider_name == "gitlab" and project_path: + user_description = git_provider.get_user_description() + base_url = getattr(git_provider, "gitlab_url", "") + tickets = extract_ticket_links_from_pr_description(user_description, project_path, base_url) + tickets_content = [] + if tickets: + issue_provider = get_issue_provider("gitlab", git_provider=git_provider, project_path=project_path) + for ticket in tickets: + try: + _, issue_iid = git_provider._parse_issue_url(ticket) + issue_main = issue_provider.get_issue(issue_iid, project_path) + except Exception as e: + get_logger().error(f"Error getting GitLab issue: {e}", + artifact={"traceback": traceback.format_exc()}) + continue + if not issue_main: + continue + issue_body_str = getattr(issue_main, "description", "") or "" + if len(issue_body_str) > MAX_TICKET_CHARACTERS: + issue_body_str = issue_body_str[:MAX_TICKET_CHARACTERS] + "..." + labels = getattr(issue_main, "labels", []) or [] + tickets_content.append({ + "ticket_id": getattr(issue_main, "iid", getattr(issue_main, "id", None)), + "ticket_url": getattr(issue_main, "web_url", ticket), + "title": getattr(issue_main, "title", ""), + "body": issue_body_str, + "labels": ", ".join(labels), + "sub_issues": [], + }) + return tickets_content + if isinstance(git_provider, GithubProvider): user_description = git_provider.get_user_description() tickets = extract_ticket_links_from_pr_description(user_description, git_provider.repo, git_provider.base_url_html) @@ -189,7 +227,7 @@ async def extract_tickets(git_provider): return tickets_content - elif isinstance(git_provider, AzureDevopsProvider): + if isinstance(git_provider, AzureDevopsProvider): tickets_info = git_provider.get_linked_work_items() tickets_content = [] for ticket in tickets_info: diff --git a/tests/unittest/test_jira_issue_provider.py b/tests/unittest/test_jira_issue_provider.py index 2ddde6bcc2..43028e6ca8 100644 --- a/tests/unittest/test_jira_issue_provider.py +++ b/tests/unittest/test_jira_issue_provider.py @@ -56,3 +56,39 @@ def test_list_issues_parses_payload(): assert issues[0].key == "ABC-1" assert issues[0].title == "Test issue" assert issues[0].description == "Body text" + + +def test_get_issue_comments_parses_payload(): + issue_payload = { + "key": "ABC-1", + "fields": {"summary": "Test issue", "description": "Body", "created": "2025-01-01"}, + } + comments_payload = { + "comments": [ + { + "id": "200", + "body": "First comment", + "author": {"displayName": "Bob"}, + } + ] + } + provider = JiraIssueProvider( + settings={ + "JIRA": { + "BASE_URL": "https://jira.example.com", + "API_EMAIL": "user@example.com", + "API_TOKEN": "token", + } + }, + project_path="org/repo", + ) + with patch( + "pr_agent.issue_providers.jira_issue_provider.urllib.request.urlopen", + side_effect=[_mock_response(issue_payload), _mock_response(comments_payload)], + ): + issue = provider.get_issue("ABC-1") + comments = provider.get_issue_comments(issue) + assert len(comments) == 1 + assert comments[0].body == "First comment" + assert comments[0].author == "Bob" + assert comments[0].url.endswith("focusedCommentId=200") diff --git a/tests/unittest/test_ticket_pr_compliance_check.py b/tests/unittest/test_ticket_pr_compliance_check.py index 6cdcaeaeab..38fc5bbc22 100644 --- a/tests/unittest/test_ticket_pr_compliance_check.py +++ b/tests/unittest/test_ticket_pr_compliance_check.py @@ -32,12 +32,14 @@ async def test_extract_tickets_uses_jira_provider(monkeypatch): title="Issue one", description="Body one", url="https://jira.example.com/browse/ABC-1", + labels=["one"], ) issue_two = Issue( key="ABC-2", title="Issue two", description="Body two", url="https://jira.example.com/browse/ABC-2", + labels=["two"], ) dummy_provider = DummyJiraProvider({"ABC-1": issue_one, "ABC-2": issue_two}) dummy_settings = types.SimpleNamespace( @@ -53,3 +55,59 @@ async def test_extract_tickets_uses_jira_provider(monkeypatch): assert [ticket["ticket_id"] for ticket in tickets] == ["ABC-1", "ABC-2"] assert tickets[0]["ticket_url"] == "https://jira.example.com/browse/ABC-1" assert tickets[1]["title"] == "Issue two" + + +class DummyGitlabIssue: + def __init__(self, iid, title, description, labels, web_url): + self.iid = iid + self.title = title + self.description = description + self.labels = labels + self.web_url = web_url + + +class DummyGitlabProvider: + def __init__(self): + self.id_project = "group/repo" + self.gitlab_url = "https://gitlab.example.com" + + def get_user_description(self): + return "Relates to #5 and https://gitlab.example.com/group/repo/-/issues/6." + + def _parse_issue_url(self, url): + return ("group/repo", 5 if "5" in url else 6) + + +class DummyGitlabIssueProvider: + def __init__(self, issues): + self._issues = issues + + def get_issue(self, issue_id, project_path=None): + return self._issues.get(issue_id) + + +@pytest.mark.asyncio +async def test_extract_tickets_gitlab(monkeypatch): + issues = { + 5: DummyGitlabIssue(5, "Five", "Body five", ["bug"], "https://gitlab.example.com/group/repo/-/issues/5"), + 6: DummyGitlabIssue(6, "Six", "Body six", ["feature"], "https://gitlab.example.com/group/repo/-/issues/6"), + } + gitlab_provider = DummyGitlabProvider() + dummy_issue_provider = DummyGitlabIssueProvider(issues) + + dummy_settings = types.SimpleNamespace( + config=types.SimpleNamespace(git_provider="gitlab"), + get=lambda key, default=None: "gitlab" if key == "CONFIG.ISSUE_PROVIDER" else default, + ) + + monkeypatch.setattr(ticket_pr_compliance_check, "get_issue_provider", lambda *args, **kwargs: dummy_issue_provider) + monkeypatch.setattr(ticket_pr_compliance_check, "get_settings", lambda: dummy_settings) + + tickets = await ticket_pr_compliance_check.extract_tickets(gitlab_provider) + + assert len(tickets) == 2 + ids = sorted([t["ticket_id"] for t in tickets]) + assert ids == [5, 6] + bodies = " ".join([t["body"] for t in tickets]) + assert "Body five" in bodies + assert any(t["labels"] == "feature" for t in tickets) From 645a39e74450f5a0b0fa368e31d16086270793c4 Mon Sep 17 00:00:00 2001 From: Evans Castonguay Date: Sat, 10 Jan 2026 13:05:26 -0500 Subject: [PATCH 06/11] chore: suppress jira v2 warning when v3 fallback succeeds (cherry picked from commit ef4a8a7930cbd39515a3010fff2a1ca66f8dd4fa) --- pr_agent/issue_providers/jira_issue_provider.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pr_agent/issue_providers/jira_issue_provider.py b/pr_agent/issue_providers/jira_issue_provider.py index d7a03e97be..467dc80117 100644 --- a/pr_agent/issue_providers/jira_issue_provider.py +++ b/pr_agent/issue_providers/jira_issue_provider.py @@ -40,10 +40,10 @@ def list_issues(self, project_path: Optional[str] = None, state: str = "all") -> "maxResults": self.issue_max_results, "fields": "summary,description,created,reporter,labels,subtasks", } - data = self._request_json("search", params, api_version=self.api_version) + data = self._request_json("search", params, api_version=self.api_version, suppress_warning=True) issues = data.get("issues", []) if isinstance(data, dict) else [] if not issues and self.api_version < 3: - data = self._request_json("search/jql", params, api_version=3) + data = self._request_json("search/jql", params, api_version=3, suppress_warning=False) issues = data.get("issues", []) if isinstance(data, dict) else [] return [self._issue_from_payload(item) for item in issues] @@ -79,7 +79,7 @@ def _resolve_project_keys(self, project_path: Optional[str]) -> List[str]: keys = [key for key in keys if key in self.valid_project_keys] return keys - def _request_json(self, path: str, params: dict, api_version: Optional[int] = None) -> dict: + def _request_json(self, path: str, params: dict, api_version: Optional[int] = None, suppress_warning: bool = False) -> dict: if not self.is_configured(): get_logger().warning("Jira client is not configured; skipping issue fetch") return {} @@ -97,7 +97,8 @@ def _request_json(self, path: str, params: dict, api_version: Optional[int] = No payload = response.read().decode("utf-8") return json.loads(payload) except Exception as exc: - get_logger().warning("Failed to fetch Jira issues", artifact={"error": str(exc), "url": url}) + if not suppress_warning: + get_logger().warning("Failed to fetch Jira issues", artifact={"error": str(exc), "url": url}) return {} def _issue_from_payload(self, issue: dict) -> Issue: From 7783b0947c47f44a4135b3140738bfe6555644aa Mon Sep 17 00:00:00 2001 From: Evans Castonguay Date: Sat, 10 Jan 2026 13:51:45 -0500 Subject: [PATCH 07/11] fix: allow gitlab get_issue_comments without issue (cherry picked from commit 358e5974d49ec4676154e669e68744e2c23a6c9c) --- pr_agent/git_providers/gitlab_provider.py | 111 ++++++++++++++++++---- 1 file changed, 92 insertions(+), 19 deletions(-) diff --git a/pr_agent/git_providers/gitlab_provider.py b/pr_agent/git_providers/gitlab_provider.py index e9db1a3740..62ff8d48b2 100644 --- a/pr_agent/git_providers/gitlab_provider.py +++ b/pr_agent/git_providers/gitlab_provider.py @@ -71,7 +71,8 @@ def __init__(self, merge_request_url: Optional[str] = None, incremental: Optiona self.temp_comments = [] self._submodule_cache: dict[tuple[str, str, str], list[dict]] = {} self.pr_url = merge_request_url - self._set_merge_request(merge_request_url) + if merge_request_url and self._is_merge_request_url(merge_request_url): + self._set_merge_request(merge_request_url) self.RE_HUNK_HEADER = re.compile( r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)") self.incremental = incremental @@ -785,9 +786,6 @@ def get_pr_owner_id(self) -> str | None: def get_pr_description_full(self): return self.mr.description - def get_issue_comments(self): - return self.mr.notes.list(get_all=True)[::-1] - def get_repo_settings(self): try: main_branch = self.gl.projects.get(self.id_project).default_branch @@ -847,6 +845,13 @@ def remove_reaction(self, issue_comment_id: int, reaction_id: str) -> bool: get_logger().warning(f"Failed to remove reaction, error: {e}") return False + def _is_merge_request_url(self, url: str) -> bool: + try: + path_parts = urlparse(url).path.strip('/').split('/') + except Exception: + return False + return "merge_requests" in path_parts + def _parse_merge_request_url(self, merge_request_url: str) -> Tuple[str, int]: parsed_url = urlparse(merge_request_url) @@ -872,10 +877,64 @@ def _parse_merge_request_url(self, merge_request_url: str) -> Tuple[str, int]: # Return the path before 'merge_requests' and the ID return project_path, mr_id + def _parse_issue_url(self, issue_url: str) -> Tuple[str, int]: + parsed_url = urlparse(issue_url) + + path_parts = parsed_url.path.strip('/').split('/') + if 'issues' not in path_parts: + raise ValueError("The provided URL does not appear to be a GitLab issue URL") + + issues_index = path_parts.index('issues') + if len(path_parts) <= issues_index + 1: + raise ValueError("The provided URL does not contain an issue IID") + + try: + issue_iid = int(path_parts[issues_index + 1]) + except ValueError as e: + raise ValueError("Unable to convert issue IID to integer") from e + + project_parts = path_parts[:issues_index] + if project_parts and project_parts[-1] == '-': + project_parts = project_parts[:-1] + project_path = "/".join(project_parts) + if project_path.endswith('/-'): + project_path = project_path[:-2] + return project_path, issue_iid + def _get_merge_request(self): mr = self.gl.projects.get(self.id_project).mergerequests.get(self.id_mr) return mr + def _get_project(self, project_path: str): + try: + encoded = urllib.parse.quote_plus(project_path) + return self.gl.projects.get(encoded) + except Exception: + return self._project_by_path(project_path) + + def get_issue(self, issue_iid: int, project_path: Optional[str] = None): + project = self._get_project(project_path or self.id_project) + if project is None: + raise GitlabGetError("Project not found") + return project.issues.get(issue_iid) + + def list_issues(self, project_path: Optional[str] = None, state: str = "all"): + project = self._get_project(project_path or self.id_project) + if project is None: + raise GitlabGetError("Project not found") + return project.issues.list(state=state, iterator=True) + + def get_issue_comments(self, issue=None): + if issue is None: + try: + return self.mr.notes.list(get_all=True)[::-1] + except Exception: + return [] + return list(issue.notes.list(iterator=True)) + + def create_issue_comment(self, issue, body: str): + return issue.notes.create({"body": body}) + def get_user_id(self): return None @@ -954,22 +1013,36 @@ def generate_link_to_relevant_line_number(self, suggestion) -> str: return "" #Clone related def _prepare_clone_url_with_token(self, repo_url_to_clone: str) -> str | None: - if "gitlab." not in repo_url_to_clone: - get_logger().error(f"Repo URL: {repo_url_to_clone} is not a valid gitlab URL.") - return None - (scheme, base_url) = repo_url_to_clone.split("gitlab.") access_token = getattr(self.gl, 'oauth_token', None) or getattr(self.gl, 'private_token', None) - if not all([scheme, access_token, base_url]): - get_logger().error(f"Either no access token found, or repo URL: {repo_url_to_clone} " - f"is missing prefix: {scheme} and/or base URL: {base_url}.") + if not access_token: + get_logger().error("No access token found for GitLab clone.") return None - #Note that the ""official"" method found here: - # https://docs.gitlab.com/user/profile/personal_access_tokens/#clone-repository-using-personal-access-token - # requires a username, which may not be applicable. - # The following solution is taken from: https://stackoverflow.com/questions/25409700/using-gitlab-token-to-clone-without-authentication/35003812#35003812 - # For example: For repo url: https://gitlab.codium-inc.com/qodo/autoscraper.git - # Then to clone one will issue: 'git clone https://oauth2:@gitlab.codium-inc.com/qodo/autoscraper.git' + # Note: GitLab instances are not always hosted under a gitlab.* domain. + # Build a clone URL that works with any host (e.g., git.labs.hosting.cerence.net). + if repo_url_to_clone.startswith(("http://", "https://")): + try: + from urllib.parse import urlparse + parsed = urlparse(repo_url_to_clone) + if not parsed.scheme or not parsed.netloc: + raise ValueError("missing scheme or host") + netloc = parsed.netloc.split("@")[-1] + return f"{parsed.scheme}://oauth2:{access_token}@{netloc}{parsed.path}" + except Exception as exc: + get_logger().error( + f"Repo URL: {repo_url_to_clone} could not be parsed for clone.", + artifact={"error": str(exc)}, + ) + return None - clone_url = f"{scheme}oauth2:{access_token}@gitlab.{base_url}" - return clone_url + # Fallback to legacy gitlab.* parsing when a raw URL is provided. + if "gitlab." not in repo_url_to_clone: + get_logger().error(f"Repo URL: {repo_url_to_clone} is not a valid gitlab URL.") + return None + scheme, base_url = repo_url_to_clone.split("gitlab.") + if not all([scheme, base_url]): + get_logger().error( + f"Repo URL: {repo_url_to_clone} is missing prefix: {scheme} and/or base URL: {base_url}." + ) + return None + return f"{scheme}oauth2:{access_token}@gitlab.{base_url}" From e4f24576f59e985629f5963349e74f2a95905748 Mon Sep 17 00:00:00 2001 From: Evans Castonguay Date: Sat, 10 Jan 2026 15:41:44 -0500 Subject: [PATCH 08/11] refactor: simplify gitlab/github init and improve logging (cherry picked from commit 8c330c6a0a40d8ab9c6c045bc7c77a1c26a2a4db) --- pr_agent/tools/pr_similar_issue.py | 91 +++++++++++++++++------------- 1 file changed, 53 insertions(+), 38 deletions(-) diff --git a/pr_agent/tools/pr_similar_issue.py b/pr_agent/tools/pr_similar_issue.py index b38f2cbefe..5ab31e2e66 100644 --- a/pr_agent/tools/pr_similar_issue.py +++ b/pr_agent/tools/pr_similar_issue.py @@ -55,45 +55,9 @@ def __init__(self, issue_url: str, ai_handler, args: list = None): self.issue_provider = None self.jira_keys = [] if self.provider_name == "github": - repo_name, _ = self.git_provider._parse_issue_url(self.resource_url) - self.git_provider.repo = repo_name - self.repo_obj = self.git_provider.github_client.get_repo(repo_name) - self.git_provider.repo_obj = self.repo_obj - repo_name_for_index = self.repo_obj.full_name + repo_name_for_index = self._init_github_context() else: - if self.issue_provider_name != "jira" and self._is_issue_url(self.resource_url): - self.issue_context = True - self.project_path, self.issue_iid = self.git_provider._parse_issue_url(self.resource_url) - self.repo_obj = self.git_provider._get_project(self.project_path) - if self.repo_obj is None: - raise Exception(f"GitLab project not found: {self.project_path}") - self.git_provider.id_project = self.project_path - self.git_provider.repo_obj = self.repo_obj - else: - self.issue_context = False - if not getattr(self.git_provider, "mr", None): - raise Exception("GitLab merge request context is required for /similar_issue") - self.output_target = self.git_provider.mr - self.project_path = self.git_provider.id_project - self.repo_obj = self.git_provider.gl.projects.get(self.project_path) - self.git_provider.repo_obj = self.repo_obj - if self.issue_provider_name == "jira": - self.jira_keys = find_jira_keys(self.resource_url) - if not self.jira_keys: - self.jira_keys = self._extract_jira_keys_from_mr(self.git_provider.mr) - if self.jira_keys: - self.issue_context = True - self.issue_iid = self.jira_keys[0] - else: - issue_iid = self._extract_issue_iid_from_text(self._build_query_from_mr(self.git_provider.mr)) - if issue_iid: - try: - self._get_issue_by_number(issue_iid) - self.issue_context = True - self.issue_iid = issue_iid - except Exception: - get_logger().debug("Issue reference not found or inaccessible; falling back to MR context.") - repo_name_for_index = getattr(self.repo_obj, "path_with_namespace", self.project_path) + repo_name_for_index = self._init_gitlab_context() repo_name_for_index = repo_name_for_index.lower().replace('/', '-').replace('_/', '-') if self.issue_provider_name == "jira": @@ -635,6 +599,57 @@ def _get_issue_number(self, issue) -> int: return value_str raise ValueError("Issue number is missing") + def _init_github_context(self) -> str: + repo_name, _ = self.git_provider._parse_issue_url(self.resource_url) + self.git_provider.repo = repo_name + self.repo_obj = self.git_provider.github_client.get_repo(repo_name) + self.git_provider.repo_obj = self.repo_obj + return self.repo_obj.full_name + + def _init_gitlab_context(self) -> str: + # Issue URL path (non-Jira) – treat it as issue context + if self.issue_provider_name != "jira" and self._is_issue_url(self.resource_url): + self.issue_context = True + self.project_path, self.issue_iid = self.git_provider._parse_issue_url(self.resource_url) + self.repo_obj = self.git_provider._get_project(self.project_path) + if self.repo_obj is None: + raise Exception(f"GitLab project not found: {self.project_path}") + self.git_provider.id_project = self.project_path + self.git_provider.repo_obj = self.repo_obj + return getattr(self.repo_obj, "path_with_namespace", self.project_path) + + # MR context is required from here on + if not getattr(self.git_provider, "mr", None): + raise Exception("GitLab merge request context is required for /similar_issue") + + self.issue_context = False + self.output_target = self.git_provider.mr + self.project_path = self.git_provider.id_project + self.repo_obj = self.git_provider.gl.projects.get(self.project_path) + self.git_provider.repo_obj = self.repo_obj + + if self.issue_provider_name == "jira": + self.jira_keys = find_jira_keys(self.resource_url) + if not self.jira_keys: + self.jira_keys = self._extract_jira_keys_from_mr(self.git_provider.mr) + if self.jira_keys: + self.issue_context = True + self.issue_iid = self.jira_keys[0] + return getattr(self.repo_obj, "path_with_namespace", self.project_path) + + issue_iid = self._extract_issue_iid_from_text(self._build_query_from_mr(self.git_provider.mr)) + if issue_iid: + try: + self._get_issue_by_number(issue_iid) + self.issue_context = True + self.issue_iid = issue_iid + except Exception as exc: + get_logger().debug( + "Issue reference not found or inaccessible; falling back to MR context.", + artifact={"error": str(exc)}, + ) + return getattr(self.repo_obj, "path_with_namespace", self.project_path) + def _get_issue_username(self, issue) -> str: user = getattr(issue, "user", None) if user and getattr(user, "login", None): From 940a2de36db5458ab0cbe10c2f7e3ec6ab5c0cc9 Mon Sep 17 00:00:00 2001 From: Evans Castonguay Date: Sat, 10 Jan 2026 15:50:52 -0500 Subject: [PATCH 09/11] chore: add jira adf parsing and handle partial qdrant embeds (cherry picked from commit ea28ff71ef49570a3c9a728d6e790ad8e908048b) (cherry picked from commit 98769ce4797128abe2693c0e121fec48d1f5791b) --- .../issue_providers/jira_issue_provider.py | 16 ++++++++ pr_agent/tools/pr_similar_issue.py | 41 ++++++++----------- 2 files changed, 32 insertions(+), 25 deletions(-) diff --git a/pr_agent/issue_providers/jira_issue_provider.py b/pr_agent/issue_providers/jira_issue_provider.py index 467dc80117..8e92feff17 100644 --- a/pr_agent/issue_providers/jira_issue_provider.py +++ b/pr_agent/issue_providers/jira_issue_provider.py @@ -162,6 +162,22 @@ def _normalize_description(description: object) -> str: return "" if isinstance(description, str): return description + + # Handle Atlassian Document Format (ADF) + if isinstance(description, dict) and description.get("type") == "doc": + texts: list[str] = [] + + def extract_text(node): + if isinstance(node, dict): + if node.get("type") == "text" and "text" in node: + texts.append(node["text"]) + if "content" in node and isinstance(node["content"], list): + for child in node["content"]: + extract_text(child) + + extract_text(description) + return " ".join(texts) + try: return str(description) except Exception: diff --git a/pr_agent/tools/pr_similar_issue.py b/pr_agent/tools/pr_similar_issue.py index 5ab31e2e66..ef60731ddd 100644 --- a/pr_agent/tools/pr_similar_issue.py +++ b/pr_agent/tools/pr_similar_issue.py @@ -32,21 +32,7 @@ def __init__(self, issue_url: str, ai_handler, args: list = None): if not self.supported: return - self.cli_mode = get_settings().CONFIG.CLI_MODE - self.max_issues_to_scan = get_settings().pr_similar_issue.max_issues_to_scan - self.token_handler = TokenHandler() - self.embedding_model = get_settings().pr_similar_issue.get("embedding_model", DEFAULT_EMBEDDING_MODEL) - self.embedding_base_url = get_settings().pr_similar_issue.get("embedding_base_url", "") - self.embedding_api_key = get_settings().pr_similar_issue.get("embedding_api_key", "") - self.embedding_dim = get_settings().pr_similar_issue.get("embedding_dim", 1536) - self.embedding_max_tokens = get_settings().pr_similar_issue.get("embedding_max_tokens", 8000) - self.embedding_client = None - if self.embedding_base_url: - self.embedding_client = EmbeddingClient( - self.embedding_base_url, - self.embedding_model, - api_key=self.embedding_api_key or None, - ) + self._init_embedding_settings() self.repo_obj = None self.issue_iid = None self.project_path = None @@ -54,11 +40,8 @@ def __init__(self, issue_url: str, ai_handler, args: list = None): self.output_target = None self.issue_provider = None self.jira_keys = [] - if self.provider_name == "github": - repo_name_for_index = self._init_github_context() - else: - repo_name_for_index = self._init_gitlab_context() + repo_name_for_index = self._init_repo_context() repo_name_for_index = repo_name_for_index.lower().replace('/', '-').replace('_/', '-') if self.issue_provider_name == "jira": repo_name_for_index = f"{repo_name_for_index}-jira" @@ -486,18 +469,20 @@ def _embed_texts(self, list_to_encode: list[str]) -> list[list[float]]: res = openai.Embedding.create(input=list_to_encode, engine=self.embedding_model) return [record['embedding'] for record in res['data']] - def _embed_texts_with_fallback(self, list_to_encode: list[str]) -> list[list[float]]: + def _embed_texts_with_fallback(self, list_to_encode: list[str]) -> tuple[list[list[float]], list[int]]: try: - return self._embed_texts(list_to_encode) + return self._embed_texts(list_to_encode), list(range(len(list_to_encode))) except Exception: get_logger().error('Failed to embed entire list, embedding one by one...') embeds = [] - for text in list_to_encode: + successful_indices = [] + for idx, text in enumerate(list_to_encode): try: embeds.append(self._embed_texts([text])[0]) + successful_indices.append(idx) except Exception: - embeds.append([0] * self.embedding_dim) - return embeds + get_logger().warning("Failed to embed text segment; skipping.", artifact={"index": idx}) + return embeds, successful_indices def _get_qdrant_vector_size(self) -> int | None: try: @@ -908,11 +893,17 @@ def _update_qdrant_with_issues(self, issues_list, repo_name_for_index, ingest=Fa get_logger().info('Embedding...') list_to_encode = [doc["text"] for doc in documents] - embeds = self._embed_texts_with_fallback(list_to_encode) + embeds, successful_indices = self._embed_texts_with_fallback(list_to_encode) + if len(successful_indices) != len(documents): + documents = [documents[i] for i in successful_indices] for doc, vector in zip(documents, embeds): doc["vector"] = vector get_logger().info('Done') + if not documents: + get_logger().info('No documents to upsert into Qdrant.') + return + get_logger().info('Upserting into Qdrant...') points = [] for row in documents: From beb9c14bf4dd3dd820dc04581ba5e4164fdd9e5f Mon Sep 17 00:00:00 2001 From: Evans Castonguay Date: Sat, 10 Jan 2026 16:10:48 -0500 Subject: [PATCH 10/11] chore: drop deployment overrides for jira feature --- deploy/values/oci-dev.yaml | 30 ------------------------------ deploy/values/oci-int.yaml | 30 ------------------------------ 2 files changed, 60 deletions(-) delete mode 100644 deploy/values/oci-dev.yaml delete mode 100644 deploy/values/oci-int.yaml diff --git a/deploy/values/oci-dev.yaml b/deploy/values/oci-dev.yaml deleted file mode 100644 index d47bd54241..0000000000 --- a/deploy/values/oci-dev.yaml +++ /dev/null @@ -1,30 +0,0 @@ -fqdn: - prefix: "dev" - suffix: "na.onecloud.hosting.cerence.net" - -gateway: "istio-system/gateway-na-onecloud-hosting-cerence-net" - -image: - tag: "dev" - -extraEnv: - - name: NOTIFICATIONS__NOTIFY_ON_REVIEW_PLUS - value: "true" - - name: CONFIG__ISSUE_PROVIDER - value: "jira" - - name: JIRA__BASE_URL - value: "https://cerence.atlassian.net" - - name: JIRA__ISSUE_PROJECTS - value: "XUITXTSRV" - -prSimilarIssue: - vectorDb: "qdrant" - embeddingBaseUrl: "https://callm-api-embedding.int.na.oc.cerence.net/v1/embeddings" - embeddingModel: "intfloat/multilingual-e5-large" - embeddingDim: 1024 - embeddingMaxTokens: 10000 - -qdrant: - enabled: true - persistence: - enabled: false diff --git a/deploy/values/oci-int.yaml b/deploy/values/oci-int.yaml deleted file mode 100644 index 67d7b0d2d6..0000000000 --- a/deploy/values/oci-int.yaml +++ /dev/null @@ -1,30 +0,0 @@ -fqdn: - prefix: "int" - suffix: "na.oc.cerence.net" - -gateway: "istio-system/gateway-int-na-oc-cerence-net" - -image: - tag: "dev" - -extraEnv: - - name: NOTIFICATIONS__NOTIFY_ON_REVIEW_PLUS - value: "true" - - name: CONFIG__ISSUE_PROVIDER - value: "jira" - - name: JIRA__BASE_URL - value: "https://cerence.atlassian.net" - - name: JIRA__ISSUE_PROJECTS - value: "XUITXTSRV" - -prSimilarIssue: - vectorDb: "qdrant" - embeddingBaseUrl: "https://callm-api-embedding.int.na.oc.cerence.net/v1/embeddings" - embeddingModel: "intfloat/multilingual-e5-large" - embeddingDim: 1024 - embeddingMaxTokens: 10000 - -qdrant: - enabled: true - persistence: - enabled: false From 99bf12a3c8a45cea37709d8e3a279973d37deb6a Mon Sep 17 00:00:00 2001 From: Evans Castonguay Date: Sat, 10 Jan 2026 16:11:25 -0500 Subject: [PATCH 11/11] chore: add embedding client dependency --- pr_agent/tools/embedding_client.py | 75 ++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 pr_agent/tools/embedding_client.py diff --git a/pr_agent/tools/embedding_client.py b/pr_agent/tools/embedding_client.py new file mode 100644 index 0000000000..9e743f129e --- /dev/null +++ b/pr_agent/tools/embedding_client.py @@ -0,0 +1,75 @@ +import json +import logging +from typing import List + +import requests + + +class EmbeddingClientError(Exception): + pass + + +class EmbeddingClient: + def __init__(self, base_url: str, model: str, api_key: str | None = None, timeout_sec: int = 30): + self.base_url = base_url.rstrip("/") + self.model = model + self.api_key = api_key + self.timeout_sec = timeout_sec + + def embed(self, texts: List[str]) -> List[List[float]]: + if not self.base_url: + raise EmbeddingClientError("Embedding base URL is required") + if not texts: + return [] + + payload = { + "model": self.model, + "input": texts, + "encoding_format": "float", + } + headers = { + "Accept": "application/json", + "Content-Type": "application/json", + } + if self.api_key: + headers["Authorization"] = f"Bearer {self.api_key}" + + try: + response = requests.post( + self.base_url, + headers=headers, + data=json.dumps(payload), + timeout=self.timeout_sec, + ) + except requests.RequestException as exc: + raise EmbeddingClientError(f"Embedding request failed: {exc}") from exc + + if response.status_code >= 400: + raise EmbeddingClientError( + f"Embedding request failed: {response.status_code} {response.text}" + ) + + try: + data = response.json() + except ValueError as exc: + raise EmbeddingClientError("Embedding response was not valid JSON") from exc + + embeddings = self._extract_embeddings(data) + if len(embeddings) != len(texts): + logging.getLogger(__name__).warning( + "Embedding count mismatch: expected %s, got %s", + len(texts), + len(embeddings), + ) + return embeddings + + @staticmethod + def _extract_embeddings(data: object) -> List[List[float]]: + if isinstance(data, dict) and "data" in data: + return [item["embedding"] for item in data.get("data", []) if "embedding" in item] + if isinstance(data, list): + return [ + item["embedding"] if isinstance(item, dict) and "embedding" in item else item + for item in data + ] + raise EmbeddingClientError("Unexpected embedding response format")