diff --git a/.env.example b/.env.example index 68494f6..c859de2 100644 --- a/.env.example +++ b/.env.example @@ -9,6 +9,18 @@ DB_USER=example_db_user DB_PASSWORD=example_db_password DB_NAME=example_db_name +# Suppressor vector PostgreSQL container +PGVECTOR_DB_HOST=vector-db +PGVECTOR_DB_PORT=5432 +PGVECTOR_DB_PUBLISHED_PORT=5433 +PGVECTOR_DB_USER=vector_db_user +PGVECTOR_DB_PASSWORD=vector_db_password +PGVECTOR_DB_NAME=vector_db +PGVECTOR_TABLE=public.es3_vector +EMBEDDING_DIM=1024 +PGVECTOR_MATCH_THRESHOLD=0 +PGVECTOR_MATCH_COUNT=10 + # Nexus container NEXUS_BASE_URL=http://nexus:8081 NEXUS_PUBLISHED_PORT=8081 @@ -35,4 +47,4 @@ LLM_TIMEOUT=300 LLM_TEMPERATURE=0.1 SLACK_WEBHOOK_URL= DASHBOARD_BASE_URL=http://localhost:8000 -OLLAMA_KEEP_ALIVE=5m \ No newline at end of file +OLLAMA_KEEP_ALIVE=5m diff --git a/backend/nexus/nexus_repo.py b/backend/nexus/nexus_repo.py index 0f8963d..ef0bb78 100644 --- a/backend/nexus/nexus_repo.py +++ b/backend/nexus/nexus_repo.py @@ -1,13 +1,14 @@ +import asyncio import os import time from pathlib import PurePosixPath from urllib.parse import quote +import httpx import requests from fastapi import APIRouter, Depends, HTTPException, Request from fastapi.responses import StreamingResponse from requests.auth import HTTPBasicAuth -from starlette.concurrency import run_in_threadpool from backend.auth.security import require_permission @@ -21,6 +22,7 @@ NEXUS_DASHBOARD_CACHE_TTL_SECONDS = int(os.getenv("NEXUS_DASHBOARD_CACHE_TTL_SECONDS", "30")) nexus_auth = HTTPBasicAuth(NEXUS_USERNAME, NEXUS_PASSWORD) +httpx_nexus_auth = httpx.BasicAuth(NEXUS_USERNAME or "", NEXUS_PASSWORD or "") _dashboard_cache = {"expires_at": 0.0, "payload": None} @@ -65,6 +67,35 @@ def fetch_nexus_assets(): return all_assets +async def fetch_nexus_assets_async(client): + nexus_url = f"{NEXUS_BASE_URL}/service/rest/v1/assets" + all_assets = [] + continuation_token = None + + while True: + params = {"repository": NEXUS_REPOSITORY} + if continuation_token: + params["continuationToken"] = continuation_token + + response = await client.get(nexus_url, params=params) + + if response.status_code != 200: + print(f"Nexus API Error: {response.status_code} - {response.text}") + break + + data = response.json() + for item in data.get("items", []): + if item.get("path"): + item["path"] = item["path"].lstrip("/") + all_assets.append(item) + continuation_token = data.get("continuationToken") + + if not continuation_token: + break + + return all_assets + + def fetch_nexus_assets_by_name(names): nexus_url = f"{NEXUS_BASE_URL}/service/rest/v1/search/assets" matches = [] @@ -98,6 +129,39 @@ def fetch_nexus_assets_by_name(names): return matches +async def fetch_nexus_assets_by_name_async(client, names): + nexus_url = f"{NEXUS_BASE_URL}/service/rest/v1/search/assets" + + async def fetch_one(name): + if not name: + return [] + + matches = [] + continuation_token = None + while True: + params = {"repository": NEXUS_REPOSITORY, "name": name} + if continuation_token: + params["continuationToken"] = continuation_token + + response = await client.get(nexus_url, params=params) + if response.status_code != 200: + print(f"Nexus Search API Error: {response.status_code} - {response.text}") + break + + data = response.json() + for item in data.get("items", []): + if item.get("path"): + item["path"] = item["path"].lstrip("/") + matches.append(item) + continuation_token = data.get("continuationToken") + if not continuation_token: + break + return matches + + results = await asyncio.gather(*(fetch_one(name) for name in names)) + return [item for group in results for item in group] + + def fetch_nexus_blobstores(): nexus_url = f"{NEXUS_BASE_URL}/service/rest/v1/blobstores" response = requests.get( @@ -113,6 +177,17 @@ def fetch_nexus_blobstores(): return response.json() +async def fetch_nexus_blobstores_async(client): + nexus_url = f"{NEXUS_BASE_URL}/service/rest/v1/blobstores" + response = await client.get(nexus_url) + + if response.status_code != 200: + print(f"Nexus Blob Store API Error: {response.status_code} - {response.text}") + return [] + + return response.json() + + def get_safe_item_name(item): path = item.get("path") or "" parts = path.split("/") @@ -184,10 +259,30 @@ def fetch_dashboard_payload(): return payload +async def fetch_dashboard_payload_async(client): + now = time.monotonic() + cached_payload = _dashboard_cache["payload"] + if cached_payload is not None and _dashboard_cache["expires_at"] > now: + return cached_payload + + assets, blobstores = await asyncio.gather( + fetch_nexus_assets_async(client), + fetch_nexus_blobstores_async(client), + ) + payload = { + "items": assets, + "summary": build_dashboard_summary(assets, blobstores), + } + _dashboard_cache["payload"] = payload + _dashboard_cache["expires_at"] = now + NEXUS_DASHBOARD_CACHE_TTL_SECONDS + return payload + + @router.post("/api/nexus/list") async def nexus_list(_user: dict = Depends(require_permission("install_extension"))): try: - all_assets = await run_in_threadpool(fetch_nexus_assets) + async with httpx.AsyncClient(auth=httpx_nexus_auth, timeout=10) as client: + all_assets = await fetch_nexus_assets_async(client) print(f"총 {len(all_assets)}개의 자산을 Nexus에서 성공적으로 불러왔습니다.") return all_assets except Exception as e: @@ -212,7 +307,8 @@ async def nexus_exists( candidate_names.append(f"{ext_id}-{version}.vsix") try: - matches = await run_in_threadpool(fetch_nexus_assets_by_name, candidate_names) + async with httpx.AsyncClient(auth=httpx_nexus_auth, timeout=10) as client: + matches = await fetch_nexus_assets_by_name_async(client, candidate_names) except Exception as e: print(f"Nexus exists lookup error: {e}") matches = [] @@ -277,7 +373,8 @@ def iterfile(): @router.get("/api/nexus/dashboard") async def nexus_dashboard(): try: - return await run_in_threadpool(fetch_dashboard_payload) + async with httpx.AsyncClient(auth=httpx_nexus_auth, timeout=10) as client: + return await fetch_dashboard_payload_async(client) except Exception as e: print(f"Nexus dashboard summary error: {e}") return { diff --git a/backend/recevie_result.py b/backend/recevie_result.py index a64f029..dd64cff 100644 --- a/backend/recevie_result.py +++ b/backend/recevie_result.py @@ -30,6 +30,7 @@ def _run_ai_judgment(web_payload: dict, target_dir: Path) -> None: # 데이터를 저장할 기본 루트 디렉토리 BASE_SAVE_DIR = Path("analysis_result") +POLICY_PATH = Path(__file__).resolve().parent / "admin" / "policy_settings.json" def safe_path_part(value: Any, default: str) -> str: @@ -62,6 +63,170 @@ def get_nested(payload: Dict[str, Any], *keys: str, default: Any = None) -> Any: return current if current is not None else default +def _normalize_decision(value: Any) -> str: + lowered = str(value or "").strip().lower() + if lowered in {"approve", "safe"}: + return "safe" + if lowered in {"reject", "rejected", "critical"}: + return "reject" + if lowered in {"review", "manual_review", "hold"}: + return "review" + return lowered or "undecided" + + +def _read_policy() -> Dict[str, Any]: + default = { + "critical_auto_reject_enabled": True, + "low_auto_approve_enabled": False, + "fallback_decision": "review", + } + try: + if not POLICY_PATH.exists(): + return default + with POLICY_PATH.open("r", encoding="utf-8") as file: + data = json.load(file) + if not isinstance(data, dict): + return default + return {**default, **data, "fallback_decision": "review"} + except Exception as policy_e: + print(f"[receive_result] policy read failed: {policy_e}") + return default + + +def _apply_auto_policy(raw_decision: Any, recommended_decision: Any, risk_level: Any) -> tuple[str, Dict[str, Any]]: + policy = _read_policy() + raw = _normalize_decision(raw_decision) + recommended = _normalize_decision(recommended_decision) + risk = str(risk_level or "").strip().upper() + + if recommended == "reject" or risk == "CRITICAL": + decision = "reject" if policy.get("critical_auto_reject_enabled") else "review" + reason = "critical_auto_reject" if decision == "reject" else "manual_review" + elif risk == "LOW" or recommended == "safe" or raw == "safe": + decision = "safe" if policy.get("low_auto_approve_enabled") else "review" + reason = "low_auto_approve" if decision == "safe" else "manual_review" + elif raw == "reject": + decision = raw + reason = "explicit_decision" + else: + decision = "review" + reason = "recommended_review" + + return decision, { + "risk_level": risk or "UNKNOWN", + "recommended_decision": recommended, + "incoming_decision": raw, + "decision": decision, + "reason": reason, + "policy": { + "critical_auto_reject_enabled": bool(policy.get("critical_auto_reject_enabled")), + "low_auto_approve_enabled": bool(policy.get("low_auto_approve_enabled")), + "fallback_decision": "review", + }, + } + + +def _nexus_status_for_decision(decision: Any) -> str: + normalized = _normalize_decision(decision) + if normalized == "safe": + return "safe" + if normalized == "reject": + return "reject" + return "review" + + +def _reconcile_nexus_location( + *, + decision: str, + auto_policy: Dict[str, Any], + browser: str, + ext_names: list[str], + version: str, + ext_id: str, +) -> Dict[str, Any]: + target_status = _nexus_status_for_decision(decision) + candidate_statuses = [ + _nexus_status_for_decision(auto_policy.get("incoming_decision")), + _nexus_status_for_decision(auto_policy.get("recommended_decision")), + ] + candidate_statuses = list(dict.fromkeys(candidate_statuses)) + + try: + from backend.admin.decision.nexus_file import ( + build_nexus_path, + delete_nexus_file, + fetch_nexus_asset_paths, + move_nexus_file, + ) + + normalized_names = [] + for name in ext_names: + text = str(name or "").strip() + if text and text not in normalized_names: + normalized_names.append(text) + if not normalized_names: + normalized_names.append("unknown_extension") + + target_path = build_nexus_path(target_status, browser, normalized_names[0], version, ext_id) + asset_paths = fetch_nexus_asset_paths() + asset_lookup = {path.lower(): path for path in asset_paths} + + if target_status == "reject": + for status in candidate_statuses: + for ext_name in normalized_names: + source_path = build_nexus_path(status, browser, ext_name, version, ext_id) + if source_path.lower() not in asset_lookup: + continue + resolved_source = asset_lookup[source_path.lower()] + delete_nexus_file(resolved_source) + return { + "status": "deleted", + "source_path": resolved_source, + "target_path": None, + "candidate_statuses": candidate_statuses, + } + + return { + "status": "source_not_found", + "target_path": None, + "candidate_statuses": candidate_statuses, + } + + if target_path.lower() in asset_lookup: + return { + "status": "already_target", + "target_path": asset_lookup[target_path.lower()], + "candidate_statuses": candidate_statuses, + } + + for status in candidate_statuses: + for ext_name in normalized_names: + source_path = build_nexus_path(status, browser, ext_name, version, ext_id) + if source_path.lower() not in asset_lookup: + continue + resolved_source = asset_lookup[source_path.lower()] + move_nexus_file(resolved_source, target_path) + return { + "status": "moved", + "source_path": resolved_source, + "target_path": target_path, + "candidate_statuses": candidate_statuses, + } + + return { + "status": "source_not_found", + "target_path": target_path, + "candidate_statuses": candidate_statuses, + } + except Exception as nexus_e: + print(f"[receive_result] nexus reconcile failed: {nexus_e}") + return { + "status": "error", + "message": str(nexus_e), + "candidate_statuses": candidate_statuses, + } + + @router.post("/api/receive") async def receive_and_save_analysis( background_tasks: BackgroundTasks, @@ -80,12 +245,17 @@ async def receive_and_save_analysis( # 1. 경로 구성을 위한 정보 추출 # 기존 legacy top-level 필드 우선, 없으면 새 web_payload 구조에서 추출 - decision = ( + raw_decision = ( payload.get("decision") or payload.get("judge") or overall.get("recommended_decision") or "undecided" ) + decision, auto_policy = _apply_auto_policy( + raw_decision=raw_decision, + recommended_decision=overall.get("recommended_decision"), + risk_level=overall.get("risk_level"), + ) browser = ( payload.get("browser") @@ -113,11 +283,20 @@ async def receive_and_save_analysis( or "unknown_id" ) + raw_ext_name = str(ext_name or "").strip() decision = safe_path_part(decision, "undecided") browser = safe_path_part(browser, "unknown_browser") ext_name = safe_path_part(ext_name, "unknown_extension") version = safe_path_part(version, "unknown_version") ext_id = safe_path_part(ext_id, "unknown_id") + nexus_reconcile = _reconcile_nexus_location( + decision=decision, + auto_policy=auto_policy, + browser=browser, + ext_names=[ext_name, raw_ext_name], + version=version, + ext_id=ext_id, + ) # 2. 저장 경로 생성 # analysis_result/{decision}/{browser}/{extName}/{version}/{extID} @@ -170,6 +349,8 @@ async def receive_and_save_analysis( "recommended_decision": overall.get("recommended_decision", decision), "decision_reason": overall.get("decision_reason", ""), }, + "auto_policy": auto_policy, + "nexus_reconcile": nexus_reconcile, "metadata": { "extID": ext_id, "extName": ext_name, diff --git a/backend/search/browser/chrome_id.py b/backend/search/browser/chrome_id.py index 751d271..0a6737c 100644 --- a/backend/search/browser/chrome_id.py +++ b/backend/search/browser/chrome_id.py @@ -2,6 +2,7 @@ import re import requests +import httpx from bs4 import BeautifulSoup @@ -52,7 +53,10 @@ def _extract_json_ld(soup): def _extract_text_value(patterns, text, default="N/A"): for pattern in patterns: - match = re.search(pattern, text, re.IGNORECASE) + try: + match = re.search(pattern, text, re.IGNORECASE) + except re.error: + continue if match: return match.group(1).strip() return default @@ -226,3 +230,112 @@ def get_extension_info(extension_id): } except Exception as e: return {"success": False, "error": str(e)} + + +async def get_extension_info_async(client, extension_id): + url = f"{DETAIL_BASE_URL}/{extension_id}?hl=en" + headers = { + "User-Agent": ( + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " + "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36" + ) + } + + try: + res = await client.get(url, headers=headers) + res.raise_for_status() + + soup = BeautifulSoup(res.text, "html.parser") + json_ld = _extract_json_ld(soup) + aggregate_rating = json_ld.get("aggregateRating") if isinstance(json_ld, dict) else {} + if not isinstance(aggregate_rating, dict): + aggregate_rating = {} + + name = ( + (soup.find("h1").text.strip() if soup.find("h1") else "") + or json_ld.get("name") + or _first_meta(soup, ("property", "og:title"), ("name", "title")) + or "N/A" + ) + + logo_url = ( + _first_meta(soup, ("property", "og:image"), ("name", "twitter:image")) + or json_ld.get("image") + or "N/A" + ) + if isinstance(logo_url, list): + logo_url = logo_url[0] if logo_url else "N/A" + + meta_description = ( + _first_meta(soup, ("property", "og:description"), ("name", "description")) + or json_ld.get("description") + or "" + ) + overview_description = _extract_overview_description(soup) + description = overview_description or meta_description or "N/A" + + all_text = " ".join(soup.get_text(" ").split()) + version = _extract_detail_field(soup, ["Version", "踰꾩쟾"]) or _extract_text_value( + [ + r"踰꾩쟾\s*([0-9][0-9A-Za-z.\-_]*)", + r"Version\s*([0-9][0-9A-Za-z.\-_]*)", + ], + all_text, + ) + updated = _extract_detail_field(soup, ["Updated", "?낅뜲?댄듃 ?좎쭨"]) or _extract_text_value( + [ + r"?낅뜲?댄듃 ?좎쭨[:\s]*([0-9. /\-]+)", + r"Updated\s+([A-Za-z]+ \d{1,2}, \d{4})", + r"Updated[:\s]*([A-Za-z0-9, /\-]+?)(?:\s+Features|\s+Flag concern|\s+Size|$)", + ], + all_text, + ) + + users = _extract_text_value( + [ + r"?ъ슜??s*([\d,.]+[KkMm]?)\s*\+?\s*紐?", + r"([\d,.]+[KkMm]?)\s*\+?\s*users", + ], + all_text, + "0", + ) + rating = ( + str(aggregate_rating.get("ratingValue") or "") + or _extract_text_value( + [ + r"蹂꾪몴\s*([\d.]+)\s*媛?", + r"([\d.]+)\s+out of 5", + r"Rated\s*([\d.]+)", + r"Rating\s*([\d.]+)", + ], + all_text, + "0.0", + ) + ) + + users_count = _parse_number(users) + try: + rating_value = float(str(rating).replace(",", ".")) + except ValueError: + rating_value = 0.0 + + return { + "success": True, + "data": { + "id": extension_id, + "name": name, + "logo_url": logo_url, + "version": version, + "users": users, + "users_count": users_count, + "rating": f"{rating_value:.1f}" if rating_value else "0.0", + "rating_value": rating_value, + "updated": updated, + "last_updated": updated, + "summary": description, + "description": description, + "url": url, + }, + } + except Exception as e: + return {"success": False, "error": str(e)} diff --git a/backend/search/browser/chrome_name.py b/backend/search/browser/chrome_name.py index 6637452..8e3a174 100644 --- a/backend/search/browser/chrome_name.py +++ b/backend/search/browser/chrome_name.py @@ -1,9 +1,11 @@ +import asyncio import re import os import time from urllib.parse import quote import requests +import httpx from bs4 import BeautifulSoup @@ -49,6 +51,22 @@ def _search_by_name_with_requests(extension_name, limit): return _extract_ids_from_html(response.text, limit) +async def _search_by_name_with_httpx(client, extension_name, limit): + search_url = f"https://chromewebstore.google.com/search/{quote(extension_name)}" + headers = { + "User-Agent": ( + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " + "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36" + ) + } + + response = await client.get(search_url, headers=headers) + if response.status_code != 200: + return [] + + return _extract_ids_from_html(response.text, limit) + + def _search_by_name_with_selenium(extension_name, limit): from selenium import webdriver from selenium.webdriver.chrome.options import Options @@ -134,6 +152,31 @@ def _expand_ids_with_related_queries(extension_name, ext_ids, limit): return ext_ids +async def _expand_ids_with_related_queries_async(extension_name, ext_ids, limit): + variants = list(_query_variants(extension_name)) + if not variants or len(ext_ids) >= limit: + return ext_ids + + async with httpx.AsyncClient(timeout=10, follow_redirects=True) as client: + tasks = [ + _search_by_name_with_httpx(client, variant, 10) + for variant in variants + ] + results = await asyncio.gather(*tasks, return_exceptions=True) + + for variant, result in zip(variants, results): + if len(ext_ids) >= limit: + break + if isinstance(result, Exception): + print(f"Related search skipped ({variant}): {result}") + continue + for ext_id in result: + if _append_unique(ext_ids, ext_id, limit): + return ext_ids + + return ext_ids + + def search_by_name(extension_name, limit=40): if not extension_name: return [] @@ -156,3 +199,28 @@ def search_by_name(extension_name, limit=40): print(f"Selenium search fallback: {e}") return [] + + +async def search_by_name_async(extension_name, limit=40): + if not extension_name: + return [] + + limit = max(1, min(int(limit or 40), 80)) + + try: + async with httpx.AsyncClient(timeout=10, follow_redirects=True) as client: + ids = await _search_by_name_with_httpx(client, extension_name, limit) + if ids: + return await _expand_ids_with_related_queries_async(extension_name, ids, limit) + except Exception as e: + print(f"Search error: {e}") + + if os.getenv("CHROME_SEARCH_USE_SELENIUM", "false").lower() == "true": + try: + ids = _search_by_name_with_selenium(extension_name, limit) + if ids: + return await _expand_ids_with_related_queries_async(extension_name, ids, limit) + except Exception as e: + print(f"Selenium search fallback: {e}") + + return [] diff --git a/backend/search/browser/vscode_id.py b/backend/search/browser/vscode_id.py index 647908a..d2ba3da 100644 --- a/backend/search/browser/vscode_id.py +++ b/backend/search/browser/vscode_id.py @@ -1,4 +1,5 @@ import requests +import httpx API_BASE_URL = "https://open-vsx.org/api" @@ -49,3 +50,40 @@ def vscode_search_by_id(ext_id): } except Exception as e: return {"success": False, "error": str(e)} + + +async def vscode_search_by_id_async(client, ext_id): + try: + publisher, name = str(ext_id).split(".", 1) + + url = f"{API_BASE_URL}/{publisher}/{name}/latest" + res = await client.get(url) + res.raise_for_status() + payload = res.json() + + files = payload.get("files") or {} + download_count = int(payload.get("downloadCount") or 0) + rating_value = float(payload.get("averageRating") or 0.0) + updated = _normalize_timestamp(payload.get("timestamp")) + description = payload.get("description") or "N/A" + + return { + "success": True, + "data": { + "id": ext_id, + "name": payload.get("displayName") or payload.get("name") or "N/A", + "logo_url": files.get("icon") or "N/A", + "version": payload.get("version") or "N/A", + "users": str(download_count), + "users_count": download_count, + "rating": f"{rating_value:.1f}" if rating_value else "0.0", + "rating_value": rating_value, + "updated": updated, + "last_updated": updated, + "summary": description, + "description": description, + "url": f"{MARKET_BASE_URL}/{publisher}/{name}", + }, + } + except Exception as e: + return {"success": False, "error": str(e)} diff --git a/backend/search/browser/vscode_name.py b/backend/search/browser/vscode_name.py index 5382b91..7a815e7 100644 --- a/backend/search/browser/vscode_name.py +++ b/backend/search/browser/vscode_name.py @@ -1,4 +1,5 @@ import requests +import httpx SEARCH_URL = "https://open-vsx.org/api/-/search" @@ -24,3 +25,25 @@ def vscode_search_by_name(query, size=20): except Exception as e: print(f"VSCode search error: {e}") return [] + + +async def vscode_search_by_name_async(query, size=20): + try: + async with httpx.AsyncClient(timeout=10) as client: + res = await client.get( + SEARCH_URL, + params={"query": query, "size": size}, + ) + res.raise_for_status() + payload = res.json() + + ext_ids = [] + for ext in payload.get("extensions") or []: + namespace = ext.get("namespace") + name = ext.get("name") + if namespace and name: + ext_ids.append(f"{namespace}.{name}") + return ext_ids + except Exception as e: + print(f"VSCode search error: {e}") + return [] diff --git a/backend/search/search.py b/backend/search/search.py index 40fcb4d..a229c30 100644 --- a/backend/search/search.py +++ b/backend/search/search.py @@ -5,17 +5,19 @@ import time from datetime import datetime +import httpx from fastapi import APIRouter, Request from fastapi.responses import JSONResponse -from backend.search.browser.chrome_id import get_extension_info -from backend.search.browser.chrome_name import search_by_name -from backend.search.browser.vscode_id import vscode_search_by_id -from backend.search.browser.vscode_name import vscode_search_by_name +from backend.search.browser.chrome_id import get_extension_info_async +from backend.search.browser.chrome_name import search_by_name_async +from backend.search.browser.vscode_id import vscode_search_by_id_async +from backend.search.browser.vscode_name import vscode_search_by_name_async router = APIRouter() SEARCH_CACHE_TTL_SECONDS = int(os.getenv("SEARCH_CACHE_TTL_SECONDS", "300")) SEARCH_DEFAULT_LIMIT = int(os.getenv("SEARCH_DEFAULT_LIMIT", "20")) +SEARCH_DETAIL_CONCURRENCY = int(os.getenv("SEARCH_DETAIL_CONCURRENCY", "12")) _search_cache = {} @@ -133,6 +135,21 @@ def _valid_result(info): ) +async def _gather_extension_info(ids, get_info): + semaphore = asyncio.Semaphore(max(1, SEARCH_DETAIL_CONCURRENCY)) + limits = httpx.Limits( + max_connections=max(1, SEARCH_DETAIL_CONCURRENCY), + max_keepalive_connections=max(1, SEARCH_DETAIL_CONCURRENCY), + ) + + async with httpx.AsyncClient(timeout=10, follow_redirects=True, limits=limits) as client: + async def fetch(ext_id): + async with semaphore: + return await get_info(client, ext_id) + + return await asyncio.gather(*(fetch(ext_id) for ext_id in ids)) + + @router.post("/api/search_name") async def search_name_api(request: Request): data = await request.json() @@ -146,18 +163,16 @@ async def search_name_api(request: Request): if browser == "Chrome" or browser == "VSCode": if browser == "Chrome": - ids = search_by_name(extension_name, limit=limit) - get_info = get_extension_info + ids = await search_by_name_async(extension_name, limit=limit) + get_info = get_extension_info_async else: - ids = vscode_search_by_name(extension_name, size=limit) - get_info = vscode_search_by_id + ids = await vscode_search_by_name_async(extension_name, size=limit) + get_info = vscode_search_by_id_async if not ids: return JSONResponse(content={"success": False, "error": "검색 결과가 없습니다."}) - loop = asyncio.get_event_loop() - tasks = [loop.run_in_executor(None, get_info, ext_id) for ext_id in ids] - responses = await asyncio.gather(*tasks) + responses = await _gather_extension_info(ids, get_info) all_results = [] for index, info in enumerate(responses): @@ -192,13 +207,15 @@ async def search_id_api(request: Request): return JSONResponse(content=cached) if browser == "Chrome": - result = get_extension_info(extension_id) + async with httpx.AsyncClient(timeout=10, follow_redirects=True) as client: + result = await get_extension_info_async(client, extension_id) if _valid_result(result): result["data"] = _enrich_result(result["data"], search_rank=1) return JSONResponse(content=_cache_set(cache_key, result)) elif browser == "VSCode": - result = vscode_search_by_id(extension_id) + async with httpx.AsyncClient(timeout=10, follow_redirects=True) as client: + result = await vscode_search_by_id_async(client, extension_id) if _valid_result(result): result["data"] = _enrich_result(result["data"], search_rank=1) return JSONResponse(content=_cache_set(cache_key, result)) diff --git a/docker-compose.yml b/docker-compose.yml index cfe55e6..81295e5 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -38,6 +38,8 @@ services: context: ../suppressor image: suppressor:local depends_on: + vector-db: + condition: service_healthy nexus: condition: service_healthy nexus-init: @@ -50,6 +52,15 @@ services: NEXUS_REPOSITORY: "${NEXUS_REPOSITORY:-extension-demo}" NEXUS_USERNAME: "${NEXUS_USERNAME:-example_nexus_user}" NEXUS_PASSWORD: "${NEXUS_PASSWORD:-example_nexus_password}" + PGVECTOR_DB_HOST: "${PGVECTOR_DB_HOST:-vector-db}" + PGVECTOR_DB_PORT: "${PGVECTOR_DB_PORT:-5432}" + PGVECTOR_DB_USER: "${PGVECTOR_DB_USER:-vector_db_user}" + PGVECTOR_DB_PASSWORD: "${PGVECTOR_DB_PASSWORD:-vector_db_password}" + PGVECTOR_DB_NAME: "${PGVECTOR_DB_NAME:-vector_db}" + EMBEDDING_DIM: "${EMBEDDING_DIM:-1024}" + PGVECTOR_TABLE: "${PGVECTOR_TABLE:-public.es3_vector}" + PGVECTOR_MATCH_THRESHOLD: "${PGVECTOR_MATCH_THRESHOLD:-0}" + PGVECTOR_MATCH_COUNT: "${PGVECTOR_MATCH_COUNT:-10}" EXTERNAL_RUNNER_MODE: "${EXTERNAL_RUNNER_MODE:-file}" FILE_SCAN_URL: "http://127.0.0.1:8001/file_scan" volumes: @@ -74,6 +85,22 @@ services: timeout: 5s retries: 5 + vector-db: + image: pgvector/pgvector:pg16 + environment: + POSTGRES_USER: "${PGVECTOR_DB_USER:-vector_db_user}" + POSTGRES_PASSWORD: "${PGVECTOR_DB_PASSWORD:-vector_db_password}" + POSTGRES_DB: "${PGVECTOR_DB_NAME:-vector_db}" + ports: + - "${PGVECTOR_DB_PUBLISHED_PORT:-5433}:5432" + volumes: + - vector_db_data:/var/lib/postgresql/data + healthcheck: + test: ["CMD-SHELL", "pg_isready -U ${PGVECTOR_DB_USER:-vector_db_user} -d ${PGVECTOR_DB_NAME:-vector_db}"] + interval: 10s + timeout: 5s + retries: 5 + nexus: image: sonatype/nexus3:3.75.1 environment: @@ -106,6 +133,7 @@ services: volumes: db_data: + vector_db_data: nexus_data: suppressor_storage: suppressor_pending_files: