From 5241da6fb53dbd549d8db424d03783a9708a5e4d Mon Sep 17 00:00:00 2001 From: Mohsin Ali Date: Wed, 8 Apr 2026 11:42:30 +0500 Subject: [PATCH 1/4] Graph checkpointer fix and other bugs fixed --- .env.example | 23 +- .gitignore | 9 +- Dockerfile | 6 +- README.md | 12 +- docker-compose.yml | 22 +- pyproject.toml | 1 + src/longparser/chunkers/hybrid_chunker.py | 4 +- .../extractors/docling_extractor.py | 31 +- src/longparser/server/app.py | 158 ++++++++- src/longparser/server/chat/checkpointer.py | 45 +++ src/longparser/server/chat/graph.py | 19 +- src/longparser/server/chat/llm_chain.py | 8 +- src/longparser/server/db.py | 10 +- src/longparser/server/embeddings.py | 6 +- src/longparser/server/queue.py | 7 +- src/longparser/server/vectorstores.py | 16 +- src/longparser/server/worker.py | 10 +- tests/unit/test_llm_chain.py | 23 +- uv.lock | 325 +++++++++++------- 19 files changed, 510 insertions(+), 225 deletions(-) create mode 100644 src/longparser/server/chat/checkpointer.py diff --git a/.env.example b/.env.example index d50bb3e..9f80665 100644 --- a/.env.example +++ b/.env.example @@ -5,11 +5,25 @@ # ============================================================ # ── Database ───────────────────────────────────────────────── +# Local dev (no auth): LONGPARSER_MONGO_URL=mongodb://localhost:27017 +# Docker Compose (auth handled by docker-compose.yml override): +# No need to change — docker-compose sets the authenticated URL automatically. +# Production (with auth): +# LONGPARSER_MONGO_URL=mongodb://USER:PASSWORD@host:27017/longparser?authSource=admin LONGPARSER_DB_NAME=longparser # ── Job Queue (Redis / ARQ) ─────────────────────────────────── +# Local dev (no auth): LONGPARSER_REDIS_URL=redis://localhost:6379 +# Production (with auth): +# LONGPARSER_REDIS_URL=redis://:PASSWORD@host:6379 + +# ── Docker Auth Credentials (used by docker-compose.yml) ────── +# Change these before deploying. Defaults are for local dev only. 
+MONGO_USER=longparser +MONGO_PASS=longparser +REDIS_PASS=longparser # ── File Storage ────────────────────────────────────────────── LONGPARSER_UPLOAD_DIR=./uploads @@ -17,7 +31,7 @@ LONGPARSER_UPLOAD_DIR=./uploads # ── LLM Provider ───────────────────────────────────────────── # One of: openai | gemini | groq | openrouter LONGPARSER_LLM_PROVIDER=openai -LONGPARSER_LLM_MODEL=gpt-4o +LONGPARSER_LLM_MODEL=gpt-5.3 # ── API Keys ────────────────────────────────────────────────── OPENAI_API_KEY=sk-... @@ -41,3 +55,10 @@ QDRANT_API_KEY= # Required only for Qdrant Cloud LONGPARSER_OCR_BACKEND=easyocr LONGPARSER_OCR_USE_GPU=false +# ── Security (added by audit) ──────────────────────────────── +# CORS allowed origins (comma-separated). Default: * (all origins) +# LONGPARSER_CORS_ORIGINS=https://app.example.com,https://admin.example.com +# Rate limit: max requests per minute per tenant. Default: 60 +# LONGPARSER_RATE_LIMIT=60 +# Admin API keys (comma-separated). If empty, all users are admin. +# LONGPARSER_ADMIN_KEYS=key1,key2 \ No newline at end of file diff --git a/.gitignore b/.gitignore index 31ca885..338a52d 100644 --- a/.gitignore +++ b/.gitignore @@ -60,4 +60,11 @@ MANIFEST.in .env # IDE / Gemini agent -.gemini/ \ No newline at end of file +.gemini/ + +# Logs +*.log + +# Temporary test files +test_hack.csv +tests_temp/ \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 978f0b8..ca6b99f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -28,7 +28,7 @@ COPY pyproject.toml uv.lock ./ # 2) install only dependencies (not project) — cache-friendly # Use --frozen to respect lockfile, skip CUDA/NVIDIA packages (installed as CPU-only later) ENV UV_HTTP_TIMEOUT=300 -RUN uv sync --no-cache --frozen --no-install-project --extra api --extra embeddings --extra chroma --extra latex-ocr \ +RUN uv sync --no-cache --frozen --no-install-project --extra server --extra embeddings --extra chroma --extra latex-ocr \ --no-install-package torch \ --no-install-package 
torchvision \ --no-install-package nvidia-cublas-cu12 \ @@ -54,7 +54,7 @@ RUN uv sync --no-cache --frozen --no-install-project --extra api --extra embeddi COPY . . # 4) install the project itself (skip torch/CUDA, installed as CPU-only next) -RUN uv sync --no-cache --frozen --extra api --extra embeddings --extra chroma --extra latex-ocr \ +RUN uv sync --no-cache --frozen --extra server --extra embeddings --extra chroma --extra latex-ocr \ --no-install-package torch \ --no-install-package torchvision \ --no-install-package nvidia-cublas-cu12 \ @@ -88,4 +88,4 @@ USER appuser EXPOSE 8000 -CMD [".venv/bin/uvicorn", "clean_rag.api.app:app", "--host", "0.0.0.0", "--port", "8000"] +CMD [".venv/bin/uvicorn", "longparser.server.app:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/README.md b/README.md index 7da804e..3b4f72a 100644 --- a/README.md +++ b/README.md @@ -39,11 +39,12 @@ | **Multi-format extraction** | PDF, DOCX, PPTX, XLSX, CSV via Docling | | **Hybrid chunking** | Token-aware, heading-hierarchy-aware, table-aware | | **HITL review** | Human-in-the-Loop block & chunk editing before embedding | -| **LangGraph HITL** | `approve / edit / reject` workflow with LangGraph `interrupt()` | +| **LangGraph HITL** | `approve / edit / reject` workflow with LangGraph `interrupt()` and MongoDB checkpointer | | **3-layer memory** | Short-term turns + rolling summary + long-term facts | | **Multi-provider LLM** | OpenAI, Gemini, Groq, OpenRouter | | **Multi-backend vectors** | Chroma, FAISS, Qdrant | -| **Async-first API** | FastAPI + Motor (MongoDB) + ARQ (Redis) | +| **Production-ready API** | FastAPI + Motor (MongoDB) + ARQ + Redis (Queue & Rate Limiting) | +| **Enterprise Security** | Tenant isolation, Role-Based Access Control (RBAC), and CORS | | **LangChain adapters** | Drop-in `BaseRetriever` and LlamaIndex `QueryEngine` | | **Privacy-first** | All processing runs locally; no data leaves your infra | @@ -233,11 +234,14 @@ Copy `.env.example` to `.env` and set: 
| Variable | Default | Description | |----------|---------|-------------| | `LONGPARSER_MONGO_URL` | `mongodb://localhost:27017` | MongoDB connection | -| `LONGPARSER_REDIS_URL` | `redis://localhost:6379` | Redis for job queue | +| `LONGPARSER_REDIS_URL` | `redis://localhost:6379` | Redis for job queue & rate limits | | `LONGPARSER_LLM_PROVIDER` | `openai` | LLM provider | -| `LONGPARSER_LLM_MODEL` | `gpt-4o` | Model name | +| `LONGPARSER_LLM_MODEL` | `gpt-5.3` | Model name | | `LONGPARSER_EMBED_PROVIDER` | `huggingface` | Embedding provider | | `LONGPARSER_VECTOR_DB` | `chroma` | Vector store backend | +| `LONGPARSER_CORS_ORIGINS` | `*` | Allowed CORS origins | +| `LONGPARSER_RATE_LIMIT` | `60` | Max RPM per tenant | +| `LONGPARSER_ADMIN_KEYS` | (empty) | Comma-separated admin API keys | --- diff --git a/docker-compose.yml b/docker-compose.yml index 3a21423..707f089 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,11 +1,14 @@ services: api: build: . - container_name: cleanrag-api + container_name: longparser-api command: [ ".venv/bin/uvicorn", "longparser.server.app:app", "--host", "0.0.0.0", "--port", "8000" ] env_file: .env environment: - LONGPARSER_MFD_MODEL_DIR=/app/models/mfd + # ── For Docker networking, override the localhost URLs from .env ── + - LONGPARSER_MONGO_URL=mongodb://${MONGO_USER:-longparser}:${MONGO_PASS:-longparser}@mongo:27017/longparser?authSource=admin + - LONGPARSER_REDIS_URL=redis://:${REDIS_PASS:-longparser}@redis:6379 ports: - "8000:8000" volumes: @@ -27,11 +30,13 @@ services: worker: build: . 
- container_name: cleanrag-worker + container_name: longparser-worker command: [ ".venv/bin/arq", "longparser.server.worker.WorkerSettings" ] env_file: .env environment: - LONGPARSER_MFD_MODEL_DIR=/app/models/mfd + - LONGPARSER_MONGO_URL=mongodb://${MONGO_USER:-longparser}:${MONGO_PASS:-longparser}@mongo:27017/longparser?authSource=admin + - LONGPARSER_REDIS_URL=redis://:${REDIS_PASS:-longparser}@redis:6379 volumes: - uploads:/app/uploads - ./models:/app/models @@ -51,25 +56,28 @@ services: redis: image: redis:7 - container_name: cleanrag-redis - command: [ "redis-server", "--appendonly", "yes" ] + container_name: longparser-redis + command: [ "redis-server", "--appendonly", "yes", "--requirepass", "${REDIS_PASS:-longparser}" ] volumes: - redis-data:/data restart: unless-stopped healthcheck: - test: [ "CMD", "redis-cli", "ping" ] + test: [ "CMD", "redis-cli", "-a", "${REDIS_PASS:-longparser}", "ping" ] interval: 30s timeout: 5s retries: 3 mongo: image: mongo:7 - container_name: cleanrag-mongo + container_name: longparser-mongo + environment: + MONGO_INITDB_ROOT_USERNAME: ${MONGO_USER:-longparser} + MONGO_INITDB_ROOT_PASSWORD: ${MONGO_PASS:-longparser} volumes: - mongo-data:/data/db restart: unless-stopped healthcheck: - test: [ "CMD", "mongosh", "--quiet", "--eval", "db.adminCommand('ping').ok" ] + test: [ "CMD", "mongosh", "-u", "${MONGO_USER:-longparser}", "-p", "${MONGO_PASS:-longparser}", "--authenticationDatabase", "admin", "--quiet", "--eval", "db.adminCommand('ping').ok" ] interval: 30s timeout: 5s retries: 3 diff --git a/pyproject.toml b/pyproject.toml index bde6e25..38330da 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,6 +35,7 @@ dependencies = [ "pydantic>=2.0,<3", "docling>=2.14", "docling-core>=2.13", + "langgraph-checkpoint-mongodb>=0.3.1", ] [project.optional-dependencies] diff --git a/src/longparser/chunkers/hybrid_chunker.py b/src/longparser/chunkers/hybrid_chunker.py index 544ec69..a6de833 100755 --- 
a/src/longparser/chunkers/hybrid_chunker.py +++ b/src/longparser/chunkers/hybrid_chunker.py @@ -345,10 +345,10 @@ def _generate_schema_chunk( sample_rows.append(f" Row {r_idx}: " + "; ".join(parts)) lines = [ - f"[TABLE SCHEMA]", + "[TABLE SCHEMA]", f"Table ID: {block.block_id}", f"Rows: {n_data} (data rows), Columns: {n_cols}", - f"Columns:", + "Columns:", ] lines.extend(col_profiles) lines.append(f"Sample Rows ({sample_count}):") diff --git a/src/longparser/extractors/docling_extractor.py b/src/longparser/extractors/docling_extractor.py index 54fd333..ae5ecd8 100755 --- a/src/longparser/extractors/docling_extractor.py +++ b/src/longparser/extractors/docling_extractor.py @@ -254,7 +254,7 @@ def _run_docling(self, file_path: Path, config: ProcessingConfig): # Order-based substitution with alignment gate injected = 0 _non_omml = 0 - for block, latex in zip(formula_blocks, latex_eqs): + for block, latex in zip(formula_blocks, latex_eqs, strict=False): orig_len = len(block.text.strip()) if block.text else 0 latex_len = len(latex.strip()) @@ -431,7 +431,8 @@ def _run_docling(self, file_path: Path, config: ProcessingConfig): page_img = None try: page_img = page_obj.image.pil_image - except Exception: + except Exception as e: + logger.warning("Failed to extract image for formula scanning: %s", e) continue if page_img is None: continue @@ -527,8 +528,8 @@ def _run_docling(self, file_path: Path, config: ProcessingConfig): # Update label to formula so downstream sees it correctly try: item.label = type(item.label)("formula") - except Exception: - pass + except Exception as e: + logger.debug(f"Failed to update formula label: {e}") replaced = True logger.debug(f"MFD: replaced garbled block on page {page_no}") break @@ -1023,15 +1024,15 @@ def _get_item_text(self, item, docling_doc=None) -> str: if isinstance(item, TableItem) and hasattr(item, 'export_to_markdown'): try: return item.export_to_markdown(doc=docling_doc) - except Exception: - pass + except Exception as e: + 
logger.debug(f"Failed to export table item to markdown: {e}") if hasattr(item, 'text') and item.text: return item.text if hasattr(item, 'export_to_markdown'): try: return item.export_to_markdown() - except Exception: - pass + except Exception as e: + logger.debug(f"Failed to export item to markdown: {e}") return "" def _get_item_confidence(self, item) -> float: @@ -1080,10 +1081,10 @@ def _build_pptx_text_map(self, file_path: Path) -> Dict[int, Dict[str, PptxParaI if s.placeholder_format.type == PP_PH.SUBTITLE: has_subtitle_placeholder = True break - except Exception: - pass - except ImportError: - pass + except Exception as e: + logger.debug(f"Failed to check PPTX subtitle placeholder format: {e}") + except ImportError as e: + logger.debug(f"Failed to import python-pptx: {e}") for shape in slide.shapes: found_title = self._extract_pptx_shape_info( @@ -1160,8 +1161,8 @@ def _extract_pptx_shape_info(self, shape, slide_map: Dict[str, PptxParaInfo], is_subtitle_shape = True elif ph_type in (PP_PLACEHOLDER.DATE, PP_PLACEHOLDER.FOOTER, PP_PLACEHOLDER.SLIDE_NUMBER): is_footer_shape = True - except Exception: - pass + except Exception as e: + logger.debug(f"Failed to check PPTX placeholder format type: {e}") # Skip footer/date/slide-number shapes entirely if is_footer_shape: @@ -1267,7 +1268,7 @@ def extract( # Calculate file hash with open(file_path, "rb") as f: - file_hash = hashlib.md5(f.read()).hexdigest() + file_hash = hashlib.sha256(f.read()).hexdigest() # Get conversion result (cached or new) result = self._run_docling(file_path, config) diff --git a/src/longparser/server/app.py b/src/longparser/server/app.py index 387d62f..ab24677 100755 --- a/src/longparser/server/app.py +++ b/src/longparser/server/app.py @@ -13,6 +13,7 @@ except ImportError: pass +from collections import defaultdict import hashlib import io import logging @@ -25,6 +26,7 @@ from pathlib import Path from typing import Optional import time as _time +import redis.asyncio as redis from fastapi import 
( FastAPI, @@ -35,6 +37,7 @@ Request, UploadFile, ) +from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import JSONResponse, StreamingResponse from .db import Database @@ -57,6 +60,15 @@ SearchResponse, SearchResult, ) +from .chat.schemas import ( + ChatConfig, + ChatRequest, + ChatResponse, + CreateSessionRequest, + HITLResumeRequest, + LLMAnswer, + SourceRef, +) logger = logging.getLogger(__name__) @@ -92,8 +104,18 @@ async def lifespan(app: FastAPI): """Startup/shutdown hooks.""" await db.create_indexes() + + from .chat.checkpointer import init_checkpointer, close_checkpointer + await init_checkpointer( + mongo_uri=os.getenv("LONGPARSER_MONGO_URL", "mongodb://localhost:27017"), + db_name=os.getenv("LONGPARSER_DB_NAME", "longparser"), + ) + logger.info("LongParser API started") yield + + await close_checkpointer() + await queue.close() await db.close() if hasattr(app.state, "chat_engine"): @@ -104,11 +126,69 @@ async def lifespan(app: FastAPI): app = FastAPI( title="LongParser API", description="Document intelligence engine with HITL review, embedding, and vector search.", - version="0.3.0", + version=__import__("longparser").__version__, lifespan=lifespan, ) +# --------------------------------------------------------------------------- +# CORS middleware +# --------------------------------------------------------------------------- + +app.add_middleware( + CORSMiddleware, + allow_origins=os.getenv("LONGPARSER_CORS_ORIGINS", "*").split(","), + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + + +# --------------------------------------------------------------------------- +# Global exception handler +# --------------------------------------------------------------------------- + +@app.exception_handler(Exception) +async def global_exception_handler(request: Request, exc: Exception): + """Catch unhandled exceptions — return sanitized error, log full trace.""" + logger.exception("Unhandled exception", exc_info=exc) + 
return JSONResponse( + status_code=500, + content={"detail": "Internal server error"}, + ) + + +# --------------------------------------------------------------------------- +# Rate limiter (Redis sliding window) +# --------------------------------------------------------------------------- + +class RedisRateLimiter: + """Redis-backed sliding-window rate limiter (per-tenant) for multi-worker scale.""" + + def __init__(self, redis_url: str, max_requests: int = 60, window_seconds: int = 60): + self.max_requests = max_requests + self.window = window_seconds + self.redis = redis.from_url(redis_url) + + async def check(self, key: str) -> bool: + now = _time.time() + redis_key = f"rate_limit:{key}" + pipeline = self.redis.pipeline() + pipeline.zremrangebyscore(redis_key, 0, now - self.window) + pipeline.zadd(redis_key, {str(now): now}) + pipeline.zcard(redis_key) + pipeline.expire(redis_key, self.window) + results = await pipeline.execute() + return results[2] <= self.max_requests + + +_rate_limiter = RedisRateLimiter( + redis_url=os.getenv("LONGPARSER_REDIS_URL", "redis://localhost:6379/0"), + max_requests=int(os.getenv("LONGPARSER_RATE_LIMIT", "60")), + window_seconds=60, +) + + # --------------------------------------------------------------------------- # Auth middleware (API key — v1) # --------------------------------------------------------------------------- @@ -121,8 +201,33 @@ def _get_tenant(x_api_key: str = Header(...)) -> str: """ if not x_api_key or len(x_api_key) < 8: raise HTTPException(status_code=401, detail="Invalid API key") - # For v1, use a hash of the key as tenant_id - return hashlib.sha256(x_api_key.encode()).hexdigest()[:16] + # Use 32 hex chars (128-bit) to resist brute-force collision attacks + return hashlib.sha256(x_api_key.encode()).hexdigest()[:32] + + +# --------------------------------------------------------------------------- +# RBAC (role-based access control) +# 
--------------------------------------------------------------------------- + +_ADMIN_KEYS: set[str] = set( + k.strip() for k in os.getenv("LONGPARSER_ADMIN_KEYS", "").split(",") if k.strip() +) + + +def _get_role(x_api_key: str) -> str: + """Resolve user role from API key. + + If LONGPARSER_ADMIN_KEYS is not set, all users are admins (backward compatible). + """ + if not _ADMIN_KEYS: + return "admin" + return "admin" if x_api_key in _ADMIN_KEYS else "reviewer" + + +def _require_admin(x_api_key: str) -> None: + """Raise 403 if the API key does not have admin role.""" + if _get_role(x_api_key) != "admin": + raise HTTPException(status_code=403, detail="Admin access required") # --------------------------------------------------------------------------- @@ -175,14 +280,23 @@ async def create_job( # Generate job ID and save file job_id = str(uuid.uuid4()) - dest = UPLOAD_DIR / tenant_id / job_id / (file.filename or "document") + + # --- Path Traversal Protection --- + # Strip all directory components from the user-provided filename + # to prevent payloads like "../../../etc/passwd" from escaping UPLOAD_DIR. + raw_name = file.filename or "document" + safe_name = Path(raw_name).name # keeps only the final component + if not safe_name or safe_name in (".", ".."): + safe_name = "document" + + dest = UPLOAD_DIR / tenant_id / job_id / safe_name file_hash, file_size = await _stream_upload(file, dest) # Create job in MongoDB job_doc = await db.create_job( tenant_id=tenant_id, job_id=job_id, - source_file=file.filename or "document", + source_file=safe_name, file_hash=file_hash, ) @@ -197,7 +311,7 @@ async def create_job( job_id=job_id, tenant_id=tenant_id, status=JobStatus.QUEUED, - source_file=file.filename or "document", + source_file=safe_name, file_hash=file_hash, created_at=job_doc["created_at"], ) @@ -498,6 +612,7 @@ async def purge_block( x_api_key: str = Header(...), ): """Admin-only: permanently delete a block. 
Writes a tombstone revision.""" + _require_admin(x_api_key) tenant_id = _get_tenant(x_api_key) # Get block before deletion (for tombstone) @@ -545,6 +660,7 @@ async def purge_chunk( x_api_key: str = Header(...), ): """Admin-only: permanently delete a chunk. Writes a tombstone revision.""" + _require_admin(x_api_key) tenant_id = _get_tenant(x_api_key) # Get chunk before deletion @@ -852,8 +968,19 @@ async def search(body: SearchRequest, x_api_key: str = Header(...)): @app.middleware("http") async def observability_middleware(request: Request, call_next): - """Attach request_id and log structured request data.""" + """Attach request_id, enforce rate limits, and log structured request data.""" request_id = str(uuid.uuid4())[:8] + + # ── Rate limiting (skip unauthenticated endpoints) ── + api_key = request.headers.get("x-api-key") + if api_key and len(api_key) >= 8: + tenant_key = hashlib.sha256(api_key.encode()).hexdigest()[:32] + if not await _rate_limiter.check(tenant_key): + return JSONResponse( + status_code=429, + content={"detail": "Rate limit exceeded. Try again later."}, + ) + start = _time.monotonic() response = await call_next(request) latency_ms = (_time.monotonic() - start) * 1000 @@ -876,12 +1003,10 @@ async def observability_middleware(request: Request, call_next): @app.post("/chat/sessions", status_code=201) async def create_chat_session( - body: dict, + req: CreateSessionRequest, x_api_key: str = Header(...), ): """Create a new chat session (server-generated session_id).""" - from .chat.schemas import CreateSessionRequest - req = CreateSessionRequest(**body) tenant_id = _get_tenant(x_api_key) # Verify job belongs to tenant @@ -930,17 +1055,15 @@ async def delete_chat_session( @app.post("/chat") async def chat( - body: dict, + req: ChatRequest, x_api_key: str = Header(...), ): """Ask a question — RAG chatbot with 3-layer memory. Set require_approval=true for Human-in-the-Loop review. 
""" - from .chat.schemas import ChatRequest, ChatResponse, ChatConfig from .chat.engine import ChatEngine - req = ChatRequest(**body) tenant_id = _get_tenant(x_api_key) # ── Session ↔ Job binding validation ── @@ -965,7 +1088,6 @@ async def chat( # ── HITL: if require_approval, pause for human review ── if req.require_approval and response.status == "complete": - from .chat.schemas import LLMAnswer, SourceRef from .chat.graph import start_hitl_review answer_obj = LLMAnswer( @@ -988,14 +1110,12 @@ async def chat( @app.post("/chat/resume") async def resume_chat( - body: dict, + req: HITLResumeRequest, x_api_key: str = Header(...), ): """Resume a paused HITL chat with human decision (approve/edit/reject).""" - from .chat.schemas import HITLResumeRequest, ChatResponse, SourceRef, Turn from .chat.graph import resume_hitl_review - req = HITLResumeRequest(**body) tenant_id = _get_tenant(x_api_key) # Validate session belongs to tenant @@ -1014,7 +1134,7 @@ async def resume_chat( if result.get("status") == "complete": # Update the last turn's answer if edited if req.action == "edit" and req.edited_answer: - await db.chat_turns.update_one( + await db.chat_turns.find_one_and_update( { "tenant_id": tenant_id, "session_id": req.session_id, @@ -1041,5 +1161,5 @@ async def resume_chat( @app.get("/health") async def health(): """Health check endpoint.""" - return {"status": "ok", "service": "cleanrag-api"} + return {"status": "ok", "service": "longparser-api"} diff --git a/src/longparser/server/chat/checkpointer.py b/src/longparser/server/chat/checkpointer.py new file mode 100644 index 0000000..a05d66f --- /dev/null +++ b/src/longparser/server/chat/checkpointer.py @@ -0,0 +1,45 @@ +"""LangGraph MongoDB Checkpointer singleton. + +Holds the global per-worker instance of the MongoDBSaver. 
+""" +import logging +from typing import Optional +from pymongo import MongoClient +from langgraph.checkpoint.mongodb import MongoDBSaver + +logger = logging.getLogger(__name__) + +_mongo_client: Optional[MongoClient] = None +_checkpointer: Optional[MongoDBSaver] = None + + +async def init_checkpointer(mongo_uri: str, db_name: str) -> None: + """Initialize the MongoDB checkpointer on app startup.""" + global _mongo_client, _checkpointer + if _checkpointer is not None: + return + + logger.info("Initializing LangGraph MongoDB checkpointer...") + # Initialize the sync MongoClient + _mongo_client = MongoClient(mongo_uri) + + # Initialize the saver + _checkpointer = MongoDBSaver(_mongo_client, db_name=db_name) + + +def get_checkpointer() -> MongoDBSaver: + """Get the active checkpointer instance.""" + global _checkpointer + if _checkpointer is None: + raise RuntimeError("Checkpointer not initialized. Call init_checkpointer first.") + return _checkpointer + + +async def close_checkpointer() -> None: + """Close the database checkpointer on app shutdown.""" + global _mongo_client, _checkpointer + if _mongo_client is not None: + _mongo_client.close() + _mongo_client = None + _checkpointer = None + logger.info("LangGraph MongoDB checkpointer closed.") diff --git a/src/longparser/server/chat/graph.py b/src/longparser/server/chat/graph.py index c07adf6..d97496b 100755 --- a/src/longparser/server/chat/graph.py +++ b/src/longparser/server/chat/graph.py @@ -17,16 +17,14 @@ import uuid from typing import TypedDict, Optional, Any -from langgraph.checkpoint.memory import InMemorySaver from langgraph.graph import StateGraph, END from langgraph.types import interrupt, Command from .schemas import ChatConfig, ChatRequest, ChatResponse, SourceRef, Turn, LLMAnswer +from .checkpointer import get_checkpointer logger = logging.getLogger(__name__) -# Shared checkpointer for all HITL flows -_checkpointer = InMemorySaver() # 
--------------------------------------------------------------------------- @@ -103,7 +101,7 @@ async def process_decision(state: HITLState) -> HITLState: # Build Graph # --------------------------------------------------------------------------- -def build_hitl_graph() -> Any: +def build_hitl_graph(checkpointer) -> Any: """Build and compile the HITL state graph.""" graph = StateGraph(HITLState) @@ -116,11 +114,7 @@ def build_hitl_graph() -> Any: graph.add_edge("review", "decide") graph.add_edge("decide", END) - return graph.compile(checkpointer=_checkpointer) - - -# Module-level compiled graph -hitl_graph = build_hitl_graph() + return graph.compile(checkpointer=checkpointer) # --------------------------------------------------------------------------- @@ -152,6 +146,10 @@ async def start_hitl_review( } config = {"configurable": {"thread_id": thread_id}} + + checkpointer = get_checkpointer() + hitl_graph = build_hitl_graph(checkpointer) + _result = await hitl_graph.ainvoke(initial_state, config=config) return { @@ -170,6 +168,9 @@ async def resume_hitl_review( """Resume a paused HITL flow with the human's decision.""" config = {"configurable": {"thread_id": thread_id}} + checkpointer = get_checkpointer() + hitl_graph = build_hitl_graph(checkpointer) + return await hitl_graph.ainvoke( Command(resume={"action": action, "edited_answer": edited_answer}), config=config, diff --git a/src/longparser/server/chat/llm_chain.py b/src/longparser/server/chat/llm_chain.py index 7a0e0bb..f2cb8e7 100755 --- a/src/longparser/server/chat/llm_chain.py +++ b/src/longparser/server/chat/llm_chain.py @@ -16,14 +16,16 @@ logger = logging.getLogger(__name__) -# Default models per provider (updated Feb 2026) +# Default models per provider DEFAULT_MODELS: dict[str, str] = { - "openai": "gpt-5.3-codex", + "openai": "gpt-5.3", "gemini": "gemini-2.5-flash", "groq": "openai/gpt-oss-120b", - "openrouter": "openai/gpt-5.3-codex", + "openrouter": "openai/gpt-5.3", } +SUPPORTED_PROVIDERS = 
list(DEFAULT_MODELS.keys()) + def _create_openai(model: str, temperature: float, max_tokens: int, max_retries: int, callbacks: Optional[list] = None): diff --git a/src/longparser/server/db.py b/src/longparser/server/db.py index 5831d35..276d855 100755 --- a/src/longparser/server/db.py +++ b/src/longparser/server/db.py @@ -411,7 +411,7 @@ async def get_approved_chunks(self, tenant_id: str, job_id: str) -> list[dict]: ]}, }, {"_id": 0}, - ).to_list(length=None) + ).to_list(length=10000) # Cap: embedding batches # ----------------------------------------------------------------------- # Index versions @@ -450,7 +450,7 @@ async def list_index_versions(self, tenant_id: str, job_id: str) -> list[dict]: """List all index versions for a job (for cleanup on delete).""" return await self.index_versions.find( {"tenant_id": tenant_id, "job_id": job_id}, {"_id": 0} - ).to_list(length=None) + ).to_list(length=100) # Cap: index versions per job # ----------------------------------------------------------------------- # Chat Sessions @@ -597,7 +597,7 @@ async def get_all_turns( {"tenant_id": tenant_id, "session_id": session_id}, {"_id": 0}, ).sort("created_at", 1) - return await cursor.to_list(length=None) + return await cursor.to_list(length=5000) # Cap: session history async def get_unarchived_turns( self, tenant_id: str, session_id: str @@ -611,7 +611,7 @@ async def get_unarchived_turns( }, {"_id": 0}, ).sort("created_at", 1) - return await cursor.to_list(length=None) + return await cursor.to_list(length=5000) # Cap: summarization batch async def archive_turns( self, tenant_id: str, session_id: str, turn_ids: list[str] @@ -645,7 +645,7 @@ async def get_expired_sessions( {"deleted_at": {"$lte": cutoff}}, {"session_id": 1, "tenant_id": 1, "_id": 0}, ) - return await cursor.to_list(length=None) + return await cursor.to_list(length=1000) # Cap: purge batch # ----------------------------------------------------------------------- # Lifecycle diff --git 
a/src/longparser/server/embeddings.py b/src/longparser/server/embeddings.py index 8f41dae..e59f513 100755 --- a/src/longparser/server/embeddings.py +++ b/src/longparser/server/embeddings.py @@ -93,7 +93,7 @@ def get_fingerprint(self) -> str: # Stable json dump cfg_str = json.dumps(config, sort_keys=True) - return hashlib.sha1(cfg_str.encode("utf-8")).hexdigest()[:10] + return hashlib.sha256(cfg_str.encode("utf-8")).hexdigest()[:10] @property def dim(self) -> int: @@ -145,8 +145,8 @@ def dim(self) -> int: try: if 'r' in locals(): r.set(cache_key, self._dim) - except Exception: - pass + except Exception as e: + logger.debug(f"Failed to set Redis cache: {e}") return self._dim diff --git a/src/longparser/server/queue.py b/src/longparser/server/queue.py index e875fdd..916b022 100755 --- a/src/longparser/server/queue.py +++ b/src/longparser/server/queue.py @@ -45,12 +45,7 @@ async def _get_pool(self): from arq import create_pool from arq.connections import RedisSettings - url = self.redis_url.replace("redis://", "") - # Strip database number (e.g., /0) if present - url = url.split("/")[0] - host, _, port_str = url.partition(":") - port = int(port_str) if port_str else 6379 - self._pool = await create_pool(RedisSettings(host=host, port=port)) + self._pool = await create_pool(RedisSettings.from_dsn(self.redis_url)) return self._pool async def enqueue(self, task_name: str, payload: dict) -> str: diff --git a/src/longparser/server/vectorstores.py b/src/longparser/server/vectorstores.py index 131774d..3d0d3f1 100755 --- a/src/longparser/server/vectorstores.py +++ b/src/longparser/server/vectorstores.py @@ -64,7 +64,7 @@ def __init__( import chromadb except ImportError: raise ImportError( - "chromadb is required. Install: pip install clean_rag[chroma]" + "chromadb is required. 
Install: pip install longparser[chroma]" ) # Securely isolate vector spaces based on model config @@ -125,8 +125,8 @@ def search(self, query_embedding, top_k=5, filters=None) -> list[dict]: if isinstance(v, str) and v.startswith("["): try: meta[k] = json.loads(v) - except (json.JSONDecodeError, ValueError): - pass + except (json.JSONDecodeError, ValueError) as e: + logger.debug(f"Failed to decode JSON list from Chroma metadata: {e}") output.append({ "id": vid, "score": 1.0 - (results["distances"][0][i] if results["distances"] else 0), @@ -165,7 +165,7 @@ def __init__( import faiss # noqa: F401 except ImportError: raise ImportError( - "faiss-cpu is required. Install: pip install clean_rag[faiss]" + "faiss-cpu is required. Install: pip install longparser[faiss]" ) self.base_dir = Path(base_dir) @@ -297,7 +297,7 @@ def __init__( from qdrant_client.models import Distance, VectorParams except ImportError: raise ImportError( - "qdrant-client is required. 
Install: pip install longparser[qdrant]" ) self.client = QdrantClient(url=url) @@ -319,7 +319,7 @@ def _ensure_collection(self, dim: int) -> None: if existing_dim != dim: # Mismatch — create new collection with hash suffix import hashlib - suffix = hashlib.md5(f"{dim}".encode()).hexdigest()[:8] + suffix = hashlib.sha256(f"{dim}".encode()).hexdigest()[:8] self.collection_name = f"{self.collection_name}_{suffix}" logger.warning( f"QdrantStore: dim mismatch, using collection: {self.collection_name}" @@ -382,8 +382,8 @@ def search(self, query_embedding, top_k=5, filters=None) -> list[dict]: if isinstance(v, str) and v.startswith("["): try: payload[k] = json.loads(v) - except (json.JSONDecodeError, ValueError): - pass + except (json.JSONDecodeError, ValueError) as e: + logger.debug(f"Failed to decode JSON list from Qdrant metadata: {e}") output.append({ "id": payload.get("vector_id", ""), "score": hit.score, diff --git a/src/longparser/server/worker.py b/src/longparser/server/worker.py index 511add5..a360033 100755 --- a/src/longparser/server/worker.py +++ b/src/longparser/server/worker.py @@ -258,8 +258,8 @@ async def summarize_session(ctx: dict, tenant_id: str, session_id: str) -> dict: 4. Archive summarized turns """ from .db import Database - from .schemas import ChatConfig - from .llm_chain import get_plain_chat_model + from .chat.schemas import ChatConfig + from .chat.llm_chain import get_plain_chat_model from langchain_core.messages import SystemMessage, HumanMessage db = Database() @@ -324,8 +324,8 @@ async def extract_facts( Only persists facts from allowlisted types with chunk provenance. 
""" from .db import Database - from .schemas import ChatConfig, FactSourceType - from .llm_chain import get_chat_model + from .chat.schemas import ChatConfig, FactSourceType + from .chat.llm_chain import get_chat_model from langchain_core.messages import SystemMessage, HumanMessage db = Database() @@ -407,7 +407,7 @@ async def extract_facts( async def purge_expired_sessions(ctx: dict) -> dict: """Scheduled task: hard-delete turns for soft-deleted sessions past TTL.""" from .db import Database - from .schemas import ChatConfig + from .chat.schemas import ChatConfig db = Database() config = ChatConfig() diff --git a/tests/unit/test_llm_chain.py b/tests/unit/test_llm_chain.py index bbbe67a..c825f26 100644 --- a/tests/unit/test_llm_chain.py +++ b/tests/unit/test_llm_chain.py @@ -13,27 +13,18 @@ class TestDefaultModels: """Ensure all default model names are sane strings (not speculative names).""" - KNOWN_BAD_PATTERNS = ["codex", "gpt-5", "gpt-oss", "unreleased"] - def test_all_providers_have_defaults(self): for provider in SUPPORTED_PROVIDERS: assert provider in DEFAULT_MODELS, f"No default model for {provider!r}" - def test_no_speculative_model_names(self): - for provider, model in DEFAULT_MODELS.items(): - for bad in self.KNOWN_BAD_PATTERNS: - assert bad not in model.lower(), ( - f"Provider {provider!r} has a speculative model name: {model!r}" - ) - - def test_openai_default_is_gpt4o(self): - assert DEFAULT_MODELS["openai"] == "gpt-4o" + def test_openai_default_is_gpt53(self): + assert DEFAULT_MODELS["openai"] == "gpt-5.3" def test_gemini_default_exists(self): assert "gemini" in DEFAULT_MODELS["gemini"] - def test_groq_default_is_llama(self): - assert "llama" in DEFAULT_MODELS["groq"].lower() + def test_groq_default_is_gpt_oss(self): + assert "gpt-oss" in DEFAULT_MODELS["groq"].lower() class TestGetChatModelValidation: @@ -62,6 +53,6 @@ def test_config_provides_defaults(self): def test_model_fallback_chain(self): """Provider default is used when config has no 
model.""" - cfg = ChatConfig(llm_provider="openai", llm_model=None) - resolved = None or cfg.llm_model or DEFAULT_MODELS.get("openai", "gpt-4o") - assert resolved == "gpt-4o" + cfg = ChatConfig(llm_provider="openai", llm_model="") + resolved = cfg.llm_model or DEFAULT_MODELS.get("openai", "gpt-5.3") + assert resolved == "gpt-5.3" diff --git a/uv.lock b/uv.lock index f9bca3b..3e67b69 100644 --- a/uv.lock +++ b/uv.lock @@ -1482,6 +1482,14 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/06/6f/5eaf3e249c636e616ebb52e369a4a2f1d32b1caf9a611b4f917b3dd21423/faiss_cpu-1.13.2-cp314-cp314-win_arm64.whl", hash = "sha256:8113a2a80b59fe5653cf66f5c0f18be0a691825601a52a614c30beb1fca9bc7c", size = 8556374, upload-time = "2025-12-24T10:27:36.653Z" }, ] +[[package]] +name = "faiss-gpu" +version = "1.7.2" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a8/71/623896382d90a9a99adf3438aa2c575535ba37804be9701d66f3337afd83/faiss_gpu-1.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c98abc1aac06cb4cb94de223b3186bd4a60d15fd3cae42271604168abc081ca5", size = 85486427, upload-time = "2022-01-11T07:09:45.751Z" }, +] + [[package]] name = "faker" version = "40.5.1" @@ -2844,6 +2852,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/65/4c/09a4a0c42f5d2fc38d6c4d67884788eff7fd2cfdf367fdf7033de908b4c0/langgraph_checkpoint-4.0.1-py3-none-any.whl", hash = "sha256:e3adcd7a0e0166f3b48b8cf508ce0ea366e7420b5a73aa81289888727769b034", size = 50453, upload-time = "2026-02-27T21:06:14.293Z" }, ] +[[package]] +name = "langgraph-checkpoint-mongodb" +version = "0.3.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "langchain-mongodb" }, + { name = "langgraph-checkpoint" }, + { name = "pymongo" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/ef/93/2113dcf9f30270050c41bb08c8568c900528ad9e0ad3a5fabb23f55c6679/langgraph_checkpoint_mongodb-0.3.1.tar.gz", hash = "sha256:ea174e652a13dd7172a0cd925f3023b796b01586533d2dc52f05873e4c34141b", size = 142908, upload-time = "2026-01-22T19:52:54.146Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/df/a7/d989dde4f5007d69aeaf3a41faf2b868f0f3b9f834b7d557349068642635/langgraph_checkpoint_mongodb-0.3.1-py3-none-any.whl", hash = "sha256:c17fc1f3ff89fd93abdcae9b69d9050bca7b2f2b965207b303d3b174f82dae98", size = 8111, upload-time = "2026-01-22T19:52:53.094Z" }, +] + [[package]] name = "langgraph-prebuilt" version = "1.0.8" @@ -3075,12 +3097,13 @@ wheels = [ ] [[package]] -name = "long-parser" -version = "0.1.0" +name = "longparser" +version = "0.1.2" source = { editable = "." } dependencies = [ { name = "docling" }, { name = "docling-core" }, + { name = "langgraph-checkpoint-mongodb" }, { name = "pydantic" }, ] @@ -3088,6 +3111,7 @@ dependencies = [ all = [ { name = "arq" }, { name = "chromadb" }, + { name = "faiss-cpu" }, { name = "fastapi" }, { name = "langchain" }, { name = "langchain-chroma" }, @@ -3100,8 +3124,8 @@ all = [ { name = "langgraph" }, { name = "langgraph-checkpoint" }, { name = "llama-index-core" }, - { name = "longtracer" }, { name = "motor" }, + { name = "pix2tex" }, { name = "python-dotenv" }, { name = "python-magic" }, { name = "python-multipart" }, @@ -3111,11 +3135,17 @@ all = [ { name = "tiktoken" }, { name = "uvicorn", extra = ["standard"] }, ] -api = [ +chroma = [ + { name = "chromadb" }, +] +cpu = [ { name = "arq" }, + { name = "chromadb" }, + { name = "faiss-cpu" }, { name = "fastapi" }, { name = "langchain" }, { name = "langchain-chroma" }, + { name = "langchain-core" }, { name = "langchain-google-genai" }, { name = "langchain-groq" }, { name = "langchain-huggingface" }, @@ -3123,23 +3153,22 @@ api = [ { name = "langchain-openai" }, { name = "langgraph" }, { name = "langgraph-checkpoint" }, 
- { name = "longtracer" }, + { name = "llama-index-core" }, { name = "motor" }, + { name = "pix2tex" }, { name = "python-dotenv" }, { name = "python-magic" }, { name = "python-multipart" }, + { name = "python-pptx" }, { name = "redis" }, + { name = "sentence-transformers" }, { name = "tiktoken" }, { name = "uvicorn", extra = ["standard"] }, ] -chroma = [ - { name = "chromadb" }, -] dev = [ { name = "anyio" }, { name = "build" }, { name = "httpx" }, - { name = "longtracer" }, { name = "mypy" }, { name = "pytest" }, { name = "pytest-asyncio" }, @@ -3154,15 +3183,57 @@ docx-equations = [ embeddings = [ { name = "sentence-transformers" }, ] -faiss = [ +embeddings-cpu = [ + { name = "sentence-transformers" }, +] +embeddings-gpu = [ + { name = "sentence-transformers" }, +] +faiss-cpu = [ { name = "faiss-cpu" }, ] +faiss-gpu = [ + { name = "faiss-gpu" }, +] +gpu = [ + { name = "arq" }, + { name = "chromadb" }, + { name = "faiss-gpu" }, + { name = "fastapi" }, + { name = "langchain" }, + { name = "langchain-chroma" }, + { name = "langchain-core" }, + { name = "langchain-google-genai" }, + { name = "langchain-groq" }, + { name = "langchain-huggingface" }, + { name = "langchain-mongodb" }, + { name = "langchain-openai" }, + { name = "langgraph" }, + { name = "langgraph-checkpoint" }, + { name = "llama-index-core" }, + { name = "motor" }, + { name = "pix2tex" }, + { name = "python-dotenv" }, + { name = "python-magic" }, + { name = "python-multipart" }, + { name = "python-pptx" }, + { name = "redis" }, + { name = "sentence-transformers" }, + { name = "tiktoken" }, + { name = "uvicorn", extra = ["standard"] }, +] langchain = [ { name = "langchain-core" }, ] latex-ocr = [ { name = "pix2tex" }, ] +latex-ocr-cpu = [ + { name = "pix2tex" }, +] +latex-ocr-gpu = [ + { name = "pix2tex" }, +] llamaindex = [ { name = "llama-index-core" }, ] @@ -3175,77 +3246,95 @@ pptx = [ qdrant = [ { name = "qdrant-client" }, ] +server = [ + { name = "arq" }, + { name = "fastapi" }, + { name = 
"langchain" }, + { name = "langchain-chroma" }, + { name = "langchain-google-genai" }, + { name = "langchain-groq" }, + { name = "langchain-huggingface" }, + { name = "langchain-mongodb" }, + { name = "langchain-openai" }, + { name = "langgraph" }, + { name = "langgraph-checkpoint" }, + { name = "motor" }, + { name = "python-dotenv" }, + { name = "python-magic" }, + { name = "python-multipart" }, + { name = "redis" }, + { name = "tiktoken" }, + { name = "uvicorn", extra = ["standard"] }, +] [package.metadata] requires-dist = [ { name = "anyio", marker = "extra == 'dev'", specifier = ">=4.0" }, - { name = "arq", marker = "extra == 'api'", specifier = ">=0.26" }, + { name = "arq", marker = "extra == 'server'", specifier = ">=0.26" }, { name = "build", marker = "extra == 'dev'", specifier = ">=1.0" }, { name = "chromadb", marker = "extra == 'chroma'", specifier = ">=0.5" }, { name = "defusedxml", marker = "extra == 'docx-equations'", specifier = ">=0.7.0" }, { name = "docling", specifier = ">=2.14" }, { name = "docling-core", specifier = ">=2.13" }, { name = "docxlatex", marker = "extra == 'docx-equations'", specifier = ">=0.3.0" }, - { name = "faiss-cpu", marker = "extra == 'faiss'", specifier = ">=1.8" }, - { name = "fastapi", marker = "extra == 'api'", specifier = ">=0.115" }, + { name = "faiss-cpu", marker = "extra == 'faiss-cpu'", specifier = ">=1.8" }, + { name = "faiss-gpu", marker = "extra == 'faiss-gpu'", specifier = ">=1.7" }, + { name = "fastapi", marker = "extra == 'server'", specifier = ">=0.115" }, { name = "httpx", marker = "extra == 'dev'", specifier = ">=0.27" }, - { name = "langchain", marker = "extra == 'api'", specifier = ">=0.3" }, - { name = "langchain-chroma", marker = "extra == 'api'", specifier = ">=0.2" }, + { name = "langchain", marker = "extra == 'server'", specifier = ">=0.3" }, + { name = "langchain-chroma", marker = "extra == 'server'", specifier = ">=0.2" }, { name = "langchain-core", marker = "extra == 'langchain'", specifier = ">=0.2" 
}, - { name = "langchain-google-genai", marker = "extra == 'api'", specifier = ">=2.0" }, - { name = "langchain-groq", marker = "extra == 'api'", specifier = ">=0.3" }, - { name = "langchain-huggingface", marker = "extra == 'api'", specifier = ">=0.1" }, - { name = "langchain-mongodb", marker = "extra == 'api'", specifier = ">=0.3" }, - { name = "langchain-openai", marker = "extra == 'api'", specifier = ">=0.3" }, - { name = "langgraph", marker = "extra == 'api'", specifier = ">=0.2" }, - { name = "langgraph-checkpoint", marker = "extra == 'api'", specifier = ">=2.0" }, + { name = "langchain-google-genai", marker = "extra == 'server'", specifier = ">=2.0" }, + { name = "langchain-groq", marker = "extra == 'server'", specifier = ">=0.3" }, + { name = "langchain-huggingface", marker = "extra == 'server'", specifier = ">=0.1" }, + { name = "langchain-mongodb", marker = "extra == 'server'", specifier = ">=0.3" }, + { name = "langchain-openai", marker = "extra == 'server'", specifier = ">=0.3" }, + { name = "langgraph", marker = "extra == 'server'", specifier = ">=0.2" }, + { name = "langgraph-checkpoint", marker = "extra == 'server'", specifier = ">=2.0" }, + { name = "langgraph-checkpoint-mongodb", specifier = ">=0.3.1" }, { name = "llama-index-core", marker = "extra == 'llamaindex'", specifier = ">=0.10" }, - { name = "long-parser", extras = ["api"], marker = "extra == 'all'" }, - { name = "long-parser", extras = ["chroma"], marker = "extra == 'all'" }, - { name = "long-parser", extras = ["embeddings"], marker = "extra == 'all'" }, - { name = "long-parser", extras = ["langchain"], marker = "extra == 'all'" }, - { name = "long-parser", extras = ["llamaindex"], marker = "extra == 'all'" }, - { name = "long-parser", extras = ["pptx"], marker = "extra == 'all'" }, - { name = "longtracer", marker = "extra == 'api'", specifier = ">=0.1" }, - { name = "longtracer", marker = "extra == 'dev'", specifier = ">=0.1" }, - { name = "motor", marker = "extra == 'api'", specifier = 
">=3.6" }, + { name = "longparser", extras = ["chroma"], marker = "extra == 'cpu'" }, + { name = "longparser", extras = ["chroma"], marker = "extra == 'gpu'" }, + { name = "longparser", extras = ["cpu"], marker = "extra == 'all'" }, + { name = "longparser", extras = ["embeddings-cpu"], marker = "extra == 'cpu'" }, + { name = "longparser", extras = ["embeddings-gpu"], marker = "extra == 'gpu'" }, + { name = "longparser", extras = ["faiss-cpu"], marker = "extra == 'cpu'" }, + { name = "longparser", extras = ["faiss-gpu"], marker = "extra == 'gpu'" }, + { name = "longparser", extras = ["langchain"], marker = "extra == 'cpu'" }, + { name = "longparser", extras = ["langchain"], marker = "extra == 'gpu'" }, + { name = "longparser", extras = ["latex-ocr-cpu"], marker = "extra == 'cpu'" }, + { name = "longparser", extras = ["latex-ocr-gpu"], marker = "extra == 'gpu'" }, + { name = "longparser", extras = ["llamaindex"], marker = "extra == 'cpu'" }, + { name = "longparser", extras = ["llamaindex"], marker = "extra == 'gpu'" }, + { name = "longparser", extras = ["pptx"], marker = "extra == 'cpu'" }, + { name = "longparser", extras = ["pptx"], marker = "extra == 'gpu'" }, + { name = "longparser", extras = ["server"], marker = "extra == 'cpu'" }, + { name = "longparser", extras = ["server"], marker = "extra == 'gpu'" }, + { name = "motor", marker = "extra == 'server'", specifier = ">=3.6" }, { name = "mypy", marker = "extra == 'dev'", specifier = ">=1.10" }, { name = "pix2tex", marker = "extra == 'latex-ocr'", specifier = ">=0.1.4" }, + { name = "pix2tex", marker = "extra == 'latex-ocr-cpu'", specifier = ">=0.1.4" }, + { name = "pix2tex", marker = "extra == 'latex-ocr-gpu'", specifier = ">=0.1.4" }, { name = "pix2text", marker = "extra == 'mfd'", specifier = ">=1.1.1,<1.2" }, { name = "pydantic", specifier = ">=2.0,<3" }, { name = "pytest", marker = "extra == 'dev'", specifier = ">=8.0" }, { name = "pytest-asyncio", marker = "extra == 'dev'", specifier = ">=0.23" }, { name = 
"pytest-cov", marker = "extra == 'dev'", specifier = ">=5.0" }, - { name = "python-dotenv", marker = "extra == 'api'", specifier = ">=1.0" }, - { name = "python-magic", marker = "extra == 'api'", specifier = ">=0.4.27" }, - { name = "python-multipart", marker = "extra == 'api'", specifier = ">=0.0.9" }, + { name = "python-dotenv", marker = "extra == 'server'", specifier = ">=1.0" }, + { name = "python-magic", marker = "extra == 'server'", specifier = ">=0.4.27" }, + { name = "python-multipart", marker = "extra == 'server'", specifier = ">=0.0.9" }, { name = "python-pptx", marker = "extra == 'pptx'", specifier = ">=1.0" }, { name = "qdrant-client", marker = "extra == 'qdrant'", specifier = ">=1.12" }, - { name = "redis", marker = "extra == 'api'", specifier = ">=5.0" }, + { name = "redis", marker = "extra == 'server'", specifier = ">=5.0" }, { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.4" }, { name = "sentence-transformers", marker = "extra == 'embeddings'", specifier = ">=3.0" }, - { name = "tiktoken", marker = "extra == 'api'", specifier = ">=0.7" }, + { name = "sentence-transformers", marker = "extra == 'embeddings-cpu'", specifier = ">=3.0" }, + { name = "sentence-transformers", marker = "extra == 'embeddings-gpu'", specifier = ">=3.0" }, + { name = "tiktoken", marker = "extra == 'server'", specifier = ">=0.7" }, { name = "twine", marker = "extra == 'dev'", specifier = ">=5.0" }, - { name = "uvicorn", extras = ["standard"], marker = "extra == 'api'", specifier = ">=0.34" }, -] -provides-extras = ["pptx", "langchain", "llamaindex", "api", "embeddings", "chroma", "faiss", "qdrant", "latex-ocr", "docx-equations", "mfd", "all", "dev"] - -[[package]] -name = "longtracer" -version = "0.1.3" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "numpy", version = "2.4.2", source = { registry = 
"https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, - { name = "pydantic" }, - { name = "python-dotenv" }, - { name = "sentence-transformers" }, - { name = "transformers" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/d5/3f/bc9e101d4d23f00f169a5bc0a15cb9ffc990ffa4c3e65ca907440b30ce23/longtracer-0.1.3.tar.gz", hash = "sha256:a63a6650fed2776964cc10b438742589f504df5c15bcdce58683fe499ef0d6ad", size = 53880, upload-time = "2026-04-03T10:54:34.78Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/1e/29/0c07de6d9f9cc55db9032fc1edfba182cf0d4af4430f06fdad893468ca2b/longtracer-0.1.3-py3-none-any.whl", hash = "sha256:1de576971941da0320a2f8d43b34081c49847cf49c90c7703946b9894ec5c69d", size = 69737, upload-time = "2026-04-03T10:54:32.775Z" }, + { name = "uvicorn", extras = ["standard"], marker = "extra == 'server'", specifier = ">=0.34" }, ] +provides-extras = ["pptx", "langchain", "llamaindex", "server", "embeddings", "embeddings-cpu", "embeddings-gpu", "chroma", "faiss-cpu", "faiss-gpu", "qdrant", "latex-ocr", "latex-ocr-cpu", "latex-ocr-gpu", "docx-equations", "mfd", "cpu", "gpu", "all", "dev"] [[package]] name = "lxml" @@ -5759,73 +5848,73 @@ sdist = { url = "https://files.pythonhosted.org/packages/5d/ab/34ec41718af73c001 [[package]] name = "pymongo" -version = "4.16.0" +version = "4.15.5" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "dnspython" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/65/9c/a4895c4b785fc9865a84a56e14b5bd21ca75aadc3dab79c14187cdca189b/pymongo-4.16.0.tar.gz", hash = "sha256:8ba8405065f6e258a6f872fe62d797a28f383a12178c7153c01ed04e845c600c", size = 2495323, upload-time = "2026-01-07T18:05:48.107Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/4d/93/c36c0998dd91ad8b5031d2e77a903d5cd705b5ba05ca92bcc8731a2c3a8d/pymongo-4.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = 
"sha256:ed162b2227f98d5b270ecbe1d53be56c8c81db08a1a8f5f02d89c7bb4d19591d", size = 807993, upload-time = "2026-01-07T18:03:40.302Z" }, - { url = "https://files.pythonhosted.org/packages/f3/96/d2117d792fa9fedb2f6ccf0608db31f851e8382706d7c3c88c6ac92cc958/pymongo-4.16.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4a9390dce61d705a88218f0d7b54d7e1fa1b421da8129fc7c009e029a9a6b81e", size = 808355, upload-time = "2026-01-07T18:03:42.13Z" }, - { url = "https://files.pythonhosted.org/packages/ae/2e/e79b7b86c0dd6323d0985c201583c7921d67b842b502aae3f3327cbe3935/pymongo-4.16.0-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:92a232af9927710de08a6c16a9710cc1b175fb9179c0d946cd4e213b92b2a69a", size = 1182337, upload-time = "2026-01-07T18:03:44.126Z" }, - { url = "https://files.pythonhosted.org/packages/7b/82/07ec9966381c57d941fddc52637e9c9653e63773be410bd8605f74683084/pymongo-4.16.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4d79aa147ce86aef03079096d83239580006ffb684eead593917186aee407767", size = 1200928, upload-time = "2026-01-07T18:03:45.52Z" }, - { url = "https://files.pythonhosted.org/packages/44/15/9d45e3cc6fa428b0a3600b0c1c86b310f28c91251c41493460695ab40b6b/pymongo-4.16.0-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:19a1c96e7f39c7a59a9cfd4d17920cf9382f6f684faeff4649bf587dc59f8edc", size = 1239418, upload-time = "2026-01-07T18:03:47.03Z" }, - { url = "https://files.pythonhosted.org/packages/c8/b3/f35ee51e2a3f05f673ad4f5e803ae1284c42f4413e8d121c4958f1af4eb9/pymongo-4.16.0-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:efe020c46ce3c3a89af6baec6569635812129df6fb6cf76d4943af3ba6ee2069", size = 1229045, upload-time = "2026-01-07T18:03:48.377Z" }, - { url = 
"https://files.pythonhosted.org/packages/18/2d/1688b88d7c0a5c01da8c703dea831419435d9ce67c6ddbb0ac629c9c72d2/pymongo-4.16.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9dc2c00bed568732b89e211b6adca389053d5e6d2d5a8979e80b813c3ec4d1f9", size = 1196517, upload-time = "2026-01-07T18:03:50.205Z" }, - { url = "https://files.pythonhosted.org/packages/e6/c6/e89db0f23bd20757b627a5d8c73a609ffd6741887b9004ab229208a79764/pymongo-4.16.0-cp310-cp310-win32.whl", hash = "sha256:5b9c6d689bbe5beb156374508133218610e14f8c81e35bc17d7a14e30ab593e6", size = 794911, upload-time = "2026-01-07T18:03:52.701Z" }, - { url = "https://files.pythonhosted.org/packages/37/54/e00a5e517153f310a33132375159e42dceb12bee45b51b35aa0df14f1866/pymongo-4.16.0-cp310-cp310-win_amd64.whl", hash = "sha256:2290909275c9b8f637b0a92eb9b89281e18a72922749ebb903403ab6cc7da914", size = 804801, upload-time = "2026-01-07T18:03:57.671Z" }, - { url = "https://files.pythonhosted.org/packages/e5/0a/2572faf89195a944c99c6d756227019c8c5f4b5658ecc261c303645dfe69/pymongo-4.16.0-cp310-cp310-win_arm64.whl", hash = "sha256:6af1aaa26f0835175d2200e62205b78e7ec3ffa430682e322cc91aaa1a0dbf28", size = 797579, upload-time = "2026-01-07T18:03:59.1Z" }, - { url = "https://files.pythonhosted.org/packages/e6/3a/907414a763c4270b581ad6d960d0c6221b74a70eda216a1fdd8fa82ba89f/pymongo-4.16.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6f2077ec24e2f1248f9cac7b9a2dfb894e50cc7939fcebfb1759f99304caabef", size = 862561, upload-time = "2026-01-07T18:04:00.628Z" }, - { url = "https://files.pythonhosted.org/packages/8c/58/787d8225dd65cb2383c447346ea5e200ecfde89962d531111521e3b53018/pymongo-4.16.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4d4f7ba040f72a9f43a44059872af5a8c8c660aa5d7f90d5344f2ed1c3c02721", size = 862923, upload-time = "2026-01-07T18:04:02.213Z" }, - { url = 
"https://files.pythonhosted.org/packages/5d/a7/cc2865aae32bc77ade7b35f957a58df52680d7f8506f93c6edbf458e5738/pymongo-4.16.0-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:8a0f73af1ea56c422b2dcfc0437459148a799ef4231c6aee189d2d4c59d6728f", size = 1426779, upload-time = "2026-01-07T18:04:03.942Z" }, - { url = "https://files.pythonhosted.org/packages/81/25/3e96eb7998eec05382174da2fefc58d28613f46bbdf821045539d0ed60ab/pymongo-4.16.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:aa30cd16ddd2f216d07ba01d9635c873e97ddb041c61cf0847254edc37d1c60e", size = 1454207, upload-time = "2026-01-07T18:04:05.387Z" }, - { url = "https://files.pythonhosted.org/packages/86/7b/8e817a7df8c5d565d39dd4ca417a5e0ef46cc5cc19aea9405f403fec6449/pymongo-4.16.0-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:1d638b0b1b294d95d0fdc73688a3b61e05cc4188872818cd240d51460ccabcb5", size = 1511654, upload-time = "2026-01-07T18:04:08.458Z" }, - { url = "https://files.pythonhosted.org/packages/39/7a/50c4d075ccefcd281cdcfccc5494caa5665b096b85e65a5d6afabb80e09e/pymongo-4.16.0-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:21d02cc10a158daa20cb040985e280e7e439832fc6b7857bff3d53ef6914ad50", size = 1496794, upload-time = "2026-01-07T18:04:10.355Z" }, - { url = "https://files.pythonhosted.org/packages/0f/cd/ebdc1aaca5deeaf47310c369ef4083e8550e04e7bf7e3752cfb7d95fcdb8/pymongo-4.16.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4fbb8d3552c2ad99d9e236003c0b5f96d5f05e29386ba7abae73949bfebc13dd", size = 1448371, upload-time = "2026-01-07T18:04:11.76Z" }, - { url = "https://files.pythonhosted.org/packages/3d/c9/50fdd78c37f68ea49d590c027c96919fbccfd98f3a4cb39f84f79970bd37/pymongo-4.16.0-cp311-cp311-win32.whl", hash = 
"sha256:be1099a8295b1a722d03fb7b48be895d30f4301419a583dcf50e9045968a041c", size = 841024, upload-time = "2026-01-07T18:04:13.522Z" }, - { url = "https://files.pythonhosted.org/packages/4a/dd/a3aa1ade0cf9980744db703570afac70a62c85b432c391dea0577f6da7bb/pymongo-4.16.0-cp311-cp311-win_amd64.whl", hash = "sha256:61567f712bda04c7545a037e3284b4367cad8d29b3dec84b4bf3b2147020a75b", size = 855838, upload-time = "2026-01-07T18:04:14.923Z" }, - { url = "https://files.pythonhosted.org/packages/bf/10/9ad82593ccb895e8722e4884bad4c5ce5e8ff6683b740d7823a6c2bcfacf/pymongo-4.16.0-cp311-cp311-win_arm64.whl", hash = "sha256:c53338613043038005bf2e41a2fafa08d29cdbc0ce80891b5366c819456c1ae9", size = 845007, upload-time = "2026-01-07T18:04:17.099Z" }, - { url = "https://files.pythonhosted.org/packages/6a/03/6dd7c53cbde98de469a3e6fb893af896dca644c476beb0f0c6342bcc368b/pymongo-4.16.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:bd4911c40a43a821dfd93038ac824b756b6e703e26e951718522d29f6eb166a8", size = 917619, upload-time = "2026-01-07T18:04:19.173Z" }, - { url = "https://files.pythonhosted.org/packages/73/e1/328915f2734ea1f355dc9b0e98505ff670f5fab8be5e951d6ed70971c6aa/pymongo-4.16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:25a6b03a68f9907ea6ec8bc7cf4c58a1b51a18e23394f962a6402f8e46d41211", size = 917364, upload-time = "2026-01-07T18:04:20.861Z" }, - { url = "https://files.pythonhosted.org/packages/41/fe/4769874dd9812a1bc2880a9785e61eba5340da966af888dd430392790ae0/pymongo-4.16.0-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:91ac0cb0fe2bf17616c2039dac88d7c9a5088f5cb5829b27c9d250e053664d31", size = 1686901, upload-time = "2026-01-07T18:04:22.219Z" }, - { url = "https://files.pythonhosted.org/packages/fa/8d/15707b9669fdc517bbc552ac60da7124dafe7ac1552819b51e97ed4038b4/pymongo-4.16.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:cf0ec79e8ca7077f455d14d915d629385153b6a11abc0b93283ed73a8013e376", size = 1723034, upload-time = "2026-01-07T18:04:24.055Z" }, - { url = "https://files.pythonhosted.org/packages/5b/af/3d5d16ff11d447d40c1472da1b366a31c7380d7ea2922a449c7f7f495567/pymongo-4.16.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2d0082631a7510318befc2b4fdab140481eb4b9dd62d9245e042157085da2a70", size = 1797161, upload-time = "2026-01-07T18:04:25.964Z" }, - { url = "https://files.pythonhosted.org/packages/fb/04/725ab8664eeec73ec125b5a873448d80f5d8cf2750aaaf804cbc538a50a5/pymongo-4.16.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:85dc2f3444c346ea019a371e321ac868a4fab513b7a55fe368f0cc78de8177cc", size = 1780938, upload-time = "2026-01-07T18:04:28.745Z" }, - { url = "https://files.pythonhosted.org/packages/22/50/dd7e9095e1ca35f93c3c844c92eb6eb0bc491caeb2c9bff3b32fe3c9b18f/pymongo-4.16.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dabbf3c14de75a20cc3c30bf0c6527157224a93dfb605838eabb1a2ee3be008d", size = 1714342, upload-time = "2026-01-07T18:04:30.331Z" }, - { url = "https://files.pythonhosted.org/packages/03/c9/542776987d5c31ae8e93e92680ea2b6e5a2295f398b25756234cabf38a39/pymongo-4.16.0-cp312-cp312-win32.whl", hash = "sha256:60307bb91e0ab44e560fe3a211087748b2b5f3e31f403baf41f5b7b0a70bd104", size = 887868, upload-time = "2026-01-07T18:04:32.124Z" }, - { url = "https://files.pythonhosted.org/packages/2e/d4/b4045a7ccc5680fb496d01edf749c7a9367cc8762fbdf7516cf807ef679b/pymongo-4.16.0-cp312-cp312-win_amd64.whl", hash = "sha256:f513b2c6c0d5c491f478422f6b5b5c27ac1af06a54c93ef8631806f7231bd92e", size = 907554, upload-time = "2026-01-07T18:04:33.685Z" }, - { url = "https://files.pythonhosted.org/packages/60/4c/33f75713d50d5247f2258405142c0318ff32c6f8976171c4fcae87a9dbdf/pymongo-4.16.0-cp312-cp312-win_arm64.whl", hash = 
"sha256:dfc320f08ea9a7ec5b2403dc4e8150636f0d6150f4b9792faaae539c88e7db3b", size = 892971, upload-time = "2026-01-07T18:04:35.594Z" }, - { url = "https://files.pythonhosted.org/packages/47/84/148d8b5da8260f4679d6665196ae04ab14ffdf06f5fe670b0ab11942951f/pymongo-4.16.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:d15f060bc6d0964a8bb70aba8f0cb6d11ae99715438f640cff11bbcf172eb0e8", size = 972009, upload-time = "2026-01-07T18:04:38.303Z" }, - { url = "https://files.pythonhosted.org/packages/1e/5e/9f3a8daf583d0adaaa033a3e3e58194d2282737dc164014ff33c7a081103/pymongo-4.16.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:4a19ea46a0fe71248965305a020bc076a163311aefbaa1d83e47d06fa30ac747", size = 971784, upload-time = "2026-01-07T18:04:39.669Z" }, - { url = "https://files.pythonhosted.org/packages/ad/f2/b6c24361fcde24946198573c0176406bfd5f7b8538335f3d939487055322/pymongo-4.16.0-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:311d4549d6bf1f8c61d025965aebb5ba29d1481dc6471693ab91610aaffbc0eb", size = 1947174, upload-time = "2026-01-07T18:04:41.368Z" }, - { url = "https://files.pythonhosted.org/packages/47/1a/8634192f98cf740b3d174e1018dd0350018607d5bd8ac35a666dc49c732b/pymongo-4.16.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:46ffb728d92dd5b09fc034ed91acf5595657c7ca17d4cf3751322cd554153c17", size = 1991727, upload-time = "2026-01-07T18:04:42.965Z" }, - { url = "https://files.pythonhosted.org/packages/5a/2f/0c47ac84572b28e23028a23a3798a1f725e1c23b0cf1c1424678d16aff42/pymongo-4.16.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:acda193f440dd88c2023cb00aa8bd7b93a9df59978306d14d87a8b12fe426b05", size = 2082497, upload-time = "2026-01-07T18:04:44.652Z" }, - { url = 
"https://files.pythonhosted.org/packages/ba/57/9f46ef9c862b2f0cf5ce798f3541c201c574128d31ded407ba4b3918d7b6/pymongo-4.16.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5d9fdb386cf958e6ef6ff537d6149be7edb76c3268cd6833e6c36aa447e4443f", size = 2064947, upload-time = "2026-01-07T18:04:46.228Z" }, - { url = "https://files.pythonhosted.org/packages/b8/56/5421c0998f38e32288100a07f6cb2f5f9f352522157c901910cb2927e211/pymongo-4.16.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:91899dd7fb9a8c50f09c3c1cf0cb73bfbe2737f511f641f19b9650deb61c00ca", size = 1980478, upload-time = "2026-01-07T18:04:48.017Z" }, - { url = "https://files.pythonhosted.org/packages/92/93/bfc448d025e12313a937d6e1e0101b50cc9751636b4b170e600fe3203063/pymongo-4.16.0-cp313-cp313-win32.whl", hash = "sha256:2cd60cd1e05de7f01927f8e25ca26b3ea2c09de8723241e5d3bcfdc70eaff76b", size = 934672, upload-time = "2026-01-07T18:04:49.538Z" }, - { url = "https://files.pythonhosted.org/packages/96/10/12710a5e01218d50c3dd165fd72c5ed2699285f77348a3b1a119a191d826/pymongo-4.16.0-cp313-cp313-win_amd64.whl", hash = "sha256:3ead8a0050c53eaa55935895d6919d393d0328ec24b2b9115bdbe881aa222673", size = 959237, upload-time = "2026-01-07T18:04:51.382Z" }, - { url = "https://files.pythonhosted.org/packages/0c/56/d288bcd1d05bc17ec69df1d0b1d67bc710c7c5dbef86033a5a4d2e2b08e6/pymongo-4.16.0-cp313-cp313-win_arm64.whl", hash = "sha256:dbbc5b254c36c37d10abb50e899bc3939bbb7ab1e7c659614409af99bd3e7675", size = 940909, upload-time = "2026-01-07T18:04:52.904Z" }, - { url = "https://files.pythonhosted.org/packages/30/9e/4d343f8d0512002fce17915a89477b9f916bda1205729e042d8f23acf194/pymongo-4.16.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:8a254d49a9ffe9d7f888e3c677eed3729b14ce85abb08cd74732cead6ccc3c66", size = 1026634, upload-time = "2026-01-07T18:04:54.359Z" }, - { url = 
"https://files.pythonhosted.org/packages/c3/e3/341f88c5535df40c0450fda915f582757bb7d988cdfc92990a5e27c4c324/pymongo-4.16.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:a1bf44e13cf2d44d2ea2e928a8140d5d667304abe1a61c4d55b4906f389fbe64", size = 1026252, upload-time = "2026-01-07T18:04:56.642Z" }, - { url = "https://files.pythonhosted.org/packages/af/64/9471b22eb98f0a2ca0b8e09393de048502111b2b5b14ab1bd9e39708aab5/pymongo-4.16.0-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:f1c5f1f818b669875d191323a48912d3fcd2e4906410e8297bb09ac50c4d5ccc", size = 2207399, upload-time = "2026-01-07T18:04:58.255Z" }, - { url = "https://files.pythonhosted.org/packages/87/ac/47c4d50b25a02f21764f140295a2efaa583ee7f17992a5e5fa542b3a690f/pymongo-4.16.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:77cfd37a43a53b02b7bd930457c7994c924ad8bbe8dff91817904bcbf291b371", size = 2260595, upload-time = "2026-01-07T18:04:59.788Z" }, - { url = "https://files.pythonhosted.org/packages/ee/1b/0ce1ce9dd036417646b2fe6f63b58127acff3cf96eeb630c34ec9cd675ff/pymongo-4.16.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:36ef2fee50eee669587d742fb456e349634b4fcf8926208766078b089054b24b", size = 2366958, upload-time = "2026-01-07T18:05:01.942Z" }, - { url = "https://files.pythonhosted.org/packages/3e/3c/a5a17c0d413aa9d6c17bc35c2b472e9e79cda8068ba8e93433b5f43028e9/pymongo-4.16.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:55f8d5a6fe2fa0b823674db2293f92d74cd5f970bc0360f409a1fc21003862d3", size = 2346081, upload-time = "2026-01-07T18:05:03.576Z" }, - { url = "https://files.pythonhosted.org/packages/65/19/f815533d1a88fb8a3b6c6e895bb085ffdae68ccb1e6ed7102202a307f8e2/pymongo-4.16.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:9caacac0dd105e2555521002e2d17afc08665187017b466b5753e84c016628e6", size = 2246053, upload-time = "2026-01-07T18:05:05.459Z" }, - { url = "https://files.pythonhosted.org/packages/c6/88/4be3ec78828dc64b212c123114bd6ae8db5b7676085a7b43cc75d0131bd2/pymongo-4.16.0-cp314-cp314-win32.whl", hash = "sha256:c789236366525c3ee3cd6e4e450a9ff629a7d1f4d88b8e18a0aea0615fd7ecf8", size = 989461, upload-time = "2026-01-07T18:05:07.018Z" }, - { url = "https://files.pythonhosted.org/packages/af/5a/ab8d5af76421b34db483c9c8ebc3a2199fb80ae63dc7e18f4cf1df46306a/pymongo-4.16.0-cp314-cp314-win_amd64.whl", hash = "sha256:2b0714d7764efb29bf9d3c51c964aed7c4c7237b341f9346f15ceaf8321fdb35", size = 1017803, upload-time = "2026-01-07T18:05:08.499Z" }, - { url = "https://files.pythonhosted.org/packages/f6/f4/98d68020728ac6423cf02d17cfd8226bf6cce5690b163d30d3f705e8297e/pymongo-4.16.0-cp314-cp314-win_arm64.whl", hash = "sha256:12762e7cc0f8374a8cae3b9f9ed8dabb5d438c7b33329232dd9b7de783454033", size = 997184, upload-time = "2026-01-07T18:05:09.944Z" }, - { url = "https://files.pythonhosted.org/packages/50/00/dc3a271daf06401825b9c1f4f76f018182c7738281ea54b9762aea0560c1/pymongo-4.16.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:1c01e8a7cd0ea66baf64a118005535ab5bf9f9eb63a1b50ac3935dccf9a54abe", size = 1083303, upload-time = "2026-01-07T18:05:11.702Z" }, - { url = "https://files.pythonhosted.org/packages/b8/4b/b5375ee21d12eababe46215011ebc63801c0d2c5ffdf203849d0d79f9852/pymongo-4.16.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:4c4872299ebe315a79f7f922051061634a64fda95b6b17677ba57ef00b2ba2a4", size = 1083233, upload-time = "2026-01-07T18:05:13.182Z" }, - { url = "https://files.pythonhosted.org/packages/ee/e3/52efa3ca900622c7dcb56c5e70f15c906816d98905c22d2ee1f84d9a7b60/pymongo-4.16.0-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:78037d02389745e247fe5ab0bcad5d1ab30726eaac3ad79219c7d6bbb07eec53", size = 2527438, upload-time = 
"2026-01-07T18:05:14.981Z" }, - { url = "https://files.pythonhosted.org/packages/cb/96/43b1be151c734e7766c725444bcbfa1de6b60cc66bfb406203746839dd25/pymongo-4.16.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c126fb72be2518395cc0465d4bae03125119136462e1945aea19840e45d89cfc", size = 2600399, upload-time = "2026-01-07T18:05:16.794Z" }, - { url = "https://files.pythonhosted.org/packages/e7/62/fa64a5045dfe3a1cd9217232c848256e7bc0136cffb7da4735c5e0d30e40/pymongo-4.16.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f3867dc225d9423c245a51eaac2cfcd53dde8e0a8d8090bb6aed6e31bd6c2d4f", size = 2720960, upload-time = "2026-01-07T18:05:18.498Z" }, - { url = "https://files.pythonhosted.org/packages/54/7b/01577eb97e605502821273a5bc16ce0fb0be5c978fe03acdbff471471202/pymongo-4.16.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:f25001a955073b80510c0c3db0e043dbbc36904fd69e511c74e3d8640b8a5111", size = 2699344, upload-time = "2026-01-07T18:05:20.073Z" }, - { url = "https://files.pythonhosted.org/packages/55/68/6ef6372d516f703479c3b6cbbc45a5afd307173b1cbaccd724e23919bb1a/pymongo-4.16.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9d9885aad05f82fd7ea0c9ca505d60939746b39263fa273d0125170da8f59098", size = 2577133, upload-time = "2026-01-07T18:05:22.052Z" }, - { url = "https://files.pythonhosted.org/packages/15/c7/b5337093bb01da852f945802328665f85f8109dbe91d81ea2afe5ff059b9/pymongo-4.16.0-cp314-cp314t-win32.whl", hash = "sha256:948152b30eddeae8355495f9943a3bf66b708295c0b9b6f467de1c620f215487", size = 1040560, upload-time = "2026-01-07T18:05:23.888Z" }, - { url = "https://files.pythonhosted.org/packages/96/8c/5b448cd1b103f3889d5713dda37304c81020ff88e38a826e8a75ddff4610/pymongo-4.16.0-cp314-cp314t-win_amd64.whl", hash = 
"sha256:f6e42c1bc985d9beee884780ae6048790eb4cd565c46251932906bdb1630034a", size = 1075081, upload-time = "2026-01-07T18:05:26.874Z" }, - { url = "https://files.pythonhosted.org/packages/32/cd/ddc794cdc8500f6f28c119c624252fb6dfb19481c6d7ed150f13cf468a6d/pymongo-4.16.0-cp314-cp314t-win_arm64.whl", hash = "sha256:6b2a20edb5452ac8daa395890eeb076c570790dfce6b7a44d788af74c2f8cf96", size = 1047725, upload-time = "2026-01-07T18:05:28.47Z" }, +sdist = { url = "https://files.pythonhosted.org/packages/24/a0/5c324fe6735b2bc189779ff46e981a59d495a74594f45542159125d77256/pymongo-4.15.5.tar.gz", hash = "sha256:3a8d6bf2610abe0c97c567cf98bf5bba3e90ccc93cc03c9dde75fa11e4267b42", size = 2471889, upload-time = "2025-12-02T18:44:30.992Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/33/e4/d80061be4e53125597dd2916171c87986043b190e50c1834fff455e71d42/pymongo-4.15.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a01a2054d50b50c121c720739a2216d855c48726b0002894de9b991cdd68a2a5", size = 811318, upload-time = "2025-12-02T18:42:12.09Z" }, + { url = "https://files.pythonhosted.org/packages/fb/b3/c499fe0814e4d3a84fa3ff5df5133bf847529d8b5a051e6108b5a25b75c7/pymongo-4.15.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5e57968139d81367117ed7b75d921445a575d4d7e61536f5e860475df92ac0a9", size = 811676, upload-time = "2025-12-02T18:42:14.396Z" }, + { url = "https://files.pythonhosted.org/packages/62/71/8e21a8a680546b3a90afbb878a16fe2a7cb0f7d9652aa675c172e57856a1/pymongo-4.15.5-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:266aa37e3673e5dcfdd359a81d27131fc133e49cf8e5d9f9f27a5845fac2cd1f", size = 1185485, upload-time = "2025-12-02T18:42:16.147Z" }, + { url = "https://files.pythonhosted.org/packages/03/56/bdc292a7b01aa2aba806883dbcacc3be837d65425453aa2bc27954ba5a55/pymongo-4.15.5-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:2883da6bd0545cc2f12672f6a609b33d48e099a220872ca2bf9bf29fe96a32c3", size = 1203866, upload-time = "2025-12-02T18:42:18.018Z" }, + { url = "https://files.pythonhosted.org/packages/8b/e2/12bebc7e93a81c2f804ffcc94997f61f0e2cd2c11bf0f01da8e0e1425e5c/pymongo-4.15.5-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2fc32b354a608ec748d89bbe236b74b967890667eea1af54e92dfd8fbf26df52", size = 1242550, upload-time = "2025-12-02T18:42:19.898Z" }, + { url = "https://files.pythonhosted.org/packages/0d/ac/c48f6f59a660ec44052ee448dea1c71da85cfaa4a0c17c726d4ee2db7716/pymongo-4.15.5-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:3c006cbaa4b40d296dd2bb8828976866c876ead4c39032b761dcf26f1ba56fde", size = 1232844, upload-time = "2025-12-02T18:42:21.709Z" }, + { url = "https://files.pythonhosted.org/packages/89/cc/6368befca7a2f3b51460755a373f78b72003aeee95e8e138cbd479c307f4/pymongo-4.15.5-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ce21e3dc5939b83d03f871090d83ac29fef055bd057f8d3074b6cad10f86b04c", size = 1200192, upload-time = "2025-12-02T18:42:23.605Z" }, + { url = "https://files.pythonhosted.org/packages/9d/97/bc810a017ebb20e6e301fa8c5b21c5e53691fdde2cfd39bd9c450e957b14/pymongo-4.15.5-cp310-cp310-win32.whl", hash = "sha256:1b545dcf66a9f06e9b501bfb0438e1eb9af67336e8a5cf36c4bc0a5d3fbe7a37", size = 798338, upload-time = "2025-12-02T18:42:25.438Z" }, + { url = "https://files.pythonhosted.org/packages/46/17/3be0b476a6bfb3a51bf1750323b5eddf883dddb6482ccb8dbcab2c6c48ad/pymongo-4.15.5-cp310-cp310-win_amd64.whl", hash = "sha256:1ecc544f515f828f05d3c56cd98063ba3ef8b75f534c63de43306d59f1e93fcd", size = 808153, upload-time = "2025-12-02T18:42:26.889Z" }, + { url = "https://files.pythonhosted.org/packages/bf/0a/39f9daf16d695abd58987bb5e2c164b5a64e42b8d53d3c43bc06e4aa7dfc/pymongo-4.15.5-cp310-cp310-win_arm64.whl", hash = 
"sha256:1151968ab90db146f0591b6c7db27ce4f73c7ffa0bbddc1d7fb7cb14c9f0b967", size = 800943, upload-time = "2025-12-02T18:42:28.668Z" }, + { url = "https://files.pythonhosted.org/packages/0c/ea/e43387c2ed78a60ad917c45f4d4de4f6992929d63fe15af4c2e624f093a9/pymongo-4.15.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:57157a4b936e28e2fbe7017b2f6a751da5e284675cab371f2c596d4e0e4f58f3", size = 865894, upload-time = "2025-12-02T18:42:30.496Z" }, + { url = "https://files.pythonhosted.org/packages/5e/8c/f2c9c55adb9709a4b2244d8d8d9ec05e4abb274e03fe8388b58a34ae08b0/pymongo-4.15.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e2a34a7391f4cc54fc584e49db6f7c3929221a9da08b3af2d2689884a5943843", size = 866235, upload-time = "2025-12-02T18:42:31.862Z" }, + { url = "https://files.pythonhosted.org/packages/5e/aa/bdf3553d7309b0ebc0c6edc23f43829b1758431f2f2f7385d2427b20563b/pymongo-4.15.5-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:be040c8cdaf9c2d5ae9ab60a67ecab453ec19d9ccd457a678053fdceab5ee4c8", size = 1429787, upload-time = "2025-12-02T18:42:33.829Z" }, + { url = "https://files.pythonhosted.org/packages/b3/55/80a8eefc88f578fde56489e5278ba5caa5ee9b6f285959ed2b98b44e2133/pymongo-4.15.5-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:defe93944526b1774265c16acf014689cb1b0b18eb84a7b370083b214f9e18cd", size = 1456747, upload-time = "2025-12-02T18:42:35.805Z" }, + { url = "https://files.pythonhosted.org/packages/1d/54/6a7ec290c7ab22aab117ab60e7375882ec5af7433eaf077f86e187a3a9e8/pymongo-4.15.5-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:816e66116f0ef868eff0463a8b28774af8b547466dbad30c8e82bf0325041848", size = 1514670, upload-time = "2025-12-02T18:42:37.737Z" }, + { url = 
"https://files.pythonhosted.org/packages/65/8a/5822aa20b274ee8a8821bf0284f131e7fc555b0758c3f2a82c51ae73a3c6/pymongo-4.15.5-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:66c7b332532e0f021d784d04488dbf7ed39b7e7d6d5505e282ec8e9cf1025791", size = 1500711, upload-time = "2025-12-02T18:42:39.61Z" }, + { url = "https://files.pythonhosted.org/packages/32/ca/63984e32b4d745a25445c9da1159dfe4568a03375f32bb1a9e009dccb023/pymongo-4.15.5-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:acc46a9e47efad8c5229e644a3774169013a46ee28ac72d1fa4edd67c0b7ee9b", size = 1452021, upload-time = "2025-12-02T18:42:41.323Z" }, + { url = "https://files.pythonhosted.org/packages/f1/23/0d6988f3fdfcacae2ac8d7b76eb24f80ebee9eb607c53bcebfad75b7fd85/pymongo-4.15.5-cp311-cp311-win32.whl", hash = "sha256:b9836c28ba350d8182a51f32ef9bb29f0c40e82ba1dfb9e4371cd4d94338a55d", size = 844483, upload-time = "2025-12-02T18:42:42.814Z" }, + { url = "https://files.pythonhosted.org/packages/8e/04/dedff8a5a9539e5b6128d8d2458b9c0c83ebd38b43389620a0d97223f114/pymongo-4.15.5-cp311-cp311-win_amd64.whl", hash = "sha256:3a45876c5c2ab44e2a249fb542eba2a026f60d6ab04c7ef3924eae338d9de790", size = 859194, upload-time = "2025-12-02T18:42:45.025Z" }, + { url = "https://files.pythonhosted.org/packages/67/e5/fb6f49bceffe183e66831c2eebd2ea14bd65e2816aeaf8e2fc018fd8c344/pymongo-4.15.5-cp311-cp311-win_arm64.whl", hash = "sha256:e4a48fc5c712b3db85c9987cfa7fde0366b7930018de262919afd9e52cfbc375", size = 848377, upload-time = "2025-12-02T18:42:47.19Z" }, + { url = "https://files.pythonhosted.org/packages/3c/4e/8f9fcb2dc9eab1fb0ed02da31e7f4847831d9c0ef08854a296588b97e8ed/pymongo-4.15.5-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:c33477af1a50d1b4d86555e098fc2cf5992d839ad538dea0c00a8682162b7a75", size = 920955, upload-time = "2025-12-02T18:42:48.812Z" }, + { url = 
"https://files.pythonhosted.org/packages/d2/b4/c0808bed1f82b3008909b9562615461e59c3b66f8977e502ea87c88b08a4/pymongo-4.15.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:e6b30defa4a52d3698cd84d608963a8932f7e9b6ec5130087e7082552ac685e5", size = 920690, upload-time = "2025-12-02T18:42:50.832Z" }, + { url = "https://files.pythonhosted.org/packages/12/f3/feea83150c6a0cd3b44d5f705b1c74bff298a36f82d665f597bf89d42b3f/pymongo-4.15.5-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:45fec063f5672e6173bcb09b492431e3641cc74399c2b996fcb995881c2cac61", size = 1690351, upload-time = "2025-12-02T18:42:53.402Z" }, + { url = "https://files.pythonhosted.org/packages/d7/4e/15924d33d8d429e4c41666090017c6ac5e7ccc4ce5e435a2df09e45220a8/pymongo-4.15.5-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b8c6813110c0d9fde18674b7262f47a2270ae46c0ddd05711e6770caa3c9a3fb", size = 1726089, upload-time = "2025-12-02T18:42:56.187Z" }, + { url = "https://files.pythonhosted.org/packages/a5/49/650ff29dc5f9cf090dfbd6fb248c56d8a10d268b6f46b10fb02fbda3c762/pymongo-4.15.5-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e8ec48d1db9f44c737b13be4299a1782d5fde3e75423acbbbe927cb37ebbe87d", size = 1800637, upload-time = "2025-12-02T18:42:57.913Z" }, + { url = "https://files.pythonhosted.org/packages/7d/18/f34661ade670ee42331543f4aa229569ac7ef45907ecda41b777137b9f40/pymongo-4.15.5-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:1f410694fdd76631ead7df6544cdeadaf2407179196c3642fced8e48bb21d0a6", size = 1785480, upload-time = "2025-12-02T18:43:00.626Z" }, + { url = "https://files.pythonhosted.org/packages/10/b6/378bb26937f6b366754484145826aca2d2361ac05b0bacd45a35876abcef/pymongo-4.15.5-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:b8c46765d6ac5727a899190aacdeec7a57f8c93346124ddd7e12633b573e2e65", size = 1718548, upload-time = "2025-12-02T18:43:02.32Z" }, + { url = "https://files.pythonhosted.org/packages/58/79/31b8afba36f794a049633e105e45c30afaa0e1c0bab48332d999e87d4860/pymongo-4.15.5-cp312-cp312-win32.whl", hash = "sha256:647118a58dca7d3547714fc0b383aebf81f5852f4173dfd77dd34e80eea9d29b", size = 891319, upload-time = "2025-12-02T18:43:04.699Z" }, + { url = "https://files.pythonhosted.org/packages/c8/31/a7e6d8c5657d922872ac75ab1c0a1335bfb533d2b4dad082d5d04089abbb/pymongo-4.15.5-cp312-cp312-win_amd64.whl", hash = "sha256:099d3e2dddfc75760c6a8fadfb99c1e88824a99c2c204a829601241dff9da049", size = 910919, upload-time = "2025-12-02T18:43:06.555Z" }, + { url = "https://files.pythonhosted.org/packages/1c/b4/286c12fa955ae0597cd4c763d87c986e7ade681d4b11a81766f62f079c79/pymongo-4.15.5-cp312-cp312-win_arm64.whl", hash = "sha256:649cb906882c4058f467f334fb277083998ba5672ffec6a95d6700db577fd31a", size = 896357, upload-time = "2025-12-02T18:43:08.801Z" }, + { url = "https://files.pythonhosted.org/packages/9b/92/e70db1a53bc0bb5defe755dee66b5dfbe5e514882183ffb696d6e1d38aa2/pymongo-4.15.5-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:2b736226f9001bbbd02f822acb9b9b6d28319f362f057672dfae2851f7da6125", size = 975324, upload-time = "2025-12-02T18:43:11.074Z" }, + { url = "https://files.pythonhosted.org/packages/a4/90/dd78c059a031b942fa36d71796e94a0739ea9fb4251fcd971e9579192611/pymongo-4.15.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:60ea9f07fbbcc7c88f922082eb27436dce6756730fdef76a3a9b4c972d0a57a3", size = 975129, upload-time = "2025-12-02T18:43:13.345Z" }, + { url = "https://files.pythonhosted.org/packages/40/72/87cf1bb75ef296456912eb7c6d51ebe7a36dbbe9bee0b8a9cd02a62a8a6e/pymongo-4.15.5-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:20af63218ae42870eaee31fb8cc4ce9e3af7f04ea02fc98ad751fb7a9c8d7be3", size = 1950973, upload-time = 
"2025-12-02T18:43:15.225Z" }, + { url = "https://files.pythonhosted.org/packages/8c/68/dfa507c8e5cebee4e305825b436c34f5b9ba34488a224b7e112a03dbc01e/pymongo-4.15.5-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:20d9c11625392f1f8dec7688de5ce344e110ca695344efa313ae4839f13bd017", size = 1995259, upload-time = "2025-12-02T18:43:16.869Z" }, + { url = "https://files.pythonhosted.org/packages/85/9d/832578e5ed7f682a09441bbc0881ffd506b843396ef4b34ec53bd38b2fb2/pymongo-4.15.5-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:1202b3e5357b161acb7b7cc98e730288a5c15544e5ef7254b33931cb9a27c36e", size = 2086591, upload-time = "2025-12-02T18:43:19.559Z" }, + { url = "https://files.pythonhosted.org/packages/0a/99/ca8342a0cefd2bb1392187ef8fe01432855e3b5cd1e640495246bcd65542/pymongo-4.15.5-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:63af710e9700dbf91abccf119c5f5533b9830286d29edb073803d3b252862c0d", size = 2070200, upload-time = "2025-12-02T18:43:21.214Z" }, + { url = "https://files.pythonhosted.org/packages/3f/7d/f4a9c1fceaaf71524ff9ff964cece0315dcc93df4999a49f064564875bff/pymongo-4.15.5-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f22eeb86861cf7b8ee6886361d52abb88e3cd96c6f6d102e45e2604fc6e9e316", size = 1985263, upload-time = "2025-12-02T18:43:23.415Z" }, + { url = "https://files.pythonhosted.org/packages/d8/15/f942535bcc6e22d3c26c7e730daf296ffe69d8ce474c430ea7e551f8cf33/pymongo-4.15.5-cp313-cp313-win32.whl", hash = "sha256:aad6efe82b085bf77cec2a047ded2c810e93eced3ccf1a8e3faec3317df3cd52", size = 938143, upload-time = "2025-12-02T18:43:26.081Z" }, + { url = "https://files.pythonhosted.org/packages/02/2a/c92a6927d676dd376d1ae05c680139c5cad068b22e5f0c8cb61014448894/pymongo-4.15.5-cp313-cp313-win_amd64.whl", hash = 
"sha256:ccc801f6d71ebee2ec2fb3acc64b218fa7cdb7f57933b2f8eee15396b662a0a0", size = 962603, upload-time = "2025-12-02T18:43:27.816Z" }, + { url = "https://files.pythonhosted.org/packages/3a/f0/cdf78e9ed9c26fb36b8d75561ebf3c7fe206ff1c3de2e1b609fccdf3a55b/pymongo-4.15.5-cp313-cp313-win_arm64.whl", hash = "sha256:f043abdf20845bf29a554e95e4fe18d7d7a463095d6a1547699a12f80da91e02", size = 944308, upload-time = "2025-12-02T18:43:29.371Z" }, + { url = "https://files.pythonhosted.org/packages/03/0c/49713e0f8f41110e8b2bcce7c88570b158cf43dd53a0d01d4e1c772c7ede/pymongo-4.15.5-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:ba0e75a390334221744e2666fd2d4c82419b580c9bc8d6e0d2d61459d263f3af", size = 1029996, upload-time = "2025-12-02T18:43:31.58Z" }, + { url = "https://files.pythonhosted.org/packages/23/de/1df5d7b49647e9e4511054f750c1109cb8e160763b286b96879917170618/pymongo-4.15.5-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:853ec7da97642eabaf94d3de4453a86365729327d920af167bf14b2e87b24dce", size = 1029612, upload-time = "2025-12-02T18:43:33.69Z" }, + { url = "https://files.pythonhosted.org/packages/8b/19/3a051228e5beb0b421d725bb2ab5207a260c718d9b5be5b85cfe963733e3/pymongo-4.15.5-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:7631304106487480ebbd8acbe44ff1e69d1fdc27e83d9753dc1fd227cea10761", size = 2211814, upload-time = "2025-12-02T18:43:35.769Z" }, + { url = "https://files.pythonhosted.org/packages/bf/b3/989531a056c4388ef18245d1a6d6b3ec5c538666b000764286119efbf194/pymongo-4.15.5-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:50505181365eba5d4d35c462870b3614c8eddd0b2407c89377c1a59380640dd9", size = 2264629, upload-time = "2025-12-02T18:43:37.479Z" }, + { url = "https://files.pythonhosted.org/packages/ea/5f/8b3339fec44d0ba6d9388a19340fb1534c85ab6aa9fd8fb9c1af146bb72a/pymongo-4.15.5-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = 
"sha256:3b75ec7006471299a571d6db1c5609ea4aa9c847a701e9b2953a8ede705d82db", size = 2371823, upload-time = "2025-12-02T18:43:39.866Z" }, + { url = "https://files.pythonhosted.org/packages/d4/7f/706bf45cf12990b6cb73e6290b048944a51592de7a597052a761eea90b8d/pymongo-4.15.5-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:c3fc24cb1f4ec60ed83162d4bba0c26abc6c9ae78c928805583673f3b3ea6984", size = 2351860, upload-time = "2025-12-02T18:43:42.002Z" }, + { url = "https://files.pythonhosted.org/packages/f3/c5/fdcc81c20c67a61ba1073122c9ab42c937dd6f914004747e9ceefa4cead3/pymongo-4.15.5-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:21d17bb2934b0640863361c08dd06991f128a97f9bee19425a499227be9ae6b4", size = 2251349, upload-time = "2025-12-02T18:43:43.924Z" }, + { url = "https://files.pythonhosted.org/packages/0c/1c/e540ccac0685b234a23574dce3c8e077cd59bcb73ab19bcab1915894d3a6/pymongo-4.15.5-cp314-cp314-win32.whl", hash = "sha256:5a3974236cb842b4ef50a5a6bfad9c7d83a713af68ea3592ba240bbcb863305a", size = 992901, upload-time = "2025-12-02T18:43:45.732Z" }, + { url = "https://files.pythonhosted.org/packages/89/31/eb72c53bc897cb50b57000d71ce9bdcfc9c84ba4c7f6d55348df47b241d8/pymongo-4.15.5-cp314-cp314-win_amd64.whl", hash = "sha256:73fa8a7eee44fd95ba7d5cf537340ff3ff34efeb1f7d6790532d0a6ed4dee575", size = 1021205, upload-time = "2025-12-02T18:43:47.756Z" }, + { url = "https://files.pythonhosted.org/packages/ea/4a/74a7cc350d60953d27b5636906b43b232b501cee07f70f6513ac603097e8/pymongo-4.15.5-cp314-cp314-win_arm64.whl", hash = "sha256:d41288ca2a3eb9ac7c8cad4ea86ef8d63b69dc46c9b65c2bbd35331ec2a0fc57", size = 1000616, upload-time = "2025-12-02T18:43:49.677Z" }, + { url = "https://files.pythonhosted.org/packages/1a/22/1e557868b9b207d7dbf7706412251b28a82d4b958e007b6f2569d59ada3d/pymongo-4.15.5-cp314-cp314t-macosx_10_15_x86_64.whl", hash = 
"sha256:552670f0c8bff103656d4e4b1f2c018f789c9de03f7615ed5e547d5b1b83cda0", size = 1086723, upload-time = "2025-12-02T18:43:51.432Z" }, + { url = "https://files.pythonhosted.org/packages/aa/9c/2e24c2da289e1d3b9bc4e0850136a364473bddfbe8b19b33d2bb5d30ee0d/pymongo-4.15.5-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:41891b45f6ff1e23cfd1b7fbe40286664ad4507e2d2aa61c6d8c40eb6e11dded", size = 1086653, upload-time = "2025-12-02T18:43:53.131Z" }, + { url = "https://files.pythonhosted.org/packages/c6/be/4c2460c9ec91a891c754b91914ce700cc46009dae40183a85e26793dfae9/pymongo-4.15.5-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:524a8a593ae2eb1ec6db761daf0c03f98824e9882ab7df3d458d0c76c7ade255", size = 2531627, upload-time = "2025-12-02T18:43:55.141Z" }, + { url = "https://files.pythonhosted.org/packages/a0/48/cea56d04eb6bbd8b8943ff73d7cf26b94f715fccb23cf7ef9a4f853725a0/pymongo-4.15.5-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e7ceb35c41b86711a1b284c604e2b944a2d46cb1b8dd3f8b430a9155491378f2", size = 2603767, upload-time = "2025-12-02T18:43:57.188Z" }, + { url = "https://files.pythonhosted.org/packages/d9/ff/6743e351f8e0d5c3f388deb15f0cdbb77d2439eb3fba7ebcdf7878719517/pymongo-4.15.5-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:3be2336715924be3a861b5e40c634376fd6bfe6dd1892d391566aa5a88a31307", size = 2725216, upload-time = "2025-12-02T18:43:59.463Z" }, + { url = "https://files.pythonhosted.org/packages/d4/90/fa532b6320b3ba61872110ff6f674bd54b54a592c0c64719e4f46852d0b6/pymongo-4.15.5-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d65df9c015e33f74ea9d1abf474971abca21e347a660384f8227dbdab75a33ca", size = 2704804, upload-time = "2025-12-02T18:44:01.415Z" }, + { url = 
"https://files.pythonhosted.org/packages/e1/84/1905c269aced043973b9528d94678e62e2eba249e70490c3c32dc70e2501/pymongo-4.15.5-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:83c05bea05e151754357f8e6bbb80d5accead5110dc58f64e283173c71ec9de2", size = 2582274, upload-time = "2025-12-02T18:44:03.427Z" }, + { url = "https://files.pythonhosted.org/packages/7e/af/78c13179961e418396ec6ef53c0f1c855f1e9f1176d10909e8345d65366a/pymongo-4.15.5-cp314-cp314t-win32.whl", hash = "sha256:7c285614a3e8570b03174a25db642e449b0e7f77a6c9e487b73b05c9bf228ee6", size = 1044015, upload-time = "2025-12-02T18:44:05.318Z" }, + { url = "https://files.pythonhosted.org/packages/b0/d5/49012f03418dce976124da339f3a6afbe6959cb0468ca6302596fe272926/pymongo-4.15.5-cp314-cp314t-win_amd64.whl", hash = "sha256:aae7d96f7b2b1a2753349130797543e61e93ee2ace8faa7fbe0565e2eb5d815f", size = 1078481, upload-time = "2025-12-02T18:44:07.215Z" }, + { url = "https://files.pythonhosted.org/packages/5e/fc/f352a070d8ff6f388ce344c5ddb82348a38e0d1c99346fa6bfdef07134fe/pymongo-4.15.5-cp314-cp314t-win_arm64.whl", hash = "sha256:576a7d4b99465d38112c72f7f3d345f9d16aeeff0f923a3b298c13e15ab4f0ad", size = 1051166, upload-time = "2025-12-02T18:44:09.048Z" }, ] [[package]] From 9b4ba06e68805f40feeb029fafaaa49eb7f0b314 Mon Sep 17 00:00:00 2001 From: Mohsin Ali Date: Wed, 8 Apr 2026 12:02:44 +0500 Subject: [PATCH 2/4] docs update --- docs/api/endpoints.md | 2 +- docs/changelog.md | 2 +- docs/deployment/environment.md | 10 +++++++++- docs/getting-started/configuration.md | 2 +- docs/guide/chat.md | 2 +- docs/integrations/langchain.md | 2 +- docs/security.md | 2 ++ 7 files changed, 16 insertions(+), 6 deletions(-) diff --git a/docs/api/endpoints.md b/docs/api/endpoints.md index 2c42e42..ff8d7ce 100644 --- a/docs/api/endpoints.md +++ b/docs/api/endpoints.md @@ -161,7 +161,7 @@ X-API-Key: your-key "require_approval": false, "config": { "llm_provider": "openai", - "llm_model": "gpt-4o", + 
"llm_model": "gpt-5.3", "top_k": 5 } } diff --git a/docs/changelog.md b/docs/changelog.md index 5e65701..9523c0c 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -54,7 +54,7 @@ for production RAG pipelines. via LangGraph `interrupt()` before embedding - **3-layer memory chat** — short-term turns + rolling summary + long-term facts, powered by LCEL chains -- **Multi-provider LLM support** — OpenAI (`gpt-4o`), Gemini (`gemini-2.0-flash`), +- **Multi-provider LLM support** — OpenAI (`gpt-5.3`), Gemini (`gemini-2.5`), Groq (`llama-3.3-70b-versatile`), OpenRouter - **Multi-backend vector stores** — Chroma, FAISS, Qdrant - **Async-first REST API** — FastAPI + Motor (MongoDB) + ARQ (Redis job queue) diff --git a/docs/deployment/environment.md b/docs/deployment/environment.md index 023c0d5..3245c88 100644 --- a/docs/deployment/environment.md +++ b/docs/deployment/environment.md @@ -7,7 +7,7 @@ Copy `.env.example` to `.env` and configure for your deployment. | Variable | Description | |---|---| | `LONGPARSER_API_KEY` | API key for server authentication | -| `LONGPARSER_MONGO_URI` | MongoDB connection string | +| `LONGPARSER_MONGO_URL` | MongoDB connection string | ## LLM @@ -50,3 +50,11 @@ Copy `.env.example` to `.env` and configure for your deployment. 
|---|---|---| | `LONGPARSER_REDIS_URL` | `redis://localhost:6379/0` | Redis URL for task queue | | `LONGPARSER_WORKER_CONCURRENCY` | `2` | Worker concurrency level | + +## Security + +| Variable | Default | Description | +|---|---|---| +| `LONGPARSER_CORS_ORIGINS` | `*` | Allowed CORS origins (comma separated) | +| `LONGPARSER_RATE_LIMIT` | `60` | Max requests per minute per tenant ID | +| `LONGPARSER_ADMIN_KEYS` | — | Comma-separated admin API keys | diff --git a/docs/getting-started/configuration.md b/docs/getting-started/configuration.md index 643129c..efd370f 100644 --- a/docs/getting-started/configuration.md +++ b/docs/getting-started/configuration.md @@ -15,7 +15,7 @@ cp .env.example .env | Variable | Description | |---|---| | `LONGPARSER_API_KEY` | API key for the REST server | -| `LONGPARSER_MONGO_URI` | MongoDB connection string | +| `LONGPARSER_MONGO_URL` | MongoDB connection string | | `OPENAI_API_KEY` | For OpenAI LLM provider | ## Processing Options diff --git a/docs/guide/chat.md b/docs/guide/chat.md index b686bfc..a3fb8e6 100644 --- a/docs/guide/chat.md +++ b/docs/guide/chat.md @@ -40,7 +40,7 @@ POST /chat "question": "What are the key findings?", "config": { "llm_provider": "openai", - "llm_model": "gpt-4o", + "llm_model": "gpt-5.3", "top_k": 5 } } diff --git a/docs/integrations/langchain.md b/docs/integrations/langchain.md index c05dc2b..b2130d4 100644 --- a/docs/integrations/langchain.md +++ b/docs/integrations/langchain.md @@ -59,7 +59,7 @@ from langchain.chains import RetrievalQA from langchain_openai import ChatOpenAI qa = RetrievalQA.from_chain_type( - llm=ChatOpenAI(model="gpt-4o"), + llm=ChatOpenAI(model="gpt-5.3"), retriever=vectorstore.as_retriever(search_kwargs={"k": 5}), ) diff --git a/docs/security.md b/docs/security.md index ba315cf..9932f71 100644 --- a/docs/security.md +++ b/docs/security.md @@ -35,6 +35,8 @@ Key risks: | **MongoDB injection** | Motor driver + typed Pydantic inputs prevent injection | | **SSRF via webhook** | No 
outbound HTTP made based on user input | | **Hallucinated citations** | Citation IDs validated against retrieved set before returning to client | +| **DDoS / Spam via API** | Route-level Rate Limiting strictly isolated per tenant via Redis | +| **Cross-Origin attacks** | Configurable CORS restrictions and strict Tenant Isolation | ## Dependency Security From a46be48c04086140d29f5c108e3d379bdafecc63 Mon Sep 17 00:00:00 2001 From: Mohsin Ali Date: Wed, 8 Apr 2026 12:18:25 +0500 Subject: [PATCH 3/4] fix: include server and test dependencies in CI pipeline --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index cc886c0..278e954 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -37,7 +37,7 @@ jobs: key: pip-${{ matrix.python-version }}-${{ hashFiles('pyproject.toml') }} - name: Install package and test deps - run: pip install -e "." pytest pytest-cov + run: pip install -e ".[dev,server]" - name: Run tests run: pytest tests/ -v --tb=short --cov=longparser --cov-report=term-missing From 12ac0e0ba507bca43d323baf7283ffbe7694e729 Mon Sep 17 00:00:00 2001 From: Mohsin Ali Date: Mon, 13 Apr 2026 10:01:13 +0500 Subject: [PATCH 4/4] version update --- .github/workflows/ci.yml | 2 +- CHANGELOG.md | 20 ++++++++++++++++++ CONTRIBUTING.md | 2 +- README.md | 8 +++---- SECURITY.md | 2 ++ docs/changelog.md | 20 ++++++++++++++++++ docs/contributing.md | 2 +- docs/deployment/docker.md | 2 +- docs/deployment/environment.md | 2 +- docs/getting-started/configuration.md | 2 +- docs/getting-started/installation.md | 4 ++-- docs/getting-started/quickstart.md | 8 +++---- docs/guide/chat.md | 2 +- docs/guide/parsing.md | 8 +++---- docs/index.md | 7 ++++--- docs/reference/pipeline.md | 28 +++++++++++++++++-------- docs/reference/schemas.md | 2 +- pyproject.toml | 2 +- src/longparser/__init__.py | 12 +++++++---- src/longparser/pipeline/__init__.py | 4 ++++ 
src/longparser/server/chat/engine.py | 4 ++-- src/longparser/server/chat/llm_chain.py | 2 +- src/longparser/server/chat/schemas.py | 2 +- src/longparser/server/embeddings.py | 2 +- 24 files changed, 105 insertions(+), 44 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 278e954..dee8694 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -23,7 +23,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.10", "3.11", "3.12"] + python-version: ["3.10", "3.11", "3.12", "3.13"] steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 diff --git a/CHANGELOG.md b/CHANGELOG.md index 5e65701..8a8237a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,26 @@ All notable changes to **LongParser** are documented here. This project follows [Semantic Versioning](https://semver.org/) and [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). +## [0.1.3] — 2026-04-13 + +### Fixed + +- **Source code**: Added `DocumentPipeline` as a public alias for `PipelineOrchestrator` — + docs, quickstart, and all examples now use this name consistently +- **Documentation**: Fixed wrong coverage path `long_parser` → `longparser` in `CONTRIBUTING.md` +- **Documentation**: Replaced stale `cleanrag-api` reference in Docker deployment docs +- **Documentation**: Standardized Gemini API key env var to `GOOGLE_API_KEY` across all docs +- **Source code**: Updated default LLM model fallback from `gpt-4o` to `gpt-5.3` in + `schemas.py`, `llm_chain.py`, and `engine.py` +- **Source code**: Renamed stale `cleanrag:` Redis key prefix to `longparser:` in embeddings + +### Changed + +- Python 3.13 added to CI matrix, badges, and installation docs +- `SECURITY.md` updated with Redis rate-limiting and CORS threat mitigations + +--- + ## [0.1.2] — 2026-04-05 ### Changed diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index f44546e..06acdab 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -84,7 +84,7 @@ Use Python 3.10+ type 
hints. All public API must be fully annotated. uv run pytest tests/unit/ -v # With coverage: -uv run pytest tests/unit/ --cov=src/long_parser --cov-report=term-missing +uv run pytest tests/unit/ --cov=src/longparser --cov-report=term-missing # Full test suite (requires MongoDB + Redis): uv run pytest tests/ -v diff --git a/README.md b/README.md index 3b4f72a..dce377d 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ Monthly Downloads - Python + Python MIT License @@ -105,9 +105,9 @@ pip install "longparser[cpu]" ### Python SDK ```python -from longparser import PipelineOrchestrator, ProcessingConfig +from longparser import DocumentPipeline, ProcessingConfig -pipeline = PipelineOrchestrator() +pipeline = DocumentPipeline(ProcessingConfig()) result = pipeline.process_file("document.pdf") print(f"Pages: {result.document.metadata.total_pages}") @@ -186,7 +186,7 @@ src/longparser/ ├── schemas.py ← core Pydantic models (Document, Block, Chunk, …) ├── extractors/ ← Docling, LaTeX OCR backends ├── chunkers/ ← HybridChunker -├── pipeline/ ← PipelineOrchestrator +├── pipeline/ ← DocumentPipeline ├── integrations/ ← LangChain loader & LlamaIndex reader ├── utils/ ← shared helpers (RTL detection, …) └── server/ ← REST API layer diff --git a/SECURITY.md b/SECURITY.md index ba315cf..9932f71 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -35,6 +35,8 @@ Key risks: | **MongoDB injection** | Motor driver + typed Pydantic inputs prevent injection | | **SSRF via webhook** | No outbound HTTP made based on user input | | **Hallucinated citations** | Citation IDs validated against retrieved set before returning to client | +| **DDoS / Spam via API** | Route-level Rate Limiting strictly isolated per tenant via Redis | +| **Cross-Origin attacks** | Configurable CORS restrictions and strict Tenant Isolation | ## Dependency Security diff --git a/docs/changelog.md b/docs/changelog.md index 9523c0c..2fa3957 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -5,6 +5,26 @@ All 
notable changes to **LongParser** are documented here. This project follows [Semantic Versioning](https://semver.org/) and [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). +## [0.1.3] — 2026-04-13 + +### Fixed + +- **Source code**: Added `DocumentPipeline` as a public alias for `PipelineOrchestrator` — + docs, quickstart, and all examples now use this name consistently +- **Documentation**: Fixed wrong coverage path `long_parser` → `longparser` in `CONTRIBUTING.md` +- **Documentation**: Replaced stale `cleanrag-api` reference in Docker deployment docs +- **Documentation**: Standardized Gemini API key env var to `GOOGLE_API_KEY` across all docs +- **Source code**: Updated default LLM model fallback from `gpt-4o` to `gpt-5.3` in + `schemas.py`, `llm_chain.py`, and `engine.py` +- **Source code**: Renamed stale `cleanrag:` Redis key prefix to `longparser:` in embeddings + +### Changed + +- Python 3.13 added to CI matrix, badges, and installation docs +- `SECURITY.md` updated with Redis rate-limiting and CORS threat mitigations + +--- + ## [0.1.2] — 2026-04-05 ### Changed diff --git a/docs/contributing.md b/docs/contributing.md index e8b7196..72727c9 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -84,7 +84,7 @@ Use Python 3.10+ type hints. All public API must be fully annotated. 
uv run pytest tests/unit/ -v # With coverage: -uv run pytest tests/unit/ --cov=src/long_parser --cov-report=term-missing +uv run pytest tests/unit/ --cov=src/longparser --cov-report=term-missing # Full test suite (requires MongoDB + Redis): uv run pytest tests/ -v diff --git a/docs/deployment/docker.md b/docs/deployment/docker.md index e462ce5..8ffeac7 100644 --- a/docs/deployment/docker.md +++ b/docs/deployment/docker.md @@ -49,5 +49,5 @@ docker compose up --scale longparser=3 ```bash curl http://localhost:8000/health -# {"status": "ok", "service": "cleanrag-api"} +# {"status": "ok", "service": "longparser-api"} ``` diff --git a/docs/deployment/environment.md b/docs/deployment/environment.md index 3245c88..0d8d28c 100644 --- a/docs/deployment/environment.md +++ b/docs/deployment/environment.md @@ -16,7 +16,7 @@ Copy `.env.example` to `.env` and configure for your deployment. | `LONGPARSER_LLM_PROVIDER` | `openai` | LLM provider | | `LONGPARSER_LLM_MODEL` | _(provider default)_ | Model name | | `OPENAI_API_KEY` | — | OpenAI API key | -| `GEMINI_API_KEY` | — | Google Gemini API key | +| `GOOGLE_API_KEY` | — | Google Gemini API key | | `GROQ_API_KEY` | — | Groq API key | | `OPENROUTER_API_KEY` | — | OpenRouter API key | diff --git a/docs/getting-started/configuration.md b/docs/getting-started/configuration.md index efd370f..859c2c1 100644 --- a/docs/getting-started/configuration.md +++ b/docs/getting-started/configuration.md @@ -33,7 +33,7 @@ cp .env.example .env |---|---| | `LONGPARSER_LLM_PROVIDER` | `openai` / `gemini` / `groq` / `openrouter` | | `LONGPARSER_LLM_MODEL` | Model name (uses provider default if unset) | -| `GEMINI_API_KEY` | For Google Gemini | +| `GOOGLE_API_KEY` | For Google Gemini | | `GROQ_API_KEY` | For Groq | ## Vector Store diff --git a/docs/getting-started/installation.md b/docs/getting-started/installation.md index 908f659..5356c04 100644 --- a/docs/getting-started/installation.md +++ b/docs/getting-started/installation.md @@ -2,7 +2,7 @@ ## 
Requirements -- Python 3.10, 3.11, or 3.12 +- Python 3.10, 3.11, 3.12, or 3.13 - Tesseract OCR (`brew install tesseract` / `apt install tesseract-ocr`) --- @@ -104,5 +104,5 @@ The server starts on `http://localhost:8000`. ```python import longparser -print(longparser.__version__) # 0.1.2 +print(longparser.__version__) # 0.1.3 ``` diff --git a/docs/getting-started/quickstart.md b/docs/getting-started/quickstart.md index b779f4b..e501288 100644 --- a/docs/getting-started/quickstart.md +++ b/docs/getting-started/quickstart.md @@ -17,11 +17,11 @@ from longparser import DocumentPipeline, ProcessingConfig pipeline = DocumentPipeline(ProcessingConfig()) # Parse a PDF -doc = pipeline.process("research_paper.pdf") +result = pipeline.process_file("research_paper.pdf") -print(f"Pages: {len(doc.pages)}") -print(f"Blocks: {len(doc.blocks)}") -print(f"Chunks: {len(doc.chunks)}") +print(f"Pages: {result.document.metadata.total_pages}") +print(f"Chunks: {len(result.chunks)}") +print(result.chunks[0].text) ``` ## 3. Inspect Chunks diff --git a/docs/guide/chat.md b/docs/guide/chat.md index a3fb8e6..7ddc175 100644 --- a/docs/guide/chat.md +++ b/docs/guide/chat.md @@ -70,6 +70,6 @@ Every answer's `cited_chunk_ids` are validated against the retrieved set. 
IDs no | Provider | Key | |---|---| | OpenAI | `OPENAI_API_KEY` | -| Google Gemini | `GEMINI_API_KEY` | +| Google Gemini | `GOOGLE_API_KEY` | | Groq | `GROQ_API_KEY` | | OpenRouter | `OPENROUTER_API_KEY` | diff --git a/docs/guide/parsing.md b/docs/guide/parsing.md index 171c5b9..93c6386 100644 --- a/docs/guide/parsing.md +++ b/docs/guide/parsing.md @@ -18,7 +18,7 @@ LongParser uses **Docling** with Tesseract CLI OCR as its extraction engine — from longparser import DocumentPipeline, ProcessingConfig pipeline = DocumentPipeline(ProcessingConfig()) -doc = pipeline.process("paper.pdf") +result = pipeline.process_file("paper.pdf") ``` ## Formula Modes @@ -36,15 +36,15 @@ config = ProcessingConfig(formula_mode="smart") ```python # Pages -for page in doc.pages: +for page in result.document.pages: print(f"Page {page.page_number}: {page.width}x{page.height}") # Blocks (semantic units) -for block in doc.blocks: +for block in result.document.blocks: print(f"[{block.type}] p={block.provenance.page_number}: {block.text[:80]}") # Chunks (RAG-ready) -for chunk in doc.chunks: +for chunk in result.chunks: print(f"{chunk.chunk_type} | {chunk.token_count} tokens | pages={chunk.page_numbers}") ``` diff --git a/docs/index.md b/docs/index.md index 650ed63..4e7ff6e 100644 --- a/docs/index.md +++ b/docs/index.md @@ -16,7 +16,7 @@ Monthly Downloads   - Python + Python   MIT License @@ -57,9 +57,10 @@ pip install longparser from longparser import DocumentPipeline, ProcessingConfig pipeline = DocumentPipeline(ProcessingConfig()) -doc = pipeline.process("report.pdf") +result = pipeline.process_file("report.pdf") -print(f"Extracted {len(doc.blocks)} blocks, {len(doc.chunks)} chunks") +print(f"Chunks: {len(result.chunks)}") +print(result.chunks[0].text) ``` --- diff --git a/docs/reference/pipeline.md b/docs/reference/pipeline.md index 8f3e5a4..7cdfbf9 100644 --- a/docs/reference/pipeline.md +++ b/docs/reference/pipeline.md @@ -7,39 +7,49 @@ The `DocumentPipeline` is the main entry point for 
LongParser's extraction pipel ```python from longparser import DocumentPipeline, ProcessingConfig -pipeline = DocumentPipeline(config=ProcessingConfig()) -doc = pipeline.process("document.pdf") +pipeline = DocumentPipeline(ProcessingConfig()) +result = pipeline.process_file("document.pdf") ``` ### Constructor ```python -DocumentPipeline(config: ProcessingConfig) +DocumentPipeline(config: ProcessingConfig | None = None) ``` | Parameter | Type | Description | |---|---|---| -| `config` | `ProcessingConfig` | Extraction and chunking configuration | +| `config` | `ProcessingConfig \| None` | Extraction and chunking configuration (uses defaults if `None`) | ### Methods -#### `process(file_path)` +#### `process_file(file_path)` Process a document end-to-end through Extract → Validate → Chunk. ```python -doc = pipeline.process("report.pdf") -# Returns: longparser.schemas.Document +result = pipeline.process_file("report.pdf") +# Returns: longparser.pipeline.PipelineResult ``` -**Returns:** `Document` with `.pages`, `.blocks`, `.chunks` populated. +**Returns:** `PipelineResult` with `.document` and `.chunks` populated. + +#### `process(request)` + +Process a document from a `JobRequest` object. + +```python +from longparser import JobRequest +request = JobRequest(file_path="report.pdf") +result = pipeline.process(request) +``` #### `process_batch(file_paths)` Process multiple documents sequentially. ```python -docs = pipeline.process_batch(["a.pdf", "b.docx", "c.pptx"]) +results = pipeline.process_batch(["a.pdf", "b.docx", "c.pptx"]) ``` ## ProcessingConfig diff --git a/docs/reference/schemas.md b/docs/reference/schemas.md index 7e33ac6..e4dda21 100644 --- a/docs/reference/schemas.md +++ b/docs/reference/schemas.md @@ -4,7 +4,7 @@ Core data models used throughout LongParser. ## Document -Top-level container returned by `DocumentPipeline.process()`. +Top-level container returned by `DocumentPipeline.process_file()`. 
```python class Document: diff --git a/pyproject.toml b/pyproject.toml index 38330da..afea16d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "longparser" -version = "0.1.2" +version = "0.1.3" description = "Privacy-first document intelligence engine — converts PDFs, DOCX, PPTX, XLSX, and CSV into AI-ready Markdown + structured JSON for RAG pipelines." readme = {file = "README.md", content-type = "text/markdown"} requires-python = ">=3.10" diff --git a/src/longparser/__init__.py b/src/longparser/__init__.py index 5de272e..7d00c7e 100755 --- a/src/longparser/__init__.py +++ b/src/longparser/__init__.py @@ -9,9 +9,9 @@ Quick start:: - from longparser import PipelineOrchestrator, ProcessingConfig + from longparser import DocumentPipeline, ProcessingConfig - pipeline = PipelineOrchestrator() + pipeline = DocumentPipeline(ProcessingConfig()) result = pipeline.process_file("document.pdf") print(result.chunks[0].text) @@ -19,13 +19,13 @@ uv run uvicorn longparser.server.app:app --reload --port 8000 -See :class:`~longparser.pipeline.PipelineOrchestrator` for the main SDK entry +See :class:`~longparser.pipeline.DocumentPipeline` for the main SDK entry point and :mod:`longparser.server` for the REST API layer. 
""" from __future__ import annotations -__version__ = "0.1.2" +__version__ = "0.1.3" __author__ = "ENDEVSOLS Team" __license__ = "MIT" @@ -62,6 +62,9 @@ def __getattr__(name: str): if name == "PipelineOrchestrator": from .pipeline import PipelineOrchestrator return PipelineOrchestrator + if name == "DocumentPipeline": + from .pipeline import DocumentPipeline + return DocumentPipeline if name == "PipelineResult": from .pipeline import PipelineResult return PipelineResult @@ -99,6 +102,7 @@ def __getattr__(name: str): # Lazily imported (require extras) "DoclingExtractor", "PipelineOrchestrator", + "DocumentPipeline", "PipelineResult", "HybridChunker", ] diff --git a/src/longparser/pipeline/__init__.py b/src/longparser/pipeline/__init__.py index 6b775d9..710800e 100755 --- a/src/longparser/pipeline/__init__.py +++ b/src/longparser/pipeline/__init__.py @@ -2,7 +2,11 @@ from .orchestrator import PipelineOrchestrator, PipelineResult +# Public alias — docs and quickstart use this name +DocumentPipeline = PipelineOrchestrator + __all__ = [ "PipelineOrchestrator", + "DocumentPipeline", "PipelineResult", ] diff --git a/src/longparser/server/chat/engine.py b/src/longparser/server/chat/engine.py index b55b7cf..d50a7af 100755 --- a/src/longparser/server/chat/engine.py +++ b/src/longparser/server/chat/engine.py @@ -76,7 +76,7 @@ # Token Counting (model-aware) — kept as custom logic # --------------------------------------------------------------------------- -def count_tokens(text: str, model: str = "gpt-4o") -> int: +def count_tokens(text: str, model: str = "gpt-5.3") -> int: """Count tokens — exact for OpenAI models, conservative approx for others.""" try: import tiktoken @@ -96,7 +96,7 @@ def budget_trim( recent_turns: list[dict], rolling_summary: str, long_term_facts: list[dict], - model: str = "gpt-4o", + model: str = "gpt-5.3", max_prompt_tokens: int = 6000, ) -> dict: """Priority-ordered truncation of prompt variables to fit token budget. 
diff --git a/src/longparser/server/chat/llm_chain.py b/src/longparser/server/chat/llm_chain.py index f2cb8e7..b32bb2f 100755 --- a/src/longparser/server/chat/llm_chain.py +++ b/src/longparser/server/chat/llm_chain.py @@ -115,7 +115,7 @@ def get_chat_model( """ config = config or ChatConfig() provider = provider or config.llm_provider - model = model or config.llm_model or DEFAULT_MODELS.get(provider, "gpt-4o") + model = model or config.llm_model or DEFAULT_MODELS.get(provider, "gpt-5.3") max_tokens = max_tokens or config.max_output_tokens creator = _CREATORS.get(provider) diff --git a/src/longparser/server/chat/schemas.py b/src/longparser/server/chat/schemas.py index 0405a84..0479cf7 100755 --- a/src/longparser/server/chat/schemas.py +++ b/src/longparser/server/chat/schemas.py @@ -33,7 +33,7 @@ class ChatConfig(BaseModel): default_factory=lambda: os.getenv("LONGPARSER_LLM_PROVIDER", "openai") ) llm_model: str = Field( - default_factory=lambda: os.getenv("LONGPARSER_LLM_MODEL", "gpt-4o") + default_factory=lambda: os.getenv("LONGPARSER_LLM_MODEL", "gpt-5.3") ) max_input_tokens: int = Field( default_factory=lambda: int(os.getenv("LONGPARSER_CHAT_MAX_INPUT_TOKENS", "1000")) diff --git a/src/longparser/server/embeddings.py b/src/longparser/server/embeddings.py index e59f513..e0b2bbc 100755 --- a/src/longparser/server/embeddings.py +++ b/src/longparser/server/embeddings.py @@ -108,7 +108,7 @@ def dim(self) -> int: return self._dim fp = self.get_fingerprint() - cache_key = f"cleanrag:embed_dim:{fp}" + cache_key = f"longparser:embed_dim:{fp}" # 1) Try Redis cross-process cache if available try: