94 changes: 78 additions & 16 deletions backend/.env.example
@@ -1,32 +1,94 @@

# ==========================================================
# Unified OpenAI-Compatible Provider Config (Primary)
# Use these 4 keys for OpenAI / Xeon / Ollama-compatible endpoints.
# API_ENDPOINT:
# - OpenAI cloud: leave empty
# - Xeon/Gateway: https://api.example.com (without /v1; auto-appended)
# - Ollama: http://localhost:11434 (without /v1; auto-appended)
# PROVIDER_NAME is optional and only used for logs/UI labeling.
# If omitted, provider name is inferred from API_ENDPOINT.
# ==========================================================
API_ENDPOINT=
API_TOKEN=your-api-token-here
MODEL_NAME=gpt-4o-mini
PROVIDER_NAME=

# OpenAI Configuration
OPENAI_API_KEY=your_openai_api_key_here
# Legacy
OPENAI_MODEL=gpt-4o-mini


# Simple embedding config (OpenAI-compatible)
# If embeddings use the same endpoint/token as chat, leave EMBEDDING_ENDPOINT
# and EMBEDDING_API empty.
# Example:
# EMBEDDING_ENDPOINT=http://localhost:11434
# EMBEDDING_API=ollama
# EMBEDDING_MODEL=nomic-embed-text
EMBEDDING_ENDPOINT=
EMBEDDING_API=
EMBEDDING_MODEL=text-embedding-3-small
EMBEDDING_PROVIDER_NAME=
# Optional: fail fast for slow local embedding endpoints
EMBEDDING_TIMEOUT=20
EMBEDDING_MAX_RETRIES=0


RAG_CHUNK_SIZE=800
RAG_CHUNK_OVERLAP=150
RAG_TOP_K=5

RAG_MAX_DOCS=25
RAG_TTL_SECONDS=3600
# Optional per-model context window overrides
# MODEL_CONTEXT_TOKENS_MAP format:
# {"gpt-4o-mini":128000,"llama3.2:3b":32768}
MODEL_CONTEXT_TOKENS=
MODEL_CONTEXT_TOKENS_MAP=
MIN_MODEL_CONTEXT_TOKENS=4096
CONTEXT_RETRY_SHRINK_RATIO=0.75
CONTEXT_RETRY_MARGIN_TOKENS=1200
# Speed knobs (recommended for local models)
# false = heuristic section chips only (faster, fewer LLM calls)
DYNAMIC_SECTIONS_USE_LLM=false
# false = skip second "retry summary" LLM call
SUMMARY_RETRY_ENABLE=false


# ==========================================================
# LLM Tuning
# ==========================================================
LLM_TEMPERATURE=0.2
LLM_MAX_TOKENS=900

CACHE_MAX_DOCS=25
CACHE_TTL_SECONDS=3600

# ==========================================================
# RAG / Chunking
# ==========================================================
RAG_CHUNK_CHARS=1400
RAG_CHUNK_OVERLAP_CHARS=220
# For local embedding models, start with 1-4.
RAG_EMBED_BATCH_SIZE=64
RAG_SUMMARY_TOP_K=8
RAG_SECTION_TOP_K=10
RAG_MIN_SCORE=0.15
RAG_CONTEXT_MAX_CHARS=18000
RETRIEVAL_FIRST_ENABLE=true
# If true, request path can synchronously build index when missing (may be slow).
RETRIEVAL_FORCE_INDEX_ON_DEMAND=false

# ==========================================================
# Map-Reduce Summarization for Large Docs
# ==========================================================
MAP_REDUCE_ENABLE=true
MAP_REDUCE_MIN_CHARS=18000
MAP_REDUCE_MIN_CHUNKS=8
MAP_REDUCE_MAX_CHUNKS=24
MAP_REDUCE_CHUNK_CHARS=2800
MAP_REDUCE_CHUNK_OVERLAP=250
MAP_REDUCE_BATCH_SIZE=6
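# Illustration (not part of this PR): with the defaults above, an ~60k-char
# document maps to roughly 24 chunks of 2,800 chars (250-char overlap, so a
# ~2,550-char stride), summarized 6 chunks per batch, and the partial
# summaries are then reduced into a single final summary.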

# ==========================================================
# Cache
# ==========================================================
CACHE_MAX_DOCS=25
CACHE_TTL_SECONDS=3600

# Service Configuration
# ==========================================================
# Service
# ==========================================================
SERVICE_PORT=8000
LOG_LEVEL=INFO

# CORS Settings
CORS_ORIGINS=*
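Note: a minimal working example of the unified keys for a local Ollama setup, following the file's own comments (values are illustrative, not part of this diff; a local Ollama daemon typically requires no auth, so any placeholder token suffices):

API_ENDPOINT=http://localhost:11434
API_TOKEN=not-needed
MODEL_NAME=llama3.2:3b
PROVIDER_NAME=ollama
EMBEDDING_ENDPOINT=http://localhost:11434
EMBEDDING_API=ollama
EMBEDDING_MODEL=nomic-embed-text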
22 changes: 17 additions & 5 deletions backend/api/routes.py
@@ -4,6 +4,7 @@
"""

from fastapi import APIRouter, Form, File, UploadFile, HTTPException, BackgroundTasks
from fastapi.responses import PlainTextResponse
from fastapi.responses import StreamingResponse
from typing import Optional
import os
@@ -14,7 +15,8 @@
from models import HealthResponse

from services import pdf_service, llm_service
from services.rag_index_service import rag_index_service # <-- ADDED
from services.rag.rag_index_service import rag_index_service
from services.observability_service import observability_service

logger = logging.getLogger(__name__)

@@ -23,18 +25,26 @@

@router.get("/health", response_model=HealthResponse)
async def health_check():
"""Health check endpoint - OpenAI-only"""
"""Health check endpoint"""
llm_health = llm_service.health_check()

response = {
"status": "healthy" if llm_health.get("status") == "healthy" else "unhealthy",
"service": config.APP_TITLE,
"version": config.APP_VERSION,
"llm_provider": "OpenAI",
"llm_provider": llm_health.get("provider", llm_service.get_provider_name()),
}

return response


@router.get("/v1/observability", response_class=PlainTextResponse)
async def observability(limit: int = 100):
"""
Plain-text table of LLM token observability only.
"""
return observability_service.render_table(limit=limit, llm_only=True)

@router.get("/v1/rag/status")
async def rag_status(doc_id: str):
"""
@@ -46,11 +56,13 @@ async def rag_status(doc_id: str):
return {"doc_id": doc_id.strip(), **status}
except Exception as e:
raise HTTPException(status_code=400, detail=str(e))


@router.post("/v1/rag/chat")
async def rag_chat(
doc_id: str = Form(""),
message: str = Form(""),
max_tokens: int = Form(500),
max_tokens: int = Form(220),
temperature: float = Form(0.2),
):
"""
@@ -119,7 +131,7 @@ async def delete_vectors(doc_id: str):
if not doc_id_clean:
raise HTTPException(status_code=400, detail="doc_id is required")

from services.vector_store import vector_store
from services.rag.vector_store import vector_store
vector_store.clear_doc(doc_id_clean)

return {"doc_id": doc_id_clean, "status": "deleted", "message": "Vector data cleared"}
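Note: once the service is running, the new observability endpoint can be exercised directly (port per SERVICE_PORT; the limit value here is illustrative):

curl "http://localhost:8000/v1/observability?limit=50"

It returns plain text (PlainTextResponse), so the table is readable without any JSON tooling.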
33 changes: 32 additions & 1 deletion backend/config.py
@@ -1,5 +1,5 @@
"""
Configuration settings for Doc-Sum Application
Configuration settings for FinSights Application
"""

import os
@@ -10,8 +10,39 @@

# OpenAI Configuration (optional)
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
# Optional fallback model when MODEL_NAME is not set
OPENAI_MODEL = os.getenv("OPENAI_MODEL", "gpt-4o-mini")

# Unified Provider Configuration (primary)
API_ENDPOINT = os.getenv("API_ENDPOINT", "")
API_TOKEN = os.getenv("API_TOKEN", "")
MODEL_NAME = os.getenv("MODEL_NAME", "")
EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL", "")
PROVIDER_NAME = os.getenv("PROVIDER_NAME", "")
VERIFY_SSL = os.getenv("VERIFY_SSL", "true")
LOCAL_URL_ENDPOINT = os.getenv("LOCAL_URL_ENDPOINT", "not-needed")
EMBEDDING_PROVIDER = os.getenv("EMBEDDING_PROVIDER", "same")
EMBEDDING_ENDPOINT = os.getenv("EMBEDDING_ENDPOINT", "")
EMBEDDING_API = os.getenv("EMBEDDING_API", "")
EMBEDDING_API_ENDPOINT = os.getenv("EMBEDDING_API_ENDPOINT", "")
EMBEDDING_API_TOKEN = os.getenv("EMBEDDING_API_TOKEN", "")
EMBEDDING_PROVIDER_NAME = os.getenv("EMBEDDING_PROVIDER_NAME", "")

# Optional embedding overrides (advanced)
OPENAI_EMBEDDING_MODEL = os.getenv("OPENAI_EMBEDDING_MODEL", "text-embedding-3-small")
INFERENCE_EMBEDDING_MODEL_NAME = os.getenv("INFERENCE_EMBEDDING_MODEL_NAME", "")
OLLAMA_EMBEDDING_MODEL = os.getenv("OLLAMA_EMBEDDING_MODEL", "")

# Legacy compatibility (optional)
LLM_PROVIDER = os.getenv("LLM_PROVIDER", "")
INFERENCE_API_ENDPOINT = os.getenv("INFERENCE_API_ENDPOINT", "")
INFERENCE_API_TOKEN = os.getenv("INFERENCE_API_TOKEN", "")
INFERENCE_MODEL_NAME = os.getenv("INFERENCE_MODEL_NAME", "")
OLLAMA_ENDPOINT = os.getenv("OLLAMA_ENDPOINT", "")
OLLAMA_TOKEN = os.getenv("OLLAMA_TOKEN", "")
OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "")
EMBEDDING_MODEL_NAME = os.getenv("EMBEDDING_MODEL_NAME", "")



# LLM Configuration (tuned for section summaries)
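Note: this hunk only declares the env keys; the comment "Optional fallback model when MODEL_NAME is not set" implies a resolution order that is not shown here. A minimal sketch of that fallback, assuming a hypothetical helper (not part of the PR):

import config

def effective_model_name() -> str:
    # Unified MODEL_NAME wins; legacy OPENAI_MODEL (default "gpt-4o-mini") is the fallback.
    return config.MODEL_NAME or config.OPENAI_MODEL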
43 changes: 30 additions & 13 deletions backend/server.py
@@ -1,18 +1,21 @@
"""
FastAPI server for Doc-Sum Application (OpenAI-only)
FastAPI server for Doc-Sum Application
"""

import logging
import time
from fastapi import FastAPI
from fastapi import Request
from fastapi.middleware.cors import CORSMiddleware
import uvicorn

import config
from models import HealthResponse
from api.routes import router
from services.observability_service import observability_service

# IMPORTANT: llm_service is inside the "service" package
from services.llm_service import llm_service
# IMPORTANT: import the llm_service singleton object directly
from services.llm.llm_service import llm_service

# Configure logging
logging.basicConfig(
@@ -40,6 +43,22 @@
# Include API routes
app.include_router(router)

@app.middleware("http")
async def request_observability_middleware(request: Request, call_next):
started = time.perf_counter()
ctx_tokens = observability_service.set_request_context(request.url.path, request.method)
status_code = 500
try:
response = await call_next(request)
status_code = response.status_code
return response
finally:
observability_service.record_request(
status_code=status_code,
duration_ms=(time.perf_counter() - started) * 1000.0,
)
observability_service.reset_request_context(ctx_tokens)


@app.get("/")
def root():
@@ -51,17 +70,17 @@ def root():
"docs": "/docs",
"health": "/health",
"config": {
"llm_provider": "OpenAI",
"llm_model": config.OPENAI_MODEL,
"openai_configured": bool(config.OPENAI_API_KEY),
"llm_provider": llm_service.get_provider_name(),
"llm_model": llm_service.model,
"api_token_configured": bool(config.API_TOKEN or config.OPENAI_API_KEY),
},
}
return response


@app.get("/health", response_model=HealthResponse)
def health_check():
"""Detailed health check - OpenAI only"""
"""Detailed health check"""
response_data = {
"status": "healthy",
"service": config.APP_TITLE,
@@ -70,9 +89,7 @@ def health_check():

llm_health = llm_service.health_check()

# Only set fields that likely exist in your HealthResponse model
# Keep the original fields + add llm_provider (as your old code did)
response_data["llm_provider"] = "OpenAI"
response_data["llm_provider"] = llm_health.get("provider", llm_service.get_provider_name())

# If OpenAI isn't configured or health check fails, mark unhealthy
if llm_health.get("status") in ("not_configured", "unhealthy"):
@@ -87,9 +104,9 @@ async def startup_event():
logger.info("=" * 60)
logger.info(f"Starting {config.APP_TITLE} v{config.APP_VERSION}")
logger.info("=" * 60)
logger.info("LLM Provider: OpenAI")
logger.info(f"OpenAI Configured: {bool(config.OPENAI_API_KEY)}")
logger.info(f"Model: {config.OPENAI_MODEL}")
logger.info(f"LLM Provider: {llm_service.get_provider_name()}")
logger.info(f"API Token Configured: {bool(config.API_TOKEN or config.OPENAI_API_KEY)}")
logger.info(f"Model: {llm_service.model}")
logger.info(f"Port: {config.SERVICE_PORT}")
logger.info("=" * 60)

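Note: the middleware relies on observability_service handing back tokens from set_request_context and restoring them in reset_request_context. A minimal sketch of one way to back that with contextvars — an assumption for illustration, not the service's actual implementation:

import contextvars

# Hypothetical context variables mirroring what the middleware passes in.
_request_path = contextvars.ContextVar("request_path", default="")
_request_method = contextvars.ContextVar("request_method", default="")

def set_request_context(path: str, method: str):
    # Each .set() returns a Token so the previous value can be restored.
    return (_request_path.set(path), _request_method.set(method))

def reset_request_context(tokens) -> None:
    path_token, method_token = tokens
    _request_path.reset(path_token)
    _request_method.reset(method_token)

The try/finally in the middleware guarantees that record_request and the context reset run even when call_next raises, which is why status_code is pre-seeded to 500.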
4 changes: 2 additions & 2 deletions backend/services/__init__.py
@@ -1,6 +1,6 @@
"""Services module - Business logic layer"""

from .pdf_service import pdf_service
from .llm_service import llm_service
from .pdf import pdf_service
from .llm.llm_service import llm_service

__all__ = ["pdf_service", "llm_service"]
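Note: because the package still re-exports both singletons, existing call sites keep working unchanged:

from services import pdf_service, llm_service  # same import path as before

llm_service.health_check()  # e.g. as used by the /health route above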
3 changes: 3 additions & 0 deletions backend/services/llm/__init__.py
@@ -0,0 +1,3 @@
from .llm_service import llm_service

__all__ = ["llm_service"]