From 8e4c91c4ac81fe567b50647233c465932ee716e1 Mon Sep 17 00:00:00 2001
From: pranavshankar1221
Date: Fri, 5 Jun 2026 23:21:59 +0530
Subject: [PATCH 1/4] feat(auth): add Cloudflare Turnstile bot protection and
rate limiting
---
.env.example | 3 +
.gitignore | 25 ++++++-
README.md | 6 ++
backend/.env.example | 3 +
backend/main.py | 50 +++++++++++--
backend/requirements-base.txt | 1 +
backend/turnstile.py | 46 ++++++++++++
package-lock.json | 4 +-
src/lib/api.ts | 12 ++-
src/lib/useTurnstile.ts | 136 ++++++++++++++++++++++++++++++++++
src/pages/AuthPage.tsx | 52 ++++++++++---
11 files changed, 319 insertions(+), 19 deletions(-)
create mode 100644 backend/turnstile.py
create mode 100644 src/lib/useTurnstile.ts
diff --git a/.env.example b/.env.example
index 598cb3a..ec4371a 100644
--- a/.env.example
+++ b/.env.example
@@ -12,6 +12,9 @@ VITE_API_URL=
# Set to "true" locally. NEVER set in Vercel env vars.
VITE_DEV_MODE=true
+# Cloudflare Turnstile site key (public). Leave blank to skip the Turnstile challenge on the login page.
+VITE_TURNSTILE_SITE_KEY=
+
# ── PostHog Analytics ─────────────────────────────────────────────────────────
# Leave blank locally — PostHog is silently disabled when this key is missing.
# Set both vars in your Vercel / production environment dashboard.
diff --git a/.gitignore b/.gitignore
index 96ca793..e525b4f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -62,4 +62,27 @@ Models/
*.ntvs*
*.njsproj
*.sln
-*.sw?
\ No newline at end of file
+*.sw?
+
+# Additional local / secret ignores
+# Local build outputs
+build/
+
+# Alternate virtualenv names
+myenv/
+
+# Docker local overrides and runtime artifacts
+docker-compose.override.yml
+.docker/
+
+# Hugging Face / ML cache
+.hf/
+.cache/
+
+# Secrets files
+secrets.env
+*.secret
+
+# Local DB / artifacts
+*.sqlite3
+*.db
\ No newline at end of file
diff --git a/README.md b/README.md
index f6436e4..3bf85e7 100644
--- a/README.md
+++ b/README.md
@@ -99,6 +99,7 @@ The setup script detects your environment automatically:
```env
VITE_API_URL= # leave blank for local dev; Vite proxy handles /api/*
VITE_DEV_MODE=true # enables DEV LOGIN button; never set true in production
+VITE_TURNSTILE_SITE_KEY= # Cloudflare Turnstile site key
```
**Backend** — copy `backend/.env.example` to `backend/.env`:
@@ -109,9 +110,14 @@ SUPABASE_KEY=
SUPABASE_SERVICE_KEY=
FRONTEND_URL=http://localhost:5173
API_BASE_URL=http://localhost:8000
+TURNSTILE_SECRET_KEY=
DEV_BYPASS_AUTH=true # never set true in production
```
+> Note: When using production auth flows, set `VITE_TURNSTILE_SITE_KEY` in the frontend and `TURNSTILE_SECRET_KEY` in the backend. This enables Cloudflare Turnstile protection for auth endpoint requests.
+
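+> Authentication start requests are rate limited in the backend to 5 requests per minute per client IP address. When `TURNSTILE_SECRET_KEY` is set, requests with a missing or invalid Turnstile token are rejected with an HTTP 400 response; when it is unset, Turnstile verification is skipped.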
+
### Available Scripts
| Script | Description |
diff --git a/backend/.env.example b/backend/.env.example
index 2e3bc79..bbbeaa6 100644
--- a/backend/.env.example
+++ b/backend/.env.example
@@ -23,6 +23,9 @@ CORS_ALLOW_ALL=true
DEV_BYPASS_AUTH=true
DEV_BYPASS_TOKEN=dev-local-bypass-token
+# ── Cloudflare Turnstile — protect auth endpoints from automated abuse
+TURNSTILE_SECRET_KEY=
+
# ── ML Models (optional) ──────────────────────────────────────────────────────
# Leave unset → demo mode (random scores, no PyTorch needed).
# Uncomment to enable real ML inference:
diff --git a/backend/main.py b/backend/main.py
index 2a9ed14..298253d 100644
--- a/backend/main.py
+++ b/backend/main.py
@@ -15,9 +15,12 @@
except ImportError:
pass
-from fastapi import FastAPI, File, UploadFile, Form, HTTPException, Depends, Query
+from fastapi import Body, FastAPI, File, UploadFile, Form, HTTPException, Depends, Query, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import RedirectResponse
+from slowapi import Limiter, _rate_limit_exceeded_handler
+from slowapi.middleware import SlowAPIMiddleware
+from slowapi.util import get_remote_address
from supabase import create_client, Client
from PIL import Image
@@ -32,6 +35,7 @@
print("WARNING: PyTorch not installed. Scan endpoints will return 503.")
from auth import get_current_user, get_google_oauth_url, exchange_code_for_session
+from turnstile import TURNSTILE_SECRET_KEY, verify_turnstile_token
# ── Configuration ─────────────────────────────────────────────────────────────
# All secrets MUST come from environment variables — no hardcoded fallbacks.
@@ -110,7 +114,10 @@ async def lifespan(app: FastAPI):
allow_methods=["*"],
allow_headers=["*"],
)
-
+limiter = Limiter(key_func=get_remote_address)
+app.state.limiter = limiter
+app.add_exception_handler(429, _rate_limit_exceeded_handler)
+app.add_middleware(SlowAPIMiddleware)
# ── Domain helpers ────────────────────────────────────────────────────────────
@@ -328,12 +335,43 @@ async def _upload_image(image_bytes: bytes, user_id: str, scan_id: str) -> Optio
# ── AUTH ──────────────────────────────────────────────────────────────────────
-@app.get("/api/v1/auth/login/google")
-async def login_google():
+def _auth_redirect_url() -> str:
callback_url = f"{API_BASE_URL}/api/v1/auth/callback"
+ return get_google_oauth_url(redirect_to=callback_url)
+
+
+async def _verify_turnstile(turnstile_token: str | None, request: Request) -> None:
+    """Validate the Turnstile token of an auth request when Turnstile is enabled.
+
+    Verification is skipped entirely when ``TURNSTILE_SECRET_KEY`` is empty, so
+    deployments without a configured secret accept requests without a token.
+    Otherwise the token and the caller's address are passed to
+    ``verify_turnstile_token``; any ``HTTPException`` it raises propagates.
+    """
+    if not TURNSTILE_SECRET_KEY:
+        return
+    # ``request.client`` is None when the server supplies no peer address;
+    # the remote IP is optional for verification, so omit it instead of
+    # failing with AttributeError on ``.host``.
+    remote_ip = request.client.host if request.client else None
+    await verify_turnstile_token(turnstile_token, remote_ip)
+
+
+@app.get("/api/v1/auth/login/google")
+@limiter.limit("5/minute")
+async def login_google_get(
+    request: Request,
+    turnstile_token: str | None = Query(default=None, alias="turnstile_token"),
+):
+    # GET variant of the login start: verifies the optional Turnstile token
+    # (query parameter) and answers with a redirect to the Google OAuth URL.
+    # ``request`` must stay in the signature; the rate limiter reads it.
+    try:
+        await _verify_turnstile(turnstile_token, request)
+        oauth_url = _auth_redirect_url()
+        return RedirectResponse(url=oauth_url)
+    except HTTPException:
+        # Turnstile rejections already carry their own status code and detail.
+        raise
+    except Exception as exc:
+        failure_detail = f"Could not generate OAuth URL: {exc}"
+        raise HTTPException(status_code=500, detail=failure_detail)
+
+
+@app.post("/api/v1/auth/login/google")
+@limiter.limit("5/minute")
+async def login_google_post(
+ request: Request,
+ payload: dict | None = Body(None),
+):
+ turnstile_token = payload.get("turnstile_token") if payload else None
try:
- url = get_google_oauth_url(redirect_to=callback_url)
- return RedirectResponse(url=url)
+ await _verify_turnstile(turnstile_token, request)
+ return {"redirect_url": _auth_redirect_url()}
+ except HTTPException:
+ raise
except Exception as exc:
raise HTTPException(status_code=500, detail=f"Could not generate OAuth URL: {exc}")
diff --git a/backend/requirements-base.txt b/backend/requirements-base.txt
index dc40008..dda6406 100644
--- a/backend/requirements-base.txt
+++ b/backend/requirements-base.txt
@@ -6,3 +6,4 @@ numpy>=2.4.6
python-dotenv>=1.0.0
python-multipart>=0.0.29
httpx>=0.27.0
+slowapi>=0.1.4
diff --git a/backend/turnstile.py b/backend/turnstile.py
new file mode 100644
index 0000000..075f7b6
--- /dev/null
+++ b/backend/turnstile.py
@@ -0,0 +1,46 @@
+import os
+from typing import Optional
+
+import httpx
+from fastapi import HTTPException
+
+TURNSTILE_SECRET_KEY = os.environ.get('TURNSTILE_SECRET_KEY', '')
+
+async def verify_turnstile_token(turnstile_token: Optional[str], remote_ip: Optional[str] = None) -> None:
+    if not TURNSTILE_SECRET_KEY:
+        raise HTTPException(
+            status_code=500,
+            detail='Turnstile secret key is not configured. Set TURNSTILE_SECRET_KEY.',
+        )
+
+    # Contract: returns None when Cloudflare's siteverify endpoint accepts the
+    # token. Otherwise raises HTTPException:
+    #   500 - TURNSTILE_SECRET_KEY is not configured (guard above)
+    #   400 - token missing, or rejected by siteverify
+    #   502 - siteverify unreachable, returned an error status, or returned a
+    #         body that is not a JSON object
+    if not turnstile_token:
+        raise HTTPException(status_code=400, detail='Turnstile token is required.')
+
+    payload = {
+        'secret': TURNSTILE_SECRET_KEY,
+        'response': turnstile_token,
+    }
+    if remote_ip:
+        # remoteip is only sent when the caller could determine it.
+        payload['remoteip'] = remote_ip
+
+    try:
+        async with httpx.AsyncClient(timeout=10.0) as client:
+            response = await client.post(
+                'https://challenges.cloudflare.com/turnstile/v0/siteverify',
+                data=payload,
+                headers={'Content-Type': 'application/x-www-form-urlencoded'},
+            )
+            response.raise_for_status()
+            data = response.json()
+    except Exception as exc:
+        # Chain the cause so the original transport/parse error stays in tracebacks.
+        raise HTTPException(
+            status_code=502,
+            detail=f'Turnstile verification failed: {exc}',
+        ) from exc
+
+    if not isinstance(data, dict):
+        # A non-object body would make ``data.get`` raise AttributeError below.
+        raise HTTPException(
+            status_code=502,
+            detail='Turnstile verification failed: unexpected response format',
+        )
+
+    if not data.get('success'):
+        # ``error-codes`` may be absent or null; stringify entries defensively
+        # so a non-string code cannot break the join.
+        errors = data.get('error-codes') or []
+        raise HTTPException(
+            status_code=400,
+            detail=f"Turnstile verification failed: {', '.join(map(str, errors)) or 'invalid token'}",
+        )
diff --git a/package-lock.json b/package-lock.json
index 884076d..2a7c95e 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -1,12 +1,12 @@
{
"name": "frontend",
- "version": "1.2.0",
+ "version": "1.3.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "frontend",
- "version": "1.2.0",
+ "version": "1.3.0",
"dependencies": {
"@tailwindcss/vite": "^4.2.2",
"framer-motion": "^12.38.0",
diff --git a/src/lib/api.ts b/src/lib/api.ts
index 84ae3d0..1ff8c09 100644
--- a/src/lib/api.ts
+++ b/src/lib/api.ts
@@ -92,7 +92,17 @@ export interface GradcamResponse { gradcam_image: string; predicted_class: strin
// ── API surface ───────────────────────────────────────────────────────────────
export const api = {
- loginUrl: (): string => `${API_BASE}/api/v1/auth/login/google`,
+ loginUrl: async (turnstileToken?: string): Promise => {
+ if (turnstileToken) {
+ const response = await apiFetch<{ redirect_url: string }>('/api/v1/auth/login/google', {
+ method: 'POST',
+ body: JSON.stringify({ turnstile_token: turnstileToken }),
+ });
+ return response.redirect_url;
+ }
+
+ return `${API_BASE}/api/v1/auth/login/google`;
+ },
getMe: (): Promise => apiFetch('/api/v1/auth/me'),
diff --git a/src/lib/useTurnstile.ts b/src/lib/useTurnstile.ts
new file mode 100644
index 0000000..512cc65
--- /dev/null
+++ b/src/lib/useTurnstile.ts
@@ -0,0 +1,136 @@
+import { useEffect, useRef, useState } from 'react';
+
+declare global {
+ interface Window {
+ turnstile?: {
+ render: (container: HTMLElement, options: Record) => number;
+ execute: (widgetId: number) => void;
+ reset: (widgetId: number) => void;
+ };
+ }
+}
+
+const SCRIPT_SRC = 'https://challenges.cloudflare.com/turnstile/v0/api.js';
+
+/**
+ * Ensure the Cloudflare Turnstile script is present on the page.
+ *
+ * Resolves immediately when `window.turnstile` already exists. Otherwise it
+ * reuses a matching script tag if one is in the document, or appends a new
+ * one, and resolves on its `load` event. Rejects with an Error when the
+ * script fails to load.
+ */
+async function loadTurnstileScript(): Promise<void> {
+  if (window.turnstile) {
+    return;
+  }
+
+  return new Promise<void>((resolve, reject) => {
+    const loadFailure = () => reject(new Error('Failed to load Turnstile script.'));
+
+    const existingScript = document.querySelector<HTMLScriptElement>(`script[src="${SCRIPT_SRC}"]`);
+    if (existingScript) {
+      // `data-loaded` is set by the onload handler below on tags this module created.
+      if (existingScript.dataset.loaded === 'true') {
+        resolve();
+        return;
+      }
+      existingScript.addEventListener('load', () => resolve());
+      existingScript.addEventListener('error', loadFailure);
+      return;
+    }
+
+    const script = document.createElement('script');
+    script.src = SCRIPT_SRC;
+    script.async = true;
+    script.defer = true;
+    script.onload = () => {
+      script.dataset.loaded = 'true';
+      resolve();
+    };
+    script.onerror = loadFailure;
+    document.head.appendChild(script);
+  });
+}
+
+/**
+ * React hook that renders a Cloudflare Turnstile widget into `containerRef`
+ * and exposes `execute()` to obtain a token on demand.
+ *
+ * @param siteKey Turnstile site key; when undefined the hook stays idle and
+ *                `execute()` rejects.
+ * @returns `containerRef` (attach to the element that hosts the widget),
+ *          `ready` (widget rendered), `execute` (resolves with a token or
+ *          rejects with an Error) and `error` (script/render failure, if any).
+ */
+export default function useTurnstile(siteKey: string | undefined) {
+  const containerRef = useRef<HTMLDivElement | null>(null);
+  const widgetIdRef = useRef<number | null>(null);
+  const pendingRef = useRef<{
+    resolve: (token: string) => void;
+    reject: (error: Error) => void;
+  } | null>(null);
+  // True once the widget has run a challenge; it must be reset before running again.
+  const spentRef = useRef(false);
+  const [ready, setReady] = useState(false);
+  const [error, setError] = useState<Error | null>(null);
+
+  useEffect(() => {
+    if (!siteKey) {
+      setReady(false);
+      return;
+    }
+
+    let canceled = false;
+
+    async function setup() {
+      try {
+        await loadTurnstileScript();
+        if (canceled) return;
+        if (!window.turnstile || !containerRef.current) {
+          throw new Error('Turnstile is unavailable in this environment.');
+        }
+
+        if (widgetIdRef.current === null) {
+          widgetIdRef.current = window.turnstile.render(containerRef.current, {
+            sitekey: siteKey,
+            size: 'normal',
+            // Defer the challenge until execute() is called. In the default
+            // mode the challenge runs at render time, the token arrives
+            // while no caller is waiting, and a later execute() never
+            // triggers the callback again.
+            execution: 'execute',
+
+            callback: (token: string) => {
+              spentRef.current = true;
+              const pending = pendingRef.current;
+              pending?.resolve(token);
+              pendingRef.current = null;
+            },
+
+            'error-callback': () => {
+              spentRef.current = true;
+              const pending = pendingRef.current;
+              pending?.reject(new Error('Turnstile verification failed.'));
+              pendingRef.current = null;
+            },
+
+            'expired-callback': () => {
+              spentRef.current = true;
+              const pending = pendingRef.current;
+              pending?.reject(
+                new Error('Turnstile token expired. Please verify again.')
+              );
+              pendingRef.current = null;
+            },
+          });
+        }
+        setReady(true);
+      } catch (err) {
+        if (!canceled) {
+          const loadError = err instanceof Error ? err : new Error('Unknown Turnstile load error');
+          setError(loadError);
+          setReady(false);
+        }
+      }
+    }
+
+    setup();
+
+    return () => {
+      canceled = true;
+    };
+  }, [siteKey]);
+
+  const execute = async (): Promise<string> => {
+    if (!siteKey) {
+      throw new Error('Turnstile site key is not configured.');
+    }
+    if (error) {
+      throw error;
+    }
+    if (!ready || widgetIdRef.current === null) {
+      throw new Error('Turnstile is not ready yet. Please wait and try again.');
+    }
+    const turnstile = window.turnstile;
+    if (!turnstile) {
+      // Without this guard an optional call would silently do nothing and
+      // the returned promise would never settle.
+      throw new Error('Turnstile is unavailable in this environment.');
+    }
+    const widgetId = widgetIdRef.current;
+
+    return new Promise<string>((resolve, reject) => {
+      // A newer call supersedes an unsettled one; reject the old promise so it cannot hang.
+      pendingRef.current?.reject(new Error('Turnstile execution was superseded by a newer request.'));
+      pendingRef.current = { resolve, reject };
+      try {
+        if (spentRef.current) {
+          // Tokens are single-use: reset the widget before asking for a fresh one.
+          turnstile.reset(widgetId);
+          spentRef.current = false;
+        }
+        turnstile.execute(widgetId);
+      } catch (exc) {
+        pendingRef.current = null;
+        reject(exc instanceof Error ? exc : new Error('Turnstile execution failed.'));
+      }
+    });
+  };
+
+  return { containerRef, ready, execute, error };
+}
diff --git a/src/pages/AuthPage.tsx b/src/pages/AuthPage.tsx
index 616e2ec..78671a6 100644
--- a/src/pages/AuthPage.tsx
+++ b/src/pages/AuthPage.tsx
@@ -3,16 +3,19 @@ import { useNavigate } from 'react-router-dom';
import { usePostHog } from 'posthog-js/react';
import StatusTerminal from '../components/StatusTerminal';
import { api, setToken, isAuthenticated } from '../lib/api';
+import useTurnstile from '../lib/useTurnstile';
// Bypass token must match DEV_BYPASS_TOKEN in backend/.env
const DEV_BYPASS_TOKEN = 'dev-local-bypass-token';
const IS_DEV_MODE = import.meta.env.VITE_DEV_MODE === 'true';
+const TURNSTILE_SITE_KEY = import.meta.env.VITE_TURNSTILE_SITE_KEY as string | undefined;
export default function AuthPage() {
const navigate = useNavigate();
const posthog = usePostHog();
const [status, setStatus] = useState<'idle' | 'processing' | 'error'>('idle');
const [errorMsg, setErrorMsg] = useState('');
+ const { containerRef, ready: turnstileReady, execute: executeTurnstile, error: turnstileError } = useTurnstile(TURNSTILE_SITE_KEY);
// Handle redirect from backend OAuth callback
useEffect(() => {
@@ -40,21 +43,35 @@ export default function AuthPage() {
}
}, [navigate, posthog]);
- const handleGoogleLogin = () => {
+ const handleGoogleLogin = async () => {
try {
setStatus('processing');
- const loginUrl = api.loginUrl();
-
+ let turnstileToken: string | undefined;
+
+ if (TURNSTILE_SITE_KEY) {
+ if (!turnstileReady) {
+ throw new Error('Turnstile is still loading. Please wait and try again.');
+ }
+ if (turnstileError) {
+ throw turnstileError;
+ }
+ turnstileToken = await executeTurnstile();
+ }
+
+ const loginUrl = await api.loginUrl(turnstileToken);
if (!loginUrl) {
- throw new Error("Login URL configuration missing");
+ throw new Error('Login URL configuration missing');
}
-
- // Force full browser navigation for OAuth
+
window.location.href = loginUrl;
} catch (err) {
setStatus('error');
- setErrorMsg('Could not initiate Google Login. Please check your network connection.');
- console.error("Auth initiation failed:", err);
+ setErrorMsg(
+ err instanceof Error
+ ? err.message
+ : 'Could not initiate Google Login. Please check your network connection.'
+ );
+ console.error('Auth initiation failed:', err);
}
};
@@ -102,12 +119,27 @@ export default function AuthPage() {
Sign in to view your live Trust Map and sync biomarker data across devices.
)}
+
+ {TURNSTILE_SITE_KEY && !turnstileReady && (
+
+ Loading verification challenge...
+
+ )}
+
+ {turnstileError && (
+
+ {turnstileError.message}
+
+ )}
+
+
);
}
\ No newline at end of file
From 163f389df3bde80e7ba1b03157385459e90c142c Mon Sep 17 00:00:00 2001
From: pranavshankar1221
Date: Sat, 6 Jun 2026 08:25:40 +0530
Subject: [PATCH 2/4] style(auth): fix Ruff line length violation
---
backend/turnstile.py | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/backend/turnstile.py b/backend/turnstile.py
index 075f7b6..49c8e54 100644
--- a/backend/turnstile.py
+++ b/backend/turnstile.py
@@ -6,7 +6,10 @@
TURNSTILE_SECRET_KEY = os.environ.get('TURNSTILE_SECRET_KEY', '')
-async def verify_turnstile_token(turnstile_token: Optional[str], remote_ip: Optional[str] = None) -> None:
+async def verify_turnstile_token(
+ turnstile_token: Optional[str],
+ remote_ip: Optional[str] = None,
+) -> None:
if not TURNSTILE_SECRET_KEY:
raise HTTPException(
status_code=500,
From dc2f8b7cd379d3b00dcdcec346f88ca621a1c930 Mon Sep 17 00:00:00 2001
From: pranavshankar1221
Date: Tue, 16 Jun 2026 10:44:16 +0530
Subject: [PATCH 3/4] feat: AI chatbot Integration
---
TODO.md | 13 ++
backend/.env.example | 10 +-
backend/chat_logger.py | 75 +++++++
backend/chat_router.py | 164 +++++++++++++++
backend/llm_provider.py | 336 ++++++++++++++++++++++++++++++
backend/main.py | 4 +-
backend/rag_retriever.py | 251 +++++++++++++++++++++++
src/components/ChatAssistant.tsx | 338 +++++++++++++++++++++++++++++++
src/components/Layout.tsx | 4 +
src/lib/api.ts | 20 ++
src/pages/AuthPage.tsx | 2 -
11 files changed, 1213 insertions(+), 4 deletions(-)
create mode 100644 TODO.md
create mode 100644 backend/chat_logger.py
create mode 100644 backend/chat_router.py
create mode 100644 backend/llm_provider.py
create mode 100644 backend/rag_retriever.py
create mode 100644 src/components/ChatAssistant.tsx
diff --git a/TODO.md b/TODO.md
new file mode 100644
index 0000000..8b87868
--- /dev/null
+++ b/TODO.md
@@ -0,0 +1,13 @@
+# FreshScanAi - Backend error resolution
+
+## Plan (approved)
+- Fix build-time errors and likely import/module conflicts in `backend/main.py`.
+- Ensure `backend/main.py` correctly imports local modules when run as a package.
+- Make DB insert failures visible (no silent success) for scan endpoints.
+
+## Steps
+1. Update `backend/main.py` to use package-relative imports (`from .auth ...`, `from .turnstile ...`, `from .rate_limiter ...`, etc.).
+2. Fix vendor router registration import to use relative imports.
+3. Update scan endpoints (`/api/v1/scan` and `/api/v1/scan-auto`) so DB write failures raise HTTP 500 (instead of printing and continuing success).
+4. Run backend import check (e.g., `python -c "from backend.main import app"`) and run unit tests if available.
+
diff --git a/backend/.env.example b/backend/.env.example
index d552fce..d216b09 100644
--- a/backend/.env.example
+++ b/backend/.env.example
@@ -32,4 +32,12 @@ TURNSTILE_SECRET_KEY=
# MODEL_TOKEN=hf_xxxxxxxxxxxxxxxxxxxx
=========
# ── CORS ──────────────────────────────────────────────────────────────────────
-CORS_ALLOW_ALL=true
\ No newline at end of file
+CORS_ALLOW_ALL=true
+
+# ── LLM Provider (for AI Chat Assistant) ──────────────────────────────────────
+# Options: gemini (default), openai, claude, ollama
+LLM_PROVIDER=gemini
+GEMINI_API_KEY=
+OPENAI_API_KEY=
+CLAUDE_API_KEY=
+OLLAMA_BASE_URL=http://localhost:11434
\ No newline at end of file
diff --git a/backend/chat_logger.py b/backend/chat_logger.py
new file mode 100644
index 0000000..8ea08a5
--- /dev/null
+++ b/backend/chat_logger.py
@@ -0,0 +1,75 @@
+import sqlite3
+from pathlib import Path
+from datetime import datetime, timezone
+
+DB_DIR = Path(__file__).parent / "data"
+DB_PATH = DB_DIR / "chat_logs.db"
+
+def init_db():
+    """Create the chat-log database and its ``chat_logs`` table if missing.
+
+    Creates ``DB_DIR`` (including parents) when needed and runs an idempotent
+    ``CREATE TABLE IF NOT EXISTS``, so it is safe to call before every write.
+
+    Raises:
+        OSError: if the data directory cannot be created.
+        sqlite3.Error: if the database cannot be opened or the DDL fails.
+    """
+    DB_DIR.mkdir(parents=True, exist_ok=True)
+
+    conn = sqlite3.connect(str(DB_PATH))
+    try:
+        conn.execute("""
+            CREATE TABLE IF NOT EXISTS chat_logs (
+                id TEXT PRIMARY KEY,
+                question TEXT NOT NULL,
+                response TEXT NOT NULL,
+                current_page TEXT,
+                current_feature TEXT,
+                timestamp TEXT NOT NULL,
+                feedback TEXT
+            )
+        """)
+        conn.commit()
+    finally:
+        # Close even when the DDL raises so the connection is not leaked.
+        conn.close()
+
+def log_chat_message(
+    msg_id: str,
+    question: str,
+    response: str,
+    current_page: str = None,
+    current_feature: str = None
+):
+    """Persist one question/answer exchange as a row in ``chat_logs``.
+
+    The row is stamped with the current UTC time in ISO-8601 form and its
+    ``feedback`` column starts out as NULL. Insert failures are printed and
+    swallowed; only ``init_db`` errors propagate to the caller.
+    """
+    init_db()  # Ensure DB and table are initialized
+
+    logged_at = datetime.now(timezone.utc).isoformat()
+    row = (msg_id, question, response, current_page, current_feature, logged_at, None)
+
+    connection = sqlite3.connect(str(DB_PATH))
+    writer = connection.cursor()
+    try:
+        writer.execute(
+            """
+            INSERT INTO chat_logs (id, question, response, current_page, current_feature, timestamp, feedback)
+            VALUES (?, ?, ?, ?, ?, ?, ?)
+            """,
+            row,
+        )
+        connection.commit()
+    except Exception as e:
+        print(f"ChatLogger Error logging message: {e}")
+    finally:
+        connection.close()
+
+def update_chat_feedback(msg_id: str, feedback: str):
+    """Store user feedback on a previously logged chat message.
+
+    Args:
+        msg_id: ``id`` of the ``chat_logs`` row to update.
+        feedback: Value to store (e.g., 'up' or 'down').
+
+    Returns:
+        True if a row with ``msg_id`` existed and was updated, False otherwise.
+
+    Raises:
+        sqlite3.Error: if the update fails. Errors are no longer swallowed,
+            so the caller can report the failure instead of signalling
+            success for feedback that was never written.
+    """
+    init_db()  # Ensure DB and table are initialized
+
+    conn = sqlite3.connect(str(DB_PATH))
+    try:
+        cursor = conn.execute(
+            """
+            UPDATE chat_logs
+            SET feedback = ?
+            WHERE id = ?
+            """,
+            (feedback, msg_id)
+        )
+        conn.commit()
+        # rowcount is 0 when no message with this id was ever logged.
+        return cursor.rowcount > 0
+    finally:
+        conn.close()
diff --git a/backend/chat_router.py b/backend/chat_router.py
new file mode 100644
index 0000000..0221fa1
--- /dev/null
+++ b/backend/chat_router.py
@@ -0,0 +1,164 @@
+import uuid
+import logging
+from typing import List, Optional
+from fastapi import APIRouter, HTTPException
+from pydantic import BaseModel, Field
+
+from llm_provider import get_llm_provider
+from rag_retriever import get_retriever
+from chat_logger import log_chat_message, update_chat_feedback
+
+logger = logging.getLogger("freshscan.chat")
+router = APIRouter(prefix="/api/v1/chat", tags=["chat"])
+
+# ── Pydantic Request/Response Models ──────────────────────────────────────────
+
+class ChatHistoryItem(BaseModel):
+ # One prior conversation message supplied by the client as context.
+ # `role` is not validated here; chat_message treats every value other
+ # than "user" as "assistant".
+ role: str = Field(..., description="Either 'user' or 'assistant'")
+ content: str = Field(..., description="The message content")
+
+class ChatMessageRequest(BaseModel):
+ # Request body of POST /api/v1/chat/message.
+ # `question` must be non-empty (min_length=1); the other fields are optional.
+ question: str = Field(..., min_length=1, description="User question")
+ # camelCase field names are part of the JSON contract with the client.
+ currentPage: Optional[str] = Field(
+ None, description="Active page user is viewing"
+ )
+ currentFeature: Optional[str] = Field(
+ None, description="Feature area user is interacting with"
+ )
+ # Defaults to an empty list; chat_message only forwards the last 10 items.
+ history: Optional[List[ChatHistoryItem]] = Field(
+ default_factory=list, description="Recent conversation history"
+ )
+
+class ChatMessageResponse(BaseModel):
+ # Response body of POST /api/v1/chat/message.
+ # `message_id` is the UUID generated per answer; the feedback endpoint
+ # takes the same id.
+ message_id: str = Field(..., description="Unique ID for this response")
+ response: str = Field(..., description="Generated markdown text answer")
+
+class ChatFeedbackRequest(BaseModel):
+ # Request body of POST /api/v1/chat/feedback.
+ # `feedback` is typed as a plain string; the endpoint itself rejects any
+ # value other than "up" or "down" with HTTP 400.
+ message_id: str = Field(..., description="ID of the message being rated")
+ feedback: str = Field(..., description="Feedback direction: 'up' or 'down'")
+
+# ── Endpoints ─────────────────────────────────────────────────────────────────
+
+@router.post("/message", response_model=ChatMessageResponse)
+async def chat_message(request: ChatMessageRequest):
+    """
+    Main Chat Assistant endpoint.
+    Retrieves local RAG context, merges page context & history, sends to LLM, and logs analytics.
+
+    RAG, provider and logging failures are handled locally: the request still
+    succeeds, with a fallback apology text when the provider call fails.
+    Only unexpected errors result in HTTP 500.
+    """
+    # Function-scope import, matching how the provider module imports httpx.
+    import asyncio
+
+    try:
+        # 1. Retrieve local documentation context (RAG)
+        context = ""
+        try:
+            retriever = get_retriever()
+            context = retriever.retrieve_relevant_context(request.question)
+        except Exception as e:
+            logger.error(f"RAG retrieval error: {e}")
+            # Non-blocking, continue with empty context
+
+        # 2. Build system prompt
+        system_prompt = (
+            "You are the official FreshScanAI Assistant.\n"
+            "Your primary purpose is helping users understand and navigate FreshScanAI.\n"
+            "Answer questions related to platform features, workflows, onboarding, "
+            "reports, dashboards, uploads, analysis processes, and troubleshooting.\n"
+            "Use retrieved documentation whenever available.\n"
+            "Never invent product features that do not exist.\n"
+            "If documentation does not contain the answer, politely explain that the "
+            "information is unavailable."
+        )
+
+        # 3. Incorporate page and feature context if provided
+        context_details = []
+        if request.currentPage:
+            context_details.append(f"- Active Page: {request.currentPage}")
+        if request.currentFeature:
+            context_details.append(f"- Active Feature/Section: {request.currentFeature}")
+
+        context_info = ""
+        if context_details:
+            context_info = "\nUser Current App Context:\n" + "\n".join(context_details) + "\n"
+
+        # 4. Integrate RAG documentation into LLM input
+        prompt = ""
+        if context:
+            prompt += (
+                f"Retrieved Documentation:\n{context}\n\n"
+                f"Instructions:\n"
+                "Use the retrieved documentation to answer the user's question. "
+                "Be factual, concise, and helpful. "
+                "If the information to answer is not present in the retrieved documentation, "
+                "state that the information is not available in the platform's documentation.\n\n"
+            )
+        else:
+            prompt += (
+                "No documentation was retrieved for this question. "
+                "Answer using only verified platform information if you are certain, "
+                "or politely state that the info is unavailable.\n\n"
+            )
+
+        if context_info:
+            prompt += context_info + "\n"
+
+        prompt += f"User Question: {request.question}"
+
+        # 5. Format history for provider
+        # Keep only the last 10 messages (about 5 user/assistant exchanges)
+        # to prevent token bloat. Any role other than "user" is sent as
+        # "assistant".
+        history_list = [
+            {
+                "role": "user" if item.role == "user" else "assistant",
+                "content": item.content
+            }
+            for item in (request.history or [])[-10:]
+        ]
+
+        # 6. Generate answer via provider
+        try:
+            provider = get_llm_provider()
+            # generate_response is a blocking synchronous call; run it in a
+            # worker thread so a slow LLM request does not stall the event
+            # loop for every other client.
+            response_text = await asyncio.to_thread(
+                provider.generate_response, system_prompt, prompt, history_list
+            )
+        except Exception as provider_err:
+            logger.error(f"LLM Provider execution failed: {provider_err}")
+            response_text = (
+                "I'm sorry, I encountered a temporary connection issue "
+                "while trying to reach the AI model. "
+                "Please ensure LLM_PROVIDER and API keys are set correctly "
+                "in the environment configuration."
+            )
+
+        # 7. Log exchange to SQLite for analytics
+        msg_id = str(uuid.uuid4())
+        try:
+            log_chat_message(
+                msg_id=msg_id,
+                question=request.question,
+                response=response_text,
+                current_page=request.currentPage,
+                current_feature=request.currentFeature
+            )
+        except Exception as log_err:
+            logger.error(f"Failed to log chat interaction: {log_err}")
+
+        return ChatMessageResponse(message_id=msg_id, response=response_text)
+
+    except Exception as e:
+        logger.error(f"Unhandled error in chat message handler: {e}")
+        # Always return a user-friendly error envelope, never expose stack traces
+        raise HTTPException(
+            status_code=500,
+            detail="An unexpected error occurred in the chat assistant. Please try again."
+        )
+
+@router.post("/feedback")
+async def chat_feedback(request: ChatFeedbackRequest):
+    """Logs thumbs up/down user feedback for a given message ID."""
+    # Only the two thumb directions are accepted; anything else is a client error.
+    accepted_values = ("up", "down")
+    if request.feedback not in accepted_values:
+        raise HTTPException(status_code=400, detail="Feedback must be either 'up' or 'down'")
+
+    try:
+        update_chat_feedback(request.message_id, request.feedback)
+    except Exception as e:
+        # Log the cause server-side; the client only gets a generic message.
+        logger.error(f"Failed to record feedback for message {request.message_id}: {e}")
+        raise HTTPException(
+            status_code=500,
+            detail="Could not submit feedback due to an internal logger issue."
+        )
+    return {"success": True}
diff --git a/backend/llm_provider.py b/backend/llm_provider.py
new file mode 100644
index 0000000..e50b3fb
--- /dev/null
+++ b/backend/llm_provider.py
@@ -0,0 +1,336 @@
+import os
+import logging
+from typing import List, Dict
+
+logger = logging.getLogger("freshscan.llm")
+
+
+class LLMProvider:
+    """Common interface implemented by every chat-model backend."""
+
+    def generate_response(
+        self, system_prompt: str, prompt: str,
+        history: List[Dict[str, str]] = None,
+    ) -> str:
+        """
+        Produce the model's reply to ``prompt``.
+
+        Args:
+            system_prompt: System-level instruction for the model.
+            prompt: The user prompt, including any RAG context.
+            history: Earlier turns as {"role": "user"|"assistant", "content": str}.
+
+        Raises:
+            NotImplementedError: always; concrete providers override this method.
+        """
+        message = "Subclasses must implement generate_response"
+        raise NotImplementedError(message)
+
+
+class GeminiProvider(LLMProvider):
+    """Provider that calls the Google Gemini ``generateContent`` REST endpoint.
+
+    The model name comes from ``GEMINI_MODEL`` (default ``gemini-2.5-flash``).
+    """
+
+    def __init__(self, api_key: str):
+        self.api_key = api_key
+        self.model = os.environ.get(
+            "GEMINI_MODEL", "gemini-2.5-flash"
+        )
+
+    def generate_response(
+        self, system_prompt: str, prompt: str,
+        history: List[Dict[str, str]] = None,
+    ) -> str:
+        """Send the conversation to Gemini and return the first candidate's text.
+
+        Args:
+            system_prompt: Sent as ``systemInstruction``.
+            prompt: Current user message, appended after ``history``.
+            history: Earlier turns; any role other than "user" is sent as "model".
+
+        Raises:
+            RuntimeError: if the request fails or the response lacks the expected fields.
+        """
+        import httpx
+        url = (
+            "https://generativelanguage.googleapis.com"
+            f"/v1beta/models/{self.model}:generateContent"
+        )
+        # Send the key as a header, not as a ``?key=`` query parameter:
+        # raise_for_status() error messages include the request URL, and that
+        # message is logged and re-raised below, which would leak the key.
+        headers = {"x-goog-api-key": self.api_key}
+
+        # Build contents list: prior turns first, then the current user prompt
+        contents = [
+            {
+                "role": "user" if turn["role"] == "user" else "model",
+                "parts": [{"text": turn["content"]}]
+            }
+            for turn in (history or [])
+        ]
+        contents.append({
+            "role": "user",
+            "parts": [{"text": prompt}]
+        })
+
+        payload = {
+            "contents": contents,
+            "systemInstruction": {
+                "parts": [{"text": system_prompt}]
+            },
+            "generationConfig": {
+                "temperature": 0.2,
+                "maxOutputTokens": 1024
+            }
+        }
+
+        try:
+            response = httpx.post(
+                url, json=payload, headers=headers, timeout=30.0,
+            )
+            response.raise_for_status()
+            data = response.json()
+            text = data["candidates"][0]["content"]["parts"][0]["text"]
+            return text
+        except Exception as e:
+            logger.error(f"Gemini API error: {e}")
+            raise RuntimeError(f"Gemini provider failed: {e}") from e
+
+
+class OpenAIProvider(LLMProvider):
+    """Provider that calls the OpenAI chat-completions HTTP endpoint.
+
+    The model name comes from ``OPENAI_MODEL`` (default ``gpt-4o-mini``).
+    """
+
+    def __init__(self, api_key: str):
+        self.api_key = api_key
+        self.model = os.environ.get("OPENAI_MODEL", "gpt-4o-mini")
+
+    def generate_response(
+        self, system_prompt: str, prompt: str,
+        history: List[Dict[str, str]] = None,
+    ) -> str:
+        """Return the first choice's message content for the conversation.
+
+        Raises RuntimeError when the request or response parsing fails.
+        """
+        import httpx
+
+        # System message first, then prior turns as given, then the new prompt.
+        prior_turns = [
+            {"role": turn["role"], "content": turn["content"]}
+            for turn in (history or [])
+        ]
+        messages = [
+            {"role": "system", "content": system_prompt},
+            *prior_turns,
+            {"role": "user", "content": prompt},
+        ]
+
+        request_headers = {
+            "Authorization": f"Bearer {self.api_key}",
+            "Content-Type": "application/json"
+        }
+        request_body = {
+            "model": self.model,
+            "messages": messages,
+            "temperature": 0.2,
+            "max_tokens": 1024
+        }
+
+        try:
+            response = httpx.post(
+                "https://api.openai.com/v1/chat/completions",
+                json=request_body,
+                headers=request_headers,
+                timeout=30.0,
+            )
+            response.raise_for_status()
+            return response.json()["choices"][0]["message"]["content"]
+        except Exception as e:
+            logger.error(f"OpenAI API error: {e}")
+            raise RuntimeError(f"OpenAI provider failed: {e}")
+
+
+class ClaudeProvider(LLMProvider):
+    """Provider that calls the Anthropic Messages HTTP endpoint.
+
+    The model name comes from ``CLAUDE_MODEL``
+    (default ``claude-3-5-sonnet-20241022``).
+    """
+
+    def __init__(self, api_key: str):
+        self.api_key = api_key
+        self.model = os.environ.get(
+            "CLAUDE_MODEL", "claude-3-5-sonnet-20241022"
+        )
+
+    def generate_response(
+        self, system_prompt: str, prompt: str,
+        history: List[Dict[str, str]] = None,
+    ) -> str:
+        """Return the text of the first content block of the reply.
+
+        Raises RuntimeError when the request or response parsing fails.
+        """
+        import httpx
+
+        # The system prompt goes in the top-level "system" field, so the
+        # message list holds only prior turns plus the new prompt.
+        messages = [
+            {"role": turn["role"], "content": turn["content"]}
+            for turn in (history or [])
+        ]
+        messages.append({"role": "user", "content": prompt})
+
+        request_headers = {
+            "x-api-key": self.api_key,
+            "anthropic-version": "2023-06-01",
+            "content-type": "application/json"
+        }
+        request_body = {
+            "model": self.model,
+            "system": system_prompt,
+            "messages": messages,
+            "max_tokens": 1024,
+            "temperature": 0.2
+        }
+
+        try:
+            response = httpx.post(
+                "https://api.anthropic.com/v1/messages",
+                json=request_body,
+                headers=request_headers,
+                timeout=30.0,
+            )
+            response.raise_for_status()
+            return response.json()["content"][0]["text"]
+        except Exception as e:
+            logger.error(f"Claude API error: {e}")
+            raise RuntimeError(f"Claude provider failed: {e}")
+
+
+class OllamaProvider(LLMProvider):
+    """Provider that calls an Ollama server's ``/api/chat`` endpoint.
+
+    The model name comes from ``OLLAMA_MODEL`` (default ``llama3``).
+    """
+
+    def __init__(self, base_url: str):
+        # Drop trailing slashes so the endpoint path can be appended directly.
+        self.base_url = base_url.rstrip('/')
+        self.model = os.environ.get("OLLAMA_MODEL", "llama3")
+
+    def generate_response(
+        self, system_prompt: str, prompt: str,
+        history: List[Dict[str, str]] = None,
+    ) -> str:
+        """Return the reply message content of a non-streaming chat call.
+
+        Raises RuntimeError when the request or response parsing fails.
+        """
+        import httpx
+        endpoint = f"{self.base_url}/api/chat"
+
+        prior_turns = [
+            {"role": turn["role"], "content": turn["content"]}
+            for turn in (history or [])
+        ]
+        messages = [
+            {"role": "system", "content": system_prompt},
+            *prior_turns,
+            {"role": "user", "content": prompt},
+        ]
+
+        request_body = {
+            "model": self.model,
+            "messages": messages,
+            "stream": False,
+            "options": {
+                "temperature": 0.2
+            }
+        }
+
+        try:
+            # This request uses a 60 s timeout (the hosted providers use 30 s).
+            response = httpx.post(
+                endpoint, json=request_body, timeout=60.0,
+            )
+            response.raise_for_status()
+            return response.json()["message"]["content"]
+        except Exception as e:
+            logger.error(f"Ollama API error: {e}")
+            raise RuntimeError(f"Ollama provider failed: {e}")
+
+
+class MockProvider(LLMProvider):
+    """Fallback provider when no API keys are configured."""
+
+    def generate_response(
+        self, system_prompt: str, prompt: str,
+        history: List[Dict[str, str]] = None,
+    ) -> str:
+        """Return a canned demo-mode answer chosen by keywords in ``prompt``.
+
+        ``system_prompt`` and ``history`` are accepted for interface
+        compatibility and ignored. Every reply starts with the demo-mode
+        notice, followed by one topic-specific section.
+        """
+        p_lower = prompt.lower()
+        # Greetings are matched as whole words. A substring test for "hi"
+        # also fires on "this", "which" or "fish", which made nearly every
+        # prompt look like a greeting.
+        words = set(
+            "".join(ch if ch.isalnum() else " " for ch in p_lower).split()
+        )
+
+        reply = (
+            "\U0001f916 **FreshScanAI Assistant [DEMO MODE]**\n\n"
+            "No active LLM API key detected in your "
+            "environment. I am running in local document "
+            "retrieval fallback mode. "
+            "To enable fully conversational answers, "
+            "please set `GEMINI_API_KEY` (or other provider "
+            "credentials) in `backend/.env`.\n\n"
+        )
+
+        if words & {"hello", "hi", "hey"}:
+            reply += (
+                "Hello! Welcome to FreshScanAI. "
+                "How can I help you navigate the platform today?"
+            )
+        elif "upload" in p_lower:
+            reply += (
+                "To upload a file for freshness assessment:\n"
+                "1. Go to the **Scanner** page.\n"
+                "2. Click the **Upload File** button, "
+                "or drag and drop your fish image.\n"
+                "3. The system will process your image and "
+                "auto-navigate to the detailed "
+                "**Analysis Dashboard**."
+            )
+        elif "work" in p_lower or "how does" in p_lower:
+            reply += (
+                "FreshScanAI works by analyzing three "
+                "biologically-significant freshness markers:\n"
+                "- **Gills**: Evaluates hemoglobin oxidation "
+                "(color saturation).\n"
+                "- **Eyes**: Analyzes corneal clarity and "
+                "pupil reflex.\n"
+                "- **Body**: Assesses epidermal tension, "
+                "scale adhesion, and mucus integrity.\n\n"
+                "A dual-stream CNN fuses these outputs into a "
+                "single **Freshness Index (0-100)** and "
+                "letter grade."
+            )
+        elif "map" in p_lower or "vendor" in p_lower:
+            reply += (
+                "The **Market Trust Map** aggregates "
+                "anonymized scans to rank markets and "
+                "vendors. Markets are color-coded based on "
+                "average freshness: Green (85+), "
+                "Yellow (70-84), and Red (<70)."
+            )
+        else:
+            reply += (
+                "Here is the local documentation context "
+                "I retrieved for your query:\n\n"
+                "> *Query context matches your question:*\n"
+                "*(Full AI responses will be active once "
+                "GEMINI_API_KEY is configured in "
+                "backend/.env)*"
+            )
+
+        return reply
+
+
+def get_llm_provider() -> LLMProvider:
+    # Select the chat backend named by LLM_PROVIDER (default "gemini",
+    # case-insensitive). Key-based providers fall back to MockProvider when
+    # their API key is unset or empty, as does any unknown provider name.
+    selected = os.environ.get("LLM_PROVIDER", "gemini").lower()
+
+    if selected == "ollama":
+        # Ollama needs only a base URL, no API key.
+        base_url = os.environ.get(
+            "OLLAMA_BASE_URL", "http://localhost:11434"
+        )
+        return OllamaProvider(base_url)
+
+    # provider name -> (environment variable holding the key, provider class)
+    keyed_providers = {
+        "gemini": ("GEMINI_API_KEY", GeminiProvider),
+        "openai": ("OPENAI_API_KEY", OpenAIProvider),
+        "claude": ("CLAUDE_API_KEY", ClaudeProvider),
+    }
+
+    if selected not in keyed_providers:
+        logger.warning(
+            f"Unknown LLM provider: {selected}. "
+            "Falling back to MockProvider."
+        )
+        return MockProvider()
+
+    key_variable, provider_cls = keyed_providers[selected]
+    api_key = os.environ.get(key_variable)
+    if not api_key:
+        logger.warning(
+            f"{key_variable} is not set. "
+            "Falling back to MockProvider."
+        )
+        return MockProvider()
+    return provider_cls(api_key)
diff --git a/backend/main.py b/backend/main.py
index 096f104..12a3121 100644
--- a/backend/main.py
+++ b/backend/main.py
@@ -11,13 +11,14 @@
from slowapi.errors import RateLimitExceeded
from slowapi.middleware import SlowAPIMiddleware
from rate_limiter import limiter
+from chat_router import router as chat_router
# Load .env file if present (python-dotenv)
try:
from dotenv import load_dotenv
- load_dotenv(Path(__file__).parent / ".env")
+ load_dotenv(Path(__file__).parent / ".env", override=True)
except ImportError:
pass
@@ -126,6 +127,7 @@ async def lifespan(app: FastAPI):
app.state.limiter = limiter
app.add_exception_handler(429, _rate_limit_exceeded_handler)
app.add_middleware(SlowAPIMiddleware)
+app.include_router(chat_router)
@app.exception_handler(RateLimitExceeded)
async def rate_limit_handler(request: Request, exc: RateLimitExceeded):
diff --git a/backend/rag_retriever.py b/backend/rag_retriever.py
new file mode 100644
index 0000000..3c46463
--- /dev/null
+++ b/backend/rag_retriever.py
@@ -0,0 +1,251 @@
+import re
+import math
+from pathlib import Path
+from typing import List, Dict, Tuple, Set
+
+# Common English stop words excluded from TF-IDF indexing. Stored as a set so
+# membership checks are constant-time. Each word is listed exactly once.
+STOP_WORDS: Set[str] = {
+    'the', 'a', 'an', 'and', 'or', 'but', 'is', 'are', 'was', 'were', 'be', 'been', 'being',
+    'in', 'on', 'at', 'to', 'for', 'of', 'by', 'with', 'about', 'against', 'between', 'into',
+    'through', 'during', 'before', 'after', 'above', 'below', 'from', 'up', 'down', 'out',
+    'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why',
+    'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no',
+    'nor', 'not', 'only', 'own', 'same', 'so', 'than', 'too', 'very', 's', 't', 'can', 'will',
+    'just', 'don', 'should', 'now', 'i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves',
+    'you', 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she',
+    'her', 'hers', 'herself', 'it', 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves',
+}
+
+class RAGChunk:
+    """One retrievable slice of a documentation file.
+
+    ``tokens`` and ``tf`` start empty; they are plain public attributes that
+    the code building the index assigns after construction.
+    """
+
+    def __init__(self, source: str, heading: str, content: str):
+        # Scoring state, filled in once the chunk has been tokenized.
+        self.tokens: List[str] = []
+        self.tf: Dict[str, float] = {}
+
+        # Provenance (file name, section heading) and the raw section text.
+        self.source, self.heading, self.content = source, heading, content
+
+        # Provenance line followed by the body, kept as one string.
+        self.full_text = f"Source: {source} > {heading}\n{content}"
+
+def tokenize(text: str) -> List[str]:
+    """Turn free text into index terms.
+
+    The text is lowercased, every character that is not ``a-z``, ``0-9`` or
+    whitespace becomes a space, and the result is split on whitespace. Stop
+    words and single-character tokens are discarded; order and duplicates of
+    the remaining tokens are preserved.
+    """
+    cleaned = re.sub(r'[^a-z0-9\s]', ' ', text.lower())
+
+    kept: List[str] = []
+    for word in cleaned.split():
+        if len(word) <= 1 or word in STOP_WORDS:
+            continue
+        kept.append(word)
+    return kept
+
+class RAGRetriever:
+    """In-memory TF-IDF retriever over the project's markdown documentation.
+
+    On construction it reads README.md and DOCUMENTATION.md from the parent of
+    this file's directory, splits them into heading-scoped chunks and
+    precomputes per-chunk term frequencies plus a corpus-wide IDF table.
+    """
+
+    def __init__(self):
+        self.chunks: List[RAGChunk] = []
+        self.idf: Dict[str, float] = {}
+        self.load_and_index_docs()
+
+    def load_and_index_docs(self):
+        """Load the documentation files and (re)build the TF-IDF index.
+
+        Missing or unreadable files are reported with ``print`` and skipped.
+        If nothing could be loaded, one built-in fallback chunk is indexed so
+        retrieval still has something to return. Calling this again replaces
+        the previous index completely, including the IDF table.
+        """
+        # Backend directory is at FreshScanAi/backend
+        # Project root is at FreshScanAi/
+        backend_dir = Path(__file__).parent
+        project_root = backend_dir.parent
+
+        files_to_load = [
+            ("README.md", project_root / "README.md"),
+            ("DOCUMENTATION.md", project_root / "DOCUMENTATION.md")
+        ]
+
+        raw_chunks: List[Tuple[str, str, str]] = []  # (source, heading, content)
+
+        for name, path in files_to_load:
+            if not path.exists():
+                print(f"RAG WARNING: {name} not found at {path.absolute()}")
+                continue
+
+            try:
+                content = path.read_text(encoding='utf-8')
+                raw_chunks.extend(self.split_by_markdown_headers(name, content))
+            except Exception as e:
+                # Best effort: one bad file must not prevent indexing the other.
+                print(f"RAG ERROR reading {name}: {e}")
+
+        # If no chunks were loaded, create a default fallback chunk so we don't crash
+        if not raw_chunks:
+            raw_chunks.append((
+                "System", "System Info",
+                "FreshScanAI provides edge and server fish freshness scanning using PyTorch. "
+                "It has a scanner, a live market map, scan history, and support for 47+ species."
+            ))
+
+        # Start from a clean slate so a re-index never keeps terms that no
+        # longer occur in any chunk.
+        self.chunks = []
+        self.idf = {}
+        doc_frequency: Dict[str, int] = {}
+
+        for src, heading, text in raw_chunks:
+            chunk = RAGChunk(src, heading, text)
+            chunk.tokens = tokenize(chunk.full_text)
+
+            if chunk.tokens:
+                word_counts: Dict[str, int] = {}
+                for token in chunk.tokens:
+                    word_counts[token] = word_counts.get(token, 0) + 1
+
+                num_tokens = len(chunk.tokens)
+                for word, count in word_counts.items():
+                    # TF is the share of the chunk's tokens that are this word.
+                    chunk.tf[word] = count / num_tokens
+                    # DF counts each chunk at most once per word.
+                    doc_frequency[word] = doc_frequency.get(word, 0) + 1
+
+            self.chunks.append(chunk)
+
+        # Compute Inverse Document Frequency (IDF)
+        num_docs = len(self.chunks)
+        for word, df in doc_frequency.items():
+            # Smoothed IDF; always positive because num_docs >= 1 and df >= 1.
+            self.idf[word] = math.log(1.0 + (num_docs / (1.0 + df)))
+
+        print(f"RAG Indexing complete. Indexed {len(self.chunks)} chunks across {len(files_to_load)} files.")
+
+    def split_by_markdown_headers(self, source: str, content: str) -> List[Tuple[str, str, str]]:
+        """Split markdown into ``(source, heading, text)`` chunks per section.
+
+        A section starts at a level 1-4 ATX heading (``#`` to ``####``) and
+        runs to the next one. Text before the first heading is filed under
+        "Introduction". The heading line itself is kept in the section text.
+        Sections of 40 characters or fewer (after stripping) are dropped, and
+        long sections are further divided by ``split_large_text``.
+
+        Lines inside fenced code blocks are never treated as headings, so a
+        shell comment such as ``# install deps`` inside a snippet does not
+        start a bogus section.
+        """
+        chunks: List[Tuple[str, str, str]] = []
+        current_heading = "Introduction"
+        current_lines: List[str] = []
+        in_fence = False
+
+        # Heading regex for #, ##, ###, ####
+        heading_re = re.compile(r'^(#{1,4})\s+(.+)$')
+
+        def flush_section():
+            # Ignore tiny trivial sections (40 characters or fewer).
+            text_block = "\n".join(current_lines).strip()
+            if len(text_block) > 40:
+                chunks.extend(self.split_large_text(source, current_heading, text_block))
+
+        for line in content.splitlines():
+            # Simple fence toggle: any line opening with ``` or ~~~ flips the
+            # state. It does not check that opener and closer match.
+            if line.lstrip().startswith(("```", "~~~")):
+                in_fence = not in_fence
+
+            match = None if in_fence else heading_re.match(line)
+            if match:
+                flush_section()
+                current_heading = match.group(2).strip()
+                current_lines = [line]
+            else:
+                current_lines.append(line)
+
+        # Flush the last section
+        flush_section()
+        return chunks
+
+    def split_large_text(self, source: str, heading: str, text: str, max_chars: int = 1500) -> List[Tuple[str, str, str]]:
+        """Sub-split a section so that no chunk is longer than ``max_chars``.
+
+        Text that already fits is returned unchanged as a single chunk.
+        Otherwise paragraphs (separated by blank lines) are packed greedily
+        and joined with a blank line. A paragraph that is itself too long is
+        split into sentences, which are packed and joined with a space. A
+        single sentence that is still too long is cut into fixed-size slices.
+        Joiner characters count towards the limit. ``max_chars`` values below
+        1 are treated as 1 when splitting.
+        """
+        if len(text) <= max_chars:
+            return [(source, heading, text)]
+
+        limit = max(1, max_chars)
+        pieces: List[str] = []
+        run: List[str] = []  # consecutive paragraphs that each fit in limit
+
+        for para in text.split('\n\n'):
+            para = para.strip()
+            if not para:
+                continue
+
+            if len(para) <= limit:
+                run.append(para)
+                continue
+
+            # Emit what was collected so far to keep document order, then
+            # break the oversized paragraph down on its own.
+            pieces.extend(self._pack(run, "\n\n", limit))
+            run = []
+            pieces.extend(self._pack(self._split_sentences(para, limit), " ", limit))
+
+        pieces.extend(self._pack(run, "\n\n", limit))
+        return [(source, heading, piece) for piece in pieces]
+
+    @staticmethod
+    def _split_sentences(para: str, limit: int) -> List[str]:
+        """Split a paragraph into sentences, slicing any longer than ``limit``."""
+        units: List[str] = []
+        for sentence in re.split(r'(?<=[.!?])\s+', para):
+            if not sentence:
+                continue
+            if len(sentence) <= limit:
+                units.append(sentence)
+            else:
+                units.extend(
+                    sentence[start:start + limit]
+                    for start in range(0, len(sentence), limit)
+                )
+        return units
+
+    @staticmethod
+    def _pack(units: List[str], joiner: str, limit: int) -> List[str]:
+        """Greedily join consecutive units with ``joiner`` without exceeding ``limit``."""
+        packed: List[str] = []
+        group: List[str] = []
+        group_len = 0
+        for unit in units:
+            extra = len(unit) + (len(joiner) if group else 0)
+            if group and group_len + extra > limit:
+                packed.append(joiner.join(group))
+                group, group_len, extra = [], 0, len(unit)
+            group.append(unit)
+            group_len += extra
+        if group:
+            packed.append(joiner.join(group))
+        return packed
+
+    def retrieve_relevant_context(self, query: str, limit: int = 3) -> str:
+        """Return the best-matching chunks for ``query`` as one context string.
+
+        Chunks are ranked by the dot product of the query's and the chunk's
+        TF-IDF weights (not length-normalised cosine similarity). If no chunk
+        shares an indexed term with the query, chunks are ranked instead by
+        how many query tokens occur as substrings of their text. At most
+        ``limit`` chunks are returned, formatted as ``[Document N]`` blocks
+        separated by ``---`` lines. Returns an empty string when ``limit`` is
+        not positive, the query has no usable tokens, or nothing matches.
+        """
+        if limit <= 0:
+            return ""
+
+        query_tokens = tokenize(query)
+        if not query_tokens:
+            return ""
+
+        # Query TF: occurrences of each term divided by the query length.
+        query_len = len(query_tokens)
+        query_tf: Dict[str, float] = {}
+        for token in query_tokens:
+            query_tf[token] = query_tf.get(token, 0) + 1 / query_len
+
+        scored_chunks: List[Tuple[RAGChunk, float]] = []
+        for chunk in self.chunks:
+            score = 0.0
+            for word, q_tf in query_tf.items():
+                c_tf = chunk.tf.get(word)
+                if c_tf is None:
+                    continue
+                # Score contribution = (Query TF * IDF) * (Chunk TF * IDF)
+                w_idf = self.idf.get(word, 0.0)
+                score += (q_tf * w_idf) * (c_tf * w_idf)
+
+            if score > 0:
+                scored_chunks.append((chunk, score))
+
+        # Sort by score descending; the stable sort keeps index order on ties.
+        scored_chunks.sort(key=lambda x: x[1], reverse=True)
+        top_chunks = scored_chunks[:limit]
+
+        if not top_chunks:
+            # Fallback to simple sub-string search on the query words if TF-IDF yields nothing
+            overlap_chunks = []
+            for chunk in self.chunks:
+                haystack = chunk.full_text.lower()  # lowercase once per chunk
+                matches = sum(1 for token in query_tokens if token in haystack)
+                if matches > 0:
+                    overlap_chunks.append((chunk, matches))
+            overlap_chunks.sort(key=lambda x: x[1], reverse=True)
+            top_chunks = overlap_chunks[:limit]
+
+        if not top_chunks:
+            return ""
+
+        # Format retrieved chunks
+        return "\n\n---\n\n".join(
+            f"[Document {rank}]\n"
+            f"{chunk.full_text.strip()}"
+            for rank, (chunk, _) in enumerate(top_chunks, 1)
+        )
+
+# Process-wide retriever instance; stays None until get_retriever() is first
+# called, at which point the documentation is loaded and indexed.
+_retriever = None
+
+def get_retriever() -> RAGRetriever:
+    """Return the shared ``RAGRetriever``, constructing it on first use."""
+    global _retriever
+    if _retriever is not None:
+        return _retriever
+    _retriever = RAGRetriever()
+    return _retriever
diff --git a/src/components/ChatAssistant.tsx b/src/components/ChatAssistant.tsx
new file mode 100644
index 0000000..80ce932
--- /dev/null
+++ b/src/components/ChatAssistant.tsx
@@ -0,0 +1,338 @@
+import React, { useState, useEffect, useRef } from 'react';
+import { useLocation } from 'react-router-dom';
+import { MessageSquareCode, X, Send, ThumbsUp, ThumbsDown, Sparkles } from 'lucide-react';
+import { api } from '../lib/api';
+
+// One entry in the chat transcript held in component state.
+interface Message {
+ // Set from the API response's message_id; absent on user and error messages.
+ id?: string;
+ role: 'user' | 'assistant';
+ content: string;
+ // Thumbs rating recorded for this message, if any.
+ feedback?: 'up' | 'down';
+}
+
+// Resolve a router pathname to the page / feature labels sent along with each
+// chat question. Unknown paths fall back to the generic "App Shell" context.
+function getPageContext(pathname: string): { page: string; feature: string } {
+  // A Map (not a plain object) so paths such as "constructor" cannot match
+  // inherited properties.
+  const contextByPath = new Map<string, { page: string; feature: string }>([
+    ['/scanner', { page: 'Scanner', feature: 'Real-time Camera/Upload Image Assessment' }],
+    ['/map', { page: 'Market Map', feature: 'Crowdsourced Market Trust Heatmap' }],
+    ['/mode', { page: 'Mode Selection', feature: 'Scan Protocol selector (Auto vs Multi-Image)' }],
+    ['/analysis', { page: 'Analysis Dashboard', feature: 'Biomarker fresh index (gill, eye, body) results' }],
+    ['/results', { page: 'History Results', feature: 'Aggregate user scans history & metrics' }],
+    ['/auth', { page: 'Authentication', feature: 'Google Secure Sign-in / Dev Bypass' }],
+    ['/', { page: 'Landing Page', feature: 'Hero CTA & Platform features summary' }],
+  ]);
+
+  const known = contextByPath.get(pathname);
+  if (known === undefined) {
+    return { page: 'App Shell', feature: 'General Navigation' };
+  }
+  return known;
+}
+
+export default function ChatAssistant() {
+ const location = useLocation();
+ const [isOpen, setIsOpen] = useState(false);
+ const [messages, setMessages] = useState([]);
+ const [inputValue, setInputValue] = useState('');
+ const [isLoading, setIsLoading] = useState(false);
+
+ const messagesEndRef = useRef(null);
+
+ // Auto-scroll to bottom of chat window when new messages are added
+ useEffect(() => {
+ messagesEndRef.current?.scrollIntoView({ behavior: 'smooth' });
+ }, [messages, isLoading]);
+
+ // Suggested onboarding questions shown on initial load
+ const suggestedQuestions = [
+ { label: 'How does FreshScanAI work?', text: 'How does FreshScanAI work?' },
+ { label: 'How do I upload a file?', text: 'How do I upload a file?' },
+ { label: 'Where can I see the Trust Map?', text: 'Where can I see the Trust Map?' },
+ { label: '⚡ I am new here (Start Onboarding)', text: 'I am new here' }
+ ];
+
+ // Send one user message: append it to the transcript, call the chat API with
+ // the current page context and prior history, then append either the
+ // assistant's reply or a fixed error message. isLoading is true for the
+ // duration of the request.
+ const handleSendMessage = async (text: string) => {
+ // Ignore blank input, and ignore sends while a request is still in flight.
+ if (!text.trim() || isLoading) return;
+
+ const userMsg: Message = { role: 'user', content: text };
+ setMessages(prev => [...prev, userMsg]);
+ setInputValue('');
+ setIsLoading(true);
+
+ try {
+ // Determine page context from current path
+ const { page, feature } = getPageContext(location.pathname);
+
+ // Clean history to match expected format on backend: [{role, content}]
+ // `messages` is the value captured when this handler was created, so the
+ // userMsg appended above is not part of it; the new question travels
+ // separately as `text`.
+ // NOTE(review): earlier error messages stored as assistant entries are
+ // included in this history too - confirm that is intended.
+ const historyPayload = messages.map(m => ({
+ role: m.role,
+ content: m.content
+ }));
+
+ // Call API (text is sent as typed, without trimming)
+ const response = await api.chatMessage(text, page, feature, historyPayload);
+
+ // Keep the server's message_id so feedback can reference this reply.
+ const assistantMsg: Message = {
+ id: response.message_id,
+ role: 'assistant',
+ content: response.response
+ };
+
+ setMessages(prev => [...prev, assistantMsg]);
+ } catch (err) {
+ console.error('Failed to get chat response:', err);
+ // Shown in the transcript as an assistant message without an id.
+ const errorMsg: Message = {
+ role: 'assistant',
+ content: '⚠️ I encountered an error connecting to the FreshScanAI neural chat hub. Please verify your internet connection or check LLM configurations.'
+ };
+ setMessages(prev => [...prev, errorMsg]);
+ } finally {
+ setIsLoading(false);
+ }
+ };
+
+ // Submit a thumbs up/down for the message at position idx. The rating is
+ // stored in local state only after the API call succeeds; on failure the
+ // error is logged and the message stays unrated.
+ const handleFeedback = async (msgId: string, idx: number, type: 'up' | 'down') => {
+ // NOTE(review): assumes idx is a valid index into `messages` - confirm callers.
+ const msg = messages[idx];
+ if (msg.feedback) return; // Prevent double rating
+
+ try {
+ await api.submitChatFeedback(msgId, type);
+ // Functional update: copy the array and replace only the rated entry.
+ setMessages(prev => {
+ const updated = [...prev];
+ updated[idx] = { ...updated[idx], feedback: type };
+ return updated;
+ });
+ } catch (err) {
+ console.error('Failed to submit feedback:', err);
+ }
+ };
+
+ // Basic formatter for markdown-like text to avoid requiring external packages
+ const formatText = (text: string) => {
+ const paragraphs = text.split('\n');
+ return paragraphs.map((para, pIdx) => {
+ // Inline formatting helpers: bold (**text**), code (`code`)
+ let content: React.ReactNode = para;
+
+ // Handle bold blocks
+ if (para.includes('**')) {
+ const parts = para.split('**');
+ content = parts.map((part, i) => i % 2 === 1 ? {part} : part);
+ }
+
+ // Check if paragraph is a bullet point
+ if (para.trim().startsWith('- ') || para.trim().startsWith('* ')) {
+ const listText = para.trim().substring(2);
+ return (
+
+ {listText}
+
+ );
+ }
+
+ // Check if paragraph is numbered list
+ const numMatch = para.trim().match(/^(\d+)\.\s+(.+)$/);
+ if (numMatch) {
+ return (
+
+ {numMatch[2]}
+
+ );
+ }
+
+ // Check for code blocks / console styling
+ if (para.trim().startsWith('>')) {
+ return (
+
+ {para.trim().substring(1).trim()}
+
+ );
+ }
+
+ return (
+
+ {content}
+
+ );
+ });
+ };
+
+ return (
+ <>
+ {/* Floating Toggle Button */}
+
+
+ {/* Chat Window Panel */}
+ {isOpen && (
+
+ {/* Header */}
+
+
+
+
+ FRESHSCAN_AI_ASSISTANT_HUD
+
+
+
+
+
+ {/* Messages Area */}
+
+ {messages.length === 0 ? (
+
+
+
+
+
+
+ AI CHAT ASSISTANT
+
+
+ Ask questions about fish freshness analysis, scanning workflows, market mappings, or troubleshoot issues.
+
+
+
+ {/* Suggestions List */}
+
+
+ Suggested Prompts:
+
+
+ {suggestedQuestions.map((q, i) => (
+
+ ))}
+
+
+
+ ) : (
+ messages.map((msg, idx) => {
+ const isUser = msg.role === 'user';
+ return (
+
+ {/* Role Tag */}
+
+ {isUser ? '[USER]' : '[ASSISTANT]'}
+
+
+ {/* Message Bubble */}
+
+ {formatText(msg.content)}
+
+ {/* Feedback Rating Icons (Assistant Only) */}
+ {!isUser && msg.id && (
+
+
+
+
+ )}
+
+
+ );
+ })
+ )}
+
+ {/* Typing Loader */}
+ {isLoading && (
+
+
+ [ASSISTANT]
+
+
+
+
+
+
+
+ Processing...
+
+
+
+
+ )}
+
+
+
+
+ {/* Input Area */}
+
+
+ )}
+ >
+ );
+}
diff --git a/src/components/Layout.tsx b/src/components/Layout.tsx
index 4eec2ee..6423d1f 100644
--- a/src/components/Layout.tsx
+++ b/src/components/Layout.tsx
@@ -2,6 +2,7 @@ import { Outlet } from 'react-router-dom';
import Navbar from './Navbar';
import BottomNav from './BottomNav';
import Footer from './Footer';
+import ChatAssistant from './ChatAssistant';
export default function Layout() {
return (
@@ -29,6 +30,9 @@ export default function Layout() {
{/* Mobile Bottom Nav */}
+
+ {/* AI Chat Assistant */}
+
);
}
\ No newline at end of file
diff --git a/src/lib/api.ts b/src/lib/api.ts
index 6696d23..4c95313 100644
--- a/src/lib/api.ts
+++ b/src/lib/api.ts
@@ -184,4 +184,24 @@ export const api = {
getMarkets: (): Promise =>
apiFetch('/api/v1/maps/markets'),
+
+ /**
+ * POST a chat question to /api/v1/chat/message.
+ *
+ * The JSON body carries question, currentPage, currentFeature and history
+ * (history defaults to an empty array). Resolves to the message_id and
+ * response text returned by the endpoint.
+ * NOTE(review): the body uses camelCase keys while the feedback call uses
+ * snake_case (message_id) - confirm both match the backend models.
+ */
+ chatMessage: (
+ question: string,
+ currentPage?: string,
+ currentFeature?: string,
+ history: Array<{ role: 'user' | 'assistant'; content: string }> = []
+ ): Promise<{ message_id: string; response: string }> =>
+ apiFetch<{ message_id: string; response: string }>('/api/v1/chat/message', {
+ method: 'POST',
+ body: JSON.stringify({ question, currentPage, currentFeature, history }),
+ }),
+
+ /**
+ * POST a thumbs up/down rating for a chat reply to /api/v1/chat/feedback.
+ *
+ * The JSON body is { message_id, feedback }; resolves to { success }.
+ */
+ submitChatFeedback: (
+ messageId: string,
+ feedback: 'up' | 'down'
+ ): Promise<{ success: boolean }> =>
+ apiFetch<{ success: boolean }>('/api/v1/chat/feedback', {
+ method: 'POST',
+ body: JSON.stringify({ message_id: messageId, feedback }),
+ }),
};
diff --git a/src/pages/AuthPage.tsx b/src/pages/AuthPage.tsx
index d8943ed..efb4577 100644
--- a/src/pages/AuthPage.tsx
+++ b/src/pages/AuthPage.tsx
@@ -13,8 +13,6 @@ const TURNSTILE_SITE_KEY = import.meta.env.VITE_TURNSTILE_SITE_KEY as string | u
export default function AuthPage() {
const navigate = useNavigate();
const posthog = usePostHog();
- const [status, setStatus] = useState<'idle' | 'processing' | 'error'>('idle');
- const [errorMsg, setErrorMsg] = useState('');
const { containerRef, ready: turnstileReady, execute: executeTurnstile, error: turnstileError } = useTurnstile(TURNSTILE_SITE_KEY);
const [status, setStatus] = useState<'idle' | 'processing' | 'error'>(() => {
const params = new URLSearchParams(window.location.search);
From 304dce4314a0d9a9446edd08159099c612f7c318 Mon Sep 17 00:00:00 2001
From: pranavshankar1221
Date: Tue, 16 Jun 2026 11:53:01 +0530
Subject: [PATCH 4/4] fix: frontend and backend eslint errors
---
backend/chat_logger.py | 15 +++++++++++++--
backend/rag_retriever.py | 15 +++++++++++----
src/lib/api.ts | 13 -------------
3 files changed, 24 insertions(+), 19 deletions(-)
diff --git a/backend/chat_logger.py b/backend/chat_logger.py
index 8ea08a5..81ef509 100644
--- a/backend/chat_logger.py
+++ b/backend/chat_logger.py
@@ -42,10 +42,21 @@ def log_chat_message(
try:
cursor.execute(
"""
- INSERT INTO chat_logs (id, question, response, current_page, current_feature, timestamp, feedback)
+ INSERT INTO chat_logs (
+ id, question, response, current_page, current_feature,
+ timestamp, feedback
+ )
VALUES (?, ?, ?, ?, ?, ?, ?)
""",
- (msg_id, question, response, current_page, current_feature, timestamp, None)
+ (
+ msg_id,
+ question,
+ response,
+ current_page,
+ current_feature,
+ timestamp,
+ None,
+ )
)
conn.commit()
except Exception as e:
diff --git a/backend/rag_retriever.py b/backend/rag_retriever.py
index 3c46463..d1ea3ec 100644
--- a/backend/rag_retriever.py
+++ b/backend/rag_retriever.py
@@ -103,9 +103,14 @@ def load_and_index_docs(self):
# Standard smooth IDF formula
self.idf[word] = math.log(1.0 + (num_docs / (1.0 + df)))
- print(f"RAG Indexing complete. Indexed {len(self.chunks)} chunks across {len(files_to_load)} files.")
-
- def split_by_markdown_headers(self, source: str, content: str) -> List[Tuple[str, str, str]]:
+ print(
+ f"RAG Indexing complete. Indexed {len(self.chunks)} chunks "
+ f"across {len(files_to_load)} files."
+ )
+
+ def split_by_markdown_headers(
+ self, source: str, content: str
+ ) -> List[Tuple[str, str, str]]:
"""Parses markdown and splits it into chunks based on header sections."""
chunks: List[Tuple[str, str, str]] = []
lines = content.splitlines()
@@ -138,7 +143,9 @@ def split_by_markdown_headers(self, source: str, content: str) -> List[Tuple[str
return chunks
- def split_large_text(self, source: str, heading: str, text: str, max_chars: int = 1500) -> List[Tuple[str, str, str]]:
+ def split_large_text(
+ self, source: str, heading: str, text: str, max_chars: int = 1500
+ ) -> List[Tuple[str, str, str]]:
"""Sub-splits a markdown section if it is too long to maintain granularity."""
if len(text) <= max_chars:
return [(source, heading, text)]
diff --git a/src/lib/api.ts b/src/lib/api.ts
index 8ae7af5..3e94084 100644
--- a/src/lib/api.ts
+++ b/src/lib/api.ts
@@ -140,18 +140,6 @@ export const api = {
return response.redirect_url;
}
- return `${API_BASE}/api/v1/auth/login/google`;
- },
- const response = await apiFetch<{ redirect_url: string }>(
- "/api/v1/auth/login/google",
- {
- method: "POST",
- body: JSON.stringify({ turnstile_token: turnstileToken }),
- },
- );
- return response.redirect_url;
- }
-
return `${API_BASE}/api/v1/auth/login/google`;
},
@@ -266,7 +254,6 @@ export const api = {
method: 'POST',
body: JSON.stringify({ message_id: messageId, feedback }),
}),
- apiFetch("/api/v1/maps/markets"),
getLiveMarkets: (
lat: number,