Yvette-0508 · Yvette-0508 · Dec 5, 2025
diff --git a/.claude/settings.local.json b/.claude/settings.local.json
@@ -0,0 +1,17 @@
+{
+  "permissions": {
+    "allow": [
+      "Bash(pytest:*)",
+      "Bash(pip install:*)",
+      "Bash(python -m pytest tests/test_parser.py -v)",
+      "Bash(ls:*)",
+      "Bash(source:*)",
+      "Bash(.venv/bin/python -m pip install:*)",
+      "Bash(.venv/bin/python -m pytest tests/test_parser.py -v)",
+      "Bash(grep:*)",
+      "Bash(python3:*)",
+      "Bash(python:*)"
+    ],
+    "deny": []
+  }
+}
diff --git a/.env.example b/.env.example
@@ -1,82 +1,12 @@
-# QuantMind RAG System - Environment Configuration
-# Copy this file to .env and update with your actual values
+# QuantMind RAG Environment Variables
+# Copy this file to .env and fill in your values
 
-# =============================================================================
-# API SERVER SETTINGS
-# =============================================================================
-QUANTMIND_HOST=127.0.0.1
-QUANTMIND_PORT=8000
-QUANTMIND_DEBUG=false
+# LlamaParse API Key (required for document parsing)
+# Get your key at: https://cloud.llamaindex.ai
+LLAMA_CLOUD_API_KEY=your-api-key-here
 
-# =============================================================================
-# LLM API KEYS (Required for production use)
-# =============================================================================
-# OpenAI API Key (for GPT models)
-OPENAI_API_KEY=your_openai_api_key_here
+# Optional: OpenAI API Key (for embeddings/LLM)
+OPENAI_API_KEY=
 
-# Anthropic API Key (for Claude models)
-# ANTHROPIC_API_KEY=your_anthropic_api_key_here
-
-# =============================================================================
-# LLM PROVIDER SETTINGS
-# =============================================================================
-# Options: "openai", "anthropic", "mock"
-QUANTMIND_LLM_PROVIDER=openai
-
-# OpenAI models: gpt-4o, gpt-4o-mini, gpt-4-turbo
-# Anthropic models: claude-3-5-sonnet-20241022, claude-3-opus-20240229
-QUANTMIND_LLM_MODEL=gpt-4o-mini
-
-# =============================================================================
-# VECTOR STORE SETTINGS
-# =============================================================================
-# Options: "memory", "chroma", "qdrant"
-QUANTMIND_VECTOR_STORE=memory
-
-# Directory for persistent storage (optional, only for chroma/qdrant)
-# QUANTMIND_PERSIST_DIR=./data/vectorstore
-
-# =============================================================================
-# EMBEDDING MODEL SETTINGS
-# =============================================================================
-# Sentence Transformer model for embeddings
-QUANTMIND_EMBEDDING_MODEL=all-MiniLM-L6-v2
-
-# =============================================================================
-# RETRIEVAL SETTINGS
-# =============================================================================
-# Maximum number of chunks to retrieve
-QUANTMIND_MAX_CHUNKS=10
-
-# Maximum tokens for context
-QUANTMIND_MAX_TOKENS=4000
-
-# Enable reranking for better results
-QUANTMIND_RERANKING=true
-
-# =============================================================================
-# VERIFICATION SETTINGS
-# =============================================================================
-# Enable hallucination verification
-QUANTMIND_VERIFICATION=true
-
-# Minimum source tier (1-5, lower is more authoritative)
-QUANTMIND_MIN_TIER=3
-
-# =============================================================================
-# SUMMARIZATION SETTINGS
-# =============================================================================
-# Enable adaptive summarization
-QUANTMIND_SUMMARIZATION=true
-
-# =============================================================================
-# QDRANT SETTINGS (Optional - if using Qdrant)
-# =============================================================================
-# Qdrant URL (for remote Qdrant server)
-# QDRANT_URL=http://localhost:6333
-
-# Qdrant API Key (for Qdrant Cloud)
-# QDRANT_API_KEY=your_qdrant_api_key_here
-
-# Qdrant Collection Name
-# QDRANT_COLLECTION=quantmind
+# Optional: Anthropic API Key (for Claude models)
+ANTHROPIC_API_KEY=
diff --git a/.gitignore b/.gitignore
@@ -69,3 +69,4 @@ htmlcov/
 temp/
 tmp/
 
+
diff --git a/requirements.txt b/requirements.txt
@@ -6,7 +6,8 @@ uvicorn>=0.24.0
 pydantic>=2.5.0
 
 # Document Parsing
-PyMuPDF>=1.23.0          # PDF parsing (fitz)
+llama-parse>=0.5.0       # LlamaParse - multi-modal document parsing (recommended)
+PyMuPDF>=1.23.0          # PDF parsing fallback (fitz)
 beautifulsoup4>=4.12.0   # HTML parsing
 trafilatura>=1.6.0       # Web content extraction
 pandas>=2.0.0            # CSV/Excel handling
@@ -15,6 +16,7 @@ openpyxl>=3.1.0          # Excel support
 # Embeddings & Vector Store
 sentence-transformers>=2.2.0
 chromadb>=0.4.0
+supabase>=2.0.0          # Supabase for cloud storage
 
 # LLM APIs (optional - install based on provider)
 openai>=1.3.0            # OpenAI API

diff --git a/src/extraction/__pycache__/parser.cpython-312.pyc b/src/extraction/__pycache__/parser.cpython-312.pyc
diff --git a/src/extraction/__pycache__/parser.cpython-39.pyc b/src/extraction/__pycache__/parser.cpython-39.pyc