diff --git a/.github/workflows/code-scans.yaml b/.github/workflows/code-scans.yaml index 18e5cd9..5139404 100644 --- a/.github/workflows/code-scans.yaml +++ b/.github/workflows/code-scans.yaml @@ -37,7 +37,7 @@ jobs: run: mkdir -p trivy-reports - name: Run Trivy FS Scan - uses: aquasecurity/trivy-action@0.24.0 + uses: aquasecurity/trivy-action@0.35.0 with: scan-type: 'fs' scan-ref: '.' diff --git a/README.md b/README.md index 197e780..97f31a1 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,13 @@ AI-powered financial document analysis with intelligent section-based summarizat - [Project Structure](#project-structure) - [Usage Guide](#usage-guide) - [Environment Variables](#environment-variables) +- [Inference Benchmarks](#inference-benchmarks) +- [Model Capabilities](#model-capabilities) + - [Meta Llama 3.2 3B Instruct](#meta-llama-32-3b-instruct) + - [BAAI/bge-base-en-v1.5](#baaibge-base-en-v15) + - [OpenAI text-embedding-3-small](#openai-text-embedding-3-small) + - [GPT-4o-mini](#gpt-4o-mini) + - [Comparison Summary](#comparison-summary) - [Technology Stack](#technology-stack) - [Troubleshooting](#troubleshooting) - [License](#license) @@ -361,6 +368,119 @@ This blueprint uses a **document-cached RAG approach without static chunking**. +--- + +## Inference Benchmarks + +The table below compares inference performance across different providers, deployment modes, and hardware profiles using a standardized FinSights document analysis workload (averaged over 3 runs of the full pipeline: initial summary, overall summary, section summary, RAG indexing, and RAG chat). 
+ + +| Provider | LLM Model | Embedding Model | Deployment | Avg Input Tokens/Gen | Avg Output Tokens/Gen | Avg Total Tokens/Gen | P50 Latency (ms) | P95 Latency (ms) | Throughput (req/s) | Hardware | +| -------------- | ------------------------------ | ---------------------------- | -------------------- | -------------------- | --------------------- | -------------------- | ---------------- | ---------------- | ------------------ | -------------------------------------- | +| vLLM | `Llama-3.2-3B-Instruct` | `BAAI/bge-base-en-v1.5` | Local | 441 | 127 | 568 | 15,283 | 59,437 | 0.050 | Apple Silicon (Metal) (MacBook Pro M4) | +| [Intel OPEA EI](https://github.com/opea-project/Enterprise-Inference) | `Llama-3.2-3B-Instruct` | `BAAI/bge-base-en-v1.5` | Enterprise (On-Prem) | 444 | 122 | 566 | 4,393 | 23,270 | 0.133 | CPU-only (Xeon) | +| OpenAI (Cloud) | `gpt-4o-mini` | `text-embedding-3-small` | API (Cloud) | 411 | 133 | 544 | 2,772 | 11,906 | 0.221 | N/A | + + +> **Notes:** +> +> - All benchmarks use the same FinSights document analysis pipeline. Token counts may vary slightly per run due to non-deterministic model output. +> - vLLM on Apple Silicon uses Metal (MPS) GPU acceleration for the LLM and CPU-based vLLM for the BERT embedding model (`BAAI/bge-base-en-v1.5`). +> - [Intel OPEA Enterprise Inference](https://github.com/opea-project/Enterprise-Inference) runs on Intel Xeon CPUs without GPU acceleration. +> - Each benchmark run exercises 5 generations: initial summary, overall summary, section summary, RAG indexing (embeddings), and RAG chat. +> - Langfuse tracing is used for full observability of each benchmark run. + +--- + +## Model Capabilities + +### Meta Llama 3.2 3B Instruct + +A 3-billion-parameter open-weight model from Meta's Llama family, optimized for instruction-following and on-device deployment. 
+ + | Attribute | Details | +| --------------------------- | -------------------------------------------------------------------------------------------- | +| **Parameters** | 3.21B | +| **Architecture** | Transformer with Grouped Query Attention (GQA) — 28 layers, 24 Q-heads / 8 KV-heads | +| **Context Window** | 128,000 tokens | +| **Instruction Tuning** | RLHF + supervised fine-tuning on instruction data | +| **Multilingual** | English, German, French, Italian, Portuguese, Hindi, Spanish, Thai | +| **Quantization Formats** | GGUF, AWQ, GPTQ, MLX (4-bit) | +| **Inference Runtimes** | vLLM, Ollama, llama.cpp, LMStudio, SGLang, TGI | +| **License** | Llama 3.2 Community License (permissive, with acceptable use policy) | +| **Deployment** | Local, on-prem, air-gapped, cloud — full data sovereignty | + + +### BAAI/bge-base-en-v1.5 + +A 109M-parameter BERT-based embedding model from BAAI, widely used for retrieval and RAG pipelines. + + +| Attribute | Details | +| --------------------------- | ---------------------------------------------------------- | +| **Parameters** | 109M | +| **Architecture** | BERT base (12 layers, 768 hidden dim) | +| **Embedding Dimensions** | 768 | +| **Max Sequence Length** | 512 tokens | +| **MTEB Retrieval Score** | 53.25 (competitive with models 3x its size) | +| **Inference Runtimes** | sentence-transformers, vLLM (CPU), ONNX, TGI | +| **License** | MIT | +| **Deployment** | Local, on-prem, air-gapped — lightweight enough for CPU | + + +### OpenAI text-embedding-3-small + +OpenAI's compact embedding model, used for RAG indexing and retrieval when running with the OpenAI provider. 
+ + | Attribute | Details | +| --------------------------- | ---------------------------------------------------------- | +| **Parameters** | Not publicly disclosed | +| **Embedding Dimensions** | 1,536 (default) or 512 (with `dimensions` parameter) | +| **Max Sequence Length** | 8,191 tokens | +| **MIRACL Avg Score** | 44.0 (multilingual retrieval; MTEB average: 62.3) | +| **Pricing** | $0.02 / 1M tokens | +| **License** | Proprietary (OpenAI Terms of Use) | +| **Deployment** | Cloud-only — OpenAI API or Azure OpenAI Service | + + +### GPT-4o-mini + +OpenAI's cost-efficient multimodal model, accessible exclusively via cloud API. + + +| Attribute | Details | +| --------------------------- | --------------------------------------------------------------------------------- | +| **Parameters** | Not publicly disclosed | +| **Architecture** | Multimodal Transformer (text + image input, text output) | +| **Context Window** | 128,000 tokens input / 16,384 tokens max output | +| **Tool / Function Calling** | Supported; parallel function calling | +| **Structured Output** | JSON mode and strict JSON schema adherence supported | +| **Multilingual** | Broad multilingual support | +| **Pricing** | $0.15 / 1M input tokens, $0.60 / 1M output tokens (Batch API: 50% discount) | +| **Fine-Tuning** | Supervised fine-tuning via OpenAI API | +| **License** | Proprietary (OpenAI Terms of Use) | +| **Deployment** | Cloud-only — OpenAI API or Azure OpenAI Service. 
No self-hosted or on-prem option | + + +### Comparison Summary + + +| Capability | Llama 3.2 3B Instruct | GPT-4o-mini | +| ------------------------------- | -------------------------------- | --------------------------------- | +| Financial document analysis | Yes | Yes | +| RAG-based document chat | Yes | Yes | +| On-prem / air-gapped deployment | Yes | No | +| Data sovereignty | Full (weights run locally) | No (data sent to cloud API) | +| Open weights | Yes (Llama Community License) | No (proprietary) | +| Custom fine-tuning | Full fine-tuning + LoRA adapters | Supervised fine-tuning (API only) | +| Multimodal (image input) | No | Yes | +| Native context window | 128K | 128K | + + +> Both models support financial document analysis and RAG-based chat. However, only Llama 3.2 offers open weights, data sovereignty, and local deployment flexibility — making it suitable for air-gapped, regulated, or cost-sensitive environments. GPT-4o-mini offers lower latency and higher throughput via OpenAI's cloud infrastructure, with added multimodal capabilities. 
+ --- ## Technology Stack diff --git a/frontend/package-lock.json b/frontend/package-lock.json index 5fe35d0..9b30ee7 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -8,7 +8,7 @@ "name": "finsights-react-ui", "version": "1.0.0", "dependencies": { - "jspdf": "^4.2.0", + "jspdf": "^4.2.1", "lucide-react": "^0.294.0", "react": "^18.2.0", "react-dom": "^18.2.0", @@ -63,6 +63,7 @@ "resolved": "https://registry.npmjs.org/@babel/core/-/core-7.28.5.tgz", "integrity": "sha512-e7jT4DxYvIDLk1ZHmU/m/mB19rex9sv0c2ftBtjSBv+kVM/902eh0fINUzD7UwLLNR+jU585GxUJ8/EBfAM5fw==", "dev": true, + "peer": true, "dependencies": { "@babel/code-frame": "^7.27.1", "@babel/generator": "^7.28.5", @@ -1326,6 +1327,7 @@ "url": "https://github.com/sponsors/ai" } ], + "peer": true, "dependencies": { "baseline-browser-mapping": "^2.9.0", "caniuse-lite": "^1.0.30001759", @@ -1474,7 +1476,8 @@ "node_modules/csstype": { "version": "3.2.3", "resolved": "https://registry.npmjs.org/csstype/-/csstype-3.2.3.tgz", - "integrity": "sha512-z1HGKcYy2xA8AGQfwrn0PAy+PB7X/GSj3UVJW9qKyn43xWa+gl5nXmU4qqLMRzWVLFC8KusUX8T/0kCiOYpAIQ==" + "integrity": "sha512-z1HGKcYy2xA8AGQfwrn0PAy+PB7X/GSj3UVJW9qKyn43xWa+gl5nXmU4qqLMRzWVLFC8KusUX8T/0kCiOYpAIQ==", + "peer": true }, "node_modules/debug": { "version": "4.4.3", @@ -1789,6 +1792,7 @@ "resolved": "https://registry.npmjs.org/jiti/-/jiti-1.21.7.tgz", "integrity": "sha512-/imKNG4EbWNrVjoNC/1H5/9GFy+tqjGBHCaSsN+P2RnPqjsLmv6UD3Ej+Kj8nBWaRAwyk7kK5ZUc+OEatnTR3A==", "dev": true, + "peer": true, "bin": { "jiti": "bin/jiti.js" } @@ -1823,9 +1827,9 @@ } }, "node_modules/jspdf": { - "version": "4.2.0", - "resolved": "https://registry.npmjs.org/jspdf/-/jspdf-4.2.0.tgz", - "integrity": "sha512-hR/hnRevAXXlrjeqU5oahOE+Ln9ORJUB5brLHHqH67A+RBQZuFr5GkbI9XQI8OUFSEezKegsi45QRpc4bGj75Q==", + "version": "4.2.1", + "resolved": "https://registry.npmjs.org/jspdf/-/jspdf-4.2.1.tgz", + "integrity": 
"sha512-YyAXyvnmjTbR4bHQRLzex3CuINCDlQnBqoSYyjJwTP2x9jDLuKDzy7aKUl0hgx3uhcl7xzg32agn5vlie6HIlQ==", "license": "MIT", "dependencies": { "@babel/runtime": "^7.28.6", @@ -2047,6 +2051,7 @@ "url": "https://github.com/sponsors/ai" } ], + "peer": true, "dependencies": { "nanoid": "^3.3.11", "picocolors": "^1.1.1", @@ -2217,6 +2222,7 @@ "version": "18.3.1", "resolved": "https://registry.npmjs.org/react/-/react-18.3.1.tgz", "integrity": "sha512-wS+hAgJShR0KhEvPJArfuPVN1+Hz1t0Y6n5jLrGQbkb4urgPE/0Rve+1kMB1v/oWgHgm4WIcV+i7F2pTVj+2iQ==", + "peer": true, "dependencies": { "loose-envify": "^1.1.0" }, @@ -2228,6 +2234,7 @@ "version": "18.3.1", "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-18.3.1.tgz", "integrity": "sha512-5m4nQKp+rZRb09LNH59GM4BxTh9251/ylbKIbpe7TpGxfJ+9kv6BLkLBXIjjspbgbnIBNqlI23tRnTWT0snUIw==", + "peer": true, "dependencies": { "loose-envify": "^1.1.0", "scheduler": "^0.23.2" @@ -2610,6 +2617,7 @@ "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz", "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", "dev": true, + "peer": true, "engines": { "node": ">=12" }, @@ -2685,6 +2693,7 @@ "resolved": "https://registry.npmjs.org/vite/-/vite-5.4.21.tgz", "integrity": "sha512-o5a9xKjbtuhY6Bi5S3+HvbRERmouabWbyUcpXXUA1u+GNUKoROi9byOJ8M0nHbHYHkYICiMlqxkg1KkYmm25Sw==", "dev": true, + "peer": true, "dependencies": { "esbuild": "^0.21.3", "postcss": "^8.4.43", diff --git a/frontend/package.json b/frontend/package.json index df7df27..07e883e 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -8,12 +8,12 @@ "preview": "vite preview" }, "dependencies": { - "jspdf": "^4.2.0", + "jspdf": "^4.2.1", "lucide-react": "^0.294.0", "react": "^18.2.0", "react-dom": "^18.2.0", "react-hot-toast": "^2.4.1", - "react-router-dom": "^6.30.3" + "react-router-dom": "^6.30.3" }, "devDependencies": { "@vitejs/plugin-react": "^4.2.0",