From 1b4e9c548f1f853d74ed85ad5184099151739fd1 Mon Sep 17 00:00:00 2001
From: Joseph <josephfernandes273@gmail.com>
Date: Sat, 27 Dec 2025 13:01:04 +0530
Subject: [PATCH] chore: add .gitignore and initial project files

---
 .github/workflows/ci.yml         | 121 +++++
 .gitignore                       |  94 +++-
 .pre-commit-config.yaml          |  78 ++++
 LICENSE                          |  21 +
 README.md                        | 199 ++++++++
 SECURITY.md                      | 100 ++++
 docs/API.md                      | 382 +++++++++++++++
 docs/ARCHITECTURE.md             | 187 ++++++++
 docs/CONTRIBUTING.md             | 304 ++++++++++++
 docs/DEPLOYMENT.md               | 620 +++++++++++++++++++++++++
 docs/EXAMPLES.md                 | 768 +++++++++++++++++++++++++++++++
 pyproject.toml                   | 229 +++++++++
 requirements.txt                 |  17 +
 scripts/setup_dev_env.ps1        | 117 +++++
 src/__init__.py                  |  25 +
 src/config.py                    | 110 +++++
 src/document_processor.py        | 104 +++++
 src/main.py                      | 152 ++++++
 src/rag_chain.py                 | 126 +++++
 src/vector_store.py              | 147 ++++++
 tests/__init__.py                |  20 +
 tests/test_config.py             | 153 ++++++
 tests/test_document_processor.py | 162 +++++++
 tests/test_integration.py        | 140 ++++++
 24 files changed, 4371 insertions(+), 5 deletions(-)
 create mode 100644 .github/workflows/ci.yml
 create mode 100644 .pre-commit-config.yaml
 create mode 100644 LICENSE
 create mode 100644 README.md
 create mode 100644 SECURITY.md
 create mode 100644 docs/API.md
 create mode 100644 docs/ARCHITECTURE.md
 create mode 100644 docs/CONTRIBUTING.md
 create mode 100644 docs/DEPLOYMENT.md
 create mode 100644 docs/EXAMPLES.md
 create mode 100644 pyproject.toml
 create mode 100644 requirements.txt
 create mode 100644 scripts/setup_dev_env.ps1
 create mode 100644 src/__init__.py
 create mode 100644 src/config.py
 create mode 100644 src/document_processor.py
 create mode 100644 src/main.py
 create mode 100644 src/rag_chain.py
 create mode 100644 src/vector_store.py
 create mode 100644 tests/__init__.py
 create mode 100644 tests/test_config.py
 create mode 100644 tests/test_document_processor.py
 create mode 100644 tests/test_integration.py

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..f6499e0
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,121 @@
+name: CI Pipeline
+
+on:
+  push:
+    branches: [ main ]
+  pull_request:
+    branches: [ main ]
+
+jobs:
+  lint-and-test:
+    name: Lint and Test
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -e .[dev]
+
+      - name: Run pre-commit hooks
+        run: |
+          pre-commit run --all-files
+
+      - name: Run tests with coverage
+        run: |
+          pytest tests/ --cov=src --cov-report=xml --cov-fail-under=80
+
+      - name: Upload coverage to Codecov
+        uses: codecov/codecov-action@v3
+        with:
+          token: ${{ secrets.CODECOV_TOKEN }}
+          file: ./coverage.xml
+          fail_ci_if_error: false
+
+  security-scan:
+    name: Security Scan
+    runs-on: ubuntu-latest
+    needs: lint-and-test
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: GitGuardian Shield Scan
+        uses: GitGuardian/ggshield-action@v1
+        env:
+          GITGUARDIAN_API_KEY: ${{ secrets.GITGUARDIAN_API_KEY }}
+        with:
+          args: scan ci
+
+      - name: Check for secrets
+        uses: GitGuardian/ggshield-action@v1
+        env:
+          GITGUARDIAN_API_KEY: ${{ secrets.GITGUARDIAN_API_KEY }}
+        with:
+          args: secret scan ci
+
+  build-and-package:
+    name: Build and Package
+    runs-on: ubuntu-latest
+    needs: security-scan
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.10"
+
+      - name: Install build dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install build twine
+
+      - name: Build package
+        run: |
+          python -m build
+
+      - name: Check package
+        run: |
+          twine check dist/*
+
+      - name: Upload artifacts
+        uses: actions/upload-artifact@v3
+        with:
+          name: python-package
+          path: dist/
+
+  deploy:
+    name: Deploy
+    runs-on: ubuntu-latest
+    needs: build-and-package
+    if: github.ref == 'refs/heads/main'
+
+    steps:
+      - name: Download artifacts
+        uses: actions/download-artifact@v3
+        with:
+          name: python-package
+
+      - name: Publish to PyPI
+        uses: pypa/gh-action-pypi-publish@v1.8.10
+        with:
+          user: __token__
+          password: ${{ secrets.PYPI_API_TOKEN }}
+          skip_existing: true
diff --git a/.gitignore b/.gitignore
index e8c7d23..4686045 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,89 @@
-# Created by venv; see https://docs.python.org/3/library/venv.html
-Lib
-env
-Scripts
-.env
\ No newline at end of file
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+*.pyd
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+
+# Jupyter
+.ipynb_checkpoints/
+
+# pyenv
+.python-version
+
+# Virtual environments
+env/
+venv/
+ENV/
+.venv/
+env.bak/
+venv.bak/
+pyvenv.cfg
+
+# Editors and IDEs
+.vscode/
+.idea/
+
+# OS files
+.DS_Store
+Thumbs.db
+
+# Logs
+*.log
+
+# Type check / test caches
+.mypy_cache/
+.pytest_cache/
+
+# Local env files
+.env
+.env.*
+
+# Database files
+*.sqlite3
+*.db
+
+# Faiss indexes and dataset artifacts
+*.faiss
+health_supplemets/index.faiss
+rag-dataset/
+
+# Misc
+*.egg
+*.egg-info
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..32a5ba8
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,78 @@
+repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.4.0
+    hooks:
+      - id: check-added-large-files
+      - id: check-case-conflict
+      - id: check-docstring-first
+      - id: check-executables-have-shebangs
+      - id: check-merge-conflict
+      - id: check-symlinks
+      - id: check-toml
+      - id: check-yaml
+      - id: debug-statements
+      - id: end-of-file-fixer
+      - id: mixed-line-ending
+      - id: requirements-txt-fixer
+      - id: trailing-whitespace
+
+  - repo: https://github.com/psf/black
+    rev: 23.12.1
+    hooks:
+      - id: black
+        language_version: python3
+
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.1.6
+    hooks:
+      - id: ruff
+        args: [--fix, --exit-non-zero-on-fix]
+
+  - repo: https://github.com/pre-commit/mirrors-mypy
+    rev: v1.7.1
+    hooks:
+      - id: mypy
+        additional_dependencies: [types-python-dotenv]
+
+  - repo: https://github.com/pycqa/isort
+    rev: 5.13.2
+    hooks:
+      - id: isort
+        name: isort (python)
+
+  - repo: https://github.com/pycqa/flake8
+    rev: 6.1.0
+    hooks:
+      - id: flake8
+
+  - repo: https://github.com/GitGuardian/ggshield
+    rev: v1.18.0
+    hooks:
+      - id: ggshield
+        language: system
+        stages: [commit, push]
+
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.4.0
+    hooks:
+      - id: detect-private-key
+      - id: detect-aws-credentials
+      - id: detect-google-credentials
+
+  - repo: local
+    hooks:
+      - id: pytest
+        name: Run pytest
+        entry: pytest tests/ -v --tb=short
+        language: system
+        pass_filenames: false
+        always_run: true
+        stages: [push]
+
+      - id: pytest-cov
+        name: Run pytest with coverage
+        entry: pytest tests/ --cov=src --cov-report=term-missing --cov-fail-under=80
+        language: system
+        pass_filenames: false
+        always_run: true
+        stages: [push]
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..5ce8723
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2024 RAG PDF Chatbot Team
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..a1d7b6e
--- /dev/null
+++ b/README.md
@@ -0,0 +1,199 @@
+# RAG PDF Chatbot
+
+![RAG PDF Chatbot Logo](https://via.placeholder.com/150)
+
+**A Professional, Enterprise-Grade Retrieval-Augmented Generation System for PDF Documents**
+
+[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
+[![Python 3.8+](https://img.shields.io/badge/python-3.8+-blue.svg)](https://www.python.org/downloads/)
+[![Code Style: Black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
+
+## 🎯 Problem Statement
+
+Organizations struggle with extracting actionable insights from large collections of PDF documents. Traditional search methods fail to provide contextual, accurate answers to complex questions. RAG PDF Chatbot solves this by combining:
+
+- **Document Retrieval**: Find relevant information from PDF collections
+- **Contextual Understanding**: Use LLM to understand and synthesize information
+- **Natural Language Interface**: Ask questions in plain English and get precise answers
+
+## 🏗️ Architecture
+
+```mermaid
+graph TD
+    A[PDF Documents] --> B[Document Processor]
+    B --> C[Vector Store]
+    C --> D[Retriever]
+    D --> E[RAG Chain]
+    E --> F[LLM]
+    F --> G[Answer]
+    G --> H[User]
+    H -->|Question| E
+```
+
+### Key Components
+
+1. **Document Processor**: Loads and chunks PDF documents
+2. **Vector Store**: Stores document embeddings for efficient retrieval
+3. **Retriever**: Finds relevant documents for a given question
+4. **RAG Chain**: Combines retrieved context with LLM for answer generation
+5. **LLM Interface**: Uses Ollama to run local language models
+
+## 🛠️ Tech Stack
+
+- **Core**: Python 3.8+
+- **Document Processing**: LangChain, PyMuPDF
+- **Embeddings**: Ollama (nomic-embed-text)
+- **Vector Store**: FAISS
+- **LLM**: Ollama (llama3.2:3b)
+- **Configuration**: Python dataclasses + environment variables
+- **Testing**: pytest
+
+## 🚀 Quick Start
+
+### Prerequisites
+
+- Python 3.8+
+- Ollama running locally with required models
+- PDF documents in the `rag-dataset/` directory
+
+### Installation
+
+```bash
+# Clone the repository
+git clone https://github.com/your-org/rag-pdf-chatbot.git
+cd rag-pdf-chatbot
+
+# Create virtual environment (recommended)
+python -m venv venv
+source venv/bin/activate  # On Windows: venv\Scripts\activate
+
+# Install dependencies
+pip install -r requirements.txt
+
+# Set up environment variables
+cp .env.example .env
+# Edit .env with your configuration
+```
+
+### Running the Application
+
+```bash
+# Basic usage
+python -m src.main --help
+
+# Ask a specific question
+python -m src.main --question "What are the benefits of BCAA supplements?"
+
+# Interactive mode
+python -m src.main --interactive
+
+# Rebuild vector store
+python -m src.main --rebuild --interactive
+```
+
+## 📂 Project Structure
+
+```
+rag-pdf-chatbot/
+├── src/                  # Core application code
+│   ├── __init__.py       # Package initialization
+│   ├── config.py         # Configuration management
+│   ├── document_processor.py  # Document loading and processing
+│   ├── vector_store.py   # Vector storage and retrieval
+│   ├── rag_chain.py      # RAG pipeline implementation
+│   └── main.py           # Main application entry point
+├── tests/                # Unit and integration tests
+├── docs/                 # Architecture and design documentation
+├── config/               # Configuration files
+├── scripts/              # Automation and utility scripts
+├── .env.example          # Environment variable template
+├── .gitignore            # Git ignore patterns
+├── README.md             # This file
+└── requirements.txt      # Python dependencies
+```
+
+## 🔧 Configuration
+
+The application uses environment variables for configuration. See `.env.example` for all available options:
+
+```env
+# Ollama Configuration
+OLLAMA_BASE_URL=http://localhost:11434
+EMBEDDING_MODEL=nomic-embed-text
+LLM_MODEL=llama3.2:3b
+
+# Document Processing
+DATASET_PATH=rag-dataset
+CHUNK_SIZE=1000
+CHUNK_OVERLAP=100
+
+# Vector Store
+VECTOR_STORE_PATH=health_supplemets
+SAVE_VECTOR_STORE=true
+
+# Retrieval
+RETRIEVAL_TYPE=mmr
+RETRIEVAL_K=3
+RETRIEVAL_FETCH_K=100
+RETRIEVAL_LAMBDA=1.0
+```
+
+## 🧪 Testing
+
+```bash
+# Run all tests
+pytest tests/
+
+# Run specific test
+pytest tests/test_document_processor.py
+
+# Run with coverage
+pytest --cov=src tests/
+```
+
+## 📖 Documentation
+
+- [Architecture Overview](docs/ARCHITECTURE.md)
+- [Contributing Guide](docs/CONTRIBUTING.md)
+- [API Reference](docs/API.md)
+
+## 🤝 Contributing
+
+We welcome contributions! Please see [CONTRIBUTING.md](docs/CONTRIBUTING.md) for guidelines.
+
+## 📜 License
+
+This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
+
+## 🎯 Value Proposition
+
+**For Developers:**
+- Clean, modular architecture following SOLID principles
+- Easy to extend and customize
+- Comprehensive documentation and examples
+
+**For Organizations:**
+- Extract insights from PDF documents efficiently
+- Reduce manual document review time
+- Improve knowledge discovery and decision making
+
+**For Recruiters:**
+- Professional, enterprise-grade codebase
+- Follows best practices for security and maintainability
+- Demonstrates advanced Python and AI/ML skills
+
+## 🔒 Security
+
+This project follows GitGuardian security standards:
+- No hardcoded secrets
+- Environment variable configuration
+- Secure dependency management
+- Regular security audits
+
+## 📞 Support
+
+For issues, questions, or feature requests, please open an issue on GitHub.
+
+---
+
+**Built with ❤️ for developers, by developers.**
diff --git a/SECURITY.md b/SECURITY.md
new file mode 100644
index 0000000..7ca5cda
--- /dev/null
+++ b/SECURITY.md
@@ -0,0 +1,100 @@
+# Security Policy
+
+## 🔒 Supported Versions
+
+We provide security updates for the following versions:
+
+| Version | Supported          |
+| ------- | ------------------ |
+| 1.x     | ✅ Yes             |
+| < 1.0   | ❌ No              |
+
+## 📋 Reporting a Vulnerability
+
+**Please do not report security vulnerabilities through public GitHub issues.**
+
+Instead, please report them by emailing: `security@rag-pdf-chatbot.com`
+
+### Vulnerability Reporting Process
+
+1. **Report**: Send an email with detailed information about the vulnerability
+2. **Acknowledge**: We will acknowledge receipt within 48 hours
+3. **Assess**: Our security team will assess the vulnerability
+4. **Fix**: We will develop and test a fix
+5. **Disclose**: We will coordinate disclosure with you
+
+### What to Include in Your Report
+
+- Detailed description of the vulnerability
+- Steps to reproduce
+- Potential impact
+- Suggested mitigation (if any)
+- Your contact information
+
+## 🛡️ Security Best Practices
+
+### For Users
+
+1. **Keep Dependencies Updated**: Regularly update all dependencies
+2. **Use Secure Configuration**: Follow our `.env.example` template
+3. **Limit Access**: Restrict access to sensitive endpoints
+4. **Monitor Logs**: Regularly review application logs
+5. **Use HTTPS**: Always use secure connections
+
+### For Developers
+
+1. **Never Commit Secrets**: Use environment variables for sensitive data
+2. **Input Validation**: Validate all user inputs
+3. **Dependency Scanning**: Regularly scan for vulnerable dependencies
+4. **Code Reviews**: All changes must be reviewed
+5. **Security Testing**: Include security tests in CI/CD
+
+## 🔐 Security Features
+
+### Built-in Security Measures
+
+- **Environment Variable Configuration**: No hardcoded secrets
+- **Input Validation**: All inputs are validated
+- **Error Handling**: Graceful error handling
+- **Dependency Management**: Regular security updates
+- **Secure Defaults**: Safe defaults for all configurations
+
+### Security Configuration
+
+```env
+# Security-related environment variables
+ALLOW_DANGEROUS_DESERIALIZATION=false
+LOG_LEVEL=INFO
+```
+
+## 🔍 Security Audits
+
+We perform regular security audits including:
+
+- **Dependency Scanning**: Using GitGuardian and Snyk
+- **Code Analysis**: Static and dynamic analysis
+- **Penetration Testing**: Regular security testing
+- **Third-party Audits**: Annual security reviews
+
+## 📚 Security Resources
+
+- [OWASP Top 10](https://owasp.org/www-project-top-ten/)
+- [GitGuardian Documentation](https://docs.gitguardian.com/)
+- [Python Security Best Practices](https://docs.python.org/3/howto/security.html)
+
+## 🤝 Security Community
+
+We welcome security researchers to responsibly disclose vulnerabilities. We will:
+
+- Acknowledge your report promptly
+- Work with you to understand and validate the issue
+- Develop and test a fix
+- Credit you in our release notes (if desired)
+
+## 📜 License
+
+This security policy is provided under the same license as the main project.
+
+---
+
+**Your security is our priority. Thank you for helping keep RAG PDF Chatbot secure!**
diff --git a/docs/API.md b/docs/API.md
new file mode 100644
index 0000000..af3c6e1
--- /dev/null
+++ b/docs/API.md
@@ -0,0 +1,382 @@
+# API Reference
+
+## 📚 Table of Contents
+
+- [Core Classes](#-core-classes)
+- [Configuration Classes](#-configuration-classes)
+- [Main Interface](#-main-interface)
+- [Exception Handling](#-exception-handling)
+- [Examples](#-examples)
+
+## 🔧 Core Classes
+
+### RAGPDFChatbot
+
+The main application class that orchestrates the entire RAG pipeline.
+
+#### Constructor
+
+```python
+RAGPDFChatbot()
+```
+
+**Returns:** `RAGPDFChatbot` instance
+
+**Description:** Initializes the chatbot with all required components.
+
+#### Methods
+
+##### `initialize(rebuild_vector_store: bool = False) -> None`
+
+Initialize the application and prepare for question answering.
+
+**Parameters:**
+- `rebuild_vector_store` (bool, optional): Whether to rebuild vector store from scratch. Defaults to `False`.
+
+**Raises:**
+- `RuntimeError`: If document processing or vector store initialization fails
+
+##### `ask(question: str) -> str`
+
+Ask a question using the RAG pipeline.
+
+**Parameters:**
+- `question` (str): Question to answer
+
+**Returns:** `str` - Answer to the question
+
+**Raises:**
+- `RuntimeError`: If application is not initialized
+
+##### `interactive_mode() -> None`
+
+Run the application in interactive mode allowing multiple questions in a session.
+
+### DocumentProcessor
+
+Handles loading and processing of PDF documents.
+
+#### Constructor
+
+```python
+DocumentProcessor()
+```
+
+**Returns:** `DocumentProcessor` instance
+
+#### Methods
+
+##### `discover_pdf_files() -> List[str]`
+
+Discover all PDF files in the dataset directory.
+
+**Returns:** `List[str]` - List of file paths to PDF documents
+
+**Raises:**
+- `FileNotFoundError`: If no PDF files are found
+
+##### `load_documents() -> List[Dict[str, Any]]`
+
+Load all PDF documents from the dataset.
+
+**Returns:** `List[Dict[str, Any]]` - List of document objects
+
+**Raises:**
+- `RuntimeError`: If no documents are successfully loaded
+
+##### `chunk_documents(documents: List[Dict[str, Any]]) -> List[Dict[str, Any]]`
+
+Split documents into chunks for embedding.
+
+**Parameters:**
+- `documents` (List[Dict[str, Any]]): List of document objects
+
+**Returns:** `List[Dict[str, Any]]` - List of document chunks
+
+##### `process_documents() -> List[Dict[str, Any]]`
+
+Complete document processing pipeline.
+
+**Returns:** `List[Dict[str, Any]]` - List of processed document chunks
+
+### VectorStoreManager
+
+Manages document embedding and vector storage.
+
+#### Constructor
+
+```python
+VectorStoreManager()
+```
+
+**Returns:** `VectorStoreManager` instance
+
+#### Methods
+
+##### `create_vector_store(documents: List[Dict[str, Any]]) -> FAISS`
+
+Create and populate vector store with document embeddings.
+
+**Parameters:**
+- `documents` (List[Dict[str, Any]]): List of document chunks to embed
+
+**Returns:** `FAISS` - Populated vector store
+
+##### `get_retriever() -> Any`
+
+Get a retriever configured with current settings.
+
+**Returns:** `Any` - Configured retriever object
+
+**Raises:**
+- `RuntimeError`: If vector store is not initialized
+
+##### `save_vector_store(path: Optional[str] = None) -> None`
+
+Save vector store to local storage.
+
+**Parameters:**
+- `path` (str, optional): Path to save vector store
+
+**Raises:**
+- `RuntimeError`: If vector store is not initialized
+
+##### `load_vector_store(path: Optional[str] = None) -> FAISS`
+
+Load vector store from local storage.
+
+**Parameters:**
+- `path` (str, optional): Path to load vector store from
+
+**Returns:** `FAISS` - Loaded vector store
+
+##### `vector_store_exists(path: Optional[str] = None) -> bool`
+
+Check if vector store exists at specified path.
+
+**Parameters:**
+- `path` (str, optional): Path to check
+
+**Returns:** `bool` - True if vector store exists
+
+### RAGChain
+
+Implements the Retrieval-Augmented Generation pipeline.
+
+#### Constructor
+
+```python
+RAGChain(retriever: Any)
+```
+
+**Parameters:**
+- `retriever` (Any): Document retriever object
+
+**Returns:** `RAGChain` instance
+
+#### Methods
+
+##### `ask_question(question: str) -> str`
+
+Ask a question using the RAG pipeline.
+
+**Parameters:**
+- `question` (str): Question to answer
+
+**Returns:** `str` - Answer to the question
+
+**Raises:**
+- `RuntimeError`: If RAG chain is not initialized
+
+##### `get_chain() -> Any`
+
+Get the RAG chain object.
+
+**Returns:** `Any` - RAG chain object
+
+## ⚙️ Configuration Classes
+
+### AppConfig
+
+Main application configuration dataclass.
+
+#### Attributes
+
+- `embedding: EmbeddingConfig` - Embedding model configuration
+- `llm: LLMConfig` - LLM model configuration
+- `vector_store: VectorStoreConfig` - Vector store configuration
+- `retrieval: RetrievalConfig` - Retrieval configuration
+- `document_processing: DocumentProcessingConfig` - Document processing configuration
+
+### EmbeddingConfig
+
+Configuration for embedding models.
+
+#### Attributes
+
+- `model_name: str` - Model name (default: "nomic-embed-text")
+- `base_url: str` - Ollama base URL (default: "http://localhost:11434")
+- `dimension: Optional[int]` - Embedding dimension (default: None)
+
+### LLMConfig
+
+Configuration for LLM models.
+
+#### Attributes
+
+- `model_name: str` - Model name (default: "llama3.2:3b")
+- `base_url: str` - Ollama base URL (default: "http://localhost:11434")
+- `temperature: float` - Generation temperature (default: 0.7)
+- `max_tokens: int` - Maximum tokens (default: 512)
+
+### VectorStoreConfig
+
+Configuration for vector store.
+
+#### Attributes
+
+- `index_type: str` - Index type (default: "flat")
+- `metric: str` - Distance metric (default: "L2")
+- `save_local: bool` - Save locally (default: True)
+- `local_path: str` - Local path (default: "health_supplemets")
+
+### RetrievalConfig
+
+Configuration for document retrieval.
+
+#### Attributes
+
+- `search_type: str` - Search type (default: "mmr")
+- `k: int` - Number of documents (default: 3)
+- `fetch_k: int` - Number to fetch (default: 100)
+- `lambda_mult: float` - MMR lambda (default: 1.0)
+
+### DocumentProcessingConfig
+
+Configuration for document processing.
+
+#### Attributes
+
+- `chunk_size: int` - Chunk size (default: 1000)
+- `chunk_overlap: int` - Chunk overlap (default: 100)
+- `dataset_path: str` - Dataset path (default: "rag-dataset")
+
+## 🎯 Main Interface
+
+### CLI Interface
+
+The application can be run via command line with the following options:
+
+```bash
+python -m src.main [OPTIONS]
+
+Options:
+  --rebuild           Rebuild vector store from scratch
+  --interactive       Run in interactive mode
+  --question TEXT     Ask a specific question
+  --help              Show help message
+```
+
+### Programmatic Interface
+
+```python
+from src import RAGPDFChatbot
+
+# Initialize chatbot
+chatbot = RAGPDFChatbot()
+chatbot.initialize()
+
+# Ask questions
+answer = chatbot.ask("What are the benefits of BCAA supplements?")
+print(answer)
+
+# Interactive mode
+chatbot.interactive_mode()
+```
+
+## 🚨 Exception Handling
+
+### RuntimeError
+- Raised when application is not properly initialized
+- Raised when vector store operations fail
+- Raised when document processing fails
+
+### FileNotFoundError
+- Raised when no PDF files are found in dataset directory
+
+### ValueError
+- Raised for invalid configuration values
+- Raised for invalid input parameters
+
+## 📝 Examples
+
+### Basic Usage
+
+```python
+from src import RAGPDFChatbot
+
+# Create and initialize chatbot
+chatbot = RAGPDFChatbot()
+chatbot.initialize()
+
+# Ask a question
+question = "What are the benefits of BCAA supplements?"
+answer = chatbot.ask(question)
+print(f"Answer: {answer}")
+```
+
+### Configuration
+
+```python
+import os
+from src.config import config
+
+# Set environment variables before importing
+os.environ["LLM_MODEL"] = "llama3.2:1b"
+os.environ["CHUNK_SIZE"] = "500"
+
+# Config will use the new values
+print(f"Model: {config.llm.model_name}")
+print(f"Chunk size: {config.document_processing.chunk_size}")
+```
+
+### Custom Configuration
+
+```python
+from src.config import EmbeddingConfig, LLMConfig, AppConfig
+
+# Create custom config
+custom_config = AppConfig(
+    embedding=EmbeddingConfig(model_name="custom-embedding"),
+    llm=LLMConfig(model_name="custom-llm", temperature=0.5),
+    # ... other configs
+)
+
+# Use with components
+processor = DocumentProcessor()
+processor.dataset_path = custom_config.document_processing.dataset_path
+```
+
+### Error Handling
+
+```python
+from src import RAGPDFChatbot
+
+chatbot = RAGPDFChatbot()
+
+try:
+    # This will fail if not initialized
+    answer = chatbot.ask("Test question")
+except RuntimeError as e:
+    print(f"Error: {e}")
+    # Initialize first
+    chatbot.initialize()
+    answer = chatbot.ask("Test question")
+```
+
+## 🔗 Related Documentation
+
+- [README.md](../README.md) - Project overview and quick start
+- [ARCHITECTURE.md](ARCHITECTURE.md) - System architecture details
+- [CONTRIBUTING.md](CONTRIBUTING.md) - Contribution guidelines
+- [SECURITY.md](../SECURITY.md) - Security policy
diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md
new file mode 100644
index 0000000..9b61ae5
--- /dev/null
+++ b/docs/ARCHITECTURE.md
@@ -0,0 +1,187 @@
+# Architecture Overview
+
+## 🏗️ System Architecture
+
+RAG PDF Chatbot follows a modular, layered architecture designed for scalability, maintainability, and extensibility.
+
+### High-Level Architecture
+
+```mermaid
+graph TD
+    subgraph User Interface
+        A[CLI Interface] --> B[Main Application]
+    end
+
+    subgraph Core Components
+        B --> C[Document Processor]
+        B --> D[Vector Store Manager]
+        B --> E[RAG Chain]
+    end
+
+    subgraph External Services
+        C --> F[PDF Documents]
+        D --> G[Ollama Embeddings]
+        D --> H[FAISS Index]
+        E --> I[Ollama LLM]
+    end
+
+    subgraph Configuration
+        J[Environment Variables] --> B
+        J --> C
+        J --> D
+        J --> E
+    end
+```
+
+## 🧩 Component Architecture
+
+### 1. Document Processor
+
+**Responsibilities:**
+- Discover PDF files in dataset directory
+- Load PDF content using PyMuPDFLoader
+- Split documents into chunks for embedding
+- Handle document processing errors gracefully
+
+**Key Features:**
+- Configurable chunk size and overlap
+- Error handling and warnings
+- Support for nested directory structures
+- PDF-specific processing
+
+### 2. Vector Store Manager
+
+**Responsibilities:**
+- Initialize embedding models
+- Create and manage FAISS vector stores
+- Handle vector store persistence
+- Provide document retrieval capabilities
+
+**Key Features:**
+- Support for multiple distance metrics (L2, IP)
+- Automatic dimension detection
+- Local storage and loading
+- Configurable retrieval parameters
+
+### 3. RAG Chain
+
+**Responsibilities:**
+- Initialize LLM models
+- Create prompt templates
+- Build RAG pipeline
+- Handle question answering
+
+**Key Features:**
+- Fallback to custom prompts if hub fails
+- Configurable LLM parameters
+- Document formatting for context
+- Error handling and validation
+
+### 4. Main Application
+
+**Responsibilities:**
+- Orchestrate the RAG pipeline
+- Provide CLI interface
+- Handle application lifecycle
+- Manage vector store caching
+
+**Key Features:**
+- Multiple operation modes (single question, interactive)
+- Vector store caching and rebuilding
+- Command-line argument parsing
+- Interactive session management
+
+## 🔧 Design Patterns
+
+### SOLID Principles
+
+1. **Single Responsibility Principle**
+   - Each module has a single, well-defined responsibility
+   - Clear separation of concerns between components
+
+2. **Open/Closed Principle**
+   - Components are open for extension but closed for modification
+   - Configuration-driven behavior allows easy customization
+
+3. **Liskov Substitution Principle**
+   - Interfaces are designed for substitutability
+   - Components can be replaced with alternative implementations
+
+4. **Interface Segregation Principle**
+   - Small, focused interfaces
+   - Clients only depend on what they need
+
+5. **Dependency Inversion Principle**
+   - High-level modules depend on abstractions
+   - Configuration and dependencies are injected
+
+### Other Patterns
+
+- **Factory Pattern**: Configuration loading and initialization
+- **Strategy Pattern**: Different retrieval strategies
+- **Facade Pattern**: Main application as facade for complex pipeline
+- **Repository Pattern**: Vector store as document repository
+
+## 📦 Module Dependencies
+
+```mermaid
+graph TD
+    main.py --> document_processor.py
+    main.py --> vector_store.py
+    main.py --> rag_chain.py
+    main.py --> config.py
+
+    document_processor.py --> config.py
+    vector_store.py --> config.py
+    rag_chain.py --> config.py
+
+    config.py --> .env
+```
+
+## 🔄 Data Flow
+
+1. **Initialization Phase**
+   - Load configuration from environment variables
+   - Initialize document processor with config
+   - Initialize vector store manager with config
+   - Check for existing vector store or build new one
+
+2. **Question Answering Phase**
+   - User provides question via CLI
+   - Retriever finds relevant documents from vector store
+   - RAG chain formats context and question
+   - LLM generates answer using prompt template
+   - Answer is returned to user
+
+3. **Persistence Phase**
+   - Vector store is saved to disk (if configured)
+   - Configuration remains in memory for session
+   - Application state is maintained for interactive sessions
+
+## 🎯 Performance Considerations
+
+- **Vector Store**: FAISS provides efficient similarity search
+- **Chunking**: Optimal chunk size balances context and performance
+- **Caching**: Vector store persistence avoids reprocessing
+- **Batch Processing**: Documents processed in batches for efficiency
+
+## 🛡️ Security Architecture
+
+- **Configuration**: All sensitive data via environment variables
+- **Validation**: Input validation at all levels
+- **Error Handling**: Graceful degradation and meaningful errors
+- **Dependencies**: Regular security updates and audits
+
+## 🔮 Future Architecture Evolution
+
+- **Microservices**: Potential to split components into services
+- **API Layer**: REST/GraphQL interface for programmatic access
+- **Plugin System**: Extensible architecture for custom components
+- **Distributed Processing**: Support for large-scale document processing
+
+## 📚 References
+
+- Clean Architecture by Robert C. Martin
+- Design Patterns: Elements of Reusable Object-Oriented Software
+- Domain-Driven Design by Eric Evans
+- SOLID Principles of Object-Oriented Design
diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md
new file mode 100644
index 0000000..d2a311d
--- /dev/null
+++ b/docs/CONTRIBUTING.md
@@ -0,0 +1,304 @@
+# Contributing Guide
+
+🎉 **First off, thanks for taking the time to contribute!** 🎉
+
+We welcome contributions from everyone, regardless of experience level. This guide will help you get started with contributing to RAG PDF Chatbot.
+
+## 📋 Table of Contents
+
+- [Code of Conduct](#-code-of-conduct)
+- [How Can I Contribute?](#-how-can-i-contribute)
+- [Development Setup](#-development-setup)
+- [Coding Standards](#-coding-standards)
+- [Commit Guidelines](#-commit-guidelines)
+- [Pull Request Process](#-pull-request-process)
+- [Testing](#-testing)
+- [Documentation](#-documentation)
+- [Issue Reporting](#-issue-reporting)
+- [Feature Requests](#-feature-requests)
+
+## 🤝 Code of Conduct
+
+This project adheres to the [Contributor Covenant](https://www.contributor-covenant.org/). By participating, you are expected to uphold this code. Please report unacceptable behavior to [maintainers].
+
+## 🤔 How Can I Contribute?
+
+### Reporting Bugs
+
+- **Ensure the bug was not already reported** by searching on GitHub under [Issues](https://github.com/your-org/rag-pdf-chatbot/issues)
+- If you're unable to find an open issue addressing the problem, [open a new one](#-issue-reporting)
+
+### Suggesting Enhancements
+
+- Open a new issue with the "enhancement" label
+- Provide a clear description of the proposed enhancement
+- Explain why this enhancement would be useful
+
+### Writing Code
+
+- Check the [open issues](https://github.com/your-org/rag-pdf-chatbot/issues) for tasks
+- Look for issues labeled "good first issue" if you're new
+- Comment on the issue to let others know you're working on it
+
+### Improving Documentation
+
+- Fix typos, grammar, or unclear explanations
+- Add missing documentation
+- Improve existing documentation
+
+## 🛠️ Development Setup
+
+### Prerequisites
+
+- Python 3.8+
+- Git
+- Ollama with required models
+- Virtual environment (recommended)
+
+### Setup Steps
+
+```bash
+# Clone the repository
+git clone https://github.com/your-org/rag-pdf-chatbot.git
+cd rag-pdf-chatbot
+
+# Create and activate virtual environment
+python -m venv venv
+source venv/bin/activate  # On Windows: venv\Scripts\activate
+
+# Install dependencies
+pip install -r requirements.txt
+pip install -r dev-requirements.txt  # Development dependencies
+
+# Set up pre-commit hooks
+pre-commit install
+```
+
+### Running the Application
+
+```bash
+# Basic test
+python -m src.main --help
+
+# Run with sample question
+python -m src.main --question "What is RAG?"
+```
+
+## 📏 Coding Standards
+
+### Python Style
+
+- Follow [PEP 8](https://www.python.org/dev/peps/pep-0008/) style guide
+- Use [Black](https://github.com/psf/black) for code formatting
+- Use [isort](https://github.com/PyCQA/isort) for import sorting
+- Use [flake8](https://flake8.pycqa.org/) for linting
+
+### Type Hints
+
+- Use Python type hints for all functions and methods
+- Follow [PEP 484](https://www.python.org/dev/peps/pep-0484/) type hinting guidelines
+- Use `Optional` for nullable types
+- Use `Any` sparingly
+
+### Documentation
+
+- Follow [Google-style docstrings](https://google.github.io/styleguide/pyguide.html#38-comments-and-docstrings)
+- Document all public classes, methods, and functions
+- Include examples where helpful
+- Keep documentation up-to-date
+
+### Testing
+
+- Write unit tests for new functionality
+- Aim for 80%+ code coverage
+- Use descriptive test names
+- Test edge cases and error conditions
+
+## 📝 Commit Guidelines
+
+### Commit Message Format
+
+```
+<type>(<scope>): <subject>
+<BLANK LINE>
+<body>
+<BLANK LINE>
+<footer>
+```
+
+### Types
+
+- `feat`: A new feature
+- `fix`: A bug fix
+- `docs`: Documentation only changes
+- `style`: Changes that do not affect the meaning of the code
+- `refactor`: A code change that neither fixes a bug nor adds a feature
+- `perf`: A code change that improves performance
+- `test`: Adding missing tests or correcting existing tests
+- `chore`: Changes to the build process or auxiliary tools
+
+### Examples
+
+```
+feat(document_processor): add support for DOCX files
+
+- Added PyMuPDFLoader for DOCX document loading
+- Updated chunking strategy for DOCX content
+- Added unit tests for DOCX processing
+
+Fixes #123
+```
+
+```
+fix(vector_store): handle missing vector store gracefully
+
+- Added null checks before vector store operations
+- Improved error messages for missing vector store
+- Added fallback to rebuild vector store
+
+Closes #456
+```
+
+## 🔄 Pull Request Process
+
+1. **Fork the repository** and create your branch from `main`
+2. **Install development dependencies** and set up pre-commit hooks
+3. **Make your changes** following coding standards
+4. **Write tests** for your changes
+5. **Update documentation** if needed
+6. **Run tests** to ensure nothing is broken
+7. **Commit your changes** with clear commit messages
+8. **Push to your fork** and submit a pull request
+9. **Wait for review** and address any feedback
+
+### Pull Request Requirements
+
+- Clear title and description
+- Reference related issues (e.g., "Fixes #123")
+- Pass all CI checks
+- Include appropriate tests
+- Update documentation if needed
+- Follow coding standards
+
+## 🧪 Testing
+
+### Running Tests
+
+```bash
+# Run all tests
+pytest tests/
+
+# Run specific test
+pytest tests/test_document_processor.py
+
+# Run with coverage
+pytest --cov=src tests/
+
+# Run with verbose output
+pytest -v tests/
+```
+
+### Test Structure
+
+```
+tests/
+├── __init__.py
+├── test_config.py
+├── test_document_processor.py
+├── test_vector_store.py
+├── test_rag_chain.py
+└── test_main.py
+```
+
+### Writing Tests
+
+- Use `pytest` framework
+- Follow Arrange-Act-Assert pattern
+- Test both happy paths and error cases
+- Use mocking for external dependencies
+- Keep tests focused and fast
+
+## 📚 Documentation
+
+### Documentation Standards
+
+- Use Markdown for all documentation
+- Follow consistent formatting
+- Include code examples where helpful
+- Keep documentation up-to-date with code changes
+- Use diagrams (Mermaid) for complex concepts
+
+### Documentation Structure
+
+```
+docs/
+├── ARCHITECTURE.md      # System architecture
+├── CONTRIBUTING.md      # Contribution guidelines
+├── API.md               # API reference
+├── DEPLOYMENT.md        # Deployment guide
+└── EXAMPLES.md          # Usage examples
+```
+
+## 🐛 Issue Reporting
+
+### Bug Report Template
+
+```markdown
+### Description
+Clear and concise description of the bug.
+
+### Steps to Reproduce
+1. Go to '...'
+2. Click on '....'
+3. Scroll down to '....'
+4. See error
+
+### Expected Behavior
+What you expected to happen.
+
+### Actual Behavior
+What actually happened.
+
+### Screenshots
+If applicable, add screenshots.
+
+### Environment
+- OS: [e.g., Windows 10, macOS 12.1, Ubuntu 20.04]
+- Python version: [e.g., 3.8.10]
+- RAG PDF Chatbot version: [e.g., 1.0.0]
+
+### Additional Context
+Add any other context about the problem.
+```
+
+## 💡 Feature Requests
+
+### Feature Request Template
+
+```markdown
+### Problem Statement
+Is your feature request related to a problem? Please describe.
+
+### Proposed Solution
+Describe the solution you'd like.
+
+### Alternatives
+Describe alternatives you've considered.
+
+### Additional Context
+Add any other context or screenshots.
+```
+
+## 🤝 Community
+
+- Join our [Discord](https://discord.gg/example) for discussions
+- Follow us on [Twitter](https://twitter.com/example) for updates
+- Star the repository to show your support
+
+## 🙏 Acknowledgments
+
+Thank you to all our contributors who have helped make this project better!
+
+---
+
+**Happy Coding!** 🚀
diff --git a/docs/DEPLOYMENT.md b/docs/DEPLOYMENT.md
new file mode 100644
index 0000000..4bc1c41
--- /dev/null
+++ b/docs/DEPLOYMENT.md
@@ -0,0 +1,620 @@
+# Deployment Guide
+
+## 📚 Table of Contents
+
+- [Prerequisites](#-prerequisites)
+- [Local Development Setup](#-local-development-setup)
+- [Production Deployment](#-production-deployment)
+- [Docker Deployment](#-docker-deployment)
+- [Cloud Deployment](#-cloud-deployment)
+- [Monitoring and Maintenance](#-monitoring-and-maintenance)
+- [Troubleshooting](#-troubleshooting)
+
+## 🔧 Prerequisites
+
+### System Requirements
+
+- **Operating System**: Linux, macOS, or Windows (with WSL)
+- **Python**: 3.8 or higher
+- **RAM**: Minimum 8GB, recommended 16GB+
+- **Storage**: 10GB+ free space for models and data
+- **Network**: Stable internet connection for model downloads
+
+### External Dependencies
+
+- **Ollama**: Running locally or accessible via network
+  - Required models: `nomic-embed-text`, `llama3.2:3b`
+  - Installation: https://ollama.com/download
+
+- **PDF Documents**: Dataset in `rag-dataset/` directory
+  - Minimum: 1 PDF file
+  - Recommended: Multiple PDFs for better results
+
+## 🏠 Local Development Setup
+
+### 1. Environment Setup
+
+```bash
+# Clone the repository
+git clone https://github.com/your-org/rag-pdf-chatbot.git
+cd rag-pdf-chatbot
+
+# Create virtual environment
+python -m venv venv
+source venv/bin/activate  # On Windows: venv\Scripts\activate
+
+# Install dependencies
+pip install -e .[dev]
+
+# Setup pre-commit hooks
+pre-commit install
+```
+
+### 2. Ollama Setup
+
+```bash
+# Install Ollama (if not already installed)
+curl -fsSL https://ollama.com/install.sh | sh
+
+# Start Ollama service
+ollama serve
+
+# Pull required models
+ollama pull nomic-embed-text
+ollama pull llama3.2:3b
+```
+
+### 3. Configuration
+
+```bash
+# Copy environment template
+cp .env.example .env
+
+# Edit configuration
+nano .env
+```
+
+**Key Configuration Options:**
+```env
+# Ollama Configuration
+OLLAMA_BASE_URL=http://localhost:11434
+
+# Document Processing
+DATASET_PATH=./rag-dataset
+
+# Vector Store
+SAVE_VECTOR_STORE=true
+VECTOR_STORE_PATH=./health_supplemets
+```
+
+### 4. First Run
+
+```bash
+# Build vector store and test
+python -m src.main --rebuild --question "What is RAG?"
+```
+
+### 5. Development Workflow
+
+```bash
+# Run tests
+pytest tests/
+
+# Run with coverage
+pytest --cov=src tests/
+
+# Lint code
+ruff check src/
+black src/
+
+# Format code
+black src/
+isort src/
+```
+
+## 🚀 Production Deployment
+
+### 1. Server Preparation
+
+```bash
+# Update system packages
+sudo apt update && sudo apt upgrade -y
+
+# Install Python and pip
+sudo apt install python3 python3-pip python3-venv -y
+
+# Install system dependencies
+sudo apt install build-essential -y
+```
+
+### 2. Application Deployment
+
+```bash
+# Create application user
+sudo useradd -m -s /bin/bash raguser
+sudo su - raguser
+
+# Clone repository
+git clone https://github.com/your-org/rag-pdf-chatbot.git
+cd rag-pdf-chatbot
+
+# Setup virtual environment
+python3 -m venv venv
+source venv/bin/activate
+
+# Install production dependencies
+pip install -e .
+
+# Configure environment
+cp .env.example .env
+nano .env  # Edit for production settings
+```
+
+### 3. Data Setup
+
+```bash
+# Create data directory
+mkdir -p data/rag-dataset
+
+# Copy your PDF documents
+cp /path/to/your/pdfs/* data/rag-dataset/
+
+# Update configuration
+echo "DATASET_PATH=./data/rag-dataset" >> .env
+echo "VECTOR_STORE_PATH=./data/vector-store" >> .env
+```
+
+### 4. Ollama Setup (Production)
+
+```bash
+# Install Ollama
+curl -fsSL https://ollama.com/install.sh | sh
+
+# Create systemd service
+sudo tee /etc/systemd/system/ollama.service > /dev/null <<EOF
+[Unit]
+Description=Ollama Service
+After=network-online.target
+
+[Service]
+ExecStart=/usr/local/bin/ollama serve
+User=raguser
+Group=raguser
+Restart=always
+RestartSec=3
+
+[Install]
+WantedBy=default.target
+EOF
+
+# Enable and start service
+sudo systemctl enable ollama
+sudo systemctl start ollama
+
+# Pull models
+ollama pull nomic-embed-text
+ollama pull llama3.2:3b
+```
+
+### 5. Application Service
+
+```bash
+# Create systemd service
+sudo tee /etc/systemd/system/rag-chatbot.service > /dev/null <<EOF
+[Unit]
+Description=RAG PDF Chatbot
+After=network.target ollama.service
+
+[Service]
+Type=simple
+User=raguser
+Group=raguser
+WorkingDirectory=/home/raguser/rag-pdf-chatbot
+Environment=PATH=/home/raguser/rag-pdf-chatbot/venv/bin
+ExecStart=/home/raguser/rag-pdf-chatbot/venv/bin/python -m src.main --interactive
+Restart=always
+RestartSec=10
+
+[Install]
+WantedBy=multi-user.target
+EOF
+
+# Enable and start service
+sudo systemctl enable rag-chatbot
+sudo systemctl start rag-chatbot
+```
+
+### 6. Nginx Reverse Proxy (Optional)
+
+```bash
+# Install Nginx
+sudo apt install nginx -y
+
+# Configure site
+sudo tee /etc/nginx/sites-available/rag-chatbot > /dev/null <<EOF
+server {
+    listen 80;
+    server_name your-domain.com;
+
+    location / {
+        proxy_pass http://127.0.0.1:8000;
+        proxy_set_header Host \$host;
+        proxy_set_header X-Real-IP \$remote_addr;
+    }
+}
+EOF
+
+# Enable site
+sudo ln -s /etc/nginx/sites-available/rag-chatbot /etc/nginx/sites-enabled/
+sudo nginx -t
+sudo systemctl reload nginx
+```
+
+## 🐳 Docker Deployment
+
+### 1. Dockerfile
+
+```dockerfile
+FROM python:3.11-slim
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    curl \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install Ollama
+RUN curl -fsSL https://ollama.com/install.sh | sh
+
+# Set working directory
+WORKDIR /app
+
+# Copy requirements first for better caching
+COPY requirements.txt pyproject.toml ./
+RUN pip install --no-cache-dir -e .
+
+# Copy application code
+COPY src/ ./src/
+COPY scripts/ ./scripts/
+
+# Create data directories
+RUN mkdir -p data/rag-dataset data/vector-store
+
+# Copy entrypoint script
+COPY scripts/docker-entrypoint.sh /usr/local/bin/
+RUN chmod +x /usr/local/bin/docker-entrypoint.sh
+
+# Expose port (if using web interface)
+EXPOSE 8000
+
+# Health check
+HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
+    CMD python -c "from src import RAGPDFChatbot; chatbot = RAGPDFChatbot(); chatbot.initialize(); print('OK')"
+
+ENTRYPOINT ["docker-entrypoint.sh"]
+```
+
+### 2. Docker Compose
+
+```yaml
+version: '3.8'
+
+services:
+  ollama:
+    image: ollama/ollama:latest
+    ports:
+      - "11434:11434"
+    volumes:
+      - ollama_data:/root/.ollama
+    restart: unless-stopped
+
+  rag-chatbot:
+    build: .
+    ports:
+      - "8000:8000"
+    volumes:
+      - ./data:/app/data
+      - ./logs:/app/logs
+    depends_on:
+      - ollama
+    environment:
+      - OLLAMA_BASE_URL=http://ollama:11434
+      - DATASET_PATH=/app/data/rag-dataset
+      - VECTOR_STORE_PATH=/app/data/vector-store
+    restart: unless-stopped
+
+volumes:
+  ollama_data:
+```
+
+### 3. Docker Entrypoint
+
+```bash
+#!/bin/bash
+
+# Wait for Ollama to be ready
+echo "Waiting for Ollama..."
+while ! curl -s http://ollama:11434/api/tags > /dev/null; do
+  sleep 2
+done
+
+# Pull models
+echo "Pulling models..."
+ollama pull nomic-embed-text
+ollama pull llama3.2:3b
+
+# Start application
+echo "Starting RAG Chatbot..."
+exec python -m src.main --interactive
+```
+
+### 4. Build and Run
+
+```bash
+# Build and start
+docker-compose up --build
+
+# Or run manually
+docker build -t rag-chatbot .
+docker run -p 8000:8000 rag-chatbot
+```
+
+## ☁️ Cloud Deployment
+
+### AWS EC2 Deployment
+
+```bash
+# Launch EC2 instance (t3.large or better recommended)
+# Ubuntu 22.04 LTS, 16GB RAM minimum
+
+# Security group settings:
+# - SSH (22) from your IP
+# - HTTP (80) and HTTPS (443) if using web interface
+# - Custom TCP (11434) for Ollama if needed internally
+
+# After SSH into instance, follow production deployment steps
+```
+
+### Google Cloud Run
+
+```yaml
+# cloudbuild.yaml
+steps:
+  - name: 'gcr.io/cloud-builders/docker'
+    args: ['build', '-t', 'gcr.io/$PROJECT_ID/rag-chatbot', '.']
+
+  - name: 'gcr.io/cloud-builders/docker'
+    args: ['push', 'gcr.io/$PROJECT_ID/rag-chatbot']
+
+  - name: 'gcr.io/google.com/cloudsdktool/cloud-sdk'
+    entrypoint: gcloud
+    args:
+      - run
+      - deploy
+      - rag-chatbot
+      - --image=gcr.io/$PROJECT_ID/rag-chatbot
+      - --platform=managed
+      - --region=us-central1
+      - --allow-unauthenticated
+      - --memory=4Gi
+      - --cpu=2
+```
+
+### Heroku Deployment
+
+```yaml
+# Procfile
+web: python -m src.main --interactive
+
+# requirements.txt (Heroku-specific)
+-r requirements.txt
+gunicorn==21.2.0
+```
+
+## 📊 Monitoring and Maintenance
+
+### 1. Logging
+
+```python
+import logging
+
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+    handlers=[
+        logging.FileHandler('logs/rag-chatbot.log'),
+        logging.StreamHandler()
+    ]
+)
+```
+
+### 2. Health Checks
+
+```bash
+# Application health check
+curl -f http://localhost:8000/health || exit 1
+
+# Ollama health check
+curl -f http://localhost:11434/api/tags || exit 1
+```
+
+### 3. Monitoring Commands
+
+```bash
+# Check application status
+sudo systemctl status rag-chatbot
+
+# View application logs
+sudo journalctl -u rag-chatbot -f
+
+# Check resource usage
+htop
+df -h
+free -h
+
+# Ollama status
+ollama list
+ollama ps
+```
+
+### 4. Backup Strategy
+
+```bash
+# Backup vector store and configuration
+tar -czf backup-$(date +%Y%m%d).tar.gz \
+    data/vector-store/ \
+    .env \
+    logs/
+
+# Automated backup script
+#!/bin/bash
+BACKUP_DIR="/var/backups/rag-chatbot"
+mkdir -p $BACKUP_DIR
+
+tar -czf $BACKUP_DIR/backup-$(date +%Y%m%d-%H%M%S).tar.gz \
+    -C /home/raguser/rag-pdf-chatbot \
+    data/vector-store .env logs
+
+# Keep only last 7 backups
+cd $BACKUP_DIR
+ls -t backup-*.tar.gz | tail -n +8 | xargs rm -f
+```
+
+### 5. Updates
+
+```bash
+# Update application
+cd /home/raguser/rag-pdf-chatbot
+git pull origin main
+source venv/bin/activate
+pip install -e .
+
+# Restart service
+sudo systemctl restart rag-chatbot
+
+# Update Ollama models
+ollama pull nomic-embed-text:latest
+ollama pull llama3.2:3b:latest
+```
+
+## 🔧 Troubleshooting
+
+### Common Issues
+
+#### 1. Ollama Connection Issues
+
+```bash
+# Check if Ollama is running
+ps aux | grep ollama
+
+# Check Ollama API
+curl http://localhost:11434/api/tags
+
+# Restart Ollama
+sudo systemctl restart ollama
+
+# Check logs
+sudo journalctl -u ollama -f
+```
+
+#### 2. Vector Store Issues
+
+```bash
+# Rebuild vector store
+rm -rf data/vector-store/
+python -m src.main --rebuild
+
+# Check disk space
+df -h
+```
+
+#### 3. Memory Issues
+
+```bash
+# Check memory usage
+free -h
+
+# Monitor process memory
+ps aux --sort=-%mem | head
+
+# Increase swap space if needed
+sudo fallocate -l 4G /swapfile
+sudo chmod 600 /swapfile
+sudo mkswap /swapfile
+sudo swapon /swapfile
+echo '/swapfile none swap sw 0 0' | sudo tee -a /etc/fstab
+```
+
+#### 4. Performance Issues
+
+```bash
+# Profile application
+python -m cProfile -s time -m src.main --question "test question"
+
+# Check CPU usage
+top -p $(pgrep -f "python -m src.main")
+
+# Optimize configuration
+# Reduce chunk_size, k, fetch_k for lower memory usage
+```
+
+#### 5. PDF Processing Issues
+
+```bash
+# Check PDF files
+file data/rag-dataset/*.pdf
+
+# Validate PDF content
+python -c "
+import fitz
+doc = fitz.open('data/rag-dataset/sample.pdf')
+print(f'Pages: {len(doc)}')
+print(f'Text length: {len(doc[0].get_text())}')
+"
+```
+
+### Debug Mode
+
+```bash
+# Run with debug logging
+LOG_LEVEL=DEBUG python -m src.main --question "test"
+
+# Check configuration
+python -c "from src.config import config; print(config)"
+```
+
+### Getting Help
+
+1. Check application logs: `tail -f logs/rag-chatbot.log`
+2. Check system logs: `sudo journalctl -u rag-chatbot -f`
+3. Review configuration: `cat .env`
+4. Test components individually:
+
+```bash
+# Test document processing
+python -c "
+from src.document_processor import DocumentProcessor
+processor = DocumentProcessor()
+docs = processor.process_documents()
+print(f'Processed {len(docs)} documents')
+"
+
+# Test vector store
+python -c "
+from src.vector_store import VectorStoreManager
+manager = VectorStoreManager()
+print('Vector store initialized successfully')
+"
+```
+
+## 📞 Support
+
+For deployment issues:
+1. Check the [troubleshooting section](#-troubleshooting)
+2. Review [GitHub Issues](https://github.com/your-org/rag-pdf-chatbot/issues)
+3. Check application and system logs
+4. Provide detailed error messages and configuration
+
+---
+
+**🚀 Happy Deploying!**
diff --git a/docs/EXAMPLES.md b/docs/EXAMPLES.md
new file mode 100644
index 0000000..bf7aec7
--- /dev/null
+++ b/docs/EXAMPLES.md
@@ -0,0 +1,768 @@
+# Usage Examples
+
+## 📚 Table of Contents
+
+- [Quick Start Examples](#-quick-start-examples)
+- [Basic Usage](#-basic-usage)
+- [Advanced Configuration](#-advanced-configuration)
+- [Custom Integration](#-custom-integration)
+- [Performance Optimization](#-performance-optimization)
+- [Troubleshooting Examples](#-troubleshooting-examples)
+
+## 🚀 Quick Start Examples
+
+### 1. Basic Question Answering
+
+```bash
+# Install and setup
+pip install -e .
+ollama pull nomic-embed-text
+ollama pull llama3.2:3b
+
+# Ask a simple question
+python -m src.main --question "What are the benefits of BCAA supplements?"
+
+# Interactive mode
+python -m src.main --interactive
+```
+
+### 2. Rebuild Vector Store
+
+```bash
+# Force rebuild of vector store from documents
+python -m src.main --rebuild --question "What is muscle protein synthesis?"
+```
+
+### 3. Custom Configuration
+
+```bash
+# Use different LLM model
+OLLAMA_BASE_URL=http://localhost:11434 LLM_MODEL=llama3.2:1b python -m src.main --question "Explain creatine supplementation"
+
+# Smaller chunks for better precision
+CHUNK_SIZE=500 CHUNK_OVERLAP=50 python -m src.main --question "What are the side effects of protein supplements?"
+```
+
+## 💡 Basic Usage
+
+### Command Line Interface
+
+```bash
+# Help
+python -m src.main --help
+
+# Single question
+python -m src.main --question "How does creatine work?"
+
+# Interactive session
+python -m src.main --interactive
+
+# Rebuild and ask
+python -m src.main --rebuild --question "What are BCAAs?"
+```
+
+### Programmatic Usage
+
+```python
+from src import RAGPDFChatbot
+
+# Initialize chatbot
+chatbot = RAGPDFChatbot()
+chatbot.initialize()
+
+# Ask questions
+questions = [
+    "What are the benefits of whey protein?",
+    "How much protein should I consume daily?",
+    "What are the side effects of creatine?"
+]
+
+for question in questions:
+    answer = chatbot.ask(question)
+    print(f"Q: {question}")
+    print(f"A: {answer}")
+    print("-" * 50)
+```
+
+### Batch Processing
+
+```python
+from src import RAGPDFChatbot
+
+# Load questions from file
+with open('questions.txt', 'r') as f:
+    questions = [line.strip() for line in f if line.strip()]
+
+# Process batch
+chatbot = RAGPDFChatbot()
+chatbot.initialize()
+
+results = []
+for i, question in enumerate(questions, 1):
+    print(f"Processing question {i}/{len(questions)}")
+    answer = chatbot.ask(question)
+    results.append({
+        'question': question,
+        'answer': answer,
+        'timestamp': datetime.now().isoformat()
+    })
+
+# Save results
+import json
+with open('results.json', 'w') as f:
+    json.dump(results, f, indent=2)
+```
+
+## 🔧 Advanced Configuration
+
+### Environment Variables
+
+```bash
+# Complete configuration
+export OLLAMA_BASE_URL=http://localhost:11434
+export EMBEDDING_MODEL=nomic-embed-text
+export LLM_MODEL=llama3.2:3b
+export LLM_TEMPERATURE=0.7
+export LLM_MAX_TOKENS=512
+export DATASET_PATH=./rag-dataset
+export CHUNK_SIZE=1000
+export CHUNK_OVERLAP=100
+export VECTOR_STORE_PATH=./health_supplemets
+export SAVE_VECTOR_STORE=true
+export RETRIEVAL_TYPE=mmr
+export RETRIEVAL_K=3
+export RETRIEVAL_FETCH_K=100
+export RETRIEVAL_LAMBDA=1.0
+export LOG_LEVEL=INFO
+```
+
+### Custom Configuration File
+
+```python
+import os
+from src.config import AppConfig, EmbeddingConfig, LLMConfig, DocumentProcessingConfig
+
+# Create custom configuration
+custom_config = AppConfig(
+    embedding=EmbeddingConfig(
+        model_name="nomic-embed-text",
+        base_url="http://localhost:11434"
+    ),
+    llm=LLMConfig(
+        model_name="llama3.2:3b",
+        base_url="http://localhost:11434",
+        temperature=0.5,
+        max_tokens=256
+    ),
+    document_processing=DocumentProcessingConfig(
+        chunk_size=500,
+        chunk_overlap=50,
+        dataset_path="./custom-dataset"
+    )
+)
+
+# Use with components
+from src.document_processor import DocumentProcessor
+processor = DocumentProcessor()
+processor.dataset_path = custom_config.document_processing.dataset_path
+processor.chunk_size = custom_config.document_processing.chunk_size
+processor.chunk_overlap = custom_config.document_processing.chunk_overlap
+```
+
+### Multiple Vector Stores
+
+```python
+from src.vector_store import VectorStoreManager
+from src.document_processor import DocumentProcessor
+
+# Create separate managers for different domains
+supplements_manager = VectorStoreManager()
+fitness_manager = VectorStoreManager()
+
+# Configure different paths
+supplements_manager.vector_store_path = "./vector-stores/supplements"
+fitness_manager.vector_store_path = "./vector-stores/fitness"
+
+# Process different datasets
+supplements_processor = DocumentProcessor()
+supplements_processor.dataset_path = "./datasets/supplements"
+
+fitness_processor = DocumentProcessor()
+fitness_processor.dataset_path = "./datasets/fitness"
+
+# Build vector stores
+supplements_docs = supplements_processor.process_documents()
+supplements_manager.create_vector_store(supplements_docs)
+
+fitness_docs = fitness_processor.process_documents()
+fitness_manager.create_vector_store(fitness_docs)
+```
+
+## 🔗 Custom Integration
+
+### Web API with FastAPI
+
+```python
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+from src import RAGPDFChatbot
+
+app = FastAPI(title="RAG PDF Chatbot API")
+
+# Initialize chatbot
+chatbot = None
+
+@app.on_event("startup")
+async def startup_event():
+    global chatbot
+    chatbot = RAGPDFChatbot()
+    chatbot.initialize()
+
+class Question(BaseModel):
+    question: str
+    rebuild_vector_store: bool = False
+
+class Answer(BaseModel):
+    question: str
+    answer: str
+    timestamp: str
+
+@app.post("/ask", response_model=Answer)
+async def ask_question(question: Question):
+    try:
+        if question.rebuild_vector_store:
+            chatbot.initialize(rebuild_vector_store=True)
+
+        answer = chatbot.ask(question.question)
+        return Answer(
+            question=question.question,
+            answer=answer,
+            timestamp=datetime.now().isoformat()
+        )
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+@app.get("/health")
+async def health_check():
+    return {"status": "healthy", "timestamp": datetime.now().isoformat()}
+
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=8000)
+```
+
+### Discord Bot Integration
+
+```python
+import discord
+from discord.ext import commands
+from src import RAGPDFChatbot
+
+intents = discord.Intents.default()
+intents.message_content = True
+
+bot = commands.Bot(command_prefix='!', intents=intents)
+chatbot = None
+
+@bot.event
+async def on_ready():
+    global chatbot
+    print(f'Bot connected as {bot.user}')
+    chatbot = RAGPDFChatbot()
+    chatbot.initialize()
+
+@bot.command(name='ask')
+async def ask(ctx, *, question: str):
+    """Ask a question about the PDF documents"""
+    try:
+        # Send typing indicator
+        async with ctx.typing():
+            answer = chatbot.ask(question)
+
+        # Split long answers into chunks
+        if len(answer) > 2000:
+            chunks = [answer[i:i+2000] for i in range(0, len(answer), 2000)]
+            for chunk in chunks:
+                await ctx.send(chunk)
+        else:
+            await ctx.send(answer)
+
+    except Exception as e:
+        await ctx.send(f"Sorry, I encountered an error: {str(e)}")
+
+@bot.command(name='rebuild')
+async def rebuild(ctx):
+    """Rebuild the vector store"""
+    try:
+        async with ctx.typing():
+            chatbot.initialize(rebuild_vector_store=True)
+        await ctx.send("Vector store rebuilt successfully!")
+    except Exception as e:
+        await ctx.send(f"Error rebuilding vector store: {str(e)}")
+
+# Run bot
+if __name__ == "__main__":
+    bot.run('YOUR_BOT_TOKEN')
+```
+
+### Streamlit Web Interface
+
+```python
+import streamlit as st
+from src import RAGPDFChatbot
+
+# Page configuration
+st.set_page_config(
+    page_title="RAG PDF Chatbot",
+    page_icon="📚",
+    layout="wide"
+)
+
+# Initialize chatbot in session state
+if 'chatbot' not in st.session_state:
+    with st.spinner('Initializing chatbot...'):
+        st.session_state.chatbot = RAGPDFChatbot()
+        st.session_state.chatbot.initialize()
+
+chatbot = st.session_state.chatbot
+
+# Sidebar
+with st.sidebar:
+    st.title("⚙️ Configuration")
+
+    if st.button("🔄 Rebuild Vector Store"):
+        with st.spinner('Rebuilding vector store...'):
+            chatbot.initialize(rebuild_vector_store=True)
+        st.success("Vector store rebuilt!")
+
+    st.markdown("---")
+    st.markdown("### 📊 Statistics")
+    # Add statistics if available
+
+# Main interface
+st.title("📚 RAG PDF Chatbot")
+st.markdown("Ask questions about your PDF documents!")
+
+# Chat interface
+if 'messages' not in st.session_state:
+    st.session_state.messages = []
+
+# Display chat history
+for message in st.session_state.messages:
+    with st.chat_message(message["role"]):
+        st.markdown(message["content"])
+
+# Chat input
+if prompt := st.chat_input("Ask a question about your documents..."):
+    # Add user message
+    st.session_state.messages.append({"role": "user", "content": prompt})
+    with st.chat_message("user"):
+        st.markdown(prompt)
+
+    # Get bot response
+    with st.chat_message("assistant"):
+        with st.spinner('Thinking...'):
+            response = chatbot.ask(prompt)
+        st.markdown(response)
+
+    # Add bot response to history
+    st.session_state.messages.append({"role": "assistant", "content": response})
+```
+
+## ⚡ Performance Optimization
+
+### Memory Optimization
+
+```python
+# Reduce chunk size for lower memory usage
+CHUNK_SIZE=500 CHUNK_OVERLAP=50 python -m src.main --question "test"
+
+# Use smaller retrieval parameters
+RETRIEVAL_K=2 RETRIEVAL_FETCH_K=50 python -m src.main --interactive
+
+# Limit concurrent processing
+MAX_WORKERS=2 python -c "
+import concurrent.futures
+from src import RAGPDFChatbot
+
+def process_question(question):
+    chatbot = RAGPDFChatbot()
+    chatbot.initialize()
+    return chatbot.ask(question)
+
+questions = ['Q1', 'Q2', 'Q3']
+with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
+    results = list(executor.map(process_question, questions))
+"
+```
+
+### Caching Strategies
+
+```python
+import pickle
+from functools import lru_cache
+from src import RAGPDFChatbot
+
+@lru_cache(maxsize=1000)
+def cached_ask(question: str) -> str:
+    """Cache answers for repeated questions"""
+    if not hasattr(cached_ask, 'chatbot'):
+        cached_ask.chatbot = RAGPDFChatbot()
+        cached_ask.chatbot.initialize()
+
+    return cached_ask.chatbot.ask(question)
+
+# Use cached version
+answer1 = cached_ask("What is creatine?")
+answer2 = cached_ask("What is creatine?")  # Will use cache
+```
+
+### Batch Processing
+
+```python
+from src import RAGPDFChatbot
+import asyncio
+
+async def batch_process_questions(questions, batch_size=5):
+    """Process questions in batches to manage memory"""
+    chatbot = RAGPDFChatbot()
+    chatbot.initialize()
+
+    results = []
+    for i in range(0, len(questions), batch_size):
+        batch = questions[i:i + batch_size]
+        print(f"Processing batch {i//batch_size + 1}/{(len(questions) + batch_size - 1)//batch_size}")
+
+        batch_results = []
+        for question in batch:
+            answer = chatbot.ask(question)
+            batch_results.append((question, answer))
+
+        results.extend(batch_results)
+
+        # Optional: Clear some memory between batches
+        import gc
+        gc.collect()
+
+    return results
+
+# Usage
+questions = ["Q1", "Q2", "Q3", "Q4", "Q5", "Q6", "Q7", "Q8", "Q9", "Q10"]
+results = asyncio.run(batch_process_questions(questions, batch_size=3))
+```
+
+## 🔧 Troubleshooting Examples
+
+### Debug Configuration
+
+```python
+import logging
+import os
+
+# Enable debug logging
+os.environ['LOG_LEVEL'] = 'DEBUG'
+logging.basicConfig(level=logging.DEBUG)
+
+from src import RAGPDFChatbot
+
+# Create chatbot with debug info
+chatbot = RAGPDFChatbot()
+print("Chatbot created")
+
+try:
+    chatbot.initialize()
+    print("Initialization successful")
+except Exception as e:
+    print(f"Initialization failed: {e}")
+    import traceback
+    traceback.print_exc()
+```
+
+### Test Components Individually
+
+```python
+# Test document processing
+from src.document_processor import DocumentProcessor
+
+processor = DocumentProcessor()
+print(f"Dataset path: {processor.dataset_path}")
+print(f"Chunk size: {processor.chunk_size}")
+
+try:
+    pdf_files = processor.discover_pdf_files()
+    print(f"Found {len(pdf_files)} PDF files: {pdf_files}")
+
+    documents = processor.load_documents()
+    print(f"Loaded {len(documents)} documents")
+
+    chunks = processor.chunk_documents(documents[:1])  # Test with first doc
+    print(f"Created {len(chunks)} chunks")
+
+except Exception as e:
+    print(f"Error: {e}")
+    import traceback
+    traceback.print_exc()
+```
+
+### Vector Store Debugging
+
+```python
+from src.vector_store import VectorStoreManager
+
+manager = VectorStoreManager()
+print("Vector store manager created")
+
+try:
+    # Test embedding model
+    test_embedding = manager.embedding_model.embed_query("test")
+    print(f"Embedding dimension: {len(test_embedding)}")
+
+    # Test vector store creation
+    from src.document_processor import DocumentProcessor
+    processor = DocumentProcessor()
+    documents = processor.process_documents()
+
+    if documents:
+        vector_store = manager.create_vector_store(documents[:1])  # Test with one doc
+        print("Vector store created successfully")
+
+        retriever = manager.get_retriever()
+        print("Retriever created successfully")
+
+        # Test retrieval
+        docs = retriever.invoke("test query")
+        print(f"Retrieved {len(docs)} documents")
+
+except Exception as e:
+    print(f"Error: {e}")
+    import traceback
+    traceback.print_exc()
+```
+
+### Network and Ollama Debugging
+
+```bash
+# Test Ollama connection
+curl http://localhost:11434/api/tags
+
+# Test embedding model
+curl http://localhost:11434/api/embeddings \
+  -H "Content-Type: application/json" \
+  -d '{"model": "nomic-embed-text", "prompt": "test"}'
+
+# Test LLM model
+curl http://localhost:11434/api/generate \
+  -H "Content-Type: application/json" \
+  -d '{"model": "llama3.2:3b", "prompt": "Hello", "stream": false}'
+```
+
+### Configuration Validation
+
+```python
+from src.config import load_config
+import os
+
+# Test configuration loading
+try:
+    config = load_config()
+    print("Configuration loaded successfully")
+    print(f"LLM Model: {config.llm.model_name}")
+    print(f"Dataset Path: {config.document_processing.dataset_path}")
+    print(f"Vector Store Path: {config.vector_store.local_path}")
+
+    # Validate paths
+    import os.path
+    if not os.path.exists(config.document_processing.dataset_path):
+        print(f"Warning: Dataset path does not exist: {config.document_processing.dataset_path}")
+    else:
+        pdf_count = len([f for f in os.listdir(config.document_processing.dataset_path) if f.endswith('.pdf')])
+        print(f"Found {pdf_count} PDF files in dataset")
+
+except Exception as e:
+    print(f"Configuration error: {e}")
+    import traceback
+    traceback.print_exc()
+```
+
+### Performance Profiling
+
+```python
+import cProfile
+import pstats
+from src import RAGPDFChatbot
+
+# Profile initialization
+print("Profiling initialization...")
+pr = cProfile.Profile()
+pr.enable()
+
+chatbot = RAGPDFChatbot()
+chatbot.initialize()
+
+pr.disable()
+stats = pstats.Stats(pr)
+stats.sort_stats('cumulative').print_stats(20)
+
+# Profile question answering
+print("\nProfiling question answering...")
+pr = cProfile.Profile()
+pr.enable()
+
+answer = chatbot.ask("What are the benefits of protein supplements?")
+
+pr.disable()
+stats = pstats.Stats(pr)
+stats.sort_stats('cumulative').print_stats(20)
+
+print(f"Answer: {answer[:200]}...")
+```
+
+## 🎯 Advanced Examples
+
+### Custom Document Processing
+
+```python
+from src.document_processor import DocumentProcessor
+from langchain_text_splitters import RecursiveCharacterTextSplitter
+import fitz  # PyMuPDF
+
+class CustomDocumentProcessor(DocumentProcessor):
+    """Custom processor with advanced text extraction"""
+
+    def load_documents(self):
+        """Enhanced document loading with metadata extraction"""
+        pdf_files = self.discover_pdf_files()
+        documents = []
+
+        for pdf_file in pdf_files:
+            try:
+                # Use PyMuPDF directly for better control
+                doc = fitz.open(pdf_file)
+
+                for page_num in range(len(doc)):
+                    page = doc[page_num]
+
+                    # Extract text with layout preservation
+                    text = page.get_text("text")
+
+                    # Extract metadata
+                    metadata = {
+                        "source": pdf_file,
+                        "page": page_num + 1,
+                        "total_pages": len(doc),
+                        "file_size": os.path.getsize(pdf_file),
+                        "creation_date": doc.metadata.get("creationDate", ""),
+                        "mod_date": doc.metadata.get("modDate", ""),
+                    }
+
+                    documents.append({
+                        "page_content": text,
+                        "metadata": metadata
+                    })
+
+                doc.close()
+
+            except Exception as e:
+                print(f"Failed to load {pdf_file}: {str(e)}")
+                continue
+
+        if not documents:
+            raise RuntimeError("No documents were successfully loaded")
+
+        return documents
+
+    def chunk_documents(self, documents):
+        """Custom chunking with overlap preservation"""
+        text_splitter = RecursiveCharacterTextSplitter(
+            chunk_size=self.chunk_size,
+            chunk_overlap=self.chunk_overlap,
+            separators=["\n\n", "\n", ". ", " ", ""],  # Preserve structure
+            keep_separator=True
+        )
+
+        chunks = []
+        for doc in documents:
+            doc_chunks = text_splitter.split_text(doc["page_content"])
+
+            for i, chunk in enumerate(doc_chunks):
+                chunk_metadata = doc["metadata"].copy()
+                chunk_metadata.update({
+                    "chunk_id": i,
+                    "total_chunks": len(doc_chunks),
+                    "chunk_start": i * (self.chunk_size - self.chunk_overlap),
+                    "chunk_end": (i + 1) * (self.chunk_size - self.chunk_overlap) + self.chunk_overlap
+                })
+
+                chunks.append({
+                    "page_content": chunk,
+                    "metadata": chunk_metadata
+                })
+
+        return chunks
+```
+
+### Custom Vector Store with Persistence
+
+```python
+from src.vector_store import VectorStoreManager
+import json
+import os
+from datetime import datetime
+
+class PersistentVectorStoreManager(VectorStoreManager):
+    """Enhanced vector store with metadata persistence"""
+
+    def save_vector_store(self, path=None):
+        """Save vector store with metadata"""
+        super().save_vector_store(path)
+
+        save_path = path or self.vector_store_path
+        metadata_path = f"{save_path}_metadata.json"
+
+        metadata = {
+            "created_at": datetime.now().isoformat(),
+            "embedding_model": self.embedding_model.model,
+            "embedding_dimension": len(self.embedding_model.embed_query("test")),
+            "vector_count": self.vector_store.index.ntotal if self.vector_store else 0,
+            "index_type": "FAISS",
+            "metric": "L2",
+            "configuration": {
+                "chunk_size": getattr(self, 'chunk_size', 'unknown'),
+                "chunk_overlap": getattr(self, 'chunk_overlap', 'unknown'),
+            }
+        }
+
+        with open(metadata_path, 'w') as f:
+            json.dump(metadata, f, indent=2)
+
+    def load_vector_store(self, path=None):
+        """Load vector store with metadata validation"""
+        load_path = path or self.vector_store_path
+        metadata_path = f"{load_path}_metadata.json"
+
+        # Load metadata if exists
+        if os.path.exists(metadata_path):
+            with open(metadata_path, 'r') as f:
+                metadata = json.load(f)
+            print(f"Loading vector store created at {metadata['created_at']}")
+            print(f"Vector store contains {metadata['vector_count']} vectors")
+
+        # Load the actual vector store
+        super().load_vector_store(path)
+
+    def get_statistics(self):
+        """Get vector store statistics"""
+        if not self.vector_store:
+            return None
+
+        return {
+            "total_vectors": self.vector_store.index.ntotal,
+            "dimension": self.vector_store.index.d,
+            "is_trained": self.vector_store.index.is_trained,
+            "metric_type": "L2",  # FAISS default
+        }
+```
+
+---
+
+**🎉 Explore, Experiment, and Build Amazing Things!**
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..d036d53
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,229 @@
+[build-system]
+requires = ["setuptools>=61.0.0", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "rag-pdf-chatbot"
+version = "1.0.0"
+description = "A Professional, Enterprise-Grade Retrieval-Augmented Generation System for PDF Documents"
+authors = [
+    {name = "RAG PDF Chatbot Team", email = "team@rag-pdf-chatbot.com"},
+]
+license = {text = "MIT"}
+readme = "README.md"
+requires-python = ">=3.8"
+classifiers = [
+    "Development Status :: 5 - Production/Stable",
+    "Intended Audience :: Developers",
+    "Intended Audience :: Science/Research",
+    "License :: OSI Approved :: MIT License",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.8",
+    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Topic :: Scientific/Engineering :: Artificial Intelligence",
+    "Topic :: Text Processing :: Linguistic",
+]
+
+dependencies = [
+    "python-dotenv>=1.0.0",
+    "langchain>=0.1.0",
+    "langchain-community>=0.0.1",
+    "langchain-core>=0.1.0",
+    "langchain-text-splitters>=0.0.1",
+    "faiss-cpu>=1.7.0",
+    "pymupdf>=1.23.0",
+    "tiktoken>=0.5.0",
+    "typing-extensions>=4.0.0",
+]
+
+[project.optional-dependencies]
+dev = [
+    "black>=23.0.0",
+    "ruff>=0.1.0",
+    "mypy>=1.0.0",
+    "pytest>=7.0.0",
+    "pytest-cov>=4.0.0",
+    "pre-commit>=3.0.0",
+    "isort>=5.0.0",
+    "flake8>=6.0.0",
+    "types-python-dotenv>=1.0.0",
+]
+
+test = [
+    "pytest>=7.0.0",
+    "pytest-cov>=4.0.0",
+    "pytest-mock>=3.0.0",
+]
+
+docs = [
+    "mkdocs>=1.0.0",
+    "mkdocs-material>=9.0.0",
+    "mkdocstrings>=0.20.0",
+]
+
+[project.urls]
+Homepage = "https://github.com/your-org/rag-pdf-chatbot"
+Documentation = "https://github.com/your-org/rag-pdf-chatbot/docs"
+Repository = "https://github.com/your-org/rag-pdf-chatbot"
+Issues = "https://github.com/your-org/rag-pdf-chatbot/issues"
+
+[tool.setuptools]
+packages = ["src"]
+
+[tool.setuptools.package-data]
+"*" = ["*.txt", "*.md", "*.json"]
+
+[tool.black]
+line-length = 88
+target-version = ['py38', 'py39', 'py310', 'py311', 'py312']
+include = '\.pyi?$'
+exclude = '''
+/(
+    \.git
+  | \.hg
+  | \.mypy_cache
+  | \.tox
+  | \.venv
+  | _build
+  | buck-out
+  | build
+  | dist
+  | __pycache__
+)/
+'''
+
+[tool.ruff]
+line-length = 88
+target-version = "py38"
+select = [
+    "E",    # pycodestyle errors
+    "W",    # pycodestyle warnings
+    "F",    # pyflakes
+    "I",    # isort
+    "B",    # flake8-bugbear
+    "C4",   # flake8-comprehensions
+    "UP",   # pyupgrade
+    "N",    # pep8-naming
+    "S",    # flake8-bandit
+    "A",    # flake8-builtins
+    "C90",  # mccabe
+    "T20",  # flake8-print
+    "PT",   # flake8-pytest-style
+    "RUF",  # ruff-specific rules
+]
+ignore = [
+    "E501",  # line too long (handled by black)
+    "B008",  # do not perform function calls in argument defaults
+    "C901",  # too complex
+    "S101",  # use of assert detected
+    "T201",  # print found (we allow print for CLI apps)
+]
+
+[tool.ruff.isort]
+known-first-party = ["src"]
+known-third-party = [
+    "langchain",
+    "langchain_community",
+    "langchain_core",
+    "faiss",
+    "pymupdf",
+    "tiktoken",
+    "python_dotenv",
+]
+
+[tool.mypy]
+python_version = "3.8"
+warn_return_any = true
+warn_unused_configs = true
+disallow_untyped_defs = true
+disallow_incomplete_defs = true
+check_untyped_defs = true
+disallow_untyped_decorators = true
+no_implicit_optional = true
+warn_redundant_casts = true
+warn_unused_ignores = true
+warn_no_return = true
+warn_return_any = true
+warn_unreachable = true
+show_error_codes = true
+exclude = [
+    "venv/",
+    ".venv/",
+    "env/",
+    "build/",
+    "dist/",
+]
+
+[tool.pytest.ini_options]
+minversion = "7.0"
+addopts = "--cov=src --cov-report=term-missing --cov-report=html"
+testpaths = ["tests"]
+python_files = "test_*.py"
+python_functions = "test_*"
+python_classes = "Test*"
+markers = [
+    "slow: marks tests as slow (deselect with '-m \"not slow\"')",
+    "integration: marks tests as integration tests",
+    "unit: marks tests as unit tests",
+]
+
+[tool.coverage.run]
+source = ["src"]
+branch = true
+parallel = true
+omit = [
+    "src/__init__.py",
+    "*/tests/*",
+]
+
+[tool.coverage.report]
+show_missing = true
+fail_under = 80
+exclude_lines = [
+    "pragma: no cover",
+    "def __repr__",
+    "raise NotImplementedError",
+    "if __name__ == .__main__.:",
+    "if TYPE_CHECKING:",
+]
+
+[tool.isort]
+profile = "black"
+line_length = 88
+multi_line_output = 3
+include_trailing_comma = true
+force_grid_wrap = 0
+use_parentheses = true
+ensure_newline_before_comments = true
+known_first_party = ["src"]
+known_third_party = [
+    "langchain",
+    "langchain_community",
+    "langchain_core",
+    "faiss",
+    "pymupdf",
+    "tiktoken",
+    "python_dotenv",
+]
+
+[tool.flake8]
+max-line-length = 88
+extend-ignore = [
+    "E203",  # whitespace before ':' (conflicts with black)
+    "E501",  # line too long (handled by black)
+    "W503",  # line break before binary operator (conflicts with black)
+]
+max-complexity = 10
+exclude = [
+    ".git",
+    "__pycache__",
+    "docs",
+    "build",
+    "dist",
+    "venv",
+    ".venv",
+    "env",
+]
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..4d3082e
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,17 @@
+# RAG PDF Chatbot - Production Requirements
+
+# Core dependencies
+python-dotenv>=1.0.0
+langchain>=0.1.0
+langchain-community>=0.0.1
+langchain-core>=0.1.0
+langchain-text-splitters>=0.0.1
+faiss-cpu>=1.7.0
+pymupdf>=1.23.0
+tiktoken>=0.5.0
+typing-extensions>=4.0.0
+
+# Optional dependencies for specific features
+# ollama>=0.1.0  # Uncomment if you want to use Ollama directly
+# sentence-transformers>=2.2.0  # For alternative embeddings
+# torch>=2.0.0  # For GPU support with some models
diff --git a/scripts/setup_dev_env.ps1 b/scripts/setup_dev_env.ps1
new file mode 100644
index 0000000..a086dd3
--- /dev/null
+++ b/scripts/setup_dev_env.ps1
@@ -0,0 +1,117 @@
+<#
+.SYNOPSIS
+    Sets up the development environment for RAG PDF Chatbot.
+.DESCRIPTION
+    This script automates the setup of the development environment including:
+    - Creating a virtual environment
+    - Installing dependencies
+    - Setting up pre-commit hooks
+    - Running initial tests
+#>
+
+param (
+    [string]$PythonPath = "python"
+)
+
+function Write-Section {
+    param([string]$Title)
+    Write-Host "`n==========================================" -ForegroundColor Cyan
+    Write-Host $Title -ForegroundColor Green
+    Write-Host "==========================================`n" -ForegroundColor Cyan
+}
+
+function Write-Step {
+    param([string]$Message)
+    Write-Host "📋 $Message" -ForegroundColor Yellow
+}
+
+function Write-Success {
+    param([string]$Message)
+    Write-Host "✅ $Message" -ForegroundColor Green
+}
+
+function Write-Error {
+    param([string]$Message)
+    Write-Host "❌ $Message" -ForegroundColor Red
+}
+
+try {
+    # Check if Python is available
+    Write-Section "Checking Python Installation"
+    Write-Step "Verifying Python installation..."
+    $pythonVersion = & $PythonPath --version 2>&1
+    if (-not $pythonVersion) {
+        throw "Python not found. Please install Python 3.8+ and ensure it's in your PATH."
+    }
+    Write-Success "Python found: $pythonVersion"
+
+    # Create virtual environment
+    Write-Section "Setting Up Virtual Environment"
+    Write-Step "Creating virtual environment..."
+    if (Test-Path "venv") {
+        Write-Host "Virtual environment already exists." -ForegroundColor Yellow
+    } else {
+        & $PythonPath -m venv venv
+        Write-Success "Virtual environment created."
+    }
+
+    # Activate virtual environment
+    Write-Step "Activating virtual environment..."
+    if ($IsWindows) {
+        & .\venv\Scripts\Activate.ps1
+    } else {
+        & source venv/bin/activate
+    }
+    Write-Success "Virtual environment activated."
+
+    # Upgrade pip
+    Write-Step "Upgrading pip..."
+    & python -m pip install --upgrade pip
+    Write-Success "Pip upgraded."
+
+    # Install development dependencies
+    Write-Section "Installing Dependencies"
+    Write-Step "Installing development dependencies..."
+    & pip install -e .[dev]
+    Write-Success "Development dependencies installed."
+
+    # Set up pre-commit hooks
+    Write-Section "Setting Up Pre-Commit Hooks"
+    Write-Step "Installing pre-commit..."
+    & pre-commit install
+    Write-Success "Pre-commit hooks installed."
+
+    # Run pre-commit on all files
+    Write-Step "Running pre-commit on all files..."
+    & pre-commit run --all-files
+    Write-Success "Pre-commit checks completed."
+
+    # Run tests
+    Write-Section "Running Tests"
+    Write-Step "Running unit tests..."
+    & pytest tests/ -v
+    Write-Success "Tests completed."
+
+    # Create .env file if it doesn't exist
+    Write-Section "Setting Up Configuration"
+    if (-not (Test-Path ".env")) {
+        Write-Step "Creating .env file from template..."
+        Copy-Item .env.example .env
+        Write-Success ".env file created. Please review and modify as needed."
+    } else {
+        Write-Host ".env file already exists." -ForegroundColor Yellow
+    }
+
+    Write-Section "Setup Complete! 🎉"
+    Write-Host "Your development environment is ready." -ForegroundColor Green
+    Write-Host "`nNext steps:" -ForegroundColor Cyan
+    Write-Host "1. Review and modify .env file as needed" -ForegroundColor Yellow
+    Write-Host "2. Start coding! 🚀" -ForegroundColor Yellow
+    Write-Host "3. Run tests with: pytest tests/" -ForegroundColor Yellow
+    Write-Host "4. Run the application with: python -m src.main --help" -ForegroundColor Yellow
+
+} catch {
+    Write-Error "Setup failed: $_"
+    Write-Error "Please check the error and try again."
+    exit 1
+}
diff --git a/src/__init__.py b/src/__init__.py
new file mode 100644
index 0000000..bbe56c4
--- /dev/null
+++ b/src/__init__.py
@@ -0,0 +1,25 @@
+"""
+RAG PDF Chatbot - A Retrieval-Augmented Generation system for PDF documents.
+
+This package provides a modular, enterprise-grade implementation of RAG
+for question answering using PDF document collections.
+"""
+
+from .config import config, load_config
+from .document_processor import DocumentProcessor
+from .vector_store import VectorStoreManager
+from .rag_chain import RAGChain
+from .main import RAGPDFChatbot, main
+
+__version__ = "1.0.0"
+__author__ = "RAG PDF Chatbot Team"
+__license__ = "MIT"
+__all__ = [
+    "config",
+    "load_config",
+    "DocumentProcessor",
+    "VectorStoreManager",
+    "RAGChain",
+    "RAGPDFChatbot",
+    "main"
+]
diff --git a/src/config.py b/src/config.py
new file mode 100644
index 0000000..8c1db7b
--- /dev/null
+++ b/src/config.py
@@ -0,0 +1,110 @@
+"""
+Configuration module for RAG PDF Chatbot.
+
+This module handles all configuration settings, environment variables,
+and application constants in a centralized manner.
+"""
+
+import os
+from dataclasses import dataclass
+from typing import Optional, Dict, Any
+from dotenv import load_dotenv
+
+# Load environment variables from .env file
+load_dotenv()
+
+@dataclass
+class EmbeddingConfig:
+    """Configuration for embedding models."""
+    model_name: str = "nomic-embed-text"
+    base_url: str = "http://localhost:11434"
+    dimension: Optional[int] = None
+
+@dataclass
+class LLMConfig:
+    """Configuration for LLM models."""
+    model_name: str = "llama3.2:3b"
+    base_url: str = "http://localhost:11434"
+    temperature: float = 0.7
+    max_tokens: int = 512
+
+@dataclass
+class VectorStoreConfig:
+    """Configuration for vector store."""
+    index_type: str = "flat"
+    metric: str = "L2"
+    save_local: bool = True
+    local_path: str = "health_supplemets"
+
+@dataclass
+class RetrievalConfig:
+    """Configuration for document retrieval."""
+    search_type: str = "mmr"
+    k: int = 3
+    fetch_k: int = 100
+    lambda_mult: float = 1.0
+
+@dataclass
+class DocumentProcessingConfig:
+    """Configuration for document processing."""
+    chunk_size: int = 1000
+    chunk_overlap: int = 100
+    dataset_path: str = "rag-dataset"
+
+@dataclass
+class AppConfig:
+    """Main application configuration."""
+    embedding: EmbeddingConfig
+    llm: LLMConfig
+    vector_store: VectorStoreConfig
+    retrieval: RetrievalConfig
+    document_processing: DocumentProcessingConfig
+
+def load_config() -> AppConfig:
+    """
+    Load application configuration from environment variables and defaults.
+
+    Returns:
+        AppConfig: Application configuration object
+    """
+    # Load from environment variables if available, otherwise use defaults
+    embedding_config = EmbeddingConfig(
+        model_name=os.getenv("EMBEDDING_MODEL", "nomic-embed-text"),
+        base_url=os.getenv("OLLAMA_BASE_URL", "http://localhost:11434")
+    )
+
+    llm_config = LLMConfig(
+        model_name=os.getenv("LLM_MODEL", "llama3.2:3b"),
+        base_url=os.getenv("OLLAMA_BASE_URL", "http://localhost:11434"),
+        temperature=float(os.getenv("LLM_TEMPERATURE", "0.7")),
+        max_tokens=int(os.getenv("LLM_MAX_TOKENS", "512"))
+    )
+
+    vector_store_config = VectorStoreConfig(
+        local_path=os.getenv("VECTOR_STORE_PATH", "health_supplemets"),
+        save_local=os.getenv("SAVE_VECTOR_STORE", "true").lower() == "true"
+    )
+
+    retrieval_config = RetrievalConfig(
+        search_type=os.getenv("RETRIEVAL_TYPE", "mmr"),
+        k=int(os.getenv("RETRIEVAL_K", "3")),
+        fetch_k=int(os.getenv("RETRIEVAL_FETCH_K", "100")),
+        lambda_mult=float(os.getenv("RETRIEVAL_LAMBDA", "1.0"))
+    )
+
+    doc_processing_config = DocumentProcessingConfig(
+        chunk_size=int(os.getenv("CHUNK_SIZE", "1000")),
+        chunk_overlap=int(os.getenv("CHUNK_OVERLAP", "100")),
+        dataset_path=os.getenv("DATASET_PATH", "rag-dataset")
+    )
+
+    return AppConfig(
+        embedding=embedding_config,
+        llm=llm_config,
+        vector_store=vector_store_config,
+        retrieval=retrieval_config,
+        document_processing=doc_processing_config
+    )
+
+# Global configuration instance
+config = load_config()
diff --git a/src/document_processor.py b/src/document_processor.py
new file mode 100644
index 0000000..63c0589
--- /dev/null
+++ b/src/document_processor.py
@@ -0,0 +1,104 @@
+"""
+Document processing module for RAG PDF Chatbot.
+
+This module handles loading, processing, and chunking of PDF documents
+for the RAG pipeline.
+"""
+
+import os
+import warnings
+from typing import List, Dict, Any
+from langchain_community.document_loaders import PyMuPDFLoader
+from langchain_text_splitters import RecursiveCharacterTextSplitter
+from src.config import config
+
+# Suppress warnings for cleaner output
+warnings.filterwarnings("ignore")
+
+class DocumentProcessor:
+    """
+    Handles loading and processing of PDF documents.
+
+    Responsibilities:
+    - Discover PDF files in the dataset directory
+    - Load PDF content using PyMuPDFLoader
+    - Split documents into chunks for embedding
+    """
+
+    def __init__(self):
+        """Initialize document processor with configuration."""
+        self.dataset_path = config.document_processing.dataset_path
+        self.chunk_size = config.document_processing.chunk_size
+        self.chunk_overlap = config.document_processing.chunk_overlap
+
+    def discover_pdf_files(self) -> List[str]:
+        """
+        Discover all PDF files in the dataset directory.
+
+        Returns:
+            List[str]: List of file paths to PDF documents
+        """
+        pdf_files = []
+
+        for root, _, files in os.walk(self.dataset_path):
+            for file in files:
+                if file.lower().endswith('.pdf'):
+                    pdf_files.append(os.path.join(root, file))
+
+        if not pdf_files:
+            raise FileNotFoundError(
+                f"No PDF files found in dataset directory: {self.dataset_path}"
+            )
+
+        return pdf_files
+
+    def load_documents(self) -> List[Dict[str, Any]]:
+        """
+        Load all PDF documents from the dataset.
+
+        Returns:
+            List[Dict[str, Any]]: List of document objects
+        """
+        pdf_files = self.discover_pdf_files()
+        documents = []
+
+        for pdf_file in pdf_files:
+            try:
+                loader = PyMuPDFLoader(pdf_file)
+                pages = loader.load()
+                documents.extend(pages)
+            except Exception as e:
+                warnings.warn(f"Failed to load {pdf_file}: {str(e)}")
+                continue
+
+        if not documents:
+            raise RuntimeError("No documents were successfully loaded")
+
+        return documents
+
+    def chunk_documents(self, documents: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+        """
+        Split documents into chunks for embedding.
+
+        Args:
+            documents: List of document objects
+
+        Returns:
+            List[Dict[str, Any]]: List of document chunks
+        """
+        text_splitter = RecursiveCharacterTextSplitter(
+            chunk_size=self.chunk_size,
+            chunk_overlap=self.chunk_overlap
+        )
+
+        return text_splitter.split_documents(documents)
+
+    def process_documents(self) -> List[Dict[str, Any]]:
+        """
+        Complete document processing pipeline.
+
+        Returns:
+            List[Dict[str, Any]]: List of processed document chunks
+        """
+        documents = self.load_documents()
+        return self.chunk_documents(documents)
diff --git a/src/main.py b/src/main.py
new file mode 100644
index 0000000..5610306
--- /dev/null
+++ b/src/main.py
@@ -0,0 +1,152 @@
+"""
+Main application module for RAG PDF Chatbot.
+
+This module provides the main entry point and CLI interface
+for the RAG PDF Chatbot application.
+"""
+
+import os
+import warnings
+from typing import Optional
+from src.config import config
+from src.document_processor import DocumentProcessor
+from src.vector_store import VectorStoreManager
+from src.rag_chain import RAGChain
+
+# Suppress warnings for cleaner output
+warnings.filterwarnings("ignore")
+
+class RAGPDFChatbot:
+    """
+    Main application class for RAG PDF Chatbot.
+
+    This class orchestrates the entire RAG pipeline and provides
+    a simple interface for question answering.
+    """
+
+    def __init__(self):
+        """Initialize the RAG PDF Chatbot application."""
+        self.document_processor = DocumentProcessor()
+        self.vector_store_manager = VectorStoreManager()
+        self.rag_chain = None
+
+        # Set environment variable for KMP compatibility
+        os.environ["KMP_DUPLICATE_LIB_OK"] = "True"
+
+    def initialize(self, rebuild_vector_store: bool = False) -> None:
+        """
+        Initialize the application and prepare for question answering.
+
+        Args:
+            rebuild_vector_store: Whether to rebuild vector store from scratch
+        """
+        # Check if vector store exists and load it if not rebuilding
+        if not rebuild_vector_store and self.vector_store_manager.vector_store_exists():
+            print("Loading existing vector store...")
+            self.vector_store_manager.load_vector_store()
+        else:
+            print("Building vector store from documents...")
+            documents = self.document_processor.process_documents()
+            self.vector_store_manager.create_vector_store(documents)
+
+            if config.vector_store.save_local:
+                print("Saving vector store...")
+                self.vector_store_manager.save_vector_store()
+
+        # Initialize RAG chain
+        retriever = self.vector_store_manager.get_retriever()
+        self.rag_chain = RAGChain(retriever)
+
+    def ask(self, question: str) -> str:
+        """
+        Ask a question using the RAG pipeline.
+
+        Args:
+            question: Question to answer
+
+        Returns:
+            str: Answer to the question
+        """
+        if not self.rag_chain:
+            raise RuntimeError("Application not initialized. Call initialize() first.")
+
+        return self.rag_chain.ask_question(question)
+
+    def interactive_mode(self) -> None:
+        """
+        Run the application in interactive mode.
+
+        Allows users to ask multiple questions in a session.
+        """
+        print("RAG PDF Chatbot - Interactive Mode")
+        print("Type 'quit', 'exit', or 'q' to end the session.")
+        print()
+
+        while True:
+            try:
+                question = input("Ask a question: ").strip()
+
+                if question.lower() in ['quit', 'exit', 'q']:
+                    print("Goodbye!")
+                    break
+
+                if not question:
+                    continue
+
+                print("Processing your question...")
+                answer = self.ask(question)
+                print("\nAnswer:")
+                print(answer)
+                print("\n" + "="*50 + "\n")
+
+            except KeyboardInterrupt:
+                print("\nGoodbye!")
+                break
+            except Exception as e:
+                print(f"Error: {str(e)}")
+                continue
+
+def main():
+    """
+    Main entry point for the RAG PDF Chatbot application.
+    """
+    import argparse
+
+    parser = argparse.ArgumentParser(
+        description="RAG PDF Chatbot - Retrieval-Augmented Generation for PDF documents"
+    )
+    parser.add_argument(
+        "--rebuild",
+        action="store_true",
+        help="Rebuild vector store from scratch"
+    )
+    parser.add_argument(
+        "--interactive",
+        action="store_true",
+        help="Run in interactive mode"
+    )
+    parser.add_argument(
+        "--question",
+        type=str,
+        help="Ask a specific question"
+    )
+
+    args = parser.parse_args()
+
+    # Initialize application
+    chatbot = RAGPDFChatbot()
+    chatbot.initialize(rebuild_vector_store=args.rebuild)
+
+    # Handle different modes
+    if args.interactive:
+        chatbot.interactive_mode()
+    elif args.question:
+        answer = chatbot.ask(args.question)
+        print("Answer:")
+        print(answer)
+    else:
+        print("RAG PDF Chatbot")
+        print("Use --help for usage information")
+
+if __name__ == "__main__":
+    main()
diff --git a/src/rag_chain.py b/src/rag_chain.py
new file mode 100644
index 0000000..4eafb7a
--- /dev/null
+++ b/src/rag_chain.py
@@ -0,0 +1,126 @@
+"""
+RAG chain module for RAG PDF Chatbot.
+
+This module implements the Retrieval-Augmented Generation pipeline
+for question answering using retrieved document context.
+"""
+
+import warnings
+from typing import Dict, Any, Optional
+from langchain import hub
+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.runnables import RunnablePassthrough
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_ollama import ChatOllama
+from src.config import config
+
+# Suppress warnings for cleaner output
+warnings.filterwarnings("ignore")
+
+class RAGChain:
+    """
+    Implements the RAG (Retrieval-Augmented Generation) pipeline.
+
+    Responsibilities:
+    - Initialize LLM model
+    - Create prompt templates
+    - Build RAG chain pipeline
+    - Handle question answering
+    """
+
+    def __init__(self, retriever: Any):
+        """
+        Initialize RAG chain with retriever.
+
+        Args:
+            retriever: Document retriever object
+        """
+        self.retriever = retriever
+        self.llm = self._initialize_llm()
+        self.prompt = self._create_prompt()
+        self.chain = self._build_chain()
+
+    def _initialize_llm(self) -> ChatOllama:
+        """
+        Initialize the LLM model.
+
+        Returns:
+            ChatOllama: Configured LLM model
+        """
+        return ChatOllama(
+            model=config.llm.model_name,
+            base_url=config.llm.base_url,
+            temperature=config.llm.temperature
+        )
+
+    def _create_prompt(self) -> ChatPromptTemplate:
+        """
+        Create the prompt template for RAG.
+
+        Returns:
+            ChatPromptTemplate: Configured prompt template
+        """
+        # Try to load from hub first, fall back to custom template
+        try:
+            prompt = hub.pull("rlm/rag-prompt")
+        except Exception:
+            # Custom RAG prompt template
+            template = """
+You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know.
+
+If possible answer in bullet points. Make sure your answer is relevant to the question and it is answered from the context only.
+
+Question:{question}
+
+Context:{context}
+
+Answer:
+"""
+            prompt = ChatPromptTemplate.from_template(template)
+
+        return prompt
+
+    def _build_chain(self) -> Any:
+        """
+        Build the RAG chain pipeline.
+
+        Returns:
+            Any: Configured RAG chain
+        """
+        def format_docs(docs: List[Dict[str, Any]]) -> str:
+            """Format retrieved documents for context."""
+            return "\n\n".join([doc.page_content for doc in docs])
+
+        return (
+            {
+                "context": self.retriever | format_docs,
+                "question": RunnablePassthrough()
+            }
+            | self.prompt
+            | self.llm
+            | StrOutputParser()
+        )
+
+    def ask_question(self, question: str) -> str:
+        """
+        Ask a question using the RAG pipeline.
+
+        Args:
+            question: Question to answer
+
+        Returns:
+            str: Answer to the question
+        """
+        if not self.chain:
+            raise RuntimeError("RAG chain not initialized")
+
+        return self.chain.invoke(question)
+
+    def get_chain(self) -> Any:
+        """
+        Get the RAG chain object.
+
+        Returns:
+            Any: RAG chain object
+        """
+        return self.chain
diff --git a/src/vector_store.py b/src/vector_store.py
new file mode 100644
index 0000000..13def48
--- /dev/null
+++ b/src/vector_store.py
@@ -0,0 +1,147 @@
+"""
+Vector store module for RAG PDF Chatbot.
+
+This module handles document embedding, vector storage, and retrieval
+using FAISS and Ollama embeddings.
+"""
+
+import os
+import warnings
+import faiss
+from typing import List, Dict, Any, Optional
+from langchain_ollama import OllamaEmbeddings
+from langchain_community.vectorstores import FAISS
+from langchain_community.docstore.in_memory import InMemoryDocstore
+from src.config import config
+
+# Suppress warnings for cleaner output
+warnings.filterwarnings("ignore")
+
+class VectorStoreManager:
+    """
+    Manages document embedding and vector storage.
+
+    Responsibilities:
+    - Create embeddings using Ollama
+    - Store and retrieve vectors using FAISS
+    - Handle vector store persistence
+    """
+
+    def __init__(self):
+        """Initialize vector store manager with configuration."""
+        self.embedding_model = self._initialize_embedding_model()
+        self.vector_store = None
+        self.index = None
+
+    def _initialize_embedding_model(self) -> OllamaEmbeddings:
+        """
+        Initialize the embedding model.
+
+        Returns:
+            OllamaEmbeddings: Configured embedding model
+        """
+        return OllamaEmbeddings(
+            model=config.embedding.model_name,
+            base_url=config.embedding.base_url
+        )
+
+    def _initialize_vector_store(self) -> FAISS:
+        """
+        Initialize FAISS vector store with appropriate index.
+
+        Returns:
+            FAISS: Configured vector store
+        """
+        # Get embedding dimension by embedding a test string
+        test_embedding = self.embedding_model.embed_query("test")
+        embedding_dim = len(test_embedding)
+
+        # Create FAISS index
+        if config.vector_store.metric == "L2":
+            index = faiss.IndexFlatL2(embedding_dim)
+        else:
+            index = faiss.IndexFlatIP(embedding_dim)
+
+        return FAISS(
+            embedding_function=self.embedding_model,
+            index=index,
+            docstore=InMemoryDocstore(),
+            index_to_docstore_id={}
+        )
+
+    def create_vector_store(self, documents: List[Dict[str, Any]]) -> FAISS:
+        """
+        Create and populate vector store with document embeddings.
+
+        Args:
+            documents: List of document chunks to embed
+
+        Returns:
+            FAISS: Populated vector store
+        """
+        self.vector_store = self._initialize_vector_store()
+        self.vector_store.add_documents(documents)
+        return self.vector_store
+
+    def get_retriever(self) -> Any:
+        """
+        Get a retriever configured with current settings.
+
+        Returns:
+            Any: Configured retriever object
+        """
+        if not self.vector_store:
+            raise RuntimeError("Vector store not initialized")
+
+        return self.vector_store.as_retriever(
+            search_type=config.retrieval.search_type,
+            search_kwargs={
+                'k': config.retrieval.k,
+                'fetch_k': config.retrieval.fetch_k,
+                'lambda_mult': config.retrieval.lambda_mult
+            }
+        )
+
+    def save_vector_store(self, path: Optional[str] = None) -> None:
+        """
+        Save vector store to local storage.
+
+        Args:
+            path: Optional path to save vector store
+        """
+        if not self.vector_store:
+            raise RuntimeError("Vector store not initialized")
+
+        save_path = path or config.vector_store.local_path
+        self.vector_store.save_local(save_path)
+
+    def load_vector_store(self, path: Optional[str] = None) -> FAISS:
+        """
+        Load vector store from local storage.
+
+        Args:
+            path: Optional path to load vector store from
+
+        Returns:
+            FAISS: Loaded vector store
+        """
+        load_path = path or config.vector_store.local_path
+        self.vector_store = FAISS.load_local(
+            load_path,
+            embeddings=self.embedding_model,
+            allow_dangerous_deserialization=True
+        )
+        return self.vector_store
+
+    def vector_store_exists(self, path: Optional[str] = None) -> bool:
+        """
+        Check if vector store exists at specified path.
+
+        Args:
+            path: Optional path to check
+
+        Returns:
+            bool: True if vector store exists
+        """
+        check_path = path or config.vector_store.local_path
+        return os.path.exists(check_path) and os.path.isdir(check_path)
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..27722a5
--- /dev/null
+++ b/tests/__init__.py
@@ -0,0 +1,20 @@
+"""
+Test package for RAG PDF Chatbot.
+
+This package contains unit and integration tests for all components
+of the RAG PDF Chatbot application.
+"""
+
+import os
+import sys
+
+# Add src to path for testing
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../src')))
+
+# Mock environment variables for testing
+os.environ["OLLAMA_BASE_URL"] = "http://localhost:11434"
+os.environ["EMBEDDING_MODEL"] = "test-model"
+os.environ["LLM_MODEL"] = "test-llm"
+os.environ["DATASET_PATH"] = "tests/test-data"
+os.environ["VECTOR_STORE_PATH"] = "tests/test-vector-store"
+os.environ["SAVE_VECTOR_STORE"] = "false"
diff --git a/tests/test_config.py b/tests/test_config.py
new file mode 100644
index 0000000..32d689f
--- /dev/null
+++ b/tests/test_config.py
@@ -0,0 +1,153 @@
+"""
+Unit tests for the config module.
+"""
+
+import os
+import tempfile
+from unittest.mock import patch
+import pytest
+from src.config import (
+    EmbeddingConfig,
+    LLMConfig,
+    VectorStoreConfig,
+    RetrievalConfig,
+    DocumentProcessingConfig,
+    AppConfig,
+    load_config,
+    config,
+)
+
+def test_embedding_config_defaults():
+    """Test EmbeddingConfig with default values."""
+    embedding_config = EmbeddingConfig()
+    assert embedding_config.model_name == "nomic-embed-text"
+    assert embedding_config.base_url == "http://localhost:11434"
+    assert embedding_config.dimension is None
+
+def test_llm_config_defaults():
+    """Test LLMConfig with default values."""
+    llm_config = LLMConfig()
+    assert llm_config.model_name == "llama3.2:3b"
+    assert llm_config.base_url == "http://localhost:11434"
+    assert llm_config.temperature == 0.7
+    assert llm_config.max_tokens == 512
+
+def test_vector_store_config_defaults():
+    """Test VectorStoreConfig with default values."""
+    vector_config = VectorStoreConfig()
+    assert vector_config.index_type == "flat"
+    assert vector_config.metric == "L2"
+    assert vector_config.save_local is True
+    assert vector_config.local_path == "health_supplemets"
+
+def test_retrieval_config_defaults():
+    """Test RetrievalConfig with default values."""
+    retrieval_config = RetrievalConfig()
+    assert retrieval_config.search_type == "mmr"
+    assert retrieval_config.k == 3
+    assert retrieval_config.fetch_k == 100
+    assert retrieval_config.lambda_mult == 1.0
+
+def test_document_processing_config_defaults():
+    """Test DocumentProcessingConfig with default values."""
+    doc_config = DocumentProcessingConfig()
+    assert doc_config.chunk_size == 1000
+    assert doc_config.chunk_overlap == 100
+    assert doc_config.dataset_path == "rag-dataset"
+
+def test_app_config_structure():
+    """Test AppConfig structure."""
+    app_config = AppConfig(
+        embedding=EmbeddingConfig(),
+        llm=LLMConfig(),
+        vector_store=VectorStoreConfig(),
+        retrieval=RetrievalConfig(),
+        document_processing=DocumentProcessingConfig()
+    )
+    assert isinstance(app_config.embedding, EmbeddingConfig)
+    assert isinstance(app_config.llm, LLMConfig)
+    assert isinstance(app_config.vector_store, VectorStoreConfig)
+    assert isinstance(app_config.retrieval, RetrievalConfig)
+    assert isinstance(app_config.document_processing, DocumentProcessingConfig)
+
+@patch.dict(os.environ, {
+    "EMBEDDING_MODEL": "custom-embedding",
+    "OLLAMA_BASE_URL": "http://custom-ollama:11434",
+    "LLM_MODEL": "custom-llm",
+    "LLM_TEMPERATURE": "0.5",
+    "LLM_MAX_TOKENS": "256",
+    "VECTOR_STORE_PATH": "custom-vector-store",
+    "SAVE_VECTOR_STORE": "false",
+    "RETRIEVAL_TYPE": "similarity",
+    "RETRIEVAL_K": "5",
+    "RETRIEVAL_FETCH_K": "50",
+    "RETRIEVAL_LAMBDA": "0.5",
+    "CHUNK_SIZE": "500",
+    "CHUNK_OVERLAP": "50",
+    "DATASET_PATH": "custom-dataset"
+})
+def test_load_config_from_environment():
+    """Test loading configuration from environment variables."""
+    # Reload config to pick up environment variables
+    test_config = load_config()
+
+    # Test embedding config
+    assert test_config.embedding.model_name == "custom-embedding"
+    assert test_config.embedding.base_url == "http://custom-ollama:11434"
+
+    # Test LLM config
+    assert test_config.llm.model_name == "custom-llm"
+    assert test_config.llm.temperature == 0.5
+    assert test_config.llm.max_tokens == 256
+
+    # Test vector store config
+    assert test_config.vector_store.local_path == "custom-vector-store"
+    assert test_config.vector_store.save_local is False
+
+    # Test retrieval config
+    assert test_config.retrieval.search_type == "similarity"
+    assert test_config.retrieval.k == 5
+    assert test_config.retrieval.fetch_k == 50
+    assert test_config.retrieval.lambda_mult == 0.5
+
+    # Test document processing config
+    assert test_config.document_processing.chunk_size == 500
+    assert test_config.document_processing.chunk_overlap == 50
+    assert test_config.document_processing.dataset_path == "custom-dataset"
+
+def test_load_config_with_missing_env_vars():
+    """Test loading configuration with missing environment variables."""
+    # Clear environment variables
+    for key in [
+        "EMBEDDING_MODEL", "OLLAMA_BASE_URL", "LLM_MODEL", "LLM_TEMPERATURE",
+        "LLM_MAX_TOKENS", "VECTOR_STORE_PATH", "SAVE_VECTOR_STORE",
+        "RETRIEVAL_TYPE", "RETRIEVAL_K", "RETRIEVAL_FETCH_K", "RETRIEVAL_LAMBDA",
+        "CHUNK_SIZE", "CHUNK_OVERLAP", "DATASET_PATH"
+    ]:
+        if key in os.environ:
+            del os.environ[key]
+
+    # Load config should use defaults
+    test_config = load_config()
+
+    # Test that defaults are used
+    assert test_config.embedding.model_name == "nomic-embed-text"
+    assert test_config.llm.model_name == "llama3.2:3b"
+    assert test_config.vector_store.local_path == "health_supplemets"
+    assert test_config.retrieval.search_type == "mmr"
+    assert test_config.document_processing.dataset_path == "rag-dataset"
+
+def test_config_global_instance():
+    """Test that global config instance is properly initialized."""
+    assert isinstance(config, AppConfig)
+    assert isinstance(config.embedding, EmbeddingConfig)
+    assert isinstance(config.llm, LLMConfig)
+    assert isinstance(config.vector_store, VectorStoreConfig)
+    assert isinstance(config.retrieval, RetrievalConfig)
+    assert isinstance(config.document_processing, DocumentProcessingConfig)
+
+def test_config_immutability():
+    """Test that config instances are immutable where appropriate."""
+    embedding_config = EmbeddingConfig()
+    with pytest.raises(Exception):  # dataclasses are immutable by default
+        embedding_config.model_name = "new-model"
diff --git a/tests/test_document_processor.py b/tests/test_document_processor.py
new file mode 100644
index 0000000..1fc0108
--- /dev/null
+++ b/tests/test_document_processor.py
@@ -0,0 +1,162 @@
+"""
+Unit tests for the document processor module.
+"""
+
+import os
+import tempfile
+from unittest.mock import patch, MagicMock
+import pytest
+from src.document_processor import DocumentProcessor
+from src.config import config
+
+def test_document_processor_initialization():
+    """Test DocumentProcessor initialization."""
+    processor = DocumentProcessor()
+    assert processor.dataset_path == config.document_processing.dataset_path
+    assert processor.chunk_size == config.document_processing.chunk_size
+    assert processor.chunk_overlap == config.document_processing.chunk_overlap
+
+@patch('os.walk')
+@patch('src.document_processor.PyMuPDFLoader')
+def test_discover_pdf_files(mock_loader, mock_walk):
+    """Test PDF file discovery."""
+    # Setup mock file structure
+    mock_walk.return_value = [
+        ('tests/test-data', ['subdir'], ['test1.pdf', 'test2.txt', 'test3.pdf']),
+        ('tests/test-data/subdir', [], ['test4.pdf', 'test5.doc']),
+    ]
+
+    processor = DocumentProcessor()
+    pdf_files = processor.discover_pdf_files()
+
+    # Should find 3 PDF files
+    assert len(pdf_files) == 3
+    assert 'tests/test-data/test1.pdf' in pdf_files
+    assert 'tests/test-data/test3.pdf' in pdf_files
+    assert 'tests/test-data/subdir/test4.pdf' in pdf_files
+    assert 'tests/test-data/test2.txt' not in pdf_files
+    assert 'tests/test-data/subdir/test5.doc' not in pdf_files
+
+@patch('os.walk')
+def test_discover_pdf_files_no_pdfs(mock_walk):
+    """Test PDF file discovery when no PDFs are found."""
+    # Setup mock file structure with no PDFs
+    mock_walk.return_value = [
+        ('tests/test-data', [], ['test1.txt', 'test2.doc']),
+    ]
+
+    processor = DocumentProcessor()
+
+    with pytest.raises(FileNotFoundError) as excinfo:
+        processor.discover_pdf_files()
+
+    assert "No PDF files found in dataset directory" in str(excinfo.value)
+
+@patch('src.document_processor.PyMuPDFLoader')
+def test_load_documents(mock_loader):
+    """Test document loading."""
+    # Setup mock loader
+    mock_pages = [
+        MagicMock(page_content="Page 1 content", metadata={"source": "test.pdf", "page": 1}),
+        MagicMock(page_content="Page 2 content", metadata={"source": "test.pdf", "page": 2}),
+    ]
+    mock_loader.return_value.load.return_value = mock_pages
+
+    processor = DocumentProcessor()
+
+    with patch.object(processor, 'discover_pdf_files') as mock_discover:
+        mock_discover.return_value = ['test.pdf']
+        documents = processor.load_documents()
+
+    assert len(documents) == 2
+    assert documents[0].page_content == "Page 1 content"
+    assert documents[1].page_content == "Page 2 content"
+
+@patch('src.document_processor.PyMuPDFLoader')
+def test_load_documents_with_error(mock_loader):
+    """Test document loading with error handling."""
+    # Setup mock loader to raise exception
+    mock_loader.return_value.load.side_effect = Exception("Test error")
+
+    processor = DocumentProcessor()
+
+    with patch.object(processor, 'discover_pdf_files') as mock_discover:
+        mock_discover.return_value = ['test.pdf', 'test2.pdf']
+
+        # Mock the second loader to work
+        def side_effect(file):
+            if file == 'test.pdf':
+                raise Exception("Test error")
+            return [MagicMock(page_content="Working content", metadata={})]
+
+        mock_loader.return_value.load.side_effect = side_effect
+
+        documents = processor.load_documents()
+
+    assert len(documents) == 1
+    assert documents[0].page_content == "Working content"
+
+@patch('src.document_processor.PyMuPDFLoader')
+def test_load_documents_all_fail(mock_loader):
+    """Test document loading when all files fail."""
+    # Setup mock loader to always raise exception
+    mock_loader.return_value.load.side_effect = Exception("Test error")
+
+    processor = DocumentProcessor()
+
+    with patch.object(processor, 'discover_pdf_files') as mock_discover:
+        mock_discover.return_value = ['test.pdf']
+
+        with pytest.raises(RuntimeError) as excinfo:
+            processor.load_documents()
+
+        assert "No documents were successfully loaded" in str(excinfo.value)
+
+@patch('src.document_processor.RecursiveCharacterTextSplitter')
+@patch('src.document_processor.PyMuPDFLoader')
+def test_chunk_documents(mock_loader, mock_splitter):
+    """Test document chunking."""
+    # Setup mock documents
+    mock_documents = [
+        MagicMock(page_content="Document 1 content", metadata={}),
+        MagicMock(page_content="Document 2 content", metadata={}),
+    ]
+
+    # Setup mock splitter
+    mock_chunks = [
+        MagicMock(page_content="Chunk 1", metadata={}),
+        MagicMock(page_content="Chunk 2", metadata={}),
+        MagicMock(page_content="Chunk 3", metadata={}),
+    ]
+    mock_splitter.return_value.split_documents.return_value = mock_chunks
+
+    processor = DocumentProcessor()
+    chunks = processor.chunk_documents(mock_documents)
+
+    assert len(chunks) == 3
+    assert chunks == mock_chunks
+
+    # Verify splitter was called with correct parameters
+    mock_splitter.assert_called_once()
+    splitter_instance = mock_splitter.return_value
+    splitter_instance.split_documents.assert_called_once_with(mock_documents)
+
+@patch('src.document_processor.RecursiveCharacterTextSplitter')
+@patch('src.document_processor.PyMuPDFLoader')
+def test_process_documents(mock_loader, mock_splitter):
+    """Test complete document processing pipeline."""
+    # Setup mock documents and chunks
+    mock_documents = [MagicMock(page_content="Doc content", metadata={})]
+    mock_chunks = [MagicMock(page_content="Chunk content", metadata={})]
+
+    mock_loader.return_value.load.return_value = mock_documents
+    mock_splitter.return_value.split_documents.return_value = mock_chunks
+
+    processor = DocumentProcessor()
+
+    with patch.object(processor, 'discover_pdf_files') as mock_discover:
+        mock_discover.return_value = ['test.pdf']
+        chunks = processor.process_documents()
+
+    assert len(chunks) == 1
+    assert chunks == mock_chunks
diff --git a/tests/test_integration.py b/tests/test_integration.py
new file mode 100644
index 0000000..d9019ee
--- /dev/null
+++ b/tests/test_integration.py
@@ -0,0 +1,140 @@
+"""
+Integration tests for RAG PDF Chatbot.
+
+These tests verify that the major components work together correctly.
+"""
+
+import os
+import tempfile
+from unittest.mock import patch, MagicMock
+import pytest
+from src.main import RAGPDFChatbot
+from src.config import config
+
+@patch('src.vector_store.VectorStoreManager')
+@patch('src.document_processor.DocumentProcessor')
+def test_chatbot_initialization(mock_doc_processor, mock_vector_store):
+    """Test RAGPDFChatbot initialization."""
+    # Setup mocks
+    mock_doc_processor_instance = mock_doc_processor.return_value
+    mock_vector_store_instance = mock_vector_store.return_value
+    mock_vector_store_instance.vector_store_exists.return_value = False
+
+    # Initialize chatbot
+    chatbot = RAGPDFChatbot()
+
+    # Verify components are initialized
+    assert chatbot.document_processor == mock_doc_processor_instance
+    assert chatbot.vector_store_manager == mock_vector_store_instance
+    assert chatbot.rag_chain is None
+
+@patch('src.vector_store.VectorStoreManager')
+@patch('src.document_processor.DocumentProcessor')
+def test_chatbot_initialize_with_existing_vector_store(mock_doc_processor, mock_vector_store):
+    """Test chatbot initialization with existing vector store."""
+    # Setup mocks
+    mock_vector_store_instance = mock_vector_store.return_value
+    mock_vector_store_instance.vector_store_exists.return_value = True
+    mock_retriever = MagicMock()
+    mock_vector_store_instance.get_retriever.return_value = mock_retriever
+
+    # Initialize chatbot
+    chatbot = RAGPDFChatbot()
+    chatbot.initialize(rebuild_vector_store=False)
+
+    # Verify vector store was loaded
+    mock_vector_store_instance.load_vector_store.assert_called_once()
+    mock_vector_store_instance.create_vector_store.assert_not_called()
+
+    # Verify RAG chain was initialized
+    assert chatbot.rag_chain is not None
+
+@patch('src.vector_store.VectorStoreManager')
+@patch('src.document_processor.DocumentProcessor')
+def test_chatbot_initialize_with_new_vector_store(mock_doc_processor, mock_vector_store):
+    """Test chatbot initialization with new vector store."""
+    # Setup mocks
+    mock_doc_processor_instance = mock_doc_processor.return_value
+    mock_vector_store_instance = mock_vector_store.return_value
+    mock_vector_store_instance.vector_store_exists.return_value = False
+
+    mock_documents = [MagicMock(), MagicMock()]
+    mock_doc_processor_instance.process_documents.return_value = mock_documents
+
+    mock_retriever = MagicMock()
+    mock_vector_store_instance.get_retriever.return_value = mock_retriever
+
+    # Initialize chatbot
+    chatbot = RAGPDFChatbot()
+    chatbot.initialize(rebuild_vector_store=False)
+
+    # Verify vector store was created
+    mock_vector_store_instance.create_vector_store.assert_called_once_with(mock_documents)
+    mock_vector_store_instance.load_vector_store.assert_not_called()
+
+    # Verify RAG chain was initialized
+    assert chatbot.rag_chain is not None
+
+@patch('src.vector_store.VectorStoreManager')
+@patch('src.document_processor.DocumentProcessor')
+def test_chatbot_ask_question(mock_doc_processor, mock_vector_store):
+    """Test chatbot question answering."""
+    # Setup mocks
+    mock_vector_store_instance = mock_vector_store.return_value
+    mock_vector_store_instance.vector_store_exists.return_value = False
+
+    mock_documents = [MagicMock()]
+    mock_doc_processor.return_value.process_documents.return_value = mock_documents
+
+    mock_retriever = MagicMock()
+    mock_vector_store_instance.get_retriever.return_value = mock_retriever
+
+    mock_rag_chain = MagicMock()
+    mock_rag_chain.ask_question.return_value = "Test answer"
+
+    # Mock RAGChain constructor
+    with patch('src.main.RAGChain') as mock_rag_chain_class:
+        mock_rag_chain_class.return_value = mock_rag_chain
+
+        # Initialize chatbot
+        chatbot = RAGPDFChatbot()
+        chatbot.initialize(rebuild_vector_store=False)
+
+        # Ask a question
+        answer = chatbot.ask("Test question")
+
+        # Verify answer
+        assert answer == "Test answer"
+        mock_rag_chain.ask_question.assert_called_once_with("Test question")
+
+@patch('src.vector_store.VectorStoreManager')
+@patch('src.document_processor.DocumentProcessor')
+def test_chatbot_ask_before_initialization(mock_doc_processor, mock_vector_store):
+    """Test asking question before initialization raises error."""
+    # Initialize chatbot without calling initialize
+    chatbot = RAGPDFChatbot()
+
+    # Should raise error when asking question
+    with pytest.raises(RuntimeError) as excinfo:
+        chatbot.ask("Test question")
+
+    assert "Application not initialized" in str(excinfo.value)
+
+def test_chatbot_interactive_mode_mock():
+    """Test interactive mode with mocked input."""
+    # This is a basic test - in a real scenario, you'd want more comprehensive testing
+    # of the interactive mode, possibly using a testing framework that can simulate
+    # user input
+
+    with patch('builtins.input', side_effect=['quit']):
+        with patch('builtins.print') as mock_print:
+            chatbot = RAGPDFChatbot()
+
+            # Mock the initialize method to avoid dependencies
+            with patch.object(chatbot, 'initialize'):
+                chatbot.interactive_mode()
+
+            # Verify it printed the welcome message
+            calls = [str(call) for call in mock_print.call_args_list]
+            welcome_msg = any("RAG PDF Chatbot - Interactive Mode" in str(call) for call in calls)
+            assert welcome_msg is True