diff --git a/Dockerfile.webapp b/Dockerfile.webapp
new file mode 100644
index 0000000..ab20eb4
--- /dev/null
+++ b/Dockerfile.webapp
@@ -0,0 +1,43 @@
+FROM python:3.13-slim
+
+# Install LibreOffice for local conversion (optional, can use the separate container).
+# curl is used by the docker-compose healthcheck for this service.
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+        curl \
+        libreoffice \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install UV first
+# NOTE(review): uv is unpinned; consider pinning a version for reproducible builds.
+RUN pip install --no-cache-dir uv
+
+# Create an unprivileged user (with a home directory for per-user tool state)
+# and give it ownership of the application directory
+RUN useradd --create-home --uid 10001 app && \
+    mkdir /app && \
+    chown app:app /app
+
+# Set working directory
+WORKDIR /app
+
+# Drop root before installing and running the application
+USER app
+
+# Copy project files
+COPY --chown=app:app pyproject.toml ./
+COPY --chown=app:app src/ ./src/
+
+# Install dependencies (without frozen lock to allow updates).
+# --no-cache keeps uv's download cache out of the image layer.
+RUN uv sync --no-cache
+
+# Set Python path
+ENV PYTHONPATH=/app
+
+# Expose port
+EXPOSE 8000
+
+# Run the web application; --no-sync reuses the environment built above
+# instead of re-resolving dependencies at every container start
+CMD ["uv", "run", "--no-sync", "python", "-m", "uvicorn", "src.webapp:app", "--host", "0.0.0.0", "--port", "8000"]
diff --git a/LOCALHOST_GUIDE.md b/LOCALHOST_GUIDE.md
new file mode 100644
index 0000000..47104c6
--- /dev/null
+++ b/LOCALHOST_GUIDE.md
@@ -0,0 +1,166 @@
+# Running PPT2Desc on Localhost
+
+This guide explains how to run the PPT2Desc web application on your local machine.
+
+## Quick Start
+
+### Option 1: Using Docker Compose (Recommended)
+
+This is the easiest way to get started. Both LibreOffice converter and the web application will run in containers.
+
+1. **Start the services:**
+   ```bash
+   docker compose up -d
+   ```
+
+2. **Access the web interface:**
+   - Open your browser and navigate to: **http://localhost:5001**
+
+3. **Stop the services:**
+   ```bash
+   docker compose down
+   ```
+
+That's it! The web interface will be available at http://localhost:5001, and you can upload PowerPoint files directly through your browser.
+
+### Option 2: Running Locally with UV
+
+If you prefer to run the application directly on your machine without Docker:
+
+1. **Install dependencies:**
+   ```bash
+   uv sync
+   ```
+
+2. 
**Start the LibreOffice converter (optional, if you want to use Docker-based conversion):** + ```bash + docker compose up -d libreoffice-converter + ``` + +3. **Run the web application:** + ```bash + uv run uvicorn src.webapp:app --host 0.0.0.0 --port 5001 + ``` + +4. **Access the web interface:** + - Open your browser and navigate to: **http://localhost:5001** + +## Using the Web Interface + +Once the application is running, you can: + +1. **Upload a PowerPoint file** (.ppt or .pptx) +2. **Select an AI provider** (Gemini, OpenAI, Anthropic, etc.) +3. **Configure model settings** (API keys, model name, etc.) +4. **Add optional instructions** to customize the output +5. **Click "Convert Presentation"** to process your file + +The results will be displayed directly in the browser, showing detailed descriptions for each slide. + +The web service runs on **port 5001** by default. + +## Configuration Options + +### AI Provider Settings + +The web interface supports multiple AI providers: + +- **Google Gemini API**: Requires API key +- **Google Vertex AI**: Requires GCP project ID, region, and service account credentials +- **OpenAI**: Requires API key +- **Anthropic Claude**: Requires API key +- **Azure OpenAI**: Requires API key, endpoint, and deployment name +- **AWS Bedrock**: Requires access key ID, secret access key, and region + +### LibreOffice Configuration + +By default, the web application uses the Docker-based LibreOffice converter at `http://libreoffice-converter:2002` (when using Docker Compose) or `http://localhost:2002` (when running locally). + +If you have LibreOffice installed locally, you can leave the LibreOffice URL field blank, and the application will attempt to find it in your system PATH. 
+ +## API Endpoints + +If you want to integrate the service programmatically: + +### Health Check +```bash +curl http://localhost:5001/health +``` + +### Convert Presentation +```bash +curl -X POST http://localhost:5001/convert \ + -F "file=@presentation.pptx" \ + -F "client=gemini" \ + -F "api_key=YOUR_API_KEY" \ + -F "model=gemini-2.5-flash" +``` + +## Troubleshooting + +### Port Already in Use + +If port 5001 is already in use, you can change it: + +**Docker Compose:** +Edit `docker-compose.yml` and change the port mapping: +```yaml +ports: + - "5002:8000" # Change 5002 to any available port +``` + +**Local Running:** +```bash +uv run uvicorn src.webapp:app --host 0.0.0.0 --port 5002 +``` + +### LibreOffice Connection Issues + +If you get errors about LibreOffice conversion: + +1. Make sure the LibreOffice converter is running: + ```bash + docker compose ps + ``` + +2. Check the health of the converter: + ```bash + curl http://localhost:2002/health + ``` + +3. If using local LibreOffice, ensure it's installed: + ```bash + which soffice + # or + which libreoffice + ``` + +### Memory Issues + +For large presentations or high rate limits, you may need to increase Docker memory limits. Edit your Docker settings or add resource limits to `docker-compose.yml`. + +## Development + +To run in development mode with auto-reload: + +```bash +uv run uvicorn src.webapp:app --host 0.0.0.0 --port 5001 --reload +``` + +## Environment Variables + +You can set default values using environment variables: + +```bash +export GEMINI_API_KEY=your_api_key +export OPENAI_API_KEY=your_api_key +export ANTHROPIC_API_KEY=your_api_key +``` + +Then you won't need to enter API keys in the web interface each time. 
+ +## Next Steps + +- Check the main [README.md](README.md) for detailed information about the project +- Learn about customizing prompts and instructions +- Explore the CLI version for batch processing diff --git a/README.md b/README.md index aa6de3a..28a4b56 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,8 @@ ppt2desc is a command-line tool that converts PowerPoint presentations into deta ## Features +- **Web Interface**: Easy-to-use browser-based interface for converting presentations +- **CLI Tool**: Command-line interface for batch processing and automation - Convert PPT/PPTX files to semantic descriptions - Process individual files or entire directories - Support for visual elements interpretation (charts, graphs, figures) @@ -80,6 +82,26 @@ This will create a virtual environment and install all dependencies from `pyproj ## Usage +### Web Interface (Recommended for Quick Start) + +The easiest way to use ppt2desc is through the web interface: + +1. **Start the web application:** + ```bash + docker compose up -d + ``` + +2. **Open your browser and navigate to:** + ``` + http://localhost:5001 + ``` + +3. **Upload your PowerPoint file, configure your AI provider, and convert!** + +For detailed instructions, see [LOCALHOST_GUIDE.md](LOCALHOST_GUIDE.md). + +### Command Line Interface + Basic usage with Gemini API: ```bash uv run src/main.py \ diff --git a/docker-compose.yml b/docker-compose.yml index 3c56a3a..11bdd02 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,6 +1,6 @@ services: libreoffice-converter: - build: + build: context: ./src/libreoffice_docker dockerfile: Dockerfile ports: @@ -11,4 +11,21 @@ services: test: ["CMD", "curl", "-f", "http://localhost:2002/health"] interval: 300s timeout: 10s + retries: 3 + + ppt2desc-web: + build: + context: . 
+ dockerfile: Dockerfile.webapp + ports: + - "5001:8000" + restart: unless-stopped + depends_on: + - libreoffice-converter + environment: + - LIBREOFFICE_URL=http://libreoffice-converter:2002 + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8000/health"] + interval: 30s + timeout: 10s retries: 3 \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 8b7c2d7..bf9cb7b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,6 +15,7 @@ dependencies = [ "charset-normalizer==3.4.1", "distro==1.9.0", "docstring-parser==0.16", + "fastapi>=0.115.0", "google-ai-generativelanguage==0.6.10", "google-api-core==2.24.0", "google-api-python-client==2.156.0", @@ -53,6 +54,7 @@ dependencies = [ "pymupdf==1.25.1", "pyparsing==3.2.1", "python-dateutil==2.9.0.post0", + "python-multipart>=0.0.12", "requests==2.32.3", "rsa==4.9", "s3transfer==0.10.4", @@ -63,6 +65,7 @@ dependencies = [ "typing-extensions==4.12.2", "uritemplate==4.1.1", "urllib3==2.3.0", + "uvicorn>=0.32.0", "pytest==8.3.3", "pytest-mock==3.14.0", ] diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000..8074dd0 --- /dev/null +++ b/src/__init__.py @@ -0,0 +1 @@ +# Package initialization diff --git a/src/processor.py b/src/processor.py index 8ede73c..689d88f 100644 --- a/src/processor.py +++ b/src/processor.py @@ -10,11 +10,19 @@ from concurrent.futures import ThreadPoolExecutor, as_completed from tqdm import tqdm -from llm import LLMClient -from converters.ppt_converter import convert_pptx_to_pdf -from converters.pdf_converter import convert_pdf_to_images -from converters.docker_converter import convert_pptx_via_docker -from schemas.deck import DeckData, SlideData +# Support both relative imports (for webapp) and absolute imports (for main.py) +try: + from .llm import LLMClient + from .converters.ppt_converter import convert_pptx_to_pdf + from .converters.pdf_converter import convert_pdf_to_images + from .converters.docker_converter import 
convert_pptx_via_docker + from .schemas.deck import DeckData, SlideData +except ImportError: + from llm import LLMClient + from converters.ppt_converter import convert_pptx_to_pdf + from converters.pdf_converter import convert_pdf_to_images + from converters.docker_converter import convert_pptx_via_docker + from schemas.deck import DeckData, SlideData # Create a type alias for all possible clients logger = logging.getLogger(__name__) diff --git a/src/webapp.py b/src/webapp.py new file mode 100644 index 0000000..f09a326 --- /dev/null +++ b/src/webapp.py @@ -0,0 +1,726 @@ +from fastapi import FastAPI, UploadFile, File, Form, HTTPException +from fastapi.responses import HTMLResponse, JSONResponse +from fastapi.staticfiles import StaticFiles +from pathlib import Path +import tempfile +import shutil +import logging +import sys +from typing import Optional +import json + +# Support both relative imports (for module) and absolute imports (for direct run) +try: + from .llm.google_unified import GoogleUnifiedClient + from .llm.openai import OpenAIClient + from .llm.anthropic import AnthropicClient + from .llm.azure import AzureClient + from .llm.aws import AWSClient + from .processor import process_input_path + from .prompt import BASE_PROMPT +except ImportError: + from llm.google_unified import GoogleUnifiedClient + from llm.openai import OpenAIClient + from llm.anthropic import AnthropicClient + from llm.azure import AzureClient + from llm.aws import AWSClient + from processor import process_input_path + from prompt import BASE_PROMPT + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s [%(levelname)s] %(name)s - %(message)s", + handlers=[logging.StreamHandler(sys.stdout)] +) +logger = logging.getLogger(__name__) + +app = FastAPI(title="PPT2Desc Web Service") + +# HTML template for the web interface +HTML_TEMPLATE = """ + + + + + + PPT to Description Converter + + + +
+

🎯 PPT to Description

+

Convert PowerPoint presentations into semantic descriptions using AI

+ +
+
+ + +
+ +
+ + +
+ +
+ + + Leave blank to use default model for selected provider +
+ + +
+ + + Required for gemini, openai, and anthropic providers +
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ +
+ + +
+ +
+ + + Use Docker-based LibreOffice converter at http://localhost:2002. Leave blank to use local LibreOffice installation. +
+ +
+ + +
+ +
+
+ + +
+
+ + +
+ +
+
+
+
+
+
+
+"""
+
+
+@app.get("/", response_class=HTMLResponse)
+async def home():
+    """Serve the web interface"""
+    return HTML_TEMPLATE
+
+
+@app.get("/health")
+async def health_check():
+    """Health check endpoint"""
+    return {"status": "healthy"}
+
+
+@app.post("/convert")
+async def convert_presentation(
+    file: UploadFile = File(...),
+    client: str = Form(...),
+    model: Optional[str] = Form(None),
+    api_key: Optional[str] = Form(None),
+    instructions: Optional[str] = Form(None),
+    libreoffice_url: Optional[str] = Form(None),
+    rate_limit: int = Form(60),
+    save_pdf: bool = Form(False),
+    save_images: bool = Form(False),
+    # Vertex AI fields
+    gcp_project_id: Optional[str] = Form(None),
+    gcp_region: Optional[str] = Form(None),
+    gcp_application_credentials: Optional[str] = Form(None),
+    # Azure fields
+    azure_openai_api_key: Optional[str] = Form(None),
+    azure_openai_endpoint: Optional[str] = Form(None),
+    azure_deployment_name: Optional[str] = Form(None),
+    azure_api_version: Optional[str] = Form("2023-12-01-preview"),
+    # AWS fields
+    aws_access_key_id: Optional[str] = Form(None),
+    aws_secret_access_key: Optional[str] = Form(None),
+    aws_region: Optional[str] = Form("us-east-1"),
+):
+    """
+    Convert a PowerPoint presentation to semantic descriptions.
+
+    The upload is written to a temporary directory, run through
+    ``process_input_path`` with the selected LLM client, and the JSON file
+    written by the processor is returned as the response body. Both temporary
+    directories are removed before the request completes.
+
+    Args:
+        file: Uploaded presentation; its name must end in .pptx or .ppt.
+        client: Provider key: "gemini", "vertexai", "openai", "anthropic",
+            "azure" or "aws".
+        model: Model name; a per-provider default is used when blank.
+        api_key: API key passed to the gemini, openai and anthropic clients.
+        instructions: Optional text appended to BASE_PROMPT.
+        libreoffice_url: Converter endpoint. When blank, the LIBREOFFICE_URL
+            environment variable is used, then a local soffice/libreoffice
+            binary found on PATH.
+        rate_limit, save_pdf, save_images: Passed through to
+            ``process_input_path``.
+        gcp_*, azure_*, aws_*: Provider-specific settings forwarded to the
+            matching client.
+
+    Returns:
+        JSONResponse with the parsed contents of the generated JSON file.
+
+    Raises:
+        HTTPException: 400 for an unsupported file type, unsupported client or
+            missing provider settings; 500 when model initialization,
+            conversion or result parsing fails.
+    """
+    # Function-scope imports so this handler does not depend on edits to the
+    # module's import block
+    import asyncio
+    import os
+
+    if not file.filename or not file.filename.lower().endswith(('.pptx', '.ppt')):
+        raise HTTPException(status_code=400, detail="File must be a .pptx or .ppt")
+
+    # The filename is supplied by the client: keep only its final component so
+    # values such as "../../x.pptx" cannot escape the temporary directory
+    safe_name = Path(file.filename.replace("\\", "/")).name
+
+    # Create temporary directories
+    temp_dir = tempfile.mkdtemp()
+    output_dir = tempfile.mkdtemp()
+
+    try:
+        temp_path = Path(temp_dir)
+        output_path = Path(output_dir)
+        input_file = temp_path / safe_name
+
+        # Save uploaded file
+        with input_file.open("wb") as f:
+            shutil.copyfileobj(file.file, f)
+
+        # Build prompt
+        prompt = BASE_PROMPT
+        if instructions and instructions.strip():
+            prompt = f"{BASE_PROMPT}\n\nAdditional instructions:\n{instructions}"
+
+        # Set default model based on client if not provided
+        if not model or model.strip() == "":
+            model_defaults = {
+                "gemini": "gemini-2.5-flash",
+                "vertexai": "gemini-2.5-flash",
+                "openai": "gpt-4o",
+                "anthropic": "claude-3-5-sonnet-20241022",
+                "azure": "gpt-4o",
+                "aws": "us.amazon.nova-lite-v1:0"
+            }
+            model = model_defaults.get(client, "gemini-2.5-flash")
+
+        # Initialize model instance
+        try:
+            if client == "gemini":
+                model_instance = GoogleUnifiedClient(
+                    api_key=api_key,
+                    model=model,
+                    use_vertex=False
+                )
+            elif client == "vertexai":
+                if not gcp_project_id or not gcp_application_credentials:
+                    raise HTTPException(
+                        status_code=400,
+                        detail="GCP project_id and application_credentials are required for Vertex AI"
+                    )
+                model_instance = GoogleUnifiedClient(
+                    credentials_path=gcp_application_credentials,
+                    project_id=gcp_project_id,
+                    region=gcp_region,
+                    model=model,
+                    use_vertex=True
+                )
+            elif client == "openai":
+                model_instance = OpenAIClient(api_key=api_key, model=model)
+            elif client == "anthropic":
+                model_instance = AnthropicClient(api_key=api_key, model=model)
+            elif client == "azure":
+                if not azure_openai_api_key or not azure_openai_endpoint or not azure_deployment_name:
+                    raise HTTPException(
+                        status_code=400,
+                        detail="Azure API key, endpoint, and deployment name are required"
+                    )
+                model_instance = AzureClient(
+                    api_key=azure_openai_api_key,
+                    endpoint=azure_openai_endpoint,
+                    deployment=azure_deployment_name,
+                    api_version=azure_api_version
+                )
+            elif client == "aws":
+                if not aws_access_key_id or not aws_secret_access_key:
+                    raise HTTPException(
+                        status_code=400,
+                        detail="AWS access key ID and secret access key are required"
+                    )
+                model_instance = AWSClient(
+                    access_key_id=aws_access_key_id,
+                    secret_access_key=aws_secret_access_key,
+                    region=aws_region,
+                    model=model
+                )
+            else:
+                raise HTTPException(status_code=400, detail=f"Unsupported client: {client}")
+        except HTTPException:
+            # HTTPException is an Exception subclass: without this clause the
+            # 400 validation errors above would be re-wrapped as 500s below
+            raise
+        except Exception as e:
+            logger.error(f"Failed to initialize model: {str(e)}")
+            raise HTTPException(status_code=500, detail=f"Failed to initialize model: {str(e)}")
+
+        # Determine LibreOffice configuration: explicit form value first, then
+        # the LIBREOFFICE_URL environment variable (set for the web container
+        # in docker-compose.yml), then a local binary
+        endpoint_candidate = (libreoffice_url or "").strip() or os.environ.get("LIBREOFFICE_URL", "").strip()
+        if endpoint_candidate:
+            libreoffice_endpoint = endpoint_candidate
+            libreoffice_path = None
+        else:
+            # Try to find local LibreOffice
+            libreoffice_binary = shutil.which("soffice") or shutil.which("libreoffice")
+            if libreoffice_binary:
+                libreoffice_path = Path(libreoffice_binary)
+                libreoffice_endpoint = None
+            else:
+                raise HTTPException(
+                    status_code=500,
+                    detail="LibreOffice not found. Please provide a LibreOffice URL or install LibreOffice locally"
+                )
+
+        # Process the presentation. process_input_path is synchronous, so it is
+        # run in a worker thread to keep the event loop (and /health) responsive
+        logger.info(f"Processing {safe_name} with {client} model {model}")
+        results = await asyncio.to_thread(
+            process_input_path,
+            input_path=input_file,
+            output_dir=output_path,
+            libreoffice_path=libreoffice_path,
+            libreoffice_endpoint=libreoffice_endpoint,
+            model_instance=model_instance,
+            rate_limit=rate_limit,
+            prompt=prompt,
+            save_pdf=save_pdf,
+            save_images=save_images,
+            max_workers=None
+        )
+
+        if not results:
+            raise HTTPException(status_code=500, detail="Processing failed - no results returned")
+
+        # The actual slide descriptions are written to a JSON file
+        # Read the JSON file that was generated by the processor
+        json_filename = input_file.stem + ".json"
+        json_file_path = output_path / json_filename
+
+        if not json_file_path.exists():
+            raise HTTPException(
+                status_code=500,
+                detail=f"Processing completed but JSON file not found: {json_filename}"
+            )
+
+        # Read and parse the JSON file
+        try:
+            with open(json_file_path, 'r', encoding='utf-8') as f:
+                json_data = json.load(f)
+
+            # Validate that we have the expected structure
+            if "slides" not in json_data:
+                raise HTTPException(
+                    status_code=500,
+                    detail="Invalid JSON structure - missing 'slides' key"
+                )
+
+            # Return the JSON data directly (it's already in the correct format)
+            return JSONResponse(content=json_data)
+
+        except json.JSONDecodeError as e:
+            logger.error(f"Failed to parse JSON file: {e}")
+            raise HTTPException(
+                status_code=500,
+                detail=f"Failed to parse generated JSON: {str(e)}"
+            )
+
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.error(f"Conversion error: {str(e)}", exc_info=True)
+        raise HTTPException(status_code=500, detail=f"Conversion failed: {str(e)}")
+    finally:
+        # Cleanup temporary directories
+        shutil.rmtree(temp_dir, ignore_errors=True)
+        shutil.rmtree(output_dir, ignore_errors=True)
+
+
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=8000)
diff --git a/start_web.sh b/start_web.sh
new file mode 100755
index 0000000..88361a7
--- /dev/null
+++ b/start_web.sh
@@ -0,0 +1,91 @@
+#!/bin/bash
+
+# Start PPT2Desc Web Application
+# This script provides an easy way to start the web interface
+# Usage: ./start_web.sh [--rebuild]
+
+# Run from the script's own directory so docker-compose.yml and src/ resolve
+# regardless of where the script is invoked from
+cd "$(dirname "$0")" || exit 1
+
+echo "🚀 Starting PPT2Desc Web Application..."
+echo ""
+
+# Parse arguments
+REBUILD=false
+if [ "$1" = "--rebuild" ]; then
+    REBUILD=true
+fi
+
+# Check if Docker and its Compose plugin are available. The plugin is probed by
+# running it: `command -v docker compose` looks up "docker" and "compose" as two
+# separate command names, so it never tests the Compose plugin itself.
+if command -v docker &> /dev/null && docker compose version &> /dev/null; then
+    echo "✓ Docker found"
+    echo ""
+
+    if [ "$REBUILD" = true ]; then
+        echo "Rebuilding containers..."
+        docker compose down
+        if ! docker compose build --no-cache; then
+            echo "❌ Failed to rebuild containers"
+            exit 1
+        fi
+    fi
+
+    echo "Starting services with Docker Compose..."
+    echo ""
+
+    if docker compose up -d; then
+        echo ""
+        echo "✅ Services started successfully!"
+        echo ""
+        echo "🌐 Web Interface: http://localhost:5001"
+        echo "🔧 LibreOffice Converter: http://localhost:2002"
+        echo ""
+        echo "To view logs: docker compose logs -f"
+        echo "To stop: docker compose down"
+        echo ""
+        echo "Waiting for services to be healthy..."
+        sleep 3
+        docker compose ps
+    else
+        echo "❌ Failed to start services"
+        echo ""
+        echo "💡 Try rebuilding with: ./start_web.sh --rebuild"
+        exit 1
+    fi
+else
+    echo "⚠ Docker not found, starting locally..."
+    echo ""
+
+    # Check if UV is installed
+    if ! command -v uv &> /dev/null; then
+        echo "❌ UV package manager not found. Please install UV first:"
+        echo " curl -LsSf https://astral.sh/uv/install.sh | sh"
+        exit 1
+    fi
+
+    echo "Installing dependencies..."
+    if ! uv sync; then
+        echo "❌ Failed to install dependencies"
+        exit 1
+    fi
+
+    echo ""
+    echo "Starting web application..."
+    echo ""
+    echo "✅ Server starting..."
+    echo ""
+    echo "🌐 Web Interface: http://localhost:5001"
+    echo ""
+    echo "Note: For local mode, make sure LibreOffice is installed or"
+    echo " run 'docker compose up -d libreoffice-converter' separately"
+    echo ""
+
+    # Append the project root; the :+ form avoids an empty leading entry when
+    # PYTHONPATH is unset
+    export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}$(pwd)"
+    # exec replaces the shell so uvicorn receives signals such as Ctrl+C directly
+    exec uv run python -m uvicorn src.webapp:app --host 0.0.0.0 --port 5001
+fi