diff --git a/Dockerfile.webapp b/Dockerfile.webapp new file mode 100644 index 0000000..ab20eb4 --- /dev/null +++ b/Dockerfile.webapp @@ -0,0 +1,30 @@ +FROM python:3.13-slim + +# Install LibreOffice for local conversion (optional, can use the separate container) +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + libreoffice \ + curl \ + && rm -rf /var/lib/apt/lists/* + +# Set working directory +WORKDIR /app + +# Install UV first +RUN pip install --no-cache-dir uv + +# Copy project files +COPY pyproject.toml ./ +COPY src/ ./src/ + +# Install dependencies (without frozen lock to allow updates) +RUN uv sync + +# Set Python path +ENV PYTHONPATH=/app + +# Expose port +EXPOSE 8000 + +# Run the web application +CMD ["uv", "run", "python", "-m", "uvicorn", "src.webapp:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/LOCALHOST_GUIDE.md b/LOCALHOST_GUIDE.md new file mode 100644 index 0000000..47104c6 --- /dev/null +++ b/LOCALHOST_GUIDE.md @@ -0,0 +1,166 @@ +# Running PPT2Desc on Localhost + +This guide explains how to run the PPT2Desc web application on your local machine. + +## Quick Start + +### Option 1: Using Docker Compose (Recommended) + +This is the easiest way to get started. Both the LibreOffice converter and the web application will run in containers. + +1. **Start the services:** + ```bash + docker compose up -d + ``` + +2. **Access the web interface:** + - Open your browser and navigate to: **http://localhost:5001** + +3. **Stop the services:** + ```bash + docker compose down + ``` + +That's it! The web interface will be available at http://localhost:5001, and you can upload PowerPoint files directly through your browser. + +### Option 2: Running Locally with UV + +If you prefer to run the application directly on your machine without Docker: + +1. **Install dependencies:** + ```bash + uv sync + ``` + +2. 
**Start the LibreOffice converter (optional, if you want to use Docker-based conversion):** + ```bash + docker compose up -d libreoffice-converter + ``` + +3. **Run the web application:** + ```bash + uv run uvicorn src.webapp:app --host 0.0.0.0 --port 5001 + ``` + +4. **Access the web interface:** + - Open your browser and navigate to: **http://localhost:5001** + +## Using the Web Interface + +Once the application is running, you can: + +1. **Upload a PowerPoint file** (.ppt or .pptx) +2. **Select an AI provider** (Gemini, OpenAI, Anthropic, etc.) +3. **Configure model settings** (API keys, model name, etc.) +4. **Add optional instructions** to customize the output +5. **Click "Convert Presentation"** to process your file + +The results will be displayed directly in the browser, showing detailed descriptions for each slide. + +With Docker Compose, the web service is published on host **port 5001** by default (mapped to port 8000 inside the container). + +## Configuration Options + +### AI Provider Settings + +The web interface supports multiple AI providers: + +- **Google Gemini API**: Requires API key +- **Google Vertex AI**: Requires GCP project ID, region, and service account credentials +- **OpenAI**: Requires API key +- **Anthropic Claude**: Requires API key +- **Azure OpenAI**: Requires API key, endpoint, and deployment name +- **AWS Bedrock**: Requires access key ID, secret access key, and region + +### LibreOffice Configuration + +By default, the web application uses the Docker-based LibreOffice converter at `http://libreoffice-converter:2002` (when using Docker Compose) or `http://localhost:2002` (when running locally). + +If you have LibreOffice installed locally, you can leave the LibreOffice URL field blank, and the application will attempt to find LibreOffice on your system PATH. 
+ +## API Endpoints + +To call the service programmatically, use the following endpoints: + +### Health Check +```bash +curl http://localhost:5001/health +``` + +### Convert Presentation +```bash +curl -X POST http://localhost:5001/convert \ + -F "file=@presentation.pptx" \ + -F "client=gemini" \ + -F "api_key=YOUR_API_KEY" \ + -F "model=gemini-2.5-flash" +``` + +## Troubleshooting + +### Port Already in Use + +If port 5001 is already in use, you can change it: + +**Docker Compose:** +Edit `docker-compose.yml` and change the port mapping: +```yaml +ports: + - "5002:8000" # Change 5002 to any available port +``` + +**Running Locally:** +```bash +uv run uvicorn src.webapp:app --host 0.0.0.0 --port 5002 +``` + +### LibreOffice Connection Issues + +If you get errors about LibreOffice conversion: + +1. Make sure the LibreOffice converter is running: + ```bash + docker compose ps + ``` + +2. Check the health of the converter: + ```bash + curl http://localhost:2002/health + ``` + +3. If using local LibreOffice, ensure it's installed: + ```bash + which soffice + # or + which libreoffice + ``` + +### Memory Issues + +For large presentations or high rate limits, you may need to increase Docker memory limits. Edit your Docker settings or add resource limits to `docker-compose.yml`. + +## Development + +To run in development mode with auto-reload: + +```bash +uv run uvicorn src.webapp:app --host 0.0.0.0 --port 5001 --reload +``` + +## Environment Variables + +You can set default values using environment variables: + +```bash +export GEMINI_API_KEY=your_api_key +export OPENAI_API_KEY=your_api_key +export ANTHROPIC_API_KEY=your_api_key +``` + +Then you won't need to enter API keys in the web interface each time. 
+ +## Next Steps + +- Check the main [README.md](README.md) for detailed information about the project +- Learn about customizing prompts and instructions +- Explore the CLI version for batch processing diff --git a/README.md b/README.md index aa6de3a..28a4b56 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,8 @@ ppt2desc is a command-line tool that converts PowerPoint presentations into deta ## Features +- **Web Interface**: Easy-to-use browser-based interface for converting presentations +- **CLI Tool**: Command-line interface for batch processing and automation - Convert PPT/PPTX files to semantic descriptions - Process individual files or entire directories - Support for visual elements interpretation (charts, graphs, figures) @@ -80,6 +82,26 @@ This will create a virtual environment and install all dependencies from `pyproj ## Usage +### Web Interface (Recommended for Quick Start) + +The easiest way to use ppt2desc is through the web interface: + +1. **Start the web application:** + ```bash + docker compose up -d + ``` + +2. **Open your browser and navigate to:** + ``` + http://localhost:5001 + ``` + +3. **Upload your PowerPoint file, configure your AI provider, and convert!** + +For detailed instructions, see [LOCALHOST_GUIDE.md](LOCALHOST_GUIDE.md). + +### Command Line Interface + Basic usage with Gemini API: ```bash uv run src/main.py \ diff --git a/docker-compose.yml b/docker-compose.yml index 3c56a3a..11bdd02 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,6 +1,6 @@ services: libreoffice-converter: - build: + build: context: ./src/libreoffice_docker dockerfile: Dockerfile ports: @@ -11,4 +11,21 @@ services: test: ["CMD", "curl", "-f", "http://localhost:2002/health"] interval: 300s timeout: 10s + retries: 3 + + ppt2desc-web: + build: + context: . 
+ dockerfile: Dockerfile.webapp + ports: + - "5001:8000" + restart: unless-stopped + depends_on: + - libreoffice-converter + environment: + - LIBREOFFICE_URL=http://libreoffice-converter:2002 + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8000/health"] + interval: 30s + timeout: 10s retries: 3 \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 8b7c2d7..bf9cb7b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,6 +15,7 @@ dependencies = [ "charset-normalizer==3.4.1", "distro==1.9.0", "docstring-parser==0.16", + "fastapi>=0.115.0", "google-ai-generativelanguage==0.6.10", "google-api-core==2.24.0", "google-api-python-client==2.156.0", @@ -53,6 +54,7 @@ dependencies = [ "pymupdf==1.25.1", "pyparsing==3.2.1", "python-dateutil==2.9.0.post0", + "python-multipart>=0.0.12", "requests==2.32.3", "rsa==4.9", "s3transfer==0.10.4", @@ -63,6 +65,7 @@ dependencies = [ "typing-extensions==4.12.2", "uritemplate==4.1.1", "urllib3==2.3.0", + "uvicorn>=0.32.0", "pytest==8.3.3", "pytest-mock==3.14.0", ] diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000..8074dd0 --- /dev/null +++ b/src/__init__.py @@ -0,0 +1 @@ +# Package initialization diff --git a/src/processor.py b/src/processor.py index 8ede73c..689d88f 100644 --- a/src/processor.py +++ b/src/processor.py @@ -10,11 +10,19 @@ from concurrent.futures import ThreadPoolExecutor, as_completed from tqdm import tqdm -from llm import LLMClient -from converters.ppt_converter import convert_pptx_to_pdf -from converters.pdf_converter import convert_pdf_to_images -from converters.docker_converter import convert_pptx_via_docker -from schemas.deck import DeckData, SlideData +# Support both relative imports (for webapp) and absolute imports (for main.py) +try: + from .llm import LLMClient + from .converters.ppt_converter import convert_pptx_to_pdf + from .converters.pdf_converter import convert_pdf_to_images + from .converters.docker_converter import 
convert_pptx_via_docker + from .schemas.deck import DeckData, SlideData +except ImportError: + from llm import LLMClient + from converters.ppt_converter import convert_pptx_to_pdf + from converters.pdf_converter import convert_pdf_to_images + from converters.docker_converter import convert_pptx_via_docker + from schemas.deck import DeckData, SlideData # Create a type alias for all possible clients logger = logging.getLogger(__name__) diff --git a/src/webapp.py b/src/webapp.py new file mode 100644 index 0000000..f09a326 --- /dev/null +++ b/src/webapp.py @@ -0,0 +1,726 @@ +from fastapi import FastAPI, UploadFile, File, Form, HTTPException +from fastapi.responses import HTMLResponse, JSONResponse +from fastapi.staticfiles import StaticFiles +from pathlib import Path +import tempfile +import shutil +import logging +import sys +from typing import Optional +import json + +# Support both relative imports (for module) and absolute imports (for direct run) +try: + from .llm.google_unified import GoogleUnifiedClient + from .llm.openai import OpenAIClient + from .llm.anthropic import AnthropicClient + from .llm.azure import AzureClient + from .llm.aws import AWSClient + from .processor import process_input_path + from .prompt import BASE_PROMPT +except ImportError: + from llm.google_unified import GoogleUnifiedClient + from llm.openai import OpenAIClient + from llm.anthropic import AnthropicClient + from llm.azure import AzureClient + from llm.aws import AWSClient + from processor import process_input_path + from prompt import BASE_PROMPT + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s [%(levelname)s] %(name)s - %(message)s", + handlers=[logging.StreamHandler(sys.stdout)] +) +logger = logging.getLogger(__name__) + +app = FastAPI(title="PPT2Desc Web Service") + +# HTML template for the web interface +HTML_TEMPLATE = """ + + +
+ + +Convert PowerPoint presentations into semantic descriptions using AI
+ + + + + +