From b9351a8414f865a3c7af2b5482f863fc276b8af4 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 19 Nov 2025 05:30:15 +0000 Subject: [PATCH 01/10] Add web interface for localhost usage Added a FastAPI-based web application that provides a browser interface for converting PowerPoint presentations to descriptions. This makes the tool more accessible and easier to use for quick conversions. Changes: - Created webapp.py with FastAPI endpoints and HTML interface - Added Dockerfile.webapp for containerized deployment - Updated docker-compose.yml to include ppt2desc-web service - Added FastAPI, uvicorn, and python-multipart dependencies - Created LOCALHOST_GUIDE.md with detailed setup instructions - Updated README.md to highlight web interface option - Added start_web.sh script for easy launching The web interface supports all AI providers (Gemini, OpenAI, Anthropic, Azure, AWS) and provides a clean UI for uploading files, configuring settings, and viewing results directly in the browser. Access at http://localhost:8000 after running docker compose up. --- Dockerfile.webapp | 27 ++ LOCALHOST_GUIDE.md | 164 +++++++++++ README.md | 22 ++ docker-compose.yml | 19 +- pyproject.toml | 3 + src/webapp.py | 676 +++++++++++++++++++++++++++++++++++++++++++++ start_web.sh | 62 +++++ 7 files changed, 972 insertions(+), 1 deletion(-) create mode 100644 Dockerfile.webapp create mode 100644 LOCALHOST_GUIDE.md create mode 100644 src/webapp.py create mode 100755 start_web.sh diff --git a/Dockerfile.webapp b/Dockerfile.webapp new file mode 100644 index 0000000..b344516 --- /dev/null +++ b/Dockerfile.webapp @@ -0,0 +1,27 @@ +FROM python:3.13-slim + +# Install LibreOffice for local conversion (optional, can use the separate container) +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + libreoffice \ + curl \ + && rm -rf /var/lib/apt/lists/* + +# Set working directory +WORKDIR /app + +# Copy project files +COPY pyproject.toml uv.lock ./ +COPY src/ ./src/ + +# Install UV +RUN pip install --no-cache-dir uv + +# Install dependencies +RUN uv sync --frozen + +# Expose port +EXPOSE 8000 + +# Run the web application +CMD ["uv", "run", "uvicorn", "src.webapp:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/LOCALHOST_GUIDE.md b/LOCALHOST_GUIDE.md new file mode 100644 index 0000000..6d24449 --- /dev/null +++ b/LOCALHOST_GUIDE.md @@ -0,0 +1,164 @@ +# Running PPT2Desc on Localhost + +This guide explains how to run the PPT2Desc web application on your local machine. + +## Quick Start + +### Option 1: Using Docker Compose (Recommended) + +This is the easiest way to get started. Both LibreOffice converter and the web application will run in containers. + +1. **Start the services:** + ```bash + docker compose up -d + ``` + +2. **Access the web interface:** + - Open your browser and navigate to: **http://localhost:8000** + +3. **Stop the services:** + ```bash + docker compose down + ``` + +That's it! The web interface will be available at http://localhost:8000, and you can upload PowerPoint files directly through your browser. + +### Option 2: Running Locally with UV + +If you prefer to run the application directly on your machine without Docker: + +1. **Install dependencies:** + ```bash + uv sync + ``` + +2. **Start the LibreOffice converter (optional, if you want to use Docker-based conversion):** + ```bash + docker compose up -d libreoffice-converter + ``` + +3. **Run the web application:** + ```bash + uv run uvicorn src.webapp:app --host 0.0.0.0 --port 8000 + ``` + +4. **Access the web interface:** + - Open your browser and navigate to: **http://localhost:8000** + +## Using the Web Interface + +Once the application is running, you can: + +1. **Upload a PowerPoint file** (.ppt or .pptx) +2. **Select an AI provider** (Gemini, OpenAI, Anthropic, etc.) +3. **Configure model settings** (API keys, model name, etc.) +4. **Add optional instructions** to customize the output +5. **Click "Convert Presentation"** to process your file + +The results will be displayed directly in the browser, showing detailed descriptions for each slide. + +## Configuration Options + +### AI Provider Settings + +The web interface supports multiple AI providers: + +- **Google Gemini API**: Requires API key +- **Google Vertex AI**: Requires GCP project ID, region, and service account credentials +- **OpenAI**: Requires API key +- **Anthropic Claude**: Requires API key +- **Azure OpenAI**: Requires API key, endpoint, and deployment name +- **AWS Bedrock**: Requires access key ID, secret access key, and region + +### LibreOffice Configuration + +By default, the web application uses the Docker-based LibreOffice converter at `http://libreoffice-converter:2002` (when using Docker Compose) or `http://localhost:2002` (when running locally). + +If you have LibreOffice installed locally, you can leave the LibreOffice URL field blank, and the application will attempt to find it in your system PATH. + +## API Endpoints + +If you want to integrate the service programmatically: + +### Health Check +```bash +curl http://localhost:8000/health +``` + +### Convert Presentation +```bash +curl -X POST http://localhost:8000/convert \ + -F "file=@presentation.pptx" \ + -F "client=gemini" \ + -F "api_key=YOUR_API_KEY" \ + -F "model=gemini-2.5-flash" +``` + +## Troubleshooting + +### Port Already in Use + +If port 8000 is already in use, you can change it: + +**Docker Compose:** +Edit `docker-compose.yml` and change the port mapping: +```yaml +ports: + - "8080:8000" # Change 8080 to any available port +``` + +**Local Running:** +```bash +uv run uvicorn src.webapp:app --host 0.0.0.0 --port 8080 +``` + +### LibreOffice Connection Issues + +If you get errors about LibreOffice conversion: + +1. Make sure the LibreOffice converter is running: + ```bash + docker compose ps + ``` + +2. Check the health of the converter: + ```bash + curl http://localhost:2002/health + ``` + +3. If using local LibreOffice, ensure it's installed: + ```bash + which soffice + # or + which libreoffice + ``` + +### Memory Issues + +For large presentations or high rate limits, you may need to increase Docker memory limits. Edit your Docker settings or add resource limits to `docker-compose.yml`. + +## Development + +To run in development mode with auto-reload: + +```bash +uv run uvicorn src.webapp:app --host 0.0.0.0 --port 8000 --reload +``` + +## Environment Variables + +You can set default values using environment variables: + +```bash +export GEMINI_API_KEY=your_api_key +export OPENAI_API_KEY=your_api_key +export ANTHROPIC_API_KEY=your_api_key +``` + +Then you won't need to enter API keys in the web interface each time. + +## Next Steps + +- Check the main [README.md](README.md) for detailed information about the project +- Learn about customizing prompts and instructions +- Explore the CLI version for batch processing diff --git a/README.md b/README.md index aa6de3a..bee4da1 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,8 @@ ppt2desc is a command-line tool that converts PowerPoint presentations into deta ## Features +- **Web Interface**: Easy-to-use browser-based interface for converting presentations +- **CLI Tool**: Command-line interface for batch processing and automation - Convert PPT/PPTX files to semantic descriptions - Process individual files or entire directories - Support for visual elements interpretation (charts, graphs, figures) @@ -80,6 +82,26 @@ This will create a virtual environment and install all dependencies from `pyproj ## Usage +### Web Interface (Recommended for Quick Start) + +The easiest way to use ppt2desc is through the web interface: + +1. **Start the web application:** + ```bash + docker compose up -d + ``` + +2. **Open your browser and navigate to:** + ``` + http://localhost:8000 + ``` + +3. **Upload your PowerPoint file, configure your AI provider, and convert!** + +For detailed instructions, see [LOCALHOST_GUIDE.md](LOCALHOST_GUIDE.md). + +### Command Line Interface + Basic usage with Gemini API: ```bash uv run src/main.py \ diff --git a/docker-compose.yml b/docker-compose.yml index 3c56a3a..75bc816 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,6 +1,6 @@ services: libreoffice-converter: - build: + build: context: ./src/libreoffice_docker dockerfile: Dockerfile ports: @@ -11,4 +11,21 @@ services: test: ["CMD", "curl", "-f", "http://localhost:2002/health"] interval: 300s timeout: 10s + retries: 3 + + ppt2desc-web: + build: + context: . + dockerfile: Dockerfile.webapp + ports: + - "8000:8000" + restart: unless-stopped + depends_on: + - libreoffice-converter + environment: + - LIBREOFFICE_URL=http://libreoffice-converter:2002 + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8000/health"] + interval: 30s + timeout: 10s retries: 3 \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 8b7c2d7..bf9cb7b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,6 +15,7 @@ dependencies = [ "charset-normalizer==3.4.1", "distro==1.9.0", "docstring-parser==0.16", + "fastapi>=0.115.0", "google-ai-generativelanguage==0.6.10", "google-api-core==2.24.0", "google-api-python-client==2.156.0", @@ -53,6 +54,7 @@ dependencies = [ "pymupdf==1.25.1", "pyparsing==3.2.1", "python-dateutil==2.9.0.post0", + "python-multipart>=0.0.12", "requests==2.32.3", "rsa==4.9", "s3transfer==0.10.4", @@ -63,6 +65,7 @@ dependencies = [ "typing-extensions==4.12.2", "uritemplate==4.1.1", "urllib3==2.3.0", + "uvicorn>=0.32.0", "pytest==8.3.3", "pytest-mock==3.14.0", ] diff --git a/src/webapp.py b/src/webapp.py new file mode 100644 index 0000000..b79141a --- /dev/null +++ b/src/webapp.py @@ -0,0 +1,676 @@ +from fastapi import FastAPI, UploadFile, File, Form, HTTPException +from fastapi.responses import HTMLResponse, JSONResponse +from fastapi.staticfiles import StaticFiles +from pathlib import Path +import tempfile +import shutil +import logging +import sys +from typing import Optional +import json + +from llm.google_unified import GoogleUnifiedClient +from llm.openai import OpenAIClient +from llm.anthropic import AnthropicClient +from llm.azure import AzureClient +from llm.aws import AWSClient +from processor import process_input_path +from prompt import BASE_PROMPT + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s [%(levelname)s] %(name)s - %(message)s", + handlers=[logging.StreamHandler(sys.stdout)] +) +logger = logging.getLogger(__name__) + +app = FastAPI(title="PPT2Desc Web Service") + +# HTML template for the web interface +HTML_TEMPLATE = """ + + + + + + PPT to Description Converter + + + +
+

🎯 PPT to Description

+

Convert PowerPoint presentations into semantic descriptions using AI

+ +
+
+ + +
+ +
+ + +
+ +
+ + + Leave blank to use default model for selected provider +
+ + +
+ + + Required for gemini, openai, and anthropic providers +
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ +
+ + +
+ +
+ + + Use Docker-based LibreOffice converter. Leave blank to use local installation. +
+ +
+ + +
+ +
+
+ + +
+
+ + +
+ +
+
+
+ + + + +""" + + +@app.get("/", response_class=HTMLResponse) +async def home(): + """Serve the web interface""" + return HTML_TEMPLATE + + +@app.get("/health") +async def health_check(): + """Health check endpoint""" + return {"status": "healthy"} + + +@app.post("/convert") +async def convert_presentation( + file: UploadFile = File(...), + client: str = Form(...), + model: Optional[str] = Form(None), + api_key: Optional[str] = Form(None), + instructions: Optional[str] = Form(None), + libreoffice_url: Optional[str] = Form(None), + rate_limit: int = Form(60), + save_pdf: bool = Form(False), + save_images: bool = Form(False), + # Vertex AI fields + gcp_project_id: Optional[str] = Form(None), + gcp_region: Optional[str] = Form(None), + gcp_application_credentials: Optional[str] = Form(None), + # Azure fields + azure_openai_api_key: Optional[str] = Form(None), + azure_openai_endpoint: Optional[str] = Form(None), + azure_deployment_name: Optional[str] = Form(None), + azure_api_version: Optional[str] = Form("2023-12-01-preview"), + # AWS fields + aws_access_key_id: Optional[str] = Form(None), + aws_secret_access_key: Optional[str] = Form(None), + aws_region: Optional[str] = Form("us-east-1"), +): + """ + Convert a PowerPoint presentation to semantic descriptions + """ + if not file.filename or not file.filename.lower().endswith(('.pptx', '.ppt')): + raise HTTPException(status_code=400, detail="File must be a .pptx or .ppt") + + # Create temporary directories + temp_dir = tempfile.mkdtemp() + output_dir = tempfile.mkdtemp() + + try: + temp_path = Path(temp_dir) + output_path = Path(output_dir) + input_file = temp_path / file.filename + + # Save uploaded file + with input_file.open("wb") as f: + shutil.copyfileobj(file.file, f) + + # Build prompt + prompt = BASE_PROMPT + if instructions and instructions.strip(): + prompt = f"{BASE_PROMPT}\n\nAdditional instructions:\n{instructions}" + + # Set default model based on client if not provided + if not model or model.strip() == "": + model_defaults = { + "gemini": "gemini-2.5-flash", + "vertexai": "gemini-2.5-flash", + "openai": "gpt-4o", + "anthropic": "claude-3-5-sonnet-20241022", + "azure": "gpt-4o", + "aws": "us.amazon.nova-lite-v1:0" + } + model = model_defaults.get(client, "gemini-2.5-flash") + + # Initialize model instance + try: + if client == "gemini": + model_instance = GoogleUnifiedClient( + api_key=api_key, + model=model, + use_vertex=False + ) + elif client == "vertexai": + if not gcp_project_id or not gcp_application_credentials: + raise HTTPException( + status_code=400, + detail="GCP project_id and application_credentials are required for Vertex AI" + ) + model_instance = GoogleUnifiedClient( + credentials_path=gcp_application_credentials, + project_id=gcp_project_id, + region=gcp_region, + model=model, + use_vertex=True + ) + elif client == "openai": + model_instance = OpenAIClient(api_key=api_key, model=model) + elif client == "anthropic": + model_instance = AnthropicClient(api_key=api_key, model=model) + elif client == "azure": + if not azure_openai_api_key or not azure_openai_endpoint or not azure_deployment_name: + raise HTTPException( + status_code=400, + detail="Azure API key, endpoint, and deployment name are required" + ) + model_instance = AzureClient( + api_key=azure_openai_api_key, + endpoint=azure_openai_endpoint, + deployment=azure_deployment_name, + api_version=azure_api_version + ) + elif client == "aws": + if not aws_access_key_id or not aws_secret_access_key: + raise HTTPException( + status_code=400, + detail="AWS access key ID and secret access key are required" + ) + model_instance = AWSClient( + access_key_id=aws_access_key_id, + secret_access_key=aws_secret_access_key, + region=aws_region, + model=model + ) + else: + raise HTTPException(status_code=400, detail=f"Unsupported client: {client}") + except Exception as e: + logger.error(f"Failed to initialize model: {str(e)}") + raise HTTPException(status_code=500, detail=f"Failed to initialize model: {str(e)}") + + # Determine LibreOffice configuration + if libreoffice_url and libreoffice_url.strip(): + libreoffice_endpoint = libreoffice_url + libreoffice_path = None + else: + # Try to find local LibreOffice + libreoffice_binary = shutil.which("soffice") or shutil.which("libreoffice") + if libreoffice_binary: + libreoffice_path = Path(libreoffice_binary) + libreoffice_endpoint = None + else: + raise HTTPException( + status_code=500, + detail="LibreOffice not found. Please provide --libreoffice_url or install LibreOffice locally" + ) + + # Process the presentation + logger.info(f"Processing {file.filename} with {client} model {model}") + results = process_input_path( + input_path=input_file, + output_dir=output_path, + libreoffice_path=libreoffice_path, + libreoffice_endpoint=libreoffice_endpoint, + model_instance=model_instance, + rate_limit=rate_limit, + prompt=prompt, + save_pdf=save_pdf, + save_images=save_images, + max_workers=None + ) + + if not results or len(results) == 0: + raise HTTPException(status_code=500, detail="Processing failed - no results returned") + + # Get the first (and should be only) result + ppt_file, slides = results[0] + + if len(slides) == 0: + raise HTTPException(status_code=500, detail="Processing failed - no slides extracted") + + # Format response + response_data = { + "deck": file.filename, + "model": model, + "slides": [ + { + "number": i + 1, + "content": slide + } + for i, slide in enumerate(slides) + ] + } + + return JSONResponse(content=response_data) + + except HTTPException: + raise + except Exception as e: + logger.error(f"Conversion error: {str(e)}", exc_info=True) + raise HTTPException(status_code=500, detail=f"Conversion failed: {str(e)}") + finally: + # Cleanup temporary directories + shutil.rmtree(temp_dir, ignore_errors=True) + shutil.rmtree(output_dir, ignore_errors=True) + + +if __name__ == "__main__": + import uvicorn + uvicorn.run(app, host="0.0.0.0", port=8000) diff --git a/start_web.sh b/start_web.sh new file mode 100755 index 0000000..45207bf --- /dev/null +++ b/start_web.sh @@ -0,0 +1,62 @@ +#!/bin/bash + +# Start PPT2Desc Web Application +# This script provides an easy way to start the web interface + +echo "🚀 Starting PPT2Desc Web Application..." +echo "" + +# Check if Docker is available +if command -v docker &> /dev/null && command -v docker compose &> /dev/null; then + echo "✓ Docker found" + echo "" + echo "Starting services with Docker Compose..." + echo "" + + docker compose up -d + + if [ $? -eq 0 ]; then + echo "" + echo "✅ Services started successfully!" + echo "" + echo "🌐 Web Interface: http://localhost:8000" + echo "🔧 LibreOffice Converter: http://localhost:2002" + echo "" + echo "To view logs: docker compose logs -f" + echo "To stop: docker compose down" + else + echo "❌ Failed to start services" + exit 1 + fi +else + echo "⚠ Docker not found, starting locally..." + echo "" + + # Check if UV is installed + if ! command -v uv &> /dev/null; then + echo "❌ UV package manager not found. Please install UV first:" + echo " curl -LsSf https://astral.sh/uv/install.sh | sh" + exit 1 + fi + + echo "Installing dependencies..." + uv sync + + if [ $? -ne 0 ]; then + echo "❌ Failed to install dependencies" + exit 1 + fi + + echo "" + echo "Starting web application..." + echo "" + echo "✅ Server starting..." + echo "" + echo "🌐 Web Interface: http://localhost:8000" + echo "" + echo "Note: For local mode, make sure LibreOffice is installed or" + echo " run 'docker compose up -d libreoffice-converter' separately" + echo "" + + uv run uvicorn src.webapp:app --host 0.0.0.0 --port 8000 +fi From 2c43ed6ecb74cc7fc1ca939848107e6f1d041ebb Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 19 Nov 2025 05:47:01 +0000 Subject: [PATCH 02/10] Fix Docker web application startup issues Fixed several issues preventing the web container from starting: - Added src/__init__.py for proper Python package structure - Updated Dockerfile.webapp to use uv sync without --frozen flag - Set PYTHONPATH environment variable in container - Changed CMD to use 'python -m uvicorn' for better module loading - Enhanced start_web.sh with --rebuild flag for easy container rebuilding - Improved start_web.sh with better error messages and status checks The web application should now start correctly in Docker. To rebuild and start: ./start_web.sh --rebuild --- Dockerfile.webapp | 15 +++++++++------ src/__init__.py | 1 + start_web.sh | 22 +++++++++++++++++++++- 3 files changed, 31 insertions(+), 7 deletions(-) create mode 100644 src/__init__.py diff --git a/Dockerfile.webapp b/Dockerfile.webapp index b344516..ab20eb4 100644 --- a/Dockerfile.webapp +++ b/Dockerfile.webapp @@ -10,18 +10,21 @@ RUN apt-get update && \ # Set working directory WORKDIR /app +# Install UV first +RUN pip install --no-cache-dir uv + # Copy project files -COPY pyproject.toml uv.lock ./ +COPY pyproject.toml ./ COPY src/ ./src/ -# Install UV -RUN pip install --no-cache-dir uv +# Install dependencies (without frozen lock to allow updates) +RUN uv sync -# Install dependencies -RUN uv sync --frozen +# Set Python path +ENV PYTHONPATH=/app # Expose port EXPOSE 8000 # Run the web application -CMD ["uv", "run", "uvicorn", "src.webapp:app", "--host", "0.0.0.0", "--port", "8000"] +CMD ["uv", "run", "python", "-m", "uvicorn", "src.webapp:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000..8074dd0 --- /dev/null +++ b/src/__init__.py @@ -0,0 +1 @@ +# Package initialization diff --git a/start_web.sh b/start_web.sh index 45207bf..167e158 100755 --- a/start_web.sh +++ b/start_web.sh @@ -6,10 +6,23 @@ echo "🚀 Starting PPT2Desc Web Application..." echo "" +# Parse arguments +REBUILD=false +if [ "$1" = "--rebuild" ]; then + REBUILD=true +fi + # Check if Docker is available if command -v docker &> /dev/null && command -v docker compose &> /dev/null; then echo "✓ Docker found" echo "" + + if [ "$REBUILD" = true ]; then + echo "Rebuilding containers..." + docker compose down + docker compose build --no-cache + fi + echo "Starting services with Docker Compose..." echo "" @@ -24,8 +37,14 @@ if command -v docker &> /dev/null && command -v docker compose &> /dev/null; the echo "" echo "To view logs: docker compose logs -f" echo "To stop: docker compose down" + echo "" + echo "Waiting for services to be healthy..." + sleep 3 + docker compose ps else echo "❌ Failed to start services" + echo "" + echo "💡 Try rebuilding with: ./start_web.sh --rebuild" exit 1 fi else @@ -58,5 +77,6 @@ else echo " run 'docker compose up -d libreoffice-converter' separately" echo "" - uv run uvicorn src.webapp:app --host 0.0.0.0 --port 8000 + export PYTHONPATH="${PYTHONPATH}:$(pwd)" + uv run python -m uvicorn src.webapp:app --host 0.0.0.0 --port 8000 fi From c4f5c6d81c27c0caeedc5f6a900f89ad217bc8a9 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 19 Nov 2025 05:59:57 +0000 Subject: [PATCH 03/10] Fix webapp imports to use relative imports Changed imports in webapp.py from absolute to relative imports using dot notation. This fixes the ModuleNotFoundError when running the webapp as a module (src.webapp:app) in the Docker container. Imports changed from: from llm.google_unified import ... To: from .llm.google_unified import ... This ensures the module can be properly imported when running as src.webapp:app via uvicorn. --- src/webapp.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/webapp.py b/src/webapp.py index b79141a..fe07d04 100644 --- a/src/webapp.py +++ b/src/webapp.py @@ -9,13 +9,13 @@ from typing import Optional import json -from llm.google_unified import GoogleUnifiedClient -from llm.openai import OpenAIClient -from llm.anthropic import AnthropicClient -from llm.azure import AzureClient -from llm.aws import AWSClient -from processor import process_input_path -from prompt import BASE_PROMPT +from .llm.google_unified import GoogleUnifiedClient +from .llm.openai import OpenAIClient +from .llm.anthropic import AnthropicClient +from .llm.azure import AzureClient +from .llm.aws import AWSClient +from .processor import process_input_path +from .prompt import BASE_PROMPT logging.basicConfig( level=logging.INFO, From 346407b96f8bbb327df1d5a83a92df648dccf5ca Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 19 Nov 2025 06:15:04 +0000 Subject: [PATCH 04/10] Fix processor.py imports to use relative imports Changed imports in processor.py from absolute to relative imports to fix ModuleNotFoundError when webapp imports processor module. This is required because webapp.py is imported as a module (src.webapp:app) and all files it imports must use relative imports within the src package. --- src/processor.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/processor.py b/src/processor.py index 8ede73c..17a5511 100644 --- a/src/processor.py +++ b/src/processor.py @@ -10,11 +10,11 @@ from concurrent.futures import ThreadPoolExecutor, as_completed from tqdm import tqdm -from llm import LLMClient -from converters.ppt_converter import convert_pptx_to_pdf -from converters.pdf_converter import convert_pdf_to_images -from converters.docker_converter import convert_pptx_via_docker -from schemas.deck import DeckData, SlideData +from .llm import LLMClient +from .converters.ppt_converter import convert_pptx_to_pdf +from .converters.pdf_converter import convert_pdf_to_images +from .converters.docker_converter import convert_pptx_via_docker +from .schemas.deck import DeckData, SlideData # Create a type alias for all possible clients logger = logging.getLogger(__name__) From 8087e43dbe8b7e87bcb467d249dca749dced0dc2 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 19 Nov 2025 06:15:41 +0000 Subject: [PATCH 05/10] Support both relative and absolute imports Added try/except blocks to support both import styles in processor.py and webapp.py. This allows the code to work in both scenarios: 1. When imported as a module (e.g., src.webapp:app in Docker) - Uses relative imports (.llm, .processor, etc.) 2. When run as a script (e.g., uv run src/main.py) - Falls back to absolute imports (llm, processor, etc.) This ensures compatibility with both the web application (running as a module) and the CLI (running as a script). --- src/processor.py | 18 +++++++++++++----- src/webapp.py | 24 +++++++++++++++++------- 2 files changed, 30 insertions(+), 12 deletions(-) diff --git a/src/processor.py b/src/processor.py index 17a5511..689d88f 100644 --- a/src/processor.py +++ b/src/processor.py @@ -10,11 +10,19 @@ from concurrent.futures import ThreadPoolExecutor, as_completed from tqdm import tqdm -from .llm import LLMClient -from .converters.ppt_converter import convert_pptx_to_pdf -from .converters.pdf_converter import convert_pdf_to_images -from .converters.docker_converter import convert_pptx_via_docker -from .schemas.deck import DeckData, SlideData +# Support both relative imports (for webapp) and absolute imports (for main.py) +try: + from .llm import LLMClient + from .converters.ppt_converter import convert_pptx_to_pdf + from .converters.pdf_converter import convert_pdf_to_images + from .converters.docker_converter import convert_pptx_via_docker + from .schemas.deck import DeckData, SlideData +except ImportError: + from llm import LLMClient + from converters.ppt_converter import convert_pptx_to_pdf + from converters.pdf_converter import convert_pdf_to_images + from converters.docker_converter import convert_pptx_via_docker + from schemas.deck import DeckData, SlideData # Create a type alias for all possible clients logger = logging.getLogger(__name__) diff --git a/src/webapp.py b/src/webapp.py index fe07d04..2abf5e8 100644 --- a/src/webapp.py +++ b/src/webapp.py @@ -9,13 +9,23 @@ from typing import Optional import json -from .llm.google_unified import GoogleUnifiedClient -from .llm.openai import OpenAIClient -from .llm.anthropic import AnthropicClient -from .llm.azure import AzureClient -from .llm.aws import AWSClient -from .processor import process_input_path -from .prompt import BASE_PROMPT +# Support both relative imports (for module) and absolute imports (for direct run) +try: + from .llm.google_unified import GoogleUnifiedClient + from .llm.openai import OpenAIClient + from .llm.anthropic import AnthropicClient + from .llm.azure import AzureClient + from .llm.aws import AWSClient + from .processor import process_input_path + from .prompt import BASE_PROMPT +except ImportError: + from llm.google_unified import GoogleUnifiedClient + from llm.openai import OpenAIClient + from llm.anthropic import AnthropicClient + from llm.azure import AzureClient + from llm.aws import AWSClient + from processor import process_input_path + from prompt import BASE_PROMPT logging.basicConfig( level=logging.INFO, From aa5913203ff734016f6ea4545900535aa7227b16 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 19 Nov 2025 06:31:47 +0000 Subject: [PATCH 06/10] Fix JSON serialization error in webapp response Explicitly convert all response data to strings to prevent 'Object of type PosixPath is not JSON serializable' errors. Added str() conversions for: - deck filename - model name - slide content This ensures the JSONResponse can properly serialize all data. --- src/webapp.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/webapp.py b/src/webapp.py index 2abf5e8..abec6c3 100644 --- a/src/webapp.py +++ b/src/webapp.py @@ -655,14 +655,14 @@ async def convert_presentation( if len(slides) == 0: raise HTTPException(status_code=500, detail="Processing failed - no slides extracted") - # Format response + # Format response - ensure all values are JSON serializable response_data = { - "deck": file.filename, - "model": model, + "deck": str(file.filename), + "model": str(model), "slides": [ { "number": i + 1, - "content": slide + "content": str(slide) } for i, slide in enumerate(slides) ] From e2fe78732259ae573c9f1f31a61019c1f9924881 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 19 Nov 2025 07:10:02 +0000 Subject: [PATCH 07/10] Add Download JSON button to web interface Added a green 'Download JSON' button that appears after conversion completes. The button allows users to download the results as a properly formatted JSON file. Features: - Downloads with filename matching the presentation name - Nicely formatted JSON with 2-space indentation - Green button styling to distinguish from convert button - Downloads directly to user's default download folder The JSON file now persists on the user's machine instead of being deleted with the temporary files. --- src/webapp.py | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/src/webapp.py b/src/webapp.py index abec6c3..1f012eb 100644 --- a/src/webapp.py +++ b/src/webapp.py @@ -462,10 +462,35 @@ } }); + let currentJsonData = null; + + function downloadJSON() { + if (!currentJsonData) return; + + const dataStr = JSON.stringify(currentJsonData, null, 2); + const dataBlob = new Blob([dataStr], { type: 'application/json' }); + const url = URL.createObjectURL(dataBlob); + const link = document.createElement('a'); + link.href = url; + link.download = `${currentJsonData.deck.replace(/\.[^/.]+$/, '')}.json`; + document.body.appendChild(link); + link.click(); + document.body.removeChild(link); + URL.revokeObjectURL(url); + } + function displayResults(data) { + currentJsonData = data; const resultDiv = document.getElementById('result'); - let html = `

Results for: ${data.deck}

`; + let html = ` +
+

Results for: ${data.deck}

+ +
+ `; html += `

Model: ${data.model}

`; html += `

Total Slides: ${data.slides.length}

`; From 946a74591e03fa1482ece2a4de40c160f49f94a3 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 19 Nov 2025 07:26:55 +0000 Subject: [PATCH 08/10] Fix webapp to read actual slide descriptions from JSON file The processor returns image paths, not slide descriptions. The actual AI-generated descriptions are written to a JSON file on disk. Fixed by: - Reading the generated JSON file after processing completes - Returning the actual slide content from the JSON instead of image paths - Added proper error handling for missing or invalid JSON files This fixes the issue where the web interface was showing image file paths instead of the actual AI-generated slide descriptions. --- src/webapp.py | 51 +++++++++++++++++++++++++++++++++------------------ 1 file changed, 33 insertions(+), 18 deletions(-) diff --git a/src/webapp.py b/src/webapp.py index 1f012eb..af8af9f 100644 --- a/src/webapp.py +++ b/src/webapp.py @@ -675,25 +675,40 @@ async def convert_presentation( raise HTTPException(status_code=500, detail="Processing failed - no results returned") # Get the first (and should be only) result - ppt_file, slides = results[0] - - if len(slides) == 0: - raise HTTPException(status_code=500, detail="Processing failed - no slides extracted") - - # Format response - ensure all values are JSON serializable - response_data = { - "deck": str(file.filename), - "model": str(model), - "slides": [ - { - "number": i + 1, - "content": str(slide) - } - for i, slide in enumerate(slides) - ] - } + ppt_file, image_paths = results[0] + + # The actual slide descriptions are written to a JSON file + # Read the JSON file that was generated by the processor + json_filename = input_file.stem + ".json" + json_file_path = output_path / json_filename + + if not json_file_path.exists(): + raise HTTPException( + status_code=500, + detail=f"Processing completed but JSON file not found: {json_filename}" + ) + + # Read and parse the JSON file + try: + with open(json_file_path, 'r', encoding='utf-8') as f: + json_data = json.load(f) + + # Validate that we have the expected structure + if "slides" not in json_data: + raise HTTPException( + status_code=500, + detail="Invalid JSON structure - missing 'slides' key" + ) + + # Return the JSON data directly (it's already in the correct format) + return JSONResponse(content=json_data) - return JSONResponse(content=response_data) + except json.JSONDecodeError as e: + logger.error(f"Failed to parse JSON file: {e}") + raise HTTPException( + status_code=500, + detail=f"Failed to parse generated JSON: {str(e)}" + ) except HTTPException: raise From 46d7ce9a77b391e675da23d918996597a5e40338 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 19 Nov 2025 08:38:19 +0000 Subject: [PATCH 09/10] Change web service port from 8000 to 5001 Updated all references to use port 5001 instead of 8000: - docker-compose.yml: Map external port 5001 to internal port 8000 - start_web.sh: Update messages and local uvicorn command to use 5001 - README.md: Update web interface URL - LOCALHOST_GUIDE.md: Update all port references and examples The service is now accessible at http://localhost:5001 --- LOCALHOST_GUIDE.md | 22 ++++++++++++---------- README.md | 2 +- docker-compose.yml | 2 +- start_web.sh | 6 +++--- 4 files changed, 17 insertions(+), 15 deletions(-) diff --git a/LOCALHOST_GUIDE.md b/LOCALHOST_GUIDE.md index 6d24449..47104c6 100644 --- a/LOCALHOST_GUIDE.md +++ b/LOCALHOST_GUIDE.md @@ -14,14 +14,14 @@ This is the easiest way to get started. Both LibreOffice converter and the web a ``` 2. **Access the web interface:** - - Open your browser and navigate to: **http://localhost:8000** + - Open your browser and navigate to: **http://localhost:5001** 3. **Stop the services:** ```bash docker compose down ``` -That's it! The web interface will be available at http://localhost:8000, and you can upload PowerPoint files directly through your browser. +That's it! The web interface will be available at http://localhost:5001, and you can upload PowerPoint files directly through your browser. ### Option 2: Running Locally with UV @@ -39,11 +39,11 @@ If you prefer to run the application directly on your machine without Docker: 3. **Run the web application:** ```bash - uv run uvicorn src.webapp:app --host 0.0.0.0 --port 8000 + uv run uvicorn src.webapp:app --host 0.0.0.0 --port 5001 ``` 4. **Access the web interface:** - - Open your browser and navigate to: **http://localhost:8000** + - Open your browser and navigate to: **http://localhost:5001** ## Using the Web Interface @@ -57,6 +57,8 @@ Once the application is running, you can: The results will be displayed directly in the browser, showing detailed descriptions for each slide. +The web service runs on **port 5001** by default. + ## Configuration Options ### AI Provider Settings @@ -82,12 +84,12 @@ If you want to integrate the service programmatically: ### Health Check ```bash -curl http://localhost:8000/health +curl http://localhost:5001/health ``` ### Convert Presentation ```bash -curl -X POST http://localhost:8000/convert \ +curl -X POST http://localhost:5001/convert \ -F "file=@presentation.pptx" \ -F "client=gemini" \ -F "api_key=YOUR_API_KEY" \ @@ -98,18 +100,18 @@ curl -X POST http://localhost:8000/convert \ ### Port Already in Use -If port 8000 is already in use, you can change it: +If port 5001 is already in use, you can change it: **Docker Compose:** Edit `docker-compose.yml` and change the port mapping: ```yaml ports: - - "8080:8000" # Change 8080 to any available port + - "5002:8000" # Change 5002 to any available port ``` **Local Running:** ```bash -uv run uvicorn src.webapp:app --host 0.0.0.0 --port 8080 +uv run uvicorn src.webapp:app --host 0.0.0.0 --port 5002 ``` ### LibreOffice Connection Issues @@ -142,7 +144,7 @@ For large presentations or high rate limits, you may need to increase Docker mem To run in development mode with auto-reload: ```bash -uv run uvicorn src.webapp:app --host 0.0.0.0 --port 8000 --reload +uv run uvicorn src.webapp:app --host 0.0.0.0 --port 5001 --reload ``` ## Environment Variables diff --git a/README.md b/README.md index bee4da1..28a4b56 100644 --- a/README.md +++ b/README.md @@ -93,7 +93,7 @@ The easiest way to use ppt2desc is through the web interface: 2. **Open your browser and navigate to:** ``` - http://localhost:8000 + http://localhost:5001 ``` 3. **Upload your PowerPoint file, configure your AI provider, and convert!** diff --git a/docker-compose.yml b/docker-compose.yml index 75bc816..11bdd02 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -18,7 +18,7 @@ services: context: . dockerfile: Dockerfile.webapp ports: - - "8000:8000" + - "5001:8000" restart: unless-stopped depends_on: - libreoffice-converter diff --git a/start_web.sh b/start_web.sh index 167e158..88361a7 100755 --- a/start_web.sh +++ b/start_web.sh @@ -32,7 +32,7 @@ if command -v docker &> /dev/null && command -v docker compose &> /dev/null; the echo "" echo "✅ Services started successfully!" echo "" - echo "🌐 Web Interface: http://localhost:8000" + echo "🌐 Web Interface: http://localhost:5001" echo "🔧 LibreOffice Converter: http://localhost:2002" echo "" echo "To view logs: docker compose logs -f" @@ -71,12 +71,12 @@ else echo "" echo "✅ Server starting..." echo "" - echo "🌐 Web Interface: http://localhost:8000" + echo "🌐 Web Interface: http://localhost:5001" echo "" echo "Note: For local mode, make sure LibreOffice is installed or" echo " run 'docker compose up -d libreoffice-converter' separately" echo "" export PYTHONPATH="${PYTHONPATH}:$(pwd)" - uv run python -m uvicorn src.webapp:app --host 0.0.0.0 --port 8000 + uv run python -m uvicorn src.webapp:app --host 0.0.0.0 --port 5001 fi From d2c2ea64f316881584c603b9bdbfb7f5d554022f Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 20 Nov 2025 00:45:13 +0000 Subject: [PATCH 10/10] Fix LibreOffice URL default for local development Removed the default value 'http://libreoffice-converter:2002' which only works inside Docker. Now the field is empty by default with a placeholder showing 'http://localhost:2002' for local development. Updated help text to be clearer about using the Docker converter. --- src/webapp.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/webapp.py b/src/webapp.py index af8af9f..f09a326 100644 --- a/src/webapp.py +++ b/src/webapp.py @@ -359,8 +359,8 @@
- - Use Docker-based LibreOffice converter. Leave blank to use local installation. + + Use Docker-based LibreOffice converter at http://localhost:2002. Leave blank to use local LibreOffice installation.