"""
PDF Embedded Images Extractor
Extracts the raster images embedded in a PDF file and returns them as PNG
files (each image is decoded and re-encoded as PNG, not copied byte-for-byte).
"""

import base64
import io
import logging
import zipfile

import fitz  # PyMuPDF
from flask import Blueprint, request, jsonify, send_file
from werkzeug.utils import secure_filename

pdf_extract_images_bp = Blueprint('pdf_extract_images', __name__)

logger = logging.getLogger(__name__)

# Preview limits: only the first pages are scanned and only the first images
# of each page are rendered, so a preview holds at most 3 x 3 = 9 images.
PREVIEW_MAX_PAGES = 3
PREVIEW_MAX_IMAGES_PER_PAGE = 3


def _strip_pdf_suffix(filename):
    """Return *filename* without one trailing ``.pdf`` extension.

    The match is case-insensitive (the routes accept ``.PDF`` as well) and
    only the end of the name is touched, unlike ``str.replace('.pdf', '')``.
    """
    if filename.lower().endswith('.pdf'):
        return filename[:-4]
    return filename


def _render_png(doc, xref):
    """Return the image object *xref* of *doc* encoded as PNG bytes."""
    pix = fitz.Pixmap(doc, xref)
    # Four or more colour components (alpha excluded) is the CMYK case in
    # PyMuPDF's image-extraction recipe: convert to RGB before writing PNG.
    if pix.n - pix.alpha >= 4:
        pix = fitz.Pixmap(fitz.csRGB, pix)
    return pix.tobytes("png")


def extract_images_from_pdf(pdf_bytes, original_filename="document"):
    """Extract all embedded images from a PDF file into an in-memory ZIP.

    Args:
        pdf_bytes: Raw content of the PDF file.
        original_filename: Upload name; used (sanitised, without ``.pdf``)
            as the prefix of every image file name and quoted in the report.

    Returns:
        ``(zip_buffer, total_images, image_names)`` on success, where
        ``zip_buffer`` is a ``BytesIO`` rewound to position 0 that holds the
        PNG files plus ``extraction_report.txt``.
        ``(error_message, None, None)`` when the PDF has no pages, contains
        no extractable image, or cannot be processed.
    """
    doc = None
    try:
        doc = fitz.open(stream=pdf_bytes, filetype="pdf")

        if len(doc) == 0:
            return "PDF file is empty or corrupted", None, None

        # Loop-invariant, so computed once. secure_filename() may return an
        # empty string, hence the fallback.
        base_name = _strip_pdf_suffix(secure_filename(original_filename)) or "document"

        zip_buffer = io.BytesIO()
        image_names = []

        with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
            for page_num in range(len(doc)):
                image_list = doc[page_num].get_images(full=True)

                for img_index, img in enumerate(image_list):
                    xref = img[0]
                    try:
                        img_data = _render_png(doc, xref)
                    except Exception:
                        # Best effort: one unreadable image must not abort
                        # the whole extraction.
                        logger.exception(
                            "Skipping image %d on page %d (xref %s)",
                            img_index + 1, page_num + 1, xref,
                        )
                        continue

                    img_filename = f"{base_name}_page{page_num + 1}_{img_index + 1}.png"
                    zip_file.writestr(img_filename, img_data)
                    image_names.append(img_filename)

            total_images = len(image_names)
            if total_images == 0:
                return "No embedded images found in this PDF", None, None

            report_lines = [
                "Extracted Images Report",
                f"PDF File: {original_filename}",
                f"Total Images Found: {total_images}",
                "Extracted Images:",
                *(f"- {name}" for name in image_names),
            ]
            # Must be written while the archive is still open.
            zip_file.writestr("extraction_report.txt", "\n".join(report_lines) + "\n")

        zip_buffer.seek(0)
        return zip_buffer, total_images, image_names

    except Exception as e:
        return f"Error processing PDF: {str(e)}", None, None
    finally:
        # Release the document on every path, including the early returns.
        if doc is not None:
            doc.close()


@pdf_extract_images_bp.route('/extract-pdf-images', methods=['POST'])
def extract_images():
    """Return a ZIP of all images embedded in the uploaded PDF.

    Expects a multipart upload with a ``file`` field whose name ends in
    ``.pdf`` (any case). Responds with the ZIP as an attachment, 400 with a
    JSON ``error`` for invalid input or a PDF without images, 500 on an
    unexpected failure.
    """
    if 'file' not in request.files:
        return jsonify({'error': 'No file provided'}), 400

    file = request.files['file']

    if file.filename == '':
        return jsonify({'error': 'No file selected'}), 400

    if not file.filename.lower().endswith('.pdf'):
        return jsonify({'error': 'File must be a PDF'}), 400

    try:
        pdf_bytes = file.read()
        result, count, _names = extract_images_from_pdf(pdf_bytes, file.filename)

        # On failure the first element is the error message, not a buffer.
        if count is None:
            return jsonify({'error': result}), 400

        return send_file(
            result,
            mimetype='application/zip',
            as_attachment=True,
            download_name=f"{_strip_pdf_suffix(file.filename)}_extracted_images.zip"
        )

    except Exception as e:
        logger.exception("Image extraction request failed")
        return jsonify({'error': f'Server error: {str(e)}'}), 500


@pdf_extract_images_bp.route('/preview-pdf-images', methods=['POST'])
def preview_images():
    """Return up to nine embedded images of the uploaded PDF as data URLs.

    Only the first ``PREVIEW_MAX_PAGES`` pages are scanned and at most
    ``PREVIEW_MAX_IMAGES_PER_PAGE`` images are taken from each. Images that
    cannot be rendered are skipped, matching the extraction endpoint.
    """
    if 'file' not in request.files:
        return jsonify({'error': 'No file provided'}), 400

    file = request.files['file']

    if file.filename == '' or not file.filename.lower().endswith('.pdf'):
        return jsonify({'error': 'Valid PDF required'}), 400

    doc = None
    try:
        doc = fitz.open(stream=file.read(), filetype="pdf")

        previews = []

        for page_num in range(min(len(doc), PREVIEW_MAX_PAGES)):
            image_list = doc[page_num].get_images(full=True)

            for img_index, img in enumerate(image_list[:PREVIEW_MAX_IMAGES_PER_PAGE]):
                try:
                    img_data = _render_png(doc, img[0])
                except Exception:
                    logger.exception(
                        "Skipping preview of image %d on page %d",
                        img_index + 1, page_num + 1,
                    )
                    continue

                b64 = base64.b64encode(img_data).decode('utf-8')
                previews.append({
                    'page': page_num + 1,
                    'index': img_index + 1,
                    'data': f'data:image/png;base64,{b64}'
                })

        return jsonify({
            'success': True,
            'total_previews': len(previews),
            'previews': previews
        })

    except Exception as e:
        return jsonify({'error': str(e)}), 500
    finally:
        if doc is not None:
            doc.close()
"""
SSE (Server-Sent Events) Blueprint for Real-Time Progress
"""

import json
import threading
import time

from flask import Blueprint, Response, jsonify, request, stream_with_context
from utils.progress_manager import progress_manager

progress_bp = Blueprint('progress', __name__)

# Statuses after which a task no longer changes and the stream can end.
TERMINAL_STATUSES = ('complete', 'error')

# Delay between two polls of the progress manager (don't flood the client).
POLL_INTERVAL_SECONDS = 0.5


@progress_bp.route('/progress/<task_id>')
def progress_stream(task_id: str):
    """
    SSE endpoint for progress updates.

    The client connects here and receives one ``data:`` event whenever the
    task's percentage changes, plus a final event once the task is complete
    or failed. An unknown task id yields a single error event and the stream
    ends, instead of polling forever.
    """
    def generate():
        last_percent = -1

        while True:
            progress = progress_manager.get_progress(task_id)

            if progress is None:
                # Nothing will ever be reported for this id; tell the client
                # and release the connection.
                not_found = {
                    'task_id': task_id,
                    'status': 'error',
                    'error': 'Task not found',
                }
                yield f"data: {json.dumps(not_found)}\n\n"
                break

            finished = progress.get('status') in TERMINAL_STATUSES
            current_percent = progress.get('percent', 0)

            # Only send an update if progress changed or the task finished.
            if current_percent != last_percent or finished:
                last_percent = current_percent
                yield f"data: {json.dumps(progress)}\n\n"

                # Stop streaming when complete or error.
                if finished:
                    break

            time.sleep(POLL_INTERVAL_SECONDS)

    return Response(
        stream_with_context(generate()),
        mimetype='text/event-stream',
        headers={
            'Cache-Control': 'no-cache',
            'X-Accel-Buffering': 'no'  # Disable nginx buffering
        }
    )


@progress_bp.route('/progress/<task_id>/status')
def get_progress_status(task_id: str):
    """Get current progress status as JSON (404 if the task is unknown)."""
    progress = progress_manager.get_progress(task_id)
    if progress:
        return jsonify(progress)

    return jsonify({'error': 'Task not found'}), 404


@progress_bp.route('/convert-pdf-progress', methods=['POST'])
def convert_pdf_with_progress():
    """
    Example: PDF to PNG conversion with progress tracking.

    Validates the upload, registers a task and starts a background thread
    that currently only simulates the work (the uploaded file is not read).
    Returns the task id together with the SSE stream URL and the polling URL.
    """
    if 'file' not in request.files:
        return jsonify({'error': 'No file provided'}), 400

    file = request.files['file']

    if file.filename == '':
        return jsonify({'error': 'No file selected'}), 400

    # Create task for progress tracking
    task_id = progress_manager.create_task()

    def process():
        try:
            # Simulate processing with progress updates
            total_pages = 10  # In real code, get actual page count

            progress_manager.update(task_id, 0, total_pages, "Starting conversion...")

            for page in range(1, total_pages + 1):
                # Simulate work
                time.sleep(0.5)

                progress_manager.update(
                    task_id,
                    page,
                    total_pages,
                    f"Processing page {page} of {total_pages}..."
                )

            progress_manager.complete(task_id, "/download/result.zip")

        except Exception as e:
            # Report the failure to the client through the progress channel.
            progress_manager.error(task_id, str(e))

    # Start background thread
    thread = threading.Thread(target=process)
    thread.start()

    return jsonify({
        'task_id': task_id,
        'stream_url': f'/progress/{task_id}',
        'status_url': f'/progress/{task_id}/status'
    })
import fitz  # PyMuPDF
import io
import zipfile
import os


def test_extract(pdf_path):
    """Test PDF image extraction without Flask.

    Reads the PDF at *pdf_path*, renders every embedded image to PNG and,
    if at least one image was found, writes ``extracted_images.zip`` to the
    current working directory. Progress is reported on stdout.
    """
    if not os.path.exists(pdf_path):
        print(f"❌ File not found: {pdf_path}")
        return

    # Read PDF
    with open(pdf_path, 'rb') as f:
        pdf_bytes = f.read()

    # Open PDF from memory
    doc = fitz.open(stream=pdf_bytes, filetype="pdf")

    total_images = 0

    # Create ZIP in memory
    zip_buffer = io.BytesIO()

    try:
        print(f"✅ PDF opened: {pdf_path}")
        print(f"📄 Total pages: {len(doc)}")

        with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:

            for page_num in range(len(doc)):
                image_list = doc[page_num].get_images(full=True)

                print(f" Page {page_num + 1}: {len(image_list)} images found")

                for img_index, img in enumerate(image_list):
                    xref = img[0]

                    try:
                        pix = fitz.Pixmap(doc, xref)

                        # Four or more colour components (alpha excluded):
                        # convert to RGB before encoding as PNG.
                        if pix.n - pix.alpha >= 4:
                            pix = fitz.Pixmap(fitz.csRGB, pix)

                        img_data = pix.tobytes("png")
                        pix = None  # drop the pixmap before the next image

                        img_filename = f"page{page_num + 1}_img{img_index + 1}.png"
                        zip_file.writestr(img_filename, img_data)
                        total_images += 1

                        print(f" ✅ Extracted: {img_filename}")

                    except Exception as e:
                        # Best effort: report and carry on with the next image.
                        print(f" ❌ Error: {e}")

            # Add report
            report = f"Extracted {total_images} images from {pdf_path}"
            zip_file.writestr("report.txt", report)
    finally:
        # Close the document even if reading or zipping failed.
        doc.close()

    if total_images > 0:
        # Save ZIP file
        with open("extracted_images.zip", "wb") as f:
            f.write(zip_buffer.getvalue())
        print(f"\n✅ ZIP created: extracted_images.zip ({total_images} images)")
    else:
        print("\n⚠️ No images found in PDF")


if __name__ == "__main__":
    # Ask for PDF path
    pdf_input = input("Enter PDF path (or drag-drop PDF here): ").strip().strip('"')
    test_extract(pdf_input)
"""
Progress Manager for SSE (Server-Sent Events)
Tracks progress of long-running operations
"""

import threading
import time
import uuid
from typing import Dict, Optional
from dataclasses import dataclass, field
from datetime import datetime


@dataclass
class Progress:
    """Progress tracking object for a single task."""
    task_id: str
    status: str = "pending"  # pending, processing, complete, error
    current: int = 0
    total: int = 0
    message: str = ""
    error: Optional[str] = None
    start_time: float = field(default_factory=time.time)
    end_time: Optional[float] = None

    @property
    def percent(self) -> float:
        """Completed share in percent; 0 while the total is still unknown."""
        if self.total == 0:
            return 0
        return (self.current / self.total) * 100

    @property
    def elapsed_time(self) -> float:
        """Seconds since the task started, frozen once the task has ended."""
        if self.end_time is not None:
            return self.end_time - self.start_time
        return time.time() - self.start_time

    def to_dict(self) -> dict:
        """Return a JSON-serialisable snapshot of the task."""
        return {
            "task_id": self.task_id,
            "status": self.status,
            "current": self.current,
            "total": self.total,
            "percent": round(self.percent, 1),
            "message": self.message,
            "error": self.error,
            "elapsed_time": round(self.elapsed_time, 1)
        }


class ProgressManager:
    """Manages progress tracking for multiple tasks.

    Worker threads write progress while request handlers read it, so every
    access to the task table is serialised with a lock.
    """

    def __init__(self):
        self._progress: Dict[str, Progress] = {}
        self._lock = threading.Lock()

    def create_task(self) -> str:
        """Create a new task and return its ID (8 hex characters)."""
        with self._lock:
            # The ID is a truncated UUID, so retry on the (unlikely) clash
            # rather than silently replacing a live task.
            while True:
                task_id = str(uuid.uuid4())[:8]
                if task_id not in self._progress:
                    break
            self._progress[task_id] = Progress(task_id=task_id)
        return task_id

    def update(self, task_id: str, current: int, total: int, message: str = ""):
        """Update progress for a task; unknown task IDs are ignored."""
        with self._lock:
            progress = self._progress.get(task_id)
            if progress is None:
                return
            progress.current = current
            progress.total = total
            progress.status = "processing"
            if message:
                progress.message = message

    def complete(self, task_id: str, result_url: str = ""):
        """Mark task as complete.

        A non-empty *result_url* replaces the task message.
        """
        with self._lock:
            progress = self._progress.get(task_id)
            if progress is None:
                return
            progress.status = "complete"
            progress.end_time = time.time()
            if result_url:
                progress.message = result_url

    def error(self, task_id: str, error_msg: str):
        """Mark task as failed and record *error_msg*."""
        with self._lock:
            progress = self._progress.get(task_id)
            if progress is None:
                return
            progress.status = "error"
            progress.error = error_msg
            progress.end_time = time.time()

    def get_progress(self, task_id: str) -> Optional[dict]:
        """Get a snapshot of a task's progress, or None if it is unknown."""
        with self._lock:
            progress = self._progress.get(task_id)
            return progress.to_dict() if progress is not None else None

    def cleanup_old_tasks(self, max_age_seconds: int = 3600):
        """Remove finished tasks that ended more than max_age_seconds ago."""
        now = time.time()
        with self._lock:
            to_remove = [
                task_id
                for task_id, progress in self._progress.items()
                if progress.end_time is not None
                and (now - progress.end_time) > max_age_seconds
            ]
            for task_id in to_remove:
                del self._progress[task_id]


# Global instance
progress_manager = ProgressManager()
translateX(100%); + } +} + +.animate-shimmer { + animation: shimmer 1.5s infinite; +} \ No newline at end of file diff --git a/frontend/src/components/ProgressBar.jsx b/frontend/src/components/ProgressBar.jsx new file mode 100644 index 0000000..33dff92 --- /dev/null +++ b/frontend/src/components/ProgressBar.jsx @@ -0,0 +1,84 @@ +import React from 'react'; +import './ProgressBar.css'; // Optional: add styling + +const ProgressBar = ({ + progress, + showPercentage = true, + showMessage = true, + height = '8px', + className = '' +}) => { + if (!progress) return null; + + const { percent, status, message, current, total, elapsed_time } = progress; + + // Status colors + const statusColors = { + pending: 'bg-gray-500', + processing: 'bg-blue-600', + complete: 'bg-green-600', + error: 'bg-red-600' + }; + + const barColor = statusColors[status] || 'bg-blue-600'; + + // Status messages + const statusMessages = { + pending: 'ā³ Preparing...', + processing: `āš™ļø Processing: ${message || `Step ${current} of ${total}`}`, + complete: 'āœ… Complete! Downloading...', + error: 'āŒ Error: ' + (progress.error || 'Something went wrong') + }; + + const displayMessage = statusMessages[status] || message; + + return ( +
+ {/* Progress Bar Container */} +
+
+ {/* Optional: Animated shimmer effect while processing */} + {status === 'processing' && ( +
+ )} +
+
+ + {/* Progress Info */} +
+ {showPercentage && ( + + {Math.round(percent)}% + + )} + + {showMessage && displayMessage && ( + + {displayMessage} + + )} + + {elapsed_time && status === 'processing' && ( + + {Math.round(elapsed_time)}s + + )} +
+ + {/* Current/Total indicator */} + {status === 'processing' && current && total && ( +
+ {current} / {total} completed +
+ )} +
import { useState, useEffect, useCallback, useRef } from 'react';

/**
 * Custom hook for Server-Sent Events (SSE)
 *
 * Opens an EventSource on `url`, parses every message as JSON and exposes the
 * latest payload as `progress`. The connection is closed when a payload with
 * status `complete` or `error` arrives, and when the component unmounts.
 *
 * @param {string} url - SSE endpoint URL (a falsy value means "not connected")
 * @param {Object} options - Configuration options
 * @param {boolean} [options.autoConnect=true] - Connect as soon as `url` is set
 * @param {Function} [options.onProgress] - Called with every parsed payload
 * @param {Function} [options.onComplete] - Called with the payload whose status is `complete`
 * @param {Function} [options.onError] - Called with `payload.error` when status is `error`
 * @returns {Object} { progress, isConnected, error, close, reconnect }
 */
export const useSSE = (url, options = {}) => {
  const [progress, setProgress] = useState(null);
  const [isConnected, setIsConnected] = useState(false);
  const [error, setError] = useState(null);

  const eventSourceRef = useRef(null);
  const reconnectTimeoutRef = useRef(null);

  const {
    autoConnect = true,
    onComplete = null,
    onError = null,
    onProgress = null
  } = options;

  // Callers usually pass inline callbacks, which get a new identity on every
  // render. Keeping the latest ones in a ref lets `connect` stay stable;
  // otherwise each render (including the one caused by every setProgress)
  // would close and reopen the EventSource.
  const callbacksRef = useRef({ onComplete, onError, onProgress });
  useEffect(() => {
    callbacksRef.current = { onComplete, onError, onProgress };
  });

  const close = useCallback(() => {
    if (eventSourceRef.current) {
      eventSourceRef.current.close();
      eventSourceRef.current = null;
    }
    if (reconnectTimeoutRef.current) {
      clearTimeout(reconnectTimeoutRef.current);
      reconnectTimeoutRef.current = null;
    }
    setIsConnected(false);
  }, []);

  const connect = useCallback(() => {
    if (!url) return;

    // Close existing connection
    close();

    try {
      const eventSource = new EventSource(url);
      eventSourceRef.current = eventSource;

      eventSource.onopen = () => {
        setIsConnected(true);
        setError(null);
      };

      eventSource.onmessage = (event) => {
        try {
          const data = JSON.parse(event.data);
          setProgress(data);

          const callbacks = callbacksRef.current;

          // Call callback if provided
          if (callbacks.onProgress) callbacks.onProgress(data);

          // Check if complete
          if (data.status === 'complete') {
            if (callbacks.onComplete) callbacks.onComplete(data);
            close();
          }

          // Check if error
          if (data.status === 'error') {
            setError(data.error || 'Processing failed');
            if (callbacks.onError) callbacks.onError(data.error);
            close();
          }
        } catch (err) {
          console.error('Failed to parse SSE message:', err);
        }
      };

      eventSource.onerror = (err) => {
        console.error('SSE connection error:', err);
        setIsConnected(false);
        setError('Connection lost. Attempting to reconnect...');

        // Reconnect after delay
        if (reconnectTimeoutRef.current) {
          clearTimeout(reconnectTimeoutRef.current);
        }
        reconnectTimeoutRef.current = setTimeout(() => {
          // The browser retries by itself unless the source ended up CLOSED.
          if (eventSourceRef.current?.readyState === EventSource.CLOSED) {
            connect();
          }
        }, 3000);
      };

    } catch (err) {
      console.error('Failed to create EventSource:', err);
      setError(err.message);
    }
  }, [url, close]);

  useEffect(() => {
    if (autoConnect && url) {
      connect();
    }

    return () => {
      close();
    };
  }, [url, autoConnect, connect, close]);

  return { progress, isConnected, error, close, reconnect: connect };
};

export default useSSE;
const [loading, setLoading] = useState(false); + const [previews, setPreviews] = useState([]); + const [error, setError] = useState(null); + + const API_URL = import.meta.env.VITE_API_URL || 'http://localhost:5000'; + + const handleFileSelect = async (selectedFile) => { + setFile(selectedFile); + setError(null); + await loadPreview(selectedFile); + }; + + const loadPreview = async (fileToPreview) => { + const formData = new FormData(); + formData.append('file', fileToPreview); + + try { + const response = await axios.post(`${API_URL}/preview-pdf-images`, formData, { + headers: { 'Content-Type': 'multipart/form-data' } + }); + + if (response.data.success) { + setPreviews(response.data.previews); + } else { + setError(response.data.error || 'No images to preview'); + } + } catch (err) { + setError('Could not load preview. PDF may have no images.'); + } + }; + + const handleExtract = async () => { + if (!file) { + setError('Please select a PDF file first'); + return; + } + + setLoading(true); + setError(null); + + const formData = new FormData(); + formData.append('file', file); + + try { + const response = await axios.post(`${API_URL}/extract-pdf-images`, formData, { + responseType: 'blob', + headers: { 'Content-Type': 'multipart/form-data' } + }); + + const url = window.URL.createObjectURL(new Blob([response.data])); + const link = document.createElement('a'); + link.href = url; + link.setAttribute('download', `${file.name.replace('.pdf', '')}_extracted_images.zip`); + document.body.appendChild(link); + link.click(); + link.remove(); + window.URL.revokeObjectURL(url); + + } catch (err) { + if (err.response && err.response.data instanceof Blob) { + const text = await err.response.data.text(); + try { + const errorJson = JSON.parse(text); + setError(errorJson.error || 'Extraction failed'); + } catch { + setError('Extraction failed. 
Please check the PDF format.'); + } + } else { + setError(err.response?.data?.error || 'Something went wrong'); + } + } finally { + setLoading(false); + } + }; + + return ( + +
+
+ +
+ + {previews.length > 0 && ( +
+

+ šŸ“· Preview Images Found +

+
+ {previews.map((preview, idx) => ( +
+ {`Page +

+ Page {preview.page} • Image {preview.index} +

+
+ ))} +
+
+ )} + + {file && ( +
+ +
+ )} + + {error && ( +
+

{error}

+
+ )} +
+
+ ); +}; + +export default PdfExtractImages; \ No newline at end of file diff --git a/frontend/src/pages/PdfPng.jsx b/frontend/src/pages/PdfPng.jsx index cbdec5d..46009e2 100644 --- a/frontend/src/pages/PdfPng.jsx +++ b/frontend/src/pages/PdfPng.jsx @@ -1,46 +1,68 @@ -import React, { useCallback, useState } from "react"; - +import React, { useCallback, useState, useRef } from "react"; import JSZip from "jszip"; - import ToolPageTemplate from "../components/ToolPageTemplate"; import MultiFileResults from "../components/MultiFileResults"; +import useSSE from "../hooks/useSSE"; +import ProgressBar from "../components/ProgressBar"; +import axios from "axios"; // Set worker source for PDF.js - - const PdfPng = () => { - const [scale, setScale] = useState(2.0); // Default scale (2x) - const [pageMode, setPageMode] = useState("all"); // all, single, range + const [scale, setScale] = useState(2.0); + const [pageMode, setPageMode] = useState("all"); const [pageRange, setPageRange] = useState(""); const [singlePage, setSinglePage] = useState("1"); const [numPages, setNumPages] = useState(0); const [language, setLanguage] = useState("eng"); const [outputFiles, setOutputFiles] = useState([]); + + // SSE Progress States + const [isProcessing, setIsProcessing] = useState(false); + const [sseUrl, setSseUrl] = useState(null); + const [taskId, setTaskId] = useState(null); + const currentFileRef = useRef(null); + + // SSE Hook for real-time progress + const { progress, isConnected, error: sseError } = useSSE(sseUrl, { + autoConnect: !!sseUrl, + onComplete: async (data) => { + setIsProcessing(false); + setSseUrl(null); + + // If we have a download URL from the server + if (data.message && data.message.includes('/download/')) { + window.location.href = data.message; + } else if (currentFileRef.current) { + // Fallback: Use client-side conversion if server didn't provide file + await performClientConversion(currentFileRef.current); + } + }, + onError: (err) => { + console.error('SSE 
Error:', err); + setIsProcessing(false); + setSseUrl(null); + // Fallback to client-side conversion + if (currentFileRef.current) { + performClientConversion(currentFileRef.current); + } + } + }); const validateFile = useCallback(async (selectedFile) => { if (selectedFile && selectedFile.type === "application/pdf") { try { const arrayBuffer = await selectedFile.arrayBuffer(); const pdfjsLib = await import("pdfjs-dist"); - -const pdfWorker = await import( - "pdfjs-dist/build/pdf.worker.min.mjs?url" -); - -pdfjsLib.GlobalWorkerOptions.workerSrc = pdfWorker.default; - -const pdf = await pdfjsLib.getDocument({ - data: arrayBuffer, -}).promise; + const pdfWorker = await import("pdfjs-dist/build/pdf.worker.min.mjs?url"); + pdfjsLib.GlobalWorkerOptions.workerSrc = pdfWorker.default; + const pdf = await pdfjsLib.getDocument({ data: arrayBuffer }).promise; setNumPages(pdf.numPages); } catch (err) { console.error("Error loading PDF info:", err); } return { isValid: true, - message: `File "${selectedFile.name}" selected (${( - selectedFile.size / 1024 - ).toFixed(1)} KB)`, + message: `File "${selectedFile.name}" selected (${(selectedFile.size / 1024).toFixed(1)} KB)`, }; } return { @@ -55,18 +77,18 @@ const pdf = await pdfjsLib.getDocument({ setSinglePage("1"); setPageMode("all"); setOutputFiles([]); + setIsProcessing(false); + setSseUrl(null); + setTaskId(null); + currentFileRef.current = null; }; - const handleCustomSubmit = async ({ file, setStatusMessage, setLoading, setStatusType }) => { - setStatusMessage("Processing PDF... 
This may take a while for large files."); + // Client-side conversion (existing logic) + const performClientConversion = async (file) => { try { const pdfjsLib = await import("pdfjs-dist/legacy/build/pdf"); - -const pdfWorker = await import( - "pdfjs-dist/legacy/build/pdf.worker.min.mjs?url" -); - -pdfjsLib.GlobalWorkerOptions.workerSrc = pdfWorker.default; + const pdfWorker = await import("pdfjs-dist/legacy/build/pdf.worker.min.mjs?url"); + pdfjsLib.GlobalWorkerOptions.workerSrc = pdfWorker.default; const arrayBuffer = await file.arrayBuffer(); const pdf = await pdfjsLib.getDocument({ data: arrayBuffer }).promise; const totalPages = pdf.numPages; @@ -77,9 +99,7 @@ pdfjsLib.GlobalWorkerOptions.workerSrc = pdfWorker.default; } else if (pageMode === "single") { const pageNum = parseInt(singlePage); if (isNaN(pageNum) || pageNum < 1 || pageNum > totalPages) { - throw new Error( - `Invalid page number: ${singlePage}. Please enter a value between 1 and ${totalPages}.`, - ); + throw new Error(`Invalid page number: ${singlePage}`); } pagesToRender = [pageNum]; } else if (pageMode === "range") { @@ -97,23 +117,18 @@ pdfjsLib.GlobalWorkerOptions.workerSrc = pdfWorker.default; }); } - // Deduplicate and sort pagesToRender = [...new Set(pagesToRender)].sort((a, b) => a - b); if (pagesToRender.length === 0) { throw new Error("No valid pages selected"); } - setOutputFiles([]); // Clear previous results - + setOutputFiles([]); const zip = new JSZip(); const results = []; for (let i = 0; i < pagesToRender.length; i++) { const pageNum = pagesToRender[i]; - setStatusMessage( - `Rendering page ${pageNum} (${i + 1}/${pagesToRender.length})...`, - ); const page = await pdf.getPage(pageNum); const viewport = page.getViewport({ scale }); const canvas = document.createElement("canvas"); @@ -123,9 +138,7 @@ pdfjsLib.GlobalWorkerOptions.workerSrc = pdfWorker.default; await page.render({ canvasContext: context, viewport }).promise; - const blob = await new Promise((resolve) => - 
canvas.toBlob(resolve, "image/png"), - ); + const blob = await new Promise((resolve) => canvas.toBlob(resolve, "image/png")); results.push({ name: `page-${pageNum}.png`, blob }); } @@ -140,10 +153,7 @@ pdfjsLib.GlobalWorkerOptions.workerSrc = pdfWorker.default; a.click(); document.body.removeChild(a); window.URL.revokeObjectURL(url); - setStatusMessage("Success! Your PNG file has been downloaded."); - setStatusType("success"); } else { - setStatusMessage("Packaging files into ZIP..."); results.forEach((res) => zip.file(res.name, res.blob)); const zipBlob = await zip.generateAsync({ type: "blob" }); const url = window.URL.createObjectURL(zipBlob); @@ -154,74 +164,102 @@ pdfjsLib.GlobalWorkerOptions.workerSrc = pdfWorker.default; a.click(); document.body.removeChild(a); window.URL.revokeObjectURL(url); - setStatusMessage( - `Success! ZIP file with ${results.length} pages downloaded.`, - ); - setStatusType("success"); } - - setTimeout(() => setStatusMessage(""), 5000); } catch (error) { console.error("Client-side conversion error:", error); - setStatusMessage("Client conversion failed — trying server fallback..."); + throw error; + } + }; + + // Server-side conversion with SSE progress + const performServerConversion = async (file, setStatusMessage, setStatusType) => { + const formData = new FormData(); + formData.append("file", file); + formData.append("language", language); + formData.append("scale", scale.toString()); + formData.append("pageMode", pageMode); + formData.append("pageRange", pageRange); + formData.append("singlePage", singlePage); + + try { + const response = await axios.post("/api/convert-pdf-progress", formData, { + baseURL: import.meta.env.VITE_API_URL || "http://localhost:5000", + headers: { "Content-Type": "multipart/form-data" } + }); + + const { task_id, stream_url } = response.data; + setTaskId(task_id); + setSseUrl(stream_url); + setIsProcessing(true); + setStatusMessage("Connected to server. 
Processing with progress tracking..."); setStatusType("info"); + + } catch (error) { + console.error("Server conversion start error:", error); + throw new Error("Server conversion failed"); + } + }; - // Attempt server-side conversion fallback + const handleCustomSubmit = async ({ file, setStatusMessage, setLoading, setStatusType }) => { + currentFileRef.current = file; + setStatusMessage("Starting PDF conversion..."); + + try { + // Try server-side conversion first (with SSE progress) + await performServerConversion(file, setStatusMessage, setStatusType); + setStatusMessage("Processing PDF with real-time progress tracking..."); + setStatusType("info"); + + } catch (serverError) { + console.warn("Server conversion failed, falling back to client-side:", serverError); + setStatusMessage("Server unavailable. Using client-side conversion..."); + setStatusType("info"); + try { - const form = new FormData(); - form.append("file", file); - form.append("language", language); - - const tryUrls = ["/convertPng", "http://localhost:5000/convertPng"]; - - let response = null; - for (const url of tryUrls) { - try { - response = await fetch(url, { method: "POST", body: form }); - if (response && response.ok) break; - } catch (e) { - console.warn("Server convert attempt failed:", url, e); - response = null; - } - } - - if (response && response.ok) { - const blob = await response.blob(); - const name = file.name.replace(/\.pdf$/i, ".png"); - setOutputFiles([{ name, blob }]); - const downloadUrl = window.URL.createObjectURL(blob); - const a = document.createElement("a"); - a.href = downloadUrl; - a.download = file.name.replace(/\.pdf$/i, ".png"); - document.body.appendChild(a); - a.click(); - document.body.removeChild(a); - window.URL.revokeObjectURL(downloadUrl); - setStatusMessage("Success! PNG downloaded from server fallback."); - setStatusType("success"); - } else { - const msg = response - ? 
await response.text() - : "Server conversion unavailable"; - setStatusMessage(`Error: ${msg}`); - setStatusType("error"); - } - } catch (serverErr) { - console.error("Server fallback error:", serverErr); - setStatusMessage(`Error: ${error.message || "Failed to convert file"}`); + await performClientConversion(file); + setStatusMessage("Success! Conversion completed in browser."); + setStatusType("success"); + } catch (clientError) { + setStatusMessage(`Error: ${clientError.message || "Conversion failed"}`); setStatusType("error"); } - - setTimeout(() => setStatusMessage(""), 5000); + } finally { setLoading(false); + + // Clear status after delay + setTimeout(() => { + if (!isProcessing) { + setStatusMessage(""); + } + }, 3000); } }; const extraFields = ({ file }) => { if (!file) return null; return ( + +
+ {/* Progress Bar - Shows when processing */} + {isProcessing && ( +
+

+ šŸ“Š Real-Time Progress +

+ + {!isConnected && ( +

Connecting to server...

+ )} + {sseError && ( +

Connection issue: {sseError}

+ )} +
+ )} + +
+ {/* Quality Slider */}
@@ -248,6 +286,7 @@ pdfjsLib.GlobalWorkerOptions.workerSrc = pdfWorker.default;
+ {/* Document Language */}