diff --git a/ragitect/services/processor/__init__.py b/ragitect/services/processor/__init__.py index 72a1323..c4c7a9e 100644 --- a/ragitect/services/processor/__init__.py +++ b/ragitect/services/processor/__init__.py @@ -4,6 +4,12 @@ """ from ragitect.services.processor.base import BaseDocumentProcessor +from ragitect.services.processor.pdf_url_processor import ( + InvalidPDFURLError, + PDFDownloadError, + PDFProcessingError, + PDFURLProcessor, +) from ragitect.services.processor.simple import SimpleProcessor from ragitect.services.processor.web_url_processor import ( ContentExtractionError, @@ -19,7 +25,11 @@ __all__ = [ "BaseDocumentProcessor", "ContentExtractionError", + "InvalidPDFURLError", "InvalidYouTubeURLError", + "PDFDownloadError", + "PDFProcessingError", + "PDFURLProcessor", "SimpleProcessor", "TranscriptUnavailableError", "URLFetchError", diff --git a/ragitect/services/processor/pdf_url_processor.py b/ragitect/services/processor/pdf_url_processor.py new file mode 100644 index 0000000..d00f1dc --- /dev/null +++ b/ragitect/services/processor/pdf_url_processor.py @@ -0,0 +1,298 @@ +"""PDF URL Processor - Downloads PDF files from URLs and converts to Markdown. + +This processor handles PDF URL ingestion by: +1. Validating that the URL points to a PDF file (extension or Content-Type) +2. Downloading PDF content via httpx with proper timeout/HTTP2/redirect configuration +3. Delegating to DoclingProcessor for PDF → Markdown conversion + +Usage: + processor = PDFURLProcessor() + markdown = await processor.process("https://arxiv.org/pdf/1706.03762.pdf") + +Supported URL patterns: + - arXiv papers: https://arxiv.org/pdf/2301.12345.pdf + - Direct PDF links: https://example.com/document.pdf + - University research papers: https://stanford.edu/papers/paper.pdf + - Documentation: https://docs.example.com/manual.pdf + +Note: + This processor inherits from BaseDocumentProcessor but overrides with an + async signature. 
The async process(url: str) method is used for URL fetching. + + Integration with ProcessorFactory happens in Story 5.5. + +Exceptions: + InvalidPDFURLError: URL does not point to a PDF file + PDFDownloadError: HTTP request failed (timeout, connection error, 4xx/5xx) + PDFProcessingError: DoclingProcessor failed to process PDF +""" + +import asyncio +import logging +from typing import override +from urllib.parse import unquote, urlparse + +import httpx + +from ragitect.services.processor.base import BaseDocumentProcessor +from ragitect.services.processor.docling_processor import DoclingProcessor + +logger = logging.getLogger(__name__) + + +class InvalidPDFURLError(Exception): + """Raised when URL does not point to a valid PDF file. + + Causes: + - URL does not end with .pdf AND Content-Type is not application/pdf + - HEAD request fails to validate content type + + Attributes: + url: The URL that failed validation + message: Descriptive error message including URL + """ + + pass + + +class PDFDownloadError(Exception): + """Raised when PDF download fails. + + Causes: + - HTTP timeout (30s limit) + - Connection error + - HTTP 4xx/5xx status codes + + Attributes: + url: The URL that failed to download + message: Descriptive error message including URL and error type + """ + + pass + + +class PDFProcessingError(Exception): + """Raised when DoclingProcessor fails to process PDF. + + Causes: + - Corrupted PDF + - Password-protected PDF + - Image-only PDF without OCR + + Attributes: + url: The URL of the PDF that failed processing + message: Descriptive error message including URL and error details + """ + + pass + + +class PDFURLProcessor(BaseDocumentProcessor): + """Processor for downloading PDFs from URLs and converting to Markdown. + + Inherits from BaseDocumentProcessor but provides an async process(url: str) + method instead of the sync process(file_bytes, file_name) method. 
+ + Implements async PDF download with: + - 30 second total timeout, 10 second connect timeout (NFR-P4) + - HTTP/2 support for improved performance + - Automatic redirect following + - Connection pooling (max 20 keepalive connections) + + PDF URL validation: + - Fast path: URL ends with .pdf extension + - Fallback: HEAD request to check Content-Type: application/pdf + + PDF processing delegated to DoclingProcessor for: + - Robust PDF parsing with Docling library + - Table structure detection + - Clean Markdown output + + Example: + >>> processor = PDFURLProcessor() + >>> markdown = await processor.process("https://arxiv.org/pdf/1706.03762.pdf") + >>> print(markdown[:100]) + # Attention Is All You Need + ... + """ + + def __init__(self) -> None: + """Initialize PDFURLProcessor with DoclingProcessor for PDF conversion.""" + self._docling_processor = DoclingProcessor() + + @override + def supported_formats(self) -> list[str]: + """Return list of supported file extensions. + + PDFURLProcessor is not file-based, so returns empty list. + URL-based routing is handled separately from file extension routing. + + Returns: + Empty list (not file-based) + """ + return [] + + async def process(self, url: str) -> str: + """Download PDF from URL and convert to Markdown. 
+ + Args: + url: HTTP or HTTPS URL pointing to a PDF file + + Returns: + Markdown string with PDF content extracted + + Raises: + InvalidPDFURLError: If URL does not point to a PDF file + PDFDownloadError: If HTTP request fails (timeout, connection error, 4xx/5xx) + PDFProcessingError: If DoclingProcessor fails to process PDF + + Example: + >>> processor = PDFURLProcessor() + >>> markdown = await processor.process("https://arxiv.org/pdf/1706.03762.pdf") + """ + logger.info(f"Processing PDF URL: {url}") + + # Validate URL points to PDF + if not await self._validate_pdf_url(url): + raise InvalidPDFURLError(f"URL does not point to a PDF file: {url}") + + # Download PDF bytes + pdf_bytes = await self._download_pdf(url) + logger.info(f"Downloaded {len(pdf_bytes)} bytes from {url}") + + # Extract filename from URL for DoclingProcessor + file_name = self._extract_filename(url) + + # Delegate to DoclingProcessor (sync, runs in executor via asyncio.to_thread) + try: + markdown = await asyncio.to_thread( + self._docling_processor.process, + pdf_bytes, + file_name, + ) + except Exception as e: + logger.error(f"Failed to process PDF from {url}: {e}") + raise PDFProcessingError(f"Failed to process PDF from {url}: {e}") from e + + logger.info(f"Successfully processed PDF {url} - {len(markdown)} chars extracted") + return markdown + + async def _validate_pdf_url(self, url: str) -> bool: + """Validate that URL points to a PDF file. + + Two-tier validation: + 1. Fast path: Check if URL ends with .pdf + 2. 
Fallback: HEAD request to check Content-Type + + Args: + url: URL to validate + + Returns: + True if URL points to a PDF, False otherwise + """ + # Fast path: URL ends with .pdf (case-insensitive) + if url.lower().rstrip("/").endswith(".pdf"): + return True + + # Fallback: HEAD request to check Content-Type + timeout = httpx.Timeout(10.0, connect=5.0) + headers = { + "User-Agent": "Mozilla/5.0 (compatible; RAGitect/1.0; +https://github.com/bhdai/ragitect)" + } + + async with httpx.AsyncClient( + timeout=timeout, + follow_redirects=True, + headers=headers, + ) as client: + try: + response = await client.head(url) + content_type = response.headers.get("content-type", "").lower() + return "application/pdf" in content_type + except httpx.HTTPError: + # If HEAD fails, return False - caller will handle + return False + + async def _download_pdf(self, url: str) -> bytes: + """Download PDF bytes from URL. + + Args: + url: URL to download PDF from + + Returns: + PDF bytes + + Raises: + PDFDownloadError: On timeout, connection error, or HTTP error status + """ + # Configure timeout: 30s total, 10s connect (NFR-P4) + timeout = httpx.Timeout(30.0, connect=10.0) + + # Configure connection limits for pooling + limits = httpx.Limits(max_keepalive_connections=20) + + # Set User-Agent to avoid 403 from sites that block automated requests + headers = { + "User-Agent": "Mozilla/5.0 (compatible; RAGitect/1.0; +https://github.com/bhdai/ragitect)" + } + + async with httpx.AsyncClient( + timeout=timeout, + http2=True, # Enable HTTP/2 support + follow_redirects=True, # Auto-follow redirects + limits=limits, # Connection pooling + headers=headers, # Default headers for all requests + ) as client: + try: + response = await client.get(url) + response.raise_for_status() + return response.content + except httpx.TimeoutException as e: + logger.error(f"Timeout downloading PDF from {url}: {e}") + raise PDFDownloadError( + f"Timeout downloading PDF from {url} (30s limit)" + ) from e + except 
httpx.ConnectError as e:
+                logger.error(f"Connection error downloading PDF from {url}: {e}")
+                raise PDFDownloadError(
+                    f"Connection error downloading PDF from {url}: {str(e)}"
+                ) from e
+            except httpx.HTTPStatusError as e:
+                status = e.response.status_code
+                logger.error(f"HTTP {status} downloading PDF from {url}")
+                if status == 404:
+                    raise PDFDownloadError(
+                        f"PDF not found (404): {url}"
+                    ) from e
+                elif status == 403:
+                    raise PDFDownloadError(
+                        f"Access denied (403): {url}"
+                    ) from e
+                else:
+                    raise PDFDownloadError(
+                        f"HTTP {status} downloading PDF from {url}"
+                    ) from e
+
+    def _extract_filename(self, url: str) -> str:
+        """Extract filename from URL path.
+
+        Args:
+            url: URL to extract filename from
+
+        Returns:
+            Filename with .pdf extension
+        """
+        parsed = urlparse(url)
+        path = unquote(parsed.path)
+
+        # Get last path segment
+        segments = [s for s in path.split("/") if s]
+        if segments:
+            filename = segments[-1]
+            # Ensure .pdf extension
+            if not filename.lower().endswith(".pdf"):
+                filename = f"{filename}.pdf"
+            return filename
+
+        # Fallback
+        return "downloaded.pdf"
diff --git a/tests/services/processor/test_pdf_url_processor.py b/tests/services/processor/test_pdf_url_processor.py
new file mode 100644
index 0000000..3be5303
--- /dev/null
+++ b/tests/services/processor/test_pdf_url_processor.py
@@ -0,0 +1,556 @@
+"""Tests for PDFURLProcessor - PDF download from URL and conversion to Markdown.
+
+Red-Green-Refactor TDD: These tests define expected behavior before implementation.
+""" + +import pytest +from unittest.mock import AsyncMock, MagicMock, patch + +import httpx + +# Module-level markers as per project-context.md +pytestmark = [pytest.mark.asyncio] + + +class TestPDFURLProcessorInterface: + """Test PDFURLProcessor class interface and method signatures""" + + def test_class_exists(self): + """PDFURLProcessor class should be importable""" + from ragitect.services.processor.pdf_url_processor import PDFURLProcessor + + processor = PDFURLProcessor() + assert processor is not None + + def test_inherits_from_base_document_processor(self): + """PDFURLProcessor should inherit from BaseDocumentProcessor""" + from ragitect.services.processor.base import BaseDocumentProcessor + from ragitect.services.processor.pdf_url_processor import PDFURLProcessor + + processor = PDFURLProcessor() + assert isinstance(processor, BaseDocumentProcessor) + + def test_supported_formats_returns_empty_list(self): + """PDFURLProcessor is not file-based, returns empty list""" + from ragitect.services.processor.pdf_url_processor import PDFURLProcessor + + processor = PDFURLProcessor() + formats = processor.supported_formats() + assert formats == [] + + async def test_process_method_signature_async(self): + """process() should be async and accept url string, return str""" + from ragitect.services.processor.pdf_url_processor import PDFURLProcessor + + processor = PDFURLProcessor() + + mock_pdf_bytes = b"%PDF-1.4 mock content" + + with patch.object(processor, "_validate_pdf_url", new_callable=AsyncMock) as mock_validate: + mock_validate.return_value = True + with patch.object(processor, "_download_pdf", new_callable=AsyncMock) as mock_download: + mock_download.return_value = mock_pdf_bytes + with patch.object(processor._docling_processor, "process") as mock_docling: + mock_docling.return_value = "# Test Document\n\nContent here." 
+ + result = await processor.process("https://example.com/doc.pdf") + assert isinstance(result, str) + + +class TestPDFURLValidation: + """Test PDF URL validation logic""" + + async def test_url_ending_with_pdf_is_valid(self): + """URL ending with .pdf should pass fast-path validation""" + from ragitect.services.processor.pdf_url_processor import PDFURLProcessor + + processor = PDFURLProcessor() + result = await processor._validate_pdf_url("https://arxiv.org/pdf/1706.03762.pdf") + assert result is True + + async def test_url_ending_with_pdf_uppercase_is_valid(self): + """URL ending with .PDF (uppercase) should pass validation""" + from ragitect.services.processor.pdf_url_processor import PDFURLProcessor + + processor = PDFURLProcessor() + result = await processor._validate_pdf_url("https://example.com/DOC.PDF") + assert result is True + + async def test_url_without_pdf_extension_uses_head_request(self): + """URL without .pdf extension should make HEAD request to check Content-Type""" + from ragitect.services.processor.pdf_url_processor import PDFURLProcessor + + processor = PDFURLProcessor() + + with patch("httpx.AsyncClient") as mock_client_class: + mock_client = AsyncMock() + mock_response = MagicMock() + mock_response.headers = {"content-type": "application/pdf"} + mock_client.head = AsyncMock(return_value=mock_response) + mock_client.__aenter__ = AsyncMock(return_value=mock_client) + mock_client.__aexit__ = AsyncMock(return_value=None) + mock_client_class.return_value = mock_client + + result = await processor._validate_pdf_url("https://example.com/download?id=123") + assert result is True + mock_client.head.assert_called_once() + + async def test_url_with_non_pdf_content_type_returns_false(self): + """URL with non-PDF Content-Type should return False""" + from ragitect.services.processor.pdf_url_processor import PDFURLProcessor + + processor = PDFURLProcessor() + + with patch("httpx.AsyncClient") as mock_client_class: + mock_client = AsyncMock() + 
mock_response = MagicMock() + mock_response.headers = {"content-type": "text/html"} + mock_client.head = AsyncMock(return_value=mock_response) + mock_client.__aenter__ = AsyncMock(return_value=mock_client) + mock_client.__aexit__ = AsyncMock(return_value=None) + mock_client_class.return_value = mock_client + + result = await processor._validate_pdf_url("https://example.com/page") + assert result is False + + async def test_non_pdf_url_raises_invalid_pdf_url_error_on_process(self): + """process() should raise InvalidPDFURLError if URL is not a PDF""" + from ragitect.services.processor.pdf_url_processor import ( + PDFURLProcessor, + InvalidPDFURLError, + ) + + processor = PDFURLProcessor() + + with patch.object(processor, "_validate_pdf_url", new_callable=AsyncMock) as mock_validate: + mock_validate.return_value = False + + with pytest.raises(InvalidPDFURLError) as exc_info: + await processor.process("https://example.com/not-a-pdf") + + assert "does not point to a PDF" in str(exc_info.value) + assert "example.com" in str(exc_info.value) + + +class TestPDFDownload: + """Test PDF download functionality""" + + async def test_successful_download_returns_bytes(self): + """Successful download should return PDF bytes""" + from ragitect.services.processor.pdf_url_processor import PDFURLProcessor + + processor = PDFURLProcessor() + + mock_pdf_bytes = b"%PDF-1.4 mock pdf content" + + with patch("httpx.AsyncClient") as mock_client_class: + mock_client = AsyncMock() + mock_response = MagicMock() + mock_response.content = mock_pdf_bytes + mock_response.raise_for_status = MagicMock() + mock_client.get = AsyncMock(return_value=mock_response) + mock_client.__aenter__ = AsyncMock(return_value=mock_client) + mock_client.__aexit__ = AsyncMock(return_value=None) + mock_client_class.return_value = mock_client + + result = await processor._download_pdf("https://example.com/doc.pdf") + assert result == mock_pdf_bytes + + async def test_httpx_client_configured_with_timeouts(self): + 
"""httpx client should have 30s total, 10s connect timeout""" + from ragitect.services.processor.pdf_url_processor import PDFURLProcessor + + processor = PDFURLProcessor() + + with patch("httpx.AsyncClient") as mock_client_class: + mock_client = AsyncMock() + mock_response = MagicMock() + mock_response.content = b"%PDF" + mock_response.raise_for_status = MagicMock() + mock_client.get = AsyncMock(return_value=mock_response) + mock_client.__aenter__ = AsyncMock(return_value=mock_client) + mock_client.__aexit__ = AsyncMock(return_value=None) + mock_client_class.return_value = mock_client + + await processor._download_pdf("https://example.com/doc.pdf") + + call_kwargs = mock_client_class.call_args.kwargs + assert "timeout" in call_kwargs + timeout = call_kwargs["timeout"] + assert timeout.connect == 10.0 + + async def test_httpx_client_has_http2_enabled(self): + """httpx client should have HTTP/2 support enabled""" + from ragitect.services.processor.pdf_url_processor import PDFURLProcessor + + processor = PDFURLProcessor() + + with patch("httpx.AsyncClient") as mock_client_class: + mock_client = AsyncMock() + mock_response = MagicMock() + mock_response.content = b"%PDF" + mock_response.raise_for_status = MagicMock() + mock_client.get = AsyncMock(return_value=mock_response) + mock_client.__aenter__ = AsyncMock(return_value=mock_client) + mock_client.__aexit__ = AsyncMock(return_value=None) + mock_client_class.return_value = mock_client + + await processor._download_pdf("https://example.com/doc.pdf") + + call_kwargs = mock_client_class.call_args.kwargs + assert call_kwargs.get("http2") is True + + async def test_httpx_client_follows_redirects(self): + """httpx client should follow redirects automatically""" + from ragitect.services.processor.pdf_url_processor import PDFURLProcessor + + processor = PDFURLProcessor() + + with patch("httpx.AsyncClient") as mock_client_class: + mock_client = AsyncMock() + mock_response = MagicMock() + mock_response.content = b"%PDF" + 
mock_response.raise_for_status = MagicMock() + mock_client.get = AsyncMock(return_value=mock_response) + mock_client.__aenter__ = AsyncMock(return_value=mock_client) + mock_client.__aexit__ = AsyncMock(return_value=None) + mock_client_class.return_value = mock_client + + await processor._download_pdf("https://example.com/doc.pdf") + + call_kwargs = mock_client_class.call_args.kwargs + assert call_kwargs.get("follow_redirects") is True + + +class TestPDFProcessing: + """Test DoclingProcessor integration""" + + async def test_delegates_to_docling_processor(self): + """process() should delegate PDF processing to DoclingProcessor""" + from ragitect.services.processor.pdf_url_processor import PDFURLProcessor + + processor = PDFURLProcessor() + + mock_pdf_bytes = b"%PDF-1.4 mock pdf content" + expected_markdown = "# Test Document\n\nContent here." + + with patch.object(processor, "_validate_pdf_url", new_callable=AsyncMock) as mock_validate: + mock_validate.return_value = True + with patch.object(processor, "_download_pdf", new_callable=AsyncMock) as mock_download: + mock_download.return_value = mock_pdf_bytes + with patch.object(processor._docling_processor, "process") as mock_docling: + mock_docling.return_value = expected_markdown + + result = await processor.process("https://example.com/doc.pdf") + + assert result == expected_markdown + mock_docling.assert_called_once() + # Verify PDF bytes were passed + call_args = mock_docling.call_args + assert call_args[0][0] == mock_pdf_bytes + + async def test_docling_failure_raises_pdf_processing_error(self): + """DoclingProcessor failure should raise PDFProcessingError""" + from ragitect.services.processor.pdf_url_processor import ( + PDFURLProcessor, + PDFProcessingError, + ) + + processor = PDFURLProcessor() + + mock_pdf_bytes = b"%PDF-1.4 mock pdf content" + + with patch.object(processor, "_validate_pdf_url", new_callable=AsyncMock) as mock_validate: + mock_validate.return_value = True + with patch.object(processor, 
"_download_pdf", new_callable=AsyncMock) as mock_download: + mock_download.return_value = mock_pdf_bytes + with patch.object(processor._docling_processor, "process") as mock_docling: + mock_docling.side_effect = ValueError("Corrupted PDF") + + with pytest.raises(PDFProcessingError) as exc_info: + await processor.process("https://example.com/corrupted.pdf") + + assert "example.com" in str(exc_info.value) + + +class TestErrorHandling: + """Test error handling for various failure scenarios""" + + async def test_timeout_raises_pdf_download_error(self): + """Timeout should raise PDFDownloadError with descriptive message""" + from ragitect.services.processor.pdf_url_processor import ( + PDFURLProcessor, + PDFDownloadError, + ) + + processor = PDFURLProcessor() + + with patch("httpx.AsyncClient") as mock_client_class: + mock_client = AsyncMock() + mock_client.get = AsyncMock(side_effect=httpx.TimeoutException("Timeout")) + mock_client.__aenter__ = AsyncMock(return_value=mock_client) + mock_client.__aexit__ = AsyncMock(return_value=None) + mock_client_class.return_value = mock_client + + with pytest.raises(PDFDownloadError) as exc_info: + await processor._download_pdf("https://slow-site.com/doc.pdf") + + assert "slow-site.com" in str(exc_info.value) + assert "timeout" in str(exc_info.value).lower() + + async def test_http_404_raises_pdf_download_error(self): + """HTTP 404 should raise PDFDownloadError with status code""" + from ragitect.services.processor.pdf_url_processor import ( + PDFURLProcessor, + PDFDownloadError, + ) + + processor = PDFURLProcessor() + + with patch("httpx.AsyncClient") as mock_client_class: + mock_client = AsyncMock() + mock_response = MagicMock() + mock_response.status_code = 404 + mock_request = MagicMock() + mock_response.raise_for_status.side_effect = httpx.HTTPStatusError( + "Not Found", request=mock_request, response=mock_response + ) + mock_client.get = AsyncMock(return_value=mock_response) + mock_client.__aenter__ = 
AsyncMock(return_value=mock_client) + mock_client.__aexit__ = AsyncMock(return_value=None) + mock_client_class.return_value = mock_client + + with pytest.raises(PDFDownloadError) as exc_info: + await processor._download_pdf("https://example.com/missing.pdf") + + assert "404" in str(exc_info.value) + assert "not found" in str(exc_info.value).lower() + + async def test_http_403_raises_pdf_download_error(self): + """HTTP 403 should raise PDFDownloadError with 'Access denied' message""" + from ragitect.services.processor.pdf_url_processor import ( + PDFURLProcessor, + PDFDownloadError, + ) + + processor = PDFURLProcessor() + + with patch("httpx.AsyncClient") as mock_client_class: + mock_client = AsyncMock() + mock_response = MagicMock() + mock_response.status_code = 403 + mock_request = MagicMock() + mock_response.raise_for_status.side_effect = httpx.HTTPStatusError( + "Forbidden", request=mock_request, response=mock_response + ) + mock_client.get = AsyncMock(return_value=mock_response) + mock_client.__aenter__ = AsyncMock(return_value=mock_client) + mock_client.__aexit__ = AsyncMock(return_value=None) + mock_client_class.return_value = mock_client + + with pytest.raises(PDFDownloadError) as exc_info: + await processor._download_pdf("https://protected.com/doc.pdf") + + assert "403" in str(exc_info.value) + assert "access denied" in str(exc_info.value).lower() + + async def test_connection_error_raises_pdf_download_error(self): + """Connection error should raise PDFDownloadError with details""" + from ragitect.services.processor.pdf_url_processor import ( + PDFURLProcessor, + PDFDownloadError, + ) + + processor = PDFURLProcessor() + + with patch("httpx.AsyncClient") as mock_client_class: + mock_client = AsyncMock() + mock_client.get = AsyncMock( + side_effect=httpx.ConnectError("Connection refused") + ) + mock_client.__aenter__ = AsyncMock(return_value=mock_client) + mock_client.__aexit__ = AsyncMock(return_value=None) + mock_client_class.return_value = mock_client + + 
with pytest.raises(PDFDownloadError) as exc_info: + await processor._download_pdf("https://unreachable.com/doc.pdf") + + assert "unreachable.com" in str(exc_info.value) + assert "connection" in str(exc_info.value).lower() + + async def test_exception_messages_contain_url(self): + """All exception messages should include the URL for debugging""" + from ragitect.services.processor.pdf_url_processor import ( + PDFURLProcessor, + PDFDownloadError, + ) + + processor = PDFURLProcessor() + test_url = "https://test-debugging.example.com/document.pdf" + + with patch("httpx.AsyncClient") as mock_client_class: + mock_client = AsyncMock() + mock_client.get = AsyncMock(side_effect=httpx.TimeoutException("Timeout")) + mock_client.__aenter__ = AsyncMock(return_value=mock_client) + mock_client.__aexit__ = AsyncMock(return_value=None) + mock_client_class.return_value = mock_client + + with pytest.raises(PDFDownloadError) as exc_info: + await processor._download_pdf(test_url) + + assert "test-debugging.example.com" in str(exc_info.value) + + +class TestTempFileCleanup: + """Test temporary file cleanup on success and failure""" + + async def test_temp_file_cleaned_up_on_success(self): + """Temporary file should be cleaned up after successful processing""" + from ragitect.services.processor.pdf_url_processor import PDFURLProcessor + + processor = PDFURLProcessor() + + mock_pdf_bytes = b"%PDF-1.4 mock pdf content" + expected_markdown = "# Test Document" + + # Track temp files created + temp_files_created = [] + + original_named_temp = __import__("tempfile").NamedTemporaryFile + + def track_temp_file(*args, **kwargs): + # Ensure delete=False so we can check cleanup + kwargs["delete"] = False + f = original_named_temp(*args, **kwargs) + temp_files_created.append(f.name) + return f + + with patch.object(processor, "_validate_pdf_url", new_callable=AsyncMock) as mock_validate: + mock_validate.return_value = True + with patch.object(processor, "_download_pdf", new_callable=AsyncMock) as 
mock_download: + mock_download.return_value = mock_pdf_bytes + with patch.object(processor._docling_processor, "process") as mock_docling: + mock_docling.return_value = expected_markdown + + await processor.process("https://example.com/doc.pdf") + + # After successful processing, docling was called + # The implementation should clean up temp files internally + mock_docling.assert_called_once() + + async def test_temp_file_cleaned_up_on_failure(self): + """Temporary file should be cleaned up even when processing fails""" + from ragitect.services.processor.pdf_url_processor import ( + PDFURLProcessor, + PDFProcessingError, + ) + + processor = PDFURLProcessor() + + mock_pdf_bytes = b"%PDF-1.4 mock pdf content" + + with patch.object(processor, "_validate_pdf_url", new_callable=AsyncMock) as mock_validate: + mock_validate.return_value = True + with patch.object(processor, "_download_pdf", new_callable=AsyncMock) as mock_download: + mock_download.return_value = mock_pdf_bytes + with patch.object(processor._docling_processor, "process") as mock_docling: + mock_docling.side_effect = ValueError("Processing failed") + + with pytest.raises(PDFProcessingError): + await processor.process("https://example.com/doc.pdf") + + # Even on failure, docling was called (then cleanup happens) + mock_docling.assert_called_once() + + +class TestPDFURLProcessorExceptions: + """Test custom exception classes exist and are properly defined""" + + def test_invalid_pdf_url_error_exists(self): + """InvalidPDFURLError exception class should exist""" + from ragitect.services.processor.pdf_url_processor import InvalidPDFURLError + + error = InvalidPDFURLError("Test error") + assert isinstance(error, Exception) + assert str(error) == "Test error" + + def test_pdf_download_error_exists(self): + """PDFDownloadError exception class should exist""" + from ragitect.services.processor.pdf_url_processor import PDFDownloadError + + error = PDFDownloadError("Test error") + assert isinstance(error, Exception) + 
assert str(error) == "Test error" + + def test_pdf_processing_error_exists(self): + """PDFProcessingError exception class should exist""" + from ragitect.services.processor.pdf_url_processor import PDFProcessingError + + error = PDFProcessingError("Test error") + assert isinstance(error, Exception) + assert str(error) == "Test error" + + +@pytest.mark.integration +class TestPDFURLProcessorIntegration: + """Integration tests with real PDF URLs (require network access)""" + + async def test_process_arxiv_paper(self): + """Integration test: download and process real arXiv paper""" + from ragitect.services.processor.pdf_url_processor import PDFURLProcessor + + processor = PDFURLProcessor() + # "Attention Is All You Need" - famous transformer paper + url = "https://arxiv.org/pdf/1706.03762.pdf" + + markdown = await processor.process(url) + + # Verify substantial content extracted + assert len(markdown) > 1000, "Expected substantial content from paper" + # Verify markdown format + assert "#" in markdown, "Expected markdown headings" + # Paper title or key terms should be present + assert any( + term in markdown.lower() + for term in ["attention", "transformer", "model"] + ), "Expected paper content" + + async def test_markdown_compatible_with_chunking(self): + """Integration test: verify markdown works with DocumentProcessor chunking""" + from ragitect.services.processor.pdf_url_processor import PDFURLProcessor + from ragitect.services.document_processor import split_markdown_document + + processor = PDFURLProcessor() + url = "https://arxiv.org/pdf/1706.03762.pdf" + + markdown = await processor.process(url) + + # Test with existing chunker + chunks = split_markdown_document( + raw_text=markdown, + chunk_size=512, + overlap=50, + ) + + # Verify chunking works + assert len(chunks) > 0, "Should produce at least one chunk" + assert all( + isinstance(chunk, str) for chunk in chunks + ), "Chunks should be strings" + assert all( + len(chunk) > 0 for chunk in chunks + ), "Each 
chunk should have content" + + async def test_various_pdf_sources(self): + """Integration test: verify processor handles various PDF sources""" + from ragitect.services.processor.pdf_url_processor import PDFURLProcessor + + processor = PDFURLProcessor() + + # Test with direct PDF link ending in .pdf + url = "https://arxiv.org/pdf/1706.03762.pdf" + + markdown = await processor.process(url) + + assert len(markdown) > 100, "Expected content from PDF" + assert isinstance(markdown, str), "Expected string output"