diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4227f88..8f4e8c9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -117,5 +117,26 @@ jobs: printf '# Smoke Test\n\nTest paragraph.\n' > "$input_dir/input.md" python3 scripts/md_to_pdf.py "$input_dir/input.md" "$input_dir/output.pdf" \ --title "CI Smoke Test" - - run: | python3 -c "import sys,pathlib; d=pathlib.Path('/tmp/smoke test path/output.pdf').read_bytes(); assert len(d)>0 and d[:4]==b'%PDF'; print('PDF OK: '+str(len(d))+' bytes')" + rm -rf "$input_dir" + - run: | + temp=$(mktemp -d) + dir="$temp/hash path #frag/query ?q/中文 路径" + mkdir -p "$dir" + md_file="$dir/in#frag?query 中文.md" + html_file="$dir/in#frag?query 中文.html" + pdf_file="$dir/out#frag?query 中文.pdf" + printf '# Special Char Test\n\nTest paragraph.\n' > "$md_file" + + # Full pipeline (md_to_pdf.py) + python3 scripts/md_to_pdf.py "$md_file" "$pdf_file" --title "Special Char Test" + python3 -c "import sys,pathlib; d=pathlib.Path('$pdf_file').read_bytes(); assert len(d)>0 and d[:4]==b'%PDF'; print('Pipeline OK: '+str(len(d))+' bytes')" + + # Direct renderer (render_pdf.py) — generate HTML first + python3 scripts/markdown_to_html.py "$md_file" "$html_file" + direct_pdf="$dir/renderer_direct.pdf" + python3 scripts/render_pdf.py "$html_file" "$direct_pdf" + python3 -c "import sys,pathlib; d=pathlib.Path('$direct_pdf').read_bytes(); assert len(d)>0 and d[:4]==b'%PDF'; print('Renderer direct OK: '+str(len(d))+' bytes')" + + echo 'Special character path smoke OK' + rm -rf "$temp" diff --git a/scripts/render_pdf.py b/scripts/render_pdf.py index cad56c0..8d58a91 100644 --- a/scripts/render_pdf.py +++ b/scripts/render_pdf.py @@ -34,34 +34,39 @@ async def html_to_pdf(html_path, pdf_path=None, format="A4", async with async_playwright() as p: browser = await p.chromium.launch() - page = await browser.new_page() + try: + page = await browser.new_page() - if block_remote: - await page.route("**/*", lambda route: route.abort() if route.request.url.startswith(("http://", "https://")) else route.continue_()) + if block_remote: + await page.route("**/*", lambda route: route.abort() if route.request.url.startswith(("http://", "https://")) else route.continue_()) - file_url = f"file://{html_path}" - await page.goto(file_url, wait_until="networkidle") - await page.emulate_media(media=media) + file_url = html_path.as_uri() + await page.goto(file_url, wait_until="networkidle") + await page.emulate_media(media=media) - pdf_kwargs = { - "path": str(pdf_path), - "format": format, - "margin": { - "top": margin_top, - "bottom": margin_bottom, - "left": margin_left, - "right": margin_right, - }, - "print_background": print_background, - "landscape": landscape, - "prefer_css_page_size": prefer_css_page_size, - "tagged": True, - } - if title: - await page.evaluate("(t) => { document.title = t; }", title) + pdf_kwargs = { + "path": str(pdf_path), + "format": format, + "margin": { + "top": margin_top, + "bottom": margin_bottom, + "left": margin_left, + "right": margin_right, + }, + "print_background": print_background, + "landscape": landscape, + "prefer_css_page_size": prefer_css_page_size, + "tagged": True, + } + if title: + await page.evaluate("(t) => { document.title = t; }", title) - await page.pdf(**pdf_kwargs) - await browser.close() + await page.pdf(**pdf_kwargs) + finally: + try: + await browser.close() + except Exception: + pass # swallow close errors to avoid masking the original exception size = os.path.getsize(pdf_path) print(f"PDF generated: {pdf_path}")