From b1cc5a2b32167cb8b27aacfb4262a0b90ff0a5d0 Mon Sep 17 00:00:00 2001 From: Gopi Rk Date: Sat, 7 Mar 2026 12:42:31 +0530 Subject: [PATCH] fix(filler): replace timestamp filename with uuid4 to prevent race condition Two concurrent requests processed in the same second previously generated identical output paths (YYYY%m%d_%H%M%S has only 1-second resolution). The second PdfWriter.write() call silently overwrote the first, leaving the first submission's DB record pointing to corrupted/wrong data with no exception raised anywhere in the stack. Fix: - Extract path generation into Filler._make_output_path() static method - Use uuid4().hex (128-bit randomness) instead of a timestamp - Write outputs into an outputs/ subdirectory adjacent to the template, separating generated files from source templates Also add tests/test_filler.py with four unit tests covering: - Uniqueness of 500 generated paths (regression guard) - Correct placement in outputs/ subdirectory - .pdf extension - Original template stem preserved in filename Fixes: race condition in PDF output naming --- src/filler.py | 32 +++++++++++++++++++++------- tests/test_filler.py | 50 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+), 7 deletions(-) create mode 100644 tests/test_filler.py diff --git a/src/filler.py b/src/filler.py index e31e535..77f8446 100644 --- a/src/filler.py +++ b/src/filler.py @@ -1,23 +1,41 @@ from pdfrw import PdfReader, PdfWriter from src.llm import LLM -from datetime import datetime +from pathlib import Path +from uuid import uuid4 class Filler: def __init__(self): pass + @staticmethod + def _make_output_path(pdf_form: str) -> str: + """ + Generate a unique output path for a filled PDF. + + Previously this used datetime.now().strftime("%Y%m%d_%H%M%S"), which has + only 1-second resolution. Two concurrent requests for the same template + within the same second would produce identical paths, causing the second + write to silently overwrite the first and corrupting the first submission's + stored record. + + uuid4().hex provides 128 bits of randomness — collision probability is + effectively zero (2^-128) regardless of concurrency or timing. + + Output is written to an `outputs/` subdirectory next to the template so + that generated files are cleanly separated from source templates. + """ + template_path = Path(pdf_form) + output_dir = template_path.parent / "outputs" + output_dir.mkdir(parents=True, exist_ok=True) + return str(output_dir / f"{template_path.stem}_{uuid4().hex}_filled.pdf") + def fill_form(self, pdf_form: str, llm: LLM): """ Fill a PDF form with values from user_input using LLM. Fields are filled in the visual order (top-to-bottom, left-to-right). """ - output_pdf = ( - pdf_form[:-4] - + "_" - + datetime.now().strftime("%Y%m%d_%H%M%S") - + "_filled.pdf" - ) + output_pdf = self._make_output_path(pdf_form) # Generate dictionary of answers from your original function t2j = llm.main_loop() diff --git a/tests/test_filler.py b/tests/test_filler.py new file mode 100644 index 0000000..a647465 --- /dev/null +++ b/tests/test_filler.py @@ -0,0 +1,50 @@ +""" +Tests for src/filler.py — specifically the output path generation logic. +""" +import os +from src.filler import Filler + + +def test_make_output_path_is_unique(): + """ + Every call to _make_output_path() must return a distinct path. + + Previously, paths were generated with a 1-second timestamp, meaning two + concurrent requests within the same second would produce the same path and + the second write would silently overwrite the first (race condition). + + With uuid4().hex the collision probability is 2^-128 — effectively zero. + """ + paths = {Filler._make_output_path("src/inputs/template.pdf") for _ in range(500)} + assert len(paths) == 500, ( + "Expected 500 unique output paths but got collisions. " + "The race condition fix may have been reverted." + ) + + +def test_make_output_path_lands_in_outputs_subdir(): + """ + Output PDFs must be written into an 'outputs/' subdirectory, + not alongside the source template. + """ + path = Filler._make_output_path("src/inputs/template.pdf") + parts = path.replace("\\", "/").split("/") + assert "outputs" in parts, ( + f"Output path '{path}' does not contain an 'outputs/' directory. " + "Generated files must be separated from source templates." + ) + + +def test_make_output_path_extension_is_pdf(): + """Output path must end with .pdf.""" + path = Filler._make_output_path("src/inputs/template.pdf") + assert path.endswith(".pdf"), f"Expected .pdf extension, got: {path}" + + +def test_make_output_path_preserves_stem(): + """Output filename must include the original template stem for traceability.""" + path = Filler._make_output_path("src/inputs/my_form.pdf") + filename = os.path.basename(path) + assert filename.startswith("my_form_"), ( + f"Expected filename to start with 'my_form_', got: {filename}" + )