Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/filler.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,14 @@ def fill_form(self, pdf_form: str, llm: LLM):
# Read PDF
pdf = PdfReader(pdf_form)

i = 0
# Loop through pages
for page in pdf.pages:
if page.Annots:
sorted_annots = sorted(
page.Annots, key=lambda a: (-float(a.Rect[1]), float(a.Rect[0]))
)

i = 0
for annot in sorted_annots:
if annot.Subtype == "/Widget" and annot.T:
if i < len(answers_list):
Expand Down
11 changes: 10 additions & 1 deletion src/inputs/input.txt
Original file line number Diff line number Diff line change
@@ -1 +1,10 @@
Officer Voldemort here, at an incident reported at 456 Oak Street. Two victims, Mark Smith and Jane Doe. Medical aid rendered for minor lacerations. Handed off to Sheriff's Deputy Alvarez. End of transmission.
UC Vaccine Declination Statement

Name/SID: Sarah Johnson, SID 4527891
Job Title: Research Scientist
Department: Microbiology
Phone Number: 831-555-0142
Email: sjohnson@ucsc.edu
Date: 03/15/2026

Signature: ________________________
53 changes: 53 additions & 0 deletions src/test/test_filler_multi_page.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
from src.filler import Filler


class DummyAnnot:
    """Minimal stand-in for a PDF form-field annotation used by the test.

    Exposes the attributes set below (``Subtype``, ``T``, ``Rect``, ``V``,
    ``AP``) so a test can inspect what the code under test wrote to them.
    """

    def __init__(self, x, y):
        # Rect is [x, y, x + 10, y + 10], with every coordinate stored as a
        # string rather than a number.
        corners = (x, y, x + 10, y + 10)
        self.Rect = [str(coord) for coord in corners]
        self.Subtype = "/Widget"
        self.T = "(field)"
        # V starts unset; AP starts as a non-None sentinel value.
        self.V = None
        self.AP = "placeholder"


class DummyPage:
    """Fake PDF page that only carries an ``Annots`` attribute."""

    def __init__(self, annots) -> None:
        # Stored as-is (no copy), so callers keep a handle on the same object.
        self.Annots = annots


class DummyPdf:
    """Fake PDF document that only carries a ``pages`` attribute."""

    def __init__(self, pages) -> None:
        # Stored as-is (no copy).
        self.pages = pages


class DummyWriter:
    """Fake PDF writer whose ``write`` does nothing, so no file is produced."""

    def write(self, output_pdf, pdf) -> None:
        """Accept an output path and a document, ignore both, return None."""


class DummyLLM:
    """Fake LLM that hands back canned data instead of running anything."""

    def __init__(self, data):
        # Kept by reference; get_data() returns this same object.
        self._payload = data

    def get_data(self):
        """Return the data object supplied at construction time."""
        return self._payload

    def main_loop(self):
        """Do no work and return this instance, allowing chained calls."""
        return self


def test_fill_form_keeps_value_index_across_pages(monkeypatch):
    """Check that a two-page form receives consecutive answers.

    Builds a fake document with one widget on each of two pages, patches the
    PDF reader/writer names in ``src.filler`` with in-memory fakes, and runs
    ``Filler.fill_form``. The first page's widget must end up with
    "value-1" and the second page's widget with "value-2".
    """
    first_widget = DummyAnnot(0, 100)
    second_widget = DummyAnnot(0, 100)
    fake_pdf = DummyPdf(
        [
            DummyPage([first_widget]),
            DummyPage([second_widget]),
        ]
    )

    # Swap out PDF I/O so the test touches no real files.
    monkeypatch.setattr("src.filler.PdfReader", lambda *_args, **_kwargs: fake_pdf)
    monkeypatch.setattr("src.filler.PdfWriter", lambda: DummyWriter())

    answers = {"field1": "value-1", "field2": "value-2"}
    llm = DummyLLM(answers)
    filler = Filler()

    filler.fill_form("form.pdf", llm)

    assert first_widget.V == "value-1"
    assert second_widget.V == "value-2"
Loading