fireform-core · utkarshqz · Mar 13, 2026 · Mar 14, 2026
diff --git a/api/db/repositories.py b/api/db/repositories.py
@@ -1,19 +1,33 @@
 from sqlmodel import Session, select
 from api.db.models import Template, FormSubmission
 
-# Templates
+
+# ── Templates ─────────────────────────────────────────────────
+
 def create_template(session: Session, template: Template) -> Template:
     session.add(template)
     session.commit()
     session.refresh(template)
     return template
 
+
 def get_template(session: Session, template_id: int) -> Template | None:
     return session.get(Template, template_id)
 
-# Forms
+
+def get_all_templates(session: Session, limit: int = 100, offset: int = 0) -> list[Template]:
+    # One page of templates, ordered by id so offset/limit paging is deterministic; list() matches the annotation.
+    return list(session.exec(select(Template).order_by(Template.id).offset(offset).limit(limit)).all())
+
+
+# ── Forms ─────────────────────────────────────────────────────
+
 def create_form(session: Session, form: FormSubmission) -> FormSubmission:
     session.add(form)
     session.commit()
     session.refresh(form)
-    return form
+    return form
+
+
+def get_form(session: Session, submission_id: int) -> FormSubmission | None:
+    return session.get(FormSubmission, submission_id)  # lookup by primary key; None when no row matches
diff --git a/api/main.py b/api/main.py
@@ -1,7 +1,25 @@
-from fastapi import FastAPI
+from fastapi import FastAPI, Request
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import JSONResponse
 from api.routes import templates, forms
+from api.errors.base import AppError
+from typing import Union
 
 app = FastAPI()
 
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],  # NOTE(review): wildcard accepts any origin — confirm this is intended outside local dev
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+@app.exception_handler(AppError)
+def app_error_handler(request: Request, exc: AppError):
+    """Turn an AppError into a JSON response using its status code and message."""
+    # The message is exposed under the "detail" key of the response body.
+    body = {"detail": exc.message}
+    return JSONResponse(status_code=exc.status_code, content=body)
+
 app.include_router(templates.router)
 app.include_router(forms.router)
diff --git a/api/routes/forms.py b/api/routes/forms.py
@@ -1,25 +1,216 @@
+import os
 from fastapi import APIRouter, Depends
+from fastapi.responses import FileResponse
 from sqlmodel import Session
 from api.deps import get_db
-from api.schemas.forms import FormFill, FormFillResponse
-from api.db.repositories import create_form, get_template
+from api.schemas.forms import FormFill, FormFillResponse, BatchFormFill, BatchFormFillResponse, BatchResultItem
+from api.db.repositories import create_form, get_template, get_form
 from api.db.models import FormSubmission
 from api.errors.base import AppError
 from src.controller import Controller
+from src.llm import LLM
+from src.filler import Filler
 
 router = APIRouter(prefix="/forms", tags=["forms"])
 
+
 @router.post("/fill", response_model=FormFillResponse)
 def fill_form(form: FormFill, db: Session = Depends(get_db)):
-    if not get_template(db, form.template_id):
+    # Fill one template's PDF from free text and record the result as a FormSubmission.
+    # Failures surface as AppError: 404 missing template/PDF, 503 Ollama unreachable, 500 otherwise.
+    template = get_template(db, form.template_id)
+    if not template:
         raise AppError("Template not found", status_code=404)
 
-    fetched_template = get_template(db, form.template_id)
+    # Validate PDF exists on disk (#235)
+    if not os.path.exists(template.pdf_path):
+        raise AppError(
+            f"Template PDF not found on disk: {template.pdf_path}. "
+            "Please re-upload the template.",
+            status_code=404
+        )
+
+    try:
+        controller = Controller()
+        fields_list = list(template.fields.keys()) if isinstance(template.fields, dict) else template.fields
+        path = controller.fill_form(
+            user_input=form.input_text,
+            fields=fields_list,
+            pdf_form_path=template.pdf_path
+        )
+    except ConnectionError as e:
+        # NOTE(review): this matches the builtin ConnectionError only — confirm the LLM client raises it.
+        raise AppError(
+            "Could not connect to Ollama. Make sure ollama serve is running.",
+            status_code=503
+        ) from e
+    except Exception as e:
+        raise AppError(f"PDF filling failed: {str(e)}", status_code=500) from e
 
-    controller = Controller()
-    path = controller.fill_form(user_input=form.input_text, fields=fetched_template.fields, pdf_form_path=fetched_template.pdf_path)
+    if not path:
+        raise AppError(
+            "PDF generation failed — no output file was produced. "
+            "Check that the PDF template is a valid fillable form and Ollama is running.",
+            status_code=500
+        )
 
-    submission = FormSubmission(**form.model_dump(), output_pdf_path=path)
+    if not os.path.exists(path):
+        raise AppError(f"PDF was generated but file not found at: {path}", status_code=500)
+
+    submission = FormSubmission(
+        **form.model_dump(),
+        output_pdf_path=path
+    )
     return create_form(db, submission)
 
 
+@router.post("/fill/batch", response_model=BatchFormFillResponse)
+def fill_batch(batch: BatchFormFill, db: Session = Depends(get_db)):
+    """
+    Batch multi-template form filling — closes #156.
+
+    KEY DESIGN: LLM extraction runs ONCE for the entire batch.
+    All templates share the same extracted JSON — no redundant Ollama calls.
+
+    Flow:
+      1. Validate all templates exist upfront
+      2. Merge ALL fields from ALL templates into one superset
+      3. ONE LLM call extracts all values from transcript
+      4. Each template PDF filled using its relevant subset of extracted values
+    """
+    if not batch.template_ids:
+        raise AppError("template_ids must not be empty", status_code=400)
+
+    # ── Step 1: Validate all templates upfront ────────────────
+    templates = []
+    for tid in batch.template_ids:
+        tpl = get_template(db, tid)
+        if not tpl:
+            raise AppError(f"Template {tid} not found", status_code=404)
+        if not os.path.exists(tpl.pdf_path):
+            raise AppError(
+                f"Template '{tpl.name}' (id={tid}) PDF not found on disk. "
+                "Please re-upload the template.",
+                status_code=404
+            )
+        templates.append(tpl)
+
+    print(f"[BATCH] Starting batch fill for {len(templates)} template(s)...")
+    print(f"[BATCH] Templates: {[t.name for t in templates]}")
+
+    # ── Step 2: Merge ALL fields from ALL templates into superset
+    # One LLM call covers every field needed across all templates
+    merged_fields = {}
+    for tpl in templates:
+        if isinstance(tpl.fields, dict):
+            merged_fields.update(tpl.fields)
+        else:
+            for f in tpl.fields:
+                merged_fields[f] = f
+
+    print(f"[BATCH] Merged superset: {len(merged_fields)} unique field(s) across all templates")
+
+    # ── Step 3: ONE LLM call for entire batch ─────────────────
+    print("[BATCH] Running single LLM extraction (no redundant calls)...")
+    try:
+        llm = LLM(
+            transcript_text=batch.input_text,
+            target_fields=merged_fields
+        )
+        llm.main_loop()
+        extracted_json = llm.get_data()
+        print(f"[BATCH] Extraction complete — {len(extracted_json)} fields extracted")
+    except ConnectionError as e:
+        raise AppError(
+            "Could not connect to Ollama. Make sure ollama serve is running.",
+            status_code=503
+        ) from e
+    except Exception as e:
+        raise AppError(f"LLM extraction failed: {str(e)}", status_code=500) from e
+
+    # ── Step 4: Fill each PDF with pre-extracted data ─────────
+    # No new LLM calls — just PDF writing per template
+    results = []
+    success_count = 0
+    fail_count = 0
+    filler = Filler()
+
+    for tpl in templates:
+        tpl_id, tpl_name = tpl.id, tpl.name  # captured up front so the failure path needs no DB access
+        print(f"[BATCH] Filling PDF: '{tpl_name}' (id={tpl_id})...")
+        try:
+            # Subset extracted data to only this template's fields
+            tpl_field_keys = list(tpl.fields.keys()) if isinstance(tpl.fields, dict) else tpl.fields
+            tpl_data = {k: extracted_json.get(k) for k in tpl_field_keys}
+
+            # Fill PDF directly — no LLM call
+            output_path = filler.fill_form_with_data(pdf_form=tpl.pdf_path, data=tpl_data)
+
+            if not output_path or not os.path.exists(output_path):
+                raise RuntimeError("No output file produced")
+
+            submission = FormSubmission(
+                template_id=tpl_id,
+                input_text=batch.input_text,
+                output_pdf_path=output_path
+            )
+            saved = create_form(db, submission)
+
+            results.append(BatchResultItem(
+                template_id=tpl_id,
+                template_name=tpl_name,
+                success=True,
+                submission_id=saved.id,
+                download_url=f"/forms/download/{saved.id}",
+                error=None
+            ))
+            success_count += 1
+            print(f"[BATCH] ✅ '{tpl_name}' done (submission #{saved.id})")
+
+        except Exception as e:
+            # A failed commit leaves the session unusable until rolled back; reset it for the remaining templates.
+            db.rollback()
+            fail_count += 1
+            results.append(BatchResultItem(
+                template_id=tpl_id,
+                template_name=tpl_name,
+                success=False,
+                submission_id=None,
+                download_url=None,
+                error=str(e)
+            ))
+            print(f"[BATCH] ✗ '{tpl_name}' failed: {e}")
+
+    print(f"[BATCH] Complete — {success_count} succeeded, {fail_count} failed")
+
+    return BatchFormFillResponse(
+        total=len(templates),
+        succeeded=success_count,
+        failed=fail_count,
+        results=results
+    )
+
+
+@router.get("/{submission_id}", response_model=FormFillResponse)
+def get_submission(submission_id: int, db: Session = Depends(get_db)):
+    # Fetch one stored submission by id; an unknown id is reported as a 404 AppError.
+    if found := get_form(db, submission_id):
+        return found
+    raise AppError("Submission not found", status_code=404)
+
+
+@router.get("/download/{submission_id}")
+def download_filled_pdf(submission_id: int, db: Session = Depends(get_db)):
+    # Serve the filled PDF recorded for a submission.
+    # A 404 AppError is raised when either the submission row or the file on disk is missing.
+    record = get_form(db, submission_id)
+    if not record:
+        raise AppError("Submission not found", status_code=404)
+
+    pdf_path = record.output_pdf_path
+    if not os.path.exists(pdf_path):
+        raise AppError("PDF file not found on server", status_code=404)
+
+    # The download is offered under the stored file's base name.
+    download_name = os.path.basename(pdf_path)
+    return FileResponse(path=pdf_path, media_type="application/pdf", filename=download_name)
diff --git a/api/routes/templates.py b/api/routes/templates.py
@@ -1,16 +1,89 @@
-from fastapi import APIRouter, Depends
+import os
+import shutil
+import uuid
+from fastapi import APIRouter, Depends, UploadFile, File, Form
 from sqlmodel import Session
 from api.deps import get_db
-from api.schemas.templates import TemplateCreate, TemplateResponse
-from api.db.repositories import create_template
+from api.schemas.templates import TemplateResponse
+from api.db.repositories import create_template, get_all_templates
 from api.db.models import Template
-from src.controller import Controller
+from api.errors.base import AppError
 
 router = APIRouter(prefix="/templates", tags=["templates"])
 
+# Save directly into src/inputs/ — stable location, won't get wiped
+TEMPLATES_DIR = os.path.join("src", "inputs")  # relative path — resolved against the process working directory
+os.makedirs(TEMPLATES_DIR, exist_ok=True)  # runs at import time
+
+
 @router.post("/create", response_model=TemplateResponse)
-def create(template: TemplateCreate, db: Session = Depends(get_db)):
-    controller = Controller()
-    template_path = controller.create_template(template.pdf_path)
-    tpl = Template(**template.model_dump(exclude={"pdf_path"}), pdf_path=template_path)
-    return create_template(db, tpl)
+async def create(
+    name: str = Form(...),
+    file: UploadFile = File(...),
+    db: Session = Depends(get_db)
+):
+    # Store an uploaded PDF under TEMPLATES_DIR, read its form fields, and save a Template row.
+
+    # Keep only the base name: the client controls `filename`, and it may be missing entirely.
+    original_name = os.path.basename(file.filename or "")
+    if not original_name.lower().endswith(".pdf"):
+        raise AppError("Only PDF files are allowed", status_code=400)
+
+    # Save uploaded file with unique name into src/inputs/
+    unique_name = f"{uuid.uuid4().hex}_{original_name}"
+    save_path = os.path.join(TEMPLATES_DIR, unique_name)
+
+    with open(save_path, "wb") as f:
+        shutil.copyfileobj(file.file, f)
+
+    # Extract fields with pypdf and store them as {internal field name: display label}.
+    # Extraction is best-effort: on any failure the template is saved with no fields.
+    try:
+        import re
+        from pypdf import PdfReader
+
+        # Keys are the PDF's internal field names exactly as get_fields() reports them.
+        reader = PdfReader(save_path)
+        raw_fields = reader.get_fields() or {}
+
+        fields = {}
+        for internal_name, field_data in raw_fields.items():
+            # Use /TU tooltip if available, otherwise prettify the internal name
+            label = None
+            if isinstance(field_data, dict):
+                label = field_data.get("/TU")
+            if not label:
+                # Prettify: "JobTitle" → "Job Title", "DATE7_af_date" → "Date7"
+                label = re.sub(r'([a-z])([A-Z])', r'\1 \2', internal_name)
+                label = re.sub(r'_af_.*$', '', label)  # strip "_af_date" suffix
+                label = label.replace('_', ' ').strip().title()
+            fields[internal_name] = label
+
+    except Exception as e:
+        print(f"Field extraction failed: {e}")
+        fields = {}  # same shape as the success path
+
+    # Save to DB
+    tpl = Template(name=name, pdf_path=save_path, fields=fields)
+    return create_template(db, tpl)
+
+
+@router.get("", response_model=list[TemplateResponse])
+def list_templates(limit: int = 100, offset: int = 0, db: Session = Depends(get_db)):
+    # Return one page of stored templates.
+    # limit and offset are forwarded unchanged to the repository query.
+    paging = {"limit": limit, "offset": offset}
+    page = get_all_templates(db, **paging)
+    return page
+
+
+@router.get("/{template_id}", response_model=TemplateResponse)
+def get_template_by_id(template_id: int, db: Session = Depends(get_db)):
+    # Look up a single template by id; an unknown id is reported as a 404 AppError.
+    from api.db.repositories import get_template
+
+    found = get_template(db, template_id)
+    if found:
+        return found
+
+    raise AppError("Template not found", status_code=404)