Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ The result is hours of time saved per shift, per firefighter.
### ✨ Key Features
- **Agnostic:** Works with any department's existing fillable PDF forms.
- **AI-Powered:** Uses open-source, locally-run LLMs (Mistral) to extract data from natural language. No data ever needs to leave the local machine.
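- **📍 NLP Geotagging:** Automatically extracts location descriptions (e.g., "North of the beach") and converts them into GPS coordinates using OpenStreetMap's Nominatim geocoding service. This step requires network access: the extracted location text is sent to that service.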
- **🗺️ Map Integration:** Dynamically embeds a static map image of the incident location directly into the final PDF report.
- **Single Point of Entry:** Eliminates redundant data entry entirely.

Open-Source (DPG): Built 100% with open-source tools to be a true Digital Public Good, freely available for any department to adopt and modify.
Expand Down
8 changes: 7 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,10 @@ sqlmodel
pytest
httpx
numpy<2
ollama
ollama
deep-translator
langdetect
geopy
staticmap
PyMuPDF
Pillow
26 changes: 24 additions & 2 deletions src/file_manipulator.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,31 @@ def fill_form(self, user_input: str, fields: list, pdf_form_path: str):

print("[3] Starting extraction and PDF filling process...")
try:
self.llm._target_fields = fields
# Add implicit Location Summary field to extract for Geotagging (Issue #108)
mapping_fields = {f: None for f in fields}
mapping_fields["Location Summary"] = None

self.llm._target_fields = mapping_fields
self.llm._transcript_text = user_input
output_name = self.filler.fill_form(pdf_form=pdf_form_path, llm=self.llm)

# The filler fills the PDF based on the LLM's final state
# It will ignore "Location Summary" if the PDF doesn't have a matching visual field index,
# but we can intercept it here for map generation.
self.llm.main_loop()

extracted_data = self.llm.get_data()
location_text = extracted_data.get("Location Summary")

map_image_path = None
if location_text and location_text != "-1":
from src.geocoder import Geotagger
geotagger = Geotagger()
coords = geotagger.get_coordinates(location_text)
if coords:
lat, lon = coords
map_image_path = geotagger.generate_map_image(lat, lon)

output_name = self.filler.fill_form(pdf_form=pdf_form_path, llm=self.llm, map_image_path=map_image_path)

print("\n----------------------------------")
print("✅ Process Complete.")
Expand Down
29 changes: 25 additions & 4 deletions src/filler.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ class Filler:
def __init__(self):
pass

def fill_form(self, pdf_form: str, llm: LLM):
def fill_form(self, pdf_form: str, llm: LLM, map_image_path: str = None):
"""
Fill a PDF form with values from user_input using LLM.
Fields are filled in the visual order (top-to-bottom, left-to-right).
Expand All @@ -20,12 +20,16 @@ def fill_form(self, pdf_form: str, llm: LLM):
)

# Generate dictionary of answers from your original function
t2j = llm.main_loop()
textbox_answers = t2j.get_data() # This is a dictionary
# Note: LLM has already run its main_loop upstream in file_manipulator before calling this.
textbox_answers = llm.get_data() # This is a dictionary

# Remove "Location Summary" from the answers so we don't accidentally fill it into a standard visual field
if "Location Summary" in textbox_answers:
del textbox_answers["Location Summary"]

answers_list = list(textbox_answers.values())

# Read PDF
# Read PDF via pdfrw
pdf = PdfReader(pdf_form)

# Loop through pages
Expand All @@ -47,6 +51,23 @@ def fill_form(self, pdf_form: str, llm: LLM):
break

PdfWriter().write(output_pdf, pdf)

# Step 2: Overlay map image onto the PDF if generation was successful
if map_image_path and os.path.exists(map_image_path):
import fitz
doc = fitz.open(output_pdf)
# Insert on the first page
page = doc[0]
# Define where the map image goes: bottom right corner, width 150, height 100
rect = fitz.Rect(page.rect.width - 200, page.rect.height - 150, page.rect.width - 50, page.rect.height - 50)
page.insert_image(rect, filename=map_image_path)
doc.saveIncr()
doc.close()
# Clean up temporary map image
try:
os.remove(map_image_path)
except OSError:
pass

# Your main.py expects this function to return the path
return output_pdf
76 changes: 76 additions & 0 deletions src/geocoder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
import os
from io import BytesIO
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut, GeocoderServiceError
from staticmap import StaticMap, CircleMarker

class Geotagger:
def __init__(self, user_agent="fireform_geotagger_v1"):
"""
Initialize the geocoding service using OpenStreetMap's Nominatim API.
We use a specific user_agent to comply with their usage policy.
"""
self.geolocator = Nominatim(user_agent=user_agent)

def get_coordinates(self, location_description: str) -> tuple[float, float] | None:
"""
Translates a natural language location description into Lat/Lon coordinates.
Uses Nominatim under the hood. Fails gracefully.

Args:
location_description: String (e.g. "Eiffel Tower", "Agadir Beach")

Returns:
(latitude, longitude) tuple, or None if not found/error.
"""
if not location_description or location_description.lower() in ("unknown", "n/a", "none", "-1"):
return None

print(f"\n[GEOCODER] Attempting to geocode: '{location_description}'")
try:
location = self.geolocator.geocode(location_description, timeout=10)
if location:
print(f"[GEOCODER] Found: {location.address} ({location.latitude}, {location.longitude})")
return (location.latitude, location.longitude)
else:
print(f"[GEOCODER] No coordinates found for: '{location_description}'")
return None
except (GeocoderTimedOut, GeocoderServiceError) as e:
print(f"[GEOCODER] Error communicating with Geolocation API: {e}")
return None
except Exception as e:
print(f"[GEOCODER] Unexpected error: {e}")
return None

def generate_map_image(self, lat: float, lon: float, output_path: str = "temp_map.png") -> str | None:
"""
Generates a static map image centered on the given coordinates with a red pin.

Args:
lat: Latitude
lon: Longitude
output_path: Path to save the resulting .png image.

Returns:
The absolute path to the generated image, or None on failure.
"""
if lat is None or lon is None:
return None

print(f"[GEOCODER] Generating static map image for ({lat}, {lon})...")
try:
m = StaticMap(400, 300)

# Create a red circle marker to act as our pin
marker = CircleMarker((lon, lat), 'red', 12)
m.add_marker(marker)

# Render and save the map (zoom level 14 is good for cities/landmarks)
image = m.render(zoom=14)
image.save(output_path)

print(f"[GEOCODER] Map image saved to: {output_path}")
return os.path.abspath(output_path)
except Exception as e:
print(f"[GEOCODER] Failed to generate map image: {e}")
return None
72 changes: 72 additions & 0 deletions tests/test_geocoder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
import pytest
from unittest.mock import patch, MagicMock
from src.geocoder import Geotagger
from geopy.exc import GeocoderTimedOut

class TestGeotagger:
    """Unit tests for Geotagger; geocoding and map rendering are mocked out."""

    def test_get_coordinates_success(self):
        """A geocoder hit comes back as a (latitude, longitude) tuple."""
        tagger = Geotagger()

        # Stand-in for the location object the geolocator would return
        fake_hit = MagicMock(
            latitude=48.8584,
            longitude=2.2945,
            address="Eiffel Tower, Paris",
        )

        with patch.object(tagger.geolocator, 'geocode', return_value=fake_hit):
            coords = tagger.get_coordinates("Eiffel Tower")

        assert coords == (48.8584, 2.2945)

    def test_get_coordinates_not_found(self):
        """A geocoder miss (None) is passed through as None."""
        tagger = Geotagger()

        with patch.object(tagger.geolocator, 'geocode', return_value=None):
            coords = tagger.get_coordinates("Some Fake Place That Does Not Exist")

        assert coords is None

    def test_get_coordinates_timeout_handling(self):
        """A GeocoderTimedOut raised by the geolocator is swallowed and yields None."""
        tagger = Geotagger()
        timeout = GeocoderTimedOut("Timeout")

        with patch.object(tagger.geolocator, 'geocode', side_effect=timeout):
            coords = tagger.get_coordinates("Eiffel Tower")

        assert coords is None

    def test_empty_or_unknown_string_returns_none(self):
        """Empty and filler descriptions return None without calling the geolocator."""
        tagger = Geotagger()

        with patch.object(tagger.geolocator, 'geocode') as geocode_spy:
            for filler in ("", "unknown", "-1"):
                assert tagger.get_coordinates(filler) is None

        geocode_spy.assert_not_called()

    def test_generate_map_image_success(self):
        """Map generation saves the rendered image and returns a path containing its name."""
        tagger = Geotagger()

        # StaticMap is replaced so the test never reaches the OSM tile servers
        with patch('src.geocoder.StaticMap') as static_map_cls:
            rendered = static_map_cls.return_value.render.return_value

            saved_path = tagger.generate_map_image(48.8584, 2.2945, output_path="test_map.png")

        assert saved_path is not None
        assert "test_map.png" in saved_path
        rendered.save.assert_called_once_with("test_map.png")

    def test_generate_map_image_with_none_coords(self):
        """A None coordinate makes map generation return None."""
        tagger = Geotagger()
        assert tagger.generate_map_image(None, 2.2945) is None