Patch: NLP geotagging and static-map embedding for filled PDF reports.
Touches README.md, requirements.txt, src/file_manipulator.py, src/filler.py,
src/geocoder.py (new file) and tests/test_geocoder.py (new file).
Note: the `index` lines are carried over unchanged; post-image blob hashes of
hunks whose content was edited will no longer match.

diff --git a/README.md b/README.md
index 42862e3..c03e5ad 100644
--- a/README.md
+++ b/README.md
@@ -20,6 +20,8 @@ The result is hours of time saved per shift, per firefighter.
 ### ✨ Key Features
 - **Agnostic:** Works with any department's existing fillable PDF forms.
 - **AI-Powered:** Uses open-source, locally-run LLMs (Mistral) to extract data from natural language. No data ever needs to leave the local machine.
+- **📍 NLP Geotagging:** Automatically extracts location descriptions (e.g., "North of the beach") and converts them into GPS coordinates.
+- **🗺️ Map Integration:** Dynamically embeds a static map image of the incident location directly into the final PDF report.
 - **Single Point of Entry:** Eliminates redundant data entry entirely.
 
 Open-Source (DPG): Built 100% with open-source tools to be a true Digital Public Good, freely available for any department to adopt and modify.
diff --git a/requirements.txt b/requirements.txt
index eaa6c81..a5b3725 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -9,4 +9,10 @@ sqlmodel
 pytest
 httpx
 numpy<2
-ollama
\ No newline at end of file
+ollama
+deep-translator
+langdetect
+geopy
+staticmap
+PyMuPDF
+Pillow
diff --git a/src/file_manipulator.py b/src/file_manipulator.py
index b7815cc..97a3d2a 100644
--- a/src/file_manipulator.py
+++ b/src/file_manipulator.py
@@ -31,9 +31,31 @@ def fill_form(self, user_input: str, fields: list, pdf_form_path: str):
 
         print("[3] Starting extraction and PDF filling process...")
         try:
-            self.llm._target_fields = fields
+            # Add implicit Location Summary field to extract for Geotagging (Issue #108)
+            mapping_fields = {f: None for f in fields}
+            mapping_fields["Location Summary"] = None
+
+            self.llm._target_fields = mapping_fields
             self.llm._transcript_text = user_input
-            output_name = self.filler.fill_form(pdf_form=pdf_form_path, llm=self.llm)
+
+            # The filler fills the PDF based on the LLM's final state
+            # It will ignore "Location Summary" if the PDF doesn't have a matching visual field index,
+            # but we can intercept it here for map generation.
+            self.llm.main_loop()
+
+            extracted_data = self.llm.get_data()
+            location_text = extracted_data.get("Location Summary")
+
+            map_image_path = None
+            if location_text and location_text != "-1":
+                from src.geocoder import Geotagger
+                geotagger = Geotagger()
+                coords = geotagger.get_coordinates(location_text)
+                if coords:
+                    lat, lon = coords
+                    map_image_path = geotagger.generate_map_image(lat, lon)
+
+            output_name = self.filler.fill_form(pdf_form=pdf_form_path, llm=self.llm, map_image_path=map_image_path)
 
             print("\n----------------------------------")
             print("βœ… Process Complete.")
diff --git a/src/filler.py b/src/filler.py
index e31e535..3857ed7 100644
--- a/src/filler.py
+++ b/src/filler.py
@@ -7,7 +7,7 @@ class Filler:
     def __init__(self):
         pass
 
-    def fill_form(self, pdf_form: str, llm: LLM):
+    def fill_form(self, pdf_form: str, llm: LLM, map_image_path: str = None):
         """
         Fill a PDF form with values from user_input using LLM.
         Fields are filled in the visual order (top-to-bottom, left-to-right).
@@ -20,12 +20,18 @@ def fill_form(self, pdf_form: str, llm: LLM):
         )
 
         # Generate dictionary of answers from your original function
-        t2j = llm.main_loop()
-        textbox_answers = t2j.get_data()  # This is a dictionary
+        # Note: LLM has already run its main_loop upstream in file_manipulator before calling this.
+        # Build a filtered copy instead of deleting the key in place: if get_data() hands back the
+        # LLM's own dict it stays untouched, and "Location Summary" never lands in a visual field.
+        textbox_answers = {
+            key: value
+            for key, value in llm.get_data().items()
+            if key != "Location Summary"
+        }
 
         answers_list = list(textbox_answers.values())
 
-        # Read PDF
+        # Read PDF via pdfrw
         pdf = PdfReader(pdf_form)
 
         # Loop through pages
@@ -47,6 +53,33 @@ def fill_form(self, pdf_form: str, llm: LLM):
                             break
 
         PdfWriter().write(output_pdf, pdf)
+
+        # Step 2: Overlay map image onto the PDF if generation was successful
+        if map_image_path:
+            # Imported here because this patch adds no module-level `os` import to
+            # filler.py; PyMuPDF (fitz) is likewise only needed when a map exists.
+            import os
+
+            try:
+                if os.path.exists(map_image_path):
+                    import fitz
+
+                    doc = fitz.open(output_pdf)
+                    try:
+                        # Insert on the first page
+                        page = doc[0]
+                        # Define where the map image goes: bottom right corner, width 150, height 100
+                        rect = fitz.Rect(page.rect.width - 200, page.rect.height - 150, page.rect.width - 50, page.rect.height - 50)
+                        page.insert_image(rect, filename=map_image_path)
+                        doc.saveIncr()
+                    finally:
+                        doc.close()
+            finally:
+                # Clean up the temporary map image even if embedding failed
+                try:
+                    os.remove(map_image_path)
+                except OSError:
+                    pass
 
         # Your main.py expects this function to return the path
         return output_pdf
diff --git a/src/geocoder.py b/src/geocoder.py
new file mode 100644
index 0000000..f4f7da0
--- /dev/null
+++ b/src/geocoder.py
@@ -0,0 +1,77 @@
+import os
+from io import BytesIO
+from geopy.geocoders import Nominatim
+from geopy.exc import GeocoderTimedOut, GeocoderServiceError
+from staticmap import StaticMap, CircleMarker
+
+class Geotagger:
+    def __init__(self, user_agent="fireform_geotagger_v1"):
+        """
+        Initialize the geocoding service using OpenStreetMap's Nominatim API.
+        We use a specific user_agent to comply with their usage policy.
+        """
+        self.geolocator = Nominatim(user_agent=user_agent)
+
+    def get_coordinates(self, location_description: str) -> tuple[float, float] | None:
+        """
+        Translates a natural language location description into Lat/Lon coordinates.
+        Uses Nominatim under the hood. Fails gracefully.
+
+        Args:
+            location_description: String (e.g. "Eiffel Tower", "Agadir Beach")
+
+        Returns:
+            (latitude, longitude) tuple, or None if not found/error.
+        """
+        normalized = (location_description or "").strip().lower()
+        if not normalized or normalized in ("unknown", "n/a", "none", "-1"):
+            return None
+
+        print(f"\n[GEOCODER] Attempting to geocode: '{location_description}'")
+        try:
+            location = self.geolocator.geocode(location_description, timeout=10)
+            if location:
+                print(f"[GEOCODER] Found: {location.address} ({location.latitude}, {location.longitude})")
+                return (location.latitude, location.longitude)
+            else:
+                print(f"[GEOCODER] No coordinates found for: '{location_description}'")
+                return None
+        except (GeocoderTimedOut, GeocoderServiceError) as e:
+            print(f"[GEOCODER] Error communicating with Geolocation API: {e}")
+            return None
+        except Exception as e:
+            print(f"[GEOCODER] Unexpected error: {e}")
+            return None
+
+    def generate_map_image(self, lat: float, lon: float, output_path: str = "temp_map.png") -> str | None:
+        """
+        Generates a static map image centered on the given coordinates with a red pin.
+
+        Args:
+            lat: Latitude
+            lon: Longitude
+            output_path: Path to save the resulting .png image.
+
+        Returns:
+            The absolute path to the generated image, or None on failure.
+        """
+        if lat is None or lon is None:
+            return None
+
+        print(f"[GEOCODER] Generating static map image for ({lat}, {lon})...")
+        try:
+            m = StaticMap(400, 300)
+
+            # Create a red circle marker to act as our pin
+            marker = CircleMarker((lon, lat), 'red', 12)
+            m.add_marker(marker)
+
+            # Render and save the map (zoom level 14 is good for cities/landmarks)
+            image = m.render(zoom=14)
+            image.save(output_path)
+
+            print(f"[GEOCODER] Map image saved to: {output_path}")
+            return os.path.abspath(output_path)
+        except Exception as e:
+            print(f"[GEOCODER] Failed to generate map image: {e}")
+            return None
diff --git a/tests/test_geocoder.py b/tests/test_geocoder.py
new file mode 100644
index 0000000..4f06226
--- /dev/null
+++ b/tests/test_geocoder.py
@@ -0,0 +1,73 @@
+import pytest
+from unittest.mock import patch, MagicMock
+from src.geocoder import Geotagger
+from geopy.exc import GeocoderTimedOut
+
+class TestGeotagger:
+    def test_get_coordinates_success(self):
+        """Test that a valid location returns coordinates."""
+        geotagger = Geotagger()
+
+        # Mock the geolocator response
+        mock_location = MagicMock()
+        mock_location.latitude = 48.8584
+        mock_location.longitude = 2.2945
+        mock_location.address = "Eiffel Tower, Paris"
+
+        with patch.object(geotagger.geolocator, 'geocode', return_value=mock_location):
+            result = geotagger.get_coordinates("Eiffel Tower")
+
+        assert result == (48.8584, 2.2945)
+
+    def test_get_coordinates_not_found(self):
+        """Test that an invalid location returns None."""
+        geotagger = Geotagger()
+
+        with patch.object(geotagger.geolocator, 'geocode', return_value=None):
+            result = geotagger.get_coordinates("Some Fake Place That Does Not Exist")
+
+        assert result is None
+
+    def test_get_coordinates_timeout_handling(self):
+        """Test that network timeouts are caught and return None gracefully."""
+        geotagger = Geotagger()
+
+        with patch.object(geotagger.geolocator, 'geocode', side_effect=GeocoderTimedOut("Timeout")):
+            result = geotagger.get_coordinates("Eiffel Tower")
+
+        assert result is None
+
+    def test_empty_or_unknown_string_returns_none(self):
+        """Test that empty or filler strings instantly return None without network calls."""
+        geotagger = Geotagger()
+
+        with patch.object(geotagger.geolocator, 'geocode') as mock_geocode:
+            assert geotagger.get_coordinates("") is None
+            assert geotagger.get_coordinates("unknown") is None
+            assert geotagger.get_coordinates("-1") is None
+            assert geotagger.get_coordinates("  N/A  ") is None
+
+            mock_geocode.assert_not_called()
+
+    def test_generate_map_image_success(self):
+        """Test map generation returns a path."""
+        geotagger = Geotagger()
+
+        # Patch the StaticMap methods so we don't actually hit the OSM tile servers in tests
+        with patch('src.geocoder.StaticMap') as MockStaticMap:
+            mock_map_instance = MagicMock()
+            mock_image = MagicMock()
+            mock_map_instance.render.return_value = mock_image
+            MockStaticMap.return_value = mock_map_instance
+
+            result = geotagger.generate_map_image(48.8584, 2.2945, output_path="test_map.png")
+
+            assert result is not None
+            assert "test_map.png" in result
+            mock_image.save.assert_called_once_with("test_map.png")
+
+    def test_generate_map_image_with_none_coords(self):
+        """Test map generation with None coordinates."""
+        geotagger = Geotagger()
+        result = geotagger.generate_map_image(None, 2.2945)
+        assert result is None