From 386a6eb3e61071f7eb382c3f2662b52552a4a223 Mon Sep 17 00:00:00 2001 From: Joaquim d'Souza Date: Thu, 16 Apr 2026 14:28:41 +0200 Subject: [PATCH 1/2] feat: add geocode cache --- bin/cmd.ts | 10 ++ migrations/1774658921006_geocode_cache.ts | 17 +++ src/server/commands/populateGeocodeCache.ts | 71 +++++++++ src/server/mapping/geocode.ts | 78 +++++++--- src/server/models/GeocodeCache.ts | 10 ++ src/server/services/database/index.ts | 2 + src/server/services/database/schema.ts | 13 ++ tests/feature/geocodeCache.test.ts | 156 ++++++++++++++++++++ tests/resources/addresses.csv | 11 ++ 9 files changed, 344 insertions(+), 24 deletions(-) create mode 100644 migrations/1774658921006_geocode_cache.ts create mode 100644 src/server/commands/populateGeocodeCache.ts create mode 100644 src/server/models/GeocodeCache.ts create mode 100644 tests/feature/geocodeCache.test.ts create mode 100644 tests/resources/addresses.csv diff --git a/bin/cmd.ts b/bin/cmd.ts index b7c298ff6..e82deed0f 100755 --- a/bin/cmd.ts +++ b/bin/cmd.ts @@ -3,6 +3,7 @@ import { SignJWT } from "jose"; import ensureOrganisationMap from "@/server/commands/ensureOrganisationMap"; import importAreaSet from "@/server/commands/importAreaSet"; import importPostcodes from "@/server/commands/importPostcodes"; +import populateGeocodeCache from "@/server/commands/populateGeocodeCache"; import regeocode from "@/server/commands/regeocode"; import removeDevWebhooks from "@/server/commands/removeDevWebhooks"; import Invite from "@/server/emails/Invite"; @@ -79,6 +80,15 @@ program await importDataSource({ dataSourceId }); }); +program + .command("populateGeocodeCache") + .description( + "Populate the geocode cache from existing address-geocoded data sources", + ) + .action(async () => { + await populateGeocodeCache(); + }); + program .command("upsertUser") .option("--email ") diff --git a/migrations/1774658921006_geocode_cache.ts b/migrations/1774658921006_geocode_cache.ts new file mode 100644 index 000000000..ba9cb20c7 --- /dev/null +++ b/migrations/1774658921006_geocode_cache.ts @@ -0,0 +1,17 @@ +/* eslint-disable @typescript-eslint/no-explicit-any */ +import { type Kysely, sql } from "kysely"; + +export async function up(db: Kysely): Promise { + await db.schema + .createTable("geocodeCache") + .addColumn("address", "text", (col) => col.primaryKey()) + .addColumn("point", sql`geography`) + .addColumn("createdAt", "timestamp", (col) => + col.notNull().defaultTo(sql`CURRENT_TIMESTAMP`), + ) + .execute(); +} + +export async function down(db: Kysely): Promise { + await db.schema.dropTable("geocodeCache").execute(); +} diff --git a/src/server/commands/populateGeocodeCache.ts b/src/server/commands/populateGeocodeCache.ts new file mode 100644 index 000000000..b64c556e6 --- /dev/null +++ b/src/server/commands/populateGeocodeCache.ts @@ -0,0 +1,71 @@ +import { sql } from "kysely"; +import { GeocodingType } from "@/models/DataSource"; +import { db } from "@/server/services/database"; +import logger from "@/server/services/logger"; +import type { AddressGeocodingConfig } from "@/models/DataSource"; +import type { Point } from "@/models/shared"; + +export default async function populateGeocodeCache() { + const dataSources = await db + .selectFrom("dataSource") + .select(["id", "name", "geocodingConfig"]) + .where(sql`geocoding_config->>'type'`, "=", GeocodingType.Address) + .execute(); + + logger.info(`Found ${dataSources.length} address-geocoded data sources`); + + let totalInserted = 0; + + for (const dataSource of dataSources) { + const config = dataSource.geocodingConfig as AddressGeocodingConfig; + const addressColumns = config.columns; + + const records = await db + .selectFrom("dataRecord") + .select(["json", "geocodePoint"]) + .where("dataSourceId", "=", dataSource.id) + .execute(); + + const entries: { address: string; point: Point | null }[] = []; + for (const record of records) { + const json = record.json as Record; + const address = addressColumns + .map((c) => json[c] || "") + .filter(Boolean) + .join(", ") + .trim(); + if (!address) continue; + + entries.push({ address, point: record.geocodePoint as Point | null }); + } + + if (entries.length === 0) { + logger.info(` ${dataSource.name}: no records, skipping`); + continue; + } + + // Deduplicate by address within this data source + const uniqueByAddress = new Map(entries.map((e) => [e.address, e])); + const deduplicated = Array.from(uniqueByAddress.values()); + + // Insert in batches of 500 + const batchSize = 500; + let inserted = 0; + for (let i = 0; i < deduplicated.length; i += batchSize) { + const batch = deduplicated.slice(i, i + batchSize); + await db + .insertInto("geocodeCache") + .values(batch) + .onConflict((oc) => oc.column("address").doNothing()) + .execute(); + inserted += batch.length; + } + + logger.info( + ` ${dataSource.name}: inserted ${inserted} cache entries from ${records.length} records`, + ); + totalInserted += inserted; + } + + logger.info(`Done. Inserted ${totalInserted} total cache entries.`); +} diff --git a/src/server/mapping/geocode.ts b/src/server/mapping/geocode.ts index b1c3028fe..a32745331 100644 --- a/src/server/mapping/geocode.ts +++ b/src/server/mapping/geocode.ts @@ -1,3 +1,4 @@ +import { sql } from "kysely"; import { getBooleanEnvVar } from "@/env"; import { AreaSetCode } from "@/models/AreaSet"; import { @@ -12,6 +13,7 @@ import { findAreaByName, findAreasByPoint, } from "@/server/repositories/Area"; +import { db } from "@/server/services/database"; import logger from "@/server/services/logger"; import { geojsonPointToPoint } from "../utils/geo"; import type { GeocodeResult } from "@/models/DataRecord"; @@ -199,32 +201,10 @@ const geocodeRecordByAddress = async ( } if (!point) { - const geocodeUrl = new URL( - "https://api.mapbox.com/search/geocode/v6/forward", - ); - geocodeUrl.searchParams.set("q", address); - geocodeUrl.searchParams.set("country", "GB"); - geocodeUrl.searchParams.set( - "access_token", - process.env.MAPBOX_SECRET_TOKEN || "", - ); - - const response = await fetch(geocodeUrl); - if (!response.ok) { - throw new Error(`Geocode request failed: ${response.status}`); - } - const results = (await response.json()) as { - features?: { id: string; geometry: GeoJSONPoint }[]; - }; - if (!results.features?.length) { + point = await mapboxGeocode(address); + if (!point) { throw new Error(`Geocode request returned no features`); } - - const feature = results.features[0]; - point = { - lng: feature.geometry.coordinates[0], - lat: feature.geometry.coordinates[1], - }; } const geocodeResult: GeocodeResult = { @@ -352,3 +332,53 @@ const postcodesIOLookup = async ( }, }; }; + +const mapboxGeocode = async (address: string): Promise => { + const cached = await db + .selectFrom("geocodeCache") + .select("point") + .where("address", "=", address) + .where("createdAt", ">", sql`now() - interval '4 weeks'`) + .executeTakeFirst(); + + if (cached) { + logger.debug(`Geocode cache hit for "${address}"`); + return cached.point; + } + + logger.info(`Geocode cache miss for "${address}", calling Mapbox API`); + const geocodeUrl = new URL( + "https://api.mapbox.com/search/geocode/v6/forward", + ); + geocodeUrl.searchParams.set("q", address); + geocodeUrl.searchParams.set("country", "GB"); + geocodeUrl.searchParams.set( + "access_token", + process.env.MAPBOX_SECRET_TOKEN || "", + ); + + const response = await fetch(geocodeUrl); + if (!response.ok) { + throw new Error(`Geocode request failed: ${response.status}`); + } + const results = (await response.json()) as { + features?: { id: string; geometry: GeoJSONPoint }[]; + }; + + const point: Point | null = results.features?.length + ? { + lng: results.features[0].geometry.coordinates[0], + lat: results.features[0].geometry.coordinates[1], + } + : null; + + await db + .insertInto("geocodeCache") + .values({ address, point }) + .onConflict((oc) => + oc.column("address").doUpdateSet({ point, createdAt: sql`now()` }), + ) + .execute(); + + return point; +}; diff --git a/src/server/models/GeocodeCache.ts b/src/server/models/GeocodeCache.ts new file mode 100644 index 000000000..289f12342 --- /dev/null +++ b/src/server/models/GeocodeCache.ts @@ -0,0 +1,10 @@ +import type { Point } from "@/models/shared"; +import type { ColumnType, Insertable } from "kysely"; + +export interface GeocodeCacheTable { + address: string; + point: Point | null; + createdAt: ColumnType; +} + +export type NewGeocodeCache = Insertable; diff --git a/src/server/services/database/index.ts b/src/server/services/database/index.ts index b00258f7b..0f2270924 100644 --- a/src/server/services/database/index.ts +++ b/src/server/services/database/index.ts @@ -11,6 +11,7 @@ import type { DataRecordTable } from "@/server/models/DataRecord"; import type { DataSourceTable } from "@/server/models/DataSource"; import type { DataSourceOrganisationOverrideTable } from "@/server/models/DataSourceOrganisationOverride"; import type { FolderTable } from "@/server/models/Folder"; +import type { GeocodeCacheTable } from "@/server/models/GeocodeCache"; import type { InspectorDataSourceConfigTable } from "@/server/models/InspectorDataSourceConfig"; import type { InvitationTable } from "@/server/models/Invitation"; import type { JobTable } from "@/server/models/Job"; @@ -62,6 +63,7 @@ export interface Database { dataSource: DataSourceTable; dataRecord: DataRecordTable; folder: FolderTable; + geocodeCache: GeocodeCacheTable; invitation: InvitationTable; map: MapTable; mapView: MapViewTable; diff --git a/src/server/services/database/schema.ts b/src/server/services/database/schema.ts index 6a880cc77..a7a625391 100644 --- a/src/server/services/database/schema.ts +++ b/src/server/services/database/schema.ts @@ -374,6 +374,16 @@ export interface DataSourceOrganisationOverride { inspectorColumns: unknown[]; // jsonb, NOT NULL, DEFAULT [] } +/** + * geocodeCache Table + * Caches Mapbox geocoding API responses to avoid redundant requests + */ +export interface GeocodeCache { + address: string; // text, PRIMARY KEY + point: unknown; // geography, NOT NULL + createdAt: Date; // timestamp, NOT NULL, DEFAULT CURRENT_TIMESTAMP +} + // ============================================================================ // TYPE EXPORTS // ============================================================================ @@ -397,6 +407,9 @@ export interface Database { // Webhooks & Integrations airtableWebhook: AirtableWebhook; + // Caches + geocodeCache: GeocodeCache; + // Maps & Views map: Map; mapView: MapView; diff --git a/tests/feature/geocodeCache.test.ts b/tests/feature/geocodeCache.test.ts new file mode 100644 index 000000000..fc382adb8 --- /dev/null +++ b/tests/feature/geocodeCache.test.ts @@ -0,0 +1,156 @@ +import { sql } from "kysely"; +import { describe, expect, test, vi } from "vitest"; +import { GeocodingType } from "@/models/DataSource"; +import { geocodeRecord } from "@/server/mapping/geocode"; +import { db } from "@/server/services/database"; + +const MOCK_MAPBOX_RESPONSE = { + features: [ + { + id: "mock-feature", + geometry: { + type: "Point", + coordinates: [-0.1276, 51.5034], + }, + }, + ], +}; + +describe("geocode cache", () => { + test("mapbox geocoder caches results and does not call the API on subsequent lookups", async () => { + const address = "10 Downing Street, London"; + + // Clean up any existing cache entry for this address + await db + .deleteFrom("geocodeCache") + .where("address", "=", address) + .execute(); + + const geocodingConfig = { + type: GeocodingType.Address as const, + columns: ["Address"], + }; + + const dataRecord = { + externalId: "cache-test-1", + json: { Address: address }, + }; + + const originalFetch = global.fetch; + const fetchSpy = vi.fn((input, init) => { + const url = input instanceof URL ? input.href : String(input); + if (url.includes("api.mapbox.com")) { + return Promise.resolve( + new Response(JSON.stringify(MOCK_MAPBOX_RESPONSE), { status: 200 }), + ); + } + return originalFetch(input, init); + }); + global.fetch = fetchSpy; + + try { + // First call — should hit the Mapbox API + const result1 = await geocodeRecord(dataRecord, geocodingConfig); + expect(result1).not.toBeNull(); + expect(result1?.centralPoint).toEqual({ lat: 51.5034, lng: -0.1276 }); + + const mapboxCalls1 = fetchSpy.mock.calls.filter((call) => + String(call[0]).includes("api.mapbox.com"), + ); + expect(mapboxCalls1.length).toBe(1); + + fetchSpy.mockClear(); + + // Second call — should use the cache, no Mapbox API call + const result2 = await geocodeRecord(dataRecord, geocodingConfig); + expect(result2).not.toBeNull(); + expect(result2?.centralPoint).toEqual(result1?.centralPoint); + + const mapboxCalls2 = fetchSpy.mock.calls.filter((call) => + String(call[0]).includes("api.mapbox.com"), + ); + expect(mapboxCalls2.length).toBe(0); + } finally { + global.fetch = originalFetch; + + // Clean up + await db + .deleteFrom("geocodeCache") + .where("address", "=", address) + .execute(); + } + }); + + test("expired cache entries are re-geocoded", async () => { + const address = "Buckingham Palace, London"; + + // Clean up any existing cache entry + await db + .deleteFrom("geocodeCache") + .where("address", "=", address) + .execute(); + + // Insert a cache entry with a created_at older than 4 weeks + await db + .insertInto("geocodeCache") + .values({ + address, + point: { lat: 0, lng: 0 }, + }) + .execute(); + await db + .updateTable("geocodeCache") + .set({ createdAt: sql`now() - interval '5 weeks'` }) + .where("address", "=", address) + .execute(); + + const geocodingConfig = { + type: GeocodingType.Address as const, + columns: ["Address"], + }; + + const dataRecord = { + externalId: "cache-test-expired", + json: { Address: address }, + }; + + const originalFetch = global.fetch; + const fetchSpy = vi.fn((input, init) => { + const url = input instanceof URL ? input.href : String(input); + if (url.includes("api.mapbox.com")) { + return Promise.resolve( + new Response(JSON.stringify(MOCK_MAPBOX_RESPONSE), { status: 200 }), + ); + } + return originalFetch(input, init); + }); + global.fetch = fetchSpy; + + try { + const result = await geocodeRecord(dataRecord, geocodingConfig); + expect(result).not.toBeNull(); + expect(result?.centralPoint).toEqual({ lat: 51.5034, lng: -0.1276 }); + + // Should have called Mapbox because the cache entry was expired + const mapboxCalls = fetchSpy.mock.calls.filter((call) => + String(call[0]).includes("api.mapbox.com"), + ); + expect(mapboxCalls.length).toBe(1); + + // Verify the cache was updated with the new point + const cached = await db + .selectFrom("geocodeCache") + .select(["point", "createdAt"]) + .where("address", "=", address) + .executeTakeFirst(); + expect(cached?.point).toEqual({ lat: 51.5034, lng: -0.1276 }); + } finally { + global.fetch = originalFetch; + + await db + .deleteFrom("geocodeCache") + .where("address", "=", address) + .execute(); + } + }); +}); diff --git a/tests/resources/addresses.csv b/tests/resources/addresses.csv new file mode 100644 index 000000000..510d80d5d --- /dev/null +++ b/tests/resources/addresses.csv @@ -0,0 +1,11 @@ +Name,Address +Alice,10 Downing Street London +Bob,221B Baker Street London +Charlie,1 Parliament Square London +Diana,Buckingham Palace London +Eve,asdfghjkl qwerty +Frank,742 Evergreen Terrace Springfield +Grace,Edinburgh Castle Edinburgh +Hank,zzzzz zzzzz zzzzz +Iris,St Paul's Cathedral London +Jack,XXXXXXXXXXX From ef67c64d5404fb1489b5f748932cf63ac104529f Mon Sep 17 00:00:00 2001 From: joaquimds Date: Thu, 16 Apr 2026 14:38:57 +0200 Subject: [PATCH 2/2] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- src/server/services/database/schema.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/server/services/database/schema.ts b/src/server/services/database/schema.ts index a7a625391..34a8c4c68 100644 --- a/src/server/services/database/schema.ts +++ b/src/server/services/database/schema.ts @@ -380,7 +380,7 @@ export interface DataSourceOrganisationOverride { */ export interface GeocodeCache { address: string; // text, PRIMARY KEY - point: unknown; // geography, NOT NULL + point: unknown; // geography, NULLABLE createdAt: Date; // timestamp, NOT NULL, DEFAULT CURRENT_TIMESTAMP }