From 4b3a2725e32c31e64a143e6a44e86aca22814ec3 Mon Sep 17 00:00:00 2001 From: gabe-levin Date: Fri, 27 Mar 2026 13:55:59 +0100 Subject: [PATCH 1/2] feat: mcp server in install instructions --- space2stats_mcp/README.md | 157 +++++++ space2stats_mcp/pyproject.toml | 19 + .../src/space2stats_mcp/__init__.py | 0 space2stats_mcp/src/space2stats_mcp/server.py | 432 ++++++++++++++++++ 4 files changed, 608 insertions(+) create mode 100644 space2stats_mcp/README.md create mode 100644 space2stats_mcp/pyproject.toml create mode 100644 space2stats_mcp/src/space2stats_mcp/__init__.py create mode 100644 space2stats_mcp/src/space2stats_mcp/server.py diff --git a/space2stats_mcp/README.md b/space2stats_mcp/README.md new file mode 100644 index 00000000..2466e19e --- /dev/null +++ b/space2stats_mcp/README.md @@ -0,0 +1,157 @@ +# Space2Stats MCP Server + +An [MCP (Model Context Protocol)](https://modelcontextprotocol.io/) server that exposes the Space2Stats API as tools for AI assistants. This allows LLMs in Claude Desktop, Claude Code, Cursor, and other MCP-compatible clients to directly query World Bank spatial statistics. + +## Tools + +| Tool | Description | +|------|-------------| +| `list_fields` | List all available field names | +| `list_timeseries_fields` | List available timeseries fields | +| `list_topics` | List dataset topics/themes from the STAC catalog | +| `get_topic_fields` | Get field descriptions for a specific topic | +| `fetch_admin_boundaries` | Fetch country boundaries (World Bank or GeoBoundaries) | +| `get_summary` | Get H3 hex-level statistics for an AOI | +| `get_summary_by_hexids` | Get statistics for specific H3 hex IDs | +| `get_aggregate` | Aggregate statistics for an AOI | +| `get_aggregate_by_hexids` | Aggregate statistics for specific hex IDs | +| `get_timeseries` | Get timeseries data for an AOI | +| `get_timeseries_by_hexids` | Get timeseries data for specific hex IDs | + +## Installation + +```bash +cd space2stats_mcp +pip install -e . 
+``` + +## Configuration + +By default the server connects to the production API at `https://space2stats.ds.io`. Override with: + +```bash +export SPACE2STATS_BASE_URL="http://localhost:8000" +``` + +### Claude Desktop + +Add to `~/Library/Application Support/Claude/claude_desktop_config.json` (macOS) or `%APPDATA%\Claude\claude_desktop_config.json` (Windows): + +```json +{ + "mcpServers": { + "space2stats": { + "command": "space2stats-mcp" + } + } +} +``` + +### Claude Code + +Add to `.claude/settings.json` in your project or `~/.claude/settings.json` globally: + +```json +{ + "mcpServers": { + "space2stats": { + "command": "space2stats-mcp" + } + } +} +``` + +### Cursor + +Add to `.cursor/mcp.json` in your project root: + +```json +{ + "mcpServers": { + "space2stats": { + "command": "space2stats-mcp" + } + } +} +``` + +### VS Code (GitHub Copilot) + +Add to `.vscode/mcp.json` in your project or user settings: + +```json +{ + "servers": { + "space2stats": { + "command": "space2stats-mcp" + } + } +} +``` + +### Windsurf + +Add to `~/.codeium/windsurf/mcp_config.json`: + +```json +{ + "mcpServers": { + "space2stats": { + "command": "space2stats-mcp" + } + } +} +``` + +### ChatGPT / Gemini / Other LLM chat interfaces + +These platforms **do not currently support MCP** (including their desktop apps). They have their own plugin/extension systems: + +- **ChatGPT / ChatGPT Desktop**: Uses "Actions" based on OpenAPI specs — not compatible with MCP +- **Gemini**: Uses Google-specific extensions — not compatible with MCP +- **OpenAI Agents SDK**: Has [experimental MCP support](https://openai.github.io/openai-agents-python/mcp/) for connecting to MCP servers programmatically + +MCP is an open standard and adoption is growing. As more platforms add support, this server will work with them without changes. 
+ +## Testing + +### Quick smoke test + +Run the server directly to verify it starts without errors: + +```bash +space2stats-mcp +``` + +The server communicates over stdio (JSON-RPC), so it will sit waiting for input. Press `Ctrl+C` to stop. + +### Test with the MCP Inspector + +The [MCP Inspector](https://modelcontextprotocol.io/docs/tools/inspector) provides a web UI for interacting with MCP servers: + +```bash +npx @modelcontextprotocol/inspector space2stats-mcp +``` + +This opens a browser where you can: + +1. See all 11 tools and their schemas +2. Call `list_fields` to verify API connectivity +3. Call `fetch_admin_boundaries` with `{"iso3": "KEN", "adm": "ADM0"}` to test boundary fetching +4. Call `get_aggregate` with a small AOI to test data queries + +### Test with a manual JSON-RPC request + +You can pipe a JSON-RPC message directly to the server: + +```bash +echo '{"jsonrpc": "2.0", "id": 1, "method": "tools/call", "params": {"name": "list_fields", "arguments": {}}}' | space2stats-mcp +``` + +### End-to-end test in Claude Desktop + +1. Install the server and add the config (see Configuration above) +2. Restart Claude Desktop +3. Look for the hammer icon — it should show "space2stats" with 11 tools +4. Try asking: "What population data fields are available in Space2Stats?" +5. Then try: "What is the total population of Andorra?" 
(small country = fast query)
diff --git a/space2stats_mcp/pyproject.toml b/space2stats_mcp/pyproject.toml
new file mode 100644
index 00000000..fadd39ab
--- /dev/null
+++ b/space2stats_mcp/pyproject.toml
@@ -0,0 +1,19 @@
+[build-system]
+requires = ["setuptools>=61", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "space2stats-mcp"
+version = "0.1.0"
+description = "MCP server for querying World Bank Space2Stats spatial statistics"
+authors = [{name = "Gabe Levin", email = "glevin@worldbank.org"}]
+license = {text = "World Bank Master Community License Agreement"}
+dependencies = [
+    "mcp[cli]>=1.0.0",
+    "requests>=2.25.0",
+    "pystac>=1.0.0",
+]
+requires-python = ">=3.10"
+
+[project.scripts]
+space2stats-mcp = "space2stats_mcp.server:main"
diff --git a/space2stats_mcp/src/space2stats_mcp/__init__.py b/space2stats_mcp/src/space2stats_mcp/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/space2stats_mcp/src/space2stats_mcp/server.py b/space2stats_mcp/src/space2stats_mcp/server.py
new file mode 100644
index 00000000..92101f1c
--- /dev/null
+++ b/space2stats_mcp/src/space2stats_mcp/server.py
@@ -0,0 +1,527 @@
+"""MCP server for World Bank Space2Stats spatial statistics.
+
+Exposes the Space2Stats REST API, its STAC metadata catalog and two
+administrative-boundary services as MCP tools.
+"""
+
+import json
+import os
+import urllib.parse
+import urllib.request
+from typing import Literal, Optional
+
+import requests
+from mcp.server.fastmcp import FastMCP
+from pystac import Catalog
+
+mcp = FastMCP(
+    "space2stats",
+    instructions=(
+        "Space2Stats provides sub-national geospatial statistics from the World Bank. "
+        "Data is indexed by H3 hexagons (level 6). Use `list_fields` or `list_topics` "
+        "to discover available data, then query with an AOI (GeoJSON) or specific hex IDs."
+    ),
+)
+
+# API root; override with the SPACE2STATS_BASE_URL environment variable.
+BASE_URL = os.environ.get("SPACE2STATS_BASE_URL", "https://space2stats.ds.io")
+CATALOG_URL = "https://raw.githubusercontent.com/worldbank/DECAT_Space2Stats/refs/heads/main/space2stats_api/src/space2stats_ingest/METADATA/stac/catalog.json"
+
+# Both caches are filled lazily on first use and kept for the process lifetime.
+_catalog_cache: Optional[Catalog] = None
+_field_index_cache: Optional[dict[str, dict]] = None
+
+
+def _get_catalog() -> Catalog:
+    """Return the STAC catalog, loading it from CATALOG_URL on first use."""
+    global _catalog_cache
+    if _catalog_cache is None:
+        _catalog_cache = Catalog.from_file(CATALOG_URL)
+    return _catalog_cache
+
+
+def _get_field_index() -> dict[str, dict]:
+    """Return a cached mapping of field name to its STAC metadata.
+
+    Built with a single pass over the catalog so repeated lookups do not
+    re-walk every item.  When a field name occurs in several items, the
+    first item in catalog order wins.
+    """
+    global _field_index_cache
+    if _field_index_cache is None:
+        index: dict[str, dict] = {}
+        for item in _get_catalog().get_all_items():
+            topic = item.properties.get("name", item.id)
+            source = item.properties.get("source_data", "")
+            for col in item.properties.get("table:columns", []):
+                name = col.get("name")
+                if name is None or name in index:
+                    continue
+                index[name] = {
+                    "description": col.get("description", ""),
+                    "topic": topic,
+                    "source": source,
+                }
+        _field_index_cache = index
+    return _field_index_cache
+
+
+def _get_field_metadata(fields: list[str]) -> dict:
+    """Look up STAC metadata (description, topic, source) for queried fields.
+
+    Fields that do not appear in the catalog are omitted from the result.
+    """
+    index = _get_field_index()
+    # Copy each entry so callers cannot mutate the shared index.
+    return {field: dict(index[field]) for field in fields if field in index}
+
+
+def _api_url(path: str) -> str:
+    """Join *path* onto BASE_URL with exactly one separating slash."""
+    return f"{BASE_URL.rstrip('/')}/{path.lstrip('/')}"
+
+
+def _json_or_raise(resp: requests.Response) -> dict | list:
+    """Return the decoded JSON body of *resp*.
+
+    Raises
+    ------
+    RuntimeError
+        If the status code is not 200.  The message carries the JSON error
+        body when there is one, otherwise the raw response text.
+    """
+    if resp.status_code != 200:
+        try:
+            detail = resp.json()
+        except ValueError:  # body is not JSON
+            detail = resp.text
+        raise RuntimeError(f"API error {resp.status_code}: {detail}")
+    return resp.json()
+
+
+def _post(endpoint: str, payload: dict) -> dict | list:
+    """POST *payload* as JSON to an API endpoint and return the decoded reply."""
+    return _json_or_raise(requests.post(_api_url(endpoint), json=payload, timeout=60))
+
+
+def _get(endpoint: str) -> dict | list:
+    """GET an API endpoint and return the decoded reply."""
+    return _json_or_raise(requests.get(_api_url(endpoint), timeout=30))
+
+
+def _without_none(payload: dict) -> dict:
+    """Return a copy of *payload* without the keys whose value is None."""
+    return {key: value for key, value in payload.items() if value is not None}
+
+
+# ── Discovery tools ──────────────────────────────────────────────────────────
+
+
+@mcp.tool()
+def list_fields() -> list[str]:
+    """List all available field names that can be queried from Space2Stats."""
+    return _get("fields")
+
+
+@mcp.tool()
+def list_timeseries_fields() -> list[str]:
+    """List available fields from the Space2Stats timeseries table."""
+    return _get("timeseries/fields")
+
+
+@mcp.tool()
+def list_topics() -> list[dict]:
+    """List dataset topics/themes with descriptions from the STAC catalog.
+
+    Returns a list of items with their name, description, and source data info.
+    Use this to understand what data is available before querying.
+    """
+    wanted = ("name", "description", "source_data")
+    return [
+        {"id": item.id, **{k: v for k, v in item.properties.items() if k in wanted}}
+        for item in _get_catalog().get_all_items()
+    ]
+
+
+@mcp.tool()
+def get_topic_fields(item_id: str) -> list[dict]:
+    """Get detailed field descriptions for a specific dataset topic.
+
+    Parameters
+    ----------
+    item_id : str
+        The topic/item ID from list_topics (e.g., "world_pop", "nighttime_lights").
+
+    Raises
+    ------
+    ValueError
+        If no catalog item has the given ID.
+    """
+    # Search the same item set that list_topics reports, not only the first
+    # collection, so every ID it returns can be resolved here.
+    for item in _get_catalog().get_all_items():
+        if item.id == item_id:
+            return item.properties.get("table:columns", [])
+    raise ValueError(f"Item '{item_id}' not found in catalog.")
+
+
+# ── Boundary tools ───────────────────────────────────────────────────────────
+
+
+ESRI_BASE_URL = "https://services.arcgis.com/iQ1dY19aHwbSDYIF/arcgis/rest/services/World_Bank_Global_Administrative_Divisions/FeatureServer"
+ESRI_LAYER_MAP = {"ADM0": 1, "ADM1": 2, "ADM2": 3}
+
+
+def _normalize_iso3(iso3: str) -> str:
+    """Return *iso3* stripped and upper-cased.
+
+    The code is interpolated into a query filter and a URL path, so anything
+    other than three ASCII letters is rejected with ValueError.
+    """
+    code = iso3.strip().upper()
+    if len(code) != 3 or not (code.isascii() and code.isalpha()):
+        raise ValueError(
+            f"Invalid ISO3 code '{iso3}': expected three letters (e.g. 'KEN')"
+        )
+    return code
+
+
+def _esri_get_json(url: str, params: dict, timeout: int) -> dict:
+    """GET *url* with query *params* and return the decoded JSON body.
+
+    Raises
+    ------
+    RuntimeError
+        If the body is an ArcGIS error object.
+    """
+    query = urllib.parse.urlencode(params)
+    with urllib.request.urlopen(f"{url}?{query}", timeout=timeout) as resp:
+        data = json.loads(resp.read().decode())
+    # ArcGIS REST reports failures as {"error": {...}} in the body; without
+    # this check a failed count query would look like "no features".
+    if isinstance(data, dict) and "error" in data:
+        raise RuntimeError(f"ESRI service error: {data['error']}")
+    return data
+
+
+def _fetch_esri_boundaries(iso3: str, adm: str) -> dict:
+    """Fetch boundaries from World Bank ESRI FeatureServer as GeoJSON.
+
+    Raises
+    ------
+    ValueError
+        If *adm* or *iso3* is invalid, or no feature matches.
+    """
+    if adm not in ESRI_LAYER_MAP:
+        raise ValueError(
+            f"Unsupported admin level '{adm}'; expected one of {sorted(ESRI_LAYER_MAP)}"
+        )
+    iso3 = _normalize_iso3(iso3)
+    layer_url = f"{ESRI_BASE_URL}/{ESRI_LAYER_MAP[adm]}"
+    query_url = f"{layer_url}/query"
+    where = f"ISO_A3='{iso3}'"
+
+    # Check record count
+    count_data = _esri_get_json(
+        query_url, {"where": where, "returnCountOnly": "true", "f": "json"}, timeout=60
+    )
+    n_records = int(count_data.get("count", 0))
+    if n_records == 0:
+        raise ValueError(f"No features found for ISO3 code '{iso3}' at {adm}")
+
+    # Get max page size from layer metadata; never below 1 so range() is valid
+    meta = _esri_get_json(layer_url, {"f": "pjson"}, timeout=30)
+    max_records = max(1, int(meta.get("maxRecordCount", 1000)))
+
+    # Fetch features (with paging if needed)
+    all_features = []
+    for offset in range(0, n_records, max_records):
+        page = _esri_get_json(
+            query_url,
+            {
+                "outFields": "*",
+                "where": where,
+                "returnGeometry": "true",
+                "f": "geojson",
+                "resultRecordCount": max_records,
+                "resultOffset": offset,
+            },
+            timeout=60,
+        )
+        all_features.extend(page.get("features", []))
+
+    return {"type": "FeatureCollection", "features": all_features}
+
+
+def _fetch_geoboundaries(iso3: str, adm: str) -> dict:
+    """Fetch boundaries from GeoBoundaries API as GeoJSON.
+
+    Raises
+    ------
+    ValueError
+        If *iso3* is invalid or the API reply has no GeoJSON download URL.
+    requests.HTTPError
+        If either HTTP request returns an error status.
+    """
+    iso3 = _normalize_iso3(iso3)
+    url = f"https://www.geoboundaries.org/api/current/gbOpen/{iso3}/{adm}/"
+    resp = requests.get(url, timeout=30)
+    resp.raise_for_status()
+    data = resp.json()
+
+    download_url = data.get("gjDownloadURL") if isinstance(data, dict) else None
+    if not download_url:
+        raise ValueError(
+            f"GeoBoundaries returned no GeoJSON download URL for '{iso3}' at {adm}"
+        )
+
+    geojson_resp = requests.get(download_url, timeout=60)
+    geojson_resp.raise_for_status()
+    return geojson_resp.json()
+
+
+@mcp.tool()
+def fetch_admin_boundaries(
+    iso3: str,
+    adm: Literal["ADM0", "ADM1", "ADM2"],
+    source: Literal["WB", "GB"] = "WB",
+) -> dict:
+    """Fetch administrative boundaries for a country as GeoJSON.
+
+    Use this to get an AOI that can be passed to the query tools.  The "WB"
+    source returns a FeatureCollection, while the query tools take a single
+    Feature, so pass one element of its "features" list as the AOI.
+
+    Parameters
+    ----------
+    iso3 : str
+        ISO3 country code (e.g., "KEN" for Kenya, "BRA" for Brazil).
+    adm : str
+        Administrative level: "ADM0" (country), "ADM1" (region/province), "ADM2" (district).
+    source : str
+        Boundary source: "WB" for World Bank (default) or "GB" for GeoBoundaries.
+    """
+    if source == "WB":
+        return _fetch_esri_boundaries(iso3, adm)
+    if source == "GB":
+        return _fetch_geoboundaries(iso3, adm)
+    raise ValueError("Source must be 'WB' or 'GB'")
+
+
+# ── Query tools ──────────────────────────────────────────────────────────────
+
+
+@mcp.tool()
+def get_summary(
+    aoi: dict,
+    spatial_join_method: Literal["touches", "centroid", "within"],
+    fields: list[str],
+    geometry: Optional[Literal["polygon", "point"]] = None,
+) -> list[dict]:
+    """Get H3 hex-level statistics for an area of interest.
+
+    Parameters
+    ----------
+    aoi : dict
+        A GeoJSON Feature with a geometry (Polygon or MultiPolygon).
+        Example: {"type": "Feature", "geometry": {"type": "Polygon", "coordinates": [...]}, "properties": {}}
+    spatial_join_method : str
+        How to match H3 cells to the AOI:
+        - "touches": cells that intersect the AOI boundary
+        - "centroid": cells whose centroid falls within the AOI
+        - "within": cells entirely contained within the AOI
+    fields : list[str]
+        Field names to retrieve (use list_fields to see options).
+    geometry : str, optional
+        Include H3 cell geometry in response: "polygon" or "point".
+    """
+    payload = _without_none(
+        {
+            "aoi": aoi,
+            "spatial_join_method": spatial_join_method,
+            "fields": fields,
+            "geometry": geometry,
+        }
+    )
+    return _post("summary", payload)
+
+
+@mcp.tool()
+def get_summary_by_hexids(
+    hex_ids: list[str],
+    fields: list[str],
+    geometry: Optional[Literal["polygon", "point"]] = None,
+) -> list[dict]:
+    """Get statistics for specific H3 hex IDs.
+
+    Parameters
+    ----------
+    hex_ids : list[str]
+        H3 level 6 hexagon IDs to query.
+    fields : list[str]
+        Field names to retrieve.
+    geometry : str, optional
+        Include H3 cell geometry: "polygon" or "point".
+    """
+    payload = _without_none(
+        {"hex_ids": hex_ids, "fields": fields, "geometry": geometry}
+    )
+    return _post("summary_by_hexids", payload)
+
+
+@mcp.tool()
+def get_aggregate(
+    aoi: dict,
+    spatial_join_method: Literal["touches", "centroid", "within"],
+    fields: list[str],
+    aggregation_type: Literal["sum", "avg", "count", "max", "min"],
+) -> dict:
+    """Aggregate statistics across H3 cells for an area of interest.
+
+    Returns a single aggregated result (e.g., total population for a region).
+
+    Parameters
+    ----------
+    aoi : dict
+        A GeoJSON Feature with a geometry.
+    spatial_join_method : str
+        How to match H3 cells: "touches", "centroid", or "within".
+    fields : list[str]
+        Field names to aggregate.
+    aggregation_type : str
+        Aggregation function: "sum", "avg", "count", "max", or "min".
+    """
+    payload = {
+        "aoi": aoi,
+        "spatial_join_method": spatial_join_method,
+        "fields": fields,
+        "aggregation_type": aggregation_type,
+    }
+    result = _post("aggregate", payload)
+    field_metadata = _get_field_metadata(fields)
+    return {
+        "results": result,
+        "metadata": {
+            "fields": field_metadata,
+            "spatial_join_method": spatial_join_method,
+            "aggregation_type": aggregation_type,
+        },
+    }
+
+
+@mcp.tool()
+def get_aggregate_by_hexids(
+    hex_ids: list[str],
+    fields: list[str],
+    aggregation_type: Literal["sum", "avg", "count", "max", "min"],
+) -> dict:
+    """Aggregate statistics for specific H3 hex IDs.
+
+    Parameters
+    ----------
+    hex_ids : list[str]
+        H3 hexagon IDs to aggregate.
+    fields : list[str]
+        Field names to aggregate.
+    aggregation_type : str
+        Aggregation function: "sum", "avg", "count", "max", or "min".
+    """
+    payload = {
+        "hex_ids": hex_ids,
+        "fields": fields,
+        "aggregation_type": aggregation_type,
+    }
+    result = _post("aggregate_by_hexids", payload)
+    field_metadata = _get_field_metadata(fields)
+    return {
+        "results": result,
+        "metadata": {
+            "fields": field_metadata,
+            "aggregation_type": aggregation_type,
+        },
+    }
+
+
+@mcp.tool()
+def get_timeseries(
+    aoi: dict,
+    spatial_join_method: Literal["touches", "centroid", "within"],
+    fields: list[str],
+    start_date: Optional[str] = None,
+    end_date: Optional[str] = None,
+    geometry: Optional[Literal["polygon", "point"]] = None,
+) -> list[dict]:
+    """Get timeseries data for an area of interest.
+
+    Parameters
+    ----------
+    aoi : dict
+        A GeoJSON Feature with a geometry.
+    spatial_join_method : str
+        How to match H3 cells: "touches", "centroid", or "within".
+    fields : list[str]
+        Field names to retrieve (use list_timeseries_fields).
+    start_date : str, optional
+        Start date filter (YYYY-MM-DD).
+    end_date : str, optional
+        End date filter (YYYY-MM-DD).
+    geometry : str, optional
+        Include H3 cell geometry: "polygon" or "point".
+    """
+    payload = _without_none(
+        {
+            "aoi": aoi,
+            "spatial_join_method": spatial_join_method,
+            "fields": fields,
+            # Empty strings count as "not given", hence `or None`.
+            "start_date": start_date or None,
+            "end_date": end_date or None,
+            "geometry": geometry,
+        }
+    )
+    return _post("timeseries", payload)
+
+
+@mcp.tool()
+def get_timeseries_by_hexids(
+    hex_ids: list[str],
+    fields: list[str],
+    start_date: Optional[str] = None,
+    end_date: Optional[str] = None,
+    geometry: Optional[Literal["polygon", "point"]] = None,
+) -> list[dict]:
+    """Get timeseries data for specific H3 hex IDs.
+
+    Parameters
+    ----------
+    hex_ids : list[str]
+        H3 hexagon IDs to query.
+    fields : list[str]
+        Field names to retrieve.
+    start_date : str, optional
+        Start date filter (YYYY-MM-DD).
+    end_date : str, optional
+        End date filter (YYYY-MM-DD).
+    geometry : str, optional
+        Include H3 cell geometry: "polygon" or "point".
+    """
+    payload = _without_none(
+        {
+            "hex_ids": hex_ids,
+            "fields": fields,
+            # Empty strings count as "not given", hence `or None`.
+            "start_date": start_date or None,
+            "end_date": end_date or None,
+            "geometry": geometry,
+        }
+    )
+    return _post("timeseries_by_hexids", payload)
+
+
+def main():
+    """Run the MCP server; entry point of the `space2stats-mcp` script."""
+    mcp.run()
+
+
+if __name__ == "__main__":
+    main()
From 9dcb3c0b2abe8f60570c0fd2182e80dbdf13d64b Mon Sep 17 00:00:00 2001
From: gabe-levin
Date: Fri, 27 Mar 2026 14:02:01 +0100
Subject: [PATCH 2/2] feat: .mcp.json config file

---
 .mcp.json | 8 ++++++++
 1 file changed, 8 insertions(+)
 create mode 100644 .mcp.json

diff --git a/.mcp.json b/.mcp.json
new file mode 100644
index 00000000..ab0975a9
--- /dev/null
+++ b/.mcp.json
@@ -0,0 +1,8 @@
+{
+  "mcpServers": {
+    "space2stats": {
+      "command": "space2stats-mcp",
+      "args": []
+    }
+  }
+}