From ee88336992ad9b340f72df030a93eb228bf63866 Mon Sep 17 00:00:00 2001 From: neha222222 Date: Sun, 12 Apr 2026 03:25:00 +0530 Subject: [PATCH 1/2] feat: Add agriculture census scraper and tehsil matching pipeline Implements tehsil-level agriculture census data pipeline for issue #221: - scraper.py: Selenium-based scraper for agcensus.da.gov.in that navigates ASP.NET WebForms dropdowns (Year/Table/State/District/Tehsil) to extract crop area data at tehsil level - tehsil_matcher.py: Matches scraped tehsil names to CoRE Stack SOI boundaries using exact + fuzzy matching with edit distance - pipeline.py: CLI pipeline that orchestrates scraping, cleaning, matching, and CSV export with match statistics Supports incremental scraping with --skip-scraping flag for re-running only the matching step on previously collected data. --- computing/misc/agriculture_census/__init__.py | 0 computing/misc/agriculture_census/pipeline.py | 156 ++++++++++ computing/misc/agriculture_census/scraper.py | 292 ++++++++++++++++++ .../misc/agriculture_census/tehsil_matcher.py | 131 ++++++++ 4 files changed, 579 insertions(+) create mode 100644 computing/misc/agriculture_census/__init__.py create mode 100644 computing/misc/agriculture_census/pipeline.py create mode 100644 computing/misc/agriculture_census/scraper.py create mode 100644 computing/misc/agriculture_census/tehsil_matcher.py diff --git a/computing/misc/agriculture_census/__init__.py b/computing/misc/agriculture_census/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/computing/misc/agriculture_census/pipeline.py b/computing/misc/agriculture_census/pipeline.py new file mode 100644 index 00000000..fca39891 --- /dev/null +++ b/computing/misc/agriculture_census/pipeline.py @@ -0,0 +1,156 @@ +""" +Agriculture Census Pipeline + +End-to-end pipeline to: +1. Scrape crop data from the Agriculture Census website +2. Clean and structure the data +3. Match tehsil names to CoRE Stack SOI boundaries +4. 
"""
Agriculture Census Pipeline

End-to-end pipeline to:
1. Scrape crop data from the Agriculture Census website
2. Clean and structure the data
3. Match tehsil names to CoRE Stack SOI boundaries
4. Export matched data as CSV for GEE integration

Usage:
    python -m computing.misc.agriculture_census.pipeline \
        --boundary-file /path/to/soi_tehsil.geojson \
        --output-dir /path/to/output \
        --states "Madhya Pradesh" "Rajasthan"
"""

import os
import argparse
import json

import pandas as pd


def run_pipeline(
    boundary_file,
    output_dir,
    states=None,
    max_districts=None,
    headless=True,
    skip_scraping=False,
    scraped_csv=None,
):
    """Run the full agriculture census pipeline.

    Args:
        boundary_file: Path to SOI tehsil boundary GeoJSON.
        output_dir: Path to write output files.
        states: List of state names to process (None = all available).
        max_districts: Limit districts per state (for testing).
        headless: Run browser headless.
        skip_scraping: If True, load from scraped_csv instead of scraping.
        scraped_csv: Path to previously scraped data CSV.

    Returns:
        dict with ``matched_df`` and ``stats``.

    Raises:
        ValueError: If skip_scraping is True but scraped_csv is not given.
            Previously this combination silently fell through to a full
            re-scrape, defeating the purpose of --skip-scraping.
    """
    # Validate before doing any filesystem work so a bad invocation fails fast.
    if skip_scraping and not scraped_csv:
        raise ValueError(
            "skip_scraping=True requires scraped_csv to be set "
            "(pass --scraped-csv on the CLI)"
        )

    os.makedirs(output_dir, exist_ok=True)

    # Step 1: Scrape or load data
    print("=" * 60)
    print("Step 1: Getting agriculture census data...")
    print("=" * 60)

    if skip_scraping:
        print(f"Loading previously scraped data from {scraped_csv}")
        census_df = pd.read_csv(scraped_csv)
    else:
        # Imported lazily: the scraper pulls in selenium/webdriver-manager,
        # which are not needed for the matching-only path.
        from .scraper import scrape_agcensus

        census_df = scrape_agcensus(
            output_dir=output_dir,
            states=states,
            max_districts=max_districts,
            headless=headless,
        )

    if census_df.empty:
        print("No data to process. Exiting.")
        return {"matched_df": census_df, "stats": {}}

    print(f" Records: {len(census_df)}")

    # Step 2: Load SOI boundaries (geopandas imported lazily — heavy dependency)
    print("\n" + "=" * 60)
    print("Step 2: Loading SOI tehsil boundaries...")
    print("=" * 60)

    import geopandas as gpd

    boundary_gdf = gpd.read_file(boundary_file)
    # Matching only needs the attribute table, not the geometry.
    boundary_df = pd.DataFrame(boundary_gdf.drop(columns="geometry"))
    print(f" Boundary records: {len(boundary_df)}")
    print(f" Columns: {list(boundary_df.columns)}")

    # Step 3: Match census tehsil names against the boundary attribute table
    print("\n" + "=" * 60)
    print("Step 3: Matching tehsil names...")
    print("=" * 60)

    from .tehsil_matcher import match_tehsils

    matched_df, stats = match_tehsils(census_df, boundary_df)

    # Persist outputs: matched rows as CSV, statistics as JSON.
    matched_path = os.path.join(output_dir, "agriculture_census_matched.csv")
    matched_df.to_csv(matched_path, index=False)

    stats_path = os.path.join(output_dir, "agri_census_match_stats.json")
    with open(stats_path, "w") as f:
        json.dump(stats, f, indent=2)

    print("\n Match statistics:")  # was an f-string with no placeholder
    print(f" Total: {stats['total']}")
    print(f" Exact: {stats['exact']}")
    print(f" Fuzzy: {stats['fuzzy']}")
    print(f" Unmatched: {stats['unmatched']}")
    print(f" Match %: {stats['match_pct']}%")
    print(f"\n Saved to {matched_path}")

    return {"matched_df": matched_df, "stats": stats}


def main():
    """CLI entry point: parse arguments and invoke run_pipeline."""
    parser = argparse.ArgumentParser(
        description="Scrape and process Agriculture Census data"
    )
    parser.add_argument(
        "--boundary-file", required=True,
        help="Path to SOI tehsil boundary GeoJSON file"
    )
    parser.add_argument(
        "--output-dir", required=True,
        help="Directory to write output files"
    )
    parser.add_argument(
        "--states", nargs="*", default=None,
        help="States to process (space-separated)"
    )
    parser.add_argument(
        "--max-districts", type=int, default=None,
        help="Max districts per state (for testing)"
    )
    parser.add_argument(
        "--no-headless", action="store_true",
        help="Run browser with visible window"
    )
    parser.add_argument(
        "--skip-scraping", action="store_true",
        help="Skip scraping, load from --scraped-csv instead"
    )
    parser.add_argument(
        "--scraped-csv", default=None,
        help="Path to previously scraped CSV"
    )

    args = parser.parse_args()
    run_pipeline(
        boundary_file=args.boundary_file,
        output_dir=args.output_dir,
        states=args.states,
        max_districts=args.max_districts,
        headless=not args.no_headless,
        skip_scraping=args.skip_scraping,
        scraped_csv=args.scraped_csv,
    )


if __name__ == "__main__":
    main()
"""
Agriculture Census Scraper

Scrapes tehsil-level crop data from the Agriculture Census website
(https://agcensus.da.gov.in/) and the UP Agriculture portal
(https://upag.gov.in/) to build a structured dataset of crop types
and their area coverage at the tehsil/district level.

The agcensus website uses ASP.NET WebForms with postback-based navigation.
This scraper uses Selenium to handle the dynamic dropdowns and table rendering.

Output: CSV with columns
    state, district, tehsil, crop_name, area_hectares, year, source
"""

import os
import time
import csv  # NOTE(review): imported but not referenced in this module — confirm before removing
import json
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select, WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import (
    TimeoutException,
    NoSuchElementException,
    StaleElementReferenceException,
)
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options


# Landing page with the cascading Year/Table/State/District/Tehsil dropdowns.
AGCENSUS_URL = "https://agcensus.da.gov.in/DatabaseHome.aspx"
# Default seconds to wait for a postback-populated dropdown.
DEFAULT_TIMEOUT = 15


def _create_driver(headless=True):
    """Create a Chrome WebDriver instance.

    Uses webdriver-manager to download a matching chromedriver binary, so no
    driver needs to be pre-installed. The no-sandbox/dev-shm flags make the
    browser usable inside containers.
    """
    options = Options()
    if headless:
        options.add_argument("--headless=new")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    options.add_argument("--disable-gpu")
    options.add_argument("--window-size=1920,1080")

    service = Service(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=service, options=options)
    driver.implicitly_wait(5)
    return driver


def _wait_for_dropdown_populated(driver, select_id, timeout=DEFAULT_TIMEOUT):
    """Wait until a dropdown has more than 1 option (i.e., loaded via postback).

    Returns True when populated within `timeout` seconds, False on timeout.
    The ">1" threshold assumes option 0 is a "Select..." placeholder.
    """
    try:
        WebDriverWait(driver, timeout).until(
            lambda d: len(Select(d.find_element(By.ID, select_id)).options) > 1
        )
        return True
    except TimeoutException:
        return False


def _safe_select_by_index(driver, select_id, index):
    """Select a dropdown option by index with retry on stale element.

    Returns True on success; False when the index is out of range or the
    element stays stale after 3 attempts. The element is re-located on every
    attempt because the ASP.NET postback replaces the DOM node.
    """
    for attempt in range(3):
        try:
            select = Select(driver.find_element(By.ID, select_id))
            if index < len(select.options):
                select.select_by_index(index)
                time.sleep(1.5)  # Allow postback to complete
                return True
        except StaleElementReferenceException:
            time.sleep(1)
    return False


def _get_dropdown_options(driver, select_id):
    """Get all option texts from a dropdown as (index, text) tuples.

    Returns an empty list when the element is missing, so callers can treat
    "dropdown absent" and "dropdown empty" uniformly.
    """
    try:
        select = Select(driver.find_element(By.ID, select_id))
        return [(i, opt.text.strip()) for i, opt in enumerate(select.options)]
    except NoSuchElementException:
        return []


def _extract_table_data(driver):
    """Extract data rows from the results table on the page.

    NOTE(review): assumes the results grid has id "GridView1" (the default
    ASP.NET GridView id) — confirm against the live site. Skips the header
    row and all-empty rows; returns a list of cell-text lists.
    """
    rows = []
    try:
        table = driver.find_element(By.ID, "GridView1")
        trs = table.find_elements(By.TAG_NAME, "tr")
        for tr in trs[1:]:  # Skip header row
            tds = tr.find_elements(By.TAG_NAME, "td")
            row = [td.text.strip() for td in tds]
            if row and any(cell for cell in row):
                rows.append(row)
    except NoSuchElementException:
        pass
    return rows


def scrape_agcensus(
    output_dir,
    states=None,
    max_districts=None,
    headless=True,
    progress_callback=None,
):
    """Scrape crop data from the Agriculture Census website.

    The site has cascading dropdowns: Year -> Table -> State -> District -> Tehsil
    We iterate through available options to collect tehsil-level data.

    Args:
        output_dir: Directory to write output CSV
        states: List of state names to scrape (None = all available)
        max_districts: Limit districts per state (for testing)
        headless: Run browser in headless mode
        progress_callback: Optional function(state, district, msg) for progress

    Returns:
        pd.DataFrame of scraped data (empty DataFrame when nothing was
        collected). A raw CSV is also written to
        ``<output_dir>/agriculture_census_raw.csv`` when records exist.
    """
    os.makedirs(output_dir, exist_ok=True)
    output_csv = os.path.join(output_dir, "agriculture_census_raw.csv")

    driver = _create_driver(headless=headless)
    all_records = []

    try:
        driver.get(AGCENSUS_URL)
        time.sleep(3)  # let the initial WebForms page settle

        # Identify dropdown IDs (these may vary; common patterns below)
        # The actual IDs need to be confirmed by inspecting the live site
        dropdown_ids = {
            "year": "ddlYear",
            "table": "ddlTable",
            "state": "ddlState",
            "district": "ddlDistrict",
            "tehsil": "ddlTehsil",
        }

        # Try to detect actual dropdown IDs from page; falls back to the
        # defaults above for any role not found.
        selects = driver.find_elements(By.TAG_NAME, "select")
        found_ids = [s.get_attribute("id") for s in selects if s.get_attribute("id")]
        print(f"Found dropdown IDs on page: {found_ids}")

        # Map detected IDs by substring of their name (first match wins
        # per element; a later element for the same role overwrites).
        for fid in found_ids:
            fid_lower = fid.lower()
            if "year" in fid_lower:
                dropdown_ids["year"] = fid
            elif "table" in fid_lower:
                dropdown_ids["table"] = fid
            elif "state" in fid_lower:
                dropdown_ids["state"] = fid
            elif "district" in fid_lower:
                dropdown_ids["district"] = fid
            elif "tehsil" in fid_lower or "block" in fid_lower:
                dropdown_ids["tehsil"] = fid

        print(f"Using dropdown IDs: {json.dumps(dropdown_ids, indent=2)}")

        # Select the most recent year
        year_options = _get_dropdown_options(driver, dropdown_ids["year"])
        if year_options:
            # Pick the latest year (usually last numeric option)
            # NOTE(review): assumes options are sorted ascending by year and
            # that the last one is not a placeholder — confirm on live site.
            latest_idx = year_options[-1][0] if len(year_options) > 1 else 0
            _safe_select_by_index(driver, dropdown_ids["year"], latest_idx)
            selected_year = year_options[latest_idx][1] if latest_idx < len(year_options) else "unknown"
            print(f"Selected year: {selected_year}")

        # Select table (crop-area related)
        table_options = _get_dropdown_options(driver, dropdown_ids["table"])
        table_idx = 1  # Usually index 1 is the first data table
        if len(table_options) > 1:
            # Try to find a table about "crop" or "area"
            for idx, text in table_options:
                if any(kw in text.lower() for kw in ["crop", "area", "holding"]):
                    table_idx = idx
                    break
        _safe_select_by_index(driver, dropdown_ids["table"], table_idx)
        print(f"Selected table: {table_options[table_idx][1] if table_idx < len(table_options) else 'unknown'}")

        time.sleep(2)

        # Iterate states
        _wait_for_dropdown_populated(driver, dropdown_ids["state"])
        state_options = _get_dropdown_options(driver, dropdown_ids["state"])
        print(f"Found {len(state_options)} states")

        for state_idx, state_name in state_options:
            # Skip the placeholder option at index 0.
            if state_idx == 0 and state_name.lower() in ["select", "--select--", ""]:
                continue
            # Apply the optional state filter (case-insensitive).
            if states and state_name.lower().strip() not in [s.lower() for s in states]:
                continue

            print(f"\nProcessing state: {state_name}")
            _safe_select_by_index(driver, dropdown_ids["state"], state_idx)
            time.sleep(2)

            # Iterate districts
            _wait_for_dropdown_populated(driver, dropdown_ids["district"])
            district_options = _get_dropdown_options(driver, dropdown_ids["district"])
            districts_processed = 0

            for dist_idx, dist_name in district_options:
                if dist_idx == 0 and dist_name.lower() in ["select", "--select--", ""]:
                    continue
                if max_districts and districts_processed >= max_districts:
                    break

                print(f" District: {dist_name}")
                _safe_select_by_index(driver, dropdown_ids["district"], dist_idx)
                time.sleep(2)

                # Try to get tehsil-level data; a short timeout is used since
                # some tables simply have no tehsil dropdown.
                tehsil_available = _wait_for_dropdown_populated(
                    driver, dropdown_ids["tehsil"], timeout=5
                )

                if tehsil_available:
                    tehsil_options = _get_dropdown_options(driver, dropdown_ids["tehsil"])
                    for teh_idx, teh_name in tehsil_options:
                        if teh_idx == 0 and teh_name.lower() in ["select", "--select--", ""]:
                            continue

                        _safe_select_by_index(driver, dropdown_ids["tehsil"], teh_idx)
                        time.sleep(1)

                        # Click submit/show button if present
                        # NOTE(review): assumes the button id is "btnSubmit" — confirm.
                        try:
                            submit_btn = driver.find_element(By.ID, "btnSubmit")
                            submit_btn.click()
                            time.sleep(2)
                        except NoSuchElementException:
                            pass

                        # Extract table data; rows are kept raw ("data" list)
                        # and structured downstream.
                        table_data = _extract_table_data(driver)
                        for row in table_data:
                            all_records.append({
                                "state": state_name,
                                "district": dist_name,
                                "tehsil": teh_name,
                                "data": row,
                                "source": "agcensus.da.gov.in",
                            })

                        if progress_callback:
                            progress_callback(state_name, dist_name, teh_name)
                else:
                    # No tehsil dropdown, try to get district-level data
                    try:
                        submit_btn = driver.find_element(By.ID, "btnSubmit")
                        submit_btn.click()
                        time.sleep(2)
                    except NoSuchElementException:
                        pass

                    table_data = _extract_table_data(driver)
                    for row in table_data:
                        all_records.append({
                            "state": state_name,
                            "district": dist_name,
                            "tehsil": "",  # empty marks district-level rows
                            "data": row,
                            "source": "agcensus.da.gov.in",
                        })

                districts_processed += 1

    except Exception as e:
        print(f"Error during scraping: {e}")
        raise
    finally:
        # Always release the browser, even on failure.
        driver.quit()

    # Save raw records
    if all_records:
        df = pd.DataFrame(all_records)
        df.to_csv(output_csv, index=False)
        print(f"\nSaved {len(all_records)} records to {output_csv}")
    else:
        df = pd.DataFrame()
        print("\nNo records scraped")

    return df
"""
Tehsil Name Matcher

Matches scraped agriculture census tehsil/district names to the CoRE Stack
administrative boundary dataset using edit distance matching.

The CoRE Stack uses SOI (Survey of India) tehsil boundaries which may have
different spellings compared to the Agriculture Census website.
"""

import pandas as pd
from difflib import SequenceMatcher

try:
    from unidecode import unidecode
except ImportError:  # graceful fallback when unidecode is not installed
    import unicodedata

    def unidecode(text):
        """Approximate ASCII transliteration via NFKD decomposition.

        Weaker than the real unidecode (drops characters it cannot map),
        but sufficient for Latin-script names with diacritics.
        """
        return (
            unicodedata.normalize("NFKD", text)
            .encode("ascii", "ignore")
            .decode("ascii")
        )


# Generic administrative-unit words that vary between data sources and carry
# no identifying information; removed (as whole words) during normalization.
_ADMIN_TOKENS = frozenset(
    {"district", "tehsil", "taluk", "taluka", "mandal", "block"}
)


def _normalize(text):
    """Normalize a tehsil/district name for matching.

    Transliterates to ASCII, lowercases, collapses whitespace, and drops
    generic administrative words. Tokens are removed only as whole words:
    the previous substring-based replace() corrupted names that merely
    contain a token (e.g. "Mandalgarh" -> "garh"), which could produce
    false exact matches between distinct places.
    """
    if not isinstance(text, str):
        return ""
    text = unidecode(text).strip().lower()
    words = [w for w in text.split() if w not in _ADMIN_TOKENS]
    return " ".join(words)


def _similarity(a, b):
    """Similarity ratio in [0, 1] between two strings (difflib ratio)."""
    return SequenceMatcher(None, a, b).ratio()


def match_tehsils(
    census_df,
    boundary_df,
    state_col="STATE",
    district_col="District",
    tehsil_col="TEHSIL",
    similarity_threshold=0.75,
):
    """Match agriculture census tehsil names to SOI boundary tehsil names.

    Matching is two-stage per census row: exact match on the normalized
    name within the same (state, district); otherwise the best fuzzy match
    above `similarity_threshold`. When the district has no candidates at
    all, all tehsils of the state are considered.

    Args:
        census_df: DataFrame with scraped agriculture census data
            Expected columns: state, district, tehsil
        boundary_df: DataFrame of SOI tehsil boundaries
            Expected columns: STATE, District, TEHSIL (configurable)
        state_col: Boundary column holding the state name.
        district_col: Boundary column holding the district name.
        tehsil_col: Boundary column holding the tehsil name.
        similarity_threshold: Minimum score for a fuzzy match

    Returns:
        tuple: (matched_df, stats_dict) where matched_df is census_df plus
        matched_tehsil / match_type / match_score columns, and stats_dict
        has total, exact, fuzzy, unmatched, match_pct.
    """
    # Build a (state, district) -> [candidate tehsils] lookup from boundaries.
    boundary_lookup = {}
    for _, row in boundary_df.iterrows():
        state = _normalize(str(row.get(state_col, "")))
        district = _normalize(str(row.get(district_col, "")))
        tehsil = _normalize(str(row.get(tehsil_col, "")))

        key = (state, district)
        if key not in boundary_lookup:
            boundary_lookup[key] = []
        boundary_lookup[key].append({
            "tehsil": tehsil,
            "original": str(row.get(tehsil_col, "")),
        })

    results = []
    counts = {"exact": 0, "fuzzy": 0, "unmatched": 0}

    for _, row in census_df.iterrows():
        state = _normalize(str(row.get("state", "")))
        district = _normalize(str(row.get("district", "")))
        tehsil = _normalize(str(row.get("tehsil", "")))

        key = (state, district)
        candidates = boundary_lookup.get(key, [])

        # Fall back to a state-wide search when the district has no candidates
        # (district spellings differ between the two sources too).
        if not candidates:
            for k, v in boundary_lookup.items():
                if k[0] == state:
                    candidates.extend(v)

        matched_tehsil = ""
        match_type = "unmatched"
        match_score = 0.0

        if candidates and tehsil:
            # Stage 1: exact match on normalized names.
            for c in candidates:
                if c["tehsil"] == tehsil:
                    matched_tehsil = c["original"]
                    match_type = "exact"
                    match_score = 1.0
                    break

            # Stage 2: best fuzzy candidate above the threshold.
            if match_type == "unmatched":
                best_score = 0
                best = None
                for c in candidates:
                    score = _similarity(tehsil, c["tehsil"])
                    if score > best_score:
                        best_score = score
                        best = c
                if best and best_score >= similarity_threshold:
                    matched_tehsil = best["original"]
                    match_type = "fuzzy"
                    match_score = best_score

        counts[match_type] = counts.get(match_type, 0) + 1
        results.append({
            "matched_tehsil": matched_tehsil,
            "match_type": match_type,
            "match_score": round(match_score, 3),
        })

    # Attach match columns alongside the original census rows.
    result_df = pd.DataFrame(results)
    matched_df = pd.concat([census_df.reset_index(drop=True), result_df], axis=1)

    total = len(census_df)
    stats = {
        "total": total,
        "exact": counts["exact"],
        "fuzzy": counts["fuzzy"],
        "unmatched": counts["unmatched"],
        # max(total, 1) guards against division by zero on empty input.
        "match_pct": round(100 * (counts["exact"] + counts["fuzzy"]) / max(total, 1), 2),
    }

    return matched_df, stats
"""
GEE Export for Agriculture Census

Takes the matched agriculture census data, joins it with SOI tehsil boundary
geometries, and publishes the enriched vector layer as an Earth Engine asset.

The output FeatureCollection has per-tehsil polygons with properties:
    - crop_name, area_hectares (for each crop)
    - matched_tehsil, match_score
    - state, district, tehsil

This produces a tehsil-level vectorized crop map that can be used
in the Know Your Landscape dashboard and other downstream apps.
"""

import ee  # NOTE(review): not referenced directly; EE calls go through utilities.gee_utils
import geopandas as gpd
import pandas as pd

from utilities.gee_utils import (
    ee_initialize,
    gdf_to_ee_fc,
    export_vector_asset_to_gee,
    check_task_status,
    is_gee_asset_exists,
    make_asset_public,
    valid_gee_text,
    get_gee_asset_path,
)
from computing.utils import (
    sync_fc_to_geoserver,
    save_layer_info_to_db,
    update_layer_sync_status,
)
from nrm_app.celery import app


def enrich_tehsil_boundaries(matched_csv_path, boundary_geojson_path):
    """Join matched agriculture census data with SOI tehsil boundaries.

    Args:
        matched_csv_path: Path to matched agriculture census CSV
            (output of pipeline.py with matched_tehsil column)
        boundary_geojson_path: Path to SOI tehsil boundary GeoJSON

    Returns:
        GeoDataFrame with tehsil polygons enriched with crop attributes
        (empty GeoDataFrame when no census record matched a boundary).

    Raises:
        ValueError: If no tehsil-named column is found in the boundary file.
    """
    census_df = pd.read_csv(matched_csv_path)
    boundaries_gdf = gpd.read_file(boundary_geojson_path)

    # Standardize boundary column names to lower snake_case so the tehsil
    # column search below is spelling-tolerant.
    boundaries_gdf.columns = [
        c.strip().lower().replace(" ", "_") for c in boundaries_gdf.columns
    ]

    # Find tehsil name column in boundaries; the exact header varies
    # between SOI exports.
    tehsil_col = None
    for col in boundaries_gdf.columns:
        if "tehsil" in col.lower():
            tehsil_col = col
            break

    if tehsil_col is None:
        raise ValueError(
            "Could not find tehsil column in boundary file. "
            f"Available: {list(boundaries_gdf.columns)}"
        )

    # Filter to matched records only (exact or fuzzy; unmatched rows dropped).
    matched = census_df[census_df["match_type"].isin(["exact", "fuzzy"])].copy()

    if matched.empty:
        print("No matched records found.")
        return gpd.GeoDataFrame()

    # Case-insensitive join key on the matched tehsil name.
    matched["_join_key"] = matched["matched_tehsil"].str.strip().str.lower()
    boundaries_gdf["_join_key"] = boundaries_gdf[tehsil_col].str.strip().str.lower()

    # Inner join: one output row per (boundary polygon, census record) pair.
    # NOTE(review): a tehsil with several crop rows duplicates its polygon
    # once per crop, and overlapping column names (e.g. state/district
    # appearing in both frames) get pandas _x/_y suffixes — confirm that
    # downstream consumers expect this shape.
    enriched = boundaries_gdf.merge(matched, on="_join_key", how="inner")
    enriched = enriched.drop(columns=["_join_key"], errors="ignore")

    # Ensure EPSG:4326, which the GEE ingestion path expects.
    if enriched.crs is None:
        enriched = enriched.set_crs("EPSG:4326")
    elif enriched.crs.to_epsg() != 4326:
        enriched = enriched.to_crs("EPSG:4326")

    print(f"Enriched {len(enriched)} tehsil polygons with crop data")
    return enriched


def export_to_geojson(enriched_gdf, output_path):
    """Export enriched GeoDataFrame to GeoJSON at output_path."""
    enriched_gdf.to_file(output_path, driver="GeoJSON")
    print(f"Exported GeoJSON to {output_path}")


@app.task(bind=True)
def publish_agri_census_to_gee(
    self,
    matched_csv_path,
    boundary_geojson_path,
    state,
    district,
    block,
    gee_account_id,
):
    """Celery task to publish agriculture census as a GEE vector asset.

    Workflow:
    1. Enrich tehsil boundaries with crop data
    2. Convert to ee.FeatureCollection
    3. Export to GEE as vector asset
    4. Sync to GeoServer
    5. Save layer info to DB

    Returns early (no-op) when the target asset already exists, so the
    task is safe to re-run.

    Args:
        matched_csv_path: Path to matched agriculture census CSV.
        boundary_geojson_path: Path to SOI tehsil boundary GeoJSON.
        state: State name used for asset path / DB record.
        district: District name used for naming.
        block: Block name used for naming.
        gee_account_id: Earth Engine service account to initialize with.
    """
    ee_initialize(gee_account_id)

    # Asset naming: description doubles as the asset's leaf name.
    description = (
        f"agri_census_{valid_gee_text(district)}_{valid_gee_text(block)}"
    )
    asset_id = get_gee_asset_path(state, district, block) + description

    # Skip work when a previous run already published this asset.
    if is_gee_asset_exists(asset_id):
        print(f"Asset already exists: {asset_id}")
        return

    # Step 1: Enrich boundaries
    print("Enriching tehsil boundaries with crop data...")
    enriched_gdf = enrich_tehsil_boundaries(
        matched_csv_path, boundary_geojson_path
    )

    if enriched_gdf.empty:
        print("No matched tehsils found. Skipping.")
        return

    # Step 2: Convert to FeatureCollection
    print("Converting to Earth Engine FeatureCollection...")
    fc = gdf_to_ee_fc(enriched_gdf)

    # Step 3: Export to GEE (blocking: check_task_status polls until done)
    print(f"Exporting to GEE asset: {asset_id}")
    task_id = export_vector_asset_to_gee(fc, description, asset_id)

    # NOTE(review): when export_vector_asset_to_gee returns a falsy task id,
    # the GeoServer sync and DB save below are skipped silently — confirm
    # this is intended rather than an error condition.
    if task_id:
        check_task_status(task_id)
        make_asset_public(asset_id)
        print(f"Published agriculture census asset: {asset_id}")

        # Step 4: Sync to GeoServer
        layer_name = (
            valid_gee_text(district) + "_" + valid_gee_text(block) + "_agri_census"
        )
        sync_fc_to_geoserver(asset_id, layer_name)

        # Step 5: Save to DB
        save_layer_info_to_db(
            state=state,
            district=district,
            block=block,
            layer_name=layer_name,
            dataset_name="Agriculture Census",
            metadata={
                "source": "agcensus.da.gov.in",
                "description": "Tehsil-level crop type and area data",
            },
        )
        update_layer_sync_status(layer_name, status="synced")
        print("Done.")