From ee88336992ad9b340f72df030a93eb228bf63866 Mon Sep 17 00:00:00 2001 From: neha222222 Date: Sun, 12 Apr 2026 03:25:00 +0530 Subject: [PATCH 1/2] feat: Add agriculture census scraper and tehsil matching pipeline Implements tehsil-level agriculture census data pipeline for issue #221: - scraper.py: Selenium-based scraper for agcensus.da.gov.in that navigates ASP.NET WebForms dropdowns (Year/Table/State/District/Tehsil) to extract crop area data at tehsil level - tehsil_matcher.py: Matches scraped tehsil names to CoRE Stack SOI boundaries using exact + fuzzy matching with edit distance - pipeline.py: CLI pipeline that orchestrates scraping, cleaning, matching, and CSV export with match statistics Supports incremental scraping with --skip-scraping flag for re-running only the matching step on previously collected data. --- computing/misc/agriculture_census/__init__.py | 0 computing/misc/agriculture_census/pipeline.py | 156 ++++++++++ computing/misc/agriculture_census/scraper.py | 292 ++++++++++++++++++ .../misc/agriculture_census/tehsil_matcher.py | 131 ++++++++ 4 files changed, 579 insertions(+) create mode 100644 computing/misc/agriculture_census/__init__.py create mode 100644 computing/misc/agriculture_census/pipeline.py create mode 100644 computing/misc/agriculture_census/scraper.py create mode 100644 computing/misc/agriculture_census/tehsil_matcher.py diff --git a/computing/misc/agriculture_census/__init__.py b/computing/misc/agriculture_census/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/computing/misc/agriculture_census/pipeline.py b/computing/misc/agriculture_census/pipeline.py new file mode 100644 index 00000000..fca39891 --- /dev/null +++ b/computing/misc/agriculture_census/pipeline.py @@ -0,0 +1,156 @@ +""" +Agriculture Census Pipeline + +End-to-end pipeline to: +1. Scrape crop data from the Agriculture Census website +2. Clean and structure the data +3. Match tehsil names to CoRE Stack SOI boundaries +4. 
"""
Agriculture Census Pipeline

End-to-end pipeline to:
1. Scrape crop data from the Agriculture Census website
2. Clean and structure the data
3. Match tehsil names to CoRE Stack SOI boundaries
4. Export matched data as CSV for GEE integration

Usage:
    python -m computing.misc.agriculture_census.pipeline \
        --boundary-file /path/to/soi_tehsil.geojson \
        --output-dir /path/to/output \
        --states "Madhya Pradesh" "Rajasthan"
"""

import os
import argparse
import json

import pandas as pd


def run_pipeline(
    boundary_file,
    output_dir,
    states=None,
    max_districts=None,
    headless=True,
    skip_scraping=False,
    scraped_csv=None,
):
    """Run the full agriculture census pipeline.

    Args:
        boundary_file: Path to SOI tehsil boundary GeoJSON.
        output_dir: Path to write output files.
        states: List of state names to process (None = all available).
        max_districts: Limit districts per state (for testing).
        headless: Run browser headless.
        skip_scraping: If True, load from scraped_csv instead of scraping.
        scraped_csv: Path to previously scraped data CSV.

    Returns:
        dict with ``matched_df`` and ``stats``.

    Raises:
        ValueError: If skip_scraping is True but scraped_csv is not given.
            Previously this combination silently fell through to a full
            re-scrape, defeating the purpose of --skip-scraping.
    """
    # Validate before doing any filesystem work so a bad invocation fails fast.
    if skip_scraping and not scraped_csv:
        raise ValueError(
            "skip_scraping=True requires scraped_csv to be set "
            "(pass --scraped-csv on the CLI)"
        )

    os.makedirs(output_dir, exist_ok=True)

    # Step 1: Scrape or load data
    print("=" * 60)
    print("Step 1: Getting agriculture census data...")
    print("=" * 60)

    if skip_scraping:
        print(f"Loading previously scraped data from {scraped_csv}")
        census_df = pd.read_csv(scraped_csv)
    else:
        # Imported lazily: the scraper pulls in selenium/webdriver-manager,
        # which are not needed for the matching-only path.
        from .scraper import scrape_agcensus

        census_df = scrape_agcensus(
            output_dir=output_dir,
            states=states,
            max_districts=max_districts,
            headless=headless,
        )

    if census_df.empty:
        print("No data to process. Exiting.")
        return {"matched_df": census_df, "stats": {}}

    print(f" Records: {len(census_df)}")

    # Step 2: Load SOI boundaries (geopandas imported lazily — heavy dependency)
    print("\n" + "=" * 60)
    print("Step 2: Loading SOI tehsil boundaries...")
    print("=" * 60)

    import geopandas as gpd

    boundary_gdf = gpd.read_file(boundary_file)
    # Matching only needs the attribute table, not the geometry.
    boundary_df = pd.DataFrame(boundary_gdf.drop(columns="geometry"))
    print(f" Boundary records: {len(boundary_df)}")
    print(f" Columns: {list(boundary_df.columns)}")

    # Step 3: Match census tehsil names against the boundary attribute table
    print("\n" + "=" * 60)
    print("Step 3: Matching tehsil names...")
    print("=" * 60)

    from .tehsil_matcher import match_tehsils

    matched_df, stats = match_tehsils(census_df, boundary_df)

    # Persist outputs: matched rows as CSV, statistics as JSON.
    matched_path = os.path.join(output_dir, "agriculture_census_matched.csv")
    matched_df.to_csv(matched_path, index=False)

    stats_path = os.path.join(output_dir, "agri_census_match_stats.json")
    with open(stats_path, "w") as f:
        json.dump(stats, f, indent=2)

    print("\n Match statistics:")  # was an f-string with no placeholder
    print(f" Total: {stats['total']}")
    print(f" Exact: {stats['exact']}")
    print(f" Fuzzy: {stats['fuzzy']}")
    print(f" Unmatched: {stats['unmatched']}")
    print(f" Match %: {stats['match_pct']}%")
    print(f"\n Saved to {matched_path}")

    return {"matched_df": matched_df, "stats": stats}


def main():
    """CLI entry point: parse arguments and invoke run_pipeline."""
    parser = argparse.ArgumentParser(
        description="Scrape and process Agriculture Census data"
    )
    parser.add_argument(
        "--boundary-file", required=True,
        help="Path to SOI tehsil boundary GeoJSON file"
    )
    parser.add_argument(
        "--output-dir", required=True,
        help="Directory to write output files"
    )
    parser.add_argument(
        "--states", nargs="*", default=None,
        help="States to process (space-separated)"
    )
    parser.add_argument(
        "--max-districts", type=int, default=None,
        help="Max districts per state (for testing)"
    )
    parser.add_argument(
        "--no-headless", action="store_true",
        help="Run browser with visible window"
    )
    parser.add_argument(
        "--skip-scraping", action="store_true",
        help="Skip scraping, load from --scraped-csv instead"
    )
    parser.add_argument(
        "--scraped-csv", default=None,
        help="Path to previously scraped CSV"
    )

    args = parser.parse_args()
    run_pipeline(
        boundary_file=args.boundary_file,
        output_dir=args.output_dir,
        states=args.states,
        max_districts=args.max_districts,
        headless=not args.no_headless,
        skip_scraping=args.skip_scraping,
        scraped_csv=args.scraped_csv,
    )


if __name__ == "__main__":
    main()
"""
Agriculture Census Scraper

Scrapes tehsil-level crop data from the Agriculture Census website
(https://agcensus.da.gov.in/) and the UP Agriculture portal
(https://upag.gov.in/) to build a structured dataset of crop types
and their area coverage at the tehsil/district level.

The agcensus website uses ASP.NET WebForms with postback-based navigation.
This scraper uses Selenium to handle the dynamic dropdowns and table rendering.

Output: CSV with columns
    state, district, tehsil, crop_name, area_hectares, year, source
"""

import os
import time
import csv  # NOTE(review): imported but not referenced in this module — confirm before removing
import json
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select, WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import (
    TimeoutException,
    NoSuchElementException,
    StaleElementReferenceException,
)
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options


# Landing page with the cascading Year/Table/State/District/Tehsil dropdowns.
AGCENSUS_URL = "https://agcensus.da.gov.in/DatabaseHome.aspx"
# Default seconds to wait for a postback-populated dropdown.
DEFAULT_TIMEOUT = 15


def _create_driver(headless=True):
    """Create a Chrome WebDriver instance.

    Uses webdriver-manager to download a matching chromedriver binary, so no
    driver needs to be pre-installed. The no-sandbox/dev-shm flags make the
    browser usable inside containers.
    """
    options = Options()
    if headless:
        options.add_argument("--headless=new")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    options.add_argument("--disable-gpu")
    options.add_argument("--window-size=1920,1080")

    service = Service(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=service, options=options)
    driver.implicitly_wait(5)
    return driver


def _wait_for_dropdown_populated(driver, select_id, timeout=DEFAULT_TIMEOUT):
    """Wait until a dropdown has more than 1 option (i.e., loaded via postback).

    Returns True when populated within `timeout` seconds, False on timeout.
    The ">1" threshold assumes option 0 is a "Select..." placeholder.
    """
    try:
        WebDriverWait(driver, timeout).until(
            lambda d: len(Select(d.find_element(By.ID, select_id)).options) > 1
        )
        return True
    except TimeoutException:
        return False


def _safe_select_by_index(driver, select_id, index):
    """Select a dropdown option by index with retry on stale element.

    Returns True on success; False when the index is out of range or the
    element stays stale after 3 attempts. The element is re-located on every
    attempt because the ASP.NET postback replaces the DOM node.
    """
    for attempt in range(3):
        try:
            select = Select(driver.find_element(By.ID, select_id))
            if index < len(select.options):
                select.select_by_index(index)
                time.sleep(1.5)  # Allow postback to complete
                return True
        except StaleElementReferenceException:
            time.sleep(1)
    return False


def _get_dropdown_options(driver, select_id):
    """Get all option texts from a dropdown as (index, text) tuples.

    Returns an empty list when the element is missing, so callers can treat
    "dropdown absent" and "dropdown empty" uniformly.
    """
    try:
        select = Select(driver.find_element(By.ID, select_id))
        return [(i, opt.text.strip()) for i, opt in enumerate(select.options)]
    except NoSuchElementException:
        return []


def _extract_table_data(driver):
    """Extract data rows from the results table on the page.

    NOTE(review): assumes the results grid has id "GridView1" (the default
    ASP.NET GridView id) — confirm against the live site. Skips the header
    row and all-empty rows; returns a list of cell-text lists.
    """
    rows = []
    try:
        table = driver.find_element(By.ID, "GridView1")
        trs = table.find_elements(By.TAG_NAME, "tr")
        for tr in trs[1:]:  # Skip header row
            tds = tr.find_elements(By.TAG_NAME, "td")
            row = [td.text.strip() for td in tds]
            if row and any(cell for cell in row):
                rows.append(row)
    except NoSuchElementException:
        pass
    return rows


def scrape_agcensus(
    output_dir,
    states=None,
    max_districts=None,
    headless=True,
    progress_callback=None,
):
    """Scrape crop data from the Agriculture Census website.

    The site has cascading dropdowns: Year -> Table -> State -> District -> Tehsil
    We iterate through available options to collect tehsil-level data.

    Args:
        output_dir: Directory to write output CSV
        states: List of state names to scrape (None = all available)
        max_districts: Limit districts per state (for testing)
        headless: Run browser in headless mode
        progress_callback: Optional function(state, district, msg) for progress

    Returns:
        pd.DataFrame of scraped data (empty DataFrame when nothing was
        collected). A raw CSV is also written to
        ``<output_dir>/agriculture_census_raw.csv`` when records exist.
    """
    os.makedirs(output_dir, exist_ok=True)
    output_csv = os.path.join(output_dir, "agriculture_census_raw.csv")

    driver = _create_driver(headless=headless)
    all_records = []

    try:
        driver.get(AGCENSUS_URL)
        time.sleep(3)  # let the initial WebForms page settle

        # Identify dropdown IDs (these may vary; common patterns below)
        # The actual IDs need to be confirmed by inspecting the live site
        dropdown_ids = {
            "year": "ddlYear",
            "table": "ddlTable",
            "state": "ddlState",
            "district": "ddlDistrict",
            "tehsil": "ddlTehsil",
        }

        # Try to detect actual dropdown IDs from page; falls back to the
        # defaults above for any role not found.
        selects = driver.find_elements(By.TAG_NAME, "select")
        found_ids = [s.get_attribute("id") for s in selects if s.get_attribute("id")]
        print(f"Found dropdown IDs on page: {found_ids}")

        # Map detected IDs by substring of their name (first match wins
        # per element; a later element for the same role overwrites).
        for fid in found_ids:
            fid_lower = fid.lower()
            if "year" in fid_lower:
                dropdown_ids["year"] = fid
            elif "table" in fid_lower:
                dropdown_ids["table"] = fid
            elif "state" in fid_lower:
                dropdown_ids["state"] = fid
            elif "district" in fid_lower:
                dropdown_ids["district"] = fid
            elif "tehsil" in fid_lower or "block" in fid_lower:
                dropdown_ids["tehsil"] = fid

        print(f"Using dropdown IDs: {json.dumps(dropdown_ids, indent=2)}")

        # Select the most recent year
        year_options = _get_dropdown_options(driver, dropdown_ids["year"])
        if year_options:
            # Pick the latest year (usually last numeric option)
            # NOTE(review): assumes options are sorted ascending by year and
            # that the last one is not a placeholder — confirm on live site.
            latest_idx = year_options[-1][0] if len(year_options) > 1 else 0
            _safe_select_by_index(driver, dropdown_ids["year"], latest_idx)
            selected_year = year_options[latest_idx][1] if latest_idx < len(year_options) else "unknown"
            print(f"Selected year: {selected_year}")

        # Select table (crop-area related)
        table_options = _get_dropdown_options(driver, dropdown_ids["table"])
        table_idx = 1  # Usually index 1 is the first data table
        if len(table_options) > 1:
            # Try to find a table about "crop" or "area"
            for idx, text in table_options:
                if any(kw in text.lower() for kw in ["crop", "area", "holding"]):
                    table_idx = idx
                    break
        _safe_select_by_index(driver, dropdown_ids["table"], table_idx)
        print(f"Selected table: {table_options[table_idx][1] if table_idx < len(table_options) else 'unknown'}")

        time.sleep(2)

        # Iterate states
        _wait_for_dropdown_populated(driver, dropdown_ids["state"])
        state_options = _get_dropdown_options(driver, dropdown_ids["state"])
        print(f"Found {len(state_options)} states")

        for state_idx, state_name in state_options:
            # Skip the placeholder option at index 0.
            if state_idx == 0 and state_name.lower() in ["select", "--select--", ""]:
                continue
            # Apply the optional state filter (case-insensitive).
            if states and state_name.lower().strip() not in [s.lower() for s in states]:
                continue

            print(f"\nProcessing state: {state_name}")
            _safe_select_by_index(driver, dropdown_ids["state"], state_idx)
            time.sleep(2)

            # Iterate districts
            _wait_for_dropdown_populated(driver, dropdown_ids["district"])
            district_options = _get_dropdown_options(driver, dropdown_ids["district"])
            districts_processed = 0

            for dist_idx, dist_name in district_options:
                if dist_idx == 0 and dist_name.lower() in ["select", "--select--", ""]:
                    continue
                if max_districts and districts_processed >= max_districts:
                    break

                print(f" District: {dist_name}")
                _safe_select_by_index(driver, dropdown_ids["district"], dist_idx)
                time.sleep(2)

                # Try to get tehsil-level data; a short timeout is used since
                # some tables simply have no tehsil dropdown.
                tehsil_available = _wait_for_dropdown_populated(
                    driver, dropdown_ids["tehsil"], timeout=5
                )

                if tehsil_available:
                    tehsil_options = _get_dropdown_options(driver, dropdown_ids["tehsil"])
                    for teh_idx, teh_name in tehsil_options:
                        if teh_idx == 0 and teh_name.lower() in ["select", "--select--", ""]:
                            continue

                        _safe_select_by_index(driver, dropdown_ids["tehsil"], teh_idx)
                        time.sleep(1)

                        # Click submit/show button if present
                        # NOTE(review): assumes the button id is "btnSubmit" — confirm.
                        try:
                            submit_btn = driver.find_element(By.ID, "btnSubmit")
                            submit_btn.click()
                            time.sleep(2)
                        except NoSuchElementException:
                            pass

                        # Extract table data; rows are kept raw ("data" list)
                        # and structured downstream.
                        table_data = _extract_table_data(driver)
                        for row in table_data:
                            all_records.append({
                                "state": state_name,
                                "district": dist_name,
                                "tehsil": teh_name,
                                "data": row,
                                "source": "agcensus.da.gov.in",
                            })

                        if progress_callback:
                            progress_callback(state_name, dist_name, teh_name)
                else:
                    # No tehsil dropdown, try to get district-level data
                    try:
                        submit_btn = driver.find_element(By.ID, "btnSubmit")
                        submit_btn.click()
                        time.sleep(2)
                    except NoSuchElementException:
                        pass

                    table_data = _extract_table_data(driver)
                    for row in table_data:
                        all_records.append({
                            "state": state_name,
                            "district": dist_name,
                            "tehsil": "",  # empty marks district-level rows
                            "data": row,
                            "source": "agcensus.da.gov.in",
                        })

                districts_processed += 1

    except Exception as e:
        print(f"Error during scraping: {e}")
        raise
    finally:
        # Always release the browser, even on failure.
        driver.quit()

    # Save raw records
    if all_records:
        df = pd.DataFrame(all_records)
        df.to_csv(output_csv, index=False)
        print(f"\nSaved {len(all_records)} records to {output_csv}")
    else:
        df = pd.DataFrame()
        print("\nNo records scraped")

    return df
"""
Tehsil Name Matcher

Matches scraped agriculture census tehsil/district names to the CoRE Stack
administrative boundary dataset using edit distance matching.

The CoRE Stack uses SOI (Survey of India) tehsil boundaries which may have
different spellings compared to the Agriculture Census website.
"""

import pandas as pd
from difflib import SequenceMatcher

try:
    from unidecode import unidecode
except ImportError:  # graceful fallback when unidecode is not installed
    import unicodedata

    def unidecode(text):
        """Approximate ASCII transliteration via NFKD decomposition.

        Weaker than the real unidecode (drops characters it cannot map),
        but sufficient for Latin-script names with diacritics.
        """
        return (
            unicodedata.normalize("NFKD", text)
            .encode("ascii", "ignore")
            .decode("ascii")
        )


# Generic administrative-unit words that vary between data sources and carry
# no identifying information; removed (as whole words) during normalization.
_ADMIN_TOKENS = frozenset(
    {"district", "tehsil", "taluk", "taluka", "mandal", "block"}
)


def _normalize(text):
    """Normalize a tehsil/district name for matching.

    Transliterates to ASCII, lowercases, collapses whitespace, and drops
    generic administrative words. Tokens are removed only as whole words:
    the previous substring-based replace() corrupted names that merely
    contain a token (e.g. "Mandalgarh" -> "garh"), which could produce
    false exact matches between distinct places.
    """
    if not isinstance(text, str):
        return ""
    text = unidecode(text).strip().lower()
    words = [w for w in text.split() if w not in _ADMIN_TOKENS]
    return " ".join(words)


def _similarity(a, b):
    """Similarity ratio in [0, 1] between two strings (difflib ratio)."""
    return SequenceMatcher(None, a, b).ratio()


def match_tehsils(
    census_df,
    boundary_df,
    state_col="STATE",
    district_col="District",
    tehsil_col="TEHSIL",
    similarity_threshold=0.75,
):
    """Match agriculture census tehsil names to SOI boundary tehsil names.

    Matching is two-stage per census row: exact match on the normalized
    name within the same (state, district); otherwise the best fuzzy match
    above `similarity_threshold`. When the district has no candidates at
    all, all tehsils of the state are considered.

    Args:
        census_df: DataFrame with scraped agriculture census data
            Expected columns: state, district, tehsil
        boundary_df: DataFrame of SOI tehsil boundaries
            Expected columns: STATE, District, TEHSIL (configurable)
        state_col: Boundary column holding the state name.
        district_col: Boundary column holding the district name.
        tehsil_col: Boundary column holding the tehsil name.
        similarity_threshold: Minimum score for a fuzzy match

    Returns:
        tuple: (matched_df, stats_dict) where matched_df is census_df plus
        matched_tehsil / match_type / match_score columns, and stats_dict
        has total, exact, fuzzy, unmatched, match_pct.
    """
    # Build a (state, district) -> [candidate tehsils] lookup from boundaries.
    boundary_lookup = {}
    for _, row in boundary_df.iterrows():
        state = _normalize(str(row.get(state_col, "")))
        district = _normalize(str(row.get(district_col, "")))
        tehsil = _normalize(str(row.get(tehsil_col, "")))

        key = (state, district)
        if key not in boundary_lookup:
            boundary_lookup[key] = []
        boundary_lookup[key].append({
            "tehsil": tehsil,
            "original": str(row.get(tehsil_col, "")),
        })

    results = []
    counts = {"exact": 0, "fuzzy": 0, "unmatched": 0}

    for _, row in census_df.iterrows():
        state = _normalize(str(row.get("state", "")))
        district = _normalize(str(row.get("district", "")))
        tehsil = _normalize(str(row.get("tehsil", "")))

        key = (state, district)
        candidates = boundary_lookup.get(key, [])

        # Fall back to a state-wide search when the district has no candidates
        # (district spellings differ between the two sources too).
        if not candidates:
            for k, v in boundary_lookup.items():
                if k[0] == state:
                    candidates.extend(v)

        matched_tehsil = ""
        match_type = "unmatched"
        match_score = 0.0

        if candidates and tehsil:
            # Stage 1: exact match on normalized names.
            for c in candidates:
                if c["tehsil"] == tehsil:
                    matched_tehsil = c["original"]
                    match_type = "exact"
                    match_score = 1.0
                    break

            # Stage 2: best fuzzy candidate above the threshold.
            if match_type == "unmatched":
                best_score = 0
                best = None
                for c in candidates:
                    score = _similarity(tehsil, c["tehsil"])
                    if score > best_score:
                        best_score = score
                        best = c
                if best and best_score >= similarity_threshold:
                    matched_tehsil = best["original"]
                    match_type = "fuzzy"
                    match_score = best_score

        counts[match_type] = counts.get(match_type, 0) + 1
        results.append({
            "matched_tehsil": matched_tehsil,
            "match_type": match_type,
            "match_score": round(match_score, 3),
        })

    # Attach match columns alongside the original census rows.
    result_df = pd.DataFrame(results)
    matched_df = pd.concat([census_df.reset_index(drop=True), result_df], axis=1)

    total = len(census_df)
    stats = {
        "total": total,
        "exact": counts["exact"],
        "fuzzy": counts["fuzzy"],
        "unmatched": counts["unmatched"],
        # max(total, 1) guards against division by zero on empty input.
        "match_pct": round(100 * (counts["exact"] + counts["fuzzy"]) / max(total, 1), 2),
    }

    return matched_df, stats
"""
GEE Export for Agriculture Census

Takes the matched agriculture census data, joins it with SOI tehsil boundary
geometries, and publishes the enriched vector layer as an Earth Engine asset.

The output FeatureCollection has per-tehsil polygons with properties:
    - crop_name, area_hectares (for each crop)
    - matched_tehsil, match_score
    - state, district, tehsil

This produces a tehsil-level vectorized crop map that can be used
in the Know Your Landscape dashboard and other downstream apps.
"""

import ee  # NOTE(review): not referenced directly; EE calls go through utilities.gee_utils
import geopandas as gpd
import pandas as pd

from utilities.gee_utils import (
    ee_initialize,
    gdf_to_ee_fc,
    export_vector_asset_to_gee,
    check_task_status,
    is_gee_asset_exists,
    make_asset_public,
    valid_gee_text,
    get_gee_asset_path,
)
from computing.utils import (
    sync_fc_to_geoserver,
    save_layer_info_to_db,
    update_layer_sync_status,
)
from nrm_app.celery import app


def enrich_tehsil_boundaries(matched_csv_path, boundary_geojson_path):
    """Join matched agriculture census data with SOI tehsil boundaries.

    Args:
        matched_csv_path: Path to matched agriculture census CSV
            (output of pipeline.py with matched_tehsil column)
        boundary_geojson_path: Path to SOI tehsil boundary GeoJSON

    Returns:
        GeoDataFrame with tehsil polygons enriched with crop attributes
        (empty GeoDataFrame when no census record matched a boundary).

    Raises:
        ValueError: If no tehsil-named column is found in the boundary file.
    """
    census_df = pd.read_csv(matched_csv_path)
    boundaries_gdf = gpd.read_file(boundary_geojson_path)

    # Standardize boundary column names to lower snake_case so the tehsil
    # column search below is spelling-tolerant.
    boundaries_gdf.columns = [
        c.strip().lower().replace(" ", "_") for c in boundaries_gdf.columns
    ]

    # Find tehsil name column in boundaries; the exact header varies
    # between SOI exports.
    tehsil_col = None
    for col in boundaries_gdf.columns:
        if "tehsil" in col.lower():
            tehsil_col = col
            break

    if tehsil_col is None:
        raise ValueError(
            "Could not find tehsil column in boundary file. "
            f"Available: {list(boundaries_gdf.columns)}"
        )

    # Filter to matched records only (exact or fuzzy; unmatched rows dropped).
    matched = census_df[census_df["match_type"].isin(["exact", "fuzzy"])].copy()

    if matched.empty:
        print("No matched records found.")
        return gpd.GeoDataFrame()

    # Case-insensitive join key on the matched tehsil name.
    matched["_join_key"] = matched["matched_tehsil"].str.strip().str.lower()
    boundaries_gdf["_join_key"] = boundaries_gdf[tehsil_col].str.strip().str.lower()

    # Inner join: one output row per (boundary polygon, census record) pair.
    # NOTE(review): a tehsil with several crop rows duplicates its polygon
    # once per crop, and overlapping column names (e.g. state/district
    # appearing in both frames) get pandas _x/_y suffixes — confirm that
    # downstream consumers expect this shape.
    enriched = boundaries_gdf.merge(matched, on="_join_key", how="inner")
    enriched = enriched.drop(columns=["_join_key"], errors="ignore")

    # Ensure EPSG:4326, which the GEE ingestion path expects.
    if enriched.crs is None:
        enriched = enriched.set_crs("EPSG:4326")
    elif enriched.crs.to_epsg() != 4326:
        enriched = enriched.to_crs("EPSG:4326")

    print(f"Enriched {len(enriched)} tehsil polygons with crop data")
    return enriched


def export_to_geojson(enriched_gdf, output_path):
    """Export enriched GeoDataFrame to GeoJSON at output_path."""
    enriched_gdf.to_file(output_path, driver="GeoJSON")
    print(f"Exported GeoJSON to {output_path}")


@app.task(bind=True)
def publish_agri_census_to_gee(
    self,
    matched_csv_path,
    boundary_geojson_path,
    state,
    district,
    block,
    gee_account_id,
):
    """Celery task to publish agriculture census as a GEE vector asset.

    Workflow:
    1. Enrich tehsil boundaries with crop data
    2. Convert to ee.FeatureCollection
    3. Export to GEE as vector asset
    4. Sync to GeoServer
    5. Save layer info to DB

    Returns early (no-op) when the target asset already exists, so the
    task is safe to re-run.

    Args:
        matched_csv_path: Path to matched agriculture census CSV.
        boundary_geojson_path: Path to SOI tehsil boundary GeoJSON.
        state: State name used for asset path / DB record.
        district: District name used for naming.
        block: Block name used for naming.
        gee_account_id: Earth Engine service account to initialize with.
    """
    ee_initialize(gee_account_id)

    # Asset naming: description doubles as the asset's leaf name.
    description = (
        f"agri_census_{valid_gee_text(district)}_{valid_gee_text(block)}"
    )
    asset_id = get_gee_asset_path(state, district, block) + description

    # Skip work when a previous run already published this asset.
    if is_gee_asset_exists(asset_id):
        print(f"Asset already exists: {asset_id}")
        return

    # Step 1: Enrich boundaries
    print("Enriching tehsil boundaries with crop data...")
    enriched_gdf = enrich_tehsil_boundaries(
        matched_csv_path, boundary_geojson_path
    )

    if enriched_gdf.empty:
        print("No matched tehsils found. Skipping.")
        return

    # Step 2: Convert to FeatureCollection
    print("Converting to Earth Engine FeatureCollection...")
    fc = gdf_to_ee_fc(enriched_gdf)

    # Step 3: Export to GEE (blocking: check_task_status polls until done)
    print(f"Exporting to GEE asset: {asset_id}")
    task_id = export_vector_asset_to_gee(fc, description, asset_id)

    # NOTE(review): when export_vector_asset_to_gee returns a falsy task id,
    # the GeoServer sync and DB save below are skipped silently — confirm
    # this is intended rather than an error condition.
    if task_id:
        check_task_status(task_id)
        make_asset_public(asset_id)
        print(f"Published agriculture census asset: {asset_id}")

        # Step 4: Sync to GeoServer
        layer_name = (
            valid_gee_text(district) + "_" + valid_gee_text(block) + "_agri_census"
        )
        sync_fc_to_geoserver(asset_id, layer_name)

        # Step 5: Save to DB
        save_layer_info_to_db(
            state=state,
            district=district,
            block=block,
            layer_name=layer_name,
            dataset_name="Agriculture Census",
            metadata={
                "source": "agcensus.da.gov.in",
                "description": "Tehsil-level crop type and area data",
            },
        )
        update_layer_sync_status(layer_name, status="synced")
        print("Done.")