diff --git a/download/README.md b/download/README.md index a462480..4017efe 100644 --- a/download/README.md +++ b/download/README.md @@ -13,13 +13,16 @@ uv sync # Activate virtual environment source .venv/bin/activate # On Windows: .venv\Scripts\activate -# Download evaluation scenes for novel view synthesis -python download.py --modality nvs --scene-list eval_scenes_v1.txt +# Recommended: download showcase scenes for quick visual inspection +python download.py --modality nvs --quality-tier showcase -# Download first 5 training scenes for 3D reconstruction -python download.py --modality 3d --scene-list train_scenes_v1.txt --count 5 +# Download evaluation-quality scenes for novel view synthesis +python download.py --modality nvs --quality-tier evaluation_ready -# Download the entire dataset with full modalities +# Download first 5 training-ready scenes for 3D reconstruction +python download.py --modality 3d --quality-tier training_ready --count 5 + +# Explicitly download every scene listed in the public manifest python download.py --modality full --all ``` For more downloading options and examples, see the [Usage Examples](#usage-examples) section below, or run `python download.py --help`. @@ -35,6 +38,8 @@ The Wanderland dataset is available on HuggingFace at [`ai4ce/wanderland`](https - **Splits**: Train/validation splits for novel view synthesis (per-scene image splits) - **Navigation**: Isaac Sim compatible scene files (USDZ) and episode configurations +The downloader reads the public manifest `wanderland_public_manifest.csv` by default. This manifest is the source of truth for currently released scenes and includes `quality_tier` values such as `showcase`, `evaluation_ready`, and `training_ready`. + ## Download Modalities The download tool supports four modalities optimized for different tasks: @@ -176,49 +181,56 @@ wanderland_data/ **Note**: This is different from scene-level splits (train_scenes_v1.txt / eval_scenes_v1.txt) which divide scenes for 3D reconstruction benchmarking. -## Scene Splits +## Scene Selection -The dataset provides two types of splits: +The recommended way to select scenes is through the public manifest: + +```bash +python download.py --modality nvs --quality-tier showcase +python download.py --modality nvs --quality-tier evaluation_ready +python download.py --modality 3d --quality-tier training_ready +``` -### Scene-Level Splits (3D Reconstruction) -Used to divide scenes into training and evaluation sets for 3D reconstruction benchmarks. +The dataset also includes two legacy scene-level split files used by the paper: -- **`train_scenes_v1.txt`**: 235 training scenes -- **`eval_scenes_v1.txt`**: 200 evaluation scenes +- **`train_scenes_v1.txt`**: 235 training scenes in the paper split +- **`eval_scenes_v1.txt`**: 200 evaluation scenes in the paper split -These splits correspond to the exact division used in the Wanderland paper. +Current public downloads are validated against the manifest, so future additions and repaired scenes can be released without changing the downloader. ### Image-Level Splits (Novel View Synthesis) Each scene contains `nvs_split/train.txt` and `nvs_split/val.txt` that divide the images within that scene for novel view synthesis tasks. ## Usage Examples -### Example 1: Download Evaluation Scenes for NVS Benchmark +### Example 1: Download Showcase Scenes for Quick Inspection +```bash +python download.py --modality nvs --quality-tier showcase +``` + +### Example 2: Download Evaluation-Quality Scenes for NVS ```bash -# Download all 200 evaluation scenes with NVS data -python download.py --modality nvs --scene-list eval_scenes_v1.txt +python download.py --modality nvs --quality-tier evaluation_ready ``` -### Example 2: Download Training Scenes for 3D Reconstruction +### Example 3: Download Training-Ready Scenes for 3D Reconstruction ```bash -# Download first 10 training scenes with 3D reconstruction data -python download.py --modality 3d --scene-list train_scenes_v1.txt --count 10 +python download.py --modality 3d --quality-tier training_ready --count 10 ``` -### Example 3: Download Navigation Data for Isaac Sim +### Example 4: Explicitly Download All Manifest Scenes ```bash -# Download navigation files for all scenes -python download.py --modality navigation --all --output ../nav_data +python download.py --modality full --all --output ../wanderland_full ``` -### Example 4: Download Specific Scenes +### Example 5: Download Specific Scenes ```bash # Download specific scenes by name python download.py --modality full \ --scenes 1-A_d1uPKpnDksrjY3UE23dUTC0odvnHu 1-j7j0xj8vB0uYpWlByv0PvyRCawz6WXH ``` -### Example 5: Download Custom Scene List +### Example 6: Download Custom Scene List ```bash # Create custom scene list cat > my_scenes.txt << EOF diff --git a/download/download.py b/download/download.py index 761321a..de2fc5b 100644 --- a/download/download.py +++ b/download/download.py @@ -1,35 +1,72 @@ #!/usr/bin/env python3 -import os import sys import argparse +import csv import tarfile import shutil +from collections import Counter from pathlib import Path -from typing import List -from huggingface_hub import HfApi, hf_hub_download, snapshot_download +from huggingface_hub import hf_hub_download, snapshot_download # Constants DATASET_REPO = "ai4ce/wanderland" +PUBLIC_MANIFEST_FILE = "wanderland_public_manifest.csv" -def list_available_scenes(): - """Query HuggingFace API for available scenes.""" - print(f"Fetching available scenes from {DATASET_REPO}...") - api = HfApi() - files = api.list_repo_files(DATASET_REPO, repo_type="dataset") +def load_public_manifest(): + """Load the public manifest that defines the currently released scenes.""" + print(f"Fetching public manifest from {DATASET_REPO}/{PUBLIC_MANIFEST_FILE}...") + try: + manifest_path = hf_hub_download( + repo_id=DATASET_REPO, + repo_type="dataset", + filename=PUBLIC_MANIFEST_FILE, + ) + except Exception as e: + print(f"Error: Failed to download public manifest: {e}") + sys.exit(1) + + required_columns = {"scene_id", "quality_tier"} + rows = [] + with open(manifest_path, newline="") as f: + reader = csv.DictReader(f) + missing_columns = required_columns - set(reader.fieldnames or []) + if missing_columns: + print( + "Error: Public manifest is missing required columns: " + + ", ".join(sorted(missing_columns)) + ) + sys.exit(1) + + for row in reader: + scene_id = row.get("scene_id", "").strip() + if not scene_id: + continue + row["scene_id"] = scene_id + row["quality_tier"] = row.get("quality_tier", "").strip() + rows.append(row) + + if not rows: + print("Error: Public manifest contains no scenes") + sys.exit(1) + + return rows + - scenes = set() - for file in files: - # Look for files under data/{scene_name}/ - if file.startswith("data/"): - parts = file.split("/") - if len(parts) >= 2: - scene_name = parts[1] - scenes.add(scene_name) +def describe_public_manifest(rows): + """Print a concise summary of the loaded public manifest.""" + versions = sorted({row.get("manifest_version", "").strip() for row in rows if row.get("manifest_version", "").strip()}) + updated_at = sorted({row.get("manifest_updated_at", "").strip() for row in rows if row.get("manifest_updated_at", "").strip()}) + tiers = Counter(row.get("quality_tier", "").strip() or "unspecified" for row in rows) - return sorted(list(scenes)) + print(f"Found {len(rows)} released scenes in public manifest") + if versions: + print(f"Manifest version: {', '.join(versions)}") + if updated_at: + print(f"Manifest updated at: {', '.join(updated_at)}") + print("Quality tiers: " + ", ".join(f"{tier}={count}" for tier, count in sorted(tiers.items()))) def download_file_from_hf(repo_id, filename, local_dir): @@ -260,20 +297,20 @@ def main(): formatter_class=argparse.RawDescriptionHelpFormatter, epilog=""" Examples: - # Download first 5 scenes for 3D reconstruction - python download.py --modality 3d --count 5 + # Recommended: download showcase scenes for quick inspection + python download.py --modality nvs --quality-tier showcase + + # Download evaluation-quality scenes for novel view synthesis + python download.py --modality nvs --quality-tier evaluation_ready + + # Download first 5 training-ready scenes for 3D reconstruction + python download.py --modality 3d --quality-tier training_ready --count 5 # Download specific scenes for novel view synthesis python download.py --modality nvs --scenes scene_001 scene_002 - # Download all scenes (full data) + # Explicitly download every scene listed in the public manifest python download.py --modality full --all - - # Download scenes from a list file for navigation tasks - python download.py --modality navigation --scene-list train_scenes_v1.txt - - # Download evaluation scenes for benchmarking - python download.py --modality nvs --scene-list eval_scenes_v1.txt --output ../wanderland_data """ ) @@ -296,13 +333,8 @@ def main(): help="Output directory for downloaded data (default: ../wanderland_data)" ) - # Scene selection (mutually exclusive) - scene_group = parser.add_mutually_exclusive_group(required=True) - scene_group.add_argument( - "--count", - type=int, - help="Download first N scenes" - ) + # Scene selection (mutually exclusive, with --count as an optional limiter) + scene_group = parser.add_mutually_exclusive_group() scene_group.add_argument( "--scenes", nargs="+", @@ -318,9 +350,39 @@ def main(): type=str, help="Path to text file containing scene names (one per line, # for comments)" ) + scene_group.add_argument( + "--quality-tier", + type=str, + help="Download scenes from a public manifest quality tier, e.g. showcase, " + "evaluation_ready, or training_ready" + ) + parser.add_argument( + "--count", + type=int, + help="Limit the selected scenes to the first N entries" + ) args = parser.parse_args() + if args.quality_tier is not None: + args.quality_tier = args.quality_tier.strip() + if not args.quality_tier: + parser.error("--quality-tier must be a non-empty string") + + if args.count is not None and args.count <= 0: + parser.error("--count must be a positive integer") + + if not any([ + args.scenes, + args.all, + args.scene_list, + args.quality_tier, + args.count is not None, + ]): + parser.error( + "choose a scene selector: --quality-tier, --scene-list, --scenes, --all, or --count" + ) + # Create output directory output_dir = Path(args.output) output_dir.mkdir(parents=True, exist_ok=True) @@ -331,32 +393,49 @@ def main(): print(f"Output: {output_dir.absolute()}") print("="*60) - # Get list of available scenes - available_scenes = list_available_scenes() - print(f"Found {len(available_scenes)} available scenes") + # Get list of currently released scenes from the public manifest. + manifest_rows = load_public_manifest() + describe_public_manifest(manifest_rows) + available_scenes = [row["scene_id"] for row in manifest_rows] + available_scene_set = set(available_scenes) # Determine which scenes to download scenes_to_download = [] if args.all: scenes_to_download = available_scenes - print(f"Downloading all {len(scenes_to_download)} scenes") + print(f"Selected all {len(scenes_to_download)} manifest scenes") - elif args.count: - scenes_to_download = available_scenes[:args.count] - print(f"Downloading first {len(scenes_to_download)} scenes") + elif args.quality_tier: + scenes_to_download = [ + row["scene_id"] + for row in manifest_rows + if row.get("quality_tier", "") == args.quality_tier + ] + + if not scenes_to_download: + available_tiers = sorted({ + row.get("quality_tier", "").strip() + for row in manifest_rows + if row.get("quality_tier", "").strip() + }) + print(f"Error: No scenes found for quality tier '{args.quality_tier}'") + print(f"Available quality tiers: {', '.join(available_tiers)}") + sys.exit(1) + + print(f"Selected {len(scenes_to_download)} scenes from quality tier '{args.quality_tier}'") elif args.scenes: # Validate scene names invalid_scenes = [] for scene in args.scenes: - if scene in available_scenes: + if scene in available_scene_set: scenes_to_download.append(scene) else: invalid_scenes.append(scene) if invalid_scenes: - print(f"Warning: Invalid scene names: {invalid_scenes}") + print(f"Warning: Scene names not found in public manifest: {invalid_scenes}") if not scenes_to_download: print("Error: No valid scenes specified") @@ -375,13 +454,13 @@ def main(): # Validate scene names invalid_scenes = [] for scene in requested_scenes: - if scene in available_scenes: + if scene in available_scene_set: scenes_to_download.append(scene) else: invalid_scenes.append(scene) if invalid_scenes: - print(f"Warning: Invalid scene names in file: {invalid_scenes}") + print(f"Warning: Scene names not found in public manifest: {invalid_scenes}") if not scenes_to_download: print("Error: No valid scenes in list file") @@ -389,6 +468,21 @@ def main(): print(f"Downloading {len(scenes_to_download)} scenes from list file") + elif args.count: + scenes_to_download = available_scenes[:args.count] + print(f"Selected first {len(scenes_to_download)} manifest scenes") + + count_only_selection = args.count is not None and not any([ + args.scenes, + args.all, + args.scene_list, + args.quality_tier, + ]) + if args.count is not None and not count_only_selection: + original_count = len(scenes_to_download) + scenes_to_download = scenes_to_download[:args.count] + print(f"Applying --count {args.count}: {len(scenes_to_download)} of {original_count} selected scenes") + # Download each scene success_count = 0 fail_count = 0