diff --git a/README.md b/README.md index cd903cb..6bbae11 100644 --- a/README.md +++ b/README.md @@ -53,6 +53,24 @@ multirtc geocode PLATFORM SLC-GRANULE --resolution RESOLUTION --work-dir WORK-DI Output geocoded pixel values represent sigma0 power. +## Sentinel-1 Download Source +By default, Sentinel-1 data is downloaded from [ASF](https://search.asf.alaska.edu/). As an alternative, you can download from the [Copernicus Data Space Ecosystem (CDSE)](https://dataspace.copernicus.eu/) using the `--download-source CDSE` flag: + +```bash +multirtc rtc S1 SLC-GRANULE --resolution RESOLUTION --work-dir WORK-DIR --download-source CDSE +``` +The `--download-source` flag is also supported for the `geocode` subcommand. + +Both download sources require credentials: + +| Source | Environment Variables | `~/.netrc` machine | Registration | +|--------|----------------------|-------------------|--------------| +| ASF (default) | `EARTHDATA_USERNAME` / `EARTHDATA_PASSWORD` | `urs.earthdata.nasa.gov` | https://urs.earthdata.nasa.gov/users/new | +| CDSE | `CDSE_USERNAME` / `CDSE_PASSWORD` | `dataspace.copernicus.eu` | https://dataspace.copernicus.eu/ | + +Credentials can be provided via environment variables or a `~/.netrc` entry as shown above. + + ### Running via Docker In addition to the main python interface, I've also provided an experimental docker container that contains full support for polar grid format SICD data. Encapsulating this functionality in a docker container is ncessary for now because it requires re-compiling a development version of ISCE3. 
The docker container can be run using a similar interface, with exception of needing to pass your EarthData credentials and the need to pass a mounted volume with an `input` and `output` directory inside: @@ -71,6 +89,17 @@ PROJECT/ |--input.slc (if needed) |--output/ ``` +To use CDSE as the download source via Docker, pass your CDSE credentials and the `--download-source CDSE` flag: + +```bash +docker run -it --rm \ + -e CDSE_USERNAME=YOUR_CDSE_USERNAME \ + -e CDSE_PASSWORD=YOUR_CDSE_PASSWORD \ + -v ~/LOCAL_PATH/PROJECT:/home/conda/PROJECT \ + ghcr.io/forrestfwilliams/multirtc:VERSION \ + rtc S1 SLC-GRANULE --resolution RESOLUTION --work-dir PROJECT --download-source CDSE +``` + If you're encountering `permission denied` errors when running the container, make sure that the input and output folders are owned by the same group and user IDs that the container uses (`chown -R 1000:1000 ~/LOCAL_PATH/PROJECT`). ### Output Layers diff --git a/src/multirtc/__main__.py b/src/multirtc/__main__.py index 707c9b1..480eb27 100644 --- a/src/multirtc/__main__.py +++ b/src/multirtc/__main__.py @@ -2,6 +2,7 @@ import os from multirtc import dem, geocode, multirtc +from multirtc.cdse import ensure_cdse_credentials from multirtc.fetch import write_credentials_to_netrc_file from multirtc.multimetric import ale, point_target, rle @@ -12,6 +13,11 @@ def main(): if username and password: write_credentials_to_netrc_file(username, password, append=False) + cdse_username = os.getenv('CDSE_USERNAME') + cdse_password = os.getenv('CDSE_PASSWORD') + if cdse_username and cdse_password: + ensure_cdse_credentials(cdse_username, cdse_password) + global_parser = argparse.ArgumentParser( prog='multirtc', description='ISCE3-based multi-sensor RTC and cal/val tool', diff --git a/src/multirtc/cdse.py b/src/multirtc/cdse.py new file mode 100644 index 0000000..fb692d8 --- /dev/null +++ b/src/multirtc/cdse.py @@ -0,0 +1,279 @@ +"""Download Sentinel-1 SLC products from the Copernicus Data Space Ecosystem 
(CDSE). + +This module provides an alternative to ASF-based downloads via burst2safe. +It searches the CDSE OData catalog by the parent SLC scene name (derived from +the burst granule via ASF search) and downloads the full SLC zip. + +CDSE credentials (username/password) can be provided via: + - Environment variables: CDSE_USERNAME and CDSE_PASSWORD + - The ~/.netrc file with machine: dataspace.copernicus.eu + +References: + - https://documentation.dataspace.copernicus.eu/APIs/OData.html + - https://documentation.dataspace.copernicus.eu/APIs/Token.html +""" + +import logging +import netrc +import os +import time +import zipfile +from pathlib import Path + +import asf_search +import requests + + +logger = logging.getLogger(__name__) + +CDSE_HOST = 'dataspace.copernicus.eu' +CDSE_TOKEN_URL = 'https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token' +CDSE_ODATA_URL = 'https://catalogue.dataspace.copernicus.eu/odata/v1/Products' +CDSE_DOWNLOAD_URL = 'https://download.dataspace.copernicus.eu/odata/v1/Products' + +# Retry settings +MAX_RETRIES = 3 +RETRY_START_WAIT = 10 # seconds +RETRY_INCREMENT = 10 # seconds + + +def get_cdse_credentials( + username: str | None = None, + password: str | None = None, +) -> tuple[str, str]: + """Resolve CDSE credentials from arguments, environment, or ~/.netrc. + + Args: + username: CDSE username. Falls back to CDSE_USERNAME env var, then ~/.netrc. + password: CDSE password. Falls back to CDSE_PASSWORD env var, then ~/.netrc. + + Returns: + Tuple of (username, password). + """ + if username and password: + return username, password + + env_user = os.getenv('CDSE_USERNAME') + env_pass = os.getenv('CDSE_PASSWORD') + if env_user and env_pass: + return env_user, env_pass + + try: + nrc = netrc.netrc() + auth = nrc.authenticators(CDSE_HOST) + if auth: + return auth[0], auth[2] + except (FileNotFoundError, netrc.NetrcParseError): + pass + + raise ValueError( + 'CDSE credentials not found. 
Provide them via:\n' + ' 1. CDSE_USERNAME and CDSE_PASSWORD environment variables\n' + ' 2. ~/.netrc entry for machine dataspace.copernicus.eu\n' + 'Register for a free account at https://dataspace.copernicus.eu/' + ) + + +def ensure_cdse_credentials( + username: str | None = None, + password: str | None = None, +) -> None: + """Ensure CDSE credentials are available in ~/.netrc. + + If credentials are provided via env vars but ~/.netrc does not + contain an entry for CDSE, the entry will be appended to ~/.netrc. + """ + if username is None: + username = os.getenv('CDSE_USERNAME') + if password is None: + password = os.getenv('CDSE_PASSWORD') + + netrc_file = Path.home() / '.netrc' + + cdse_in_netrc = False + if netrc_file.exists(): + try: + nrc = netrc.netrc(netrc_file) + if nrc.authenticators(CDSE_HOST): + cdse_in_netrc = True + except netrc.NetrcParseError: + pass + + if username and password and not cdse_in_netrc: + with open(netrc_file, 'a') as f: + f.write(f'\nmachine {CDSE_HOST} login {username} password {password}\n') + netrc_file.chmod(0o600) + elif username and password and cdse_in_netrc: + logger.info(f'CDSE credentials already present in {netrc_file}, skipping update.') + + get_cdse_credentials(username, password) + + +def get_cdse_access_token(username: str, password: str) -> str: + """Obtain an access token from the CDSE identity provider. + + Args: + username: CDSE username. + password: CDSE password. + + Returns: + Access token string. + """ + data = { + 'grant_type': 'password', + 'username': username, + 'password': password, + 'client_id': 'cdse-public', + } + response = requests.post(CDSE_TOKEN_URL, data=data, timeout=60) + response.raise_for_status() + return response.json()['access_token'] + + +def search_cdse_by_scene_name(scene_name: str) -> dict: + """Search the CDSE OData catalog for a Sentinel-1 SLC by scene name. + + Args: + scene_name: Sentinel-1 scene name (without .SAFE or .zip extension). 
+ + Returns: + Product entry from the CDSE OData response containing 'Id' and 'Name'. + + Raises: + LookupError: If the product is not found on CDSE. + """ + scene_name = scene_name.replace('.zip', '').replace('.SAFE', '') + safe_name = f'{scene_name}.SAFE' + query = f"{CDSE_ODATA_URL}?$filter=Name eq '{safe_name}'" + + response = requests.get(query, timeout=120) + response.raise_for_status() + results = response.json().get('value', []) + + if not results: + raise LookupError(f"Product '{safe_name}' not found in CDSE catalog.") + return results[0] + + +def burst_to_parent_slc(burst_granule: str) -> str: + """Use ASF search to find the parent SLC scene name for a burst granule. + + Args: + burst_granule: Burst granule name (e.g. S1_136231_IW2_20200604T022312_VV_7C85-BURST). + + Returns: + Parent SLC scene name (e.g. S1A_IW_SLC__1SDV_20200604T022251_20200604T022318_032861_03CE65_7C85). + """ + results = asf_search.granule_search([burst_granule]) + if not results: + raise LookupError(f'Burst granule {burst_granule} not found in ASF archive.') + + url = results[0].properties['url'] + # URL format: https://sentinel1-burst.asf.alaska.edu/{PARENT_SLC}/IW{N}/{POL}/{idx}.tiff + parent_slc = url.split('/')[3] + logger.info(f'Mapped burst {burst_granule} to parent SLC {parent_slc}') + return parent_slc + + +def download_slc_from_cdse( + scene_name: str, + output_dir: Path | str, + max_retries: int = MAX_RETRIES, +) -> Path: + """Download a Sentinel-1 SLC product from CDSE and extract the SAFE directory. + + Args: + scene_name: Sentinel-1 scene name. + output_dir: Directory to save and extract the downloaded product. + max_retries: Number of download attempts before raising an error. + + Returns: + Path to the extracted .SAFE directory. 
+ """ + output_dir = Path(output_dir).resolve() + scene_name = scene_name.replace('.zip', '').replace('.SAFE', '') + safe_dir = output_dir / f'{scene_name}.SAFE' + + # Skip download if SAFE already exists + if safe_dir.exists(): + logger.info(f'SAFE directory already exists: {safe_dir}') + return safe_dir + + # Get credentials and token + cdse_user, cdse_pass = get_cdse_credentials() + access_token = get_cdse_access_token(cdse_user, cdse_pass) + + # Search CDSE catalog + product = search_cdse_by_scene_name(scene_name) + product_id = product['Id'] + + download_url_zip = f'{CDSE_DOWNLOAD_URL}({product_id})/$zip' + download_url_value = f'{CDSE_DOWNLOAD_URL}({product_id})/$value' + headers = {'Authorization': f'Bearer {access_token}'} + + out_zip = output_dir / f'{scene_name}.zip' + + def _do_download(url: str) -> None: + """Perform the actual download from a given URL.""" + response = requests.get(url, headers=headers, stream=True, timeout=600) + response.raise_for_status() + + with open(out_zip, 'wb') as f: + for chunk in response.iter_content(chunk_size=8192 * 16): + if chunk: + f.write(chunk) + + if out_zip.stat().st_size == 0: + out_zip.unlink(missing_ok=True) + raise requests.RequestException('Downloaded file is empty') + + logger.info(f'Downloaded {out_zip.name} from CDSE ({out_zip.stat().st_size / 1e6:.1f} MB)') + + last_exc: Exception | None = None + for attempt in range(1, max_retries + 1): + logger.info(f'CDSE download attempt #{attempt} for {scene_name}') + try: + try: + _do_download(download_url_zip) + break + except requests.HTTPError as e: + out_zip.unlink(missing_ok=True) + if e.response is not None and e.response.status_code == 404: + logger.info('Compressed format not available, falling back to uncompressed...') + try: + _do_download(download_url_value) + break + except requests.RequestException: + out_zip.unlink(missing_ok=True) + raise + raise + except requests.RequestException: + out_zip.unlink(missing_ok=True) + raise + except Exception as exc: + 
last_exc = exc + wait_time = RETRY_START_WAIT + RETRY_INCREMENT * (attempt - 1) + if attempt < max_retries: + logger.warning(f'Attempt #{attempt} failed: {exc}. Waiting {wait_time}s before retry...') + time.sleep(wait_time) + else: + raise RuntimeError( + f'Failed to download {scene_name} from CDSE after {max_retries} attempts' + ) from last_exc + + # Extract the zip to get the SAFE directory + logger.info(f'Extracting {out_zip.name}...') + with zipfile.ZipFile(out_zip, 'r') as zf: + zf.extractall(output_dir) + out_zip.unlink() + + if not safe_dir.exists(): + # Some CDSE zips may have a different top-level name; find the .SAFE directory + safe_dirs = list(output_dir.glob('*.SAFE')) + if safe_dirs: + safe_dir = safe_dirs[0] + else: + raise FileNotFoundError(f'Could not find extracted SAFE directory in {output_dir}') + + logger.info(f'Extracted SAFE directory: {safe_dir}') + return safe_dir diff --git a/src/multirtc/geocode.py b/src/multirtc/geocode.py index 81cad33..efc035e 100644 --- a/src/multirtc/geocode.py +++ b/src/multirtc/geocode.py @@ -2,7 +2,7 @@ from pathlib import Path -from multirtc.multirtc import SUPPORTED, run_multirtc +from multirtc.multirtc import DOWNLOAD_SOURCES, SUPPORTED, run_multirtc def create_parser(parser): @@ -11,6 +11,18 @@ def create_parser(parser): parser.add_argument('--resolution', type=float, help='Resolution of the output dataset (m)') parser.add_argument('--dem', type=Path, default=None, help='Path to the DEM to use for processing') parser.add_argument('--work-dir', type=Path, default=None, help='Working directory for processing') + parser.add_argument( + '--download-source', + type=str, + choices=DOWNLOAD_SOURCES, + default='ASF', + help=( + "Source for downloading Sentinel-1 SLC data. " + "'ASF' uses Alaska Satellite Facility (default). " + "'CDSE' uses Copernicus Data Space Ecosystem " + "(requires CDSE_USERNAME/CDSE_PASSWORD env vars or ~/.netrc)." 
+ ), + ) return parser @@ -19,4 +31,12 @@ def run(args): assert args.dem.exists(), f'DEM file {args.dem} does not exist.' if args.work_dir is None: args.work_dir = Path.cwd() - run_multirtc(args.platform, args.granule, args.resolution, args.work_dir, args.dem, apply_rtc=False) + run_multirtc( + args.platform, + args.granule, + args.resolution, + args.work_dir, + args.dem, + apply_rtc=False, + download_source=args.download_source, + ) diff --git a/src/multirtc/multirtc.py b/src/multirtc/multirtc.py index 7c6a433..8d302ea 100644 --- a/src/multirtc/multirtc.py +++ b/src/multirtc/multirtc.py @@ -1,5 +1,6 @@ """Create an RTC dataset for a multiple satellite platforms""" +import logging from pathlib import Path from burst2safe.burst2safe import burst2safe @@ -7,12 +8,18 @@ from multirtc import dem from multirtc.base import Slc +from multirtc.cdse import burst_to_parent_slc, download_slc_from_cdse, ensure_cdse_credentials from multirtc.create_rtc import rtc from multirtc.rtc_options import RtcOptions from multirtc.sentinel1 import S1BurstSlc from multirtc.sicd import SicdPfaSlc, SicdRzdSlc +logger = logging.getLogger(__name__) + +DOWNLOAD_SOURCES = ['ASF', 'CDSE'] + + SUPPORTED = ['S1', 'UMBRA', 'CAPELLA', 'ICEYE'] @@ -33,7 +40,7 @@ def prep_dirs(work_dir: Path | None = None) -> tuple[Path, Path]: return input_dir, output_dir -def get_slc(platform: str, granule: str, input_dir: Path) -> Slc: +def get_slc(platform: str, granule: str, input_dir: Path, download_source: str = 'ASF') -> Slc: """ Get the SLC object for the specified platform and granule. @@ -41,12 +48,18 @@ def get_slc(platform: str, granule: str, input_dir: Path) -> Slc: platform: Platform type (e.g., 'UMBRA'). granule: Granule name if data is available in ASF archive, or filename if granule is already downloaded. input_dir: Directory containing the input data. + download_source: Source for downloading Sentinel-1 SLC data ('ASF' or 'CDSE'). Returns: Slc subclass object for the specified platform and granule. 
""" if platform == 'S1': - safe_path = burst2safe(granules=[granule], all_anns=True, work_dir=input_dir) + if download_source == 'CDSE': + ensure_cdse_credentials() + parent_slc = burst_to_parent_slc(granule) + safe_path = download_slc_from_cdse(parent_slc, input_dir) + else: + safe_path = burst2safe(granules=[granule], all_anns=True, work_dir=input_dir) orbit_path = Path(retrieve_orbit_file(safe_path.name, str(input_dir), concatenate=True)) slc = S1BurstSlc(safe_path, orbit_path, granule) elif platform in ['CAPELLA', 'ICEYE', 'UMBRA']: @@ -61,7 +74,13 @@ def get_slc(platform: str, granule: str, input_dir: Path) -> Slc: def run_multirtc( - platform: str, granule: str, resolution: int, work_dir: Path, dem_path: Path | None = None, apply_rtc=True + platform: str, + granule: str, + resolution: int, + work_dir: Path, + dem_path: Path | None = None, + apply_rtc=True, + download_source: str = 'ASF', ) -> None: """Create an RTC or Geocoded dataset using the OPERA algorithm. @@ -72,9 +91,10 @@ def run_multirtc( work_dir: Working directory for processing. dem_path: Path to the DEM to use for processing. If None, the NISAR DEM will be downloaded. apply_rtc: If True perform radiometric correction; if False, only geocode. + download_source: Source for downloading Sentinel-1 SLC data ('ASF' or 'CDSE'). 
""" input_dir, output_dir = prep_dirs(work_dir) - slc = get_slc(platform, granule, input_dir) + slc = get_slc(platform, granule, input_dir, download_source=download_source) if dem_path is None: dem_path = input_dir / 'dem.tif' dem.download_opera_dem_for_footprint(dem_path, slc.footprint) @@ -103,6 +123,18 @@ def create_parser(parser): parser.add_argument('--resolution', type=float, help='Resolution of the output RTC (m)') parser.add_argument('--dem', type=Path, default=None, help='Path to the DEM to use for processing') parser.add_argument('--work-dir', type=Path, default=None, help='Working directory for processing') + parser.add_argument( + '--download-source', + type=str, + choices=DOWNLOAD_SOURCES, + default='ASF', + help=( + "Source for downloading Sentinel-1 SLC data. " + "'ASF' uses Alaska Satellite Facility (default). " + "'CDSE' uses Copernicus Data Space Ecosystem " + "(requires CDSE_USERNAME/CDSE_PASSWORD env vars or ~/.netrc)." + ), + ) return parser @@ -111,4 +143,12 @@ def run(args): assert args.dem.exists(), f'DEM file {args.dem} does not exist.' if args.work_dir is None: args.work_dir = Path.cwd() - run_multirtc(args.platform, args.granule, args.resolution, args.work_dir, args.dem, apply_rtc=True) + run_multirtc( + args.platform, + args.granule, + args.resolution, + args.work_dir, + args.dem, + apply_rtc=True, + download_source=args.download_source, + )