Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,24 @@ multirtc geocode PLATFORM SLC-GRANULE --resolution RESOLUTION --work-dir WORK-DI

Output geocoded pixel values represent sigma0 power.

## Sentinel-1 Download Source
By default, Sentinel-1 data is downloaded from [ASF](https://search.asf.alaska.edu/). As an alternative, you can download from the [Copernicus Data Space Ecosystem (CDSE)](https://dataspace.copernicus.eu/) using the `--download-source CDSE` flag:

```bash
multirtc rtc S1 SLC-GRANULE --resolution RESOLUTION --work-dir WORK-DIR --download-source CDSE
```
The `--download-source` flag is also supported for the `geocode` subcommand.

Both download sources require credentials:

| Source | Environment Variables | `~/.netrc` machine | Registration |
|--------|----------------------|-------------------|--------------|
| ASF (default) | `EARTHDATA_USERNAME` / `EARTHDATA_PASSWORD` | `urs.earthdata.nasa.gov` | https://urs.earthdata.nasa.gov/users/new |
| CDSE | `CDSE_USERNAME` / `CDSE_PASSWORD` | `dataspace.copernicus.eu` | https://dataspace.copernicus.eu/ |

Credentials can be provided via environment variables or a `~/.netrc` entry as shown above.


### Running via Docker
In addition to the main python interface, I've also provided an experimental docker container that contains full support for polar grid format SICD data. Encapsulating this functionality in a docker container is necessary for now because it requires re-compiling a development version of ISCE3. The docker container can be run using a similar interface, with the exception of needing to pass your EarthData credentials and the need to pass a mounted volume with an `input` and `output` directory inside:

Expand All @@ -71,6 +89,17 @@ PROJECT/
|--input.slc (if needed)
|--output/
```
To use CDSE as the download source via Docker, pass your CDSE credentials and the `--download-source CDSE` flag:

```bash
docker run -it --rm \
-e CDSE_USERNAME=YOUR_CDSE_USERNAME \
-e CDSE_PASSWORD=YOUR_CDSE_PASSWORD \
-v ~/LOCAL_PATH/PROJECT:/home/conda/PROJECT \
ghcr.io/forrestfwilliams/multirtc:VERSION \
rtc S1 SLC-GRANULE --resolution RESOLUTION --work-dir PROJECT --download-source CDSE
```

If you're encountering `permission denied` errors when running the container, make sure that the input and output folders are owned by the same group and user IDs that the container uses (`chown -R 1000:1000 ~/LOCAL_PATH/PROJECT`).

### Output Layers
Expand Down
6 changes: 6 additions & 0 deletions src/multirtc/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import os

from multirtc import dem, geocode, multirtc
from multirtc.cdse import ensure_cdse_credentials
from multirtc.fetch import write_credentials_to_netrc_file
from multirtc.multimetric import ale, point_target, rle

Expand All @@ -12,6 +13,11 @@ def main():
if username and password:
write_credentials_to_netrc_file(username, password, append=False)

cdse_username = os.getenv('CDSE_USERNAME')
cdse_password = os.getenv('CDSE_PASSWORD')
if cdse_username and cdse_password:
ensure_cdse_credentials(cdse_username, cdse_password)

global_parser = argparse.ArgumentParser(
prog='multirtc',
description='ISCE3-based multi-sensor RTC and cal/val tool',
Expand Down
279 changes: 279 additions & 0 deletions src/multirtc/cdse.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,279 @@
"""Download Sentinel-1 SLC products from the Copernicus Data Space Ecosystem (CDSE).

This module provides an alternative to ASF-based downloads via burst2safe.
It searches the CDSE OData catalog by the parent SLC scene name (derived from
the burst granule via ASF search) and downloads the full SLC zip.

CDSE credentials (username/password) can be provided via:
- Environment variables: CDSE_USERNAME and CDSE_PASSWORD
- The ~/.netrc file with machine: dataspace.copernicus.eu

References:
- https://documentation.dataspace.copernicus.eu/APIs/OData.html
- https://documentation.dataspace.copernicus.eu/APIs/Token.html
"""

import logging
import netrc
import os
import time
import zipfile
from pathlib import Path

import asf_search
import requests


# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Machine name under which CDSE credentials are looked up in / written to ~/.netrc.
CDSE_HOST = 'dataspace.copernicus.eu'
# Token endpoint that access-token requests are POSTed to.
CDSE_TOKEN_URL = 'https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token'
# OData catalogue endpoint queried when searching for a product by name.
CDSE_ODATA_URL = 'https://catalogue.dataspace.copernicus.eu/odata/v1/Products'
# OData endpoint used to build the per-product download URLs.
CDSE_DOWNLOAD_URL = 'https://download.dataspace.copernicus.eu/odata/v1/Products'

# Retry settings for product downloads.
MAX_RETRIES = 3  # default number of download attempts
RETRY_START_WAIT = 10  # seconds to wait after the first failed attempt
RETRY_INCREMENT = 10  # seconds added to the wait after each further failed attempt


def get_cdse_credentials(
username: str | None = None,
password: str | None = None,
) -> tuple[str, str]:
"""Resolve CDSE credentials from arguments, environment, or ~/.netrc.

Args:
username: CDSE username. Falls back to CDSE_USERNAME env var, then ~/.netrc.
password: CDSE password. Falls back to CDSE_PASSWORD env var, then ~/.netrc.

Returns:
Tuple of (username, password).
"""
if username and password:
return username, password

env_user = os.getenv('CDSE_USERNAME')
env_pass = os.getenv('CDSE_PASSWORD')
if env_user and env_pass:
return env_user, env_pass

try:
nrc = netrc.netrc()
auth = nrc.authenticators(CDSE_HOST)
if auth:
return auth[0], auth[2]
except (FileNotFoundError, netrc.NetrcParseError):
pass

raise ValueError(
'CDSE credentials not found. Provide them via:\n'
' 1. CDSE_USERNAME and CDSE_PASSWORD environment variables\n'
' 2. ~/.netrc entry for machine dataspace.copernicus.eu\n'
'Register for a free account at https://dataspace.copernicus.eu/'
)


def ensure_cdse_credentials(
    username: str | None = None,
    password: str | None = None,
) -> None:
    """Ensure CDSE credentials are available in ~/.netrc.

    If a username and password are available (from the arguments or the
    CDSE_USERNAME / CDSE_PASSWORD environment variables) and ~/.netrc has no
    entry for the CDSE machine, an entry is appended to ~/.netrc. An existing
    entry is never modified.

    Args:
        username: CDSE username. Falls back to the CDSE_USERNAME env var.
        password: CDSE password. Falls back to the CDSE_PASSWORD env var.

    Raises:
        ValueError: If no complete set of CDSE credentials can be resolved
            from the arguments, the environment, or ~/.netrc.
    """
    if username is None:
        username = os.getenv('CDSE_USERNAME')
    if password is None:
        password = os.getenv('CDSE_PASSWORD')

    netrc_file = Path.home() / '.netrc'

    cdse_in_netrc = False
    if netrc_file.exists():
        try:
            cdse_in_netrc = bool(netrc.netrc(netrc_file).authenticators(CDSE_HOST))
        except netrc.NetrcParseError as exc:
            # Best effort: an unparsable file is treated as having no CDSE entry,
            # but the problem is reported instead of being silently ignored.
            logger.warning(f'Could not parse {netrc_file}: {exc}')

    if username and password:
        if cdse_in_netrc:
            logger.info(f'CDSE credentials already present in {netrc_file}, skipping update.')
        else:
            # Create the file with owner-only permissions from the start so the
            # password is never readable by others, even briefly.
            fd = os.open(netrc_file, os.O_WRONLY | os.O_APPEND | os.O_CREAT, 0o600)
            with os.fdopen(fd, 'a') as f:
                f.write(f'\nmachine {CDSE_HOST} login {username} password {password}\n')
            # Also tighten permissions on a pre-existing file.
            netrc_file.chmod(0o600)

    # Fail loudly if credentials still cannot be resolved from any source.
    get_cdse_credentials(username, password)


def get_cdse_access_token(username: str, password: str) -> str:
    """Exchange a CDSE username/password for an access token.

    Sends a password-grant request to the CDSE token endpoint.

    Args:
        username: CDSE username.
        password: CDSE password.

    Returns:
        The value of the 'access_token' field of the JSON response.

    Raises:
        requests.HTTPError: If the token endpoint answers with an error status.
    """
    token_request = {
        'grant_type': 'password',
        'username': username,
        'password': password,
        'client_id': 'cdse-public',
    }
    token_response = requests.post(CDSE_TOKEN_URL, data=token_request, timeout=60)
    token_response.raise_for_status()
    token_payload = token_response.json()
    return token_payload['access_token']


def search_cdse_by_scene_name(scene_name: str) -> dict:
    """Look up a product in the CDSE OData catalog by its scene name.

    Any '.zip' / '.SAFE' in the given name is dropped and the catalog is
    filtered on ``Name eq '<scene>.SAFE'``.

    Args:
        scene_name: Sentinel-1 scene name, with or without .SAFE / .zip.

    Returns:
        The first entry of the 'value' list in the OData response.

    Raises:
        requests.HTTPError: If the catalog answers with an error status.
        LookupError: If the catalog returns no matching product.
    """
    bare_name = scene_name.replace('.zip', '').replace('.SAFE', '')
    product_name = f'{bare_name}.SAFE'

    catalog_response = requests.get(
        f"{CDSE_ODATA_URL}?$filter=Name eq '{product_name}'",
        timeout=120,
    )
    catalog_response.raise_for_status()

    matches = catalog_response.json().get('value', [])
    if matches:
        return matches[0]
    raise LookupError(f"Product '{product_name}' not found in CDSE catalog.")


def burst_to_parent_slc(burst_granule: str) -> str:
    """Resolve a burst granule to the name of the SLC scene it belongs to.

    The burst is looked up with an ASF granule search and the parent scene
    name is read from the URL of the first result.

    Args:
        burst_granule: Burst granule name (e.g. S1_136231_IW2_20200604T022312_VV_7C85-BURST).

    Returns:
        Parent SLC scene name (e.g. S1A_IW_SLC__1SDV_20200604T022251_20200604T022318_032861_03CE65_7C85).

    Raises:
        LookupError: If the ASF search returns no results for the burst.
    """
    matches = asf_search.granule_search([burst_granule])
    if not matches:
        raise LookupError(f'Burst granule {burst_granule} not found in ASF archive.')

    burst_url = matches[0].properties['url']
    # URL format: https://sentinel1-burst.asf.alaska.edu/{PARENT_SLC}/IW{N}/{POL}/{idx}.tiff
    # Splitting on '/' gives ['https:', '', host, PARENT_SLC, ...], so the scene is item 3.
    url_parts = burst_url.split('/')
    slc_name = url_parts[3]
    logger.info(f'Mapped burst {burst_granule} to parent SLC {slc_name}')
    return slc_name


def download_slc_from_cdse(
    scene_name: str,
    output_dir: Path | str,
    max_retries: int = MAX_RETRIES,
) -> Path:
    """Download a Sentinel-1 SLC product from CDSE and extract the SAFE directory.

    If ``<scene_name>.SAFE`` already exists in ``output_dir`` it is returned
    without contacting CDSE. Otherwise the product is looked up in the CDSE
    catalog, downloaded as a zip (trying the ``$zip`` endpoint first and
    falling back to ``$value`` on HTTP 404), extracted into ``output_dir``,
    and the zip is removed.

    Args:
        scene_name: Sentinel-1 scene name (any '.zip' / '.SAFE' is dropped).
        output_dir: Directory to save and extract the downloaded product.
            It is created if it does not exist.
        max_retries: Number of download attempts before raising an error.

    Returns:
        Path to the extracted .SAFE directory.

    Raises:
        ValueError: If ``max_retries`` is less than 1, or if no CDSE
            credentials can be resolved.
        LookupError: If the product is not found in the CDSE catalog.
        RuntimeError: If every download attempt fails (chained from the
            last underlying error).
        FileNotFoundError: If the archive did not produce a .SAFE directory.
    """
    output_dir = Path(output_dir).resolve()
    scene_name = scene_name.replace('.zip', '').replace('.SAFE', '')
    safe_dir = output_dir / f'{scene_name}.SAFE'

    # Skip download if SAFE already exists
    if safe_dir.exists():
        logger.info(f'SAFE directory already exists: {safe_dir}')
        return safe_dir

    # Without at least one attempt there would be no archive to extract below.
    if max_retries < 1:
        raise ValueError(f'max_retries must be at least 1, got {max_retries}')

    # The archive is written into output_dir, so it must exist before downloading.
    output_dir.mkdir(parents=True, exist_ok=True)

    # Get credentials and token
    cdse_user, cdse_pass = get_cdse_credentials()
    access_token = get_cdse_access_token(cdse_user, cdse_pass)

    # Search CDSE catalog
    product = search_cdse_by_scene_name(scene_name)
    product_id = product['Id']

    download_url_zip = f'{CDSE_DOWNLOAD_URL}({product_id})/$zip'
    download_url_value = f'{CDSE_DOWNLOAD_URL}({product_id})/$value'
    headers = {'Authorization': f'Bearer {access_token}'}

    out_zip = output_dir / f'{scene_name}.zip'

    def _do_download(url: str) -> None:
        """Stream ``url`` into ``out_zip``; raise if the resulting file is empty."""
        # The context manager releases the streamed connection even when the
        # transfer fails part-way through.
        with requests.get(url, headers=headers, stream=True, timeout=600) as response:
            response.raise_for_status()
            with open(out_zip, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192 * 16):
                    if chunk:
                        f.write(chunk)

        size_bytes = out_zip.stat().st_size
        if size_bytes == 0:
            raise requests.RequestException('Downloaded file is empty')

        logger.info(f'Downloaded {out_zip.name} from CDSE ({size_bytes / 1e6:.1f} MB)')

    def _download_with_fallback() -> None:
        """Try the ``$zip`` endpoint, falling back to ``$value`` only on HTTP 404."""
        try:
            _do_download(download_url_zip)
        except requests.HTTPError as e:
            if e.response is None or e.response.status_code != 404:
                raise
            logger.info('Compressed format not available, falling back to uncompressed...')
            _do_download(download_url_value)

    for attempt in range(1, max_retries + 1):
        logger.info(f'CDSE download attempt #{attempt} for {scene_name}')
        try:
            _download_with_fallback()
            break
        except Exception as exc:
            # Never leave a partial or empty archive behind, whatever failed.
            out_zip.unlink(missing_ok=True)
            if attempt == max_retries:
                raise RuntimeError(
                    f'Failed to download {scene_name} from CDSE after {max_retries} attempts'
                ) from exc
            # Linear back-off: each failed attempt waits RETRY_INCREMENT longer.
            wait_time = RETRY_START_WAIT + RETRY_INCREMENT * (attempt - 1)
            logger.warning(f'Attempt #{attempt} failed: {exc}. Waiting {wait_time}s before retry...')
            time.sleep(wait_time)

    # Extract the zip to get the SAFE directory
    logger.info(f'Extracting {out_zip.name}...')
    with zipfile.ZipFile(out_zip, 'r') as zf:
        # Remember the archive's top-level entries so the fallback below only
        # considers directories that came from this archive.
        top_level_names = {name.split('/', 1)[0] for name in zf.namelist()}
        zf.extractall(output_dir)
    out_zip.unlink()

    if not safe_dir.exists():
        # Some CDSE zips may have a different top-level name; find the .SAFE
        # directory among what was just extracted rather than globbing the
        # whole output directory, which could match an unrelated product.
        extracted_safe_dirs = sorted(
            output_dir / name
            for name in top_level_names
            if name.endswith('.SAFE') and (output_dir / name).is_dir()
        )
        if not extracted_safe_dirs:
            raise FileNotFoundError(f'Could not find extracted SAFE directory in {output_dir}')
        safe_dir = extracted_safe_dirs[0]

    logger.info(f'Extracted SAFE directory: {safe_dir}')
    return safe_dir
24 changes: 22 additions & 2 deletions src/multirtc/geocode.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from pathlib import Path

from multirtc.multirtc import SUPPORTED, run_multirtc
from multirtc.multirtc import DOWNLOAD_SOURCES, SUPPORTED, run_multirtc


def create_parser(parser):
Expand All @@ -11,6 +11,18 @@ def create_parser(parser):
parser.add_argument('--resolution', type=float, help='Resolution of the output dataset (m)')
parser.add_argument('--dem', type=Path, default=None, help='Path to the DEM to use for processing')
parser.add_argument('--work-dir', type=Path, default=None, help='Working directory for processing')
parser.add_argument(
'--download-source',
type=str,
choices=DOWNLOAD_SOURCES,
default='ASF',
help=(
"Source for downloading Sentinel-1 SLC data. "
"'ASF' uses Alaska Satellite Facility (default). "
"'CDSE' uses Copernicus Data Space Ecosystem "
"(requires CDSE_USERNAME/CDSE_PASSWORD env vars or ~/.netrc)."
),
)
return parser


Expand All @@ -19,4 +31,12 @@ def run(args):
assert args.dem.exists(), f'DEM file {args.dem} does not exist.'
if args.work_dir is None:
args.work_dir = Path.cwd()
run_multirtc(args.platform, args.granule, args.resolution, args.work_dir, args.dem, apply_rtc=False)
run_multirtc(
args.platform,
args.granule,
args.resolution,
args.work_dir,
args.dem,
apply_rtc=False,
download_source=args.download_source,
)
Loading