englacial · espg · Mar 16, 2026 · Mar 6, 2026 · Mar 13, 2026 · Mar 13, 2026
diff --git a/README.md b/README.md
@@ -55,27 +55,29 @@ bash deployment/aws/deploy.sh
 
 See [Lambda Deployment](docs/deployment/lambda.md) and [ARM64 Build Guide](docs/deployment/arm64.md).
 
-### Step 3: Run Production Processing
+### Step 3: Run Processing
 
-The orchestrator creates a Zarr template, authenticates with NASA Earthdata, and dispatches one Lambda per spatial cell.
+Processing reads a pipeline config YAML (data source, aggregation, output store) and a granule catalog. Run locally or dispatch to Lambda.
 
 ```bash
-# Full run:
-uv run python deployment/aws/invoke_lambda.py \
-    --catalog catalog_ATL06_cycle22_order6.json
+# Local processing (write to local Zarr):
+uv run python -m magg --config atl06.yaml --catalog catalog.json --store ./output.zarr
+
+# Local processing (write to S3):
+uv run python -m magg --config atl06.yaml --catalog catalog.json --store s3://bucket/output.zarr
 
-# Dry run (show what would be processed):
+# Lambda dispatch (requires deployed Lambda function):
 uv run python deployment/aws/invoke_lambda.py \
-    --catalog catalog_ATL06_cycle22_order6.json \
-    --dry-run
+    --config atl06.yaml --catalog catalog.json
 
 # Test with a few cells:
-uv run python deployment/aws/invoke_lambda.py \
-    --catalog catalog_ATL06_cycle22_order6.json \
-    --max-cells 10
+uv run python -m magg --config atl06.yaml --catalog catalog.json --max-cells 5
+
+# Dry run:
+uv run python -m magg --config atl06.yaml --catalog catalog.json --dry-run
 ```
 
-Output: Zarr store at `s3://xagg/atl06/morton_aggregation.zarr/`
+The store path and output grid parameters are defined in the YAML config (`output.store`, `output.grid.child_order`); the store path can be overridden via `--store` on the command line.
 
 ### Step 4: Visualize Results
 
@@ -92,10 +94,14 @@ Adjust `GRID_SPACING` in the notebook to control output resolution (default 2 km
 ```
 magg/
 ├── src/magg/              # Main package (cloud-agnostic)
+│   ├── __main__.py        # Local processing runner (python -m magg)
+│   ├── config.py          # YAML pipeline configuration
 │   ├── processing.py      # Core aggregation pipeline
 │   ├── catalog.py         # CMR query + catalog building
 │   ├── schema.py          # Output schema + Zarr template
+│   ├── store.py           # Store factory (local or S3)
 │   ├── auth.py            # NASA Earthdata authentication
+│   └── configs/           # Built-in pipeline configs (atl06.yaml)
 ├── deployment/            # Cloud-specific deployment
 │   └── aws/               # Lambda handler, orchestrator, build scripts
 ├── notebooks/             # Visualization

diff --git a/deployment/LAMBDA_DEPLOYMENT.md b/deployment/LAMBDA_DEPLOYMENT.md
@@ -9,7 +9,7 @@ for testing. The target architecture is **arm64 / py3.12** (20% cheaper per GB-s
 - **Runtime**: python3.11
 - **Architecture**: x86_64
 - **Layer**: `xagg-dependencies:1` (x86_64, py3.11, h5coro==0.0.8)
-- **Function code**: `lambda_handler.py` + `magg/` package + obstore/zarr/pydantic/pandera
+- **Function code**: `lambda_handler.py` + `magg/` package + obstore/zarr/pydantic/pyyaml
 - **Role**: `magg-lambda-execution` (scoped to `xagg` bucket)
 
 ### What's in the layer vs function code
@@ -19,7 +19,7 @@ numpy, pandas, h5coro, mortie, healpy, earthaccess, boto3, astropy, shapely, cra
 fastparquet, requests, s3fs, and transitive deps.
 
 **Function code** (20MB unzipped):
-`lambda_handler.py`, `magg/` package, obstore, zarr, pydantic-zarr, pandera, pydantic,
+`lambda_handler.py`, `magg/` package, obstore, zarr, pydantic-zarr, pyyaml, pydantic,
 pydantic-core, typeguard, typing_inspect, annotated-types.
 
 ---
@@ -126,7 +126,7 @@ cp -r src/magg /tmp/lambda_build/magg
 
 # Add deps not in layer (skip native ones if already unpacked)
 pip install --target /tmp/lambda_build --no-deps \
-  zarr pydantic-zarr pandera pydantic typeguard typing_inspect annotated-types
+  zarr pydantic-zarr pyyaml pydantic typeguard typing_inspect annotated-types
 
 # For obstore (native): download correct wheel and unzip
 pip download --python-version <VER> --platform <PLAT> --only-binary :all: \

diff --git a/deployment/aws/build_function.sh b/deployment/aws/build_function.sh
@@ -58,7 +58,7 @@ pip3 install --target "$BUILD_DIR" --no-cache-dir \
     "obstore>=0.8.2" \
     "zarr>=3.1.5" \
     "pydantic-zarr>=0.9.1" \
-    "pandera"
+    "pyyaml"
 
 # --- Remove packages already in the Lambda layer ---
 # The layer provides these (plus their transitive deps). Removing them from

diff --git a/deployment/aws/deploy.sh b/deployment/aws/deploy.sh
@@ -11,7 +11,8 @@
 
 set -e
 
-FUNCTION_NAME="process-morton-cell"
+FUNCTION_NAME="${MAGG_LAMBDA_FUNCTION_NAME:-process-morton-cell}"
+S3_BUCKET="${MAGG_S3_BUCKET:-xagg}"
 REGION="us-west-2"
 ARCH="arm64"
 FUNCTION_ONLY=false
@@ -67,17 +68,17 @@ if [ "$FUNCTION_ONLY" = false ]; then
     elif [ "$LAYER_SIZE" -gt 50000000 ]; then
         # Layer > 50MB: upload to S3 first (direct upload limit is ~50MB)
         S3_KEY="lambda-deploy/${LAYER_NAME}-$(date +%s).zip"
-        echo "  Layer is $(numfmt --to=iec $LAYER_SIZE), uploading to s3://xagg/$S3_KEY first..."
-        aws s3 cp "$LAYER_ZIP" "s3://xagg/$S3_KEY" --region "$REGION"
+        echo "  Layer is $(numfmt --to=iec $LAYER_SIZE), uploading to s3://$S3_BUCKET/$S3_KEY first..."
+        aws s3 cp "$LAYER_ZIP" "s3://$S3_BUCKET/$S3_KEY" --region "$REGION"
         LAYER_ARN=$(aws lambda publish-layer-version \
             --layer-name "$LAYER_NAME" \
             --compatible-runtimes "$RUNTIME" \
             --compatible-architectures "$ARCH" \
-            --content "S3Bucket=xagg,S3Key=$S3_KEY" \
+            --content "S3Bucket=$S3_BUCKET,S3Key=$S3_KEY" \
             --region "$REGION" \
             --query 'LayerVersionArn' --output text)
         echo "  Published: $LAYER_ARN"
-        aws s3 rm "s3://xagg/$S3_KEY" --region "$REGION"
+        aws s3 rm "s3://$S3_BUCKET/$S3_KEY" --region "$REGION"
     else
         LAYER_ARN=$(aws lambda publish-layer-version \
             --layer-name "$LAYER_NAME" \
@@ -99,15 +100,15 @@ else
     FUNC_SIZE=$(stat -c%s "$FUNC_ZIP" 2>/dev/null || stat -f%z "$FUNC_ZIP")
     if [ "$FUNC_SIZE" -gt 50000000 ]; then
         S3_KEY="lambda-deploy/function-$(date +%s).zip"
-        echo "  Function zip is $(numfmt --to=iec $FUNC_SIZE), uploading to s3://xagg/$S3_KEY first..."
-        aws s3 cp "$FUNC_ZIP" "s3://xagg/$S3_KEY" --region "$REGION"
+        echo "  Function zip is $(numfmt --to=iec $FUNC_SIZE), uploading to s3://$S3_BUCKET/$S3_KEY first..."
+        aws s3 cp "$FUNC_ZIP" "s3://$S3_BUCKET/$S3_KEY" --region "$REGION"
         aws lambda update-function-code \
             --function-name "$FUNCTION_NAME" \
-            --s3-bucket xagg --s3-key "$S3_KEY" \
+            --s3-bucket "$S3_BUCKET" --s3-key "$S3_KEY" \
             --architectures "$ARCH" \
             --region "$REGION" \
             --query '{CodeSize:CodeSize,LastModified:LastModified}' --output table
-        aws s3 rm "s3://xagg/$S3_KEY" --region "$REGION"
+        aws s3 rm "s3://$S3_BUCKET/$S3_KEY" --region "$REGION"
     else
         aws lambda update-function-code \
             --function-name "$FUNCTION_NAME" \