From 4600e2fd0505ee7ee179bdf8d4398ba30bd5be05 Mon Sep 17 00:00:00 2001 From: Danny Price Date: Sun, 1 Mar 2026 12:32:17 +0800 Subject: [PATCH 1/5] Updated Dockerfile for robustness --- app/Dockerfile | 42 +++++++++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/app/Dockerfile b/app/Dockerfile index 321d708..dc2954f 100644 --- a/app/Dockerfile +++ b/app/Dockerfile @@ -1,27 +1,35 @@ # syntax=docker/dockerfile:1 FROM python:3.14-slim-trixie -RUN apt-get update -RUN apt-get install build-essential git gfortran f2c pkg-config libhdf5-dev wget -y -RUN mkdir /app -RUN mkdir /app/data + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + build-essential git gfortran f2c pkg-config libhdf5-dev wget && \ + rm -rf /var/lib/apt/lists/* + +RUN mkdir -p /app/data /app/assets COPY ./app.py /app/app.py COPY ./requirements.txt /app/requirements.txt -COPY ./assets/* /app/assets/ +COPY ./assets/ /app/assets/ WORKDIR /app -RUN pip3 install -r requirements.txt +RUN pip3 install --no-cache-dir -r requirements.txt + +# Try to copy a local gedm_dist_maps.hkl if the builder has one or download from Zenodo +ENV DATA_URL="https://zenodo.org/records/18779007/files/gedm_dist_maps.hkl?download=1" + +# Copy data dir contents if present +COPY data/gedm_dist_maps.hk[l] /app/data/ -# DASH APP -# Copy local gedm_dist_maps.hkl if it exists in app/data/ -RUN --mount=type=bind,source=.,target=/tmp/build \ - if [ -f /tmp/build/data/gedm_dist_maps.hkl ]; then cp /tmp/build/data/gedm_dist_maps.hkl /app/data/gedm_dist_maps.hkl; else wget -O /app/data/gedm_dist_maps.hkl "https://zenodo.org/records/18779007/files/gedm_dist_maps.hkl?download=1"; fi || true +RUN if [ ! -f /app/data/gedm_dist_maps.hkl ]; then \ + echo "gedm_dist_maps.hkl not found locally — downloading..." && \ + wget --progress=dot:giga -O /app/data/gedm_dist_maps.hkl "$DATA_URL"; \ + else \ + echo "gedm_dist_maps.hkl found locally — skipping download."; \ + fi && \ + # Sanity check: file must exist and not be empty + test -s /app/data/gedm_dist_maps.hkl || \ + { echo "FATAL: gedm_dist_maps.hkl is missing or empty"; exit 1; } EXPOSE 8050/tcp -# --preload: load the app once BEFORE forking workers so skymap data -# lives in shared copy-on-write memory instead of being duplicated. -# -w 2: two workers is plenty for a Dash app; 8 workers each loading -# ~60 MB of data was consuming ~500 MB+ of RAM unnecessarily. -# --timeout 120: data loading + first callback can take a while on cold start. -# --access-logfile -: emit HTTP request logs to stdout for observability. -CMD ["gunicorn", "--preload", "-w", "2", "--timeout", "120", "--access-logfile", "-", "-b", "0.0.0.0:8050", "app:server"] +CMD ["gunicorn", "--preload", "-w", "4", "--timeout", "120", "--access-logfile", "-", "-b", "0.0.0.0:8050", "app:server"] From 085f58f9e0d0b4b09deb2bf111637d9c8cb7cacf Mon Sep 17 00:00:00 2001 From: Danny Price Date: Mon, 2 Mar 2026 13:32:32 +0800 Subject: [PATCH 2/5] Updates to docker app for skymap download --- .gitignore | 4 +++ app/Dockerfile | 18 +++--------- app/app.py | 2 +- app/download_data.py | 70 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 79 insertions(+), 15 deletions(-) create mode 100644 app/download_data.py diff --git a/.gitignore b/.gitignore index 2b659b1..92bccc3 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,7 @@ ymw16.so build/ dist/ pyymw16.egg-info/ +app/gen_maps/*.hkl +pygedm.egg-info/ +scripts/*.npy +scripts/*.hkl diff --git a/app/Dockerfile b/app/Dockerfile index dc2954f..ed40406 100644 --- a/app/Dockerfile +++ b/app/Dockerfile @@ -10,26 +10,16 @@ RUN apt-get update && \ RUN mkdir -p /app/data /app/assets COPY ./app.py /app/app.py COPY ./requirements.txt /app/requirements.txt +COPY ./download_data.py /app/download_data.py COPY ./assets/ /app/assets/ WORKDIR /app RUN pip3 install --no-cache-dir -r requirements.txt -# Try to copy a local gedm_dist_maps.hkl if the builder has one or download from Zenodo -ENV DATA_URL="https://zenodo.org/records/18779007/files/gedm_dist_maps.hkl?download=1" -# Copy data dir contents if present -COPY data/gedm_dist_maps.hk[l] /app/data/ - -RUN if [ ! -f /app/data/gedm_dist_maps.hkl ]; then \ - echo "gedm_dist_maps.hkl not found locally — downloading..." && \ - wget --progress=dot:giga -O /app/data/gedm_dist_maps.hkl "$DATA_URL"; \ - else \ - echo "gedm_dist_maps.hkl found locally — skipping download."; \ - fi && \ - # Sanity check: file must exist and not be empty - test -s /app/data/gedm_dist_maps.hkl || \ - { echo "FATAL: gedm_dist_maps.hkl is missing or empty"; exit 1; } +# Download gedm_dist_maps.hkl from Zenodo (or use a local copy if in ./assets) +RUN --mount=type=bind,target=/tmp/ctx,source=. \ + python3 /app/download_data.py EXPOSE 8050/tcp CMD ["gunicorn", "--preload", "-w", "4", "--timeout", "120", "--access-logfile", "-", "-b", "0.0.0.0:8050", "app:server"] diff --git a/app/app.py b/app/app.py index a00c70d..e80ac1e 100644 --- a/app/app.py +++ b/app/app.py @@ -27,7 +27,7 @@ logger.info("All imports completed") # ── Data loading ───────────────────────────────────────────────────────────── -DATA_PATH = "data/gedm_dist_maps.hkl" +DATA_PATH = "assets/gedm_dist_maps.hkl" def load_skymap_data(path): diff --git a/app/download_data.py b/app/download_data.py new file mode 100644 index 0000000..5b56b90 --- /dev/null +++ b/app/download_data.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python3 +"""Download gedm_dist_maps.hkl from Zenodo if not already present.""" + +import sys +import os +from pathlib import Path +import urllib.request +import urllib.error + +# Configuration +DATA_DIR = Path("/app/assets") +DATA_FILE = DATA_DIR / "gedm_dist_maps.hkl" +ZENODO_URL = "https://zenodo.org/records/18779007/files/gedm_dist_maps.hkl?download=1" +MIN_SIZE = 1024 * 1024 # 1 MB — file should be ~60 MB, so anything smaller is corrupt + + +def main(): + # Check for a local copy in the build context (mounted at /tmp/ctx) + local_path = Path("/tmp/ctx/data/gedm_dist_maps.hkl") + if local_path.exists() and local_path.stat().st_size > MIN_SIZE: + print(f"✓ Found local copy in build context ({local_path.stat().st_size / 1024**2:.1f} MB)") + print(f" Copying to {DATA_FILE}...") + import shutil + shutil.copy2(local_path, DATA_FILE) + return 0 + + # Check if file already exists in container + if DATA_FILE.exists() and DATA_FILE.stat().st_size > MIN_SIZE: + print(f"✓ {DATA_FILE} already exists ({DATA_FILE.stat().st_size / 1024**2:.1f} MB)") + return 0 + + # File is missing or too small — download from Zenodo + print(f"↓ Downloading gedm_dist_maps.hkl from Zenodo (~60 MB)...") + print(f" URL: {ZENODO_URL}") + + try: + urllib.request.urlretrieve(ZENODO_URL, DATA_FILE) + except urllib.error.URLError as e: + print(f"✗ FATAL: Failed to download from Zenodo: {e}", file=sys.stderr) + return 1 + except Exception as e: + print(f"✗ FATAL: Unexpected error during download: {e}", file=sys.stderr) + return 1 + + # Verify the downloaded file + if not DATA_FILE.exists(): + print(f"✗ FATAL: Download appeared to succeed but file does not exist", file=sys.stderr) + return 1 + + size = DATA_FILE.stat().st_size + if size < MIN_SIZE: + print(f"✗ FATAL: Downloaded file is only {size / 1024**2:.1f} MB (expected ~60 MB)", file=sys.stderr) + return 1 + + # Verify the file is readable HDF5 + try: + import h5py + with h5py.File(DATA_FILE, 'r') as h: + print(f"✓ Downloaded successfully ({size / 1024**2:.1f} MB) and verified as valid HDF5") + except ImportError: + print(f"✓ Downloaded successfully ({size / 1024**2:.1f} MB)") + except Exception as e: + print(f"✗ FATAL: File is not valid HDF5: {e}", file=sys.stderr) + return 1 + + return 0 + + +if __name__ == "__main__": + sys.exit(main()) From 9de0162540d0290cf89365de7b3253dfd2cf25f6 Mon Sep 17 00:00:00 2001 From: Danny Price Date: Mon, 2 Mar 2026 21:57:18 +0800 Subject: [PATCH 3/5] Updated app.py - use less memory by only loading one map --- app/README.md | 2 +- app/app.py | 140 ++++++++++++++++++++++++++++---------------------- 2 files changed, 80 insertions(+), 62 deletions(-) diff --git a/app/README.md b/app/README.md index c94aac6..e178373 100644 --- a/app/README.md +++ b/app/README.md @@ -43,7 +43,7 @@ docker compose restart ```bash docker build --tag pygedm_app . -docker run -p 8050:8050 -v "$(pwd)/data:/app/data:ro" pygedm_app +docker run -p 8050:8050 pygedm_app ``` ## Logs diff --git a/app/app.py b/app/app.py index e80ac1e..7e7cca4 100644 --- a/app/app.py +++ b/app/app.py @@ -27,78 +27,96 @@ logger.info("All imports completed") # ── Data loading ───────────────────────────────────────────────────────────── -DATA_PATH = "assets/gedm_dist_maps.hkl" +import os +DATA_PATH = os.path.join(os.path.dirname(__file__), "assets", "gedm_dist_maps.hkl") -def load_skymap_data(path): - """Load skymap data from HKL file using h5py directly. - """ - logger.info("Loading skymap data from %s", path) - t0 = time.time() - try: - with h5py.File(path, "r") as h: - if "data" not in h: - logger.error("HDF5 file missing 'data' group. Top-level keys: %s", list(h.keys())) - raise KeyError("Expected 'data' group in HDF5 file") +def _build_xarray(data, dist, gl, gb): + """Wrap a numpy array as an xarray DataArray.""" + return xr.DataArray( + data, + dims=("distance_kpc", "gb", "gl"), + coords={"distance_kpc": dist, "gl": gl, "gb": gb}, + attrs={"units": "DM pc/cm3"}, + ) - grp = h["data"] - logger.info("HDF5 data group keys: %s", list(grp.keys())) - # hickle wraps dict string keys in quotes: '"keyname"' +# ── Skymap state ───────────────────────────────────────────────────────────── +_current_skymap_model = None # Name of the currently loaded skymap model +_current_skymap_data = None # The currently loaded xarray DataArray +_skymap_coords = None # Shared coordinates (loaded once) + + +def _load_skymap_coords(): + """Load and cache the shared coordinates (dist, gl, gb) - only called once.""" + global _skymap_coords + if _skymap_coords is not None: + return _skymap_coords + + logger.info("Loading skymap coordinates from %s", DATA_PATH) + t0 = time.time() + try: + with h5py.File(DATA_PATH, "r") as h: + grp = h["data"] dist = grp['"dist"'][()] gl = grp['"gl"'][()][::2] gb = grp['"gb"'][()][::2] + _skymap_coords = (dist, gl, gb) + elapsed = time.time() - t0 + logger.info("Skymap coordinates loaded in %.2fs (%.1f MB)", elapsed, + sum(a.nbytes for a in _skymap_coords) / 1024**2) + return _skymap_coords + except Exception: + logger.critical("FATAL – could not load skymap coordinates", exc_info=True) + sys.exit(1) - # Read and downsample in one step – never holds full-res in memory - ne2001 = grp['"ne2001"'][:, ::2, ::2] - ne2025 = grp['"ne2025"'][:, ::2, ::2] - ymw16 = grp['"ymw16"'][:, ::2, ::2] - logger.info( - "Loaded arrays – ne2001: %s %s, ne2025: %s %s, ymw16: %s %s", - ne2001.shape, ne2001.dtype, - ne2025.shape, ne2025.dtype, - ymw16.shape, ymw16.dtype, - ) +def get_skymap(model): + """Load skymap data for the requested model, reloading only if the model changes. - except FileNotFoundError: - logger.critical("Data file not found: %s", path) - raise - except KeyError as exc: - logger.critical("Missing expected key in HDF5 file: %s", exc) - raise - except Exception: - logger.critical("Failed to load skymap data", exc_info=True) - raise + Args: + model (str): One of 'NE2001', 'NE2025', or 'YMW16' - nbytes = sum(a.nbytes for a in (ne2001, ne2025, ymw16, dist, gl, gb)) - elapsed = time.time() - t0 - logger.info("Skymap data loaded in %.2fs (%.1f MB in arrays)", elapsed, nbytes / 1024**2) + Returns: + xarray.DataArray: The skymap for the requested model + """ + global _current_skymap_model, _current_skymap_data - return dist, gl, gb, ne2001, ne2025, ymw16 + if model == _current_skymap_model: + logger.debug("Skymap model unchanged (%s), reusing loaded data", model) + return _current_skymap_data + model_key = f'"{model.lower()}"' + logger.info("Skymap model changed to %s, loading from %s", model, DATA_PATH) + t0 = time.time() -def _build_xarray(data, dist, gl, gb): - """Wrap a numpy array as an xarray DataArray.""" - return xr.DataArray( - data, - dims=("distance_kpc", "gb", "gl"), - coords={"distance_kpc": dist, "gl": gl, "gb": gb}, - attrs={"units": "DM pc/cm3"}, - ) + try: + dist, gl, gb = _load_skymap_coords() + with h5py.File(DATA_PATH, "r") as h: + grp = h["data"] + # Read and downsample in one step + data = grp[model_key][:, ::2, ::2] + nbytes = data.nbytes + _current_skymap_data = _build_xarray(data, dist, gl, gb) + _current_skymap_model = model + elapsed = time.time() - t0 + logger.info("Skymap %s loaded in %.2fs (%.1f MB)", model, elapsed, nbytes / 1024**2) + return _current_skymap_data + + except Exception: + logger.critical("FATAL – could not load skymap for %s", model, exc_info=True) + sys.exit(1) + + +# Load coordinates at startup (minimal memory footprint) try: - _dist, _gl, _gb, _ne2001, _ne2025, _ymw16 = load_skymap_data(DATA_PATH) - skymap_dist = _dist - skymap_data_ne = _build_xarray(_ne2001, _dist, _gl, _gb) - skymap_data_ne25 = _build_xarray(_ne2025, _dist, _gl, _gb) - skymap_data_ymw = _build_xarray(_ymw16, _dist, _gl, _gb) - del _ne2001, _ne2025, _ymw16, _dist, _gl, _gb # free raw arrays - logger.info("xarray DataArrays built successfully") + _load_skymap_coords() + logger.info("Skymap coordinate system initialized (lazy loading enabled)") except Exception: - logger.critical("FATAL – could not initialise skymap data, exiting", exc_info=True) + logger.critical("FATAL – could not initialise skymap coordinates, exiting", exc_info=True) sys.exit(1) # APP SETUP @@ -261,11 +279,6 @@ def callback(n_clicks, skymap_apply_clicks, relayout_data, model, colorscale, dm sc = SkyCoord(0 * u.deg, 0 * u.deg, frame="galactic") coord_error = True - logger.info("Calculating: gl=%s, gb=%s, dmord=%s, func=%s", sc.galactic.l, sc.galactic.b, dmord, f.__name__) - dout = f(sc.galactic.l, sc.galactic.b, dmord, method=model, nu=nu) - dout_ne = f(sc.galactic.l, sc.galactic.b, dmord, method="ne2001", nu=nu) - dout_ne25 = f(sc.galactic.l, sc.galactic.b, dmord, method="ne2025", nu=nu) - dout_ymw = f(sc.galactic.l, sc.galactic.b, dmord, method="ymw16", nu=nu) # Make plots D = np.linspace(0.1, dmord.value) @@ -286,6 +299,11 @@ def callback(n_clicks, skymap_apply_clicks, relayout_data, model, colorscale, dm y_ne25[ii] = d_ne25[0].value y_ymw[ii] = d_ymw[0].value + # Reuse last loop values (at dmord) for the table output + dout_ne = d_ne21 + dout_ne25 = d_ne25 + dout_ymw = d_ymw + # print(d, y) fig = pgo.Figure() fig.add_trace(pgo.Scatter(x=D, y=y_ne21, mode="lines", name="NE2001")) @@ -299,15 +317,15 @@ def callback(n_clicks, skymap_apply_clicks, relayout_data, model, colorscale, dm # SKYMAP if model == "NE2001": - skymap_data = skymap_data_ne skymap_model_label = "NE2001" elif model == "NE2025": - skymap_data = skymap_data_ne25 skymap_model_label = "NE2025" else: - skymap_data = skymap_data_ymw skymap_model_label = model + # Load only the requested skymap (lazy loading) + skymap_data = get_skymap(skymap_model_label) + logger.debug("Selected skymap model=%s, shape=%s", skymap_model_label, skymap_data.shape) # Determine DM min/max for colorscale From f559609fb1187a07a3e5797442303f4bd44125ce Mon Sep 17 00:00:00 2001 From: Danny Price Date: Mon, 2 Mar 2026 22:08:27 +0800 Subject: [PATCH 4/5] Update app.py - fix to skymap/calculate refresh logic --- app/app.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/app/app.py b/app/app.py index 7e7cca4..f82f0f9 100644 --- a/app/app.py +++ b/app/app.py @@ -173,13 +173,11 @@ def callback(n_clicks, skymap_apply_clicks, relayout_data, model, colorscale, dm ) ) - # Check if this is just a zoom/pan event - if so, don't regenerate the skymap - is_zoom_or_pan = False + # Check if this is just a zoom/pan event - if so, return immediately before any computation if triggered_id == 'skymap-output' and relayout_data: - # Check if it's a zoom/pan (contains axis ranges) if any(k in relayout_data for k in ['xaxis.range', 'yaxis.range', 'xaxis.range[0]', 'yaxis.range[0]', 'xaxis.autorange', 'yaxis.autorange']): - is_zoom_or_pan = True + return (no_update,) * 8 # Check if distance slider changed (relayoutData contains frame updates) reset_dm_range = False @@ -187,7 +185,7 @@ def callback(n_clicks, skymap_apply_clicks, relayout_data, model, colorscale, dm slider_store_out = no_update default_slider_idx = None slider_value_candidate = None - if relayout_data and not is_zoom_or_pan: + if relayout_data: # Check if slider.value key exists (frame changed) if 'slider.value' in relayout_data: reset_dm_range = True @@ -542,10 +540,6 @@ def _to_float(val): ] # Handle different update scenarios - if is_zoom_or_pan: - # Don't regenerate anything on zoom/pan, just let the figure handle it - return no_update, no_update, no_update, no_update, no_update, slider_store_out, no_update, no_update - plot_out = fig if update_plot else no_update table_out = gedm_out if update_plot else no_update From 4e046caccda84827583dffa4d0220cd6ebd561f5 Mon Sep 17 00:00:00 2001 From: Danny Price Date: Tue, 3 Mar 2026 09:19:06 +0800 Subject: [PATCH 5/5] Bumping version to 4.0.1 --- Dockerfile | 8 ++------ pyproject.toml | 4 ++-- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/Dockerfile b/Dockerfile index d863048..ae78a66 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,13 +1,9 @@ # syntax=docker/dockerfile:1 -FROM python:3.8-slim-buster -RUN apt-get update +FROM python:3.14-slim-trixie +RUN apt-get update RUN apt-get install build-essential git f2c pkg-config -y COPY . /app WORKDIR /app RUN pip3 install -r requirements.txt RUN pip3 install -r requirements_test.txt RUN pip3 install . - - - - diff --git a/pyproject.toml b/pyproject.toml index 2f0e3d5..2287621 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,8 +4,8 @@ build-backend = "setuptools.build_meta" [project] name = "pygedm" -version = "4.0.0" -description = "Python/C++ version of NE2001, YMW16, and YT2020 electron density models" +version = "4.0.1" +description = "Python interface to NE2001, YMW16, NE2025 and YT2020 electron density models" readme = "README.md" requires-python = ">=3.8" license = "MIT"