Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,7 @@ ymw16.so
build/
dist/
pyymw16.egg-info/
app/gen_maps/*.hkl
pygedm.egg-info/
scripts/*.npy
scripts/*.hkl
8 changes: 2 additions & 6 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,13 +1,9 @@
# syntax=docker/dockerfile:1
FROM python:3.8-slim-buster
RUN apt-get update
FROM python:3.14-slim-trixie
RUN apt-get update
RUN apt-get install build-essential git f2c pkg-config -y
COPY . /app
WORKDIR /app
RUN pip3 install -r requirements.txt
RUN pip3 install -r requirements_test.txt
RUN pip3 install .




32 changes: 15 additions & 17 deletions app/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,27 +1,25 @@
# syntax=docker/dockerfile:1

FROM python:3.14-slim-trixie
RUN apt-get update
RUN apt-get install build-essential git gfortran f2c pkg-config libhdf5-dev wget -y
RUN mkdir /app
RUN mkdir /app/data

RUN apt-get update && \
apt-get install -y --no-install-recommends \
build-essential git gfortran f2c pkg-config libhdf5-dev wget && \
rm -rf /var/lib/apt/lists/*

RUN mkdir -p /app/data /app/assets
COPY ./app.py /app/app.py
COPY ./requirements.txt /app/requirements.txt
COPY ./assets/* /app/assets/
COPY ./download_data.py /app/download_data.py
COPY ./assets/ /app/assets/
WORKDIR /app

RUN pip3 install -r requirements.txt
RUN pip3 install --no-cache-dir -r requirements.txt


# DASH APP
# Copy local gedm_dist_maps.hkl if it exists in app/data/
RUN --mount=type=bind,source=.,target=/tmp/build \
if [ -f /tmp/build/data/gedm_dist_maps.hkl ]; then cp /tmp/build/data/gedm_dist_maps.hkl /app/data/gedm_dist_maps.hkl; else wget -O /app/data/gedm_dist_maps.hkl "https://zenodo.org/records/18779007/files/gedm_dist_maps.hkl?download=1"; fi || true
# Download gedm_dist_maps.hkl from Zenodo (or use a local copy if in ./assets)
RUN --mount=type=bind,target=/tmp/ctx,source=. \
python3 /app/download_data.py

EXPOSE 8050/tcp
# --preload: load the app once BEFORE forking workers so skymap data
# lives in shared copy-on-write memory instead of being duplicated.
# -w 2: two workers is plenty for a Dash app; 8 workers each loading
# ~60 MB of data was consuming ~500 MB+ of RAM unnecessarily.
# --timeout 120: data loading + first callback can take a while on cold start.
# --access-logfile -: emit HTTP request logs to stdout for observability.
CMD ["gunicorn", "--preload", "-w", "2", "--timeout", "120", "--access-logfile", "-", "-b", "0.0.0.0:8050", "app:server"]
CMD ["gunicorn", "--preload", "-w", "4", "--timeout", "120", "--access-logfile", "-", "-b", "0.0.0.0:8050", "app:server"]
2 changes: 1 addition & 1 deletion app/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ docker compose restart

```bash
docker build --tag pygedm_app .
docker run -p 8050:8050 -v "$(pwd)/data:/app/data:ro" pygedm_app
docker run -p 8050:8050 pygedm_app
```

## Logs
Expand Down
152 changes: 82 additions & 70 deletions app/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,78 +27,96 @@
logger.info("All imports completed")

# ── Data loading ─────────────────────────────────────────────────────────────
DATA_PATH = "data/gedm_dist_maps.hkl"
import os
DATA_PATH = os.path.join(os.path.dirname(__file__), "assets", "gedm_dist_maps.hkl")


def load_skymap_data(path):
"""Load skymap data from HKL file using h5py directly.
"""
logger.info("Loading skymap data from %s", path)
t0 = time.time()

try:
with h5py.File(path, "r") as h:
if "data" not in h:
logger.error("HDF5 file missing 'data' group. Top-level keys: %s", list(h.keys()))
raise KeyError("Expected 'data' group in HDF5 file")
def _build_xarray(data, dist, gl, gb):
    """Label a raw skymap cube with its coordinate axes.

    The array axes are named, in order, ``distance_kpc``, ``gb`` and ``gl``,
    and the matching coordinate vectors are attached to each axis.

    Args:
        data: Array indexed as (distance, gb, gl).
        dist: Coordinate values for the ``distance_kpc`` axis.
        gl: Coordinate values for the ``gl`` axis.
        gb: Coordinate values for the ``gb`` axis.

    Returns:
        xarray.DataArray: ``data`` with named dims, coords and a units attr.
    """
    axis_names = ("distance_kpc", "gb", "gl")
    axis_coords = {"distance_kpc": dist, "gl": gl, "gb": gb}
    metadata = {"units": "DM pc/cm3"}
    return xr.DataArray(data, dims=axis_names, coords=axis_coords, attrs=metadata)

grp = h["data"]
logger.info("HDF5 data group keys: %s", list(grp.keys()))

# hickle wraps dict string keys in quotes: '"keyname"'
# ── Skymap state ─────────────────────────────────────────────────────────────
_current_skymap_model = None # Name of the currently loaded skymap model
_current_skymap_data = None # The currently loaded xarray DataArray
_skymap_coords = None # Shared coordinates (loaded once)


def _load_skymap_coords():
    """Return the shared ``(dist, gl, gb)`` coordinate arrays.

    The arrays are read from ``DATA_PATH`` the first time this is called and
    kept in the module-level ``_skymap_coords``; later calls return the cached
    tuple without touching the file.

    Returns:
        tuple: ``(dist, gl, gb)`` as read from the HDF5 ``data`` group, with
        ``gl`` and ``gb`` taking every second sample.

    Note:
        Any failure while reading is logged at CRITICAL level and the process
        exits with status 1 instead of raising to the caller.
    """
    global _skymap_coords

    if _skymap_coords is None:
        logger.info("Loading skymap coordinates from %s", DATA_PATH)
        started = time.time()
        try:
            with h5py.File(DATA_PATH, "r") as hdf:
                group = hdf["data"]
                # Dataset names carry literal double quotes, hence '"dist"' etc.
                distances = group['"dist"'][()]
                # Keep every second longitude/latitude sample.
                longitudes = group['"gl"'][()][::2]
                latitudes = group['"gb"'][()][::2]
            _skymap_coords = (distances, longitudes, latitudes)
            elapsed = time.time() - started
            total_mb = sum(arr.nbytes for arr in _skymap_coords) / 1024**2
            logger.info("Skymap coordinates loaded in %.2fs (%.1f MB)", elapsed, total_mb)
        except Exception:
            logger.critical("FATAL – could not load skymap coordinates", exc_info=True)
            sys.exit(1)

    return _skymap_coords

# Read and downsample in one step – never holds full-res in memory
ne2001 = grp['"ne2001"'][:, ::2, ::2]
ne2025 = grp['"ne2025"'][:, ::2, ::2]
ymw16 = grp['"ymw16"'][:, ::2, ::2]

logger.info(
"Loaded arrays – ne2001: %s %s, ne2025: %s %s, ymw16: %s %s",
ne2001.shape, ne2001.dtype,
ne2025.shape, ne2025.dtype,
ymw16.shape, ymw16.dtype,
)
def get_skymap(model):
"""Load skymap data for the requested model, reloading only if the model changes.

except FileNotFoundError:
logger.critical("Data file not found: %s", path)
raise
except KeyError as exc:
logger.critical("Missing expected key in HDF5 file: %s", exc)
raise
except Exception:
logger.critical("Failed to load skymap data", exc_info=True)
raise
Args:
model (str): One of 'NE2001', 'NE2025', or 'YMW16'

nbytes = sum(a.nbytes for a in (ne2001, ne2025, ymw16, dist, gl, gb))
elapsed = time.time() - t0
logger.info("Skymap data loaded in %.2fs (%.1f MB in arrays)", elapsed, nbytes / 1024**2)
Returns:
xarray.DataArray: The skymap for the requested model
"""
global _current_skymap_model, _current_skymap_data

return dist, gl, gb, ne2001, ne2025, ymw16
if model == _current_skymap_model:
logger.debug("Skymap model unchanged (%s), reusing loaded data", model)
return _current_skymap_data

model_key = f'"{model.lower()}"'
logger.info("Skymap model changed to %s, loading from %s", model, DATA_PATH)
t0 = time.time()

def _build_xarray(data, dist, gl, gb):
"""Wrap a numpy array as an xarray DataArray."""
return xr.DataArray(
data,
dims=("distance_kpc", "gb", "gl"),
coords={"distance_kpc": dist, "gl": gl, "gb": gb},
attrs={"units": "DM pc/cm3"},
)
try:
dist, gl, gb = _load_skymap_coords()

with h5py.File(DATA_PATH, "r") as h:
grp = h["data"]
# Read and downsample in one step
data = grp[model_key][:, ::2, ::2]
nbytes = data.nbytes

_current_skymap_data = _build_xarray(data, dist, gl, gb)
_current_skymap_model = model
elapsed = time.time() - t0
logger.info("Skymap %s loaded in %.2fs (%.1f MB)", model, elapsed, nbytes / 1024**2)
return _current_skymap_data

except Exception:
logger.critical("FATAL – could not load skymap for %s", model, exc_info=True)
sys.exit(1)


# Load coordinates at startup (minimal memory footprint)
try:
_dist, _gl, _gb, _ne2001, _ne2025, _ymw16 = load_skymap_data(DATA_PATH)
skymap_dist = _dist
skymap_data_ne = _build_xarray(_ne2001, _dist, _gl, _gb)
skymap_data_ne25 = _build_xarray(_ne2025, _dist, _gl, _gb)
skymap_data_ymw = _build_xarray(_ymw16, _dist, _gl, _gb)
del _ne2001, _ne2025, _ymw16, _dist, _gl, _gb # free raw arrays
logger.info("xarray DataArrays built successfully")
_load_skymap_coords()
logger.info("Skymap coordinate system initialized (lazy loading enabled)")
except Exception:
logger.critical("FATAL – could not initialise skymap data, exiting", exc_info=True)
logger.critical("FATAL – could not initialise skymap coordinates, exiting", exc_info=True)
sys.exit(1)

# APP SETUP
Expand Down Expand Up @@ -155,21 +173,19 @@ def callback(n_clicks, skymap_apply_clicks, relayout_data, model, colorscale, dm
)
)

# Check if this is just a zoom/pan event - if so, don't regenerate the skymap
is_zoom_or_pan = False
# Check if this is just a zoom/pan event - if so, return immediately before any computation
if triggered_id == 'skymap-output' and relayout_data:
# Check if it's a zoom/pan (contains axis ranges)
if any(k in relayout_data for k in ['xaxis.range', 'yaxis.range', 'xaxis.range[0]', 'yaxis.range[0]',
'xaxis.autorange', 'yaxis.autorange']):
is_zoom_or_pan = True
return (no_update,) * 8

# Check if distance slider changed (relayoutData contains frame updates)
reset_dm_range = False
current_frame_idx = 0
slider_store_out = no_update
default_slider_idx = None
slider_value_candidate = None
if relayout_data and not is_zoom_or_pan:
if relayout_data:
# Check if slider.value key exists (frame changed)
if 'slider.value' in relayout_data:
reset_dm_range = True
Expand Down Expand Up @@ -261,11 +277,6 @@ def callback(n_clicks, skymap_apply_clicks, relayout_data, model, colorscale, dm
sc = SkyCoord(0 * u.deg, 0 * u.deg, frame="galactic")
coord_error = True

logger.info("Calculating: gl=%s, gb=%s, dmord=%s, func=%s", sc.galactic.l, sc.galactic.b, dmord, f.__name__)
dout = f(sc.galactic.l, sc.galactic.b, dmord, method=model, nu=nu)
dout_ne = f(sc.galactic.l, sc.galactic.b, dmord, method="ne2001", nu=nu)
dout_ne25 = f(sc.galactic.l, sc.galactic.b, dmord, method="ne2025", nu=nu)
dout_ymw = f(sc.galactic.l, sc.galactic.b, dmord, method="ymw16", nu=nu)

# Make plots
D = np.linspace(0.1, dmord.value)
Expand All @@ -286,6 +297,11 @@ def callback(n_clicks, skymap_apply_clicks, relayout_data, model, colorscale, dm
y_ne25[ii] = d_ne25[0].value
y_ymw[ii] = d_ymw[0].value

# Reuse last loop values (at dmord) for the table output
dout_ne = d_ne21
dout_ne25 = d_ne25
dout_ymw = d_ymw

# print(d, y)
fig = pgo.Figure()
fig.add_trace(pgo.Scatter(x=D, y=y_ne21, mode="lines", name="NE2001"))
Expand All @@ -299,15 +315,15 @@ def callback(n_clicks, skymap_apply_clicks, relayout_data, model, colorscale, dm

# SKYMAP
if model == "NE2001":
skymap_data = skymap_data_ne
skymap_model_label = "NE2001"
elif model == "NE2025":
skymap_data = skymap_data_ne25
skymap_model_label = "NE2025"
else:
skymap_data = skymap_data_ymw
skymap_model_label = model

# Load only the requested skymap (lazy loading)
skymap_data = get_skymap(skymap_model_label)

logger.debug("Selected skymap model=%s, shape=%s", skymap_model_label, skymap_data.shape)

# Determine DM min/max for colorscale
Expand Down Expand Up @@ -524,10 +540,6 @@ def _to_float(val):
]

# Handle different update scenarios
if is_zoom_or_pan:
# Don't regenerate anything on zoom/pan, just let the figure handle it
return no_update, no_update, no_update, no_update, no_update, slider_store_out, no_update, no_update

plot_out = fig if update_plot else no_update
table_out = gedm_out if update_plot else no_update

Expand Down
70 changes: 70 additions & 0 deletions app/download_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
#!/usr/bin/env python3
"""Download gedm_dist_maps.hkl from Zenodo if not already present."""

import sys
import os
from pathlib import Path
import urllib.request
import urllib.error

# Configuration
DATA_DIR = Path("/app/assets")
DATA_FILE = DATA_DIR / "gedm_dist_maps.hkl"
ZENODO_URL = "https://zenodo.org/records/18779007/files/gedm_dist_maps.hkl?download=1"
MIN_SIZE = 1024 * 1024 # 1 MB — file should be ~60 MB, so anything smaller is corrupt


def main():
    """Ensure gedm_dist_maps.hkl is present at ``DATA_FILE``.

    Sources are tried in this order:

    1. A copy in the build context at ``/tmp/ctx/data/gedm_dist_maps.hkl``.
    2. A file that already exists at ``DATA_FILE``.
    3. A fresh download from ``ZENODO_URL``.

    A file counts as plausible when it is at least ``MIN_SIZE`` bytes. The
    download is written to a temporary ``.part`` file next to ``DATA_FILE``
    and only moved into place after the size check (and the HDF5 check, when
    h5py is importable) has passed. A truncated or invalid download is
    therefore never left at ``DATA_FILE``, where the size-only check in step 2
    would accept it on a later run.

    Returns:
        int: 0 on success, 1 on failure (used as the process exit status).
    """
    import shutil

    # The target directory is normally created by the image build, but make
    # sure it exists so the script also works when run on its own.
    try:
        DATA_DIR.mkdir(parents=True, exist_ok=True)
    except OSError as e:
        print(f"✗ FATAL: Cannot create {DATA_DIR}: {e}", file=sys.stderr)
        return 1

    # Check for a local copy in the build context (mounted at /tmp/ctx)
    local_path = Path("/tmp/ctx/data/gedm_dist_maps.hkl")
    if local_path.exists() and local_path.stat().st_size >= MIN_SIZE:
        print(f"✓ Found local copy in build context ({local_path.stat().st_size / 1024**2:.1f} MB)")
        print(f" Copying to {DATA_FILE}...")
        shutil.copy2(local_path, DATA_FILE)
        return 0

    # Check if file already exists in container
    if DATA_FILE.exists() and DATA_FILE.stat().st_size >= MIN_SIZE:
        print(f"✓ {DATA_FILE} already exists ({DATA_FILE.stat().st_size / 1024**2:.1f} MB)")
        return 0

    # File is missing or too small — download from Zenodo
    print("↓ Downloading gedm_dist_maps.hkl from Zenodo (~60 MB)...")
    print(f" URL: {ZENODO_URL}")

    # Download next to the final location so the later rename stays on the
    # same filesystem.
    tmp_file = DATA_FILE.with_name(DATA_FILE.name + ".part")
    try:
        try:
            urllib.request.urlretrieve(ZENODO_URL, tmp_file)
        except urllib.error.URLError as e:
            print(f"✗ FATAL: Failed to download from Zenodo: {e}", file=sys.stderr)
            return 1
        except Exception as e:
            print(f"✗ FATAL: Unexpected error during download: {e}", file=sys.stderr)
            return 1

        # Verify the downloaded file
        if not tmp_file.exists():
            print("✗ FATAL: Download appeared to succeed but file does not exist", file=sys.stderr)
            return 1

        size = tmp_file.stat().st_size
        if size < MIN_SIZE:
            print(f"✗ FATAL: Downloaded file is only {size / 1024**2:.1f} MB (expected ~60 MB)", file=sys.stderr)
            return 1

        # Verify the file is readable HDF5. h5py is optional here: without it
        # the size check above is the only validation.
        try:
            import h5py
        except ImportError:
            h5py = None

        if h5py is not None:
            try:
                with h5py.File(tmp_file, "r"):
                    pass
            except Exception as e:
                print(f"✗ FATAL: File is not valid HDF5: {e}", file=sys.stderr)
                return 1

        # Only now publish the file under its final name.
        tmp_file.replace(DATA_FILE)

        if h5py is not None:
            print(f"✓ Downloaded successfully ({size / 1024**2:.1f} MB) and verified as valid HDF5")
        else:
            print(f"✓ Downloaded successfully ({size / 1024**2:.1f} MB)")
        return 0
    finally:
        # Remove a leftover partial/invalid download; after a successful
        # rename the temporary path no longer exists and this is a no-op.
        tmp_file.unlink(missing_ok=True)


if __name__ == "__main__":
sys.exit(main())
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ build-backend = "setuptools.build_meta"

[project]
name = "pygedm"
version = "4.0.0"
description = "Python/C++ version of NE2001, YMW16, and YT2020 electron density models"
version = "4.0.1"
description = "Python interface to NE2001, YMW16, NE2025 and YT2020 electron density models"
readme = "README.md"
requires-python = ">=3.8"
license = "MIT"
Expand Down