diff --git a/brainrender/atlas_specific/allen_brain_atlas/streamlines.py b/brainrender/atlas_specific/allen_brain_atlas/streamlines.py index 29fc42f..30c4d69 100644 --- a/brainrender/atlas_specific/allen_brain_atlas/streamlines.py +++ b/brainrender/atlas_specific/allen_brain_atlas/streamlines.py @@ -1,4 +1,5 @@ import pandas as pd +import requests from loguru import logger from myterial import orange from rich import print @@ -14,28 +15,58 @@ except ModuleNotFoundError: # pragma: no cover allen_sdk_installed = False # pragma: no cover +try: + import cloudvolume + + cloudvolume_installed = True +except ModuleNotFoundError: # pragma: no cover + cloudvolume_installed = False # pragma: no cover + +from brainglobe_atlasapi import BrainGlobeAtlas from brainrender import base_dir -from brainrender._io import request from brainrender._utils import listify streamlines_folder = base_dir / "streamlines" streamlines_folder.mkdir(exist_ok=True) +ALLEN_MESOSCALE_URL = ( + "precomputed://gs://allen_neuroglancer_ccf/allen_mesoscale" +) +ALLEN_API_URL = "https://api.brain-map.org/api/v2/data/query.json" +VOXEL_SIZE_NM = 1000 # skeleton vertices are in nanometers + +_ml_extent_um_cache = None + + +def _get_ml_extent_um(): + """ + Derives the full medial-lateral extent of the Allen CCF atlas in microns + dynamically from the brainglobe atlas API. Used to flip the Z (ML) axis + when converting from Allen CCF space to brainrender's coordinate system, + where left and right hemispheres are mirrored relative to the Allen CCF. + + Result is cached after the first call to avoid reinstantiating the atlas + on every experiment download. + """ + global _ml_extent_um_cache + if _ml_extent_um_cache is None: + atlas = BrainGlobeAtlas("allen_mouse_25um", check_latest=False) + _ml_extent_um_cache = float(atlas.shape[2] * atlas.resolution[2]) + return _ml_extent_um_cache + def experiments_source_search(SOI): """ Returns data about experiments whose injection was in the SOI, structure of interest :param SOI: str, structure of interest. Acronym of structure to use as seed for the search - :param source: (Default value = True) """ - transgenic_id = 0 # id = 0 means use only wild type primary_structure_only = True if not allen_sdk_installed: print( - f"[{orange}]Streamlines cannot be download because the AllenSDK package is not installed. " + f"[{orange}]Streamlines cannot be downloaded because the AllenSDK package is not installed. " "Please install `allensdk` with `pip install allensdk`" ) return None @@ -50,57 +81,147 @@ def experiments_source_search(SOI): ) -def get_streamlines_data(eids, force_download=False): +def _get_injection_site_um(eid, ml_extent_um): """ - Given a list of expeirmental IDs, it downloads the streamline data - from the https://neuroinformatics.nl cache and saves them as - json files. + Fetches the injection site coordinates for an experiment from the Allen + Brain Atlas API. Coordinates are in Allen CCF um space with the Z (ML) + axis flipped to match brainrender's hemisphere convention. - :param eids: list of integers with experiments IDs + :param eid: int, experiment ID + :param ml_extent_um: float, full ML extent of the atlas in um for LR flip + :return: dict with x, y, z keys or None if not found """ - data = [] - for eid in track(eids, total=len(eids), description="downloading"): - url = "https://neuroinformatics.nl/HBP/allen-connectivity-viewer/json/streamlines_{}.json.gz".format( - eid + try: + url = ( + f"{ALLEN_API_URL}?q=model::ProjectionStructureUnionize," + f"rma::criteria,section_data_set[id$eq{eid}]," + f"rma::criteria,[is_injection$eqtrue]," + f"rma::options[num_rows$eq1][order$eq'projection_volume desc']" + ) + response = requests.get(url, timeout=10) + data = response.json() + if data["success"] and data["num_rows"] > 0: + voxel = data["msg"][0] + return { + "x": float(voxel["max_voxel_x"]), + "y": float(voxel["max_voxel_y"]), + "z": float(ml_extent_um - voxel["max_voxel_z"]), + } + except Exception as e: + logger.warning( + f"Could not fetch injection site for experiment {eid}: {e}" ) + return None - jsonpath = streamlines_folder / f"{eid}.json" - if not jsonpath.exists() or force_download: - response = request(url) +def _skeleton_to_dataframe(skeleton, eid, ml_extent_um): + """ + Converts a cloudvolume Skeleton object to the pd.DataFrame format + expected by brainrender's Streamlines actor. - # Write the response content as a temporary compressed file - temp_path = streamlines_folder / "temp.gz" - with open(str(temp_path), "wb") as temp: - temp.write(response.content) + Vertices are in nanometers in Allen CCF space. We: + 1. Convert nm -> um (divide by VOXEL_SIZE_NM) + 2. Flip Z (ML) axis to match brainrender's hemisphere convention - # Open in pandas and delete temp - url_data = pd.read_json( - str(temp_path), lines=True, compression="gzip" - ) - temp_path.unlink() + X (AP) and Y (DV) are passed through as-is because brainrender's + brain mesh uses the same orientation as the Allen CCF for those axes. - # save json - url_data.to_json(str(jsonpath)) + :param skeleton: cloudvolume Skeleton object + :param eid: int, experiment ID used to fetch real injection coordinates + :param ml_extent_um: float, full ML extent of the atlas in um for LR flip + :return: pd.DataFrame with 'lines' and 'injection_sites' columns + """ + components = skeleton.components() + + lines = [] + for component in components: + verts_um = component.vertices / VOXEL_SIZE_NM + points = [ + { + "x": float(v[0]), + "y": float(v[1]), + "z": float(ml_extent_um - v[2]), + } + for v in verts_um + ] + lines.append(points) + + injection_site = _get_injection_site_um(eid, ml_extent_um) + if injection_site is None: + logger.warning( + f"Falling back to centroid for injection site of experiment {eid}" + ) + all_verts_um = skeleton.vertices / VOXEL_SIZE_NM + centroid = all_verts_um.mean(axis=0) + injection_site = { + "x": float(centroid[0]), + "y": float(centroid[1]), + "z": float(ml_extent_um - centroid[2]), + } - # append to lists and return - data.append(url_data) + return pd.DataFrame( + {"lines": [lines], "injection_sites": [[injection_site]]} + ) + + +def get_streamlines_data(eids, force_download=False): + """ + Given a list of experiment IDs, downloads streamline data from the + Allen mesoscale connectivity dataset hosted on Google Cloud Storage + via cloud-volume, and saves them as JSON files. + + :param eids: list of integers with experiment IDs + :param force_download: bool, if True re-download even if cached + """ + if not cloudvolume_installed: + print( + f"[{orange}]Streamlines cannot be downloaded because the cloud-volume package is not installed. " + "Please install it with `pip install cloud-volume`" + ) + return [] + + ml_extent_um = _get_ml_extent_um() + + cv = cloudvolume.CloudVolume( + ALLEN_MESOSCALE_URL, + use_https=True, + progress=False, + ) + + data = [] + for eid in track(eids, total=len(eids), description="downloading"): + jsonpath = streamlines_folder / f"{eid}.json" + + if not jsonpath.exists() or force_download: + try: + skeleton = cv.skeleton.get(int(eid)) + except Exception as e: + logger.warning( + f"Could not fetch streamlines for experiment {eid}: {e}" + ) + continue + + df = _skeleton_to_dataframe(skeleton, int(eid), ml_extent_um) + df.to_json(str(jsonpath)) + data.append(df) else: data.append(pd.read_json(str(jsonpath))) + return data def get_streamlines_for_region(region, force_download=False): """ - Using the Allen Mouse Connectivity data and corresponding API, this function finds experiments whose injections - were targeted to the region of interest and downloads the corresponding streamlines data. By default, experiments - are selected for only WT mice and only when the region was the primary injection target. - - :param region: str with region to use for research - + Using the Allen Mouse Connectivity data and corresponding API, this function finds experiments + whose injections were targeted to the region of interest and downloads the corresponding + streamlines data from the Allen mesoscale connectivity dataset on Google Cloud Storage. + By default, experiments are selected for only WT mice and only when the region was + the primary injection target. + + :param region: str with region to use for search + :param force_download: bool, if True re-download even if cached """ logger.debug(f"Getting streamlines data for region: {region}") - # Get experiments whose injections were targeted to the region region_experiments = experiments_source_search(region) if region_experiments is None or region_experiments.empty: logger.debug("No experiments found from allen data") diff --git a/pyproject.toml b/pyproject.toml index 201a47f..4bef42a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,6 +10,7 @@ dynamic = ["version"] dependencies = [ "brainglobe-atlasapi>=2.0.1", + "cloud-volume>=3.11.0", "brainglobe-space>=1.0.0", "brainglobe-utils>=0.5.0", "h5py", diff --git a/tests/test_streamlines.py b/tests/test_streamlines.py index 47efadb..74bcef5 100644 --- a/tests/test_streamlines.py +++ b/tests/test_streamlines.py @@ -1,36 +1,284 @@ +import tempfile +from pathlib import Path +from unittest.mock import MagicMock, patch + +import numpy as np import pandas as pd import pytest -from brainrender import Scene -from brainrender.actors.streamlines import ( - Streamlines, - make_streamlines, -) from brainrender.atlas_specific import get_streamlines_for_region +from brainrender.atlas_specific.allen_brain_atlas.streamlines import ( + _get_injection_site_um, + _get_ml_extent_um, + _skeleton_to_dataframe, + get_streamlines_data, +) + +ML_EXTENT = 11400.0 + + +@pytest.fixture(autouse=True) +def _reset_ml_cache(): + import brainrender.atlas_specific.allen_brain_atlas.streamlines as _mod + + _mod._ml_extent_um_cache = None + yield + _mod._ml_extent_um_cache = None + + +def _make_fake_skeleton(): + verts = np.array([[1000.0, 2000.0, 3000.0]] * 4, dtype=float) + comp1 = MagicMock() + comp1.vertices = verts[:2] + comp2 = MagicMock() + comp2.vertices = verts[2:] + skeleton = MagicMock() + skeleton.vertices = verts + skeleton.components.return_value = [comp1, comp2] + return skeleton + + +@patch( + "brainrender.atlas_specific.allen_brain_atlas.streamlines.BrainGlobeAtlas" +) +def test_get_ml_extent_um(mock_atlas_cls): + mock_atlas = MagicMock() + mock_atlas.shape = (528, 320, 456) + mock_atlas.resolution = (25, 25, 25) + mock_atlas_cls.return_value = mock_atlas + result = _get_ml_extent_um() + assert result == pytest.approx(456 * 25) -@pytest.mark.xfail(reason="Likely due to fail due to neuromorpho") -def test_download(): - streams = get_streamlines_for_region("TH", force_download=False) - assert len(streams) == 54 - assert isinstance(streams[0], pd.DataFrame) +@patch("brainrender.atlas_specific.allen_brain_atlas.streamlines.requests.get") +def test_get_injection_site_success(mock_get): + mock_resp = MagicMock() + mock_resp.json.return_value = { + "success": True, + "num_rows": 1, + "msg": [ + { + "max_voxel_x": 100.0, + "max_voxel_y": 200.0, + "max_voxel_z": 300.0, + } + ], + } + mock_get.return_value = mock_resp + result = _get_injection_site_um(12345, ML_EXTENT) + assert result is not None + assert result["x"] == pytest.approx(100.0) + assert result["y"] == pytest.approx(200.0) + assert result["z"] == pytest.approx(ML_EXTENT - 300.0) -@pytest.mark.xfail(reason="Likely due to fail due to neuromorpho") -def test_download_slow(): - streams = get_streamlines_for_region("TH", force_download=True) - assert len(streams) == 54 - assert isinstance(streams[0], pd.DataFrame) +@patch("brainrender.atlas_specific.allen_brain_atlas.streamlines.requests.get") +def test_get_injection_site_empty_response(mock_get): + mock_resp = MagicMock() + mock_resp.json.return_value = {"success": True, "num_rows": 0, "msg": []} + mock_get.return_value = mock_resp + assert _get_injection_site_um(12345, ML_EXTENT) is None + + +@patch("brainrender.atlas_specific.allen_brain_atlas.streamlines.requests.get") +def test_get_injection_site_network_error(mock_get): + mock_get.side_effect = Exception("timeout") + assert _get_injection_site_um(12345, ML_EXTENT) is None + + +@patch( + "brainrender.atlas_specific.allen_brain_atlas.streamlines._get_injection_site_um" +) +def test_skeleton_to_dataframe_with_injection(mock_inj): + mock_inj.return_value = {"x": 10.0, "y": 20.0, "z": 30.0} + skeleton = _make_fake_skeleton() + df = _skeleton_to_dataframe(skeleton, 99, ML_EXTENT) + assert isinstance(df, pd.DataFrame) + assert "lines" in df.columns + assert "injection_sites" in df.columns + lines = df["lines"].iloc[0] + assert len(lines) == 2 + pt = lines[0][0] + assert set(pt.keys()) == {"x", "y", "z"} + assert pt["x"] == pytest.approx(1.0) + assert pt["y"] == pytest.approx(2.0) + assert pt["z"] == pytest.approx(ML_EXTENT - 3.0) + assert df["injection_sites"].iloc[0] == [{"x": 10.0, "y": 20.0, "z": 30.0}] + + +@patch( + "brainrender.atlas_specific.allen_brain_atlas.streamlines._get_injection_site_um" +) +def test_skeleton_to_dataframe_fallback_centroid(mock_inj): + mock_inj.return_value = None + skeleton = _make_fake_skeleton() + df = _skeleton_to_dataframe(skeleton, 99, ML_EXTENT) + injection = df["injection_sites"].iloc[0][0] + assert set(injection.keys()) == {"x", "y", "z"} + assert injection["x"] == pytest.approx(1.0) + assert injection["y"] == pytest.approx(2.0) + assert injection["z"] == pytest.approx(ML_EXTENT - 3.0) + + +@patch( + "brainrender.atlas_specific.allen_brain_atlas.streamlines._get_ml_extent_um" +) +@patch( + "brainrender.atlas_specific.allen_brain_atlas.streamlines.cloudvolume_installed", + True, +) +@patch( + "brainrender.atlas_specific.allen_brain_atlas.streamlines._skeleton_to_dataframe" +) +def test_get_streamlines_data_downloads(mock_s2df, mock_ml): + mock_ml.return_value = ML_EXTENT + fake_df = pd.DataFrame({"lines": [[]], "injection_sites": [[]]}) + mock_s2df.return_value = fake_df + mock_cv_module = MagicMock() + mock_cv_instance = MagicMock() + mock_cv_instance.skeleton.get.return_value = _make_fake_skeleton() + mock_cv_module.CloudVolume.return_value = mock_cv_instance + with tempfile.TemporaryDirectory() as tmpdir: + with patch( + "brainrender.atlas_specific.allen_brain_atlas.streamlines.streamlines_folder", + Path(tmpdir), + ): + with patch.dict("sys.modules", {"cloudvolume": mock_cv_module}): + with patch( + "brainrender.atlas_specific.allen_brain_atlas.streamlines.cloudvolume", + mock_cv_module, + create=True, + ): + result = get_streamlines_data( + [111, 222], force_download=True + ) + assert len(result) == 2 + assert all(isinstance(r, pd.DataFrame) for r in result) + + +@patch( + "brainrender.atlas_specific.allen_brain_atlas.streamlines._get_ml_extent_um" +) +@patch( + "brainrender.atlas_specific.allen_brain_atlas.streamlines.cloudvolume_installed", + True, +) +def test_get_streamlines_data_uses_cache(mock_ml): + mock_ml.return_value = ML_EXTENT + fake_df = pd.DataFrame( + {"lines": [[[]]], "injection_sites": [[{"x": 1, "y": 2, "z": 3}]]} + ) + mock_cv_module = MagicMock() + with tempfile.TemporaryDirectory() as tmpdir: + fake_df.to_json(str(Path(tmpdir) / "111.json")) + with patch( + "brainrender.atlas_specific.allen_brain_atlas.streamlines.streamlines_folder", + Path(tmpdir), + ): + with patch( + "brainrender.atlas_specific.allen_brain_atlas.streamlines.cloudvolume", + mock_cv_module, + create=True, + ): + result = get_streamlines_data([111], force_download=False) + mock_cv_module.CloudVolume.return_value.skeleton.get.assert_not_called() + assert len(result) == 1 + + +@patch( + "brainrender.atlas_specific.allen_brain_atlas.streamlines.cloudvolume_installed", + False, +) +def test_get_streamlines_data_no_cloudvolume(): + assert get_streamlines_data([111]) == [] + + +@patch( + "brainrender.atlas_specific.allen_brain_atlas.streamlines._get_ml_extent_um" +) +@patch( + "brainrender.atlas_specific.allen_brain_atlas.streamlines.cloudvolume_installed", + True, +) +def test_get_streamlines_data_skips_failed_experiment(mock_ml): + mock_ml.return_value = ML_EXTENT + mock_cv_module = MagicMock() + mock_cv_instance = MagicMock() + mock_cv_instance.skeleton.get.side_effect = Exception("not found") + mock_cv_module.CloudVolume.return_value = mock_cv_instance + with tempfile.TemporaryDirectory() as tmpdir: + with patch( + "brainrender.atlas_specific.allen_brain_atlas.streamlines.streamlines_folder", + Path(tmpdir), + ): + with patch( + "brainrender.atlas_specific.allen_brain_atlas.streamlines.cloudvolume", + mock_cv_module, + create=True, + ): + result = get_streamlines_data([999], force_download=True) + assert result == [] + + +@patch( + "brainrender.atlas_specific.allen_brain_atlas.streamlines.experiments_source_search" +) +def test_get_streamlines_for_region_no_experiments(mock_search): + mock_search.return_value = pd.DataFrame() + assert get_streamlines_for_region("XYZ") is None + + +@patch( + "brainrender.atlas_specific.allen_brain_atlas.streamlines.get_streamlines_data" +) +@patch( + "brainrender.atlas_specific.allen_brain_atlas.streamlines.experiments_source_search" +) +def test_get_streamlines_for_region_calls_download(mock_search, mock_dl): + mock_search.return_value = pd.DataFrame({"id": [111, 222]}) + mock_dl.return_value = ["df1", "df2"] + result = get_streamlines_for_region("TH") + assert result == ["df1", "df2"] + mock_dl.assert_called_once() + + +@pytest.mark.parametrize( + "eid", + [479983421], +) +def test_download_streamlines_from_gcs(eid): + """Smoke test: download one small experiment to verify the GCS source is live.""" + data = get_streamlines_data([eid], force_download=True) + assert len(data) == 1 + df = data[0] + assert "lines" in df.columns + assert "injection_sites" in df.columns + lines = df["lines"].iloc[0] + assert len(lines) > 0 + assert set(lines[0][0].keys()) == {"x", "y", "z"} + +def test_streamlines_hemisphere_orientation(): + """Verify that a right-hemisphere experiment renders in the correct hemisphere. -@pytest.mark.xfail(reason="Likely due to fail due to neuromorpho") -def test_streamlines(): - s = Scene(title="BR") - streams = get_streamlines_for_region("TH", force_download=False) - s.add(Streamlines(streams[0])) - s.add(*make_streamlines(*streams[1:3])) + Experiment 298004028 has injection at ML=2.15mm (right hemisphere) with + near-zero left hemisphere projection volume per the Allen connectivity viewer. + After the Z (ML) axis flip, all streamline Z coordinates should be below + the atlas midline (~5700um), matching brainrender's right hemisphere convention. + """ + data = get_streamlines_data([298004028], force_download=True) + assert len(data) == 1 + lines = data[0]["lines"].iloc[0] + assert len(lines) > 0 - with pytest.raises(TypeError): - Streamlines([1, 2, 3]) + # Collect all Z coordinates from all streamline components + all_z = [pt["z"] for component in lines for pt in component] - del s + # Allen CCF ML extent is 11400um, midline is ~5700um + # Right hemisphere in brainrender = Z < midline after flip + midline = 5700.0 + right_side = [z for z in all_z if z < midline] + assert len(right_side) / len(all_z) > 0.95, ( + f"Expected >95% of streamline points in right hemisphere (Z < {midline}), " + f"got {len(right_side)}/{len(all_z)} ({100*len(right_side)/len(all_z):.1f}%)" + )