diff --git a/docs/_templates/autosummary/class.rst b/docs/_templates/autosummary/class.rst index 7b4a0cf..d0a3137 100644 --- a/docs/_templates/autosummary/class.rst +++ b/docs/_templates/autosummary/class.rst @@ -8,38 +8,38 @@ {% block attributes %} {% if attributes %} -Attributes table -~~~~~~~~~~~~~~~~ + Attributes table + ~~~~~~~~~~~~~~~~ -.. autosummary:: -{% for item in attributes %} - ~{{ name }}.{{ item }} -{%- endfor %} -{% endif %} -{% endblock %} + .. autosummary:: + {% for item in attributes %} + ~{{ name }}.{{ item }} + {%- endfor %} + {% endif %} + {% endblock %} {% block methods %} {% if methods %} -Methods table -~~~~~~~~~~~~~ - -.. autosummary:: -{% for item in methods %} - {%- if item != '__init__' %} - ~{{ name }}.{{ item }} - {%- endif -%} -{%- endfor %} -{% endif %} -{% endblock %} + Methods table + ~~~~~~~~~~~~~ + + .. autosummary:: + {% for item in methods %} + {%- if item != '__init__' %} + ~{{ name }}.{{ item }} + {%- endif -%} + {%- endfor %} + {% endif %} + {% endblock %} {% block attributes_documentation %} {% if attributes %} -Attributes -~~~~~~~~~~ + Attributes + ~~~~~~~~~~ {% for item in attributes %} -.. autoattribute:: {{ [objname, item] | join(".") }} + .. auto{{ [fullname, item] | join(".") | member_type }}:: {{ [objname, item] | join(".") }} {%- endfor %} {% endif %} @@ -47,13 +47,13 @@ Attributes {% block methods_documentation %} {% if methods %} -Methods -~~~~~~~ + Methods + ~~~~~~~ {% for item in methods %} {%- if item != '__init__' %} -.. automethod:: {{ [objname, item] | join(".") }} + .. automethod:: {{ [objname, item] | join(".") }} {%- endif -%} {%- endfor %} diff --git a/docs/api.md b/docs/api.md index 51a5363..46765ed 100644 --- a/docs/api.md +++ b/docs/api.md @@ -1,22 +1,12 @@ # API Reference -```{contents} -:depth: 3 -:local: -``` - -```{toctree} -:maxdepth: 10 -``` - ```{eval-rst} -.. currentmodule:: mudata +.. module:: mudata ``` ## Multimodal omics ```{eval-rst} -.. module::mudata .. 
autosummary:: :toctree: generated @@ -26,7 +16,7 @@ ## Input/Output ```{eval-rst} -.. module::mudata +.. currentmodule:: mudata .. autosummary:: :toctree: generated @@ -47,7 +37,6 @@ ## Extensions ```{eval-rst} -.. module::mudata .. autosummary:: :toctree: generated @@ -56,9 +45,16 @@ Types used by the former: ```{eval-rst} -.. module::mudata .. autosummary:: :toctree: generated ExtensionNamespace ``` + +## Settings +```{eval-rst} +.. autosummary:: + :toctree: generated + + set_options +``` diff --git a/docs/conf.py b/docs/conf.py index 349b26a..96d0e75 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -88,12 +88,20 @@ source_suffix = {".rst": "restructuredtext", ".ipynb": "myst-nb", ".myst": "myst-nb"} +# FIXME: remove this workaround when anndata 0.13 is released so docs are built with Pandas 3 +import pandas as pd # noqa: E402 + +pd.DataFrame.__module__ = "pandas" +pd.Index.__module__ = "pandas" + intersphinx_mapping = { "python": ("https://docs.python.org/3", None), "anndata": ("https://anndata.readthedocs.io/en/stable/", None), "scanpy": ("https://scanpy.readthedocs.io/en/stable/", None), "numpy": ("https://numpy.org/doc/stable/", None), + "pandas": ("https://pandas.pydata.org/docs/", None), "fsspec": ("https://filesystem-spec.readthedocs.io/en/stable/", None), + "h5py": ("https://docs.h5py.org/en/stable/", None), "zarr": ("https://zarr.readthedocs.io/en/stable/", None), } diff --git a/docs/extensions/member_type.py b/docs/extensions/member_type.py new file mode 100644 index 0000000..9327220 --- /dev/null +++ b/docs/extensions/member_type.py @@ -0,0 +1,34 @@ +"""Extension adding a jinja2 filter that determines a class member’s type.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Literal + +from jinja2.defaults import DEFAULT_FILTERS +from jinja2.utils import import_string + +if TYPE_CHECKING: + from sphinx.application import Sphinx + + +def member_type(obj_path: str) -> Literal["method", "property", "attribute"]: + """Determine 
object member type. + + E.g.: `.. auto{{ fullname | member_type }}::` + """ + # https://jinja.palletsprojects.com/en/stable/api/#custom-filters + cls_path, member_name = obj_path.rsplit(".", 1) + cls = import_string(cls_path) + member = getattr(cls, member_name, None) + match member: + case property(): + return "property" + case _ if callable(member): + return "method" + case _: + return "attribute" + + +def setup(app: Sphinx): + """App setup hook.""" + DEFAULT_FILTERS["member_type"] = member_type diff --git a/pyproject.toml b/pyproject.toml index 55a987f..673c277 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -55,6 +55,7 @@ doc = [ "docutils>=0.8,!=0.18.*,!=0.19.*", "ipykernel", "ipython", + "mudata[io]", "myst-nb>=1.1", "pandas", "sphinx>=8.1", @@ -64,7 +65,7 @@ doc = [ "sphinx-design", "sphinxcontrib-bibtex>=1", "sphinxcontrib-katex", - "sphinxext-opengraph", + "sphinxext-opengraph" ] [tool.hatch] diff --git a/src/mudata/_core/config.py b/src/mudata/_core/config.py index 6e5671b..e42fa4d 100644 --- a/src/mudata/_core/config.py +++ b/src/mudata/_core/config.py @@ -24,7 +24,7 @@ class set_options: >>> with mudata.set_options(display_style="html"): ... print("Options are applied here") - ... or globally: + or globally: >>> mudata.set_options(display_style="html") """ diff --git a/src/mudata/_core/io.py b/src/mudata/_core/io.py index ec0351c..40f7146 100644 --- a/src/mudata/_core/io.py +++ b/src/mudata/_core/io.py @@ -39,7 +39,7 @@ # -def _is_openfile(obj): +def _is_openfile(obj) -> bool: return obj.__class__.__name__ == "OpenFile" and obj.__class__.__module__.startswith("fsspec.") @@ -116,11 +116,27 @@ def _write_h5mu(file: h5py.File, mdata: MuData, write_data=True, **kwargs): mdata.update() -def write_zarr(store: MutableMapping | str | PathLike, data: MuData | AnnData, chunks=None, write_data=True, **kwargs): +def write_zarr( + store: MutableMapping | str | PathLike | zarr.abc.store.Store, + data: MuData | AnnData, + chunks: tuple[int, ...] 
| None = None, + write_data: bool = True, + **kwargs, +): """ - Write MuData or AnnData object to the Zarr store + Write a MuData or AnnData object to the Zarr store. - Matrices - sparse or dense - are currently stored as they are. + Parameters + ---------- + store + The filename or a Zarr store. + chunks + The chunk shape. + write_data + Whether to write the data (the :attr:`~anndata.AnnData.X` matrices) for the modalities. If `False`, only the metadata + (everything except :attr:`~anndata.AnnData.X`) will be written. + **kwargs + Additional arguments to :func:`zarr.create_array`. """ import zarr @@ -222,9 +238,16 @@ def write_zarr(store: MutableMapping | str | PathLike, data: MuData | AnnData, c def write_h5mu(filename: str | PathLike, mdata: MuData, **kwargs): """ - Write MuData object to the HDF5 file + Write a MuData object to an HDF5 file. - Matrices - sparse or dense - are currently stored as they are. + Parameters + ---------- + filename + The filename. + mdata + The :class:`~mudata.MuData` object. + **kwargs + Additional arguments to :meth:`h5py.Group.create_dataset`. """ from .. import __mudataversion__, __version__ @@ -239,12 +262,16 @@ def write_h5mu(filename: str | PathLike, mdata: MuData, **kwargs): def write_h5ad(filename: str | PathLike, mod: str, data: MuData | AnnData): """ - Write AnnData object to the HDF5 file with a MuData container - - Currently is based on anndata._io.h5ad.write_h5ad internally. - Matrices - sparse or dense - are currently stored as they are. + Write an AnnData object to an existing HDF5 file containing a MuData (an h5mu file). - Ideally this is merged later to anndata._io.h5ad.write_h5ad. + Parameters + ---------- + filename + The file name. + mod + The modality to write. + data + The data. If a :class:`~mudata.MuData` object, `data[mod]` will be written. """ from ..
import __anndataversion__, __version__ @@ -301,17 +328,30 @@ def write_h5ad(filename: str | PathLike, mod: str, data: MuData | AnnData): def write(filename: str | PathLike, data: MuData | AnnData): """ - Write MuData or AnnData to an HDF5 file + Write a :class:`~mudata.MuData` or :class:`~anndata.AnnData` object to an HDF5 file. This function is designed to enhance I/O ease of use. - It recognises the following formats of filename: + It recognises the following formats of `filename`: - for MuData - - `FILE.h5mu` + + - `FILE.h5mu` + - for AnnData - - `FILE.h5mu/MODALITY` - - `FILE.h5mu/mod/MODALITY` - - `FILE.h5ad` + + - `FILE.h5mu/MODALITY` + - `FILE.h5mu/mod/MODALITY` + - `FILE.h5ad` + + The first two variants will write the :class:`~anndata.AnnData` object to the modality `MODALITY` + of the existing `FILE.h5mu` file, same as :func:`write_h5ad`. + + Parameters + ---------- + filename + The file name. + data + The data object to write. """ filename = str(filename) if filename.endswith(".h5ad") and isinstance(data, AnnData): @@ -351,9 +391,28 @@ def write(filename: str | PathLike, data: MuData | AnnData): # -def read_h5mu(filename: str | PathLike | io.IOBase | fsspec.OpenFile, backed: str | bool | None = None): - """Read MuData object from HDF5 file.""" - assert backed in [None, True, False, "r", "r+"], "Argument `backed` should be boolean, or r/r+, or None" +def read_h5mu( + filename: str | PathLike | io.IOBase | fsspec.OpenFile, backed: Literal["r", "r+"] | bool | None = None +) -> MuData: + """Read an `.h5mu`-formatted HDF5 file. + + Parameters + ---------- + filename + The file name or an :external+fsspec:doc:`fsspec` object. + backed + Whether to open the file in backed mode. In this mode, the data matrices :attr:`~anndata.AnnData.X` are not read into memory, + but are references to the on-disk datasets. + + Examples + -------- + >>> mdata = read_h5mu("file.h5mu") + + >>> with fsspec.open("https://example.com/file.h5mu") as f: + ... 
mdata = read_h5mu(f) + """ + if backed not in [None, True, False, "r", "r+"]: + raise ValueError("Argument `backed` should be boolean, or r/r+, or None") if backed is True or not backed: mode = "r" @@ -410,13 +469,13 @@ def read_h5mu(filename: str | PathLike | io.IOBase | fsspec.OpenFile, backed: st return mu -def read_zarr(store: str | PathLike | MutableMapping | zarr.Group): +def read_zarr(store: str | PathLike | MutableMapping | zarr.Group | zarr.abc.store.Store) -> MuData | AnnData: """Read from a hierarchical Zarr array store. Parameters ---------- store - The filename, a :class:`~typing.MutableMapping`, or a Zarr storage class. + The file name or a Zarr store. """ import zarr @@ -503,12 +562,27 @@ def _read_h5mu_mod(g: h5py.Group, manager: MuDataFileManager = None, backed: boo def read_h5ad( filename: str | PathLike | io.IOBase | fsspec.OpenFile, mod: str | None, backed: Literal["r", "r+"] | bool = False ) -> AnnData: - """Read AnnData object from inside a .h5mu file or from a standalone .h5ad file (mod=None). + """Read a modality from inside a .h5mu file or from a standalone .h5ad file (mod=None). + + Parameters + ---------- + filename + The file name or an :external+fsspec:doc:`fsspec` object. + backed + Whether to open the file in backed mode. In this mode, the data matrix :attr:`~anndata.AnnData.X` is not read into memory, + but is a reference to the on-disk datasets. + + Examples + -------- + >>> adata = read_h5ad("file.h5mu", "rna") - Currently replicates and modifies anndata._io.h5ad.read_h5ad. - Matrices are loaded as they are in the file (sparse or dense). + >>> adata = read_h5ad("rna.h5ad") - Ideally this is merged later to anndata._io.h5ad.read_h5ad. + >>> with fsspec.open("https://example.com/file.h5mu") as f: + ... adata = read_h5ad(f, "rna") + + >>> with fsspec.open("https://example.com/rna.h5ad") as f: + ... 
adata = read_h5ad(f) """ if mod is None: with ExitStack() as stack: @@ -533,21 +607,33 @@ def read_h5ad( def read(filename: str | PathLike | io.IOBase | fsspec.OpenFile, **kwargs) -> MuData | AnnData: - """Read MuData object from HDF5 file or AnnData object (a single modality) inside it. + """Read an `.h5mu` formatted HDF5 file or a single modality inside it. This function is designed to enhance I/O ease of use. - It recognises the following formats: + It recognises the following formats of `filename`: - `FILE.h5mu` + - `FILE.h5ad` - `FILE.h5mu/MODALITY` - `FILE.h5mu/mod/MODALITY` - - `FILE.h5ad` - OpenFile from fsspec is supported for remote storage, e.g.: + The last two variants will read the modality `MODALITY` and return an :class:`~anndata.AnnData` object. + + Parameters + ---------- + filename + The file name or an :external+fsspec:doc:`fsspec` object. + **kwargs + Additional arguments to :func:`read_h5ad` or :func:`read_h5mu`. + + Examples + -------- + >>> mdata = read("file.h5mu") - - .. code-block:: + >>> adata = read("file.h5mu/rna") - mdata = read(fsspec.open("s3://bucket/file.h5mu"))) + >>> with fsspec.open("s3://bucket/file.h5mu") as f: + ... mdata = read(f) """ if isinstance(filename, io.IOBase): raise TypeError( diff --git a/src/mudata/_core/merge.py b/src/mudata/_core/merge.py index 53c1ba2..77c8cd1 100644 --- a/src/mudata/_core/merge.py +++ b/src/mudata/_core/merge.py @@ -45,21 +45,22 @@ def concat( ) -> MuData: """Concatenates MuData objects. - All mdatas should have the same axis 0 or 1, which defines concatenation axis: + All mdatas should have the same :attr:`~MuData.axis` `0` or `1`, which defines concatenation axis: + - concatenate along obs when obs are shared in each mdata (multimodal), - concatenate along vars when vars are shared in each mdata (multi-dataset). The intersection of modalities is taken. - Nested MuData objects cannot be concatenated. + Nested :class:`MuData` objects cannot be concatenated.
This implementation follows anndata.concat() original implementation. The arguments are propagated to anndata.concat() for concatenating modalities. See anndata.concat() documentation for more details. - Params - ------ + Parameters + ---------- mdatas - The objects to be concatenated. If a Mapping is passed, keys are used for the `keys` + The objects to be concatenated. If a :class:`~collections.abc.Mapping` is passed, keys are used for the `keys` argument and values are concatenated. join How to align values when concatenating. If "outer", the union of the other axis @@ -68,17 +69,17 @@ def concat( How elements not aligned to the axis being concatenated along are selected. Currently implemented strategies include: - * `None`: No elements are kept. - * `"same"`: Elements that are the same in each of the objects. - * `"unique"`: Elements for which there is only one possible value. - * `"first"`: The first element seen at each from each position. - * `"only"`: Elements that show up in only one of the objects. + - `None`: No elements are kept. + - `"same"`: Elements that are the same in each of the objects. + - `"unique"`: Elements for which there is only one possible value. + - `"first"`: The first element seen at each from each position. + - `"only"`: Elements that show up in only one of the objects. uns_merge - How the elements of `.uns` are selected. Uses the same set of strategies as + How the elements of :attr:`~MuData.uns` are selected. Uses the same set of strategies as the `merge` argument, except applied recursively. label - Column in axis annotation (i.e. `.obs` or `.var`) to place batch information in. - If it's None, no column is added. + Column in axis annotation (i.e. :attr:`~MuData.obs` or :attr:`~MuData.var`) to place batch information in. + If `None`, no column is added. keys Names for each object being added. These values are used for column values for `label` or appended to the index if `index_unique` is not `None`. 
Defaults to diff --git a/src/mudata/_core/mudata.py b/src/mudata/_core/mudata.py index f206bbd..a1edf1d 100644 --- a/src/mudata/_core/mudata.py +++ b/src/mudata/_core/mudata.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import warnings from collections import Counter, abc from collections.abc import Iterable, Mapping, MutableMapping, Sequence @@ -7,17 +9,15 @@ from hashlib import sha1 from itertools import chain, combinations from numbers import Integral -from os import PathLike -from pathlib import Path from random import choices from string import ascii_letters, digits from types import MappingProxyType -from typing import Any, Literal, Union +from typing import TYPE_CHECKING, Any, Literal import numpy as np import pandas as pd from anndata import AnnData -from anndata._core.aligned_mapping import AxisArraysBase, PairwiseArraysView +from anndata._core.aligned_mapping import AxisArraysBase from anndata._core.views import DataFrameView from anndata.utils import convert_to_dict @@ -34,9 +34,15 @@ ) from .views import DictView +if TYPE_CHECKING: + from os import PathLike + from pathlib import Path + + import zarr + class MuAxisArraysView(AlignedView, AxisArraysBase): - def __init__(self, parent_mapping: AxisArraysBase, parent_view: "MuData", subset_idx: Any): + def __init__(self, parent_mapping: AxisArraysBase, parent_view: MuData, subset_idx: Any): self.parent_mapping = parent_mapping self._parent = parent_view self.subset_idx = subset_idx @@ -52,9 +58,6 @@ class MuAxisArrays(AxisArrays): class ModDict(dict): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - def _repr_hierarchy( self, nest_level: int = 0, is_last: bool = False, active_levels: list[int] | None = None ) -> str: @@ -141,12 +144,12 @@ class MuData: def __init__( self, - data: Union[AnnData, Mapping[str, AnnData], "MuData"] | None = None, - feature_types_names: dict | None = MappingProxyType( + data: AnnData | Mapping[str, AnnData] | MuData | None = None, + 
feature_types_names: Mapping[str, str] | None = MappingProxyType( {"Gene Expression": "rna", "Peaks": "atac", "Antibody Capture": "prot"} ), as_view: bool = False, - index: tuple[slice | Integral, slice | Integral] | slice | Integral | None = None, + index: tuple[slice | int, slice | int] | slice | int | None = None, **kwargs, ): self._init_common() @@ -256,7 +259,7 @@ def _init_common(self): self.file = MuDataFileManager() self._is_view = False - def _init_as_view(self, mudata_ref: "MuData", index): + def _init_as_view(self, mudata_ref: MuData, index): from anndata._core.index import _normalize_indices from anndata._core.views import _resolve_idxs @@ -333,7 +336,7 @@ def _init_as_view(self, mudata_ref: "MuData", index): else: self._mudata_ref = mudata_ref - def _init_as_actual(self, data: "MuData"): + def _init_as_actual(self, data: MuData): self._init_common() self._mod = data.mod self._obs = data.obs @@ -450,7 +453,7 @@ def _check_changed_attr_names(self, attr: str, columns: bool = False): break return (attr_names_changed, attr_columns_changed) - def copy(self, filename: PathLike | None = None) -> "MuData": + def copy(self, filename: str | PathLike | None = None) -> MuData: """ Make a copy. @@ -486,10 +489,14 @@ def copy(self, filename: PathLike | None = None) -> "MuData": write_h5mu(filename, self) return read_h5mu(filename, self.file._filemode) - def strings_to_categoricals(self, df: pd.DataFrame | None = None): - """Transform string columns in .var and .obs slots of MuData to categorical as well as of .var and .obs slots in each AnnData object. + def strings_to_categoricals(self, df: pd.DataFrame | None = None) -> pd.DataFrame | None: + """Transform string annotations to categoricals. - This keeps it compatible with AnnData.strings_to_categoricals() method. + Parameters + ---------- + df + If `None`, modifies :attr:`var` and :attr:`obs` attributes of the :class:`MuData` object as well as + each modality. 
Otherwise, modifies the dataframe in-place and returns it. """ AnnData.strings_to_categoricals(self, df) @@ -503,25 +510,25 @@ def strings_to_categoricals(self, df: pd.DataFrame | None = None): # To increase compatibility with scanpy methods _sanitize = strings_to_categoricals - def __getitem__(self, index) -> Union["MuData", AnnData]: + def __getitem__(self, index) -> AnnData | MuData: if isinstance(index, str): return self._mod[index] else: return MuData(self, as_view=True, index=index) @property - def mod(self) -> Mapping[str, "AnnData | MuData"]: + def mod(self) -> Mapping[str, AnnData | MuData]: """Dictionary of modalities.""" return self._mod @property def is_view(self) -> bool: - """Whether the object is a view of another `MuData` object.""" + """Whether the object is a view of another :class:`MuData` object.""" return self._is_view @property def shape(self) -> tuple[int, int]: - """Shape of data, all variables and observations combined (:attr:`n_obs`, :attr:`n_var`).""" + """Shape of data, all variables and observations combined (:attr:`n_obs`, :attr:`n_vars`).""" return self.n_obs, self.n_vars def __len__(self) -> int: @@ -577,7 +584,7 @@ def _update_attr( attrm = getattr(self, attr + "m") attrp = getattr(self, attr + "p") - attrmap = getattr(self, attr + "map") + attrmap = getattr(self, f"_{attr}map") dfs = [ getattr(a, attr).loc[:, []].assign(**{f"{m}:{rowcol}": np.arange(getattr(a, attr).shape[0])}) @@ -840,7 +847,7 @@ def _update_attr_legacy( attrm = getattr(self, attr + "m") attrp = getattr(self, attr + "p") - attrmap = getattr(self, attr + "map") + attrmap = getattr(self, f"_{attr}map") if join_common: # If all modalities have a column with the same name, it is not global @@ -1176,40 +1183,26 @@ def _shrink_attr(self, attr: str, inplace=True) -> pd.DataFrame: @property def n_mod(self) -> int: - """ - Number of modalities in the MuData object. - - Returns - ------- - int: The number of modalities. 
- """ return len(self._mod) @property def isbacked(self) -> bool: - """ - Whether the MuData object is backed. - - Returns - ------- - bool: True if the object is backed, False otherwise. - """ + """Whether the object is backed on disk.""" return self.file.filename is not None @property def filename(self) -> Path | None: - """ - Filename of the MuData object. + """Change the backing mode by setting the filename to a `.h5mu` file. - Returns - ------- - Path | None: The path to the file if backed, None otherwise. + - Setting the filename writes the stored data to disk. + - Setting the filename when the filename was previously another name moves the backing file from + the previous file to the new file. If you want to copy the previous file, use `copy(filename="new_filename")`. """ return self.file.filename @filename.setter - def filename(self, filename: PathLike | None): - filename = None if filename is None else Path(filename) + def filename(self, filename: str | PathLike | None): if self.isbacked: if filename is None: self.file._to_memory_mode() @@ -1268,7 +1261,11 @@ def obs_vector(self, key: str, layer: str | None = None) -> np.ndarray: return self._attr_vector(key, "obs") def update_obs(self): - """Update global .obs_names according to the .obs_names of all the modalities.""" + """Update :attr:`obs` indices of the object with the data from all the modalities. + + .. note:: + From v0.4, it will not pull columns from modalities by default. + """ join_common = self.axis == 1 self._update_attr("obs", axis=1, join_common=join_common) @@ -1311,10 +1308,9 @@ def _names_make_unique(self, attr: Literal["obs", "var"]): def obs_names_make_unique(self): """ - Call .obs_names_make_unique() method on each AnnData object. + Call :meth:`~anndata.AnnData.obs_names_make_unique` on each modality. - If there are obs_names, which are the same for multiple modalities, - append modality name to all obs_names.
+ If there are :attr:`obs_names` which are the same for multiple modalities, append the modality name to all obs_names. """ self._names_make_unique("obs") @@ -1408,16 +1404,19 @@ def var_vector(self, key: str, layer: str | None = None) -> np.ndarray: return self._attr_vector(key, "var") def update_var(self): - """Update global .var_names according to the .var_names of all the modalities.""" + """Update :attr:`var` indices of the object with the data from all the modalities. + + .. note:: + From v0.4, it will not pull columns from modalities by default. + """ join_common = self.axis == 0 self._update_attr("var", axis=0, join_common=join_common) def var_names_make_unique(self): """ - Call .var_names_make_unique() method on each AnnData object. + Call :meth:`~anndata.AnnData.var_names_make_unique` on each modality. - If there are var_names, which are the same for multiple modalities, - append modality name to all var_names. + If there are :attr:`var_names` which are the same for multiple modalities, append the modality name to all var_names. """ self._names_make_unique("var") @@ -1428,18 +1427,21 @@ def var_names(self) -> pd.Index: @var_names.setter def var_names(self, names: Sequence[str]): - """Set the variable names for all the nested AnnData/MuData objects.""" self._set_names("var", 1, names) # Multi-dimensional annotations (.obsm and .varm) @property - def obsm(self) -> MuAxisArrays | MuAxisArraysView: - """Multi-dimensional annotation of observation.""" + def obsm(self) -> MutableMapping[str]: + """Multi-dimensional annotation of observations. + + Stores for each key a two- or higher-dimensional :class:`~numpy.ndarray` or :class:`~pandas.DataFrame` of length :attr:`n_obs`. + Is sliced with `obs` but otherwise behaves like a :term:`mapping`.
+ """ return self._obsm @obsm.setter - def obsm(self, value): + def obsm(self, value: Mapping[str]): obsm = MuAxisArrays(self, axis=0, store=convert_to_dict(value)) if self.is_view: self._init_as_actual(self.copy()) @@ -1450,12 +1452,16 @@ def obsm(self): self.obsm = {} @property - def obsp(self) -> PairwiseArrays | PairwiseArraysView: - """Pairwise annotatation of observations.""" + def obsp(self) -> MutableMapping[str]: + """Pairwise annotation of observations. + + Stores for each key a two- or higher-dimensional :class:`~numpy.ndarray` whose first two dimensions are of length :attr:`n_obs`. + Is sliced with `obs` but otherwise behaves like a :term:`mapping`. + """ return self._obsp @obsp.setter - def obsp(self, value): + def obsp(self, value: Mapping[str]): obsp = PairwiseArrays(self, axis=0, store=convert_to_dict(value)) if self.is_view: self._init_as_actual(self.copy()) @@ -1466,21 +1472,26 @@ def obsp(self): self.obsp = {} @property - def obsmap(self) -> PairwiseArrays | PairwiseArraysView: - """ - Mapping of observation index in the MuData to indices in individual modalities. + def obsmap(self) -> Mapping[str]: + """Mapping of observation indices in the object to indices in individual modalities. - 1-based, 0 indicates that the corresponding observation is missing in the respective modality. + Contains an entry for each modality. Each entry is an :class:`~numpy.ndarray` with shape `(n_obs, 1)`. Each element + in the array contains the numerical index of the observation in the respective modality corresponding to the :class:`MuData` + observation in that position. The index is 1-based, 0 indicates that the observation is missing in the modality. """ - return self._obsmap + return MappingProxyType(self._obsmap) @property - def varm(self) -> MuAxisArrays | MuAxisArraysView: - """Multi-dimensional annotation of variables.""" + def varm(self) -> MutableMapping[str]: + """Multi-dimensional annotation of variables.
+ + Stores for each key a two- or higher-dimensional :class:`~numpy.ndarray` or :class:`~pandas.DataFrame` of length :attr:`n_vars`. + Is sliced with `var` but otherwise behaves like a :term:`mapping`. + """ return self._varm @varm.setter - def varm(self, value): + def varm(self, value: Mapping[str]): varm = MuAxisArrays(self, axis=1, store=convert_to_dict(value)) if self.is_view: self._init_as_actual(self.copy()) @@ -1491,12 +1502,16 @@ def varm(self): self.varm = {} @property - def varp(self) -> PairwiseArrays | PairwiseArraysView: - """Pairwise annotatation of variables.""" + def varp(self) -> MutableMapping[str]: + """Pairwise annotation of variables. + + Stores for each key a two- or higher-dimensional :class:`~numpy.ndarray` whose first two dimensions are of length :attr:`n_vars`. + Is sliced with `var` but otherwise behaves like a :term:`mapping`. + """ return self._varp @varp.setter - def varp(self, value): + def varp(self, value: Mapping[str]): varp = PairwiseArrays(self, axis=0, store=convert_to_dict(value)) if self.is_view: self._init_as_actual(self.copy()) @@ -1507,16 +1522,16 @@ def varp(self): self.varp = {} @property - def varmap(self) -> PairwiseArrays | PairwiseArraysView: - """ - Mapping of feature index in the MuData to indices in individual modalities. + def varmap(self) -> Mapping[str]: + """Mapping of feature indices in the object to indices in individual modalities. - 1-based, 0 indicates that the corresponding observation is missing in the respective modality. + Contains an entry for each modality. Each entry is an :class:`~numpy.ndarray` with shape `(n_vars, 1)`. Each element + in the array contains the numerical index of the feature in the respective modality corresponding to the :class:`MuData` + feature in that position. The index is 1-based, 0 indicates that the feature is missing in the modality.
""" - return self._varmap + return MappingProxyType(self._varmap) # Unstructured annotations - # NOTE: annotations are stored as dict() and not as OrderedDict() as in AnnData @property def uns(self) -> MutableMapping: @@ -1564,27 +1579,28 @@ def uns_keys(self) -> list[str]: return list(self._uns.keys()) def update(self): - """ - Update both .obs and .var indices of MuData with the data from all the modalities + """Update both :attr:`obs` and :attr:`var` indices of the object with the data from all the modalities. - NOTE: From v0.4, it will not pull columns from modalities by default. + .. note:: + From v0.4, it will not pull columns from modalities by default. """ if len(self._mod) > 0: self.update_var() self.update_obs() @property - def axis(self) -> int: - """MuData axis.""" + def axis(self) -> Literal[-1, 0, 1]: + """MuData axis. + + - `0` if the modalities have shared observations + - `1` if the modalities have shared features + - `-1` if both observations and features are shared + """ return self._axis @property def mod_names(self) -> list[str]: - """ - Names of modalities (alias for `list(mdata.mod.keys())`) - - This property is read-only. - """ + """Names of modalities (alias for `list(mdata.mod.keys())`)""" return list(self._mod.keys()) def _pull_attr( @@ -1809,7 +1825,9 @@ def pull_obs( only_drop: bool = False, ): """ - Copy the data from the modalities to the global .obs, existing columns to be overwritten or updated. + Copy data from the :attr:`~anndata.AnnData.obs` of the modalities to the global :attr:`obs` + + Existing columns to be overwritten or updated. Parameters ---------- @@ -1879,7 +1897,9 @@ def pull_var( only_drop: bool = False, ): """ - Copy the data from the modalities to the global .var, existing columns to be overwritten or updated. + Copy data from the :attr:`~anndata.AnnData.var` of the modalities to the global :attr:`var` + + Existing columns to be overwritten or updated. 
Parameters ---------- @@ -2071,7 +2091,9 @@ def push_obs( only_drop: bool = False, ): """ - Copy the data from the mdata.obs to the modalities, existing columns to be overwritten. + Copy the data from :attr:`obs` to the :attr:`~anndata.AnnData.obs` of the modalities. + + Existing columns to be overwritten. Parameters ---------- @@ -2110,7 +2132,9 @@ def push_var( only_drop: bool = False, ): """ - Copy the data from the mdata.var to the modalities, existing columns to be overwritten. + Copy the data from :attr:`var` to the :attr:`~anndata.AnnData.var` of the modalities. + + Existing columns to be overwritten. Parameters ---------- @@ -2139,8 +2163,16 @@ def push_var( "var", columns=columns, mods=mods, common=common, prefixed=prefixed, drop=drop, only_drop=only_drop ) - def write_h5mu(self, filename: str | None = None, **kwargs): - """Write MuData object to an HDF5 file.""" + def write_h5mu(self, filename: str | PathLike | None = None, **kwargs): + """Write the object to an HDF5 file. + + Parameters + ---------- + filename + Path of the `.h5mu` file to write to. Defaults to the backing file. + **kwargs + Additional arguments to :func:`~mudata.write_h5mu`. + """ from .io import _write_h5mu, write_h5mu if self.isbacked and (filename is None or filename == self.filename): @@ -2158,27 +2190,36 @@ def write_h5mu(self, filename: str | None = None, **kwargs): write = write_h5mu - def write_zarr(self, store: MutableMapping | str | Path, **kwargs): - """Write MuData object to a Zarr store.""" + def write_zarr(self, store: MutableMapping | str | PathLike | zarr.abc.store.Store, **kwargs): + """Write the object to a Zarr store. + + Parameters + ---------- + store + The filename or a Zarr store. + **kwargs + Additional arguments to :func:`~mudata.write_zarr`. + """ from .io import write_zarr write_zarr(store, self, **kwargs) def to_anndata(self, **kwargs) -> AnnData: """ - Convert MuData to AnnData. + Convert the object to :class:`~anndata.AnnData`. 
- If mdata.axis == 0 (shared observations), + If :attr:`axis` is `0` (shared observations), concatenate modalities along axis 1 (`anndata.concat(axis=1)`). - If mdata.axis == 1 (shared variables), + + If :attr:`axis` is `1` (shared features), concatenate datasets along axis 0 (`anndata.concat(axis=0)`). - See `anndata.concat()` documentation for more details. + See :func:`anndata.concat` documentation for more details. Parameters ---------- - kwargs - Keyword arguments passed to `anndata.concat()` + **kwargs + Keyword arguments passed to :func:`anndata.concat` """ from .to_ import to_anndata @@ -2234,7 +2275,7 @@ def _gen_repr(self, n_obs, n_vars, extensive: bool = False, nest_level: int = 0) def __repr__(self) -> str: return self._gen_repr(self.n_obs, self.n_vars, extensive=True) - def _repr_html_(self, expand=None): + def _repr_html_(self, expand=None) -> str: """ HTML formatter for MuData objects for rich display in notebooks. @@ -2307,7 +2348,7 @@ def _repr_html_(self, expand=None): full = "".join((MUDATA_CSS, "
", header, mods, "
")) return full - def _find_unique_colnames(self, attr: str, ncols: int): + def _find_unique_colnames(self, attr: str, ncols: int) -> list[str]: nchars = 16 allunique = False while not allunique: diff --git a/src/mudata/_core/to_.py b/src/mudata/_core/to_.py index 33c0afa..dca9975 100644 --- a/src/mudata/_core/to_.py +++ b/src/mudata/_core/to_.py @@ -8,19 +8,19 @@ def to_anndata(mdata: MuData, **kwargs) -> AnnData: """ - Convert MuData to AnnData by concatenating modalities. + Convert :class:`MuData` to :class:`~anndata.AnnData` by concatenating modalities. - If mdata.axis == 0 (shared observations), + If `mdata.axis == 0` (shared observations), concatenate modalities along axis 1 (`anndata.concat(axis=1)`). - If mdata.axis == 1 (shared variables), + If `mdata.axis == 1` (shared variables), concatenate datasets along axis 0 (`anndata.concat(axis=0)`). Parameters ---------- - data + mdata - MuData object to convert to AnnData - kwargs - Keyword arguments passed to anndata.concat + Object to convert to :class:`~anndata.AnnData`. + **kwargs + Additional arguments for :func:`anndata.concat`. """ if mdata.axis == -1: raise ValueError("Only MuData with axis=0 and axis=1 are supported in `to_anndata()`") @@ -37,18 +37,18 @@ def to_anndata(mdata: MuData, **kwargs) -> AnnData: def to_mudata(adata: AnnData, axis: Literal[0, 1], by: str) -> MuData: """ - Convert AnnData to MuData by splitting it along obs or var. + Convert :class:`~anndata.AnnData` to :class:`MuData` by splitting it along obs or var. Axis signifies the shared axis. - Use `axis=0` for getting MuData with shared observations (axis=0), - and `axis=1` for getting MuData with shared variables (axis=1). + Use `axis=0` for getting :class:`MuData` with shared observations (axis=0), + and `axis=1` for getting :class:`MuData` with shared variables (axis=1). - Paramteters - ----------- + Parameters + ---------- adata - AnnData object to convert to MuData + Object to convert to MuData. 
axis - Axis of shared observations (0) or variables (1) + Shared axis: `0` for observations, `1` for features. by Key in `adata.var` (if axis=0) or `adata.obs` (if axis=1) to split by """