Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/vak/common/annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,10 @@ def _map_using_notated_path(
Where each key is path to annotated file, and
its value is the corresponding ``crowsetta.Annotation``.
"""
# "cast" `annotated_files` to list;
# need this because accessing .values on a pandas.Series in new versions gives us an ArrowStringArray
# that does not have a `remove` method like a list
annotated_files = list(annotated_files)
# First check that we don't have duplicate keys that would cause this to fail silently
keys = []
for annot in annot_list:
Expand Down
2 changes: 2 additions & 0 deletions src/vak/common/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,3 +62,5 @@
VALID_SPLITS = ("predict", "test", "train", "val")

DEFAULT_BACKGROUND_LABEL = "background"

DEFAULT_BOUNDARY_TIMES_PADVAL = -100.0
290 changes: 290 additions & 0 deletions src/vak/common/validators.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
"""Functions for input validation"""
from __future__ import annotations

import pathlib
import warnings

import numpy as np
import numpy.typing as npt
import torch


def column_or_1d(y: npt.NDArray, warn: bool = False) -> npt.NDArray:
Expand Down Expand Up @@ -76,3 +78,291 @@ def is_a_directory(path):
def is_a_file(path):
    """Return whether ``path`` points to a file.

    Parameters
    ----------
    path : str or pathlib.Path
        Path to check; it is wrapped in :class:`pathlib.Path`.

    Returns
    -------
    bool
        Result of :meth:`pathlib.Path.is_file` for ``path``.
    """
    candidate = pathlib.Path(path)
    return candidate.is_file()


def is_1d_tensor(t: torch.Tensor, name: str | None = None) -> bool:
    """Validate that ``t`` is a one-dimensional :class:`torch.Tensor`.

    Parameters
    ----------
    t : torch.Tensor
        Object to validate.
    name : str, optional
        Name of ``t`` in the calling function.
        When not ``None``, it is included in the error message.

    Returns
    -------
    is_1d_tensor : bool
        ``True`` if ``t`` is a tensor with ``t.ndim == 1``.

    Raises
    ------
    TypeError
        If ``t`` is not a :class:`torch.Tensor`.
    ValueError
        If ``t`` is a tensor but is not one-dimensional.
    """
    if isinstance(t, torch.Tensor):
        if t.ndim == 1:
            return True

        # Right type, wrong number of dimensions.
        name_insert = "Must be " if name is None else f"`{name}` must be "
        raise ValueError(
            f"{name_insert}a 1-dimensional tensor but ndim={t.ndim}"
        )

    # Not a tensor at all.
    name_insert = "" if name is None else f" of `{name}`"
    raise TypeError(
        f"Expected type{name_insert} to be `torch.Tensor` but type was: {type(t)}"
    )


def is_2d_tensor(t: torch.Tensor, name: str | None = None) -> bool:
    """Validate that ``t`` is a two-dimensional :class:`torch.Tensor`.

    Parameters
    ----------
    t : torch.Tensor
        Object to validate.
    name : str, optional
        Name of ``t`` in the calling function.
        When not ``None``, it is included in the error message.

    Returns
    -------
    is_2d_tensor : bool
        ``True`` if ``t`` is a tensor with ``t.ndim == 2``.

    Raises
    ------
    TypeError
        If ``t`` is not a :class:`torch.Tensor`.
    ValueError
        If ``t`` is a tensor but is not two-dimensional.
    """
    if isinstance(t, torch.Tensor):
        if t.ndim == 2:
            return True

        # Right type, wrong number of dimensions.
        name_insert = "Must be " if name is None else f"`{name}` must be "
        raise ValueError(
            f"{name_insert}a 2-dimensional tensor but ndim={t.ndim}"
        )

    # Not a tensor at all.
    name_insert = "" if name is None else f" of `{name}`"
    raise TypeError(
        f"Expected type{name_insert} to be `torch.Tensor` but type was: {type(t)}"
    )


def is_1d_or_2d_tensor(y: torch.Tensor, name: str | None = None) -> bool:
    """Validate that ``y`` is a :class:`torch.Tensor`
    with either one or two dimensions.

    Parameters
    ----------
    y : torch.Tensor
        Tensor to be validated.
    name : str, optional
        Name of tensor in calling function.
        Used in any error message if supplied.

    Returns
    -------
    is_1d_or_2d_tensor : bool
        ``True`` if ``y.ndim == 1 or y.ndim == 2``.

    Raises
    ------
    TypeError
        If ``y`` is not a :class:`torch.Tensor`.
    ValueError
        If ``y`` does not have one or two dimensions.

    Examples
    --------
    >>> y = torch.tensor([[0, 1, 2], [0, 1, 2]])
    >>> vak.common.validators.is_1d_or_2d_tensor(y)
    True

    >>> y = torch.tensor([0, 1, 2])
    >>> vak.common.validators.is_1d_or_2d_tensor(y)
    True
    """
    # Trailing space lets the name drop straight into the messages below.
    name = name + " " if name else ""

    if not isinstance(y, torch.Tensor):
        raise TypeError(
            f"Input {name}should be a `torch.Tensor`, but type was: {type(y)}"
        )

    if y.ndim not in (1, 2):
        raise ValueError(
            f"Input {name}should be a one-dimensional or two-dimensional `torch.Tensor`, "
            f"but number of dimensions was: {y.ndim}"
        )

    return True


def is_non_negative(
    boundary_times: torch.FloatTensor, name: str | None = None
) -> bool:
    """Validate that every value in ``boundary_times``
    is non-negative (>= 0.0).

    Parameters
    ----------
    boundary_times : torch.Tensor
        Tensor to be validated.
    name : str, optional
        Name of tensor in calling function.
        Used in any error message if supplied.

    Returns
    -------
    is_non_negative : bool
        ``True`` if all values in ``boundary_times``
        are non-negative.

    Raises
    ------
    ValueError
        If ``torch.all(boundary_times >= 0.0)`` is not ``True``.

    Examples
    --------
    >>> y = torch.tensor([0.0, 0.1, 0.2])
    >>> vak.common.validators.is_non_negative(y)
    True
    """
    # Trailing space lets the name drop straight into the message below.
    name = name + " " if name else ""

    if torch.all(boundary_times >= 0.0):
        return True

    raise ValueError(
        f"Values of boundaries tensor {name}must all be non-negative:\n{boundary_times}"
    )


def is_strictly_increasing(
    boundary_times: torch.FloatTensor, name: str | None = None
) -> bool:
    """Validate that the values in ``boundary_times``
    are strictly increasing.

    Parameters
    ----------
    boundary_times : torch.Tensor
        Tensor to be validated.
    name : str, optional
        Name of tensor in calling function.
        Used in any error message if supplied.

    Returns
    -------
    is_strictly_increasing : bool
        ``True`` if ``boundary_times`` has at most one element, or if
        ``torch.all(boundary_times[1:] > boundary_times[:-1])``
        is ``True``.

    Raises
    ------
    ValueError
        If ``boundary_times`` has more than one element and
        the comparison above is not ``True``.

    Examples
    --------
    >>> y = torch.tensor([0.0, 0.1, 0.2])
    >>> vak.common.validators.is_strictly_increasing(y)
    True
    """
    # Trailing space lets the name drop straight into the message below.
    name = name + " " if name else ""

    # With zero or one boundary there is nothing to compare,
    # so the tensor is valid by definition.
    if boundary_times.numel() <= 1:
        return True

    earlier = boundary_times[:-1]
    later = boundary_times[1:]
    if torch.all(later > earlier):
        return True

    raise ValueError(
        f"Values of boundaries times {name}must be strictly increasing:\n{boundary_times}"
    )


def have_same_dtype(
    t1: torch.Tensor,
    t2: torch.Tensor,
    name1: str | None = None,
    name2: str | None = None,
) -> bool:
    """Validate that two tensors, ``t1`` and ``t2``, have the same :class:`~torch.dtype`.

    Parameters
    ----------
    t1 : torch.Tensor
        First tensor to be validated.
    t2 : torch.Tensor
        Second tensor to be validated.
    name1 : str, optional
        Name of first tensor in calling function.
        Used in any error message if both ``name1`` and ``name2`` are supplied.
    name2 : str, optional
        Name of second tensor in calling function.
        Used in any error message if both ``name1`` and ``name2`` are supplied.

    Returns
    -------
    have_same_dtype : bool
        ``True`` if ``t1`` and ``t2`` have the same :class:`~torch.dtype`.

    Raises
    ------
    ValueError
        If ``t1.dtype`` and ``t2.dtype`` are not equal.
    """
    if t1.dtype == t2.dtype:
        return True

    # Names are only mentioned when the caller supplied both of them.
    names = f"{name1} and {name2} " if name1 and name2 else ""
    raise ValueError(
        f"Two tensors {names}must have the same dtype, but dtypes were: {t1.dtype} and {t2.dtype}"
    )


def have_same_ndim(
    t1: torch.Tensor,
    t2: torch.Tensor,
    name1: str | None = None,
    name2: str | None = None,
) -> bool:
    """Validate that two tensors, ``t1`` and ``t2``, have the same :attr:`torch.Tensor.ndim`.

    Parameters
    ----------
    t1 : torch.Tensor
        First tensor to be validated.
    t2 : torch.Tensor
        Second tensor to be validated.
    name1 : str, optional
        Name of first tensor in calling function.
        Used in any error message if both ``name1`` and ``name2`` are supplied.
    name2 : str, optional
        Name of second tensor in calling function.
        Used in any error message if both ``name1`` and ``name2`` are supplied.

    Returns
    -------
    have_same_ndim : bool
        ``True`` if ``t1`` and ``t2`` have the same number of dimensions.

    Raises
    ------
    ValueError
        If ``t1.ndim`` and ``t2.ndim`` are not equal.
    """
    if t1.ndim == t2.ndim:
        return True

    # Names are only mentioned when the caller supplied both of them.
    names = f"{name1} and {name2} " if name1 and name2 else ""
    raise ValueError(
        f"Two tensors {names}must have the same number of dimensions, but t1.ndim={t1.ndim} and t2.ndim={t2.ndim}"
    )
14 changes: 13 additions & 1 deletion src/vak/datapipes/frame_classification/infer_datapipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,17 @@ def _load_frames(self, frames_path):

def __getitem__(self, idx):
frames_path = self.dataset_path / self.frames_paths[idx]
frames = self._load_frames(frames_path)

from vak import common
if self.input_type == "audio":
frames, _ = common.constants.AUDIO_FORMAT_FUNC_MAP[
constants.FRAME_CLASSIFICATION_DATASET_AUDIO_FORMAT
](frames_path)
elif self.input_type == "spect":
spect_dict = common.files.spect.load(frames_path)
frames = spect_dict[common.constants.SPECT_KEY]
frame_times = spect_dict[common.constants.TIMEBINS_KEY]

item = {"frames": frames, "frames_path": frames_path}
if self.frame_labels_paths is not None:
frame_labels = np.load(
Expand All @@ -224,6 +234,8 @@ def __getitem__(self, idx):
if self.item_transform:
item = self.item_transform(**item)

item["frame_times"] = frame_times

return item

def __len__(self):
Expand Down
1 change: 1 addition & 0 deletions src/vak/metrics/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
from .classification import * # noqa: F401, F403
from .distance import * # noqa: F401, F403
from . import boundary_detection
12 changes: 12 additions & 0 deletions src/vak/metrics/boundary_detection/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from . import (
functional,
)

from ._boundary_detection import (
PrecisionRecallFScoreRVal,
)

__all__ = [
"functional",
"PrecisionRecallFScoreRVal",
]
Loading
Loading