Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ dependencies = [
"numpy>=1.21,<3",
"einops>=0.7.0,<1",
"scikit-learn>=1.6.0,<2",
"typing_extensions~=4.0",
]
classifiers = [
"Programming Language :: Python :: 3",
Expand Down
2 changes: 1 addition & 1 deletion src/chronos/__about__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "2.2.2"
__version__ = "2.3.0"
67 changes: 42 additions & 25 deletions src/chronos/chronos2/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,20 @@
import datasets
import fev

try:
from typing import NotRequired # Python 3.11+
except ImportError:
from typing_extensions import NotRequired


TensorOrArray: TypeAlias = torch.Tensor | np.ndarray


class PreparedInput(TypedDict):
"""A preprocessed time series input ready for model training/inference."""

context: torch.Tensor # (n_variates, history_length), float32
future_covariates: torch.Tensor # (n_variates, prediction_length), float32
context: torch.Tensor # (n_variates, history_length)
future_covariates: NotRequired[torch.Tensor] # (n_variates, prediction_length); only required in TEST mode
n_targets: int
n_covariates: int
n_future_covariates: int
Expand Down Expand Up @@ -200,7 +205,8 @@ def validate_and_prepare_single_dict_input(

context_tensor = torch.cat([target, past_covariates_tensor], dim=0).to(dtype=torch.float32)
future_covariates_tensor = torch.cat(
[future_covariates_target_padding, future_covariates_tensor], dim=0
[future_covariates_target_padding, future_covariates_tensor],
dim=0,
).to(dtype=torch.float32)
n_targets = target.shape[0]
n_covariates = past_covariates_tensor.shape[0]
Expand Down Expand Up @@ -260,15 +266,28 @@ def prepare_inputs(
return inputs


def validate_prepared_schema(prepared_input: Any) -> None:
"""Validate that an input matches the PreparedInput schema."""
def validate_prepared_schema(prepared_input: Any, mode: "DatasetMode | str") -> None:
"""Validate that an input matches the PreparedInput schema.

Parameters
----------
prepared_input
The input to validate
mode
Dataset mode. `future_covariates` is only required in TEST mode since it's reconstructed
from context in TRAIN/VALIDATION modes.
"""
if not isinstance(prepared_input, Mapping):
raise TypeError(
f"Expected input to be a dict-like, got {type(prepared_input).__name__}. "
"Set convert_inputs=True when calling fit() to preprocess raw inputs."
)

required_keys = {"context", "future_covariates", "n_targets", "n_covariates", "n_future_covariates"}
# future_covariates is only required in TEST mode (reconstructed from context otherwise)
required_keys = {"context", "n_targets", "n_covariates", "n_future_covariates"}
if mode == DatasetMode.TEST:
required_keys.add("future_covariates")

missing = required_keys - set(prepared_input.keys())
if missing:
raise TypeError(
Expand All @@ -283,20 +302,21 @@ def validate_prepared_schema(prepared_input: Any) -> None:
"Set convert_inputs=True when calling fit() to preprocess raw inputs."
)

future_covariates = prepared_input["future_covariates"]
if not isinstance(future_covariates, torch.Tensor) or future_covariates.ndim != 2:
raise TypeError(
f"Expected 'future_covariates' to be 2-d torch.Tensor, got {type(future_covariates).__name__} "
f"with shape {getattr(future_covariates, 'shape', 'N/A')}. "
"Set convert_inputs=True when calling fit() to preprocess raw inputs."
)
future_covariates = prepared_input.get("future_covariates")
if future_covariates is not None:
if not isinstance(future_covariates, torch.Tensor) or future_covariates.ndim != 2:
raise TypeError(
f"Expected 'future_covariates' to be 2-d torch.Tensor, got {type(future_covariates).__name__} "
f"with shape {getattr(future_covariates, 'shape', 'N/A')}. "
"Set convert_inputs=True when calling fit() to preprocess raw inputs."
)

if context.shape[0] != future_covariates.shape[0]:
raise ValueError(
f"Expected 'context' and 'future_covariates' to have the same first dimension, "
f"got {context.shape[0]} and {future_covariates.shape[0]}. "
"Set convert_inputs=True when calling fit() to preprocess raw inputs."
)
if context.shape[0] != future_covariates.shape[0]:
raise ValueError(
f"Expected 'context' and 'future_covariates' to have the same first dimension, "
f"got {context.shape[0]} and {future_covariates.shape[0]}. "
"Set convert_inputs=True when calling fit() to preprocess raw inputs."
)


def convert_list_of_tensors_input_to_list_of_dicts_input(
Expand Down Expand Up @@ -532,7 +552,7 @@ def __init__(
inputs = convert_list_of_tensors_input_to_list_of_dicts_input(cast(Sequence[TensorOrArray], inputs))
self.inputs = prepare_inputs(cast(Iterable[Mapping[str, Any]], inputs), prediction_length, min_past, mode)
else:
validate_prepared_schema(inputs[0])
validate_prepared_schema(inputs[0], mode=mode)
self.inputs = cast(Sequence[PreparedInput], inputs)

self.context_length = context_length
Expand All @@ -545,7 +565,6 @@ def __init__(
def _construct_slice(self, input_idx: int) -> tuple[torch.Tensor, torch.Tensor | None, torch.Tensor, int]:
prepared = self.inputs[input_idx]
past_tensor = prepared["context"].clone() # shape: (n_targets + n_covariates, history_length)
future_tensor = prepared["future_covariates"].clone()
n_targets = int(prepared["n_targets"])
n_covariates = int(prepared["n_covariates"])
n_future_covariates = int(prepared["n_future_covariates"])
Expand Down Expand Up @@ -580,9 +599,7 @@ def _construct_slice(self, input_idx: int) -> tuple[torch.Tensor, torch.Tensor |

if n_future_covariates > 0:
# the last n_future_covariates rows of past_tensor (the prepared "context") are the known future covariates
future_covariates = past_tensor[
-n_future_covariates:, slice_idx : slice_idx + self.prediction_length
]
future_covariates = past_tensor[-n_future_covariates:, slice_idx : slice_idx + self.prediction_length]
else:
# zero-length tensor for easy concatenation later
future_covariates = torch.zeros((0, self.prediction_length))
Expand All @@ -596,7 +613,7 @@ def _construct_slice(self, input_idx: int) -> tuple[torch.Tensor, torch.Tensor |
future_covariates = torch.cat([future_covariates_padding, future_covariates], dim=0)
else:
future_target = None
future_covariates = future_tensor
future_covariates = prepared["future_covariates"].clone()

# context: (n_targets + n_covariates, min(context_length, history_length))
# future_target: (n_targets + n_covariates, prediction_length), the future values of known future covariates
Expand Down