diff --git a/CHANGELOG.md b/CHANGELOG.md index 680f11da28..3d2db9af2d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,9 +16,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Support for GPyTorch objects (kernels, means, likelihood) as Gaussian process components, enabling full low-level customization - Factories for all Gaussian process components -- `EDBO` and `EDBO_SMOOTHED` presets for `GaussianProcessSurrogate` +- `BOTORCH`, `EDBO` and `EDBO_SMOOTHED` presets for `GaussianProcessSurrogate` - `TypeSelector` and `NameSelector` classes for parameter selection in kernel factories - `parameter_names` attribute to basic kernels for controlling the considered parameters +- `ParameterKind` flag enum for classifying parameters by their role and automatic + parameter kind validation in kernel factories - `IndexKernel` and `PositiveIndexKernel` classes - Interpoint constraints for continuous search spaces - `IndexKernel` and `PositiveIndexKernel` classes @@ -26,6 +28,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 composition via `+` (sum) and `*` (product), as well as `constant * kernel` for creating a `ScaleKernel` with a fixed output scale +### Changed +- Gaussian processes no longer invoke leave-one-out training for multitask scenarios but + can now rely on improved model priors for generalization + ### Breaking Changes - `ContinuousLinearConstraint.to_botorch` now returns a collection of constraint tuples instead of a single tuple (needed for interpoint constraints) diff --git a/baybe/parameters/__init__.py b/baybe/parameters/__init__.py index 93e62b6ee9..6fe2c0d8f4 100644 --- a/baybe/parameters/__init__.py +++ b/baybe/parameters/__init__.py @@ -5,6 +5,7 @@ from baybe.parameters.enum import ( CategoricalEncoding, CustomEncoding, + ParameterKind, SubstanceEncoding, ) from baybe.parameters.numerical import ( @@ -22,6 +23,7 @@ "MeasurableMetadata", "NumericalContinuousParameter", "NumericalDiscreteParameter", + "ParameterKind", "SubstanceEncoding", "SubstanceParameter", "TaskParameter", diff --git a/baybe/parameters/base.py b/baybe/parameters/base.py index 2d4df2bc77..7986a21124 100644 --- a/baybe/parameters/base.py +++ b/baybe/parameters/base.py @@ -21,6 +21,7 @@ from baybe.utils.metadata import MeasurableMetadata, to_metadata if TYPE_CHECKING: + from baybe.parameters.enum import ParameterKind from baybe.searchspace.continuous import SubspaceContinuous from baybe.searchspace.core import SearchSpace from baybe.searchspace.discrete import SubspaceDiscrete @@ -77,6 +78,13 @@ def is_discrete(self) -> bool: """Boolean indicating if this is a discrete parameter.""" return isinstance(self, DiscreteParameter) + @property + def kind(self) -> ParameterKind: + """The kind of the parameter.""" + from baybe.parameters.enum import ParameterKind + + return ParameterKind.from_parameter(self) + @property @abstractmethod def comp_rep_columns(self) -> tuple[str, ...]: diff --git a/baybe/parameters/enum.py b/baybe/parameters/enum.py index 3161f67dc7..07e9f9fb34 100644 --- a/baybe/parameters/enum.py +++ b/baybe/parameters/enum.py @@ -1,6 +1,38 @@ """Parameter-related enumerations.""" -from enum import Enum +from __future__ import annotations + +from enum import Enum, Flag, auto +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from baybe.parameters.base import Parameter + + +class ParameterKind(Flag): + """Flag enum encoding the kind of a parameter. + + Can be used to express compatibility (e.g. Gaussian process kernel factories) + with different parameter types via bitwise combination of flags. + """ + + REGULAR = auto() + """Regular parameter undergoing no special treatment.""" + + TASK = auto() + """Task parameter for transfer learning.""" + + FIDELITY = auto() + """Fidelity parameter for multi-fidelity modelling.""" + + @staticmethod + def from_parameter(parameter: Parameter) -> ParameterKind: + """Determine the kind of a parameter from its type.""" + from baybe.parameters.categorical import TaskParameter + + if isinstance(parameter, TaskParameter): + return ParameterKind.TASK + return ParameterKind.REGULAR class ParameterEncoding(Enum): diff --git a/baybe/parameters/selectors.py b/baybe/parameters/selectors.py index 6b1a4ae16d..e72b6fe4d9 100644 --- a/baybe/parameters/selectors.py +++ b/baybe/parameters/selectors.py @@ -3,15 +3,13 @@ import re from abc import ABC, abstractmethod from collections.abc import Collection -from typing import ClassVar, Protocol +from typing import Protocol from attrs import Converter, define, field -from attrs.converters import optional from attrs.validators import deep_iterable, instance_of, min_len from typing_extensions import override from baybe.parameters.base import Parameter -from baybe.searchspace.core import SearchSpace from baybe.utils.basic import to_tuple from baybe.utils.conversion import nonstring_to_tuple @@ -131,37 +129,3 @@ def to_parameter_selector( return TypeSelector(items) raise TypeError(f"Cannot convert {x!r} to a parameter selector.") - - -@define -class _ParameterSelectorMixin: - """A mixin class to enable parameter selection.""" - - # For internal use only: sanity check mechanism to remind developers of new - # subclasses to actually use the parameter selector when it is provided - # TODO: Perhaps we can find a more elegant way to enforce this by design - _uses_parameter_names: ClassVar[bool] = False - - parameter_selector: ParameterSelectorProtocol | None = field( - default=None, converter=optional(to_parameter_selector), kw_only=True - ) - """An optional selector to specify which parameters are to be considered.""" - - def get_parameter_names(self, searchspace: SearchSpace) -> tuple[str, ...] | None: - """Get the names of the parameters to be considered.""" - if self.parameter_selector is None: - return None - - return tuple( - p.name for p in searchspace.parameters if self.parameter_selector(p) - ) - - def __attrs_post_init__(self): - if self.parameter_selector is not None and not self._uses_parameter_names: - raise AssertionError( - f"A `parameter_selector` was provided to " - f"`{type(self).__name__}`, but the class does not set " - f"`_uses_parameter_names = True`. Subclasses that accept a " - f"parameter selector must explicitly set this flag to confirm " - f"they actually use the selected parameter names." - ) diff --git a/baybe/surrogates/gaussian_process/components/_gpytorch.py b/baybe/surrogates/gaussian_process/components/_gpytorch.py new file mode 100644 index 0000000000..b7efc7bcbe --- /dev/null +++ b/baybe/surrogates/gaussian_process/components/_gpytorch.py @@ -0,0 +1,71 @@ +"""Custom GPyTorch components.""" + +import torch +from botorch.models.multitask import _compute_multitask_mean +from botorch.models.utils.gpytorch_modules import MIN_INFERRED_NOISE_LEVEL +from gpytorch.constraints import GreaterThan +from gpytorch.likelihoods.hadamard_gaussian_likelihood import HadamardGaussianLikelihood +from gpytorch.means import MultitaskMean +from gpytorch.means.multitask_mean import Mean +from gpytorch.priors import LogNormalPrior +from torch import Tensor +from torch.nn import Module + + +class HadamardConstantMean(Mean): + """A GPyTorch mean function implementing BoTorch's multitask mean logic. + + While GPyTorch already provides a :class:`~gpytorch.means.MultitaskMean` class, it + computes mean values for all (input, task)-pairs (where input means all parameters + except the task parameter), i.e. it intrinsically applies a Cartesian expansion. + However, for the regular transfer learning setting, we only need the means for the + pairs that are actually observed/requested. BoTorch subselects the relevant means + from the GPyTorch output in `MultiTaskGP.forward`, i.e. it uses a class-based + approach to define its special logic for the multitask case. In contrast, BayBE uses + a composition approach, which is more flexible but requires that the logic is + injected via a self-contained `Mean` object, which is what this class provides. + + Note: + Analogous to GPyTorch's + https://github.com/cornellius-gp/gpytorch/blob/main/gpytorch/likelihoods/hadamard_gaussian_likelihood.py + but where the logic is applied to the mean function, i.e. we learn a different + (constant) mean for each task. + """ + + def __init__(self, mean_module: Module, num_tasks: int, task_feature: int): + super().__init__() + self.multitask_mean = MultitaskMean(mean_module, num_tasks=num_tasks) + self.task_feature = task_feature + + def forward(self, x: Tensor) -> Tensor: + # Adapted from https://github.com/meta-pytorch/botorch/blob/e0f4f5b941b5949a4a1171bf8d4ee9f74f146f3a/botorch/models/multitask.py#L397 + + # Convert task feature to positive index + task_feature = self.task_feature % x.shape[-1] + + # Split input into task and non-task components + x_before = x[..., :task_feature] + task_idcs = x[..., task_feature : task_feature + 1] + x_after = x[..., task_feature + 1 :] + + return _compute_multitask_mean( + self.multitask_mean, x_before, task_idcs, x_after + ) + + +def make_botorch_multitask_likelihood( + num_tasks: int, task_feature: int +) -> HadamardGaussianLikelihood: + """Adapted from :class:`botorch.models.multitask.MultiTaskGP`.""" + noise_prior = LogNormalPrior(loc=-4.0, scale=1.0) + return HadamardGaussianLikelihood( + num_tasks=num_tasks, + batch_shape=torch.Size(), + noise_prior=noise_prior, + noise_constraint=GreaterThan( + MIN_INFERRED_NOISE_LEVEL, + transform=None, + initial_value=noise_prior.mode, + ), + task_feature_index=task_feature, + ) diff --git a/baybe/surrogates/gaussian_process/components/kernel.py b/baybe/surrogates/gaussian_process/components/kernel.py index ca4da066e7..28f5c1bcf4 100644 --- a/baybe/surrogates/gaussian_process/components/kernel.py +++ b/baybe/surrogates/gaussian_process/components/kernel.py @@ -2,18 +2,24 @@ from __future__ import annotations +from abc import ABC, abstractmethod +from collections.abc import Iterable from functools import partial -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, ClassVar from attrs import define, field +from attrs.converters import optional from attrs.validators import is_callable from typing_extensions import override +from baybe.exceptions import IncompatibleSearchSpaceError from baybe.kernels.base import Kernel -from baybe.kernels.composite import ProductKernel from baybe.parameters.categorical import TaskParameter +from baybe.parameters.enum import ParameterKind from baybe.parameters.selectors import ( + ParameterSelectorProtocol, TypeSelector, + to_parameter_selector, ) from baybe.searchspace.core import SearchSpace from baybe.surrogates.gaussian_process.components.generic import ( @@ -27,6 +33,8 @@ from gpytorch.kernels import Kernel as GPyTorchKernel from torch import Tensor + from baybe.parameters.base import Parameter + KernelFactoryProtocol = GPComponentFactoryProtocol[Kernel | GPyTorchKernel] PlainKernelFactory = PlainGPComponentFactory[Kernel | GPyTorchKernel] else: @@ -35,6 +43,80 @@ PlainKernelFactory = PlainGPComponentFactory[Kernel] +@define +class _KernelFactory(KernelFactoryProtocol, ABC): + """Base class for kernel factories.""" + + # For internal use only: sanity check mechanism to remind developers of new + # factories to actually use the parameter selector when it is provided + # TODO: Perhaps we can find a more elegant way to enforce this by design + _uses_parameter_names: ClassVar[bool] = False + + supported_parameter_kinds: ClassVar[ParameterKind] = ParameterKind.REGULAR + """The parameter kinds supported by the kernel factory.""" + + parameter_selector: ParameterSelectorProtocol | None = field( + default=None, converter=optional(to_parameter_selector) + ) + """An optional selector to specify which parameters are considered by the kernel.""" + + def get_parameter_names(self, searchspace: SearchSpace) -> tuple[str, ...] | None: + """Get the names of the parameters to be considered by the kernel.""" + if self.parameter_selector is None: + return None + + return tuple( + p.name for p in searchspace.parameters if self.parameter_selector(p) + ) + + def _validate_parameter_kinds(self, parameters: Iterable[Parameter]) -> None: + """Validate that the given parameters are supported by the factory. + + Args: + parameters: The parameters to validate. + + Raises: + IncompatibleSearchSpaceError: If unsupported parameter kinds are found. + """ + if unsupported := [ + p.name for p in parameters if not (p.kind & self.supported_parameter_kinds) + ]: + raise IncompatibleSearchSpaceError( + f"'{type(self).__name__}' does not support parameter kind(s) for " + f"parameter(s) {unsupported}. Supported kinds: " + f"{self.supported_parameter_kinds}." + ) + + @override + def __call__( + self, searchspace: SearchSpace, train_x: Tensor, train_y: Tensor + ) -> Kernel | GPyTorchKernel: + """Construct the kernel, validating parameter kinds before construction.""" + if self.parameter_selector is not None: + params = [p for p in searchspace.parameters if self.parameter_selector(p)] + else: + params = list(searchspace.parameters) + self._validate_parameter_kinds(params) + + return self._make(searchspace, train_x, train_y) + + @abstractmethod + def _make( + self, searchspace: SearchSpace, train_x: Tensor, train_y: Tensor + ) -> Kernel | GPyTorchKernel: + """Construct the kernel.""" + + def __attrs_post_init__(self): + if self.parameter_selector is not None and not self._uses_parameter_names: + raise AssertionError( + f"A `parameter_selector` was provided to " + f"`{type(self).__name__}`, but the class does not set " + f"`_uses_parameter_names = True`. Subclasses that accept a " + f"parameter selector must explicitly set this flag to confirm " + f"they actually use the selected parameter names." + ) + + @define class ICMKernelFactory(KernelFactoryProtocol): """A kernel factory that constructs an ICM kernel for transfer learning. @@ -76,6 +158,43 @@ def _default_task_kernel_factory(self) -> KernelFactoryProtocol: def __call__( self, searchspace: SearchSpace, train_x: Tensor, train_y: Tensor ) -> Kernel: + if searchspace.task_idx is None: + raise IncompatibleSearchSpaceError( + f"'{type(self).__name__}' can only be used with a searchspace that " + f"contains a '{TaskParameter.__name__}'." + ) + base_kernel = self.base_kernel_factory(searchspace, train_x, train_y) task_kernel = self.task_kernel_factory(searchspace, train_x, train_y) - return ProductKernel([base_kernel, task_kernel]) + if isinstance(base_kernel, Kernel): + base_kernel = base_kernel.to_gpytorch(searchspace) + if isinstance(task_kernel, Kernel): + task_kernel = task_kernel.to_gpytorch(searchspace) + + # Ensure correct partitioning between base and task kernels active dimensions + all_idcs = set(range(len(searchspace.comp_rep_columns))) + allowed_task_idcs = {searchspace.task_idx} + allowed_base_idcs = all_idcs - allowed_task_idcs + base_idcs = ( + set(dims) + if (dims := base_kernel.active_dims.tolist()) is not None + else None + ) + task_idcs = ( + set(dims) + if (dims := task_kernel.active_dims.tolist()) is not None + else None + ) + + if base_idcs is not None and (base_idcs > allowed_base_idcs): + raise ValueError( + f"The base kernel's 'active_dims' {base_idcs} must be a subset of " + f"the non-task indices {allowed_base_idcs}." + ) + if task_idcs != allowed_task_idcs: + raise ValueError( + f"The task kernel's 'active_dims' {task_idcs} does not match " + f"the task index {allowed_task_idcs}." + ) + + return base_kernel * task_kernel diff --git a/baybe/surrogates/gaussian_process/core.py b/baybe/surrogates/gaussian_process/core.py index 2b1a3f361e..3df3353985 100644 --- a/baybe/surrogates/gaussian_process/core.py +++ b/baybe/surrogates/gaussian_process/core.py @@ -291,18 +291,7 @@ def _fit(self, train_x: Tensor, train_y: Tensor) -> None: covar_module=kernel, likelihood=likelihood, ) - - # TODO: This is still a temporary workaround to avoid overfitting seen in - # low-dimensional TL cases. More robust settings are being researched. - if context.n_task_dimensions > 0: - mll = gpytorch.mlls.LeaveOneOutPseudoLikelihood( - self._model.likelihood, self._model - ) - else: - mll = gpytorch.ExactMarginalLogLikelihood( - self._model.likelihood, self._model - ) - + mll = gpytorch.ExactMarginalLogLikelihood(self._model.likelihood, self._model) botorch.fit.fit_gpytorch_mll(mll) @override diff --git a/baybe/surrogates/gaussian_process/presets/__init__.py b/baybe/surrogates/gaussian_process/presets/__init__.py index deb7de9e64..77b38af1cb 100644 --- a/baybe/surrogates/gaussian_process/presets/__init__.py +++ b/baybe/surrogates/gaussian_process/presets/__init__.py @@ -7,6 +7,13 @@ BayBEMeanFactory, ) +# BoTorch preset +from baybe.surrogates.gaussian_process.presets.botorch import ( + BotorchKernelFactory, + BotorchLikelihoodFactory, + BotorchMeanFactory, +) + # Core from baybe.surrogates.gaussian_process.presets.core import GaussianProcessPreset @@ -31,6 +38,10 @@ "BayBEKernelFactory", "BayBELikelihoodFactory", "BayBEMeanFactory", + # BoTorch preset + "BotorchKernelFactory", + "BotorchLikelihoodFactory", + "BotorchMeanFactory", # EDBO preset "EDBOKernelFactory", "EDBOLikelihoodFactory", diff --git a/baybe/surrogates/gaussian_process/presets/baybe.py b/baybe/surrogates/gaussian_process/presets/baybe.py index bc820ae52b..e96a08e6cc 100644 --- a/baybe/surrogates/gaussian_process/presets/baybe.py +++ b/baybe/surrogates/gaussian_process/presets/baybe.py @@ -10,14 +10,14 @@ from baybe.kernels.base import Kernel from baybe.kernels.basic import IndexKernel from baybe.parameters.categorical import TaskParameter +from baybe.parameters.enum import ParameterKind from baybe.parameters.selectors import ( ParameterSelectorProtocol, TypeSelector, - _ParameterSelectorMixin, to_parameter_selector, ) from baybe.searchspace.core import SearchSpace -from baybe.surrogates.gaussian_process.components.kernel import KernelFactoryProtocol +from baybe.surrogates.gaussian_process.components.kernel import _KernelFactory from baybe.surrogates.gaussian_process.components.mean import LazyConstantMeanFactory from baybe.surrogates.gaussian_process.presets.edbo_smoothed import ( SmoothedEDBOKernelFactory, @@ -29,11 +29,16 @@ @define -class BayBEKernelFactory(KernelFactoryProtocol): +class BayBEKernelFactory(_KernelFactory): """The default kernel factory for Gaussian process surrogates.""" + supported_parameter_kinds: ClassVar[ParameterKind] = ( + ParameterKind.REGULAR | ParameterKind.TASK + ) + # See base class. + @override - def __call__( + def _make( self, searchspace: SearchSpace, train_x: Tensor, train_y: Tensor ) -> Kernel: from baybe.surrogates.gaussian_process.components.kernel import ICMKernelFactory @@ -48,12 +53,15 @@ def __call__( @define -class BayBETaskKernelFactory(KernelFactoryProtocol, _ParameterSelectorMixin): +class BayBETaskKernelFactory(_KernelFactory): """The factory providing the default task kernel for Gaussian process surrogates.""" _uses_parameter_names: ClassVar[bool] = True # See base class. + supported_parameter_kinds: ClassVar[ParameterKind] = ParameterKind.TASK + # See base class. + parameter_selector: ParameterSelectorProtocol | None = field( factory=lambda: TypeSelector([TaskParameter]), converter=to_parameter_selector, @@ -61,7 +69,7 @@ class BayBETaskKernelFactory(KernelFactoryProtocol, _ParameterSelectorMixin): # TODO: Reuse base attribute (https://github.com/python-attrs/attrs/pull/1429) @override - def __call__( + def _make( self, searchspace: SearchSpace, train_x: Tensor, train_y: Tensor ) -> Kernel: return IndexKernel( diff --git a/baybe/surrogates/gaussian_process/presets/botorch.py b/baybe/surrogates/gaussian_process/presets/botorch.py new file mode 100644 index 0000000000..30135bc468 --- /dev/null +++ b/baybe/surrogates/gaussian_process/presets/botorch.py @@ -0,0 +1,145 @@ +"""BoTorch preset for Gaussian process surrogates.""" + +from __future__ import annotations + +import gc +from itertools import chain +from typing import TYPE_CHECKING, ClassVar + +from attrs import define +from typing_extensions import override + +from baybe.kernels.base import Kernel +from baybe.parameters.enum import ParameterKind +from baybe.searchspace.core import SearchSpace +from baybe.surrogates.gaussian_process.components import LikelihoodFactoryProtocol +from baybe.surrogates.gaussian_process.components._gpytorch import ( + make_botorch_multitask_likelihood, +) +from baybe.surrogates.gaussian_process.components.kernel import ( + ICMKernelFactory, + _KernelFactory, +) +from baybe.surrogates.gaussian_process.components.mean import MeanFactoryProtocol + +if TYPE_CHECKING: + from gpytorch.kernels import Kernel as GPyTorchKernel + from gpytorch.likelihoods import Likelihood as GPyTorchLikelihood + from gpytorch.means import Mean as GPyTorchMean + from torch import Tensor + + +@define +class BotorchKernelFactory(_KernelFactory): + """A factory providing BoTorch kernels.""" + + _uses_parameter_names: ClassVar[bool] = True + # See base class. + + supported_parameter_kinds: ClassVar[ParameterKind] = ( + ParameterKind.REGULAR | ParameterKind.TASK + ) + # See base class. + + @override + def _make( + self, searchspace: SearchSpace, train_x: Tensor, train_y: Tensor + ) -> Kernel | GPyTorchKernel: + from botorch.models.kernels.positive_index import PositiveIndexKernel + from botorch.models.utils.gpytorch_modules import ( + get_covar_module_with_dim_scaled_prior, + ) + + parameter_names = self.get_parameter_names(searchspace) + + # Resolve parameter names to active dimension indices + active_dims: list[int] | None + if parameter_names is not None: + active_dims = list( + chain.from_iterable( + searchspace.get_comp_rep_parameter_indices(name) + for name in parameter_names + ) + ) + ard_num_dims = len(active_dims) + else: + active_dims = None + ard_num_dims = len(searchspace.comp_rep_columns) + + # Determine if the selected parameters include a task parameter + task_idx = searchspace.task_idx + is_multitask = task_idx is not None and ( + active_dims is None or task_idx in active_dims + ) + + if not is_multitask: + return get_covar_module_with_dim_scaled_prior( + ard_num_dims=ard_num_dims, active_dims=active_dims + ) + + assert task_idx is not None + base_idcs = [ + idx + for idx in (active_dims or range(len(searchspace.comp_rep_columns))) + if idx != task_idx + ] + base = get_covar_module_with_dim_scaled_prior( + ard_num_dims=len(base_idcs), active_dims=base_idcs + ) + index_kernel = PositiveIndexKernel( + num_tasks=searchspace.n_tasks, + rank=searchspace.n_tasks, + active_dims=[task_idx], + ) + return ICMKernelFactory(base, index_kernel)(searchspace, train_x, train_y) + + +class BotorchMeanFactory(MeanFactoryProtocol): + """A factory providing BoTorch mean functions.""" + + @override + def __call__( + self, searchspace: SearchSpace, train_x: Tensor, train_y: Tensor + ) -> GPyTorchMean: + from gpytorch.means import ConstantMean + + from baybe.surrogates.gaussian_process.components._gpytorch import ( + HadamardConstantMean, + ) + + if searchspace.n_tasks == 1: + return ConstantMean() + + assert searchspace.task_idx is not None + return HadamardConstantMean( + ConstantMean(), searchspace.n_tasks, searchspace.task_idx + ) + + +class BotorchLikelihoodFactory(LikelihoodFactoryProtocol): + """A factory providing BoTorch likelihoods.""" + + @override + def __call__( + self, searchspace: SearchSpace, train_x: Tensor, train_y: Tensor + ) -> GPyTorchLikelihood: + from botorch.models.utils.gpytorch_modules import ( + get_gaussian_likelihood_with_lognormal_prior, + ) + + if searchspace.n_tasks == 1: + return get_gaussian_likelihood_with_lognormal_prior() + + assert searchspace.task_idx is not None + return make_botorch_multitask_likelihood( + num_tasks=searchspace.n_tasks, task_feature=searchspace.task_idx + ) + + +# Collect leftover original slotted classes processed by `attrs.define` +gc.collect() + +# Aliases for generic preset imports +PresetKernelFactory = BotorchKernelFactory +PresetMeanFactory = BotorchMeanFactory +PresetLikelihoodFactory = BotorchLikelihoodFactory diff --git a/baybe/surrogates/gaussian_process/presets/core.py b/baybe/surrogates/gaussian_process/presets/core.py index ad77df0b4d..f65762c1fa 100644 --- a/baybe/surrogates/gaussian_process/presets/core.py +++ b/baybe/surrogates/gaussian_process/presets/core.py @@ -11,6 +11,9 @@ class GaussianProcessPreset(Enum): BAYBE = "BAYBE" """The default BayBE settings of the Gaussian process surrogate class.""" + BOTORCH = "BOTORCH" + """The BoTorch settings.""" + EDBO = "EDBO" """The EDBO settings.""" diff --git a/baybe/surrogates/gaussian_process/presets/edbo.py b/baybe/surrogates/gaussian_process/presets/edbo.py index 36611c258e..ecf24de106 100644 --- a/baybe/surrogates/gaussian_process/presets/edbo.py +++ b/baybe/surrogates/gaussian_process/presets/edbo.py @@ -16,13 +16,14 @@ from baybe.parameters.selectors import ( ParameterSelectorProtocol, TypeSelector, - _ParameterSelectorMixin, to_parameter_selector, ) from baybe.parameters.substance import SubstanceParameter from baybe.priors.basic import GammaPrior from baybe.searchspace.discrete import SubspaceDiscrete -from baybe.surrogates.gaussian_process.components.kernel import KernelFactoryProtocol +from baybe.surrogates.gaussian_process.components.kernel import ( + _KernelFactory, +) from baybe.surrogates.gaussian_process.components.likelihood import ( LikelihoodFactoryProtocol, ) @@ -56,7 +57,7 @@ def _contains_encoding( @define -class EDBOKernelFactory(KernelFactoryProtocol, _ParameterSelectorMixin): +class EDBOKernelFactory(_KernelFactory): """A factory providing EDBO kernels. References: @@ -74,12 +75,10 @@ class EDBOKernelFactory(KernelFactoryProtocol, _ParameterSelectorMixin): # TODO: Reuse base attribute (https://github.com/python-attrs/attrs/pull/1429) @override - def __call__( + def _make( self, searchspace: SearchSpace, train_x: Tensor, train_y: Tensor ) -> Kernel: - effective_dims = train_x.shape[-1] - len( - [p for p in searchspace.parameters if isinstance(p, TaskParameter)] - ) + effective_dims = train_x.shape[-1] switching_condition = _contains_encoding( searchspace.discrete, _EDBO_ENCODINGS diff --git a/baybe/surrogates/gaussian_process/presets/edbo_smoothed.py b/baybe/surrogates/gaussian_process/presets/edbo_smoothed.py index eb8d57491a..89cf9a9ef2 100644 --- a/baybe/surrogates/gaussian_process/presets/edbo_smoothed.py +++ b/baybe/surrogates/gaussian_process/presets/edbo_smoothed.py @@ -15,11 +15,12 @@ from baybe.parameters.selectors import ( ParameterSelectorProtocol, TypeSelector, - _ParameterSelectorMixin, to_parameter_selector, ) from baybe.priors.basic import GammaPrior -from baybe.surrogates.gaussian_process.components.kernel import KernelFactoryProtocol +from baybe.surrogates.gaussian_process.components.kernel import ( + _KernelFactory, +) from baybe.surrogates.gaussian_process.components.likelihood import ( LikelihoodFactoryProtocol, ) @@ -38,7 +39,7 @@ @define -class SmoothedEDBOKernelFactory(KernelFactoryProtocol, _ParameterSelectorMixin): +class SmoothedEDBOKernelFactory(_KernelFactory): """A factory providing smoothed versions of EDBO kernels. Takes the low and high dimensional limits of @@ -56,12 +57,10 @@ class SmoothedEDBOKernelFactory(KernelFactoryProtocol, _ParameterSelectorMixin): # TODO: Reuse base attribute (https://github.com/python-attrs/attrs/pull/1429) @override - def __call__( + def _make( self, searchspace: SearchSpace, train_x: Tensor, train_y: Tensor ) -> Kernel: - effective_dims = train_x.shape[-1] - len( - [p for p in searchspace.parameters if isinstance(p, TaskParameter)] - ) + effective_dims = train_x.shape[-1] # Interpolate prior moments linearly between low D and high D regime. # The high D regime itself is the average of the EDBO OHE and Mordred regime. diff --git a/docs/conf.py b/docs/conf.py index 5251bb8654..6096369714 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -150,7 +150,7 @@ ("py:class", "baybe.acquisition.acqfs._ExpectedHypervolumeImprovement"), ("py:class", "baybe.settings._SlottedContextDecorator"), ("py:class", "baybe.surrogates.gaussian_process.components.PlainKernelFactory"), - ("py:class", "baybe.parameters.selectors._ParameterSelectorMixin"), + ("py:class", "baybe.surrogates.gaussian_process.components.kernel._KernelFactory"), # Deprecation ("py:.*", "baybe.targets._deprecated.*"), ] diff --git a/streamlit/surrogate_models.py b/streamlit/surrogate_models.py index 83415b91b9..f71b34a6e8 100644 --- a/streamlit/surrogate_models.py +++ b/streamlit/surrogate_models.py @@ -19,11 +19,12 @@ from baybe.acquisition import qLogExpectedImprovement from baybe.acquisition.base import AcquisitionFunction from baybe.exceptions import IncompatibleSurrogateError -from baybe.parameters import NumericalDiscreteParameter +from baybe.parameters import NumericalDiscreteParameter, TaskParameter from baybe.recommenders import BotorchRecommender from baybe.searchspace import SearchSpace from baybe.surrogates import CustomONNXSurrogate, GaussianProcessSurrogate from baybe.surrogates.base import Surrogate +from baybe.surrogates.gaussian_process.presets import GaussianProcessPreset from baybe.targets import NumericalTarget from baybe.utils.basic import get_subclasses @@ -90,13 +91,36 @@ def main(): } acquisition_function_names = list(acquisition_function_classes.keys()) - # Streamlit simulation parameters + # >>>>> Sidebar options >>>>> + # Domain st.sidebar.markdown("# Domain") + st_enable_multitask = st.sidebar.toggle("Multi-task") + st_n_tasks = 2 if st_enable_multitask else 1 st_random_seed = int(st.sidebar.number_input("Random seed", value=1337)) st_function_name = st.sidebar.selectbox( "Test function", list(test_functions.keys()) ) st_minimize = st.sidebar.checkbox("Minimize") + + # Training data + st.sidebar.markdown("---") + st.sidebar.markdown("# Training Data") + st_n_training_points = st.sidebar.slider( + "Number of training points", + 0 if st_enable_multitask else 1, + 20, + 0 if st_enable_multitask else 5, + ) + if st_enable_multitask: + st_n_training_points_other = st.sidebar.slider( + "Number of off-task training points", 0, 20, 5 + ) + st_offtask_scale = st.sidebar.slider("Off-task scale factor", -20.0, 20.0, 1.0) + st_offtask_offset_factor = st.sidebar.slider( + "Off-task offset", -100.0, 100.0, 0.0 + ) + + # Model st.sidebar.markdown("---") st.sidebar.markdown("# Model") st_surrogate_name = st.sidebar.selectbox( @@ -104,13 +128,26 @@ def main(): surrogate_model_names, surrogate_model_names.index(GaussianProcessSurrogate.__name__), ) + + st_gp_preset = None + st_transfer_learning = False + if st_surrogate_name == GaussianProcessSurrogate.__name__: + preset_names = [preset.value for preset in GaussianProcessPreset] + st_gp_preset = st.sidebar.selectbox( + "GP Preset", + preset_names, + index=preset_names.index(GaussianProcessPreset.BAYBE.value), + ) + if st_enable_multitask: + st_transfer_learning = st.sidebar.checkbox("Transfer learning", value=True) st_acqf_name = st.sidebar.selectbox( "Acquisition function", acquisition_function_names, acquisition_function_names.index(qLogExpectedImprovement.__name__), ) - st_n_training_points = st.sidebar.slider("Number of training points", 1, 20, 5) st_n_recommendations = st.sidebar.slider("Number of recommendations", 1, 20, 5) + + # Surrogate consistency validation st.sidebar.markdown("---") st.sidebar.markdown("# Validation") st.sidebar.markdown( @@ -127,6 +164,10 @@ def main(): ) st_function_amplitude = st.sidebar.slider("Function amplitude", 1.0, 100.0, 1.0) st_function_bias = st.sidebar.slider("Function bias", -100.0, 100.0, 0.0) + # <<<<< Sidebar options <<<<< + + # Derived settings + st_use_separate_gps = st_n_tasks > 1 and not st_transfer_learning # Set the chosen random seed active_settings.random_seed = st_random_seed @@ -140,68 +181,184 @@ def main(): bias=st_function_bias, ) - # Create the training data - train_x = np.random.uniform( - st_lower_parameter_limit, st_upper_parameter_limit, st_n_training_points - ) - train_y = fun(train_x) - measurements = pd.DataFrame({"x": train_x, "y": train_y}) + # Generate task-specific transforms (scale and offset for each task) + task_names = ["on-task", "off-task"] if st_n_tasks > 1 else ["on-task"] + task_transforms = {} + for task_idx in range(st_n_tasks): + task_name = task_names[task_idx] + if task_idx == 0: + # On-task: use original function values + task_transforms[task_name] = {"scale": 1.0, "offset": 0.0} + else: + # Off-task: use user-specified scale and offset + scale = st_offtask_scale + offset = st_offtask_offset_factor * st_function_amplitude + task_transforms[task_name] = {"scale": scale, "offset": offset} + + # Create training data + measurements_list = [] + for task_idx in range(st_n_tasks): + task_name = task_names[task_idx] + transform = task_transforms[task_name] + n_points = st_n_training_points if task_idx == 0 else st_n_training_points_other + train_x = np.random.uniform( + st_lower_parameter_limit, st_upper_parameter_limit, n_points + ) + task_measurements = pd.DataFrame( + { + "x": train_x, + "task": task_name, + "y": fun(train_x) * transform["scale"] + transform["offset"], + } + ) + measurements_list.append(task_measurements) + measurements = pd.concat(measurements_list, ignore_index=True) # Create the plotting grid and corresponding target values test_x = np.linspace( st_lower_parameter_limit, st_upper_parameter_limit, N_PARAMETER_VALUES ) - test_y = fun(test_x) - candidates = pd.DataFrame({"x": test_x, "y": test_y}) + candidates_list = [] + test_ys = {} + for task_idx in range(st_n_tasks): + task_name = task_names[task_idx] + transform = task_transforms[task_name] + test_ys[task_name] = fun(test_x) * transform["scale"] + transform["offset"] + task_candidates = pd.DataFrame( + {"x": test_x, "task": task_name, "y": test_ys[task_name]} + ) + candidates_list.append(task_candidates) + candidates = pd.concat(candidates_list, ignore_index=True) # Create the searchspace and objective - parameter = NumericalDiscreteParameter( - name="x", - values=np.linspace( - st_lower_parameter_limit, st_upper_parameter_limit, N_PARAMETER_VALUES - ), - ) - searchspace = SearchSpace.from_product(parameters=[parameter]) + parameters = [ + NumericalDiscreteParameter( + name="x", + values=np.linspace( + st_lower_parameter_limit, st_upper_parameter_limit, N_PARAMETER_VALUES + ), + ) + ] + if st_transfer_learning: + parameters.append( + TaskParameter( + name="task", + values=task_names, + active_values=["on-task"], + ) + ) + searchspace = SearchSpace.from_product(parameters=parameters) objective = NumericalTarget(name="y", minimize=st_minimize).to_objective() - # Create the acquisition function and the recommender + # Create the acquisition function acqf_cls = acquisition_function_classes[st_acqf_name] try: acqf = acqf_cls(maximize=not st_minimize) except TypeError: acqf = acqf_cls() - recommender = BotorchRecommender( - surrogate_model=surrogate_model_classes[st_surrogate_name](), - acquisition_function=acqf, - ) - # Get the recommendations and extract the posterior mean / standard deviation - try: - recommendations = recommender.recommend( - st_n_recommendations, searchspace, objective, measurements - ) - except IncompatibleSurrogateError: - st.error( - f"You requested {st_n_recommendations} recommendations but the selected " - f"surrogate class does not support recommending more than one candidate " - f"at a time." + def make_surrogate(): + if st_surrogate_name == GaussianProcessSurrogate.__name__: + assert st_gp_preset is not None + return GaussianProcessSurrogate.from_preset( + preset=GaussianProcessPreset[st_gp_preset] + ) + return surrogate_model_classes[st_surrogate_name]() + + if st_use_separate_gps: + # One independent GP per task, each trained without the task column + stats_by_task = {} + for task_name in task_names: + task_meas = measurements[measurements["task"] == task_name][ + ["x", "y"] + ].reset_index(drop=True) + task_recommender = BotorchRecommender( + surrogate_model=make_surrogate(), + acquisition_function=acqf, + ) + if task_name == "on-task": + try: + recommendations = task_recommender.recommend( + st_n_recommendations, searchspace, objective, task_meas + ) + except IncompatibleSurrogateError: + st.error( + f"You requested {st_n_recommendations} recommendations but " + f"the selected surrogate class does not support recommending " + f"more than one candidate at a time." + ) + st.stop() + task_surrogate = task_recommender.get_surrogate( + searchspace, objective, task_meas + ) + stats_by_task[task_name] = task_surrogate.posterior_stats( + pd.DataFrame({"x": test_x}) + ) + else: + # Single recommender (single task, or multi-task with transfer learning) + recommender = BotorchRecommender( + surrogate_model=make_surrogate(), + acquisition_function=acqf, ) - st.stop() - surrogate = recommender.get_surrogate(searchspace, objective, measurements) - stats = surrogate.posterior_stats(candidates) - mean, std = stats["y_mean"], stats["y_std"] + try: + recommendations = recommender.recommend( + st_n_recommendations, searchspace, objective, measurements + ) + except IncompatibleSurrogateError: + st.error( + f"You requested {st_n_recommendations} recommendations but the " + f"selected surrogate class does not support recommending more than " + f"one candidate at a time." + ) + st.stop() + surrogate = recommender.get_surrogate(searchspace, objective, measurements) + stats = surrogate.posterior_stats(candidates) # Visualize the test function, training points, model predictions, recommendations - fig = plt.figure() - plt.plot(test_x, test_y, color="tab:blue", label="Test function") - plt.plot(train_x, train_y, "o", color="tab:blue") - plt.plot(test_x, mean, color="tab:red", label="Surrogate model") - plt.fill_between(test_x, mean - std, mean + std, alpha=0.2, color="tab:red") - plt.vlines( - recommendations, *plt.gca().get_ylim(), color="k", label="Recommendations" - ) - plt.legend() - st.pyplot(fig) + if st_n_tasks > 1: + cols = st.columns(st_n_tasks) + + for task_idx in range(st_n_tasks): + task_name = task_names[task_idx] + task_mask = candidates["task"] == task_name if st_n_tasks > 1 else slice(None) + + if st_use_separate_gps: + task_stats = stats_by_task[task_name] + mean = task_stats["y_mean"].values + std = task_stats["y_std"].values + elif st_n_tasks > 1: + mean = stats["y_mean"][task_mask].values + std = stats["y_std"][task_mask].values + else: + mean = stats["y_mean"].values + std = stats["y_std"].values + + test_y = test_ys[task_name] + train_mask = ( + measurements["task"] == task_name if st_n_tasks > 1 else slice(None) + ) + train_y = measurements[train_mask]["y"].values + task_train_x = measurements[train_mask]["x"].values + + fig = plt.figure() + plt.plot(test_x, test_y, color="tab:blue", label="Test function") + plt.plot(task_train_x, train_y, "o", color="tab:blue") + plt.plot(test_x, mean, color="tab:red", label="Surrogate model") + plt.fill_between(test_x, mean - std, mean + std, alpha=0.2, color="tab:red") + if task_name == "on-task": + plt.vlines( + recommendations["x"] if st_n_tasks > 1 else recommendations, + *plt.gca().get_ylim(), + color="k", + label="Recommendations", + ) + plt.legend() + if st_n_tasks > 1: + plt.title(task_name.capitalize()) + with cols[task_idx]: + st.pyplot(fig) + else: + st.pyplot(fig) if __name__ == "__main__": diff --git a/tests/test_gp.py b/tests/test_gp.py index 30ca69bc0e..eec3297559 100644 --- a/tests/test_gp.py +++ b/tests/test_gp.py @@ -1,6 +1,11 @@ """Tests for the Gaussian Process surrogate.""" +import pandas as pd import pytest +import torch +from botorch.fit import fit_gpytorch_mll +from botorch.models import MultiTaskGP, SingleTaskGP +from botorch.models.transforms import Normalize, Standardize from gpytorch.kernels import MaternKernel as GPyTorchMaternKernel from gpytorch.kernels import RBFKernel as GPyTorchRBFKernel from gpytorch.kernels import ScaleKernel as GPyTorchScaleKernel @@ -8,23 +13,37 @@ from gpytorch.likelihoods import Likelihood as GPyTorchLikelihood from gpytorch.means import ConstantMean from gpytorch.means import Mean as GPyTorchMean +from gpytorch.mlls import ExactMarginalLogLikelihood from pandas.testing import assert_frame_equal from pytest import param +from baybe import active_settings from baybe.kernels.basic import MaternKernel, RBFKernel -from baybe.kernels.composite import ScaleKernel +from baybe.kernels.composite import AdditiveKernel, ScaleKernel +from baybe.parameters.categorical import TaskParameter from baybe.parameters.numerical import NumericalContinuousParameter +from baybe.searchspace.core import SearchSpace from baybe.surrogates.gaussian_process.components.generic import PlainGPComponentFactory from baybe.surrogates.gaussian_process.core import GaussianProcessSurrogate from baybe.surrogates.gaussian_process.presets import GaussianProcessPreset from baybe.targets.numerical import NumericalTarget -from baybe.utils.dataframe import create_fake_input +from baybe.utils.dataframe import create_fake_input, to_tensor searchspace = NumericalContinuousParameter("p", (0, 1)).to_searchspace() +searchspace_mt = SearchSpace.from_product( + [ + NumericalContinuousParameter("p", (0, 1)), + TaskParameter("task", ["a", "b", "c"]), + ] +) objective = NumericalTarget("t").to_objective() measurements = create_fake_input(searchspace.parameters, objective.targets, n_rows=100) baybe_kernel = ScaleKernel(MaternKernel() + RBFKernel()) +measurements_mt = create_fake_input( + searchspace_mt.parameters, objective.targets, n_rows=100 +) +baybe_kernel = ScaleKernel(AdditiveKernel([MaternKernel(), RBFKernel()])) gpytorch_kernel = GPyTorchScaleKernel(GPyTorchMaternKernel() + GPyTorchRBFKernel()) @@ -36,6 +55,52 @@ def _dummy_likelihood_factory(*args, **kwargs) -> GPyTorchLikelihood: return GaussianLikelihood() +def _posterior_stats_botorch( + searchspace: SearchSpace, measurements: pd.DataFrame +) -> pd.DataFrame: + """The essential BoTorch steps to produce posterior estimates.""" + train_X = to_tensor(searchspace.transform(measurements, allow_extra=True)) + train_Y = to_tensor(objective.transform(measurements, allow_extra=True)) + + # >>>>> Code adapted from BoTorch landing page: https://botorch.org/ >>>>> + # NOTE: We normalize according to the searchspace bounds to ensure consisteny with + # the BayBE GP implementation. + if searchspace.n_tasks == 1: + gp = SingleTaskGP( + train_X=train_X, + train_Y=train_Y, + input_transform=Normalize( + d=len(searchspace.comp_rep_columns), + bounds=to_tensor(searchspace.scaling_bounds), + ), + outcome_transform=Standardize(m=1), + ) + else: + assert searchspace.task_idx is not None + non_task_idcs = [ + i for i in range(train_X.shape[-1]) if i != searchspace.task_idx + ] + gp = MultiTaskGP( + train_X=train_X, + train_Y=train_Y, + task_feature=searchspace.task_idx, + input_transform=Normalize( + d=len(searchspace.comp_rep_columns), + indices=non_task_idcs, + bounds=to_tensor(searchspace.scaling_bounds), + ), + ) + mll = ExactMarginalLogLikelihood(gp.likelihood, gp) + fit_gpytorch_mll(mll) + # <<<<<<<<<< + + with torch.no_grad(): + posterior = gp.posterior(train_X) + mean = posterior.mean + std = posterior.variance.sqrt() + return pd.DataFrame({"t_mean": mean.numpy().ravel(), "t_std": std.numpy().ravel()}) + + @pytest.mark.parametrize( ("component_1", "component_2"), [ @@ -117,3 +182,25 @@ def test_invalid_components(): GaussianProcessSurrogate(mean_or_factory=GaussianLikelihood()) with pytest.raises(TypeError, match="Component must be one of"): GaussianProcessSurrogate(likelihood_or_factory=MaternKernel()) + + +@pytest.mark.parametrize("multitask", [False, True], ids=["single-task", "multi-task"]) +def test_botorch_preset(multitask: bool, monkeypatch): + """The BoTorch preset exactly mimics BoTorch's behavior.""" + if multitask: + monkeypatch.setenv("BAYBE_DISABLE_CUSTOM_KERNEL_WARNING", "true") + sp = searchspace_mt + data = measurements_mt + else: + sp = searchspace + data = measurements + + active_settings.random_seed = 1337 + gp = GaussianProcessSurrogate.from_preset("BOTORCH") + gp.fit(sp, objective, data) + posterior1 = gp.posterior_stats(data) + + active_settings.random_seed = 1337 + posterior2 = _posterior_stats_botorch(sp, data) + + assert_frame_equal(posterior1, posterior2) diff --git a/tests/test_kernel_factories.py b/tests/test_kernel_factories.py new file mode 100644 index 0000000000..6a8acda6a6 --- /dev/null +++ b/tests/test_kernel_factories.py @@ -0,0 +1,75 @@ +"""Tests for kernel factories.""" + +from contextlib import nullcontext + +import pytest +import torch +from pytest import param + +from baybe.exceptions import IncompatibleSearchSpaceError +from baybe.parameters.categorical import CategoricalParameter, TaskParameter +from baybe.parameters.numerical import ( + NumericalContinuousParameter, + NumericalDiscreteParameter, +) +from baybe.searchspace.core import SearchSpace +from baybe.surrogates.gaussian_process.presets.baybe import ( + BayBEKernelFactory, + BayBENumericalKernelFactory, + BayBETaskKernelFactory, +) + +# A selector that accepts all parameters +_SELECT_ALL = lambda parameter: True # noqa: E731 + + +@pytest.mark.parametrize( + ("factory", "parameters", "error"), + [ + param( + BayBENumericalKernelFactory(parameter_selector=_SELECT_ALL), + [TaskParameter("task", ["t1", "t2"])], + IncompatibleSearchSpaceError, + id="regular_rejects_task", + ), + param( + BayBETaskKernelFactory(parameter_selector=_SELECT_ALL), + [CategoricalParameter("cat", ["a", "b"])], + IncompatibleSearchSpaceError, + id="task_rejects_categorical", + ), + param( + BayBETaskKernelFactory(parameter_selector=_SELECT_ALL), + [NumericalDiscreteParameter("num", [1, 2, 3])], + IncompatibleSearchSpaceError, + id="task_rejects_numerical_discrete", + ), + param( + BayBETaskKernelFactory(parameter_selector=_SELECT_ALL), + [NumericalContinuousParameter("cont", (0, 1))], + IncompatibleSearchSpaceError, + id="task_rejects_numerical_continuous", + ), + param( + BayBEKernelFactory(), + [ + NumericalContinuousParameter("cont", (0, 1)), + TaskParameter("task", ["t1", "t2"]), + ], + None, + id="combined_accepts_both", + ), + ], +) +def test_factory_parameter_kind_validation(factory, parameters, error): + """Factories reject unsupported parameter kinds and accept supported ones.""" + ss = SearchSpace.from_product(parameters) + train_x = torch.zeros(2, len(ss.comp_rep_columns)) + train_y = torch.zeros(2, 1) + + with ( + nullcontext() + if error is None + else pytest.raises(error, match="does not support") + ): + factory(ss, train_x, train_y)