diff --git a/CHANGELOG.md b/CHANGELOG.md
index 680f11da28..3d2db9af2d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -16,9 +16,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Support for GPyTorch objects (kernels, means, likelihood) as Gaussian process
   components, enabling full low-level customization
 - Factories for all Gaussian process components
-- `EDBO` and `EDBO_SMOOTHED` presets for `GaussianProcessSurrogate`
+- `BOTORCH`, `EDBO` and `EDBO_SMOOTHED` presets for `GaussianProcessSurrogate`
 - `TypeSelector` and `NameSelector` classes for parameter selection in kernel factories
 - `parameter_names` attribute to basic kernels for controlling the considered parameters
+- `ParameterKind` flag enum for classifying parameters by their role and automatic
+  parameter kind validation in kernel factories
 - `IndexKernel` and `PositiveIndexKernel` classes
 - Interpoint constraints for continuous search spaces
 - `IndexKernel` and `PositiveIndexKernel` classes
@@ -26,6 +28,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   composition via `+` (sum) and `*` (product), as well as `constant * kernel`
   for creating a `ScaleKernel` with a fixed output scale
 
+### Changed
+- Gaussian processes no longer invoke leave-one-out training for multitask scenarios but 
+  can now rely on improved model priors for generalization
+
 ### Breaking Changes
 - `ContinuousLinearConstraint.to_botorch` now returns a collection of constraint tuples
   instead of a single tuple (needed for interpoint constraints)
diff --git a/baybe/parameters/__init__.py b/baybe/parameters/__init__.py
index 93e62b6ee9..6fe2c0d8f4 100644
--- a/baybe/parameters/__init__.py
+++ b/baybe/parameters/__init__.py
@@ -5,6 +5,7 @@
 from baybe.parameters.enum import (
     CategoricalEncoding,
     CustomEncoding,
+    ParameterKind,
     SubstanceEncoding,
 )
 from baybe.parameters.numerical import (
@@ -22,6 +23,7 @@
     "MeasurableMetadata",
     "NumericalContinuousParameter",
     "NumericalDiscreteParameter",
+    "ParameterKind",
     "SubstanceEncoding",
     "SubstanceParameter",
     "TaskParameter",
diff --git a/baybe/parameters/base.py b/baybe/parameters/base.py
index 2d4df2bc77..7986a21124 100644
--- a/baybe/parameters/base.py
+++ b/baybe/parameters/base.py
@@ -21,6 +21,7 @@
 from baybe.utils.metadata import MeasurableMetadata, to_metadata
 
 if TYPE_CHECKING:
+    from baybe.parameters.enum import ParameterKind
     from baybe.searchspace.continuous import SubspaceContinuous
     from baybe.searchspace.core import SearchSpace
     from baybe.searchspace.discrete import SubspaceDiscrete
@@ -77,6 +78,13 @@ def is_discrete(self) -> bool:
         """Boolean indicating if this is a discrete parameter."""
         return isinstance(self, DiscreteParameter)
 
+    @property
+    def kind(self) -> ParameterKind:
+        """The kind of the parameter."""
+        from baybe.parameters.enum import ParameterKind
+
+        return ParameterKind.from_parameter(self)
+
     @property
     @abstractmethod
     def comp_rep_columns(self) -> tuple[str, ...]:
diff --git a/baybe/parameters/enum.py b/baybe/parameters/enum.py
index 3161f67dc7..07e9f9fb34 100644
--- a/baybe/parameters/enum.py
+++ b/baybe/parameters/enum.py
@@ -1,6 +1,38 @@
 """Parameter-related enumerations."""
 
-from enum import Enum
+from __future__ import annotations
+
+from enum import Enum, Flag, auto
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from baybe.parameters.base import Parameter
+
+
+class ParameterKind(Flag):
+    """Flag enum encoding the kind of a parameter.
+
+    Can be used to express compatibility (e.g. Gaussian process kernel factories)
+    with different parameter types via bitwise combination of flags.
+    """
+
+    REGULAR = auto()
+    """Regular parameter undergoing no special treatment."""
+
+    TASK = auto()
+    """Task parameter for transfer learning."""
+
+    FIDELITY = auto()
+    """Fidelity parameter for multi-fidelity modelling."""
+
+    @staticmethod
+    def from_parameter(parameter: Parameter) -> ParameterKind:
+        """Determine the kind of a parameter from its type."""
+        from baybe.parameters.categorical import TaskParameter
+
+        if isinstance(parameter, TaskParameter):
+            return ParameterKind.TASK
+        return ParameterKind.REGULAR
 
 
 class ParameterEncoding(Enum):
diff --git a/baybe/parameters/selectors.py b/baybe/parameters/selectors.py
index 6b1a4ae16d..e72b6fe4d9 100644
--- a/baybe/parameters/selectors.py
+++ b/baybe/parameters/selectors.py
@@ -3,15 +3,13 @@
 import re
 from abc import ABC, abstractmethod
 from collections.abc import Collection
-from typing import ClassVar, Protocol
+from typing import Protocol
 
 from attrs import Converter, define, field
-from attrs.converters import optional
 from attrs.validators import deep_iterable, instance_of, min_len
 from typing_extensions import override
 
 from baybe.parameters.base import Parameter
-from baybe.searchspace.core import SearchSpace
 from baybe.utils.basic import to_tuple
 from baybe.utils.conversion import nonstring_to_tuple
 
@@ -131,37 +129,3 @@ def to_parameter_selector(
         return TypeSelector(items)
 
     raise TypeError(f"Cannot convert {x!r} to a parameter selector.")
-
-
-@define
-class _ParameterSelectorMixin:
-    """A mixin class to enable parameter selection."""
-
-    # For internal use only: sanity check mechanism to remind developers of new
-    # subclasses to actually use the parameter selector when it is provided
-    # TODO: Perhaps we can find a more elegant way to enforce this by design
-    _uses_parameter_names: ClassVar[bool] = False
-
-    parameter_selector: ParameterSelectorProtocol | None = field(
-        default=None, converter=optional(to_parameter_selector), kw_only=True
-    )
-    """An optional selector to specify which parameters are to be considered."""
-
-    def get_parameter_names(self, searchspace: SearchSpace) -> tuple[str, ...] | None:
-        """Get the names of the parameters to be considered."""
-        if self.parameter_selector is None:
-            return None
-
-        return tuple(
-            p.name for p in searchspace.parameters if self.parameter_selector(p)
-        )
-
-    def __attrs_post_init__(self):
-        if self.parameter_selector is not None and not self._uses_parameter_names:
-            raise AssertionError(
-                f"A `parameter_selector` was provided to "
-                f"`{type(self).__name__}`, but the class does not set "
-                f"`_uses_parameter_names = True`. Subclasses that accept a "
-                f"parameter selector must explicitly set this flag to confirm "
-                f"they actually use the selected parameter names."
-            )
diff --git a/baybe/surrogates/gaussian_process/components/_gpytorch.py b/baybe/surrogates/gaussian_process/components/_gpytorch.py
new file mode 100644
index 0000000000..b7efc7bcbe
--- /dev/null
+++ b/baybe/surrogates/gaussian_process/components/_gpytorch.py
@@ -0,0 +1,71 @@
+"""Custom GPyTorch components."""
+
+import torch
+from botorch.models.multitask import _compute_multitask_mean
+from botorch.models.utils.gpytorch_modules import MIN_INFERRED_NOISE_LEVEL
+from gpytorch.constraints import GreaterThan
+from gpytorch.likelihoods.hadamard_gaussian_likelihood import HadamardGaussianLikelihood
+from gpytorch.means import MultitaskMean
+from gpytorch.means.multitask_mean import Mean
+from gpytorch.priors import LogNormalPrior
+from torch import Tensor
+from torch.nn import Module
+
+
+class HadamardConstantMean(Mean):
+    """A GPyTorch mean function implementing BoTorch's multitask mean logic.
+
+    While GPyTorch already provides a :class:`~gpytorch.means.MultitaskMean` class, it
+    computes mean values for all (input, task)-pairs (where input means all parameters
+    except the task parameter), i.e. it intrinsically applies a Cartesian expansion.
+    However, for the regular transfer learning setting, we only need the means for the
+    pairs that are actually observed/requested. BoTorch subselects the relevant means
+    from the GPyTorch output in `MultiTaskGP.forward`, i.e. it uses a class-based
+    approach to define its special logic for the multitask case. In contrast, BayBE uses
+    a composition approach, which is more flexible but requires that the logic is
+    injected via a self-contained `Mean` object, which is what this class provides.
+
+    Note:
+        Analogous to GPyTorch's
+        https://github.com/cornellius-gp/gpytorch/blob/main/gpytorch/likelihoods/hadamard_gaussian_likelihood.py
+        but where the logic is applied to the mean function, i.e. we learn a different
+        (constant) mean for each task.
+    """
+
+    def __init__(self, mean_module: Module, num_tasks: int, task_feature: int):
+        super().__init__()
+        self.multitask_mean = MultitaskMean(mean_module, num_tasks=num_tasks)
+        self.task_feature = task_feature
+
+    def forward(self, x: Tensor) -> Tensor:
+        # Adapted from https://github.com/meta-pytorch/botorch/blob/e0f4f5b941b5949a4a1171bf8d4ee9f74f146f3a/botorch/models/multitask.py#L397
+
+        # Convert task feature to positive index
+        task_feature = self.task_feature % x.shape[-1]
+
+        # Split input into task and non-task components
+        x_before = x[..., :task_feature]
+        task_idcs = x[..., task_feature : task_feature + 1]
+        x_after = x[..., task_feature + 1 :]
+
+        return _compute_multitask_mean(
+            self.multitask_mean, x_before, task_idcs, x_after
+        )
+
+
+def make_botorch_multitask_likelihood(
+    num_tasks: int, task_feature: int
+) -> HadamardGaussianLikelihood:
+    """Adapted from :class:`botorch.models.multitask.MultiTaskGP`."""
+    noise_prior = LogNormalPrior(loc=-4.0, scale=1.0)
+    return HadamardGaussianLikelihood(
+        num_tasks=num_tasks,
+        batch_shape=torch.Size(),
+        noise_prior=noise_prior,
+        noise_constraint=GreaterThan(
+            MIN_INFERRED_NOISE_LEVEL,
+            transform=None,
+            initial_value=noise_prior.mode,
+        ),
+        task_feature_index=task_feature,
+    )
diff --git a/baybe/surrogates/gaussian_process/components/kernel.py b/baybe/surrogates/gaussian_process/components/kernel.py
index ca4da066e7..28f5c1bcf4 100644
--- a/baybe/surrogates/gaussian_process/components/kernel.py
+++ b/baybe/surrogates/gaussian_process/components/kernel.py
@@ -2,18 +2,24 @@
 
 from __future__ import annotations
 
+from abc import ABC, abstractmethod
+from collections.abc import Iterable
 from functools import partial
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, ClassVar
 
 from attrs import define, field
+from attrs.converters import optional
 from attrs.validators import is_callable
 from typing_extensions import override
 
+from baybe.exceptions import IncompatibleSearchSpaceError
 from baybe.kernels.base import Kernel
-from baybe.kernels.composite import ProductKernel
 from baybe.parameters.categorical import TaskParameter
+from baybe.parameters.enum import ParameterKind
 from baybe.parameters.selectors import (
+    ParameterSelectorProtocol,
     TypeSelector,
+    to_parameter_selector,
 )
 from baybe.searchspace.core import SearchSpace
 from baybe.surrogates.gaussian_process.components.generic import (
@@ -27,6 +33,8 @@
     from gpytorch.kernels import Kernel as GPyTorchKernel
     from torch import Tensor
 
+    from baybe.parameters.base import Parameter
+
     KernelFactoryProtocol = GPComponentFactoryProtocol[Kernel | GPyTorchKernel]
     PlainKernelFactory = PlainGPComponentFactory[Kernel | GPyTorchKernel]
 else:
@@ -35,6 +43,80 @@
     PlainKernelFactory = PlainGPComponentFactory[Kernel]
 
 
+@define
+class _KernelFactory(KernelFactoryProtocol, ABC):
+    """Base class for kernel factories."""
+
+    # For internal use only: sanity check mechanism to remind developers of new
+    # factories to actually use the parameter selector when it is provided
+    # TODO: Perhaps we can find a more elegant way to enforce this by design
+    _uses_parameter_names: ClassVar[bool] = False
+
+    supported_parameter_kinds: ClassVar[ParameterKind] = ParameterKind.REGULAR
+    """The parameter kinds supported by the kernel factory."""
+
+    parameter_selector: ParameterSelectorProtocol | None = field(
+        default=None, converter=optional(to_parameter_selector)
+    )
+    """An optional selector to specify which parameters are considered by the kernel."""
+
+    def get_parameter_names(self, searchspace: SearchSpace) -> tuple[str, ...] | None:
+        """Get the names of the parameters to be considered by the kernel."""
+        if self.parameter_selector is None:
+            return None
+
+        return tuple(
+            p.name for p in searchspace.parameters if self.parameter_selector(p)
+        )
+
+    def _validate_parameter_kinds(self, parameters: Iterable[Parameter]) -> None:
+        """Validate that the given parameters are supported by the factory.
+
+        Args:
+            parameters: The parameters to validate.
+
+        Raises:
+            IncompatibleSearchSpaceError: If unsupported parameter kinds are found.
+        """
+        if unsupported := [
+            p.name for p in parameters if not (p.kind & self.supported_parameter_kinds)
+        ]:
+            raise IncompatibleSearchSpaceError(
+                f"'{type(self).__name__}' does not support parameter kind(s) for "
+                f"parameter(s) {unsupported}. Supported kinds: "
+                f"{self.supported_parameter_kinds}."
+            )
+
+    @override
+    def __call__(
+        self, searchspace: SearchSpace, train_x: Tensor, train_y: Tensor
+    ) -> Kernel | GPyTorchKernel:
+        """Construct the kernel, validating parameter kinds before construction."""
+        if self.parameter_selector is not None:
+            params = [p for p in searchspace.parameters if self.parameter_selector(p)]
+        else:
+            params = list(searchspace.parameters)
+        self._validate_parameter_kinds(params)
+
+        return self._make(searchspace, train_x, train_y)
+
+    @abstractmethod
+    def _make(
+        self, searchspace: SearchSpace, train_x: Tensor, train_y: Tensor
+    ) -> Kernel | GPyTorchKernel:
+        """Construct the kernel."""
+
+    def __attrs_post_init__(self):
+        if self.parameter_selector is not None and not self._uses_parameter_names:
+            raise AssertionError(
+                f"A `parameter_selector` was provided to "
+                f"`{type(self).__name__}`, but the class does not set "
+                f"`_uses_parameter_names = True`. Subclasses that accept a "
+                f"parameter selector must explicitly set this flag to confirm "
+                f"they actually use the selected parameter names."
+            )
+
+
 @define
 class ICMKernelFactory(KernelFactoryProtocol):
     """A kernel factory that constructs an ICM kernel for transfer learning.
@@ -76,6 +158,43 @@ def _default_task_kernel_factory(self) -> KernelFactoryProtocol:
     def __call__(
         self, searchspace: SearchSpace, train_x: Tensor, train_y: Tensor
     ) -> Kernel:
+        if searchspace.task_idx is None:
+            raise IncompatibleSearchSpaceError(
+                f"'{type(self).__name__}' can only be used with a searchspace that "
+                f"contains a '{TaskParameter.__name__}'."
+            )
+
         base_kernel = self.base_kernel_factory(searchspace, train_x, train_y)
         task_kernel = self.task_kernel_factory(searchspace, train_x, train_y)
-        return ProductKernel([base_kernel, task_kernel])
+        if isinstance(base_kernel, Kernel):
+            base_kernel = base_kernel.to_gpytorch(searchspace)
+        if isinstance(task_kernel, Kernel):
+            task_kernel = task_kernel.to_gpytorch(searchspace)
+
+        # Ensure correct partitioning between base and task kernels active dimensions
+        all_idcs = set(range(len(searchspace.comp_rep_columns)))
+        allowed_task_idcs = {searchspace.task_idx}
+        allowed_base_idcs = all_idcs - allowed_task_idcs
+        base_idcs = (
+            set(dims)
+            if (dims := base_kernel.active_dims.tolist()) is not None
+            else None
+        )
+        task_idcs = (
+            set(dims)
+            if (dims := task_kernel.active_dims.tolist()) is not None
+            else None
+        )
+
+        if base_idcs is not None and (base_idcs > allowed_base_idcs):
+            raise ValueError(
+                f"The base kernel's 'active_dims' {base_idcs} must be a subset of "
+                f"the non-task indices {allowed_base_idcs}."
+            )
+        if task_idcs != allowed_task_idcs:
+            raise ValueError(
+                f"The task kernel's 'active_dims' {task_idcs} does not match "
+                f"the task index {allowed_task_idcs}."
+            )
+
+        return base_kernel * task_kernel
diff --git a/baybe/surrogates/gaussian_process/core.py b/baybe/surrogates/gaussian_process/core.py
index 2b1a3f361e..3df3353985 100644
--- a/baybe/surrogates/gaussian_process/core.py
+++ b/baybe/surrogates/gaussian_process/core.py
@@ -291,18 +291,7 @@ def _fit(self, train_x: Tensor, train_y: Tensor) -> None:
             covar_module=kernel,
             likelihood=likelihood,
         )
-
-        # TODO: This is still a temporary workaround to avoid overfitting seen in
-        #  low-dimensional TL cases. More robust settings are being researched.
-        if context.n_task_dimensions > 0:
-            mll = gpytorch.mlls.LeaveOneOutPseudoLikelihood(
-                self._model.likelihood, self._model
-            )
-        else:
-            mll = gpytorch.ExactMarginalLogLikelihood(
-                self._model.likelihood, self._model
-            )
-
+        mll = gpytorch.ExactMarginalLogLikelihood(self._model.likelihood, self._model)
         botorch.fit.fit_gpytorch_mll(mll)
 
     @override
diff --git a/baybe/surrogates/gaussian_process/presets/__init__.py b/baybe/surrogates/gaussian_process/presets/__init__.py
index deb7de9e64..77b38af1cb 100644
--- a/baybe/surrogates/gaussian_process/presets/__init__.py
+++ b/baybe/surrogates/gaussian_process/presets/__init__.py
@@ -7,6 +7,13 @@
     BayBEMeanFactory,
 )
 
+# BoTorch preset
+from baybe.surrogates.gaussian_process.presets.botorch import (
+    BotorchKernelFactory,
+    BotorchLikelihoodFactory,
+    BotorchMeanFactory,
+)
+
 # Core
 from baybe.surrogates.gaussian_process.presets.core import GaussianProcessPreset
 
@@ -31,6 +38,10 @@
     "BayBEKernelFactory",
     "BayBELikelihoodFactory",
     "BayBEMeanFactory",
+    # BoTorch preset
+    "BotorchKernelFactory",
+    "BotorchLikelihoodFactory",
+    "BotorchMeanFactory",
     # EDBO preset
     "EDBOKernelFactory",
     "EDBOLikelihoodFactory",
diff --git a/baybe/surrogates/gaussian_process/presets/baybe.py b/baybe/surrogates/gaussian_process/presets/baybe.py
index bc820ae52b..e96a08e6cc 100644
--- a/baybe/surrogates/gaussian_process/presets/baybe.py
+++ b/baybe/surrogates/gaussian_process/presets/baybe.py
@@ -10,14 +10,14 @@
 from baybe.kernels.base import Kernel
 from baybe.kernels.basic import IndexKernel
 from baybe.parameters.categorical import TaskParameter
+from baybe.parameters.enum import ParameterKind
 from baybe.parameters.selectors import (
     ParameterSelectorProtocol,
     TypeSelector,
-    _ParameterSelectorMixin,
     to_parameter_selector,
 )
 from baybe.searchspace.core import SearchSpace
-from baybe.surrogates.gaussian_process.components.kernel import KernelFactoryProtocol
+from baybe.surrogates.gaussian_process.components.kernel import _KernelFactory
 from baybe.surrogates.gaussian_process.components.mean import LazyConstantMeanFactory
 from baybe.surrogates.gaussian_process.presets.edbo_smoothed import (
     SmoothedEDBOKernelFactory,
@@ -29,11 +29,16 @@
 
 
 @define
-class BayBEKernelFactory(KernelFactoryProtocol):
+class BayBEKernelFactory(_KernelFactory):
     """The default kernel factory for Gaussian process surrogates."""
 
+    supported_parameter_kinds: ClassVar[ParameterKind] = (
+        ParameterKind.REGULAR | ParameterKind.TASK
+    )
+    # See base class.
+
     @override
-    def __call__(
+    def _make(
         self, searchspace: SearchSpace, train_x: Tensor, train_y: Tensor
     ) -> Kernel:
         from baybe.surrogates.gaussian_process.components.kernel import ICMKernelFactory
@@ -48,12 +53,15 @@ def __call__(
 
 
 @define
-class BayBETaskKernelFactory(KernelFactoryProtocol, _ParameterSelectorMixin):
+class BayBETaskKernelFactory(_KernelFactory):
     """The factory providing the default task kernel for Gaussian process surrogates."""
 
     _uses_parameter_names: ClassVar[bool] = True
     # See base class.
 
+    supported_parameter_kinds: ClassVar[ParameterKind] = ParameterKind.TASK
+    # See base class.
+
     parameter_selector: ParameterSelectorProtocol | None = field(
         factory=lambda: TypeSelector([TaskParameter]),
         converter=to_parameter_selector,
@@ -61,7 +69,7 @@ class BayBETaskKernelFactory(KernelFactoryProtocol, _ParameterSelectorMixin):
     # TODO: Reuse base attribute (https://github.com/python-attrs/attrs/pull/1429)
 
     @override
-    def __call__(
+    def _make(
         self, searchspace: SearchSpace, train_x: Tensor, train_y: Tensor
     ) -> Kernel:
         return IndexKernel(
diff --git a/baybe/surrogates/gaussian_process/presets/botorch.py b/baybe/surrogates/gaussian_process/presets/botorch.py
new file mode 100644
index 0000000000..30135bc468
--- /dev/null
+++ b/baybe/surrogates/gaussian_process/presets/botorch.py
@@ -0,0 +1,145 @@
+"""BoTorch preset for Gaussian process surrogates."""
+
+from __future__ import annotations
+
+import gc
+from itertools import chain
+from typing import TYPE_CHECKING, ClassVar
+
+from attrs import define
+from typing_extensions import override
+
+from baybe.kernels.base import Kernel
+from baybe.parameters.enum import ParameterKind
+from baybe.searchspace.core import SearchSpace
+from baybe.surrogates.gaussian_process.components import LikelihoodFactoryProtocol
+from baybe.surrogates.gaussian_process.components._gpytorch import (
+    make_botorch_multitask_likelihood,
+)
+from baybe.surrogates.gaussian_process.components.kernel import (
+    ICMKernelFactory,
+    _KernelFactory,
+)
+from baybe.surrogates.gaussian_process.components.mean import MeanFactoryProtocol
+
+if TYPE_CHECKING:
+    from gpytorch.kernels import Kernel as GPyTorchKernel
+    from gpytorch.likelihoods import Likelihood as GPyTorchLikelihood
+    from gpytorch.means import Mean as GPyTorchMean
+    from torch import Tensor
+
+
+@define
+class BotorchKernelFactory(_KernelFactory):
+    """A factory providing BoTorch kernels."""
+
+    _uses_parameter_names: ClassVar[bool] = True
+    # See base class.
+
+    supported_parameter_kinds: ClassVar[ParameterKind] = (
+        ParameterKind.REGULAR | ParameterKind.TASK
+    )
+    # See base class.
+
+    @override
+    def _make(
+        self, searchspace: SearchSpace, train_x: Tensor, train_y: Tensor
+    ) -> Kernel | GPyTorchKernel:
+        from botorch.models.kernels.positive_index import PositiveIndexKernel
+        from botorch.models.utils.gpytorch_modules import (
+            get_covar_module_with_dim_scaled_prior,
+        )
+
+        parameter_names = self.get_parameter_names(searchspace)
+
+        # Resolve parameter names to active dimension indices
+        active_dims: list[int] | None
+        if parameter_names is not None:
+            active_dims = list(
+                chain.from_iterable(
+                    searchspace.get_comp_rep_parameter_indices(name)
+                    for name in parameter_names
+                )
+            )
+            ard_num_dims = len(active_dims)
+        else:
+            active_dims = None
+            ard_num_dims = len(searchspace.comp_rep_columns)
+
+        # Determine if the selected parameters include a task parameter
+        task_idx = searchspace.task_idx
+        is_multitask = task_idx is not None and (
+            active_dims is None or task_idx in active_dims
+        )
+
+        if not is_multitask:
+            return get_covar_module_with_dim_scaled_prior(
+                ard_num_dims=ard_num_dims, active_dims=active_dims
+            )
+
+        assert task_idx is not None
+        base_idcs = [
+            idx
+            for idx in (active_dims or range(len(searchspace.comp_rep_columns)))
+            if idx != task_idx
+        ]
+        base = get_covar_module_with_dim_scaled_prior(
+            ard_num_dims=len(base_idcs), active_dims=base_idcs
+        )
+        index_kernel = PositiveIndexKernel(
+            num_tasks=searchspace.n_tasks,
+            rank=searchspace.n_tasks,
+            active_dims=[task_idx],
+        )
+        return ICMKernelFactory(base, index_kernel)(searchspace, train_x, train_y)
+
+
+class BotorchMeanFactory(MeanFactoryProtocol):
+    """A factory providing BoTorch mean functions."""
+
+    @override
+    def __call__(
+        self, searchspace: SearchSpace, train_x: Tensor, train_y: Tensor
+    ) -> GPyTorchMean:
+        from gpytorch.means import ConstantMean
+
+        from baybe.surrogates.gaussian_process.components._gpytorch import (
+            HadamardConstantMean,
+        )
+
+        if searchspace.n_tasks == 1:
+            return ConstantMean()
+
+        assert searchspace.task_idx is not None
+        return HadamardConstantMean(
+            ConstantMean(), searchspace.n_tasks, searchspace.task_idx
+        )
+
+
+class BotorchLikelihoodFactory(LikelihoodFactoryProtocol):
+    """A factory providing BoTorch likelihoods."""
+
+    @override
+    def __call__(
+        self, searchspace: SearchSpace, train_x: Tensor, train_y: Tensor
+    ) -> GPyTorchLikelihood:
+        from botorch.models.utils.gpytorch_modules import (
+            get_gaussian_likelihood_with_lognormal_prior,
+        )
+
+        if searchspace.n_tasks == 1:
+            return get_gaussian_likelihood_with_lognormal_prior()
+
+        assert searchspace.task_idx is not None
+        return make_botorch_multitask_likelihood(
+            num_tasks=searchspace.n_tasks, task_feature=searchspace.task_idx
+        )
+
+
+# Collect leftover original slotted classes processed by `attrs.define`
+gc.collect()
+
+# Aliases for generic preset imports
+PresetKernelFactory = BotorchKernelFactory
+PresetMeanFactory = BotorchMeanFactory
+PresetLikelihoodFactory = BotorchLikelihoodFactory
diff --git a/baybe/surrogates/gaussian_process/presets/core.py b/baybe/surrogates/gaussian_process/presets/core.py
index ad77df0b4d..f65762c1fa 100644
--- a/baybe/surrogates/gaussian_process/presets/core.py
+++ b/baybe/surrogates/gaussian_process/presets/core.py
@@ -11,6 +11,9 @@ class GaussianProcessPreset(Enum):
     BAYBE = "BAYBE"
     """The default BayBE settings of the Gaussian process surrogate class."""
 
+    BOTORCH = "BOTORCH"
+    """The BoTorch settings."""
+
     EDBO = "EDBO"
     """The EDBO settings."""
 
diff --git a/baybe/surrogates/gaussian_process/presets/edbo.py b/baybe/surrogates/gaussian_process/presets/edbo.py
index 36611c258e..ecf24de106 100644
--- a/baybe/surrogates/gaussian_process/presets/edbo.py
+++ b/baybe/surrogates/gaussian_process/presets/edbo.py
@@ -16,13 +16,14 @@
 from baybe.parameters.selectors import (
     ParameterSelectorProtocol,
     TypeSelector,
-    _ParameterSelectorMixin,
     to_parameter_selector,
 )
 from baybe.parameters.substance import SubstanceParameter
 from baybe.priors.basic import GammaPrior
 from baybe.searchspace.discrete import SubspaceDiscrete
-from baybe.surrogates.gaussian_process.components.kernel import KernelFactoryProtocol
+from baybe.surrogates.gaussian_process.components.kernel import (
+    _KernelFactory,
+)
 from baybe.surrogates.gaussian_process.components.likelihood import (
     LikelihoodFactoryProtocol,
 )
@@ -56,7 +57,7 @@ def _contains_encoding(
 
 
 @define
-class EDBOKernelFactory(KernelFactoryProtocol, _ParameterSelectorMixin):
+class EDBOKernelFactory(_KernelFactory):
     """A factory providing EDBO kernels.
 
     References:
@@ -74,12 +75,10 @@ class EDBOKernelFactory(KernelFactoryProtocol, _ParameterSelectorMixin):
     # TODO: Reuse base attribute (https://github.com/python-attrs/attrs/pull/1429)
 
     @override
-    def __call__(
+    def _make(
         self, searchspace: SearchSpace, train_x: Tensor, train_y: Tensor
     ) -> Kernel:
-        effective_dims = train_x.shape[-1] - len(
-            [p for p in searchspace.parameters if isinstance(p, TaskParameter)]
-        )
+        effective_dims = train_x.shape[-1]
 
         switching_condition = _contains_encoding(
             searchspace.discrete, _EDBO_ENCODINGS
diff --git a/baybe/surrogates/gaussian_process/presets/edbo_smoothed.py b/baybe/surrogates/gaussian_process/presets/edbo_smoothed.py
index eb8d57491a..89cf9a9ef2 100644
--- a/baybe/surrogates/gaussian_process/presets/edbo_smoothed.py
+++ b/baybe/surrogates/gaussian_process/presets/edbo_smoothed.py
@@ -15,11 +15,12 @@
 from baybe.parameters.selectors import (
     ParameterSelectorProtocol,
     TypeSelector,
-    _ParameterSelectorMixin,
     to_parameter_selector,
 )
 from baybe.priors.basic import GammaPrior
-from baybe.surrogates.gaussian_process.components.kernel import KernelFactoryProtocol
+from baybe.surrogates.gaussian_process.components.kernel import (
+    _KernelFactory,
+)
 from baybe.surrogates.gaussian_process.components.likelihood import (
     LikelihoodFactoryProtocol,
 )
@@ -38,7 +39,7 @@
 
 
 @define
-class SmoothedEDBOKernelFactory(KernelFactoryProtocol, _ParameterSelectorMixin):
+class SmoothedEDBOKernelFactory(_KernelFactory):
     """A factory providing smoothed versions of EDBO kernels.
 
     Takes the low and high dimensional limits of
@@ -56,12 +57,10 @@ class SmoothedEDBOKernelFactory(KernelFactoryProtocol, _ParameterSelectorMixin):
     # TODO: Reuse base attribute (https://github.com/python-attrs/attrs/pull/1429)
 
     @override
-    def __call__(
+    def _make(
         self, searchspace: SearchSpace, train_x: Tensor, train_y: Tensor
     ) -> Kernel:
-        effective_dims = train_x.shape[-1] - len(
-            [p for p in searchspace.parameters if isinstance(p, TaskParameter)]
-        )
+        effective_dims = train_x.shape[-1]
 
         # Interpolate prior moments linearly between low D and high D regime.
         # The high D regime itself is the average of the EDBO OHE and Mordred regime.
diff --git a/docs/conf.py b/docs/conf.py
index 5251bb8654..6096369714 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -150,7 +150,7 @@
     ("py:class", "baybe.acquisition.acqfs._ExpectedHypervolumeImprovement"),
     ("py:class", "baybe.settings._SlottedContextDecorator"),
     ("py:class", "baybe.surrogates.gaussian_process.components.PlainKernelFactory"),
-    ("py:class", "baybe.parameters.selectors._ParameterSelectorMixin"),
+    ("py:class", "baybe.surrogates.gaussian_process.components.kernel._KernelFactory"),
     # Deprecation
     ("py:.*", "baybe.targets._deprecated.*"),
 ]
diff --git a/streamlit/surrogate_models.py b/streamlit/surrogate_models.py
index 83415b91b9..f71b34a6e8 100644
--- a/streamlit/surrogate_models.py
+++ b/streamlit/surrogate_models.py
@@ -19,11 +19,12 @@
 from baybe.acquisition import qLogExpectedImprovement
 from baybe.acquisition.base import AcquisitionFunction
 from baybe.exceptions import IncompatibleSurrogateError
-from baybe.parameters import NumericalDiscreteParameter
+from baybe.parameters import NumericalDiscreteParameter, TaskParameter
 from baybe.recommenders import BotorchRecommender
 from baybe.searchspace import SearchSpace
 from baybe.surrogates import CustomONNXSurrogate, GaussianProcessSurrogate
 from baybe.surrogates.base import Surrogate
+from baybe.surrogates.gaussian_process.presets import GaussianProcessPreset
 from baybe.targets import NumericalTarget
 from baybe.utils.basic import get_subclasses
 
@@ -90,13 +91,36 @@ def main():
     }
     acquisition_function_names = list(acquisition_function_classes.keys())
 
-    # Streamlit simulation parameters
+    # >>>>> Sidebar options >>>>>
+    # Domain
     st.sidebar.markdown("# Domain")
+    st_enable_multitask = st.sidebar.toggle("Multi-task")
+    st_n_tasks = 2 if st_enable_multitask else 1
     st_random_seed = int(st.sidebar.number_input("Random seed", value=1337))
     st_function_name = st.sidebar.selectbox(
         "Test function", list(test_functions.keys())
     )
     st_minimize = st.sidebar.checkbox("Minimize")
+
+    # Training data
+    st.sidebar.markdown("---")
+    st.sidebar.markdown("# Training Data")
+    st_n_training_points = st.sidebar.slider(
+        "Number of training points",
+        0 if st_enable_multitask else 1,
+        20,
+        0 if st_enable_multitask else 5,
+    )
+    if st_enable_multitask:
+        st_n_training_points_other = st.sidebar.slider(
+            "Number of off-task training points", 0, 20, 5
+        )
+        st_offtask_scale = st.sidebar.slider("Off-task scale factor", -20.0, 20.0, 1.0)
+        st_offtask_offset_factor = st.sidebar.slider(
+            "Off-task offset", -100.0, 100.0, 0.0
+        )
+
+    # Model
     st.sidebar.markdown("---")
     st.sidebar.markdown("# Model")
     st_surrogate_name = st.sidebar.selectbox(
@@ -104,13 +128,26 @@ def main():
         surrogate_model_names,
         surrogate_model_names.index(GaussianProcessSurrogate.__name__),
     )
+
+    st_gp_preset = None
+    st_transfer_learning = False
+    if st_surrogate_name == GaussianProcessSurrogate.__name__:
+        preset_names = [preset.value for preset in GaussianProcessPreset]
+        st_gp_preset = st.sidebar.selectbox(
+            "GP Preset",
+            preset_names,
+            index=preset_names.index(GaussianProcessPreset.BAYBE.value),
+        )
+        if st_enable_multitask:
+            st_transfer_learning = st.sidebar.checkbox("Transfer learning", value=True)
     st_acqf_name = st.sidebar.selectbox(
         "Acquisition function",
         acquisition_function_names,
         acquisition_function_names.index(qLogExpectedImprovement.__name__),
     )
-    st_n_training_points = st.sidebar.slider("Number of training points", 1, 20, 5)
     st_n_recommendations = st.sidebar.slider("Number of recommendations", 1, 20, 5)
+
+    # Surrogate consistency validation
     st.sidebar.markdown("---")
     st.sidebar.markdown("# Validation")
     st.sidebar.markdown(
@@ -127,6 +164,10 @@ def main():
     )
     st_function_amplitude = st.sidebar.slider("Function amplitude", 1.0, 100.0, 1.0)
     st_function_bias = st.sidebar.slider("Function bias", -100.0, 100.0, 0.0)
+    # <<<<< Sidebar options <<<<<
+
+    # Derived settings
+    st_use_separate_gps = st_n_tasks > 1 and not st_transfer_learning
 
     # Set the chosen random seed
     active_settings.random_seed = st_random_seed
@@ -140,68 +181,184 @@ def main():
         bias=st_function_bias,
     )
 
-    # Create the training data
-    train_x = np.random.uniform(
-        st_lower_parameter_limit, st_upper_parameter_limit, st_n_training_points
-    )
-    train_y = fun(train_x)
-    measurements = pd.DataFrame({"x": train_x, "y": train_y})
+    # Generate task-specific transforms (scale and offset for each task)
+    task_names = ["on-task", "off-task"] if st_n_tasks > 1 else ["on-task"]
+    task_transforms = {}
+    for task_idx in range(st_n_tasks):
+        task_name = task_names[task_idx]
+        if task_idx == 0:
+            # On-task: use original function values
+            task_transforms[task_name] = {"scale": 1.0, "offset": 0.0}
+        else:
+            # Off-task: use user-specified scale and offset
+            scale = st_offtask_scale
+            offset = st_offtask_offset_factor * st_function_amplitude
+            task_transforms[task_name] = {"scale": scale, "offset": offset}
+
+    # Create training data
+    measurements_list = []
+    for task_idx in range(st_n_tasks):
+        task_name = task_names[task_idx]
+        transform = task_transforms[task_name]
+        n_points = st_n_training_points if task_idx == 0 else st_n_training_points_other
+        train_x = np.random.uniform(
+            st_lower_parameter_limit, st_upper_parameter_limit, n_points
+        )
+        task_measurements = pd.DataFrame(
+            {
+                "x": train_x,
+                "task": task_name,
+                "y": fun(train_x) * transform["scale"] + transform["offset"],
+            }
+        )
+        measurements_list.append(task_measurements)
+    measurements = pd.concat(measurements_list, ignore_index=True)
 
     # Create the plotting grid and corresponding target values
     test_x = np.linspace(
         st_lower_parameter_limit, st_upper_parameter_limit, N_PARAMETER_VALUES
     )
-    test_y = fun(test_x)
-    candidates = pd.DataFrame({"x": test_x, "y": test_y})
+    candidates_list = []
+    test_ys = {}
+    for task_idx in range(st_n_tasks):
+        task_name = task_names[task_idx]
+        transform = task_transforms[task_name]
+        test_ys[task_name] = fun(test_x) * transform["scale"] + transform["offset"]
+        task_candidates = pd.DataFrame(
+            {"x": test_x, "task": task_name, "y": test_ys[task_name]}
+        )
+        candidates_list.append(task_candidates)
+    candidates = pd.concat(candidates_list, ignore_index=True)
 
     # Create the searchspace and objective
-    parameter = NumericalDiscreteParameter(
-        name="x",
-        values=np.linspace(
-            st_lower_parameter_limit, st_upper_parameter_limit, N_PARAMETER_VALUES
-        ),
-    )
-    searchspace = SearchSpace.from_product(parameters=[parameter])
+    parameters = [
+        NumericalDiscreteParameter(
+            name="x",
+            values=np.linspace(
+                st_lower_parameter_limit, st_upper_parameter_limit, N_PARAMETER_VALUES
+            ),
+        )
+    ]
+    if st_transfer_learning:
+        parameters.append(
+            TaskParameter(
+                name="task",
+                values=task_names,
+                active_values=["on-task"],
+            )
+        )
+    searchspace = SearchSpace.from_product(parameters=parameters)
     objective = NumericalTarget(name="y", minimize=st_minimize).to_objective()
 
-    # Create the acquisition function and the recommender
+    # Create the acquisition function
     acqf_cls = acquisition_function_classes[st_acqf_name]
     try:
         acqf = acqf_cls(maximize=not st_minimize)
     except TypeError:
         acqf = acqf_cls()
-    recommender = BotorchRecommender(
-        surrogate_model=surrogate_model_classes[st_surrogate_name](),
-        acquisition_function=acqf,
-    )
 
-    # Get the recommendations and extract the posterior mean / standard deviation
-    try:
-        recommendations = recommender.recommend(
-            st_n_recommendations, searchspace, objective, measurements
-        )
-    except IncompatibleSurrogateError:
-        st.error(
-            f"You requested {st_n_recommendations} recommendations but the selected "
-            f"surrogate class does not support recommending more than one candidate "
-            f"at a time."
+    def make_surrogate():
+        if st_surrogate_name == GaussianProcessSurrogate.__name__:
+            assert st_gp_preset is not None
+            return GaussianProcessSurrogate.from_preset(
+                preset=GaussianProcessPreset[st_gp_preset]
+            )
+        return surrogate_model_classes[st_surrogate_name]()
+
+    if st_use_separate_gps:
+        # One independent GP per task, each trained without the task column
+        stats_by_task = {}
+        for task_name in task_names:
+            task_meas = measurements[measurements["task"] == task_name][
+                ["x", "y"]
+            ].reset_index(drop=True)
+            task_recommender = BotorchRecommender(
+                surrogate_model=make_surrogate(),
+                acquisition_function=acqf,
+            )
+            if task_name == "on-task":
+                try:
+                    recommendations = task_recommender.recommend(
+                        st_n_recommendations, searchspace, objective, task_meas
+                    )
+                except IncompatibleSurrogateError:
+                    st.error(
+                        f"You requested {st_n_recommendations} recommendations but "
+                        f"the selected surrogate class does not support recommending "
+                        f"more than one candidate at a time."
+                    )
+                    st.stop()
+            task_surrogate = task_recommender.get_surrogate(
+                searchspace, objective, task_meas
+            )
+            stats_by_task[task_name] = task_surrogate.posterior_stats(
+                pd.DataFrame({"x": test_x})
+            )
+    else:
+        # Single recommender (single task, or multi-task with transfer learning)
+        recommender = BotorchRecommender(
+            surrogate_model=make_surrogate(),
+            acquisition_function=acqf,
         )
-        st.stop()
-    surrogate = recommender.get_surrogate(searchspace, objective, measurements)
-    stats = surrogate.posterior_stats(candidates)
-    mean, std = stats["y_mean"], stats["y_std"]
+        try:
+            recommendations = recommender.recommend(
+                st_n_recommendations, searchspace, objective, measurements
+            )
+        except IncompatibleSurrogateError:
+            st.error(
+                f"You requested {st_n_recommendations} recommendations but the "
+                f"selected surrogate class does not support recommending more than "
+                f"one candidate at a time."
+            )
+            st.stop()
+        surrogate = recommender.get_surrogate(searchspace, objective, measurements)
+        stats = surrogate.posterior_stats(candidates)
 
     # Visualize the test function, training points, model predictions, recommendations
-    fig = plt.figure()
-    plt.plot(test_x, test_y, color="tab:blue", label="Test function")
-    plt.plot(train_x, train_y, "o", color="tab:blue")
-    plt.plot(test_x, mean, color="tab:red", label="Surrogate model")
-    plt.fill_between(test_x, mean - std, mean + std, alpha=0.2, color="tab:red")
-    plt.vlines(
-        recommendations, *plt.gca().get_ylim(), color="k", label="Recommendations"
-    )
-    plt.legend()
-    st.pyplot(fig)
+    if st_n_tasks > 1:
+        cols = st.columns(st_n_tasks)
+
+    for task_idx in range(st_n_tasks):
+        task_name = task_names[task_idx]
+        task_mask = candidates["task"] == task_name if st_n_tasks > 1 else slice(None)
+
+        if st_use_separate_gps:
+            task_stats = stats_by_task[task_name]
+            mean = task_stats["y_mean"].values
+            std = task_stats["y_std"].values
+        elif st_n_tasks > 1:
+            mean = stats["y_mean"][task_mask].values
+            std = stats["y_std"][task_mask].values
+        else:
+            mean = stats["y_mean"].values
+            std = stats["y_std"].values
+
+        test_y = test_ys[task_name]
+        train_mask = (
+            measurements["task"] == task_name if st_n_tasks > 1 else slice(None)
+        )
+        train_y = measurements[train_mask]["y"].values
+        task_train_x = measurements[train_mask]["x"].values
+
+        fig = plt.figure()
+        plt.plot(test_x, test_y, color="tab:blue", label="Test function")
+        plt.plot(task_train_x, train_y, "o", color="tab:blue")
+        plt.plot(test_x, mean, color="tab:red", label="Surrogate model")
+        plt.fill_between(test_x, mean - std, mean + std, alpha=0.2, color="tab:red")
+        if task_name == "on-task":
+            plt.vlines(
+                recommendations["x"] if st_n_tasks > 1 else recommendations,
+                *plt.gca().get_ylim(),
+                color="k",
+                label="Recommendations",
+            )
+        plt.legend()
+        if st_n_tasks > 1:
+            plt.title(task_name.capitalize())
+            with cols[task_idx]:
+                st.pyplot(fig)
+        else:
+            st.pyplot(fig)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_gp.py b/tests/test_gp.py
index 30ca69bc0e..eec3297559 100644
--- a/tests/test_gp.py
+++ b/tests/test_gp.py
@@ -1,6 +1,11 @@
 """Tests for the Gaussian Process surrogate."""
 
+import pandas as pd
 import pytest
+import torch
+from botorch.fit import fit_gpytorch_mll
+from botorch.models import MultiTaskGP, SingleTaskGP
+from botorch.models.transforms import Normalize, Standardize
 from gpytorch.kernels import MaternKernel as GPyTorchMaternKernel
 from gpytorch.kernels import RBFKernel as GPyTorchRBFKernel
 from gpytorch.kernels import ScaleKernel as GPyTorchScaleKernel
@@ -8,23 +13,37 @@
 from gpytorch.likelihoods import Likelihood as GPyTorchLikelihood
 from gpytorch.means import ConstantMean
 from gpytorch.means import Mean as GPyTorchMean
+from gpytorch.mlls import ExactMarginalLogLikelihood
 from pandas.testing import assert_frame_equal
 from pytest import param
 
+from baybe import active_settings
 from baybe.kernels.basic import MaternKernel, RBFKernel
-from baybe.kernels.composite import ScaleKernel
+from baybe.kernels.composite import AdditiveKernel, ScaleKernel
+from baybe.parameters.categorical import TaskParameter
 from baybe.parameters.numerical import NumericalContinuousParameter
+from baybe.searchspace.core import SearchSpace
 from baybe.surrogates.gaussian_process.components.generic import PlainGPComponentFactory
 from baybe.surrogates.gaussian_process.core import GaussianProcessSurrogate
 from baybe.surrogates.gaussian_process.presets import GaussianProcessPreset
 from baybe.targets.numerical import NumericalTarget
-from baybe.utils.dataframe import create_fake_input
+from baybe.utils.dataframe import create_fake_input, to_tensor
 
 searchspace = NumericalContinuousParameter("p", (0, 1)).to_searchspace()
+searchspace_mt = SearchSpace.from_product(
+    [
+        NumericalContinuousParameter("p", (0, 1)),
+        TaskParameter("task", ["a", "b", "c"]),
+    ]
+)
 objective = NumericalTarget("t").to_objective()
 measurements = create_fake_input(searchspace.parameters, objective.targets, n_rows=100)
 
 baybe_kernel = ScaleKernel(MaternKernel() + RBFKernel())
+measurements_mt = create_fake_input(
+    searchspace_mt.parameters, objective.targets, n_rows=100
+)
+baybe_kernel = ScaleKernel(AdditiveKernel([MaternKernel(), RBFKernel()]))
 gpytorch_kernel = GPyTorchScaleKernel(GPyTorchMaternKernel() + GPyTorchRBFKernel())
 
 
@@ -36,6 +55,52 @@ def _dummy_likelihood_factory(*args, **kwargs) -> GPyTorchLikelihood:
     return GaussianLikelihood()
 
 
+def _posterior_stats_botorch(
+    searchspace: SearchSpace, measurements: pd.DataFrame
+) -> pd.DataFrame:
+    """The essential BoTorch steps to produce posterior estimates."""
+    train_X = to_tensor(searchspace.transform(measurements, allow_extra=True))
+    train_Y = to_tensor(objective.transform(measurements, allow_extra=True))
+
+    # >>>>> Code adapted from BoTorch landing page: https://botorch.org/ >>>>>
+    # NOTE: We normalize according to the searchspace bounds to ensure consisteny with
+    #       the BayBE GP implementation.
+    if searchspace.n_tasks == 1:
+        gp = SingleTaskGP(
+            train_X=train_X,
+            train_Y=train_Y,
+            input_transform=Normalize(
+                d=len(searchspace.comp_rep_columns),
+                bounds=to_tensor(searchspace.scaling_bounds),
+            ),
+            outcome_transform=Standardize(m=1),
+        )
+    else:
+        assert searchspace.task_idx is not None
+        non_task_idcs = [
+            i for i in range(train_X.shape[-1]) if i != searchspace.task_idx
+        ]
+        gp = MultiTaskGP(
+            train_X=train_X,
+            train_Y=train_Y,
+            task_feature=searchspace.task_idx,
+            input_transform=Normalize(
+                d=len(searchspace.comp_rep_columns),
+                indices=non_task_idcs,
+                bounds=to_tensor(searchspace.scaling_bounds),
+            ),
+        )
+    mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
+    fit_gpytorch_mll(mll)
+    # <<<<<<<<<<
+
+    with torch.no_grad():
+        posterior = gp.posterior(train_X)
+    mean = posterior.mean
+    std = posterior.variance.sqrt()
+    return pd.DataFrame({"t_mean": mean.numpy().ravel(), "t_std": std.numpy().ravel()})
+
+
 @pytest.mark.parametrize(
     ("component_1", "component_2"),
     [
@@ -117,3 +182,25 @@ def test_invalid_components():
         GaussianProcessSurrogate(mean_or_factory=GaussianLikelihood())
     with pytest.raises(TypeError, match="Component must be one of"):
         GaussianProcessSurrogate(likelihood_or_factory=MaternKernel())
+
+
+@pytest.mark.parametrize("multitask", [False, True], ids=["single-task", "multi-task"])
+def test_botorch_preset(multitask: bool, monkeypatch):
+    """The BoTorch preset exactly mimics BoTorch's behavior."""
+    if multitask:
+        monkeypatch.setenv("BAYBE_DISABLE_CUSTOM_KERNEL_WARNING", "true")
+        sp = searchspace_mt
+        data = measurements_mt
+    else:
+        sp = searchspace
+        data = measurements
+
+    active_settings.random_seed = 1337
+    gp = GaussianProcessSurrogate.from_preset("BOTORCH")
+    gp.fit(sp, objective, data)
+    posterior1 = gp.posterior_stats(data)
+
+    active_settings.random_seed = 1337
+    posterior2 = _posterior_stats_botorch(sp, data)
+
+    assert_frame_equal(posterior1, posterior2)
diff --git a/tests/test_kernel_factories.py b/tests/test_kernel_factories.py
new file mode 100644
index 0000000000..6a8acda6a6
--- /dev/null
+++ b/tests/test_kernel_factories.py
@@ -0,0 +1,75 @@
+"""Tests for kernel factories."""
+
+from contextlib import nullcontext
+
+import pytest
+import torch
+from pytest import param
+
+from baybe.exceptions import IncompatibleSearchSpaceError
+from baybe.parameters.categorical import CategoricalParameter, TaskParameter
+from baybe.parameters.numerical import (
+    NumericalContinuousParameter,
+    NumericalDiscreteParameter,
+)
+from baybe.searchspace.core import SearchSpace
+from baybe.surrogates.gaussian_process.presets.baybe import (
+    BayBEKernelFactory,
+    BayBENumericalKernelFactory,
+    BayBETaskKernelFactory,
+)
+
+# A selector that accepts all parameters
+_SELECT_ALL = lambda parameter: True  # noqa: E731
+
+
+@pytest.mark.parametrize(
+    ("factory", "parameters", "error"),
+    [
+        param(
+            BayBENumericalKernelFactory(parameter_selector=_SELECT_ALL),
+            [TaskParameter("task", ["t1", "t2"])],
+            IncompatibleSearchSpaceError,
+            id="regular_rejects_task",
+        ),
+        param(
+            BayBETaskKernelFactory(parameter_selector=_SELECT_ALL),
+            [CategoricalParameter("cat", ["a", "b"])],
+            IncompatibleSearchSpaceError,
+            id="task_rejects_categorical",
+        ),
+        param(
+            BayBETaskKernelFactory(parameter_selector=_SELECT_ALL),
+            [NumericalDiscreteParameter("num", [1, 2, 3])],
+            IncompatibleSearchSpaceError,
+            id="task_rejects_numerical_discrete",
+        ),
+        param(
+            BayBETaskKernelFactory(parameter_selector=_SELECT_ALL),
+            [NumericalContinuousParameter("cont", (0, 1))],
+            IncompatibleSearchSpaceError,
+            id="task_rejects_numerical_continuous",
+        ),
+        param(
+            BayBEKernelFactory(),
+            [
+                NumericalContinuousParameter("cont", (0, 1)),
+                TaskParameter("task", ["t1", "t2"]),
+            ],
+            None,
+            id="combined_accepts_both",
+        ),
+    ],
+)
+def test_factory_parameter_kind_validation(factory, parameters, error):
+    """Factories reject unsupported parameter kinds and accept supported ones."""
+    ss = SearchSpace.from_product(parameters)
+    train_x = torch.zeros(2, len(ss.comp_rep_columns))
+    train_y = torch.zeros(2, 1)
+
+    with (
+        nullcontext()
+        if error is None
+        else pytest.raises(error, match="does not support")
+    ):
+        factory(ss, train_x, train_y)