Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions baybe/parameters/categorical.py
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@kalama-ai @AdrianSosic can you quickly comment on the state of this PR? If I remember correctly, this was one of the PRs that are somewhat depending on the current refactoring work of Adrian. Has this code here already been rebased and is thus ready to review? Or do I misremember?

Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
"""Categorical parameters."""

import gc
from enum import Enum
from functools import cached_property
from typing import Any

import numpy as np
import pandas as pd
Expand All @@ -16,6 +18,13 @@
from baybe.utils.numerical import DTypeFloatNumpy


class TaskCorrelation(Enum):
    """Available task correlation modes for ``TaskParameter``.

    The selected mode influences which kernel is used for the task dimension
    of a multi-task Gaussian process surrogate.
    """

    # No assumption is made about the sign of inter-task correlations.
    UNKNOWN = "unknown"

    # Tasks are assumed to be positively correlated with the target task
    # (handled via the PositiveIndexKernel).
    POSITIVE = "positive"


def _convert_values(value, self, field) -> tuple[str, ...]:
"""Sort and convert values for categorical parameters."""
value = nonstring_to_tuple(value, self, field)
Expand Down Expand Up @@ -87,6 +96,30 @@ class TaskParameter(CategoricalParameter):
encoding: CategoricalEncoding = field(default=CategoricalEncoding.INT, init=False)
# See base class.

task_correlation: TaskCorrelation = field(default=TaskCorrelation.POSITIVE)
Copy link
Copy Markdown
Collaborator

@Scienfitz Scienfitz Feb 4, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The only big potential problem with this PR I can spot is the naming of this attribute.

In isolation the name is totally accurate and fine. But we already have plans to expand this attribute so have potentially more choices, like eg RGPE, MeanTransfer, CovarTransfer etc (with names yet to be decided) -> the name correlation is then not appropriate anymore. Instead this attribute embodies something like TL_MODE or TL_METHOD or TL_ALGORITHM.

Now of course we could change the name of the attribute later, but since this is merged to main and potentially released before we have the other choices, we would introduce a breaking change that has to be deprecated. So it would be beneficial if we would avoid that situation.

Here two proposals how to do that:

  • make this attribute private for now, indicating to users that it's not fully public and can change at any moment
  • already now decide on the attribute name, which should be doable because it will have to be a rather generic one (see proposals above)

@AdrianSosic do you agree with this issue of the attribute name?

"""Task correlation. Defaults to positive correlation via PositiveIndexKernel."""
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
"""Task correlation. Defaults to positive correlation via PositiveIndexKernel."""
"""Task correlation influencing which kernel will be used by default for task parameters."""


@task_correlation.validator
def _validate_task_correlation_active_values(  # noqa: DOC101, DOC103
    self, _: Any, value: TaskCorrelation
) -> None:
    """Validate active values compatibility with the task correlation mode.

    Raises:
        ValueError: If ``task_correlation`` is ``POSITIVE`` but
            ``active_values`` contains more than one value.
    """
    # The POSITIVE mode relies on the PositiveIndexKernel, which assumes a
    # single target task, hence exactly one active value is required.
    # Enum members are sentinels: compare identity via `is`, never `==`.
    # NOTE(review): ``_active_values`` is the raw internal field and may be
    # None — presumably meaning "all values are active"; in that case nothing
    # can be validated here. Consider using ``active_values`` instead, which
    # defaults to ``values`` and can never be None — TODO confirm it is safe
    # to access during attrs field validation.
    if value is TaskCorrelation.POSITIVE and self._active_values is not None:
        if len(self._active_values) > 1:
            raise ValueError(
                f"Task correlation '{TaskCorrelation.POSITIVE.value}' requires "
                f"exactly one active value, but {len(self._active_values)} "
                f"were provided: {self._active_values}. The POSITIVE mode uses "
                f"the PositiveIndexKernel which assumes a single target task."
            )


# Collect leftover original slotted classes processed by `attrs.define`
gc.collect()
40 changes: 40 additions & 0 deletions baybe/searchspace/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from baybe.constraints.base import Constraint
from baybe.parameters import TaskParameter
from baybe.parameters.base import Parameter
from baybe.parameters.categorical import TaskCorrelation
from baybe.searchspace.continuous import SubspaceContinuous
from baybe.searchspace.discrete import (
MemorySize,
Expand Down Expand Up @@ -279,6 +280,45 @@ def n_tasks(self) -> int:
except StopIteration:
return 1

@property
def target_task_idxs(self) -> list[int] | None:
    """The indices of the target tasks in the computational representation.

    Returns a list of integer indices corresponding to each active value in
    the TaskParameter. Returns None when there are no task parameters.
    """
    # TODO [16932]: This approach only works for a single task parameter.
    task_params = (p for p in self.parameters if isinstance(p, TaskParameter))
    try:
        task_param = next(task_params)
    except StopIteration:
        # No task parameter present in this searchspace.
        return None

    # Map each active value to its index in the computational representation.
    comp_df = task_param.comp_df
    return [
        int(comp_df.loc[active].iloc[0]) for active in task_param.active_values
    ]

@property
def task_correlation(self) -> TaskCorrelation | None:
    """The task correlation mode for this searchspace."""
    # TODO [16932]: This approach only works for a single task parameter.
    for parameter in self.parameters:
        if isinstance(parameter, TaskParameter):
            return parameter.task_correlation

    # No task parameter present -> no correlation mode.
    return None

def get_comp_rep_parameter_indices(self, name: str, /) -> tuple[int, ...]:
"""Find a parameter's column indices in the computational representation.

Expand Down
23 changes: 22 additions & 1 deletion baybe/surrogates/gaussian_process/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from typing_extensions import override

from baybe.parameters.base import Parameter
from baybe.parameters.categorical import TaskCorrelation
from baybe.searchspace.core import SearchSpace
from baybe.surrogates.base import Surrogate
from baybe.surrogates.gaussian_process.kernel_factory import (
Expand Down Expand Up @@ -69,6 +70,16 @@ def parameter_bounds(self) -> Tensor:

return torch.from_numpy(self.searchspace.scaling_bounds.values)

@property
def task_correlation(self) -> TaskCorrelation | None:
    """Get the task correlation mode of the task parameter, if available."""
    # Forwarded from the searchspace, which inspects its task parameter.
    searchspace = self.searchspace
    return searchspace.task_correlation

@property
def target_task_idxs(self) -> list[int] | None:
    """Determine target task index for PositiveIndexKernel normalization."""
    # Forwarded from the searchspace, which inspects its task parameter.
    searchspace = self.searchspace
    return searchspace.target_task_idxs

Comment on lines +73 to +82
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

how necessary are these helpers? I can get them just via gp.searchspace.x which is not tremendously worse than just gp.x

def get_numerical_indices(self, n_inputs: int) -> tuple[int, ...]:
"""Get the indices of the regular numerical model inputs."""
return tuple(i for i in range(n_inputs) if i != self.task_idx)
Expand Down Expand Up @@ -181,7 +192,17 @@ def _fit(self, train_x: Tensor, train_y: Tensor) -> None:
# create GP covariance
if not context.is_multitask:
covar_module = base_covar_module
else:
elif context.task_correlation == TaskCorrelation.POSITIVE:
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
elif context.task_correlation == TaskCorrelation.POSITIVE:
elif context.task_correlation is TaskCorrelation.POSITIVE:

task_covar_module = (
botorch.models.kernels.positive_index.PositiveIndexKernel(
num_tasks=context.n_tasks,
active_dims=context.task_idx,
rank=context.n_tasks, # TODO: make controllable
target_task_index=context.target_task_idxs[0],
)
)
covar_module = base_covar_module * task_covar_module
elif context.task_correlation == TaskCorrelation.UNKNOWN:
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
elif context.task_correlation == TaskCorrelation.UNKNOWN:
elif context.task_correlation is TaskCorrelation.UNKNOWN:

Comment on lines +195 to +205
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

just for our common understanding: these parts will eventually have to be outsourced to a default_task_kernel_factory or similar (not needed in this PR)

task_covar_module = gpytorch.kernels.IndexKernel(
num_tasks=context.n_tasks,
active_dims=context.task_idx,
Expand Down
119 changes: 82 additions & 37 deletions benchmarks/definition/regression/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

from baybe.objectives import SingleTargetObjective
from baybe.parameters import TaskParameter
from baybe.parameters.categorical import TaskCorrelation
from baybe.searchspace import SearchSpace
from baybe.surrogates.gaussian_process.core import GaussianProcessSurrogate
from benchmarks.definition import TransferLearningRegressionBenchmarkSettings
Expand All @@ -39,7 +40,12 @@ def __call__(self) -> pd.DataFrame:
class SearchSpaceFactory(Protocol):
"""Protocol for SearchSpace creation used in TL regression benchmarks."""

def __call__(self, data: pd.DataFrame, use_task_parameter: bool) -> SearchSpace:
def __call__(
self,
data: pd.DataFrame,
use_task_parameter: bool,
task_correlation: TaskCorrelation = TaskCorrelation.UNKNOWN,
) -> SearchSpace:
"""Create a SearchSpace for regression benchmark evaluation.

Args:
Expand All @@ -48,6 +54,8 @@ def __call__(self, data: pd.DataFrame, use_task_parameter: bool) -> SearchSpace:
scenarios. If True, creates search space with TaskParameter for
TL models. If False, creates vanilla search space without
task parameter.
task_correlation: The task correlation mode (UNKNOWN or POSITIVE).
Only used when use_task_parameter is True.

Returns:
The TL and non-TL searchspaces for the benchmark.
Expand Down Expand Up @@ -100,12 +108,6 @@ def spearman_rho_score(x: np.ndarray, y: np.ndarray, /) -> float:
return rho


# Dictionary mapping transfer learning model names to their surrogate classes
TL_MODELS = {
"index_kernel": GaussianProcessSurrogate,
}


# Regression metrics to evaluate model performance
REGRESSION_METRICS = {
root_mean_squared_error,
Expand Down Expand Up @@ -161,12 +163,17 @@ def run_tl_regression_benchmark(
# Create search space without task parameter
vanilla_searchspace = searchspace_factory(data=data, use_task_parameter=False)

# Create transfer learning search space (with task parameter)
tl_searchspace = searchspace_factory(data=data, use_task_parameter=True)
# Create transfer learning search spaces (with task parameter)
tl_index_searchspace = searchspace_factory(
data=data, use_task_parameter=True, task_correlation=TaskCorrelation.UNKNOWN
)
tl_pos_index_searchspace = searchspace_factory(
data=data, use_task_parameter=True, task_correlation=TaskCorrelation.POSITIVE
)

# Extract task parameter details
# Extract task parameter details (use index searchspace as reference)
task_param = next(
p for p in tl_searchspace.parameters if isinstance(p, TaskParameter)
p for p in tl_index_searchspace.parameters if isinstance(p, TaskParameter)
)
name_task = task_param.name

Expand Down Expand Up @@ -234,16 +241,36 @@ def run_tl_regression_benchmark(
result.update(metrics)
results.append(result)

# Naive GP on full search space
# IndexKernel on full search space, no source data
metrics = _evaluate_model(
GaussianProcessSurrogate(),
target_train,
target_test,
tl_index_searchspace,
objective,
)
result = {
"scenario": "0_index",
"mc_iter": mc_iter,
"n_train_pts": n_train_pts,
"fraction_source": 0.0,
"n_source_pts": 0,
"n_test_pts": len(target_test),
"source_data_seed": settings.random_seed + mc_iter,
}
result.update(metrics)
results.append(result)

# PositiveIndexKernel on full search space, no source data
metrics = _evaluate_model(
GaussianProcessSurrogate(),
target_train,
target_test,
tl_searchspace,
tl_pos_index_searchspace,
objective,
)
result = {
"scenario": "0_full_searchspace",
"scenario": "0_pos_index",
"mc_iter": mc_iter,
"n_train_pts": n_train_pts,
"fraction_source": 0.0,
Expand Down Expand Up @@ -277,29 +304,47 @@ def run_tl_regression_benchmark(

combined_data = pd.concat([source_subset, target_train])

for model_suffix, model_class in TL_MODELS.items():
scenario_name = f"{int(100 * fraction_source)}_{model_suffix}"
model = model_class()

metrics = _evaluate_model(
model,
combined_data,
target_test,
tl_searchspace,
objective,
)

result = {
"scenario": scenario_name,
"mc_iter": mc_iter,
"n_train_pts": n_train_pts,
"fraction_source": fraction_source,
"n_source_pts": len(source_subset),
"n_test_pts": len(target_test),
"source_data_seed": settings.random_seed + mc_iter,
}
result.update(metrics)
results.append(result)
# Evaluate IndexKernel
scenario_name = f"{int(100 * fraction_source)}_index"
metrics = _evaluate_model(
GaussianProcessSurrogate(),
combined_data,
target_test,
tl_index_searchspace,
objective,
)
result = {
"scenario": scenario_name,
"mc_iter": mc_iter,
"n_train_pts": n_train_pts,
"fraction_source": fraction_source,
"n_source_pts": len(source_subset),
"n_test_pts": len(target_test),
"source_data_seed": settings.random_seed + mc_iter,
}
result.update(metrics)
results.append(result)

# Evaluate PositiveIndexKernel
scenario_name = f"{int(100 * fraction_source)}_pos_index"
metrics = _evaluate_model(
GaussianProcessSurrogate(),
combined_data,
target_test,
tl_pos_index_searchspace,
objective,
)
result = {
"scenario": scenario_name,
"mc_iter": mc_iter,
"n_train_pts": n_train_pts,
"fraction_source": fraction_source,
"n_source_pts": len(source_subset),
"n_test_pts": len(target_test),
"source_data_seed": settings.random_seed + mc_iter,
}
result.update(metrics)
results.append(result)
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

since you expanded the benchmarks: are they still feasible or are they now timing out due to the longer runtime?


pbar.update(1)

Expand Down
Loading
Loading