diff --git a/baybe/parameters/categorical.py b/baybe/parameters/categorical.py index 6940e21ba3..66696e636f 100644 --- a/baybe/parameters/categorical.py +++ b/baybe/parameters/categorical.py @@ -1,7 +1,9 @@ """Categorical parameters.""" import gc +from enum import Enum from functools import cached_property +from typing import Any import numpy as np import pandas as pd @@ -16,6 +18,13 @@ from baybe.utils.numerical import DTypeFloatNumpy +class TaskCorrelation(Enum): + """Task correlation modes for TaskParameter.""" + + UNKNOWN = "unknown" + POSITIVE = "positive" + + def _convert_values(value, self, field) -> tuple[str, ...]: """Sort and convert values for categorical parameters.""" value = nonstring_to_tuple(value, self, field) @@ -87,6 +96,30 @@ class TaskParameter(CategoricalParameter): encoding: CategoricalEncoding = field(default=CategoricalEncoding.INT, init=False) # See base class. + task_correlation: TaskCorrelation = field(default=TaskCorrelation.POSITIVE) + """Task correlation. Defaults to positive correlation via PositiveIndexKernel.""" + + @task_correlation.validator + def _validate_task_correlation_active_values( # noqa: DOC101, DOC103 + self, _: Any, value: TaskCorrelation + ) -> None: + """Validate active values compatibility with task correlation mode. + + Raises: + ValueError: If task_correlation is POSITIVE but active_values contains more + than one value. + """ + # Check POSITIVE constraint: must have exactly one active value + # Note: _active_values is the internal field, could be None + if value == TaskCorrelation.POSITIVE and self._active_values is not None: + if len(self._active_values) > 1: + raise ValueError( + f"Task correlation '{TaskCorrelation.POSITIVE.value}' requires " + f"one active value, but {len(self._active_values)} were provided: " + f"{self._active_values}. The POSITIVE mode uses the " + f"PositiveIndexKernel which assumes a single target task." + ) + # Collect leftover original slotted classes processed by `attrs.define` gc.collect() diff --git a/baybe/searchspace/core.py b/baybe/searchspace/core.py index 492844f995..ea248e128f 100644 --- a/baybe/searchspace/core.py +++ b/baybe/searchspace/core.py @@ -15,6 +15,7 @@ from baybe.constraints.base import Constraint from baybe.parameters import TaskParameter from baybe.parameters.base import Parameter +from baybe.parameters.categorical import TaskCorrelation from baybe.searchspace.continuous import SubspaceContinuous from baybe.searchspace.discrete import ( MemorySize, @@ -279,6 +280,45 @@ def n_tasks(self) -> int: except StopIteration: return 1 + @property + def target_task_idxs(self) -> list[int] | None: + """The indices of the target tasks in the computational representation. + + Returns a list of integer indices corresponding to each active value in the + TaskParameter. Returns None when there are no task parameters. + """ + # TODO [16932]: This approach only works for a single task parameter. + try: + task_param = next( + p for p in self.parameters if isinstance(p, TaskParameter) + ) + comp_df = task_param.comp_df + + # Extract computational representation indices for all active values + target_task_idxs = [ + int(comp_df.loc[active_value].iloc[0]) + for active_value in task_param.active_values + ] + return target_task_idxs + + # When there are no task parameters, return None + except StopIteration: + return None + + @property + def task_correlation(self) -> TaskCorrelation | None: + """The task correlation mode for this searchspace.""" + # TODO [16932]: This approach only works for a single task parameter. + try: + task_param = next( + p for p in self.parameters if isinstance(p, TaskParameter) + ) + return task_param.task_correlation + + # When there are no task parameters, we return None + except StopIteration: + return None + def get_comp_rep_parameter_indices(self, name: str, /) -> tuple[int, ...]: """Find a parameter's column indices in the computational representation. diff --git a/baybe/surrogates/gaussian_process/core.py b/baybe/surrogates/gaussian_process/core.py index c0148aca55..d6be4014a6 100644 --- a/baybe/surrogates/gaussian_process/core.py +++ b/baybe/surrogates/gaussian_process/core.py @@ -10,6 +10,7 @@ from typing_extensions import override from baybe.parameters.base import Parameter +from baybe.parameters.categorical import TaskCorrelation from baybe.searchspace.core import SearchSpace from baybe.surrogates.base import Surrogate from baybe.surrogates.gaussian_process.kernel_factory import ( @@ -69,6 +70,16 @@ def parameter_bounds(self) -> Tensor: return torch.from_numpy(self.searchspace.scaling_bounds.values) + @property + def task_correlation(self) -> TaskCorrelation | None: + """Get the task correlation mode of the task parameter, if available.""" + return self.searchspace.task_correlation + + @property + def target_task_idxs(self) -> list[int] | None: + """Determine target task index for PositiveIndexKernel normalization.""" + return self.searchspace.target_task_idxs + def get_numerical_indices(self, n_inputs: int) -> tuple[int, ...]: """Get the indices of the regular numerical model inputs.""" return tuple(i for i in range(n_inputs) if i != self.task_idx) @@ -181,7 +192,17 @@ def _fit(self, train_x: Tensor, train_y: Tensor) -> None: # create GP covariance if not context.is_multitask: covar_module = base_covar_module - else: + elif context.task_correlation == TaskCorrelation.POSITIVE: + task_covar_module = ( + botorch.models.kernels.positive_index.PositiveIndexKernel( + num_tasks=context.n_tasks, + active_dims=context.task_idx, + rank=context.n_tasks, # TODO: make controllable + target_task_index=context.target_task_idxs[0], + ) + ) + covar_module = base_covar_module * task_covar_module + elif context.task_correlation == TaskCorrelation.UNKNOWN: task_covar_module = gpytorch.kernels.IndexKernel( num_tasks=context.n_tasks, active_dims=context.task_idx, diff --git a/benchmarks/definition/regression/core.py b/benchmarks/definition/regression/core.py index 44c86c8d64..3e1cd1ddad 100644 --- a/benchmarks/definition/regression/core.py +++ b/benchmarks/definition/regression/core.py @@ -20,6 +20,7 @@ from baybe.objectives import SingleTargetObjective from baybe.parameters import TaskParameter +from baybe.parameters.categorical import TaskCorrelation from baybe.searchspace import SearchSpace from baybe.surrogates.gaussian_process.core import GaussianProcessSurrogate from benchmarks.definition import TransferLearningRegressionBenchmarkSettings @@ -39,7 +40,12 @@ def __call__(self) -> pd.DataFrame: class SearchSpaceFactory(Protocol): """Protocol for SearchSpace creation used in TL regression benchmarks.""" - def __call__(self, data: pd.DataFrame, use_task_parameter: bool) -> SearchSpace: + def __call__( + self, + data: pd.DataFrame, + use_task_parameter: bool, + task_correlation: TaskCorrelation = TaskCorrelation.UNKNOWN, + ) -> SearchSpace: """Create a SearchSpace for regression benchmark evaluation. Args: @@ -48,6 +54,8 @@ def __call__(self, data: pd.DataFrame, use_task_parameter: bool) -> SearchSpace: scenarios. If True, creates search space with TaskParameter for TL models. If False, creates vanilla search space without task parameter. + task_correlation: The task correlation mode (UNKNOWN or POSITIVE). + Only used when use_task_parameter is True. Returns: The TL and non-TL searchspaces for the benchmark. @@ -100,12 +108,6 @@ def spearman_rho_score(x: np.ndarray, y: np.ndarray, /) -> float: return rho -# Dictionary mapping transfer learning model names to their surrogate classes -TL_MODELS = { - "index_kernel": GaussianProcessSurrogate, -} - - # Regression metrics to evaluate model performance REGRESSION_METRICS = { root_mean_squared_error, @@ -161,12 +163,17 @@ def run_tl_regression_benchmark( # Create search space without task parameter vanilla_searchspace = searchspace_factory(data=data, use_task_parameter=False) - # Create transfer learning search space (with task parameter) - tl_searchspace = searchspace_factory(data=data, use_task_parameter=True) + # Create transfer learning search spaces (with task parameter) + tl_index_searchspace = searchspace_factory( + data=data, use_task_parameter=True, task_correlation=TaskCorrelation.UNKNOWN + ) + tl_pos_index_searchspace = searchspace_factory( + data=data, use_task_parameter=True, task_correlation=TaskCorrelation.POSITIVE + ) - # Extract task parameter details + # Extract task parameter details (use index searchspace as reference) task_param = next( - p for p in tl_searchspace.parameters if isinstance(p, TaskParameter) + p for p in tl_index_searchspace.parameters if isinstance(p, TaskParameter) ) name_task = task_param.name @@ -234,16 +241,36 @@ def run_tl_regression_benchmark( result.update(metrics) results.append(result) - # Naive GP on full search space + # IndexKernel on full search space, no source data + metrics = _evaluate_model( + GaussianProcessSurrogate(), + target_train, + target_test, + tl_index_searchspace, + objective, + ) + result = { + "scenario": "0_index", + "mc_iter": mc_iter, + "n_train_pts": n_train_pts, + "fraction_source": 0.0, + "n_source_pts": 0, + "n_test_pts": len(target_test), + "source_data_seed": settings.random_seed + mc_iter, + } + result.update(metrics) + results.append(result) + + # PositiveIndexKernel on full search space, no source data metrics = _evaluate_model( GaussianProcessSurrogate(), target_train, target_test, - tl_searchspace, + tl_pos_index_searchspace, objective, ) result = { - "scenario": "0_full_searchspace", + "scenario": "0_pos_index", "mc_iter": mc_iter, "n_train_pts": n_train_pts, "fraction_source": 0.0, @@ -277,29 +304,47 @@ def run_tl_regression_benchmark( combined_data = pd.concat([source_subset, target_train]) - for model_suffix, model_class in TL_MODELS.items(): - scenario_name = f"{int(100 * fraction_source)}_{model_suffix}" - model = model_class() - - metrics = _evaluate_model( - model, - combined_data, - target_test, - tl_searchspace, - objective, - ) - - result = { - "scenario": scenario_name, - "mc_iter": mc_iter, - "n_train_pts": n_train_pts, - "fraction_source": fraction_source, - "n_source_pts": len(source_subset), - "n_test_pts": len(target_test), - "source_data_seed": settings.random_seed + mc_iter, - } - result.update(metrics) - results.append(result) + # Evaluate IndexKernel + scenario_name = f"{int(100 * fraction_source)}_index" + metrics = _evaluate_model( + GaussianProcessSurrogate(), + combined_data, + target_test, + tl_index_searchspace, + objective, + ) + result = { + "scenario": scenario_name, + "mc_iter": mc_iter, + "n_train_pts": n_train_pts, + "fraction_source": fraction_source, + "n_source_pts": len(source_subset), + "n_test_pts": len(target_test), + "source_data_seed": settings.random_seed + mc_iter, + } + result.update(metrics) + results.append(result) + + # Evaluate PositiveIndexKernel + scenario_name = f"{int(100 * fraction_source)}_pos_index" + metrics = _evaluate_model( + GaussianProcessSurrogate(), + combined_data, + target_test, + tl_pos_index_searchspace, + objective, + ) + result = { + "scenario": scenario_name, + "mc_iter": mc_iter, + "n_train_pts": n_train_pts, + "fraction_source": fraction_source, + "n_source_pts": len(source_subset), + "n_test_pts": len(target_test), + "source_data_seed": settings.random_seed + mc_iter, + } + result.update(metrics) + results.append(result) pbar.update(1) diff --git a/benchmarks/domains/aryl_halides/core.py b/benchmarks/domains/aryl_halides/core.py index 8cd51f1c0b..2cd5f9b60a 100644 --- a/benchmarks/domains/aryl_halides/core.py +++ b/benchmarks/domains/aryl_halides/core.py @@ -18,6 +18,7 @@ from baybe.objectives import SingleTargetObjective from baybe.parameters import SubstanceParameter, TaskParameter from baybe.parameters.base import DiscreteParameter +from baybe.parameters.categorical import TaskCorrelation from baybe.searchspace import SearchSpace from baybe.simulation import simulate_scenarios from baybe.targets import NumericalTarget @@ -48,8 +49,19 @@ def make_searchspace( data: pd.DataFrame, target_tasks: Sequence[str] | None = None, source_tasks: Sequence[str] | None = None, + task_correlation: TaskCorrelation = TaskCorrelation.UNKNOWN, ) -> SearchSpace: - """Create the search space for the benchmark.""" + """Create the search space for the benchmark. + + Args: + data: The benchmark data. + target_tasks: The target tasks for transfer learning. + source_tasks: The source tasks for transfer learning. + task_correlation: The task correlation mode (UNKNOWN or POSITIVE). + + Returns: + The configured search space. + """ params: list[DiscreteParameter] = [ SubstanceParameter( name=substance, @@ -59,13 +71,13 @@ def make_searchspace( for substance in ["base", "ligand", "additive"] ] if target_tasks is not None and source_tasks is not None: - all_tasks = [*source_tasks, *target_tasks] all_tasks = [*source_tasks, *target_tasks] params.append( TaskParameter( name="aryl_halide", values=all_tasks, active_values=target_tasks, + task_correlation=task_correlation, ) ) return SearchSpace.from_product(parameters=params) @@ -109,10 +121,17 @@ def aryl_halide_tl_substance_benchmark( """ data = load_data() - searchspace = make_searchspace( + searchspace_tl_index = make_searchspace( + data=data, + source_tasks=source_tasks, + target_tasks=target_tasks, + task_correlation=TaskCorrelation.UNKNOWN, + ) + searchspace_tl_pos_index = make_searchspace( data=data, source_tasks=source_tasks, target_tasks=target_tasks, + task_correlation=TaskCorrelation.POSITIVE, ) searchspace_nontl = make_searchspace(data=data) @@ -120,8 +139,12 @@ def aryl_halide_tl_substance_benchmark( initial_data = make_initial_data(data, source_tasks) objective = make_objective() - tl_campaign = Campaign( - searchspace=searchspace, + tl_index_campaign = Campaign( + searchspace=searchspace_tl_index, + objective=objective, + ) + tl_pos_index_campaign = Campaign( + searchspace=searchspace_tl_pos_index, objective=objective, ) nontl_campaign = Campaign(searchspace=searchspace_nontl, objective=objective) @@ -138,7 +161,8 @@ def aryl_halide_tl_substance_benchmark( results.append( simulate_scenarios( { - f"{int(100 * p)}": tl_campaign, + f"{int(100 * p)}_index": tl_index_campaign, + f"{int(100 * p)}_pos_index": tl_pos_index_campaign, f"{int(100 * p)}_naive": nontl_campaign, }, lookup, @@ -151,7 +175,11 @@ def aryl_halide_tl_substance_benchmark( ) results.append( simulate_scenarios( - {"0": tl_campaign, "0_naive": nontl_campaign}, + { + "0_index": tl_index_campaign, + "0_pos_index": tl_pos_index_campaign, + "0_naive": nontl_campaign, + }, lookup, batch_size=settings.batch_size, n_doe_iterations=settings.n_doe_iterations, diff --git a/benchmarks/domains/aryl_halides/regression_tl.py b/benchmarks/domains/aryl_halides/regression_tl.py index 144472ed2b..f389088b45 100644 --- a/benchmarks/domains/aryl_halides/regression_tl.py +++ b/benchmarks/domains/aryl_halides/regression_tl.py @@ -6,6 +6,7 @@ import pandas as pd +from baybe.parameters.categorical import TaskCorrelation from benchmarks.definition import ( TransferLearningRegressionBenchmark, TransferLearningRegressionBenchmarkSettings, @@ -58,12 +59,17 @@ def _aryl_halide_tl_regr( DataFrame with benchmark results. """ - def make_searchspace_wrapper(data: pd.DataFrame, use_task_parameter: bool): + def make_searchspace_wrapper( + data: pd.DataFrame, + use_task_parameter: bool, + task_correlation: TaskCorrelation = TaskCorrelation.UNKNOWN, + ): if use_task_parameter: return make_searchspace( data=data, source_tasks=source_tasks, target_tasks=target_tasks, + task_correlation=task_correlation, ) else: return make_searchspace(data=data) diff --git a/benchmarks/domains/direct_arylation/convergence_tl.py b/benchmarks/domains/direct_arylation/convergence_tl.py index c53fe89f47..a2398894c9 100644 --- a/benchmarks/domains/direct_arylation/convergence_tl.py +++ b/benchmarks/domains/direct_arylation/convergence_tl.py @@ -15,6 +15,7 @@ TaskParameter, ) from baybe.parameters.base import DiscreteParameter +from baybe.parameters.categorical import TaskCorrelation from baybe.searchspace import SearchSpace from baybe.simulation import simulate_scenarios from baybe.targets import NumericalTarget @@ -41,8 +42,18 @@ def load_data() -> pd.DataFrame: def make_searchspace( data: pd.DataFrame, use_task_parameter: bool, + task_correlation: TaskCorrelation = TaskCorrelation.UNKNOWN, ) -> SearchSpace: - """Create the search space for the benchmark.""" + """Create the search space for the benchmark. + + Args: + data: The benchmark data. + use_task_parameter: Whether to include a task parameter. + task_correlation: The task correlation mode (UNKNOWN or POSITIVE). + + Returns: + The configured search space. + """ params: list[DiscreteParameter] = [ SubstanceParameter( name=substance, @@ -62,6 +73,7 @@ def make_searchspace( name="Temp_C", values=["90", "105", "120"], active_values=["105"], + task_correlation=task_correlation, ) ) return SearchSpace.from_product(parameters=params) @@ -118,9 +130,15 @@ def direct_arylation_tl_temperature( """ data = load_data() - searchspace = make_searchspace( + tl_index_searchspace = make_searchspace( data=data, use_task_parameter=True, + task_correlation=TaskCorrelation.UNKNOWN, + ) + tl_pos_index_searchspace = make_searchspace( + data=data, + use_task_parameter=True, + task_correlation=TaskCorrelation.POSITIVE, ) searchspace_nontl = make_searchspace( data=data, @@ -131,8 +149,11 @@ def direct_arylation_tl_temperature( initial_data = make_initial_data(data) objective = make_objective() - tl_campaign = Campaign(searchspace=searchspace, objective=objective) - non_tl_campaign = Campaign(searchspace=searchspace_nontl, objective=objective) + tl_index_campaign = Campaign(searchspace=tl_index_searchspace, objective=objective) + tl_pos_index_campaign = Campaign( + searchspace=tl_pos_index_searchspace, objective=objective + ) + nontl_campaign = Campaign(searchspace=searchspace_nontl, objective=objective) percentages = [0.01, 0.1, 0.2] @@ -148,8 +169,9 @@ def direct_arylation_tl_temperature( results.append( simulate_scenarios( { - f"{int(100 * p)}": tl_campaign, - f"{int(100 * p)}_naive": non_tl_campaign, + f"{int(100 * p)}_index": tl_index_campaign, + f"{int(100 * p)}_pos_index": tl_pos_index_campaign, + f"{int(100 * p)}_naive": nontl_campaign, }, lookup, initial_data=initial_data_samples[p], @@ -161,7 +183,11 @@ def direct_arylation_tl_temperature( ) results.append( simulate_scenarios( - {"0": tl_campaign, "0_naive": non_tl_campaign}, + { + "0_index": tl_index_campaign, + "0_pos_index": tl_pos_index_campaign, + "0_naive": nontl_campaign, + }, lookup, batch_size=settings.batch_size, n_doe_iterations=settings.n_doe_iterations, diff --git a/benchmarks/domains/easom/convergence_tl.py b/benchmarks/domains/easom/convergence_tl.py index bcae93cdc1..2ed5c5280d 100644 --- a/benchmarks/domains/easom/convergence_tl.py +++ b/benchmarks/domains/easom/convergence_tl.py @@ -11,6 +11,7 @@ from baybe.objectives import SingleTargetObjective from baybe.parameters import NumericalDiscreteParameter, TaskParameter from baybe.parameters.base import DiscreteParameter +from baybe.parameters.categorical import TaskCorrelation from baybe.searchspace import SearchSpace from baybe.simulation import simulate_scenarios from baybe.targets import NumericalTarget @@ -85,21 +86,34 @@ def easom_tl_47_negate_noise5(settings: ConvergenceBenchmarkSettings) -> pd.Data NumericalDiscreteParameter(name=name, values=values) for name, values in grid_locations.items() ] - task_param = TaskParameter( + task_param_index = TaskParameter( name="Function", values=["Target_Function", "Source_Function"], active_values=["Target_Function"], + task_correlation=TaskCorrelation.UNKNOWN, ) - params_tl = params + [task_param] + task_param_pos_index = TaskParameter( + name="Function", + values=["Target_Function", "Source_Function"], + active_values=["Target_Function"], + task_correlation=TaskCorrelation.POSITIVE, + ) + params_tl_index = params + [task_param_index] + params_tl_pos_index = params + [task_param_pos_index] searchspace_nontl = SearchSpace.from_product(parameters=params) - searchspace_tl = SearchSpace.from_product(parameters=params_tl) + tl_index_searchspace = SearchSpace.from_product(parameters=params_tl_index) + tl_pos_index_searchspace = SearchSpace.from_product(parameters=params_tl_pos_index) objective = SingleTargetObjective( target=NumericalTarget(name="Target", minimize=not negate) ) - tl_campaign = Campaign( - searchspace=searchspace_tl, + tl_index_campaign = Campaign( + searchspace=tl_index_searchspace, + objective=objective, + ) + tl_pos_index_campaign = Campaign( + searchspace=tl_pos_index_searchspace, objective=objective, ) nontl_campaign = Campaign( @@ -137,7 +151,8 @@ def easom_tl_47_negate_noise5(settings: ConvergenceBenchmarkSettings) -> pd.Data results.append( simulate_scenarios( { - f"{int(100 * p)}": tl_campaign, + f"{int(100 * p)}_index": tl_index_campaign, + f"{int(100 * p)}_pos_index": tl_pos_index_campaign, f"{int(100 * p)}_naive": nontl_campaign, }, lookup, @@ -150,7 +165,11 @@ def easom_tl_47_negate_noise5(settings: ConvergenceBenchmarkSettings) -> pd.Data ) results.append( simulate_scenarios( - {"0": tl_campaign, "0_naive": nontl_campaign}, + { + "0_index": tl_index_campaign, + "0_pos_index": tl_pos_index_campaign, + "0_naive": nontl_campaign, + }, lookup, batch_size=settings.batch_size, n_doe_iterations=settings.n_doe_iterations, diff --git a/benchmarks/domains/hartmann/convergence_tl.py b/benchmarks/domains/hartmann/convergence_tl.py index 9d0959fab3..09ef9a0494 100644 --- a/benchmarks/domains/hartmann/convergence_tl.py +++ b/benchmarks/domains/hartmann/convergence_tl.py @@ -11,6 +11,7 @@ from baybe.objectives import SingleTargetObjective from baybe.parameters import NumericalDiscreteParameter, TaskParameter from baybe.parameters.base import DiscreteParameter +from baybe.parameters.categorical import TaskCorrelation from baybe.searchspace import SearchSpace from baybe.simulation import simulate_scenarios from baybe.targets import NumericalTarget @@ -62,21 +63,34 @@ def hartmann_tl_3_20_15(settings: ConvergenceBenchmarkSettings) -> pd.DataFrame: ) for name, points in grid_locations.items() ] - task_param = TaskParameter( + task_param_index = TaskParameter( name="Function", values=["Target_Function", "Source_Function"], active_values=["Target_Function"], + task_correlation=TaskCorrelation.UNKNOWN, ) - params_tl = params + [task_param] + task_param_pos_index = TaskParameter( + name="Function", + values=["Target_Function", "Source_Function"], + active_values=["Target_Function"], + task_correlation=TaskCorrelation.POSITIVE, + ) + params_tl_index = params + [task_param_index] + params_tl_pos_index = params + [task_param_pos_index] searchspace_nontl = SearchSpace.from_product(parameters=params) - searchspace_tl = SearchSpace.from_product(parameters=params_tl) + tl_index_searchspace = SearchSpace.from_product(parameters=params_tl_index) + tl_pos_index_searchspace = SearchSpace.from_product(parameters=params_tl_pos_index) objective = SingleTargetObjective( target=NumericalTarget(name="Target", minimize=True) ) - tl_campaign = Campaign( - searchspace=searchspace_tl, + tl_index_campaign = Campaign( + searchspace=tl_index_searchspace, + objective=objective, + ) + tl_pos_index_campaign = Campaign( + searchspace=tl_pos_index_searchspace, objective=objective, ) nontl_campaign = Campaign( @@ -121,7 +135,8 @@ def hartmann_tl_3_20_15(settings: ConvergenceBenchmarkSettings) -> pd.DataFrame: results.append( simulate_scenarios( { - f"{int(100 * p)}": tl_campaign, + f"{int(100 * p)}_index": tl_index_campaign, + f"{int(100 * p)}_pos_index": tl_pos_index_campaign, f"{int(100 * p)}_naive": nontl_campaign, }, lookup, @@ -134,7 +149,11 @@ def hartmann_tl_3_20_15(settings: ConvergenceBenchmarkSettings) -> pd.DataFrame: ) results.append( simulate_scenarios( - {"0": tl_campaign, "0_naive": nontl_campaign}, + { + "0_index": tl_index_campaign, + "0_pos_index": tl_pos_index_campaign, + "0_naive": nontl_campaign, + }, lookup, batch_size=settings.batch_size, n_doe_iterations=settings.n_doe_iterations, diff --git a/benchmarks/domains/michalewicz/convergence_tl.py b/benchmarks/domains/michalewicz/convergence_tl.py index 511f79523b..5f645cec6a 100644 --- a/benchmarks/domains/michalewicz/convergence_tl.py +++ b/benchmarks/domains/michalewicz/convergence_tl.py @@ -18,6 +18,7 @@ from baybe.objectives import SingleTargetObjective from baybe.parameters import NumericalContinuousParameter, TaskParameter from baybe.parameters.base import Parameter +from baybe.parameters.categorical import TaskCorrelation from baybe.searchspace import SearchSpace from baybe.simulation import simulate_scenarios from baybe.targets import NumericalTarget @@ -26,8 +27,19 @@ from benchmarks.definition.base import RunMode -def make_searchspace(use_task_parameter: bool) -> SearchSpace: - """Create search space for the benchmark.""" +def make_searchspace( + use_task_parameter: bool, + task_correlation: TaskCorrelation = TaskCorrelation.UNKNOWN, +) -> SearchSpace: + """Create search space for the benchmark. + + Args: + use_task_parameter: Whether to include a task parameter. + task_correlation: The task correlation mode (UNKNOWN or POSITIVE). + + Returns: + The configured search space. + """ params: list[Parameter] = [ NumericalContinuousParameter( name=f"x{k}", @@ -41,6 +53,7 @@ def make_searchspace(use_task_parameter: bool) -> SearchSpace: name="Function", values=["Target_Function", "Source_Function"], active_values=["Target_Function"], + task_correlation=task_correlation, ) ) @@ -132,14 +145,23 @@ def michalewicz_tl_continuous(settings: ConvergenceBenchmarkSettings) -> pd.Data "Target_Function": Michalewicz(dim=5, negate=True), } searchspace_nontl = make_searchspace(use_task_parameter=False) - searchspace_tl = make_searchspace(use_task_parameter=True) + tl_index_searchspace = make_searchspace( + use_task_parameter=True, task_correlation=TaskCorrelation.UNKNOWN + ) + tl_pos_index_searchspace = make_searchspace( + use_task_parameter=True, task_correlation=TaskCorrelation.POSITIVE + ) objective = make_objective() - campaign_tl = Campaign( - searchspace=searchspace_tl, + tl_index_campaign = Campaign( + searchspace=tl_index_searchspace, + objective=objective, + ) + tl_pos_index_campaign = Campaign( + searchspace=tl_pos_index_searchspace, objective=objective, ) - campaign_nontl = Campaign( + nontl_campaign = Campaign( searchspace=searchspace_nontl, objective=objective, ) @@ -158,7 +180,11 @@ def michalewicz_tl_continuous(settings: ConvergenceBenchmarkSettings) -> pd.Data for p in n_points: results.append( simulate_scenarios( - {f"{p}": campaign_tl, f"{p}_naive": campaign_nontl}, + { + f"{p}_index": tl_index_campaign, + f"{p}_pos_index": tl_pos_index_campaign, + f"{p}_naive": nontl_campaign, + }, lambda x: wrap_function( functions["Target_Function"], "Target_Function", x ), @@ -171,7 +197,11 @@ def michalewicz_tl_continuous(settings: ConvergenceBenchmarkSettings) -> pd.Data ) results.append( simulate_scenarios( - {"0": campaign_tl, "0_naive": campaign_nontl}, + { + "0_index": tl_index_campaign, + "0_pos_index": tl_pos_index_campaign, + "0_naive": nontl_campaign, + }, lambda x: wrap_function(functions["Target_Function"], "Target_Function", x), batch_size=settings.batch_size, n_doe_iterations=settings.n_doe_iterations,