diff --git a/docs/guide/bayesian.md b/docs/guide/bayesian.md
index 6725f19..57fc472 100644
--- a/docs/guide/bayesian.md
+++ b/docs/guide/bayesian.md
@@ -188,13 +188,43 @@ separate evaluation from analysis:
 --8<-- "examples/bayesian_study.py:persistence"
 ```
 
+## Multi-fidelity workflow
+
+Real-world studies often have an expensive simulator — full MCMC, a
+fine-mesh CFD solver, or an agent-based epidemiological model.
+Running every candidate design at full fidelity is wasteful. A
+multi-fidelity strategy screens many designs cheaply, then validates
+only the promising ones at high fidelity.
+
+`Phase` supports this via optional `world` and `scorer` overrides.
+When set, a phase uses its own simulator instead of the `Study`-level
+default. Here the cheap surrogate draws only 50 posterior samples
+(fast but noisy CRPS estimates), while the validation phase draws
+2,000:
+
+```python
+--8<-- "examples/bayesian_study.py:multifidelity"
+```
+
+The `Study` orchestrates both phases: the first screens 60 designs
+with the cheap surrogate and keeps the top 10 by Pareto rank, then
+the second re-evaluates those 10 designs with the expensive model.
+
+This pattern applies whenever fidelity is a computational strategy
+rather than a design factor:
+
+- **Epidemiology** — screen surveillance designs with a deterministic
+  ODE model, validate the best with a stochastic agent-based model.
+- **Engineering** — coarse mesh for broad exploration, fine mesh for
+  the Pareto front.
+- **Forecast model grading** — fast approximate inference for
+  screening, full HMC for final assessment.
+
 ## What to try next
 
 - Swap `method="morris"` for `method="sobol"` in `screen()` for
   variance-based sensitivity indices.
 - Use `Constraint` + `feasibility_filter` to enforce
   `coverage_95 >= 0.90` before stacking.
-- Replace `run_grid` with a two-phase `Study` that screens first
-  and refines around the Pareto front.
 - Try `stack_bayesian()` on models that expose log-likelihood
   (requires `arviz`).
diff --git a/examples/bayesian_study.py b/examples/bayesian_study.py
index 63005fd..2dda582 100644
--- a/examples/bayesian_study.py
+++ b/examples/bayesian_study.py
@@ -21,6 +21,8 @@
     Factor,
     FactorType,
     Observable,
+    Phase,
+    Study,
     build_grid,
     coverage_curve,
     ensemble_predict,
@@ -36,6 +38,7 @@
     score,
     screen,
     stack_scores,
+    top_k_pareto_filter,
 )
 
 ASSET_DIR = "docs/assets"
@@ -130,8 +133,21 @@ class BayesianRegressionSimulator:
     Each config specifies prior hyperparameters and sample size. The
     "truth" is the held-out test set; "observations" are the posterior
     predictive samples at those test points.
+
+    Args:
+        n_samples: Number of posterior predictive draws. Fewer draws
+            give faster but noisier score estimates — useful as a cheap
+            surrogate in a multi-fidelity workflow.
     """
 
+    def __init__(self, n_samples: int = 500) -> None:
+        """Initialise the simulator.
+
+        Args:
+            n_samples: Number of posterior predictive draws.
+        """
+        self.n_samples = n_samples
+
     def generate(self, config: dict[str, Any]) -> tuple[Any, Any]:
         """Draw training data, fit posterior, and return test predictions.
@@ -155,6 +171,7 @@ def generate(self, config: dict[str, Any]) -> tuple[Any, Any]:
             y_train,
             prior_var=config["prior_var"],
             noise_scale=config["noise_scale"],
+            n_samples=self.n_samples,
         )
 
         observations = {
@@ -371,6 +388,50 @@ def _save_plots(results: Any, directions: Any, nominal: Any, empirical: Any) ->
     # --8<-- [end:plots]
 
 
+def _run_multi_fidelity() -> None:
+    """Demonstrate multi-fidelity via Phase-level world overrides."""
+    # --8<-- [start:multifidelity]
+    # Cheap surrogate: only 50 posterior draws (fast, noisier scores)
+    cheap_world = BayesianRegressionSimulator(n_samples=50)
+    # Expensive model: 2000 posterior draws (slow, precise scores)
+    expensive_world = BayesianRegressionSimulator(n_samples=2000)
+
+    grid = build_grid(factors, method="lhs", n_samples=60, seed=42)
+    for cfg in grid:
+        cfg["n_obs"] = round(cfg["n_obs"])
+
+    study = Study(
+        world=expensive_world,
+        scorer=BayesianRegressionScorer(),
+        observables=observables,
+        phases=[
+            # Phase 1: screen 60 designs with the cheap surrogate
+            Phase(
+                name="screen",
+                grid=grid,
+                world=cheap_world,
+                filter_fn=top_k_pareto_filter(k=10),
+            ),
+            # Phase 2: validate top 10 with the expensive model
+            Phase(name="validate", grid="carry"),
+        ],
+        annotations=[compute_cost],
+    )
+    study.run()
+
+    screen_r = study.results("screen")
+    validate_r = study.results("validate")
+    print("\nMulti-fidelity study:")
+    print(f"  Screen phase: {screen_r.scores.shape[0]} designs (50 draws)")
+    print(f"  Validate phase: {validate_r.scores.shape[0]} designs (2000 draws)")
+
+    directions = [o.direction for o in observables]
+    weights = [o.weight for o in observables]
+    front_idx = extract_front(validate_r.scores, directions, weights)
+    print(f"  Final Pareto front: {len(front_idx)} designs")
+    # --8<-- [end:multifidelity]
+
+
 def main() -> None:
     """Run the Bayesian model criticism study."""
     world = BayesianRegressionSimulator()
@@ -403,6 +464,7 @@
     nominal, empirical = _run_calibration(results, front_idx, world)
     _run_persistence(results)
     _save_plots(results, directions, nominal, empirical)
+    _run_multi_fidelity()
 
 
 if __name__ == "__main__":
diff --git a/src/trade_study/study.py b/src/trade_study/study.py
index 4ca2a57..ab64404 100644
--- a/src/trade_study/study.py
+++ b/src/trade_study/study.py
@@ -49,6 +49,12 @@ class Phase:
             indices of configs to pass to the next phase. If None,
             phase is terminal.
         n_trials: For adaptive mode, number of optuna trials.
+        world: Optional phase-level simulator override. When set, this
+            phase uses *world* instead of the ``Study``-level simulator.
+            Useful for multi-fidelity workflows (cheap surrogate first,
+            expensive model later).
+        scorer: Optional phase-level scorer override. When set, this
+            phase uses *scorer* instead of the ``Study``-level scorer.
""" name: str @@ -57,6 +63,8 @@ class Phase: None ) n_trials: int = 100 + world: Simulator | None = None + scorer: Scorer | None = None def top_k_pareto_filter( @@ -210,10 +218,14 @@ def run( prev_result: ResultsTable | None = None for phase in self.phases: + # Resolve phase-level overrides (multi-fidelity support) + world = phase.world if phase.world is not None else self.world + scorer = phase.scorer if phase.scorer is not None else self.scorer + if isinstance(phase.grid, str) and phase.grid == "adaptive": result = run_adaptive( - self.world, - self.scorer, + world, + scorer, self.factors, self.observables, n_trials=phase.n_trials, @@ -226,8 +238,8 @@ def run( raise ValueError(msg) grid = phase.grid(prev_result, self.observables) result = run_grid( - self.world, - self.scorer, + world, + scorer, grid, self.observables, annotations=self.annotations or None, @@ -239,8 +251,8 @@ def run( phase.grid if isinstance(phase.grid, list) else (carry_grid or []) ) result = run_grid( - self.world, - self.scorer, + world, + scorer, grid, self.observables, annotations=self.annotations or None, diff --git a/tests/test_study.py b/tests/test_study.py index d2cd3aa..e4cf18a 100644 --- a/tests/test_study.py +++ b/tests/test_study.py @@ -991,3 +991,167 @@ def test_feasibility_filter_in_study( final = study.results("refine") # alpha=0.0 (cost=0), alpha=0.25 (cost=2.5), alpha=0.5 (cost=5.0) satisfy cost <= 5 assert final.scores.shape[0] == 3 + + +# --------------------------------------------------------------------------- +# Phase-level world / scorer override (multi-fidelity, #78) +# --------------------------------------------------------------------------- + + +class _CheapSimulator: + """Cheap surrogate that adds a constant offset to alpha.""" + + def generate( + self, + config: dict[str, Any], + ) -> tuple[dict[str, Any], dict[str, Any]]: + """Return config as both truth and observations. + + Returns: + Tuple of (config, config). + """ + return config, config + + +class _CheapScorer: + """Scorer that returns a constant error and halved cost.""" + + def score( + self, + truth: Any, + observations: Any, + config: dict[str, Any], + ) -> dict[str, float]: + """Score: error = 0.1 (constant), cost = alpha * 5. + + Returns: + Dict with ``error`` and ``cost`` scores. + """ + a = float(config.get("alpha", 0.5)) + return {"error": 0.1, "cost": a * 5.0} + + +def test_phase_world_override( + scorer: _ToyScorer, + observables: list[Observable], +) -> None: + """Phase.world overrides Study.world for that phase.""" + cheap = _CheapSimulator() + expensive = _ToySimulator() + grid = [{"alpha": 0.5}] + study = Study( + world=expensive, + scorer=scorer, + observables=observables, + phases=[ + Phase(name="cheap_phase", grid=grid, world=cheap), + Phase(name="expensive_phase", grid=grid), + ], + ) + study.run() + # Both phases use ToyScorer (error=|0.5-0.5|=0, cost=5). + # The key is that they ran without error, proving the + # phase-level world was used for cheap_phase. 
+    assert study.results("cheap_phase").scores.shape == (1, 2)
+    assert study.results("expensive_phase").scores.shape == (1, 2)
+
+
+def test_phase_scorer_override(
+    world: _ToySimulator,
+    observables: list[Observable],
+) -> None:
+    """Phase.scorer overrides Study.scorer for that phase."""
+    cheap_scorer = _CheapScorer()
+    expensive_scorer = _ToyScorer()
+    grid = [{"alpha": 0.5}]
+    study = Study(
+        world=world,
+        scorer=expensive_scorer,
+        observables=observables,
+        phases=[
+            Phase(name="cheap_phase", grid=grid, scorer=cheap_scorer),
+            Phase(name="expensive_phase", grid=grid),
+        ],
+    )
+    study.run()
+    cheap_r = study.results("cheap_phase")
+    expensive_r = study.results("expensive_phase")
+    # CheapScorer: error=0.1, cost=0.5*5=2.5
+    assert cheap_r.scores[0, 0] == pytest.approx(0.1)
+    assert cheap_r.scores[0, 1] == pytest.approx(2.5)
+    # ToyScorer: error=|0.5-0.5|=0, cost=0.5*10=5
+    assert expensive_r.scores[0, 0] == pytest.approx(0.0)
+    assert expensive_r.scores[0, 1] == pytest.approx(5.0)
+
+
+def test_phase_both_overrides(
+    observables: list[Observable],
+) -> None:
+    """Phase can override both world and scorer simultaneously."""
+    study = Study(
+        world=_ToySimulator(),
+        scorer=_ToyScorer(),
+        observables=observables,
+        phases=[
+            Phase(
+                name="custom",
+                grid=[{"alpha": 0.5}],
+                world=_CheapSimulator(),
+                scorer=_CheapScorer(),
+            ),
+        ],
+    )
+    study.run()
+    r = study.results("custom")
+    assert r.scores[0, 0] == pytest.approx(0.1)
+    assert r.scores[0, 1] == pytest.approx(2.5)
+
+
+def test_multi_fidelity_screen_then_validate(
+    observables: list[Observable],
+) -> None:
+    """Two-phase multi-fidelity: cheap screen, expensive validation."""
+    grid = [{"alpha": v} for v in [0.0, 0.25, 0.5, 0.75, 1.0]]
+    study = Study(
+        world=_ToySimulator(),
+        scorer=_ToyScorer(),
+        observables=observables,
+        phases=[
+            Phase(
+                name="screen",
+                grid=grid,
+                world=_CheapSimulator(),
+                scorer=_CheapScorer(),
+                filter_fn=top_k_pareto_filter(k=2),
+            ),
+            Phase(name="validate", grid="carry"),
+        ],
+    )
+    study.run()
+    screen_r = study.results("screen")
+    validate_r = study.results("validate")
+    # Screening used the cheap scorer (all errors = 0.1)
+    assert np.all(screen_r.scores[:, 0] == pytest.approx(0.1))
+    # The screen filter carried at most k=2 designs forward
+    assert validate_r.scores.shape[0] <= 2
+    # Validation used the Study-level ToyScorer, so at least one
+    # error differs from the cheap scorer's constant 0.1
+    assert not np.all(validate_r.scores[:, 0] == pytest.approx(0.1))
+
+
+def test_phase_world_override_none_uses_study_default(
+    world: _ToySimulator,
+    scorer: _ToyScorer,
+    observables: list[Observable],
+) -> None:
+    """Phase.world=None (default) uses Study.world."""
+    grid = [{"alpha": 0.5}]
+    study = Study(
+        world=world,
+        scorer=scorer,
+        observables=observables,
+        phases=[Phase(name="default", grid=grid)],
+    )
+    study.run()
+    r = study.results("default")
+    assert r.scores[0, 0] == pytest.approx(0.0)
+    assert r.scores[0, 1] == pytest.approx(5.0)