diff --git a/examples/surrogate_modeling/README.md b/examples/surrogate_modeling/README.md new file mode 100644 index 00000000..56461404 --- /dev/null +++ b/examples/surrogate_modeling/README.md @@ -0,0 +1,54 @@ +# Surrogate Modeling with Scikit-learn and Mesa + +This example demonstrates how to integrate Mesa with machine learning libraries like **Scikit-learn** to create a "surrogate model". It showcases a high-performance workflow for parameter exploration without the computational overhead of running thousands of full simulations. + +## Summary + +Agent-Based Models (ABMs) can be computationally expensive when exploring high-dimensional parameter spaces. This example illustrates a three-step surrogate modeling pipeline: + +1. **Efficient Sampling**: Using **Latin Hypercube Sampling (LHS)** via `scipy.stats.qmc` to select a sparse but representative set of points across the parameter space. +2. **Simulation**: Running the Mesa model (`WealthModel`) at these sampled points to gather training data. +3. **Emulation**: Training a **Random Forest Regressor** on the simulation outcomes to create a surrogate that can predict model results (e.g., Gini coefficient) for any new parameter set nearly instantly. + +This approach is particularly beneficial for calibration, sensitivity analysis, and optimization in complex models where running every possible configuration is infeasible. + +## Installation + +This example requires the latest version of Mesa and additional machine learning dependencies: + +```bash +pip install mesa scikit-learn scipy pandas + +``` + +## How to Run + +To run the surrogate modeling workflow (sampling, training, and prediction), execute the analysis script from the root of the `mesa-examples` directory: + +```bash +python -m examples.surrogate_modeling.analysis + +``` + +## Files + +* `model.py`: Contains the `WealthModel` and `WealthAgent` implementations, refactored for Mesa 4.0 "Lean Core" compatibility. 
+* `analysis.py`: Contains the LHS sampling logic, the manual training loop, and the Scikit-learn regressor integration. + +## Model Details + +### Logic + +The example uses a refactored **Boltzmann Wealth Model**. Agents start with a fixed amount of wealth and exchange it randomly during interactions. The model calculates the **Gini coefficient** at the end of each run as the target metric for the surrogate model. + +### Mesa 4.0 Integration + +* **RNG Initialization**: Uses the `rng` parameter in `Model.__init__` to ensure future-proof compatibility and reproducibility. +* **Spatial Management**: Utilizes the `OrthogonalMooreGrid` and `CellAgent` pattern where agents are placed by sampling from `all_cells.cells`. +* **Agent Activation**: Uses `self.agents.shuffle_do("step")` for efficient agent execution. + +## Further Reading + +* **Latin Hypercube Sampling**: A statistical method for generating a near-random sample of parameter values from a multidimensional distribution. +* **Surrogate Modeling**: Also known as metamodeling or emulation, this is a technique used when an outcome of interest cannot easily be measured directly, so a model of the outcome is used instead. 
+* **Scikit-learn Random Forest**: [Random Forest Regressor Documentation](https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestRegressor.html)
diff --git a/examples/surrogate_modeling/analysis.py b/examples/surrogate_modeling/analysis.py
new file mode 100644
index 00000000..12a555dd
--- /dev/null
+++ b/examples/surrogate_modeling/analysis.py
@@ -0,0 +1,94 @@
+"""Train a Random Forest surrogate of the Boltzmann wealth model.
+
+Run from the root of the repository with::
+
+    python -m examples.surrogate_modeling.analysis
+"""
+
+import numpy as np
+import pandas as pd
+from scipy.stats import qmc
+from sklearn.ensemble import RandomForestRegressor
+
+from .model import WealthModel
+
+PARAM_SPACE = {"n": (10, 100), "width": (10, 30), "height": (10, 30)}
+N_SAMPLES = 30
+N_STEPS = 50
+SEED = 42
+
+
+def _is_integer(value):
+    """Return True for an integer that is not a bool (bool subclasses int)."""
+    return isinstance(value, (int, np.integer)) and not isinstance(value, bool)
+
+
+def sample_parameters(param_space, n_samples, seed=None):
+    """Generate parameter sets using Latin Hypercube Sampling.
+
+    Args:
+        param_space: Mapping of parameter name to a ``(lower, upper)`` pair.
+            A parameter is integer-valued only when both bounds are integers;
+            its samples are then rounded to the nearest integer.
+        n_samples: Number of parameter sets to draw.
+        seed: Seed forwarded to ``qmc.LatinHypercube``.
+
+    Returns:
+        A list of ``n_samples`` dicts mapping parameter name to sampled value.
+    """
+    names = list(param_space)
+    lower = [bounds[0] for bounds in param_space.values()]
+    upper = [bounds[1] for bounds in param_space.values()]
+    # Testing only the lower bound would wrongly round a range such as (0, 1.5).
+    is_int = [_is_integer(lo) and _is_integer(hi) for lo, hi in zip(lower, upper)]
+
+    sampler = qmc.LatinHypercube(d=len(names), seed=seed)
+    scaled = qmc.scale(sampler.random(n=n_samples), lower, upper)
+
+    return [
+        {
+            name: round(value) if integer else float(value)
+            for name, value, integer in zip(names, row, is_int)
+        }
+        for row in scaled
+    ]
+
+
+def run_simulation(config, n_steps=N_STEPS, rng=None):
+    """Run one ``WealthModel`` for ``n_steps`` steps and return its Gini."""
+    model = WealthModel(**config, rng=rng)
+    for _ in range(n_steps):
+        model.step()
+    return model.get_gini()
+
+
+def main():
+    """Sample parameters, simulate, fit the surrogate and print one prediction."""
+    param_names = list(PARAM_SPACE)
+    samples = sample_parameters(PARAM_SPACE, n_samples=N_SAMPLES, seed=SEED)
+
+    print("Running simulations for training data...")
+    # Each run gets its own fixed seed so the training data is reproducible.
+    results = [
+        {**config, "Gini": run_simulation(config, rng=SEED + run)}
+        for run, config in enumerate(samples)
+    ]
+
+    df = pd.DataFrame(results)
+    features = df[param_names].values
+    target = df["Gini"].values
+
+    surrogate = RandomForestRegressor(n_estimators=100, random_state=SEED)
+    surrogate.fit(features, target)
+    print("Surrogate model trained.")
+
+    test_params = {"n": 65, "width": 22, "height": 22}
+    query = np.array([[test_params[p] for p in param_names]])
+    prediction = surrogate.predict(query)[0]
+
+    print(f"\nPrediction for {test_params}:")
+    print(f"Approximated Gini: {prediction:.4f}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/surrogate_modeling/model.py b/examples/surrogate_modeling/model.py
new file mode 100644
index 00000000..c8226971
--- /dev/null
+++ b/examples/surrogate_modeling/model.py
@@ -0,0 +1,74 @@
+"""Boltzmann wealth model used as the simulator for the surrogate example."""
+
+from mesa import Model
+from mesa.discrete_space import CellAgent, OrthogonalMooreGrid
+
+
+class WealthAgent(CellAgent):
+    """An agent that starts with one unit of wealth.
+
+    Exchange partners are drawn from all agents in the model, not only from
+    agents in neighbouring cells.
+    """
+
+    def __init__(self, model, cell):
+        """Create the agent and place it on ``cell``."""
+        super().__init__(model)
+        self.cell = cell
+        self.wealth = 1
+
+    def step(self):
+        """Give one unit of wealth to a randomly chosen other agent."""
+        if self.wealth <= 0:
+            return
+        other_agent = self.model.random.choice(self.model.agents)
+        # Drawing itself transfers nothing, so total wealth is conserved.
+        if other_agent is not self:
+            other_agent.wealth += 1
+            self.wealth -= 1
+
+
+class WealthModel(Model):
+    """A simple model for wealth distribution.
+
+    Args:
+        n: Number of agents; each one is placed on its own grid cell.
+        width: Grid width in cells.
+        height: Grid height in cells.
+        rng: Seed or generator forwarded to ``mesa.Model``.
+
+    Raises:
+        ValueError: If ``n`` is negative or larger than ``width * height``.
+    """
+
+    def __init__(self, n=50, width=10, height=10, rng=None):
+        """Build the grid and place ``n`` agents on distinct random cells."""
+        n_cells = width * height
+        if not 0 <= n <= n_cells:
+            raise ValueError(f"n must be between 0 and {n_cells}, got {n}")
+        super().__init__(rng=rng)
+        self.num_agents = n
+        self.grid = OrthogonalMooreGrid((width, height), torus=True, random=self.random)
+
+        # Sampling without replacement gives every agent a distinct cell.
+        all_cells = self.grid.all_cells.cells
+        for cell in self.random.sample(all_cells, k=self.num_agents):
+            WealthAgent(self, cell)
+
+    def get_gini(self):
+        """Return the Gini coefficient of the current wealth distribution.
+
+        Returns 0.0 when there are no agents or the total wealth is zero.
+        """
+        wealths = sorted(agent.wealth for agent in self.agents)
+        # Use the live agent count rather than relying on ``num_agents``.
+        n = len(wealths)
+        total = sum(wealths)
+        if n == 0 or total == 0:
+            return 0.0
+        b = sum(w * (n - i) for i, w in enumerate(wealths)) / (n * total)
+        return 1 + (1 / n) - 2 * b
+
+    def step(self):
+        """Advance the model one step, activating all agents in random order."""
+        self.agents.shuffle_do("step")
diff --git a/pyproject.toml b/pyproject.toml
index bbaa17f0..49324e6a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -33,6 +33,12 @@ rl_example = [
     "mesa",
     "tensorboard"
 ]
+surrogate = [
+    "mesa",
+    "pandas",
+    "scikit-learn",
+    "scipy",
+]
 
 [tool.ruff]
 extend-include = ["*.ipynb"]