Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
141 changes: 141 additions & 0 deletions examples/example_5.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
import numpy as np
import sklearn
import scipy.stats as stats

from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import cross_validate
from sklearn.datasets import fetch_openml

from matplotlib import pyplot as plt
from sklearn.utils import Bunch

from suprb import SupRB
from suprb.optimizer.rule.es import ES1xLambda
from suprb.optimizer.solution.nsga2 import NonDominatedSortingGeneticAlgorithm2
from suprb.optimizer.solution.sampler import BetaSolutionSampler, DiversitySolutionSampler
from suprb.optimizer.solution.spea2 import StrengthParetoEvolutionaryAlgorithm2
from suprb.optimizer.solution.nsga3 import NonDominatedSortingGeneticAlgorithm3
from suprb.optimizer.solution.ga import GeneticAlgorithm
from suprb.optimizer.solution.ts import TwoStageSolutionComposition
from suprb.logging.multi_objective import MOLogger

from utils import log_scores

import time

algo_names = {
StrengthParetoEvolutionaryAlgorithm2: "SPEA2",
NonDominatedSortingGeneticAlgorithm2: "NSGA-II",
NonDominatedSortingGeneticAlgorithm3: "NSGA-III",
TwoStageSolutionComposition: "TS",
}


def plot_pareto_front(pareto_front: np.ndarray, title: str):
x = pareto_front[:, 0]
y = pareto_front[:, 1]
plt.step(x, y, linestyle="-", marker="x")
plt.title(title)
plt.xlabel("Complexity")
plt.ylabel("Error")
plt.xlim(0, 1)
plt.ylim(0, 1)


if __name__ == "__main__":
random_state = 42
suprb_iter = 2
sc_iter = 1

spea2 = StrengthParetoEvolutionaryAlgorithm2(
n_iter=sc_iter,
population_size=32,
sampler=BetaSolutionSampler(),
early_stopping_delta=0,
early_stopping_patience=10,
)
nsga2 = NonDominatedSortingGeneticAlgorithm2(
n_iter=sc_iter,
population_size=32,
sampler=BetaSolutionSampler(),
early_stopping_delta=0,
early_stopping_patience=10,
)
nsga3 = NonDominatedSortingGeneticAlgorithm3(
n_iter=sc_iter,
population_size=32,
sampler=BetaSolutionSampler(),
early_stopping_delta=0,
early_stopping_patience=-1,
)
nsga3_es = NonDominatedSortingGeneticAlgorithm3(
n_iter=sc_iter,
population_size=32,
sampler=BetaSolutionSampler(),
early_stopping_delta=0,
early_stopping_patience=-1,
)
ga1 = GeneticAlgorithm(n_iter=sc_iter)
ga2 = GeneticAlgorithm(n_iter=sc_iter * 2)
ts = TwoStageSolutionComposition(algorithm_1=ga1, algorithm_2=nsga3, switch_iteration=suprb_iter, warm_start=False)
sc_algos = (nsga3, nsga2, spea2, ts, nsga3_es)
logger_list = []
time_list = []

plt.rcParams.update(
{
"text.usetex": True,
}
)

for i, sc in enumerate(sc_algos):

data, _ = fetch_openml(name="Concrete_Data", version=1, return_X_y=True)
data = data.to_numpy()

X, y = data[:, :8], data[:, 8]
X, y = sklearn.utils.shuffle(X, y, random_state=random_state)
warm_start: bool = (True,)

X = MinMaxScaler(feature_range=(-1, 1)).fit_transform(X)
y = StandardScaler().fit_transform(y.reshape((-1, 1))).reshape((-1,))

model = SupRB(
n_iter=suprb_iter,
rule_discovery=ES1xLambda(n_iter=1000, delay=10),
solution_composition=sc,
logger=MOLogger(),
random_state=random_state,
)
start_time = time.time()
scores = cross_validate(
model,
X,
y,
cv=2,
n_jobs=8,
verbose=10,
scoring=["r2", "neg_mean_squared_error"],
return_estimator=True,
)
end_time = time.time()
time_list.append(end_time - start_time)
log_scores(scores)
logger_list.append(scores["estimator"][0].logger_)
print("Finished!")

for t in time_list:
print(f"Time taken: {t}")
axes, plots = plt.subplots()
for l in logger_list:
##### Plot Pareto Fronts #####
pareto_front = l.pareto_fronts_
pareto_front = np.array(pareto_front[suprb_iter - 1])
hvs = l.metrics_["hypervolume"]
hv = hvs[suprb_iter - 1]
spreads = l.metrics_["spread"]
spread = spreads[suprb_iter - 1]
plot_pareto_front(pareto_front, f"$HV = {hv:.2f}, \Delta = {spread:.2f}$")
plt.show()

##### Plot Hypervolume #####
25 changes: 21 additions & 4 deletions suprb/logging/default.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,12 @@ def log_iteration(self, X: np.ndarray, y: np.ndarray, estimator: BaseRegressor,
def log_metric(key, value):
self.log_metric(key=key, value=value, step=estimator.step_)

def log_metric_stats(metric_name: str, attribute_name: str, lst: list):
comprehension = [getattr(e, attribute_name) for e in lst]
def log_metric_stats(metric_name: str, attribute_name: str, lst: list, index=None):
if index is None:
comprehension = [getattr(e, attribute_name) for e in lst]
else:
comprehension = [getattr(e, attribute_name)[index] for e in lst]

log_metric(metric_name + "_min", min(comprehension))
log_metric(metric_name + "_mean", sum(comprehension) / len(comprehension))
log_metric(metric_name + "_max", max(comprehension))
Expand Down Expand Up @@ -73,13 +77,26 @@ def log_metric_stats(metric_name: str, attribute_name: str, lst: list):
population = estimator.solution_composition_.population_
log_metric("population_size", len(population))
# log_metric("population_diversity", genome_diversity(population)) # When using default logging, not all approaches are compatible with this
log_metric_stats("population_fitness", "fitness_", population)

# This is a bit of a hack to support multidimensional objective functions ~Felix
if population[0].fitness_.__class__.__name__ == "list":
for i in range(len(population[0].fitness_)):
log_metric_stats(f"population_fitness_o_{i}", "fitness_", population, i)
else:
log_metric_stats("population_fitness", "fitness_", population)

log_metric_stats("population_error", "error_", population)
log_metric_stats("population_complexity", "complexity_", population)

# Log elitist
elitist = estimator.solution_composition_.elitist()
log_metric("elitist_fitness", elitist.fitness_)

if elitist.fitness_.__class__.__name__ == "list":
for i in range(len(elitist.fitness_)):
log_metric(f"elitist_fitness_o_{i}", elitist.fitness_[i])
else:
log_metric("elitist_fitness", elitist.fitness_)

log_metric("elitist_error", elitist.error_)
log_metric("elitist_complexity", elitist.complexity_)
# log_metric("elitist_matched", matched_training_samples(elitist.subpopulation)) # When using default logging, not all approaches are compatible with this
Expand Down
20 changes: 20 additions & 0 deletions suprb/logging/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,23 @@ def matched_training_samples(pool: list[Rule]):
matched = np.stack([rule.match_set_ for rule in pool]).any(axis=0).nonzero()[0].shape[0]
total = pool[0].match_set_.shape[0]
return matched / total


def hypervolume(pareto_front: np.ndarray, reference_point: np.ndarray = None):
"""Assumes that the pareto front is sorted by complexity in ascending order."""
last_x = reference_point[0]
volume = 0
for fitness in pareto_front:
volume += (last_x - fitness[0]) * np.prod(reference_point[1:] - fitness[1:])
last_x = fitness[0]
return volume


def spread(pareto_front: np.ndarray):
if len(pareto_front) <= 1:
return 0
"""Assumes that the pareto front is sorted by complexity in ascending order."""
distances = np.linalg.norm(pareto_front[:-1] - pareto_front[1:], axis=1)
avg_distance = np.mean(distances)
delta = np.sum(np.abs(distances - avg_distance)) / ((len(distances)) * avg_distance)
return delta
37 changes: 37 additions & 0 deletions suprb/logging/multi_objective.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import numpy as np

from . import DefaultLogger
from ..base import BaseRegressor

from suprb.solution.fitness import c_norm, pseudo_accuracy

from .metrics import hypervolume, spread
from ..optimizer.solution.base import MOSolutionComposition, SolutionComposition
from ..optimizer.solution.ts import TwoStageSolutionComposition


class MOLogger(DefaultLogger):

pareto_fronts_: dict

def log_init(self, X: np.ndarray, y: np.ndarray, estimator: BaseRegressor):
super().log_init(X, y, estimator)
self.pareto_fronts_ = {}

def log_iteration(self, X: np.ndarray, y: np.ndarray, estimator: BaseRegressor, iteration: int):
super().log_iteration(X, y, estimator, estimator.step_)
sc = estimator.solution_composition_
current_front = sc.pareto_front()
n = current_front[0].fitness.max_genome_length_
if hasattr(current_front[0].fitness, "hv_reference"):
reference_points = current_front[0].fitness.hv_reference
else:
reference_points = np.array([1.0, 1.0])
current_front = [
[1 - c_norm(solution.complexity_, n), 1 - pseudo_accuracy(solution.error_)] for solution in current_front
]
self.pareto_fronts_[iteration] = current_front
current_front = np.array(current_front)
self.log_metric("hypervolume", hypervolume(current_front, reference_points), estimator.step_)
self.log_metric("spread", spread(current_front), estimator.step_)
self.log_metric("sc_iterations", sc.step_, estimator.step_)
1 change: 1 addition & 0 deletions suprb/optimizer/solution/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
from .archive import SolutionArchive
from .base import SolutionComposition
from .sampler import SolutionSampler
87 changes: 86 additions & 1 deletion suprb/optimizer/solution/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
from suprb.optimizer import BaseOptimizer
from suprb.rule import Rule
from suprb.utils import check_random_state
from . import SolutionArchive
from .archive import SolutionArchive
from .sampler import SolutionSampler


class SolutionComposition(BaseOptimizer, metaclass=ABCMeta):
Expand Down Expand Up @@ -145,3 +146,87 @@ def _reset(self):
super()._reset()
if hasattr(self, "population_"):
del self.population_


def hypervolume(pareto_front: list[Solution]):
pareto_front = sorted(pareto_front, key=lambda solution: solution.fitness_[0], reverse=True)
fitness_values = np.array([solution.fitness_ for solution in pareto_front])
# Needs a MultiObjectiveSolutionFitness
reference_point = pareto_front[0].fitness.hv_reference_
last_x = reference_point[0]
volume = 0
for fitness in fitness_values:
volume += (last_x - fitness[0]) * np.prod(reference_point[1:] - fitness[1:])
last_x = fitness[0]
return volume


class MOSolutionComposition(PopulationBasedSolutionComposition, metaclass=ABCMeta):
def __init__(
self,
population_size: int,
n_iter: int,
init: SolutionInit,
archive: SolutionArchive,
sampler: SolutionSampler,
random_state: int,
n_jobs: int,
warm_start: bool,
early_stopping_patience: int = -1,
early_stopping_delta: float = 0,
):
super().__init__(
population_size=population_size,
n_iter=n_iter,
init=init,
archive=archive,
random_state=random_state,
n_jobs=n_jobs,
warm_start=warm_start,
)
self.sampler = sampler
self.early_stopping_patience = early_stopping_patience
self.early_stopping_delta = early_stopping_delta
self._best_hypervolume = 0
self._best_pareto_front = None
self._early_stopping_counter = 0
self.step_ = 0

def check_early_stopping(self):
self.step_ += 1
if self.early_stopping_patience < 0:
return False
hv = self.hypervolume()
hv_diff = hv - self._best_hypervolume
if self._best_hypervolume < hv:
self._best_hypervolume = hv
self._best_pareto_front = self.pareto_front()

if hv_diff > self.early_stopping_delta:
self._early_stopping_counter = 0
else:
self._early_stopping_counter += 1
if self.early_stopping_patience <= self._early_stopping_counter:
return True
return False

@abstractmethod
def pareto_front(self) -> list[Solution]:
pass

def hypervolume(self) -> float:
return hypervolume(self.pareto_front())

def elitist(self) -> Optional[Solution]:
"""Sample an elitist from the Pareto front"""
pf = self.pareto_front()
if len(pf) == 0:
return None
return self.sampler(pf, random_state=self.random_state_)

def optimize(self, X: np.ndarray, y: np.ndarray, **kwargs) -> Union[Solution, list[Solution], None]:
self._best_hypervolume = 0
self._best_pareto_front = None
self._early_stopping_counter = 0
self.step_ = 0
super().optimize(X, y, **kwargs)
4 changes: 4 additions & 0 deletions suprb/optimizer/solution/nsga2/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from .base import NonDominatedSortingGeneticAlgorithm2
from .crossover import SolutionCrossover
from .mutation import SolutionMutation
from .selection import SolutionSelection
Loading
Loading