Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
0c89733
initial commit from GitLab repo
Sep 15, 2025
ae44262
change append of fitness ig obj
Sep 15, 2025
0d43747
change constructors to be sklearn compatible
Sep 16, 2025
9eb80c1
remove use of visualize function
Sep 16, 2025
24b67e5
~
Sep 16, 2025
1c47d38
remove visualize function from nsga2 _optimize
Sep 16, 2025
4b353dc
refactor ig nsga2
Sep 17, 2025
1e38c33
change init for nsga2
Sep 17, 2025
f8b58cb
try to fix sklearn clone error
Sep 19, 2025
2c47900
try to fix origin problem
Sep 19, 2025
37c67d4
Update requirements.txt
DavidvProeck Sep 19, 2025
7829a0e
fix imports and origin generation in NSGA2
Sep 19, 2025
0a286eb
fix requirements
Sep 19, 2025
a722365
remove prefer=threads from parallel calls
Sep 19, 2025
6c2f871
Update nsga2_novelty_G_P.py
DavidvProeck Sep 22, 2025
ee6ebf2
add bracket
DavidvProeck Sep 22, 2025
7aac9bf
fix _optimize of nsga2infogain
Sep 28, 2025
0f78c20
Merge branch 'master' of github.com:DavidvProeck/suprb
Sep 28, 2025
bf6d974
add warmup suprb
Sep 28, 2025
88afcf9
fix n_intial_rules
Oct 8, 2025
c002146
fix n_intial_rules
Oct 8, 2025
549281c
clone rules in score_novelty
Oct 8, 2025
adfe71c
add genome to saved elitist json
Oct 10, 2025
12c4434
change mutation
Oct 12, 2025
0bcb4de
fix json genome
Oct 13, 2025
09681c7
refactor
Oct 14, 2025
5d6e285
change png to pdf
Oct 19, 2025
739029d
finish last reworks
Oct 22, 2025
4388b5b
comment out visualization
Oct 22, 2025
71c537e
increase archive len
Oct 29, 2025
55233f4
modify example 4 so it runs
Nov 17, 2025
e25bf1d
add code comments
Nov 17, 2025
d86fa3b
rework readme
Nov 17, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 0 additions & 19 deletions CITATION.cff
Original file line number Diff line number Diff line change
Expand Up @@ -14,22 +14,3 @@ title: "SupRB: The Supervised Rule-based Learning System"
url: "https://github.com/heidmic/suprb"
version: 1.0.0
date-released: 2024-11-18

preferred-citation:
type: conference-paper
authors:
- family-names: Heider
given-names: Michael
- family-names: Stegherr
given-names: Helena
- family-names: Wurth
given-names: Jonathan
- family-names: Sraj
given-names: Roman
- family-names: Hähner
given-names: Jörg
title: "Separating Rule Discovery and Global Solution Composition in a Learning Classifier System"
year: 2022
conference: "Genetic and Evolutionary Computation Conference Companion (GECCO ’22 Companion)"
doi: "10.1145/3520304.3529014"
url: "https://doi.org/10.1145/3520304.3529014"
10 changes: 8 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@
[![DOI](https://zenodo.org/badge/303331999.svg)](https://zenodo.org/badge/latestdoi/303331999)
[![DOI](https://zenodo.org/badge/303331999.svg)](https://zenodo.org/badge/latestdoi/303331999) \
This project is a fork of the original SupRB repository (https://github.com/heidmic/suprb).
It extends the original work by adding multi-objective rule discovery (MOO-RD) using NSGA-II, developed as part of my bachelor’s thesis. \
The experiments can be found in the suprb-experimentation repository (https://github.com/DavidvProeck/suprb-experimentation),
which builds upon the original experimentation (https://github.com/heidmic/suprb-experimentation).

The source code of the proposed multi-objective rule discovery (MOO-RD) can be found here: **suprb/optimizer/rule/nsga2**

# SupRB

Expand Down Expand Up @@ -47,7 +53,7 @@ The examples in the examples directory are:
pip3 install -r requirements.txt


We recommend to use Python version 3.12
Tested with Python 3.9.4.


## Contributing
Expand Down
Empty file added __init__.py
Empty file.
4 changes: 2 additions & 2 deletions examples/example_1.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import cross_validate, train_test_split

from suprb import SupRB
from suprb import SupRB, WarmupSupRB
from suprb.utils import check_random_state
from suprb.optimizer.rule.es import ES1xLambda
from suprb.optimizer.solution.ga import GeneticAlgorithm
Expand Down Expand Up @@ -49,7 +49,7 @@ def create_plot(scores):

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=random_state)

model = SupRB(rule_discovery=ES1xLambda(), solution_composition=GeneticAlgorithm())
model = WarmupSupRB(rule_discovery=ES1xLambda(), solution_composition=GeneticAlgorithm())

scores = cross_validate(
model,
Expand Down
103 changes: 103 additions & 0 deletions examples/example_1_nsga2_infogain.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
import sklearn
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import cross_validate, train_test_split

from suprb import SupRB
from suprb.utils import check_random_state
from suprb.optimizer.rule.es import ES1xLambda
from suprb.optimizer.solution.ga import GeneticAlgorithm

from suprb.optimizer.rule.nsga2 import NSGA2InfoGain
from utils import log_scores

import numpy as np

from sklearn.linear_model import Ridge
from sklearn.utils import Bunch, shuffle

from suprb import rule, SupRB
from suprb.logging.combination import CombinedLogger
from suprb.logging.default import DefaultLogger
from suprb.logging.stdout import StdoutLogger
from suprb.optimizer.solution import ga
from suprb.optimizer.rule import origin, mutation

from suprb.optimizer.rule.ns.novelty_calculation import NoveltyCalculation
from suprb.optimizer.rule.ns.novelty_search_type import MinimalCriteria




def load_higdon_gramacy_lee(n_samples=1000, noise=0, random_state=None):
    """Create a shuffled, synthetic one-feature regression dataset.

    The feature is an evenly spaced grid on [0, 20]. The target is
    ``sin(pi * x / 5) + 0.2 * cos(4 * pi * x / 5)`` where ``x < 10`` and
    ``x / 10 - 1`` where ``x >= 10``, with normally distributed noise added.

    Parameters
    ----------
    n_samples : int
        Number of grid points (and therefore samples) to generate.
    noise : float
        Passed as ``scale`` to the normal noise added to the target.
    random_state : optional
        Handed to ``check_random_state`` to obtain the noise generator and,
        unchanged, to ``sklearn.utils.shuffle``.

    Returns
    -------
    The result of ``sklearn.utils.shuffle`` applied to the feature array
    reshaped to ``(n_samples, 1)`` and the target array of length
    ``n_samples``.
    """
    rng = check_random_state(random_state)

    grid = np.linspace(0, 20, num=n_samples)
    # Evaluate each branch mask once and reuse it for selection and assignment.
    lower = grid < 10
    upper = grid >= 10

    targets = np.zeros(n_samples)
    x_low = grid[lower]
    targets[lower] = np.sin(np.pi * x_low / 5) + 0.2 * np.cos(4 * np.pi * x_low / 5)
    targets[upper] = grid[upper] / 10 - 1

    targets += rng.normal(scale=noise, size=n_samples)
    features = grid.reshape((-1, 1))

    # The original `random_state` argument (not `rng`) seeds the shuffle.
    return sklearn.utils.shuffle(features, targets, random_state=random_state)


def create_plot(scores):
    """Plot every cross-validation estimator against the data and save it.

    Reads the module-level ``X`` and ``y`` assigned in the ``__main__``
    section. One panel of a 2x2 grid is drawn per entry of
    ``scores["estimator"]``; the figure is written to ``result.png``.

    Parameters
    ----------
    scores : mapping
        Must provide an ``"estimator"`` entry holding fitted models that
        expose ``predict``.
    """
    _, panels = plt.subplots(2, 2)
    # Dense grid over the observed feature range, as a single-column array.
    grid = np.linspace(X.min(), X.max(), 500).reshape((-1, 1))

    estimators = scores["estimator"]
    # zip stops at the shorter input, so at most four estimators are drawn.
    for panel, estimator in zip(panels.flatten(), estimators):
        prediction = estimator.predict(grid)
        panel.scatter(X, y, c="b", s=3, label="y_true")
        panel.plot(grid, prediction, c="r", label="y_pred")

    plt.savefig("result.png")


if __name__ == "__main__":
    random_state = 42

    # X and y stay module-level names because create_plot() reads them.
    X, y = load_higdon_gramacy_lee(noise=0.1, random_state=random_state)

    # Scale the feature into [-1, 1] and standardize the target.
    X = MinMaxScaler(feature_range=(-1, 1)).fit_transform(X)
    y_column = y.reshape((-1, 1))
    y = StandardScaler().fit_transform(y_column).reshape((-1,))

    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=random_state)

    # Assemble the rule discovery piece by piece, in the same order the
    # nested constructor call would evaluate its arguments.
    origin_generation = origin.SquaredError()
    rule_init = rule.initialization.MeanInit(
        fitness=rule.fitness.MooFitness(),
        model=Ridge(alpha=0.01, random_state=random_state),
        # matching_type=rule.matching.OrderedBound([-1, 1])
    )
    rule_mutation = mutation.Normal(
        # matching_type=rule.matching.OrderedBound([-1, 1]),
        sigma=1.22
    )
    rule_discovery = NSGA2InfoGain(
        n_iter=16,
        mu=16,
        lmbda=64,
        origin_generation=origin_generation,
        init=rule_init,
        mutation=rule_mutation,
        fitness_objs=[lambda r: r.error_],
        fitness_objs_labels=["Error"],  # infogain objective is added internally
    )

    model = SupRB(
        rule_discovery=rule_discovery,
        solution_composition=GeneticAlgorithm(),
        n_initial_rules=4,
    )

    # Four folds, matching the 2x2 panel grid drawn by create_plot().
    scores = cross_validate(
        model,
        X_train,
        y_train,
        cv=4,
        n_jobs=1,
        verbose=10,
        scoring=["r2", "neg_mean_squared_error"],
        return_estimator=True,
    )

    create_plot(scores)

    log_scores(scores)
118 changes: 118 additions & 0 deletions examples/example_1_nsga2_novelty.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
import sklearn
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import cross_validate, train_test_split

from suprb import SupRB
from suprb.utils import check_random_state
from suprb.optimizer.rule.es import ES1xLambda
from suprb.optimizer.solution.ga import GeneticAlgorithm

from suprb.optimizer.rule.nsga2 import NSGA2Novelty_G_P
from utils import log_scores

import numpy as np

from sklearn.linear_model import Ridge
from sklearn.utils import Bunch, shuffle

from suprb import rule, SupRB, WarmupSupRB
from suprb.logging.combination import CombinedLogger
from suprb.logging.default import DefaultLogger
from suprb.logging.stdout import StdoutLogger
from suprb.optimizer.solution import ga
from suprb.optimizer.rule import origin, mutation

from suprb.optimizer.rule.ns.novelty_calculation import NoveltyCalculation
from suprb.optimizer.rule.ns.novelty_search_type import MinimalCriteria




def load_higdon_gramacy_lee(n_samples=1000, noise=0, random_state=None):
    """Create a shuffled, synthetic one-feature regression dataset.

    The feature is an evenly spaced grid on [0, 20]. The target is
    ``sin(pi * x / 5) + 0.2 * cos(4 * pi * x / 5)`` where ``x < 10`` and
    ``x / 10 - 1`` where ``x >= 10``, with normally distributed noise added.

    Parameters
    ----------
    n_samples : int
        Number of grid points (and therefore samples) to generate.
    noise : float
        Passed as ``scale`` to the normal noise added to the target.
    random_state : optional
        Handed to ``check_random_state`` to obtain the noise generator and,
        unchanged, to ``sklearn.utils.shuffle``.

    Returns
    -------
    The result of ``sklearn.utils.shuffle`` applied to the feature array
    reshaped to ``(n_samples, 1)`` and the target array of length
    ``n_samples``.
    """
    rng = check_random_state(random_state)

    grid = np.linspace(0, 20, num=n_samples)
    # Evaluate each branch mask once and reuse it for selection and assignment.
    lower = grid < 10
    upper = grid >= 10

    targets = np.zeros(n_samples)
    x_low = grid[lower]
    targets[lower] = np.sin(np.pi * x_low / 5) + 0.2 * np.cos(4 * np.pi * x_low / 5)
    targets[upper] = grid[upper] / 10 - 1

    targets += rng.normal(scale=noise, size=n_samples)
    features = grid.reshape((-1, 1))

    # The original `random_state` argument (not `rng`) seeds the shuffle.
    return sklearn.utils.shuffle(features, targets, random_state=random_state)


def create_plot(scores):
    """Plot every cross-validation estimator against the data and save it.

    Reads the module-level ``X`` and ``y`` assigned in the ``__main__``
    section. One panel of a 2x2 grid is drawn per entry of
    ``scores["estimator"]``; the figure is written to ``result.png``.

    Parameters
    ----------
    scores : mapping
        Must provide an ``"estimator"`` entry holding fitted models that
        expose ``predict``.
    """
    _, panels = plt.subplots(2, 2)
    # Dense grid over the observed feature range, as a single-column array.
    grid = np.linspace(X.min(), X.max(), 500).reshape((-1, 1))

    estimators = scores["estimator"]
    # zip stops at the shorter input, so at most four estimators are drawn.
    for panel, estimator in zip(panels.flatten(), estimators):
        prediction = estimator.predict(grid)
        panel.scatter(X, y, c="b", s=3, label="y_true")
        panel.plot(grid, prediction, c="r", label="y_pred")

    plt.savefig("result.png")


if __name__ == "__main__":
    random_state = 42

    # X and y stay module-level names because create_plot() reads them.
    X, y = load_higdon_gramacy_lee(noise=0.1, random_state=random_state)

    # Scale the feature into [-1, 1] and standardize the target.
    X = MinMaxScaler(feature_range=(-1, 1)).fit_transform(X)
    y_column = y.reshape((-1, 1))
    y = StandardScaler().fit_transform(y_column).reshape((-1,))

    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=random_state)

    # Assemble the rule discovery piece by piece, in the same order the
    # nested constructor call would evaluate its arguments.
    origin_generation = origin.SquaredError()
    rule_init = rule.initialization.MeanInit(
        fitness=rule.fitness.MooFitness(),
        model=Ridge(alpha=0.01, random_state=random_state),
        # matching_type=rule.matching.OrderedBound([-1, 1])
    )
    rule_mutation = mutation.Normal(
        # matching_type=rule.matching.OrderedBound([-1, 1]),
        sigma=1.22
    )
    objectives = [lambda r: r.error_]
    objective_labels = ["Error"]  # novelty objective is added internally
    novelty_calc = NoveltyCalculation(
        k_neighbor=15,
        # novelty_search_type=MinimalCriteria(min_examples_matched=15) # <- tuned
        # TODO: Leads to warnings in crowding distance calculation.
    )

    rule_discovery = NSGA2Novelty_G_P(
        n_iter=16,
        mu=16,
        lmbda=64,
        origin_generation=origin_generation,
        init=rule_init,
        mutation=rule_mutation,
        fitness_objs=objectives,
        fitness_objs_labels=objective_labels,
        novelty_calc=novelty_calc,
        novelty_mode="G",
        profile=False,
        # Rules that match only one sample are considered trivial, so min_experience >= 2
        min_experience=2,
        max_restarts=4,
        keep_archive_across_restarts=False,
    )

    model = SupRB(
        rule_discovery=rule_discovery,
        solution_composition=GeneticAlgorithm(),
        # Optional settings left disabled by the author:
        # verbose=2,
        # warmup_strategy="auto",
        # warmup_rd_steps=0,  # fixed
        # warmup_max_steps=4,  # auto
        # warmup_pool_target=None,  # auto
        # warmup_patience=3,  # auto
        # warmup_delta=1,  # auto
    )

    # Four folds, matching the 2x2 panel grid drawn by create_plot().
    scores = cross_validate(
        model,
        X_train,
        y_train,
        cv=4,
        n_jobs=1,
        verbose=10,
        scoring=["r2", "neg_mean_squared_error"],
        return_estimator=True,
    )

    create_plot(scores)

    log_scores(scores)
5 changes: 5 additions & 0 deletions examples/example_2.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from datetime import timedelta
import sklearn

from sklearn.preprocessing import StandardScaler, MinMaxScaler
Expand All @@ -10,8 +11,10 @@

from utils import log_scores

from time import time

if __name__ == "__main__":
t0 = time()
random_state = 42

data, _ = fetch_openml(name="Concrete_Data", version=1, return_X_y=True)
Expand Down Expand Up @@ -50,6 +53,8 @@
verbose=10,
scoring=["r2", "neg_mean_squared_error"],
return_estimator=True,
fit_params={"cleanup": True},
)

log_scores(scores)
print(f"\nTotal runtime: {timedelta(seconds=time() - t0)}")
1 change: 1 addition & 0 deletions examples/example_3.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@
verbose=10,
scoring=["r2", "neg_mean_squared_error"],
return_estimator=True,
fit_params={"cleanup": True},
)

log_scores(scores)
5 changes: 3 additions & 2 deletions examples/example_4.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,8 @@
elitist_ratio=0.2,
random_state=random_state,
n_jobs=1,
mutation=suprb.optimizer.solution.ga.mutation.BitFlips(),
crossover=suprb.optimizer.solution.ga.crossover.NPoint(n=2),
# mutation=suprb.optimizer.solution.ga.mutation.BitFlips(mutation_rate=0.1),
# crossover=suprb.optimizer.solution.ga.crossover.NPoint(crossover_rate=0.9, n=2),
selection=suprb.optimizer.solution.ga.selection.Tournament(k=6),
init=suprb.solution.initialization.RandomInit(
mixing=suprb.solution.mixing_model.ErrorExperienceHeuristic(),
Expand All @@ -71,6 +71,7 @@
verbose=10,
scoring=["r2", "neg_mean_squared_error"],
return_estimator=True,
# fit_params={"cleanup": True},
)

log_scores(scores)
60 changes: 60 additions & 0 deletions examples/example_concrete.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
import sklearn
import numpy as np
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import cross_validate
from ucimlrepo import fetch_ucirepo
from suprb import SupRB
from suprb.optimizer.rule.es import ES1xLambda
from suprb.optimizer.solution.ga import GeneticAlgorithm
from utils import log_scores




if __name__ == "__main__":
    random_state = 42

    # Dataset https://doi.org/10.24432/C5PK67
    concrete_data = fetch_ucirepo(id=165)

    # Convert features and targets to NumPy arrays, then shuffle them together.
    X = concrete_data.data.features.to_numpy()
    y = concrete_data.data.targets.to_numpy()
    X, y = sklearn.utils.shuffle(X, y, random_state=random_state)

    # Normalize features into [-1, 1]; standardize the target and flatten it.
    X = MinMaxScaler(feature_range=(-1, 1)).fit_transform(X)
    y_column = np.array(y).reshape(-1, 1)
    y = StandardScaler().fit_transform(y_column).reshape((-1,))

    # Define model
    rule_discovery = ES1xLambda(
        n_iter=32,
        lmbda=16,
        operator="+",
        delay=150,
        random_state=random_state,
        n_jobs=1,
    )
    solution_composition = GeneticAlgorithm(
        n_iter=32,
        population_size=32,
        elitist_ratio=0.2,
        random_state=random_state,
        n_jobs=1,
    )
    model = SupRB(
        rule_discovery=rule_discovery,
        solution_composition=solution_composition,
    )

    # Cross-validation
    # NOTE(review): newer scikit-learn releases replace `fit_params` with
    # `params` - confirm against the version pinned in requirements.txt.
    scores = cross_validate(
        model,
        X,
        y,
        cv=4,
        n_jobs=32,
        verbose=10,
        scoring=["r2", "neg_mean_squared_error"],
        return_estimator=True,
        fit_params={"cleanup": True},
    )

    log_scores(scores)
Loading
Loading