heidmic · DavidvProeck · Sep 15, 2025 · Sep 15, 2025 · Sep 16, 2025 · Sep 16, 2025
diff --git a/CITATION.cff b/CITATION.cff
@@ -14,22 +14,3 @@ title: "SupRB: The Supervised Rule-based Learning System"
 url: "https://github.com/heidmic/suprb"
 version: 1.0.0
 date-released: 2024-11-18
-
-preferred-citation:
-  type: conference-paper
-  authors:
-    - family-names: Heider
-      given-names: Michael
-    - family-names: Stegherr
-      given-names: Helena
-    - family-names: Wurth
-      given-names: Jonathan
-    - family-names: Sraj
-      given-names: Roman
-    - family-names: Hähner
-      given-names: Jörg
-  title: "Separating Rule Discovery and Global Solution Composition in a Learning Classifier System"
-  year: 2022
-  conference: "Genetic and Evolutionary Computation Conference Companion (GECCO ’22 Companion)"
-  doi: "10.1145/3520304.3529014"
-  url: "https://doi.org/10.1145/3520304.3529014"
diff --git a/README.md b/README.md
@@ -1,4 +1,10 @@
-[![DOI](https://zenodo.org/badge/303331999.svg)](https://zenodo.org/badge/latestdoi/303331999)
+[![DOI](https://zenodo.org/badge/303331999.svg)](https://zenodo.org/badge/latestdoi/303331999) \
+This project is a fork of the original SupRB repository (https://github.com/heidmic/suprb).
+It extends the original work by adding multi-objective rule discovery (MOO-RD) using NSGA-II, developed as part of my bachelor’s thesis. \
+The experiments can be found in the suprb-experimentation repository (https://github.com/DavidvProeck/suprb-experimentation),
+which builds upon the original experimentation repository (https://github.com/heidmic/suprb-experimentation).
+
+The source code of the proposed multi-objective rule discovery (MOO-RD) is located in `suprb/optimizer/rule/nsga2`.
 
 # SupRB
 
@@ -47,7 +53,7 @@ The examples in the examples directory are:
     pip3 install -r requirements.txt
 
 
-We recommend to use Python version 3.12
+Tested with Python 3.9.4.
 
 
 ## Contributing

diff --git a/__init__.py b/__init__.py
diff --git a/examples/example_1.py b/examples/example_1.py
@@ -5,7 +5,7 @@
 from sklearn.preprocessing import StandardScaler, MinMaxScaler
 from sklearn.model_selection import cross_validate, train_test_split
 
-from suprb import SupRB
+from suprb import SupRB, WarmupSupRB
 from suprb.utils import check_random_state
 from suprb.optimizer.rule.es import ES1xLambda
 from suprb.optimizer.solution.ga import GeneticAlgorithm
@@ -49,7 +49,7 @@ def create_plot(scores):
 
     X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=random_state)
 
-    model = SupRB(rule_discovery=ES1xLambda(), solution_composition=GeneticAlgorithm())
+    model = WarmupSupRB(rule_discovery=ES1xLambda(), solution_composition=GeneticAlgorithm())
 
     scores = cross_validate(
         model,

diff --git a/examples/example_1_nsga2_infogain.py b/examples/example_1_nsga2_infogain.py
@@ -0,0 +1,103 @@
+import sklearn
+import matplotlib.pyplot as plt
+from sklearn.preprocessing import StandardScaler, MinMaxScaler
+from sklearn.model_selection import cross_validate, train_test_split
+
+from suprb import SupRB
+from suprb.utils import check_random_state
+from suprb.optimizer.rule.es import ES1xLambda
+from suprb.optimizer.solution.ga import GeneticAlgorithm
+
+from suprb.optimizer.rule.nsga2 import NSGA2InfoGain
+from utils import log_scores
+
+import numpy as np
+
+from sklearn.linear_model import Ridge
+from sklearn.utils import Bunch, shuffle
+
+from suprb import rule, SupRB
+from suprb.logging.combination import CombinedLogger
+from suprb.logging.default import DefaultLogger
+from suprb.logging.stdout import StdoutLogger
+from suprb.optimizer.solution import ga
+from suprb.optimizer.rule import origin, mutation
+
+from suprb.optimizer.rule.ns.novelty_calculation import NoveltyCalculation
+from suprb.optimizer.rule.ns.novelty_search_type import MinimalCriteria
+
+
+
+
+def load_higdon_gramacy_lee(n_samples=1000, noise=0, random_state=None):  # Build a shuffled 1-D piecewise regression data set and return (X, y).
+    random_state_ = check_random_state(random_state)  # random source used only for the additive noise below
+
+    X = np.linspace(0, 20, num=n_samples)  # n_samples evenly spaced inputs on [0, 20]
+    y = np.zeros(n_samples)
+
+    y[X < 10] = np.sin(np.pi * X[X < 10] / 5) + 0.2 * np.cos(4 * np.pi * X[X < 10] / 5)  # oscillating segment for X < 10
+    y[X >= 10] = X[X >= 10] / 10 - 1  # linear segment for X >= 10
+
+    y += random_state_.normal(scale=noise, size=n_samples)  # normal noise with scale=noise added to every target
+    X = X.reshape((-1, 1))  # column vector: one feature per sample
+
+    return sklearn.utils.shuffle(X, y, random_state=random_state)  # shuffle X and y together, seeded with the caller's random_state
+
+
+def create_plot(scores):  # Plot every estimator in scores["estimator"] against the data and save the figure as result.png.
+    fig, axes = plt.subplots(2, 2)  # 2x2 grid; the zip below pairs one axis with one estimator
+    X_plot = np.linspace(X.min(), X.max(), 500).reshape((-1, 1))  # NOTE(review): X and y are module-level names assigned in the __main__ block, not parameters
+    for ax, model in zip(axes.flatten(), scores["estimator"]):  # zip stops at the shorter of the two sequences
+        pred = model.predict(X_plot)
+        ax.scatter(X, y, c="b", s=3, label="y_true")  # full data set as blue points
+        ax.plot(X_plot, pred, c="r", label="y_pred")  # model prediction as red line
+
+    plt.savefig("result.png")  # written relative to the current working directory
+
+
+if __name__ == "__main__":  # Script entry point: cross-validate SupRB with NSGA2InfoGain rule discovery on the synthetic data set.
+    random_state = 42
+
+    X, y = load_higdon_gramacy_lee(noise=0.1, random_state=random_state)
+
+    X = MinMaxScaler(feature_range=(-1, 1)).fit_transform(X)  # scale inputs to [-1, 1]
+    y = StandardScaler().fit_transform(y.reshape((-1, 1))).reshape((-1,))  # standardize the target, then flatten back to 1-D
+
+    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=random_state)  # NOTE(review): X_test / y_test are not used further in this script
+
+    model = SupRB(
+        rule_discovery=NSGA2InfoGain(
+            n_iter=16,
+            mu=16,
+            lmbda=64,
+            origin_generation=origin.SquaredError(),
+            init=rule.initialization.MeanInit(
+                fitness=rule.fitness.MooFitness(),
+                model=Ridge(alpha=0.01, random_state=random_state),
+                # matching_type=rule.matching.OrderedBound([-1, 1])
+            ),
+            mutation=mutation.Normal(
+                # matching_type=rule.matching.OrderedBound([-1, 1]),
+                sigma=1.22
+            ),
+            fitness_objs=[lambda r: r.error_],  # explicit objective: the rule's error_ attribute
+            fitness_objs_labels=["Error"],  # infogain objective is added internally
+        ),
+        solution_composition=GeneticAlgorithm(),
+        n_initial_rules=4,
+    )
+
+    scores = cross_validate(
+        model,
+        X_train,
+        y_train,
+        cv=4,  # four folds, matching the 2x2 subplot grid in create_plot
+        n_jobs=1,
+        verbose=10,
+        scoring=["r2", "neg_mean_squared_error"],
+        return_estimator=True,  # keeps the fitted models so create_plot can call predict on them
+    )
+
+    create_plot(scores)
+
+    log_scores(scores)
diff --git a/examples/example_1_nsga2_novelty.py b/examples/example_1_nsga2_novelty.py
@@ -0,0 +1,118 @@
+import sklearn
+import matplotlib.pyplot as plt
+from sklearn.preprocessing import StandardScaler, MinMaxScaler
+from sklearn.model_selection import cross_validate, train_test_split
+
+from suprb import SupRB
+from suprb.utils import check_random_state
+from suprb.optimizer.rule.es import ES1xLambda
+from suprb.optimizer.solution.ga import GeneticAlgorithm
+
+from suprb.optimizer.rule.nsga2 import NSGA2Novelty_G_P
+from utils import log_scores
+
+import numpy as np
+
+from sklearn.linear_model import Ridge
+from sklearn.utils import Bunch, shuffle
+
+from suprb import rule, SupRB, WarmupSupRB
+from suprb.logging.combination import CombinedLogger
+from suprb.logging.default import DefaultLogger
+from suprb.logging.stdout import StdoutLogger
+from suprb.optimizer.solution import ga
+from suprb.optimizer.rule import origin, mutation
+
+from suprb.optimizer.rule.ns.novelty_calculation import NoveltyCalculation
+from suprb.optimizer.rule.ns.novelty_search_type import MinimalCriteria
+
+
+
+
+def load_higdon_gramacy_lee(n_samples=1000, noise=0, random_state=None):  # Build a shuffled 1-D piecewise regression data set and return (X, y).
+    random_state_ = check_random_state(random_state)  # random source used only for the additive noise below
+
+    X = np.linspace(0, 20, num=n_samples)  # n_samples evenly spaced inputs on [0, 20]
+    y = np.zeros(n_samples)
+
+    y[X < 10] = np.sin(np.pi * X[X < 10] / 5) + 0.2 * np.cos(4 * np.pi * X[X < 10] / 5)  # oscillating segment for X < 10
+    y[X >= 10] = X[X >= 10] / 10 - 1  # linear segment for X >= 10
+
+    y += random_state_.normal(scale=noise, size=n_samples)  # normal noise with scale=noise added to every target
+    X = X.reshape((-1, 1))  # column vector: one feature per sample
+
+    return sklearn.utils.shuffle(X, y, random_state=random_state)  # shuffle X and y together, seeded with the caller's random_state
+
+
+def create_plot(scores):  # Plot every estimator in scores["estimator"] against the data and save the figure as result.png.
+    fig, axes = plt.subplots(2, 2)  # 2x2 grid; the zip below pairs one axis with one estimator
+    X_plot = np.linspace(X.min(), X.max(), 500).reshape((-1, 1))  # NOTE(review): X and y are module-level names assigned in the __main__ block, not parameters
+    for ax, model in zip(axes.flatten(), scores["estimator"]):  # zip stops at the shorter of the two sequences
+        pred = model.predict(X_plot)
+        ax.scatter(X, y, c="b", s=3, label="y_true")  # full data set as blue points
+        ax.plot(X_plot, pred, c="r", label="y_pred")  # model prediction as red line
+
+    plt.savefig("result.png")  # written relative to the current working directory
+
+
+if __name__ == "__main__":  # Script entry point: cross-validate SupRB with NSGA2Novelty_G_P rule discovery on the synthetic data set.
+    random_state = 42
+
+    X, y = load_higdon_gramacy_lee(noise=0.1, random_state=random_state)
+
+    X = MinMaxScaler(feature_range=(-1, 1)).fit_transform(X)  # scale inputs to [-1, 1]
+    y = StandardScaler().fit_transform(y.reshape((-1, 1))).reshape((-1,))  # standardize the target, then flatten back to 1-D
+
+    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=random_state)  # NOTE(review): X_test / y_test are not used further in this script
+
+    model = SupRB(
+        rule_discovery=NSGA2Novelty_G_P(
+            n_iter=16,
+            mu=16,
+            lmbda=64,
+            origin_generation=origin.SquaredError(),
+            init=rule.initialization.MeanInit(
+                fitness=rule.fitness.MooFitness(),
+                model=Ridge(alpha=0.01, random_state=random_state),
+                # matching_type=rule.matching.OrderedBound([-1, 1])
+            ),
+            mutation=mutation.Normal(
+                # matching_type=rule.matching.OrderedBound([-1, 1]),
+                sigma=1.22
+            ),
+            fitness_objs=[lambda r: r.error_],  # explicit objective: the rule's error_ attribute
+            fitness_objs_labels=["Error"],  # novelty objective is added internally
+            novelty_calc=NoveltyCalculation(
+                k_neighbor=15,
+                # novelty_search_type=MinimalCriteria(min_examples_matched=15)  # <- tuned #TODO: Leads to warnings in crowding distance calculation.
+            ),
+            novelty_mode="G",
+            profile=False,
+            min_experience=2,  # Rules that match only one sample are considered trivial, so min_experience >= 2
+            max_restarts=4,
+            keep_archive_across_restarts=False,
+        ),
+        solution_composition=GeneticAlgorithm(),
+        # verbose=2,
+        # warmup_strategy="auto",
+        # warmup_rd_steps=0,  # fixed
+        # warmup_max_steps=4,  # auto
+        # warmup_pool_target=None,  # auto
+        # warmup_patience=3,  # auto
+        # warmup_delta=1,  # auto
+    )
+
+    scores = cross_validate(
+        model,
+        X_train,
+        y_train,
+        cv=4,  # four folds, matching the 2x2 subplot grid in create_plot
+        n_jobs=1,
+        verbose=10,
+        scoring=["r2", "neg_mean_squared_error"],
+        return_estimator=True,  # keeps the fitted models so create_plot can call predict on them
+    )
+
+    create_plot(scores)
+
+    log_scores(scores)
diff --git a/examples/example_2.py b/examples/example_2.py
@@ -1,3 +1,4 @@
+from datetime import timedelta
 import sklearn
 
 from sklearn.preprocessing import StandardScaler, MinMaxScaler
@@ -10,8 +11,10 @@
 
 from utils import log_scores
 
+from time import time
 
 if __name__ == "__main__":
+    t0 = time()
     random_state = 42
 
     data, _ = fetch_openml(name="Concrete_Data", version=1, return_X_y=True)
@@ -50,6 +53,8 @@
         verbose=10,
         scoring=["r2", "neg_mean_squared_error"],
         return_estimator=True,
+        fit_params={"cleanup": True},
     )
 
     log_scores(scores)
+    print(f"\nTotal runtime: {timedelta(seconds=time() - t0)}")
diff --git a/examples/example_3.py b/examples/example_3.py
@@ -61,6 +61,7 @@
         verbose=10,
         scoring=["r2", "neg_mean_squared_error"],
         return_estimator=True,
+        fit_params={"cleanup": True},
     )
 
     log_scores(scores)
diff --git a/examples/example_4.py b/examples/example_4.py
@@ -50,8 +50,8 @@
             elitist_ratio=0.2,
             random_state=random_state,
             n_jobs=1,
-            mutation=suprb.optimizer.solution.ga.mutation.BitFlips(),
-            crossover=suprb.optimizer.solution.ga.crossover.NPoint(n=2),
+            # mutation=suprb.optimizer.solution.ga.mutation.BitFlips(mutation_rate=0.1),
+            # crossover=suprb.optimizer.solution.ga.crossover.NPoint(crossover_rate=0.9, n=2),
             selection=suprb.optimizer.solution.ga.selection.Tournament(k=6),
             init=suprb.solution.initialization.RandomInit(
                 mixing=suprb.solution.mixing_model.ErrorExperienceHeuristic(),
@@ -71,6 +71,7 @@
         verbose=10,
         scoring=["r2", "neg_mean_squared_error"],
         return_estimator=True,
+        # fit_params={"cleanup": True},
     )
 
     log_scores(scores)
diff --git a/examples/example_concrete.py b/examples/example_concrete.py
@@ -0,0 +1,60 @@
+import sklearn
+import numpy as np
+from sklearn.preprocessing import StandardScaler, MinMaxScaler
+from sklearn.model_selection import cross_validate
+from ucimlrepo import fetch_ucirepo
+from suprb import SupRB
+from suprb.optimizer.rule.es import ES1xLambda
+from suprb.optimizer.solution.ga import GeneticAlgorithm
+from utils import log_scores
+
+
+
+
+if __name__ == "__main__":  # Script entry point: cross-validate SupRB (ES1xLambda + GeneticAlgorithm) on the UCI data set with id 165.
+    random_state = 42
+    # Dataset https://doi.org/10.24432/C5PK67
+    concrete_data = fetch_ucirepo(id=165)  # NOTE(review): presumably needs network access on every run — confirm
+
+    X, y = concrete_data.data.features, concrete_data.data.targets
+    X = X.to_numpy()
+    y = y.to_numpy()
+    X, y = sklearn.utils.shuffle(X, y, random_state=random_state)  # shuffle features and targets together
+
+    # Normalize features and target
+    X = MinMaxScaler(feature_range=(-1, 1)).fit_transform(X)  # scale features to [-1, 1]
+    y = StandardScaler().fit_transform(np.array(y).reshape(-1, 1)).reshape((-1,))  # standardize, then flatten to 1-D; NOTE(review): y is already an array here, np.array looks redundant
+
+    # Define model
+    model = SupRB(
+        rule_discovery=ES1xLambda(
+            n_iter=32,
+            lmbda=16,
+            operator="+",
+            delay=150,
+            random_state=random_state,
+            n_jobs=1,
+        ),
+        solution_composition=GeneticAlgorithm(
+            n_iter=32,
+            population_size=32,
+            elitist_ratio=0.2,
+            random_state=random_state,
+            n_jobs=1,
+        ),
+    )
+
+    # Cross-validation
+    scores = cross_validate(
+        model,
+        X,
+        y,
+        cv=4,
+        n_jobs=32,  # NOTE(review): with cv=4 there are only four fits to run, so most of these workers are presumably idle — confirm
+        verbose=10,
+        scoring=["r2", "neg_mean_squared_error"],
+        return_estimator=True,
+        fit_params={"cleanup": True},  # passed on to the estimator's fit; NOTE(review): newer scikit-learn replaces fit_params with params — confirm against the pinned version
+    )
+
+    log_scores(scores)