diff --git a/CITATION.cff b/CITATION.cff
index 9f3ae5d2..4b4db2f2 100644
--- a/CITATION.cff
+++ b/CITATION.cff
@@ -14,22 +14,3 @@ title: "SupRB: The Supervised Rule-based Learning System"
 url: "https://github.com/heidmic/suprb"
 version: 1.0.0
 date-released: 2024-11-18
-
-preferred-citation:
-  type: conference-paper
-  authors:
-    - family-names: Heider
-      given-names: Michael
-    - family-names: Stegherr
-      given-names: Helena
-    - family-names: Wurth
-      given-names: Jonathan
-    - family-names: Sraj
-      given-names: Roman
-    - family-names: Hähner
-      given-names: Jörg
-  title: "Separating Rule Discovery and Global Solution Composition in a Learning Classifier System"
-  year: 2022
-  conference: "Genetic and Evolutionary Computation Conference Companion (GECCO ’22 Companion)"
-  doi: "10.1145/3520304.3529014"
-  url: "https://doi.org/10.1145/3520304.3529014"
diff --git a/README.md b/README.md
index 297b8726..bdc113a9 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,10 @@
-[![DOI](https://zenodo.org/badge/303331999.svg)](https://zenodo.org/badge/latestdoi/303331999)
+[![DOI](https://zenodo.org/badge/303331999.svg)](https://zenodo.org/badge/latestdoi/303331999) \
+This project is a fork of the original SupRB repository (https://github.com/heidmic/suprb).
+It extends the original work by adding multi-objective rule discovery (MOO-RD) using NSGA-II, developed as part of my bachelor’s thesis. \
+The experiments can be found in the suprb-experimentation repository (https://github.com/DavidvProeck/suprb-experimentation),
+which builds upon the original experimentation (https://github.com/heidmic/suprb-experimentation).
+
+The source code of the proposed multi-objective rule discovery (MOO-RD) can be found here: **suprb/optimizer/rule/nsga2**
 
 # SupRB
 
@@ -47,7 +53,7 @@ The examples in the examples directory are:
     pip3 install -r requirements.txt
 
 
-We recommend to use Python version 3.12
+Tested with Python 3.9.4.
 
 
 ## Contributing
diff --git a/__init__.py b/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/examples/example_1.py b/examples/example_1.py
index 98f16323..98e894e9 100644
--- a/examples/example_1.py
+++ b/examples/example_1.py
@@ -5,7 +5,7 @@
 from sklearn.preprocessing import StandardScaler, MinMaxScaler
 from sklearn.model_selection import cross_validate, train_test_split
 
-from suprb import SupRB
+from suprb import SupRB, WarmupSupRB
 from suprb.utils import check_random_state
 from suprb.optimizer.rule.es import ES1xLambda
 from suprb.optimizer.solution.ga import GeneticAlgorithm
@@ -49,7 +49,7 @@ def create_plot(scores):
 
     X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=random_state)
 
-    model = SupRB(rule_discovery=ES1xLambda(), solution_composition=GeneticAlgorithm())
+    model = WarmupSupRB(rule_discovery=ES1xLambda(), solution_composition=GeneticAlgorithm())
 
     scores = cross_validate(
         model,
diff --git a/examples/example_1_nsga2_infogain.py b/examples/example_1_nsga2_infogain.py
new file mode 100644
index 00000000..d72f2fae
--- /dev/null
+++ b/examples/example_1_nsga2_infogain.py
@@ -0,0 +1,103 @@
+import sklearn
+import matplotlib.pyplot as plt
+from sklearn.preprocessing import StandardScaler, MinMaxScaler
+from sklearn.model_selection import cross_validate, train_test_split
+
+from suprb import SupRB
+from suprb.utils import check_random_state
+from suprb.optimizer.rule.es import ES1xLambda
+from suprb.optimizer.solution.ga import GeneticAlgorithm
+
+from suprb.optimizer.rule.nsga2 import NSGA2InfoGain
+from utils import log_scores
+
+import numpy as np
+
+from sklearn.linear_model import Ridge
+from sklearn.utils import Bunch, shuffle
+
+from suprb import rule, SupRB
+from suprb.logging.combination import CombinedLogger
+from suprb.logging.default import DefaultLogger
+from suprb.logging.stdout import StdoutLogger
+from suprb.optimizer.solution import ga
+from suprb.optimizer.rule import origin, mutation
+
+from suprb.optimizer.rule.ns.novelty_calculation import NoveltyCalculation
+from suprb.optimizer.rule.ns.novelty_search_type import MinimalCriteria
+
+
+
+
+def load_higdon_gramacy_lee(n_samples=1000, noise=0, random_state=None):
+    random_state_ = check_random_state(random_state)
+
+    X = np.linspace(0, 20, num=n_samples)
+    y = np.zeros(n_samples)
+
+    y[X < 10] = np.sin(np.pi * X[X < 10] / 5) + 0.2 * np.cos(4 * np.pi * X[X < 10] / 5)
+    y[X >= 10] = X[X >= 10] / 10 - 1
+
+    y += random_state_.normal(scale=noise, size=n_samples)
+    X = X.reshape((-1, 1))
+
+    return sklearn.utils.shuffle(X, y, random_state=random_state)
+
+
+def create_plot(scores):
+    fig, axes = plt.subplots(2, 2)
+    X_plot = np.linspace(X.min(), X.max(), 500).reshape((-1, 1))
+    for ax, model in zip(axes.flatten(), scores["estimator"]):
+        pred = model.predict(X_plot)
+        ax.scatter(X, y, c="b", s=3, label="y_true")
+        ax.plot(X_plot, pred, c="r", label="y_pred")
+
+    plt.savefig("result.png")
+
+
+if __name__ == "__main__":
+    random_state = 42
+
+    X, y = load_higdon_gramacy_lee(noise=0.1, random_state=random_state)
+
+    X = MinMaxScaler(feature_range=(-1, 1)).fit_transform(X)
+    y = StandardScaler().fit_transform(y.reshape((-1, 1))).reshape((-1,))
+
+    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=random_state)
+
+    model = SupRB(
+        rule_discovery=NSGA2InfoGain(
+            n_iter=16,
+            mu=16,
+            lmbda=64,
+            origin_generation=origin.SquaredError(),
+            init=rule.initialization.MeanInit(
+                fitness=rule.fitness.MooFitness(),
+                model=Ridge(alpha=0.01, random_state=random_state),
+                # matching_type=rule.matching.OrderedBound([-1, 1])
+            ),
+            mutation=mutation.Normal(
+                # matching_type=rule.matching.OrderedBound([-1, 1]),
+                sigma=1.22
+            ),
+            fitness_objs=[lambda r: r.error_],
+            fitness_objs_labels=["Error"],  # infogain objective is added internally
+        ),
+        solution_composition=GeneticAlgorithm(),
+        n_initial_rules=4,
+    )
+
+    scores = cross_validate(
+        model,
+        X_train,
+        y_train,
+        cv=4,
+        n_jobs=1,
+        verbose=10,
+        scoring=["r2", "neg_mean_squared_error"],
+        return_estimator=True,
+    )
+
+    create_plot(scores)
+
+    log_scores(scores)
diff --git a/examples/example_1_nsga2_novelty.py b/examples/example_1_nsga2_novelty.py
new file mode 100644
index 00000000..5d40116f
--- /dev/null
+++ b/examples/example_1_nsga2_novelty.py
@@ -0,0 +1,118 @@
+import sklearn
+import matplotlib.pyplot as plt
+from sklearn.preprocessing import StandardScaler, MinMaxScaler
+from sklearn.model_selection import cross_validate, train_test_split
+
+from suprb import SupRB
+from suprb.utils import check_random_state
+from suprb.optimizer.rule.es import ES1xLambda
+from suprb.optimizer.solution.ga import GeneticAlgorithm
+
+from suprb.optimizer.rule.nsga2 import NSGA2Novelty_G_P
+from utils import log_scores
+
+import numpy as np
+
+from sklearn.linear_model import Ridge
+from sklearn.utils import Bunch, shuffle
+
+from suprb import rule, SupRB, WarmupSupRB
+from suprb.logging.combination import CombinedLogger
+from suprb.logging.default import DefaultLogger
+from suprb.logging.stdout import StdoutLogger
+from suprb.optimizer.solution import ga
+from suprb.optimizer.rule import origin, mutation
+
+from suprb.optimizer.rule.ns.novelty_calculation import NoveltyCalculation
+from suprb.optimizer.rule.ns.novelty_search_type import MinimalCriteria
+
+
+
+
+def load_higdon_gramacy_lee(n_samples=1000, noise=0, random_state=None):
+    random_state_ = check_random_state(random_state)
+
+    X = np.linspace(0, 20, num=n_samples)
+    y = np.zeros(n_samples)
+
+    y[X < 10] = np.sin(np.pi * X[X < 10] / 5) + 0.2 * np.cos(4 * np.pi * X[X < 10] / 5)
+    y[X >= 10] = X[X >= 10] / 10 - 1
+
+    y += random_state_.normal(scale=noise, size=n_samples)
+    X = X.reshape((-1, 1))
+
+    return sklearn.utils.shuffle(X, y, random_state=random_state)
+
+
+def create_plot(scores):
+    fig, axes = plt.subplots(2, 2)
+    X_plot = np.linspace(X.min(), X.max(), 500).reshape((-1, 1))
+    for ax, model in zip(axes.flatten(), scores["estimator"]):
+        pred = model.predict(X_plot)
+        ax.scatter(X, y, c="b", s=3, label="y_true")
+        ax.plot(X_plot, pred, c="r", label="y_pred")
+
+    plt.savefig("result.png")
+
+
+if __name__ == "__main__":
+    random_state = 42
+
+    X, y = load_higdon_gramacy_lee(noise=0.1, random_state=random_state)
+
+    X = MinMaxScaler(feature_range=(-1, 1)).fit_transform(X)
+    y = StandardScaler().fit_transform(y.reshape((-1, 1))).reshape((-1,))
+
+    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=random_state)
+
+    model = SupRB(
+        rule_discovery=NSGA2Novelty_G_P(
+            n_iter=16,
+            mu=16,
+            lmbda=64,
+            origin_generation=origin.SquaredError(),
+            init=rule.initialization.MeanInit(
+                fitness=rule.fitness.MooFitness(),
+                model=Ridge(alpha=0.01, random_state=random_state),
+                # matching_type=rule.matching.OrderedBound([-1, 1])
+            ),
+            mutation=mutation.Normal(
+                # matching_type=rule.matching.OrderedBound([-1, 1]),
+                sigma=1.22
+            ),
+            fitness_objs=[lambda r: r.error_],
+            fitness_objs_labels=["Error"],  # novelty objective is added internally
+            novelty_calc=NoveltyCalculation(
+                k_neighbor=15,
+                # novelty_search_type=MinimalCriteria(min_examples_matched=15)  # <- tuned #TODO: Leads to warnings in crowding distance calculation.
+            ),
+            novelty_mode="G",
+            profile=False,
+            min_experience=2,  # Rules that match only one sample are considered trivial, so min_experience >= 2
+            max_restarts=4,
+            keep_archive_across_restarts=False,
+        ),
+        solution_composition=GeneticAlgorithm(),
+        # verbose=2,
+        # warmup_strategy="auto",
+        # warmup_rd_steps=0,  # fixed
+        # warmup_max_steps=4,  # auto
+        # warmup_pool_target=None,  # auto
+        # warmup_patience=3,  # auto
+        # warmup_delta=1,  # auto
+    )
+
+    scores = cross_validate(
+        model,
+        X_train,
+        y_train,
+        cv=4,
+        n_jobs=1,
+        verbose=10,
+        scoring=["r2", "neg_mean_squared_error"],
+        return_estimator=True,
+    )
+
+    create_plot(scores)
+
+    log_scores(scores)
diff --git a/examples/example_2.py b/examples/example_2.py
index ff043e33..329627e8 100644
--- a/examples/example_2.py
+++ b/examples/example_2.py
@@ -1,3 +1,4 @@
+from datetime import timedelta
 import sklearn
 
 from sklearn.preprocessing import StandardScaler, MinMaxScaler
@@ -10,8 +11,10 @@
 
 from utils import log_scores
 
+from time import time
 
 if __name__ == "__main__":
+    t0 = time()
     random_state = 42
 
     data, _ = fetch_openml(name="Concrete_Data", version=1, return_X_y=True)
@@ -50,6 +53,8 @@
         verbose=10,
         scoring=["r2", "neg_mean_squared_error"],
         return_estimator=True,
+        fit_params={"cleanup": True},
     )
 
     log_scores(scores)
+    print(f"\nTotal runtime: {timedelta(seconds=time() - t0)}")
\ No newline at end of file
diff --git a/examples/example_3.py b/examples/example_3.py
index ae674ad9..c8bfe50e 100644
--- a/examples/example_3.py
+++ b/examples/example_3.py
@@ -61,6 +61,7 @@
         verbose=10,
         scoring=["r2", "neg_mean_squared_error"],
         return_estimator=True,
+        fit_params={"cleanup": True},
     )
 
     log_scores(scores)
diff --git a/examples/example_4.py b/examples/example_4.py
index 7948b39f..5839e4b7 100644
--- a/examples/example_4.py
+++ b/examples/example_4.py
@@ -50,8 +50,8 @@
             elitist_ratio=0.2,
             random_state=random_state,
             n_jobs=1,
-            mutation=suprb.optimizer.solution.ga.mutation.BitFlips(),
-            crossover=suprb.optimizer.solution.ga.crossover.NPoint(n=2),
+            # mutation=suprb.optimizer.solution.ga.mutation.BitFlips(mutation_rate=0.1),
+            # crossover=suprb.optimizer.solution.ga.crossover.NPoint(crossover_rate=0.9, n=2),
             selection=suprb.optimizer.solution.ga.selection.Tournament(k=6),
             init=suprb.solution.initialization.RandomInit(
                 mixing=suprb.solution.mixing_model.ErrorExperienceHeuristic(),
@@ -71,6 +71,7 @@
         verbose=10,
         scoring=["r2", "neg_mean_squared_error"],
         return_estimator=True,
+        # fit_params={"cleanup": True},
     )
 
     log_scores(scores)
diff --git a/examples/example_concrete.py b/examples/example_concrete.py
new file mode 100644
index 00000000..c16d4893
--- /dev/null
+++ b/examples/example_concrete.py
@@ -0,0 +1,60 @@
+import sklearn
+import numpy as np
+from sklearn.preprocessing import StandardScaler, MinMaxScaler
+from sklearn.model_selection import cross_validate
+from ucimlrepo import fetch_ucirepo
+from suprb import SupRB
+from suprb.optimizer.rule.es import ES1xLambda
+from suprb.optimizer.solution.ga import GeneticAlgorithm
+from utils import log_scores
+
+
+
+
+if __name__ == "__main__":
+    random_state = 42
+    # Dataset https://doi.org/10.24432/C5PK67
+    concrete_data = fetch_ucirepo(id=165)
+
+    X, y = concrete_data.data.features, concrete_data.data.targets
+    X = X.to_numpy()
+    y = y.to_numpy()
+    X, y = sklearn.utils.shuffle(X, y, random_state=random_state)
+
+    # Normalize features and target
+    X = MinMaxScaler(feature_range=(-1, 1)).fit_transform(X)
+    y = StandardScaler().fit_transform(np.array(y).reshape(-1, 1)).reshape((-1,))
+
+    # Define model
+    model = SupRB(
+        rule_discovery=ES1xLambda(
+            n_iter=32,
+            lmbda=16,
+            operator="+",
+            delay=150,
+            random_state=random_state,
+            n_jobs=1,
+        ),
+        solution_composition=GeneticAlgorithm(
+            n_iter=32,
+            population_size=32,
+            elitist_ratio=0.2,
+            random_state=random_state,
+            n_jobs=1,
+        ),
+    )
+
+    # Cross-validation
+    scores = cross_validate(
+        model,
+        X,
+        y,
+        cv=4,
+        n_jobs=32,
+        verbose=10,
+        scoring=["r2", "neg_mean_squared_error"],
+        return_estimator=True,
+        fit_params={"cleanup": True},
+    )
+
+    log_scores(scores)
diff --git a/examples/example_new.py b/examples/example_new.py
new file mode 100644
index 00000000..212c6ea9
--- /dev/null
+++ b/examples/example_new.py
@@ -0,0 +1,68 @@
+import sklearn
+import numpy as np
+import matplotlib.pyplot as plt
+
+from sklearn.preprocessing import StandardScaler, MinMaxScaler
+from sklearn.model_selection import cross_validate, train_test_split
+
+from suprb import SupRB
+from suprb.utils import check_random_state
+from suprb.optimizer.rule.es import ES1xLambda
+from suprb.optimizer.solution.ga import GeneticAlgorithm
+
+from utils import log_scores
+
+
+def load_higdon_gramacy_lee(n_samples=500, noise=0, random_state=None):
+    random_state_ = check_random_state(random_state)
+
+    X = np.linspace(0, 20, num=n_samples)
+    y = np.zeros(n_samples)
+    y[X <= 4] = X[X <= 4] / 10 - 1
+    y[(X < 10) & (X >= 5)] = 1 + 0.5 * np.sin(4 * X[(X < 10) & (X >= 5)]) * 0.2 * X[(X < 10) & (X >= 5)]
+    y[X >= 10] = X[X >= 10] / 10 - 1
+
+    y += random_state_.normal(scale=noise, size=n_samples)
+    X = X.reshape((-1, 1))
+
+    return sklearn.utils.shuffle(X, y, random_state=random_state)
+
+
+def create_plot(scores):
+    fig, axes = plt.subplots(2, 2)
+    X_plot = np.linspace(X.min(), X.max(), 500).reshape((-1, 1))
+    for ax, model in zip(axes.flatten(), scores["estimator"]):
+        pred = model.predict(X_plot)
+        ax.scatter(X, y, c="b", s=3, label="y_true")
+        ax.plot(X_plot, pred, c="r", label="y_pred")
+
+    plt.savefig("result.png")
+
+
+if __name__ == "__main__":
+    random_state = 42
+
+    X, y = load_higdon_gramacy_lee(noise=0.1, random_state=random_state)
+
+    X = MinMaxScaler(feature_range=(-1, 1)).fit_transform(X)
+    y = StandardScaler().fit_transform(y.reshape((-1, 1))).reshape((-1,))
+
+    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=random_state)
+
+    model = SupRB(rule_discovery=ES1xLambda(), solution_composition=GeneticAlgorithm())
+
+    scores = cross_validate(
+        model,
+        X_train,
+        y_train,
+        cv=4,
+        n_jobs=1,
+        verbose=10,
+        scoring=["r2", "neg_mean_squared_error"],
+        return_estimator=True,
+        fit_params={"cleanup": True},
+    )
+
+    create_plot(scores)
+
+    log_scores(scores)
diff --git a/examples/example_new2.py b/examples/example_new2.py
new file mode 100644
index 00000000..a220b92a
--- /dev/null
+++ b/examples/example_new2.py
@@ -0,0 +1,66 @@
+import sklearn
+import numpy as np
+import matplotlib.pyplot as plt
+
+from sklearn.preprocessing import StandardScaler, MinMaxScaler
+from sklearn.model_selection import cross_validate, train_test_split
+
+from suprb import SupRB
+from suprb.utils import check_random_state
+from suprb.optimizer.rule.es import ES1xLambda
+from suprb.optimizer.solution.ga import GeneticAlgorithm
+
+from utils import log_scores
+
+
+def load_higdon_gramacy_lee(n_samples=1000, noise=0, random_state=None):
+    random_state_ = check_random_state(random_state)
+
+    X = np.linspace(0, 20, num=n_samples)
+    y = np.zeros(n_samples)
+    y[X >= 0] = np.cos(4 * X[X >= 0])
+
+    y += random_state_.normal(scale=noise, size=n_samples)
+    X = X.reshape((-1, 1))
+
+    return sklearn.utils.shuffle(X, y, random_state=random_state)
+
+
+def create_plot(scores):
+    fig, axes = plt.subplots(2, 2)
+    X_plot = np.linspace(X.min(), X.max(), 500).reshape((-1, 1))
+    for ax, model in zip(axes.flatten(), scores["estimator"]):
+        pred = model.predict(X_plot)
+        ax.scatter(X, y, c="b", s=3, label="y_true")
+        ax.plot(X_plot, pred, c="r", label="y_pred")
+
+    plt.savefig("result.png")
+
+
+if __name__ == "__main__":
+    random_state = 42
+
+    X, y = load_higdon_gramacy_lee(noise=0.1, random_state=random_state)
+
+    X = MinMaxScaler(feature_range=(-1, 1)).fit_transform(X)
+    y = StandardScaler().fit_transform(y.reshape((-1, 1))).reshape((-1,))
+
+    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=random_state)
+
+    model = SupRB(rule_discovery=ES1xLambda(), solution_composition=GeneticAlgorithm())
+
+    scores = cross_validate(
+        model,
+        X_train,
+        y_train,
+        cv=4,
+        n_jobs=32,
+        verbose=10,
+        scoring=["r2", "neg_mean_squared_error"],
+        return_estimator=True,
+        fit_params={"cleanup": True},
+    )
+
+    create_plot(scores)
+
+    log_scores(scores)
diff --git a/examples/example_new3.py b/examples/example_new3.py
new file mode 100644
index 00000000..b8a82f4b
--- /dev/null
+++ b/examples/example_new3.py
@@ -0,0 +1,68 @@
+import sklearn
+import numpy as np
+import matplotlib.pyplot as plt
+
+from sklearn.preprocessing import StandardScaler, MinMaxScaler
+from sklearn.model_selection import cross_validate, train_test_split
+
+from suprb import SupRB
+from suprb.utils import check_random_state
+from suprb.optimizer.rule.es import ES1xLambda
+from suprb.optimizer.solution.ga import GeneticAlgorithm
+
+from utils import log_scores
+
+
+def load_higdon_gramacy_lee(n_samples=1000, noise=0, random_state=None):
+    random_state_ = check_random_state(random_state)
+
+    X = np.linspace(0, 20, num=n_samples)
+    y = np.zeros(n_samples)
+    y[X <= 4] = X[X <= 4] / 10 - 1
+    y[(X < 10) & (X >= 5)] = 1 + 0.5 * np.sin(4 * X[(X < 10) & (X >= 5)]) * 0.2 * X[(X < 10) & (X >= 5)]
+    y[X >= 10] = X[X >= 10] / 10 - 1
+
+    y += random_state_.normal(scale=noise, size=n_samples)
+    X = X.reshape((-1, 1))
+
+    return sklearn.utils.shuffle(X, y, random_state=random_state)
+
+
+def create_plot(scores):
+    fig, axes = plt.subplots(2, 2)
+    X_plot = np.linspace(X.min(), X.max(), 500).reshape((-1, 1))
+    for ax, model in zip(axes.flatten(), scores["estimator"]):
+        pred = model.predict(X_plot)
+        ax.scatter(X, y, c="b", s=3, label="y_true")
+        ax.plot(X_plot, pred, c="r", label="y_pred")
+
+    plt.savefig("result.png")
+
+
+if __name__ == "__main__":
+    random_state = 42
+
+    X, y = load_higdon_gramacy_lee(noise=0.1, random_state=random_state)
+
+    X = MinMaxScaler(feature_range=(-1, 1)).fit_transform(X)
+    y = StandardScaler().fit_transform(y.reshape((-1, 1))).reshape((-1,))
+
+    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=random_state)
+
+    model = SupRB(rule_discovery=ES1xLambda(), solution_composition=GeneticAlgorithm())
+
+    scores = cross_validate(
+        model,
+        X_train,
+        y_train,
+        cv=4,
+        n_jobs=32,
+        verbose=10,
+        scoring=["r2", "neg_mean_squared_error"],
+        return_estimator=True,
+        fit_params={"cleanup": True},
+    )
+
+    create_plot(scores)
+
+    log_scores(scores)
diff --git a/requirements.txt b/requirements.txt
index 6326ee92..f204b1f7 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,3 +3,4 @@ pandas==2.3.0
 pytest==8.4.1
 scikit-learn==1.7.0
 tqdm==4.67.1
+pymoo
diff --git a/setup.cfg b/setup.cfg
index c1f3c7e2..793f738e 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,6 +1,6 @@
 [metadata]
 name = suprb
-version = 1.0.15
+version = 0.1.01
 author = Michael Heider, Jonathan Wurth, Roman-Alexander Sraj
 author_email = michael.heider@uni-a.de
 description = SupRB: The Supervised Rule-based Learning System
@@ -13,11 +13,9 @@ classifiers =
 
 [options]
 packages = find:
-python_requires = ==3.12.11
+python_requires = >=3.9
 include_package_data = True
 install_requires =
-    matplotlib==3.10.3
-    pandas==2.3.0
-    pytest==8.4.1
-    scikit-learn==1.7.0
-    tqdm==4.67.1
+    numpy >= 1.20.0
+    scipy >= 1.7.0
+    scikit-learn >= 1.0.0
diff --git a/suprb/__init__.py b/suprb/__init__.py
index b0b97110..d6ec1711 100644
--- a/suprb/__init__.py
+++ b/suprb/__init__.py
@@ -1,3 +1,4 @@
 from .suprb import SupRB
 from .rule import Rule
 from .solution import Solution
+from .warmup_suprb import WarmupSupRB
diff --git a/suprb/base.py b/suprb/base.py
index d223dc38..748c81c2 100644
--- a/suprb/base.py
+++ b/suprb/base.py
@@ -51,7 +51,7 @@ def _more_str_attributes(self) -> dict:
         return {}
 
 
-class BaseRegressor(RegressorMixin, BaseEstimator, metaclass=ABCMeta):
+class BaseRegressor(BaseEstimator, RegressorMixin, metaclass=ABCMeta):
     """A base (composite) Regressor."""
 
     is_fitted_: bool
diff --git a/suprb/fitness.py b/suprb/fitness.py
index cc6acee8..208f5122 100644
--- a/suprb/fitness.py
+++ b/suprb/fitness.py
@@ -40,3 +40,12 @@ def wu(alpha: float, x1: float, x2: float) -> float:
     """
 
     return ((1 + alpha**2) * x1 * x2) / (alpha**2 * x1 + x2)
+
+
+def moo() -> float:
+    """
+    A placeholder fitness function which always returns NaN.
+    This function is used in multi-objective optimization (MOO) as the rule attribute `.fitness_` is never accessed.
+    Using another fitness function has no impact on the MOO and may be misleading.
+    """
+    return np.nan
\ No newline at end of file
diff --git a/suprb/json.py b/suprb/json.py
index e2c7cf9f..834f69c0 100644
--- a/suprb/json.py
+++ b/suprb/json.py
@@ -59,6 +59,7 @@ def _save_elitist(suprb, json_config):
         "complexity_": suprb.elitist_.complexity_,
         "error_": suprb.elitist_.error_,
         "fitness_": suprb.elitist_.fitness_,
+        # "genome_" : np.ones(len(suprb.pool_))
     }
 
 
diff --git a/suprb/optimizer/rule/base.py b/suprb/optimizer/rule/base.py
index 94acb7eb..27516a96 100644
--- a/suprb/optimizer/rule/base.py
+++ b/suprb/optimizer/rule/base.py
@@ -1,5 +1,5 @@
 from abc import ABCMeta, abstractmethod
-from typing import Optional
+from typing import List, Optional
 
 import numpy as np
 from joblib import Parallel, delayed
@@ -107,3 +107,33 @@ def _optimize(
         random_state: RandomState,
     ) -> Optional[Rule]:
         pass
+
+
+class MultiRuleDiscovery(RuleDiscovery, metaclass=ABCMeta):
+    """
+    Implements basic functionality to generate a rule population,
+    returning the valid ones.
+    Warns the user if not enough valid rules were found.
+    """
+
+    def optimize(self, X: np.array, y: np.array, n_rules: int = 1) -> List[Rule]:
+        self.random_state_ = check_random_state(self.random_state)
+        random_state = self.random_state_
+
+
+        all_rules = self._optimize(X, y, random_state) or [] #origins created in optimize function of subclass
+
+        valid = self._filter_invalid_rules(X=X, y=y, rules=all_rules)
+
+        if len(valid) < n_rules:
+            print(f"Warning: Requested {n_rules} but only {len(valid)} Pareto-optimal rule(s) generated.")
+        
+        return valid[:n_rules]
+    
+    def _optimize(
+            self,
+            X: np.ndarray,
+            y: np.ndarray,
+            random_state: RandomState,
+    ) -> Optional[List[Rule]]:
+        pass
diff --git a/suprb/optimizer/rule/mutation.py b/suprb/optimizer/rule/mutation.py
index c0b5e5a7..56026ab5 100644
--- a/suprb/optimizer/rule/mutation.py
+++ b/suprb/optimizer/rule/mutation.py
@@ -2,7 +2,6 @@
 
 import numpy as np
 from scipy.stats import halfnorm
-import warnings
 
 from suprb.rule import Rule
 from suprb.utils import RandomState
@@ -31,7 +30,6 @@ def __call__(self, rule: Rule, random_state: RandomState) -> Rule:
         return mutated_rule
 
     def execute(self, rule: Rule, random_state: RandomState):
-        warnings.warn("No matching_type was set! This will impact mutation.")
         pass
 
 
diff --git a/suprb/optimizer/rule/ns/novelty_calculation.py b/suprb/optimizer/rule/ns/novelty_calculation.py
index faf73529..21285062 100644
--- a/suprb/optimizer/rule/ns/novelty_calculation.py
+++ b/suprb/optimizer/rule/ns/novelty_calculation.py
@@ -33,6 +33,10 @@ def _novelty_score(self, rules: list[Rule]) -> list[Rule]:
             if not hasattr(rule, "idx_") or rule.idx_ > len(archive):
                 rule.distances_ = []
                 for archive_rule in archive:
+
+                    if not hasattr(archive_rule, 'distances_'): #Was a problem with MOO-RD with novelty
+                        archive_rule.distances_ = []
+
                     hamming_distance = hamming(rule.match_set_, archive_rule.match_set_)
                     archive_rule.distances_.append(hamming_distance)
                     rule.distances_.append(hamming_distance)
diff --git a/suprb/optimizer/rule/nsga2/__init__.py b/suprb/optimizer/rule/nsga2/__init__.py
new file mode 100644
index 00000000..2ee9f377
--- /dev/null
+++ b/suprb/optimizer/rule/nsga2/__init__.py
@@ -0,0 +1,8 @@
+from .nsga2 import NSGA2
+from .pymoo.pymoo_nsga2 import PymooNSGA2
+from .nsga2_global_novelty import NSGA2GlobalNovelty
+from .nsga2_novelty_G_P import NSGA2Novelty_G_P
+from .nsga2ig import NSGA2InfoGain
+from .nsga2_var_reduc import NSGA2VarianceReduction
+#Do not know if needed
+from .nsga2_helpers import visualize_pareto_front
\ No newline at end of file
diff --git a/suprb/optimizer/rule/nsga2/nsga2.py b/suprb/optimizer/rule/nsga2/nsga2.py
new file mode 100644
index 00000000..d1612168
--- /dev/null
+++ b/suprb/optimizer/rule/nsga2/nsga2.py
@@ -0,0 +1,256 @@
+import cProfile
+import pstats
+
+import numpy as np
+from typing import Optional, List, Callable, Tuple
+from joblib import Parallel, delayed
+
+from suprb.rule import Rule, RuleInit
+from suprb.rule.initialization import MeanInit
+from suprb.utils import RandomState
+from ..base import MultiRuleDiscovery
+from ..origin import Matching, SquaredError, RuleOriginGeneration
+from ..mutation import RuleMutation, Normal
+from ..constraint import CombinedConstraint, MinRange, Clip
+from ..acceptance import Variance
+from .. import RuleAcceptance, RuleConstraint
+from .nsga2_helpers import visualize_pareto_front
+
+from pymoo.util.nds.non_dominated_sorting import NonDominatedSorting
+from pymoo.operators.survival.rank_and_crowding.metrics import calc_crowding_distance, FunctionalDiversity
+
+
+class NSGA2(MultiRuleDiscovery):
+    """
+    Adapted from: A Fast and Elitist Multiobjective Genetic Algorithm: NSGA-II
+    by Kalyanmoy Deb et al..
+
+    This class implements a multi-objective evolutionary algorithm for
+    rule discovery. It uses pymoo's fast nondominated sorting and crowding
+    distance operators, and serves as a base class for multi-objective
+    optimization in MOO-RD.
+
+    Parameters
+    ----------
+    n_iter : int
+        Number of evolutionary iterations.
+    mu : int
+        Population size
+    lmbda : int
+        Number of children sampled each generation.
+    origin_generation : RuleOriginGeneration
+    init : RuleInit
+    mutation : RuleMutation
+    constraint : RuleConstraint
+    acceptance : RuleAcceptance
+    random_state : int or None
+    n_jobs : int
+    fitness_objs : list, optional
+        List of fitness objectives
+        Defaults to [rule.error_, -rule.volume_].
+    fitness_objs_labels : list of str, optional
+        Names corresponding to the objective functions.
+        Defaults to ["obj_0", "obj_1", ...].
+    profile : bool, default=False
+        If True, wraps the optimization loop in a profiler and prints stats.
+    """
+    def __init__(
+            self,
+            n_iter: int = 32,
+            mu: int = 50,
+            lmbda: int = 100,
+            origin_generation: RuleOriginGeneration = SquaredError(),
+            init: RuleInit = MeanInit(),
+            mutation: RuleMutation = Normal(sigma=1.22),
+            constraint: RuleConstraint = CombinedConstraint(MinRange(), Clip()),
+            acceptance: RuleAcceptance = Variance(),
+            random_state: int = None,
+            n_jobs: int = 1,
+            fitness_objs: Optional[List[Callable[[Rule], float]]] = None,
+            fitness_objs_labels: Optional[List[str]] = None,
+            profile: bool = False,
+    ):
+        super().__init__(
+            n_iter=n_iter,
+            origin_generation=origin_generation,
+            init=init,
+            acceptance=acceptance,
+            constraint=constraint,
+            random_state=random_state,
+            n_jobs=n_jobs,
+        )
+        self.mu = mu
+        self.lmbda = lmbda
+        self.mutation = mutation
+        self.constraint = constraint
+
+        if fitness_objs is None:
+            fitness_objs = [
+                lambda r: r.error_,
+                lambda r: -r.volume_,
+            ]
+
+        # Extra objectives in subclasses are added during runtime to avoid sklearn.clone errors.
+        self.fitness_objs: Tuple[Callable[[Rule], float], ...] = tuple(fitness_objs)
+
+        if fitness_objs_labels is None:
+            fitness_objs_labels = [f"obj_{i}" for i in range(len(self.fitness_objs))]
+        self.fitness_objs_labels: Tuple[str, ...] = tuple(fitness_objs_labels)
+
+        self.profile = profile
+
+    def _optimize(
+            self,
+            X: np.ndarray,
+            y: np.ndarray,
+            random_state: RandomState
+    ) -> Optional[List[Rule]]:
+        profiler = cProfile.Profile() if self.profile else None
+        if profiler:
+            profiler.enable()
+
+        # Generate initial population.
+        population = []
+        origins = self.origin_generation(
+            n_rules=self.mu,
+            X=X,
+            y=y,
+            pool=self.pool_,
+            elitist=self.elitist_,
+            random_state=random_state,
+        )
+
+        population = Parallel(n_jobs=self.n_jobs)(
+            delayed(self._init_valid_origin)(origin, X, y, random_state)
+            for origin in origins
+        )
+        population = [p for p in population if p is not None]
+
+        # Main loop.
+        for _ in range(self.n_iter):
+            # Select parents and generate children.
+            parents = random_state.choice(population, size=self.lmbda, replace=True)
+            children = Parallel(n_jobs=self.n_jobs)(
+                delayed(self._generate_valid_child)(parent, X, y, random_state)
+                for parent in parents
+            )
+            children = [c for c in children if c is not None]
+
+            # Combine current population and children (mu + lmbda), then nondominated sorting into fronts.
+            population_combined = population + children
+            pareto_fronts = self._fast_nondominated_sort(population_combined)
+            population = self._build_next_population(pareto_fronts) #asigns crowding distance, build next population of size mu.
+
+        # The nondominated set.
+        pareto_front = pareto_fronts[0] if pareto_fronts else []
+
+        if profiler:
+            profiler.disable()
+            stats = pstats.Stats(profiler).sort_stats("cumtime")
+            stats.print_stats(20)
+
+        # Visualize the objective space distribution for the chosen fitness objectives. Is intended for the isolated experiments.
+        # Comment out if not run in isolation or for more than one iteration.
+        #visualize_pareto_front(self, pareto_front)
+
+        return pareto_front
+
+# ────────────────────────────────────────────────────────────────────
+# Helper Functions
+# ────────────────────────────────────────────────────────────────────
+    def _fast_nondominated_sort(self, population: List[Rule]) -> List[List[Rule]]:
+        """
+        Sorts population into fronts using pymoo's fast nondominated sort.
+        """
+        if not population:
+            return []
+        objs = self._fitness_objs_runtime()
+        obj_matrix = np.vstack(
+            [[obj(rule) for obj in objs] for rule in population]
+        )
+
+        fronts = NonDominatedSorting().do(obj_matrix, only_non_dominated_front=False)
+        return [[population[i] for i in front] for front in fronts]
+
+
+    def _assign_crowding_distance(
+            self,
+            front: List[Rule],
+            cd_func,
+    ):
+        """
+        Calculate crowding distance using pymoo's crowding distance function.
+        """
+        if not front:
+            return
+
+        objs = self._fitness_objs_runtime()
+        obj_matrix = np.vstack(
+            [[obj(rule) for obj in objs] for rule in front]
+        )
+
+        crowding_distances = cd_func.do(obj_matrix)
+
+        for rule, dist in zip(front, crowding_distances):
+            rule.crowding_distance_ = dist
+
+
+    def _init_valid_origin(
+            self,
+            origin,
+            X,
+            y,
+            random_state,
+    ):
+        rule = self.constraint(self.init(mean=origin, random_state=random_state)).fit(X, y)
+        return rule if rule.is_fitted_ and rule.experience_> 0 else None
+
+
+    def _generate_valid_child(
+        self,
+        parent,
+        X,
+        y,
+        random_state,
+    ):
+        child = self.constraint(self.mutation(parent, random_state)).fit(X,y)
+        return child if child.is_fitted_ and child.experience_ > 0 else None
+
+
+    def _build_next_population(
+            self,
+            pareto_fronts,
+    ):
+        """
+        Add fronts starting from F_0 until population limit mu is reached.
+        Last front is truncated if necessary and the population then is extended by the individuals with the highest crowding distance.
+        """
+        population_new = []
+
+        cd_func = FunctionalDiversity(calc_crowding_distance, filter_out_duplicates=True)
+
+        for front in pareto_fronts:
+            self._assign_crowding_distance(front, cd_func)
+            if len(population_new) + len(front) <= self.mu:
+                population_new.extend(front)
+            else:
+                front_sorted = sorted(front, key=lambda rule: -rule.crowding_distance_)
+                population_new.extend(front_sorted[:self.mu - len(population_new)])
+                break
+
+        return population_new
+
+
+    def _fitness_objs_runtime(self) -> List[Callable[[Rule], float]]:
+        """
+        Used in the subclasses to add the secondary objective at runtime.
+        If set otherwise, the objectives generate a sklearn error.
+        """
+        return list(self.fitness_objs)
+
+
+    def _fitness_labels_runtime(self) -> List[str]:
+        """
+        Returns list of all fitness objectives used.
+        """
+        return list(self.fitness_objs_labels)
diff --git a/suprb/optimizer/rule/nsga2/nsga2_global_novelty.py b/suprb/optimizer/rule/nsga2/nsga2_global_novelty.py
new file mode 100644
index 00000000..59870b3f
--- /dev/null
+++ b/suprb/optimizer/rule/nsga2/nsga2_global_novelty.py
@@ -0,0 +1,133 @@
+import numpy as np
+from typing import Optional, List, Callable
+from joblib import Parallel, delayed
+import matplotlib.pyplot as plt
+
+from suprb.rule import Rule, RuleInit
+from suprb.rule.initialization import MeanInit
+from suprb.optimizer.rule.nsga2.nsga2 import NSGA2
+from suprb.optimizer.rule.ns.novelty_calculation import NoveltyCalculation
+from suprb.optimizer.rule.ns.archive import ArchiveNovel
+from suprb.optimizer.rule.ns.novelty_search_type import NoveltySearchType
+from suprb.utils import RandomState
+from ..origin import Matching, SquaredError, RuleOriginGeneration
+from ..mutation import RuleMutation, HalfnormIncrease
+from ..constraint import CombinedConstraint, MinRange, Clip
+from ..acceptance import Variance
+from .. import RuleAcceptance, RuleConstraint
+from .nsga2_helpers import visualize_pareto_front
+
+import cProfile
+import pstats
+
+
+
+class NSGA2GlobalNovelty(NSGA2):
+    """
+    Adapted from: A Fast and Elitist Multiobjective Genetic Algorithm: NSGA-II by Kalyanmoy Deb et al.
+    Uses Crowding Distance and NonDominatedSorting from pymoo package.
+    Uses novelty as a fitness objective.
+    Not optimized. Deprecated.
+    """
+    def __init__(
+        self,
+        n_iter: int = 32,
+        mu: int = 50,
+        lmbda: int = 100,
+        origin_generation: RuleOriginGeneration = SquaredError(),
+        init: RuleInit = MeanInit(),
+        mutation: RuleMutation = HalfnormIncrease(sigma=1.22),
+        constraint: RuleConstraint = CombinedConstraint(MinRange(), Clip()),
+        acceptance: RuleAcceptance = Variance(),
+        random_state: int = None,
+        n_jobs: int = 1,
+        fitness_objs: Optional[List[Callable[[Rule], float]]] = None,
+        fitness_objs_labels: Optional[List[str]] = None,
+        novelty_calc: NoveltyCalculation = NoveltyCalculation(
+            novelty_search_type=NoveltySearchType(),
+            archive=ArchiveNovel(),
+            k_neighbor=15,
+        ),
+        profile: bool = False,
+    ):
+        super().__init__(
+            n_iter=n_iter,
+            mu=mu,
+            lmbda=lmbda,
+            origin_generation=origin_generation,
+            init=init,
+            mutation=mutation,
+            constraint=constraint,
+            acceptance=acceptance,
+            random_state=random_state,
+            n_jobs=n_jobs,
+            fitness_objs=fitness_objs,
+            fitness_objs_labels=fitness_objs_labels,
+        )
+        self.novelty_calc = novelty_calc
+
+        self.fitness_objs = list(self.fitness_objs) + [lambda r: -r.novelty_score_]
+        self.fitness_objs_labels = list(self.fitness_objs_labels) + ['-Novelty']    
+
+    def _optimize(
+            self,
+            X: np.ndarray,
+            y: np.ndarray,
+            random_state: RandomState
+    ) -> Optional[List[Rule]]:
+        profiler = cProfile.Profile() if self.profile else None
+        if profiler:
+            profiler.enable()
+            
+        self.pool_.clear()
+
+        population = []
+
+        origins = self.origin_generation(
+            n_rules=self.mu,
+            X=X,
+            y=y,
+            pool=self.pool_,
+            elitist=self.elitist_,
+            random_state=random_state,
+        )
+
+        population = Parallel(n_jobs=self.n_jobs)(
+            delayed(self._init_valid_origin)(origin, X, y, random_state)
+            for origin in origins
+        )
+
+        population = [p for p in population if p is not None]
+
+        for _ in range(self.n_iter):
+            parents = random_state.choice(population, size=self.lmbda, replace=True)
+
+            children = Parallel(n_jobs=self.n_jobs)(
+                delayed(self._generate_valid_child)(parent, X, y, random_state)
+                for parent in parents
+            )
+
+            children = [c for c in children if c is not None]
+
+            population_combined = population + children
+
+            self.pool_.extend(population_combined)
+            self.novelty_calc.archive.archive = self.pool_
+            _ = self.novelty_calc(self.pool_)
+
+            pareto_fronts = self._fast_nondominated_sort(population_combined)
+            population = self._build_next_population(pareto_fronts)
+
+        pareto_front = pareto_fronts[0]
+        if not pareto_front:
+            return None
+
+        if profiler:
+            profiler.disable()
+            stats = pstats.Stats(profiler).sort_stats("cumtime")
+            stats.print_stats(20)
+
+        visualize_pareto_front(self, pareto_front)
+
+        return pareto_front
+        
\ No newline at end of file
diff --git a/suprb/optimizer/rule/nsga2/nsga2_helpers.py b/suprb/optimizer/rule/nsga2/nsga2_helpers.py
new file mode 100644
index 00000000..600e559a
--- /dev/null
+++ b/suprb/optimizer/rule/nsga2/nsga2_helpers.py
@@ -0,0 +1,51 @@
+import numpy as np
+import matplotlib.pyplot as plt
+from typing import List
+from datetime import datetime
+
+def visualize_pareto_front(self, pareto_front: List, save_path: str = "Objective_Space_Distribution") -> None:
+    """
+    Generates PDF of the objective space distribution of the input list of rules.
+    """
+    if not pareto_front:
+        print("No Pareto front provided for visualization.")
+        return
+
+    objs = self._fitness_objs_runtime()
+    labels = self._fitness_labels_runtime()
+
+    if len(objs) != 2:
+        print(f"Expected exactly 2 objectives, found {len(objs)}. Skipping plot.")
+        return
+
+    obj_matrix = np.array([[objs[0](r), objs[1](r)] for r in pareto_front])
+
+    plt.rcParams["font.family"] = "serif"
+    plt.rcParams["font.serif"] = ["Times New Roman", "Times", "DejaVu Serif"]
+    plt.rcParams["mathtext.fontset"] = "cm"
+    plt.rcParams["pdf.fonttype"] = 42
+    plt.rcParams["ps.fonttype"] = 42
+    plt.rcParams["svg.fonttype"] = "none"
+
+    plt.figure(figsize=(6, 4))
+    plt.scatter(
+        obj_matrix[:, 0],
+        obj_matrix[:, 1],
+        s=40,
+        c="royalblue",
+        edgecolors="black",
+        alpha=0.8
+    )
+    plt.xlabel(labels[0], fontsize=11)
+    plt.ylabel(labels[1], fontsize=11)
+    plt.title("Objective Space Distribution", fontsize=12, pad=10)
+    plt.grid(True, linestyle="--", alpha=0.5)
+    plt.tight_layout()
+
+    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+    pdf_path = f"{save_path}_{timestamp}.pdf"
+
+    plt.savefig(pdf_path, format="pdf", bbox_inches="tight")
+    plt.close()
+
+    print(f"Pareto front visualization saved as PDF: '{pdf_path}'")
\ No newline at end of file
diff --git a/suprb/optimizer/rule/nsga2/nsga2_novelty_G_P.py b/suprb/optimizer/rule/nsga2/nsga2_novelty_G_P.py
new file mode 100644
index 00000000..24979e12
--- /dev/null
+++ b/suprb/optimizer/rule/nsga2/nsga2_novelty_G_P.py
@@ -0,0 +1,321 @@
+import copy
+import warnings
+from collections import deque
+
+import numpy as np
+from typing import Literal, Optional, List, Callable, Set, Deque
+from joblib import Parallel, delayed
+
+from suprb import Solution
+from suprb.rule import Rule, RuleInit
+from suprb.rule.initialization import MeanInit
+from suprb.optimizer.rule.nsga2.nsga2 import NSGA2
+from suprb.optimizer.rule.ns.novelty_calculation import NoveltyCalculation
+from suprb.optimizer.rule.ns.archive import ArchiveNovel
+from suprb.optimizer.rule.ns.novelty_search_type import NoveltySearchType
+from suprb.solution.fitness import PseudoBIC
+from suprb.solution.mixing_model import ErrorExperienceHeuristic
+from suprb.utils import RandomState
+from .nsga2_helpers import visualize_pareto_front
+from ..origin import SquaredError, RuleOriginGeneration
+from ..mutation import RuleMutation, HalfnormIncrease, Normal
+from ..constraint import CombinedConstraint, MinRange, Clip
+from ..acceptance import Variance
+from .. import RuleAcceptance, RuleConstraint
+
+import cProfile
+import pstats
+
+
+class NSGA2Novelty_G_P(NSGA2):
+    """
+    MOO-RD variant that adds novelty as an objective and supports restart logic:
+    If a run's Pareto front only contains trivial rules (e.g., experience == 1),
+    the algorithm restarts and accumulates only 'useful' rules until `mu` useful
+    rules are collected or the restart limit is hit.
+
+    Parameters
+    ----------
+    n_iter : int
+        Number of evolutionary iterations.
+    mu : int
+        Population size
+    lmbda : int
+        Number of children sampled each generation.
+    origin_generation : RuleOriginGeneration
+    init : RuleInit
+    mutation : RuleMutation
+    constraint : RuleConstraint
+    acceptance : RuleAcceptance
+    random_state : int or None
+    n_jobs : int
+    fitness_objs : list, optional
+        List of fitness objectives
+        Defaults to [rule.error_, -rule.volume_].
+        Variance reduction objective is added internally.
+    fitness_objs_labels : list of str, optional
+        Names corresponding to the objective functions.
+        Defaults to ["obj_0", "obj_1", ...].
+    novelty_calc: NoveltyCalculation
+        Class to calculate the novelty score based on NoveltySearchType, Archive and k_neigbor.
+    novelty_mode: Literal
+        Either 'P' or 'G'.
+        In 'P' mode, only the current cohort is used for novelty score calculation.
+        In 'G' mode, the current cohort and a local archive is used for novelty score calculation.
+        Defaults to 'P'.
+    profile : bool, default=False
+        If True, wraps the optimization loop in a profiler and prints stats.
+    min_experience: int
+        Threshold used to determine whether a rule is trivial.
+    max_restarts: int
+        Restart limit.
+    keep_archive_across_restarts: bool
+        Determines whether in novelty mode = 'G' the local archive is kept across restarts.
+    """
+
+    def __init__(
+            self,
+            n_iter: int = 32,
+            mu: int = 50,
+            lmbda: int = 100,
+            origin_generation: RuleOriginGeneration = SquaredError(),
+            init: RuleInit = MeanInit(),
+            mutation: RuleMutation = Normal(sigma=1.22),
+            constraint: RuleConstraint = CombinedConstraint(MinRange(), Clip()),
+            acceptance: RuleAcceptance = Variance(),
+            random_state: int = None,
+            n_jobs: int = 1,
+            fitness_objs: Optional[List[Callable[[Rule], float]]] = None,
+            fitness_objs_labels: Optional[List[str]] = None,
+            novelty_calc: NoveltyCalculation = NoveltyCalculation(
+                novelty_search_type=NoveltySearchType(),
+                archive=ArchiveNovel(),
+                k_neighbor=15,
+            ),
+            novelty_mode: Literal["G", "P"] = "P",
+            profile: bool = False,
+            min_experience: int = 2,
+            max_restarts: int = 5,
+            keep_archive_across_restarts: bool = True,
+    ):
+        super().__init__(
+            n_iter=n_iter,
+            mu=mu,
+            lmbda=lmbda,
+            origin_generation=origin_generation,
+            init=init,
+            mutation=mutation,
+            constraint=constraint,
+            acceptance=acceptance,
+            random_state=random_state,
+            n_jobs=n_jobs,
+            fitness_objs=fitness_objs,
+            fitness_objs_labels=fitness_objs_labels,
+        )
+        self.novelty_calc = novelty_calc
+        self.novelty_mode = novelty_mode
+        self.profile = profile
+
+        self.min_experience = min_experience
+        self.max_restarts = max_restarts
+        self.keep_archive_across_restarts = keep_archive_across_restarts
+
+        self._novelty_obj = lambda r: -getattr(r, "novelty_score_", np.inf)
+        self._novelty_label = "-Novelty"
+
+        self.last_front_: List[Rule] = []
+
+        self.archive_maxlen = 256
+        self.local_pool_: Deque[Rule] = deque(maxlen=self.archive_maxlen)
+        self._archive_seen_ids: Set[int] = set()
+
+
+    def _score_novelty(self,
+        rules: List[Rule],
+        cohort: Optional[List[Rule]] = None,
+        force: bool = False,
+    ) -> None:
+        """
+        Compute novelty scores for the given rules.
+        Selects only unscored rules (unless forced),
+        builds a reference archive based on the configured novelty mode ('G' or 'P'),
+        caps the archive size,
+        and delegates scoring to the novelty calculator.
+        """
+        if not rules:
+            return
+
+        to_score = list(rules) if force else [r for r in rules if not hasattr(r, "novelty_score_")]
+        if not to_score:
+            return
+        if self.novelty_mode == "G":
+            for r in to_score:
+                rid = id(r)
+                if rid not in self._archive_seen_ids:
+                    self.local_pool_.append(copy.copy(r))
+                    self._archive_seen_ids.add(rid)
+
+            ref = list(self.local_pool_)
+
+            if cohort and self.last_front_:
+                extra = self.last_front_
+            else:
+                extra = cohort
+
+            if extra:
+                EXTRA_MAX = 256
+                extra_slice = extra[-EXTRA_MAX:] if len(extra) > EXTRA_MAX else extra
+                ref.extend(copy.copy(r) for r in extra_slice)
+        #novelty_mode = 'P'
+        else:
+            ref = [copy.copy(r) for r in cohort] if cohort else [copy.copy(r) for r in rules]
+
+        ref = self._cap_list(ref)
+        self.novelty_calc.archive.archive = ref
+        _ = self.novelty_calc(to_score)
+
+
+# ────────────────────────────────────────────────────────────────
+# Helpers for restart logic
+# ────────────────────────────────────────────────────────────────
+    def _is_useful(self, r: Rule) -> bool:
+        """Define 'useful' rules here."""
+        return getattr(r, "experience_", 0) >= self.min_experience
+
+    def _unique_extend(self, base: List[Rule], new_rules: List[Rule]) -> None:
+        seen = set(map(id, base))
+        for r in new_rules:
+            if id(r) not in seen:
+                base.append(r)
+                seen.add(id(r))
+
+
+    def _run_once(
+            self,
+            X: np.ndarray,
+            y: np.ndarray,
+            random_state: RandomState,
+            clear_pool: bool,
+    ) -> Optional[List[Rule]]:
+
+        if clear_pool:
+            self.local_pool_.clear()
+            self._archive_seen_ids.clear()
+            self.last_front_ = []
+
+        origins = self.origin_generation(
+            n_rules=self.mu,
+            X=X,
+            y=y,
+            pool=self.pool_,
+            elitist=self.elitist_,
+            random_state=random_state,
+        )
+
+        profiler = cProfile.Profile() if self.profile else None
+        if profiler:
+            profiler.enable()
+
+
+        population = Parallel(n_jobs=self.n_jobs)(
+            delayed(self._init_valid_origin)(origin, X, y, random_state) for origin in origins
+        )
+        population = [p for p in population if p is not None]
+        if not population:
+            if profiler:
+                profiler.disable()
+            return None
+
+        self._score_novelty(population, cohort=population, force=True)
+
+        # main loop
+        for _ in range(self.n_iter):
+            parents = random_state.choice(population, size=self.lmbda, replace=True)
+            children = Parallel(n_jobs=self.n_jobs)(
+                delayed(self._generate_valid_child)(parent, X, y, random_state) for parent in parents
+            )
+            children = [c for c in children if c is not None]
+            if not children:
+                continue
+
+            cohort = population + children
+            self._score_novelty(children, cohort=cohort, force=False)
+            self._score_novelty(population, cohort=cohort, force=True)
+
+            population_combined = population + children
+
+            pareto_fronts = self._fast_nondominated_sort(population_combined)
+            self.last_front_ = pareto_fronts[0]
+
+            population = self._build_next_population(pareto_fronts)
+
+        pareto_front = pareto_fronts[0] if pareto_fronts else []
+
+        if profiler:
+            profiler.disable()
+            stats = pstats.Stats(profiler).sort_stats("cumtime")
+            stats.print_stats(20)
+
+        return pareto_front
+
+
+    def _optimize(
+            self,
+            X: np.ndarray,
+            y: np.ndarray,
+            random_state: RandomState,
+    ) -> Optional[List[Rule]]:
+
+        useful_rules: List[Rule] = []
+        restarts = 0
+
+        while len(useful_rules) < self.mu and restarts <= self.max_restarts:
+            pareto_front = self._run_once(
+                X=X,
+                y=y,
+                random_state=random_state,
+                clear_pool=(not self.keep_archive_across_restarts),
+            )
+
+            if not pareto_front:
+                restarts += 1
+                continue
+
+            useful_from_front = [r for r in pareto_front if self._is_useful(r)]
+            self._unique_extend(useful_rules, useful_from_front)
+
+            only_trivial = all(getattr(r, "experience_", 0) < self.min_experience for r in pareto_front)
+
+            if only_trivial and len(useful_rules) < self.mu:
+                restarts += 1
+                continue
+
+            if len(useful_rules) < self.mu:
+                restarts += 1
+                continue
+
+        # Truncate to mu if we got more (optional)
+        if useful_rules:
+            useful_rules = useful_rules[: self.mu]
+
+        # to_visualize = useful_rules if useful_rules else (pareto_front or [])
+        # if to_visualize:
+        #     visualize_pareto_front(self, to_visualize)
+
+        print(f"Iterations needed to generate mu useful rules: {restarts + 1}")
+        return useful_rules if useful_rules else (pareto_front or None)
+
+# ────────────────────────────────────────────────────────────────
+# Helper functions
+# ────────────────────────────────────────────────────────────────
+    def _fitness_objs_runtime(self) -> List[Callable[[Rule], float]]:
+        return list(self.fitness_objs) + [self._novelty_obj]
+
+    def _fitness_labels_runtime(self) -> List[str]:
+        return list(self.fitness_objs_labels) + [self._novelty_label]
+
+    def _cap_list(self, seq: List[Rule]) -> List[Rule]:
+        if len(seq) <= self.archive_maxlen:
+            return seq
+
+        return seq[-self.archive_maxlen:]
diff --git a/suprb/optimizer/rule/nsga2/nsga2_var_reduc.py b/suprb/optimizer/rule/nsga2/nsga2_var_reduc.py
new file mode 100644
index 00000000..f1a62a85
--- /dev/null
+++ b/suprb/optimizer/rule/nsga2/nsga2_var_reduc.py
@@ -0,0 +1,119 @@
+import numpy as np
+from typing import Optional, List, Callable
+
+from suprb.rule import Rule, RuleInit
+from suprb.utils import RandomState
+
+from suprb.rule.initialization import MeanInit
+from ..origin import SquaredError, RuleOriginGeneration
+from ..mutation import RuleMutation, Normal
+from ..constraint import CombinedConstraint, MinRange, Clip
+from ..acceptance import Variance
+from .. import RuleAcceptance, RuleConstraint
+
+from .nsga2 import NSGA2
+
+
+class NSGA2VarianceReduction(NSGA2):
+    """
+    MOO-RD variant using rule error and variance reduction (regression-style information gain)
+    as fitness objectives.
+    Created because,
+    theoretically, the information gain version should not work for regression tasks.
+
+    Parameters
+    ----------
+    n_iter : int
+        Number of evolutionary iterations.
+    mu : int
+        Population size
+    lmbda : int
+        Number of children sampled each generation.
+    origin_generation : RuleOriginGeneration
+    init : RuleInit
+    mutation : RuleMutation
+    constraint : RuleConstraint
+    acceptance : RuleAcceptance
+    random_state : int or None
+    n_jobs : int
+    fitness_objs : list, optional
+        List of fitness objectives
+        Defaults to [rule.error_, -rule.volume_].
+        Variance reduction objective is added internally.
+    fitness_objs_labels : list of str, optional
+        Names corresponding to the objective functions.
+        Defaults to ["obj_0", "obj_1", ...].
+    profile : bool, default=False
+        If True, wraps the optimization loop in a profiler and prints stats.
+    """
+
+    def __init__(
+        self,
+        n_iter: int = 32,
+        mu: int = 50,
+        lmbda: int = 100,
+        origin_generation: RuleOriginGeneration = SquaredError(),
+        init: RuleInit = MeanInit(),
+        mutation: RuleMutation = Normal(sigma=1.22),
+        constraint: RuleConstraint = CombinedConstraint(MinRange(), Clip()),
+        acceptance: RuleAcceptance = Variance(),
+        random_state: int = None,
+        n_jobs: int = 1,
+        fitness_objs: Optional[List[Callable[[Rule], float]]] = None,
+        fitness_objs_labels: Optional[List[str]] = None,
+        profile: bool = False,
+    ):
+        super().__init__(
+            n_iter=n_iter,
+            mu=mu,
+            lmbda=lmbda,
+            origin_generation=origin_generation,
+            init=init,
+            mutation=mutation,
+            constraint=constraint,
+            acceptance=acceptance,
+            random_state=random_state,
+            n_jobs=n_jobs,
+            fitness_objs=fitness_objs,
+            fitness_objs_labels=fitness_objs_labels,
+            profile=profile,
+        )
+
+
+    def _optimize(self, X: np.ndarray, y: np.ndarray, random_state: RandomState):
+        self._var_y = np.var(y)
+
+        self._varred_obj = lambda r, X_ref=X, y_ref=y, var_y=self._var_y: -self._variance_reduction(
+            r.match(X_ref), y_ref, var_y
+        )
+        self._varred_label = "-Variance Reduction"
+
+        return super()._optimize(X, y, random_state)
+
+# ────────────────────────────────────────────────────────────────
+# Variance Reduction Helpers
+# ────────────────────────────────────────────────────────────────
+    @staticmethod
+    def _variance_reduction(mask: np.ndarray, y: np.ndarray, var_y: float) -> float:
+        """
+        Variance-based analogue to information gain for regression.
+        IG_var = Var(y) - [p * Var(y[mask]) + (1 - p) * Var(y[~mask])]
+        """
+        if mask.size == 0:
+            return 0.0
+        p = float(mask.mean())
+        if p == 0.0 or p == 1.0:
+            return 0.0
+
+        var_match = np.var(y[mask]) if np.any(mask) else 0.0
+        var_not_match = np.var(y[~mask]) if np.any(~mask) else 0.0
+        return var_y - (p * var_match + (1.0 - p) * var_not_match)
+
+# ────────────────────────────────────────────────────────────────
+# Helper functions
+# ────────────────────────────────────────────────────────────────
+    def _fitness_objs_runtime(self) -> List[Callable[[Rule], float]]:
+        return list(self.fitness_objs) + [self._varred_obj]
+
+    def _fitness_labels_runtime(self) -> List[str]:
+        return list(self.fitness_objs_labels) + [self._varred_label]
diff --git a/suprb/optimizer/rule/nsga2/nsga2ig.py b/suprb/optimizer/rule/nsga2/nsga2ig.py
new file mode 100644
index 00000000..5c191907
--- /dev/null
+++ b/suprb/optimizer/rule/nsga2/nsga2ig.py
@@ -0,0 +1,126 @@
+import numpy as np
+from typing import Optional, List, Callable
+
+from suprb.rule import Rule, RuleInit
+from suprb.utils import RandomState
+
+from suprb.rule.initialization import MeanInit
+from ..origin import SquaredError, RuleOriginGeneration
+from ..mutation import RuleMutation, Normal
+from ..constraint import CombinedConstraint, MinRange, Clip
+from ..acceptance import Variance
+from .. import RuleAcceptance, RuleConstraint
+
+from .nsga2 import NSGA2
+
+class NSGA2InfoGain(NSGA2):
+    """
+    MOO-RD with rule error and information gain as fitness objectives.
+    Information gain objective based on Shannon entropy.
+
+    Parameters
+    ----------
+    n_iter : int
+        Number of evolutionary iterations.
+    mu : int
+        Population size
+    lmbda : int
+        Number of children sampled each generation.
+    origin_generation : RuleOriginGeneration
+    init : RuleInit
+    mutation : RuleMutation
+    constraint : RuleConstraint
+    acceptance : RuleAcceptance
+    random_state : int or None
+    n_jobs : int
+    fitness_objs : list, optional
+        List of fitness objectives
+        Defaults to [rule.error_, -rule.volume_].
+        Information gain objective is added internally.
+    fitness_objs_labels : list of str, optional
+        Names corresponding to the objective functions.
+        Defaults to ["obj_0", "obj_1", ...].
+    profile : bool, default=False
+        If True, wraps the optimization loop in a profiler and prints stats.
+    """
+    def __init__(
+        self,
+        n_iter: int = 32,
+        mu: int = 50,
+        lmbda: int = 100,
+        origin_generation: RuleOriginGeneration = SquaredError(),
+        init: RuleInit = MeanInit(),
+        mutation: RuleMutation = Normal(sigma=1.22),
+        constraint: RuleConstraint = CombinedConstraint(MinRange(), Clip()),
+        acceptance: RuleAcceptance = Variance(),
+        random_state: int = None,
+        n_jobs: int = 1,
+        fitness_objs: Optional[List[Callable[[Rule], float]]] = None,
+        fitness_objs_labels: Optional[List[str]] = None,
+        profile: bool = False,
+    ):
+        super().__init__(
+            n_iter=n_iter,
+            mu=mu,
+            lmbda=lmbda,
+            origin_generation=origin_generation,
+            init=init,
+            mutation=mutation,
+            constraint=constraint,
+            acceptance=acceptance,
+            random_state=random_state,
+            n_jobs=n_jobs,
+            fitness_objs=fitness_objs,
+            fitness_objs_labels=fitness_objs_labels,
+            profile=profile,
+        )
+
+
+    def _optimize(
+        self,
+        X: np.ndarray,
+        y: np.ndarray,
+        random_state: RandomState
+    ):
+        # Bind dataset for the IG objective.
+        self._X_ref = X
+        self._y_ref = y
+        self._H_y   = self._entropy(y)
+
+        self._infogain_obj = lambda r, X_ref=X, y_ref=y, H_y=self._H_y: -self._information_gain(r.match(X_ref), y_ref, H_y)
+        self._infogain_label = "-IG"
+
+        return super()._optimize(X, y, random_state)
+
+
+# ────────────────────────────────────────────────────────────────────
+# Information Gain Helpers
+# ────────────────────────────────────────────────────────────────────
+    @staticmethod
+    def _entropy(y: np.ndarray) -> float:
+        if y.size == 0:
+            return 0.0
+        vals, cnt = np.unique(y, return_counts=True)
+        p = cnt / cnt.sum()
+        return float(-np.sum(p * np.log2(p + 1e-12)))
+
+
+    @staticmethod
+    def _information_gain(mask: np.ndarray, y: np.ndarray, H_y: float) -> float:
+        if mask.size == 0:
+            return 0.0
+        p = float(mask.mean())
+        if p == 0.0 or p == 1.0:
+            return 0.0
+        H_m = NSGA2InfoGain._entropy(y[mask])
+        H_nm = NSGA2InfoGain._entropy(y[~mask])
+        return H_y - (p * H_m + (1.0 - p) * H_nm)
+
+# ────────────────────────────────────────────────────────────────
+# Helper functions
+# ────────────────────────────────────────────────────────────────
+    def _fitness_objs_runtime(self) -> List[Callable[[Rule], float]]:
+        return list(self.fitness_objs) + [self._infogain_obj]
+
+    def _fitness_labels_runtime(self) -> List[str]:
+        return list(self.fitness_objs_labels) + [self._infogain_label]
diff --git a/suprb/optimizer/rule/nsga2/pymoo/__init__.py b/suprb/optimizer/rule/nsga2/pymoo/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/suprb/optimizer/rule/nsga2/pymoo/pymoo_mutation.py b/suprb/optimizer/rule/nsga2/pymoo/pymoo_mutation.py
new file mode 100644
index 00000000..35a006c4
--- /dev/null
+++ b/suprb/optimizer/rule/nsga2/pymoo/pymoo_mutation.py
@@ -0,0 +1,45 @@
+from pymoo.core.mutation import Mutation
+import numpy as np
+from suprb.optimizer.rule.mutation import RuleMutation
+from suprb.rule.base import Rule
+from suprb.optimizer.rule.constraint import RuleConstraint
+
+class PymooRuleMutation(Mutation):
+    """
+    Wrapper class for SupRB's mutation to work with pymoo.
+    Deprecated.
+    """
+    def __init__(self, initial_rule: Rule, mutation_operator: RuleMutation, constraint: RuleConstraint, X_train, y_train, random_state):
+        super().__init__()
+        self.initial_rule = initial_rule
+        self.mutation_operator = mutation_operator
+        self.constraint = constraint
+        self.X_train = X_train
+        self.y_train = y_train
+        self.random_state = random_state
+
+    # def _do(self, problem, X, **kwargs):
+    #     def mutate(params):
+    #         r = self.initial_rule.clone().set_param_vector(params)
+    #         r = self.mutation_operator(r, self.random_state)
+    #         r = self.constraint(r)
+    #         return r.get_param_vector()
+        
+    #     mutated_rules = [mutate(p) for p in X]
+    #     return np.array(mutated_rules)
+
+    def _do(self, problem, X, **kwargs):
+        mutated_population = []
+
+        for params in X:
+            rule = self.initial_rule.clone()
+            rule.set_param_vector(params)
+
+            mutated_rule = self.constraint(self.mutation_operator(rule, self.random_state)).fit(self.X_train, self.y_train)
+
+            if mutated_rule.is_fitted_ and mutated_rule.experience_ > 0:
+                mutated_population.append(mutated_rule.get_param_vector())
+            else:
+                mutated_population.append(params)
+
+        return np.array(mutated_population)
\ No newline at end of file
diff --git a/suprb/optimizer/rule/nsga2/pymoo/pymoo_nsga2.py b/suprb/optimizer/rule/nsga2/pymoo/pymoo_nsga2.py
new file mode 100644
index 00000000..b87f5949
--- /dev/null
+++ b/suprb/optimizer/rule/nsga2/pymoo/pymoo_nsga2.py
@@ -0,0 +1,133 @@
+import numpy as np
+from typing import Optional
+
+from suprb.rule import Rule, RuleInit
+from suprb.rule.initialization import MeanInit
+from suprb.optimizer.rule.nsga2.pymoo.rule_optimization_problem import RuleOptimizationProblem
+from suprb.optimizer.rule.nsga2.pymoo.pymoo_mutation import PymooRuleMutation
+from suprb.utils import RandomState
+from suprb.optimizer.rule.base import ParallelSingleRuleDiscovery
+from suprb.optimizer.rule.origin import SquaredError, RuleOriginGeneration
+from suprb.optimizer.rule.mutation import RuleMutation, HalfnormIncrease
+from suprb.optimizer.rule.constraint import CombinedConstraint, MinRange, Clip
+from suprb.optimizer.rule.acceptance import Variance
+from suprb.optimizer.rule import RuleAcceptance, RuleConstraint
+
+
+from pymoo.algorithms.moo.nsga2 import NSGA2
+from pymoo.optimize import minimize
+from pymoo.termination import get_termination
+
+import cProfile
+import pstats
+
+
+
+class PymooNSGA2(ParallelSingleRuleDiscovery):
+    """
+    Adapted from: A Fast and Elitist Multiobjective Genetic Algorithm: NSGA-II by Kalyanmoy Deb et al.
+    In the original plan for the MOO-RD was to use pymoo's precompiled algorithms.
+    This was slower in the end as repeated vectorization of the rule objects was necessary.
+    Deprecated.
+    """
+    def __init__(
+            self,
+            n_iter: int = 100,
+            mu: int = 20,
+            lmbda: int = 40,
+            origin_generation: RuleOriginGeneration = SquaredError(),
+            init: RuleInit = MeanInit(),
+            mutation: RuleMutation = HalfnormIncrease(sigma=1.22),
+            constraint: RuleConstraint = CombinedConstraint(MinRange(), Clip()),
+            acceptance: RuleAcceptance = Variance(),
+            random_state: int = None,
+            n_jobs: int = 1,
+    ):
+        super().__init__(
+            n_iter=n_iter,
+            origin_generation=origin_generation,
+            init=init,
+            acceptance=acceptance,
+            constraint=constraint,
+            random_state=random_state,
+            n_jobs=n_jobs,
+        )
+        self.mu = mu
+        self.lmbda = lmbda
+        self.mutation = mutation
+        self.constraint = constraint
+
+    def _optimize(
+            self,
+            X: np.ndarray,
+            y: np.ndarray,
+            initial_rule: Rule,
+            random_state: RandomState,
+    ) -> Optional[Rule]:
+        rule_template = initial_rule.clone().fit(X, y)
+
+        problem = RuleOptimizationProblem(
+            rule_template,
+            X,
+            y, 
+            constraint=self.constraint,
+        )
+
+        mutation = PymooRuleMutation(
+            rule_template,
+            mutation_operator=self.mutation,
+            constraint=self.constraint,
+            X_train=X,
+            y_train=y,
+            random_state=random_state,
+        )
+
+        algorithm = NSGA2(
+            pop_size=self.lmbda,
+            mutation=mutation,
+            #mutation=PM(eta=50, prob=0.8),
+            #crossover=SBX(eta=, prob=0.9),
+            eliminate_duplicates=False,
+        )
+
+        termination = get_termination(
+            "n_gen",
+            self.n_iter
+        )
+
+         # ─────────────── START PROFILING ───────────────
+        profiler = cProfile.Profile()
+        profiler.enable()
+
+        result = minimize(
+            problem,
+            algorithm,
+            termination,
+            seed=int(random_state.integers(0, 2**32 - 1)),
+            verbose=False,
+        )
+
+        profiler.disable()
+        # Dump the top 20 functions by cumulative time
+        stats = pstats.Stats(profiler).sort_stats("cumtime")
+        stats.print_stats(20)
+        # ─────────────── END PROFILING ─────────────────
+
+        population = result.algorithm.pop
+
+        F = population.get("F")
+        Xf = population.get("X")
+        rank = population.get("rank")
+
+    
+        pf_idx = np.where(rank == 0)[0]
+        if pf_idx.size == 0:
+            return None
+
+        best_idx = pf_idx[np.argmin(F[pf_idx, 0])]
+
+        best_vec  = Xf[best_idx]
+        best_rule = rule_template.clone().set_param_vector(best_vec)
+        best_rule = best_rule.fit(X, y)
+
+        return best_rule if best_rule.is_fitted_ and best_rule.experience_ > 0 else None
diff --git a/suprb/optimizer/rule/nsga2/pymoo/rule_optimization_problem.py b/suprb/optimizer/rule/nsga2/pymoo/rule_optimization_problem.py
new file mode 100644
index 00000000..470999a5
--- /dev/null
+++ b/suprb/optimizer/rule/nsga2/pymoo/rule_optimization_problem.py
@@ -0,0 +1,47 @@
+import numpy as np
+
+from suprb.rule import Rule
+
+from suprb.optimizer.rule import RuleConstraint
+
+from pymoo.core.problem import Problem
+
+
+class RuleOptimizationProblem(Problem):
+    """
+    Wrapper class for SupRB's optimization problem to work with pymoo.
+    Deprecated.
+    """
+    def __init__(self, rule: Rule, X: np.ndarray, y: np.ndarray, constraint: RuleConstraint):
+        self.rule_template = rule
+        self.X = X
+        self.y = y
+        self.param_len = len(rule.get_param_vector())
+        self.constraint = constraint
+
+        xl = np.full(self.param_len, -1.0)
+        xu = np.full(self.param_len, 1.0)
+
+        super().__init__(
+            n_var=self.param_len,
+            n_obj=2,
+            n_constr=0,
+            xl=xl,
+            xu=xu,
+            elementwise_evaluation=False
+        )
+
+    def _evaluate(self, X, out, *args, **kwargs):
+        F = []
+        for x in X:
+            rule = self.rule_template.clone().set_param_vector(x)
+
+            rule = rule.fit(self.X, self.y)
+
+            if not rule.is_fitted_ or rule.experience_ == 0:
+                F.append([np.inf, np.inf])
+            else:
+                rule = self.constraint(rule)
+                F.append([rule.error_, -rule.volume_])
+
+        out["F"] = np.array(F)
diff --git a/suprb/rule/base.py b/suprb/rule/base.py
index c8ba8951..81af6c8e 100644
--- a/suprb/rule/base.py
+++ b/suprb/rule/base.py
@@ -105,3 +105,18 @@ def clone(self, **kwargs) -> Rule:
 
     def _more_str_attributes(self) -> dict:
         return {"experience": self.experience_}
+
+    def get_param_vector(self) -> np.array:
+        """
+        Returns rule parameter vector.
+        Created for MOO-RD with pymoo's precompiled algorithms.
+        """
+        return self.match.get_param_vector()
+    
+    def set_param_vector(self, x:np.array) -> Rule:
+        """
+        Sets rule parameter vector.
+        """
+        rule = self.clone()
+        rule.match.set_param_vector(x)
+        return rule
diff --git a/suprb/rule/fitness.py b/suprb/rule/fitness.py
index 6e624b3b..d8d18ca6 100644
--- a/suprb/rule/fitness.py
+++ b/suprb/rule/fitness.py
@@ -2,9 +2,11 @@
 from typing import Callable
 
 import numpy as np
+import warnings
 
-from suprb.fitness import pseudo_accuracy, emary, wu
+from suprb.fitness import pseudo_accuracy, emary, wu, moo
 from . import Rule, RuleFitness
+from ..fitness import BaseFitness
 
 
 class PseudoAccuracy(RuleFitness):
@@ -64,3 +66,28 @@ class VolumeWu(VolumeRuleFitness):
     def __init__(self, alpha: float = 0.05):
         super().__init__(alpha)
         self.fitness_func_ = wu
+
+class MooFitness(RuleFitness, metaclass=ABCMeta):
+    """
+    A placeholder fitness function which always returns NaN.
+    This function is used in multi-objective optimization (MOO) as the rule attribute `.fitness_` is never accessed.
+    Using another fitness function has no impact on the MOO and may be misleading.
+
+    Changing the fitness attribute to a vector may make the code more intuitive.
+    """
+
+    fitness_func_: Callable
+
+    def __init__(self):
+        super().__init__()
+        self.fitness_func_ = moo
+        warnings.warn(
+            "MooFitness is a dummy fitness function which always returns NaN. "
+            "When using multi-objective optimization (MOO), the rule attribute `.fitness_` is never accessed. "
+            "Using another fitness function has no impact on the MOO and may be misleading.",
+            UserWarning,
+            stacklevel=2,
+        )
+
+    def __call__(self, rule: Rule) -> float:
+        return self.fitness_func_()
\ No newline at end of file
diff --git a/suprb/rule/matching.py b/suprb/rule/matching.py
index c89c9f28..2eb31a09 100644
--- a/suprb/rule/matching.py
+++ b/suprb/rule/matching.py
@@ -41,6 +41,14 @@ def _validate_bounds(self, X: np.ndarray):
 
         if self.bounds.shape[0] != X.shape[1]:
             raise ValueError(f"bounds- and input data dimension mismatch: {self.bounds.shape[0]} != {X.shape[1]}")
+    
+    @abstractmethod
+    def get_param_vector(self) -> np.array:
+        pass
+
+    @abstractmethod
+    def set_param_vector(self, x: np.array ) -> None:
+        pass
 
 
 class OrderedBound(MatchingFunction):
@@ -77,6 +85,14 @@ def min_range(self, min_range: float):
             self.bounds[invalid_indices, 0] -= min_range / 2
             self.bounds[invalid_indices, 1] += min_range / 2
 
+    def get_param_vector(self):
+        return self.bounds.flatten()
+    
+    def set_param_vector(self, x):
+        x = np.asarray(x)
+        self.bounds = x.reshape((-1, 2))
+
+
 
 class UnorderedBound(MatchingFunction):
     """
@@ -124,6 +140,13 @@ def min_range(self, min_range: float):
             self.bounds[invalid_indices_r, 0] -= min_range / 2
             self.bounds[invalid_indices_r, 1] += min_range / 2
 
+    def get_param_vector(self):
+        return self.bounds.flatten()
+
+    def set_param_vector(self, x):
+        d = x.shape[0] // 2
+        self.bounds = x.reshape((d, 2))
+
 
 class CenterSpread(MatchingFunction):
     """
@@ -168,6 +191,13 @@ def min_range(self, min_range: float):
             invalid_indices = np.argwhere(diff < min_range)
             self.bounds[invalid_indices, 1] += min_range / 2
 
+    def get_param_vector(self):
+        return self.bounds.flatten()
+
+    def set_param_vector(self, x):
+        d = x.shape[0] // 2
+        self.bounds = x.reshape((d, 2))
+
 
 class MinPercentage(MatchingFunction):
     """
@@ -211,3 +241,10 @@ def min_range(self, min_range: float):
             # Approximate increasing the width by min_range
             self.bounds[invalid_indices, 0] -= min_range / 2
             self.bounds[invalid_indices, 1] += min_range
+    
+    def get_param_vector(self):
+        return self.bounds.flatten()
+
+    def set_param_vector(self, x):
+        d = x.shape[0] // 2
+        self.bounds = x.reshape((d, 2))
diff --git a/suprb/suprb.py b/suprb/suprb.py
index 7b9869a7..d528825a 100644
--- a/suprb/suprb.py
+++ b/suprb/suprb.py
@@ -4,8 +4,7 @@
 import numpy as np
 from sklearn import clone
 from sklearn.utils import check_X_y
-from sklearn.utils.validation import check_is_fitted, check_array, validate_data
-
+from sklearn.utils.validation import check_is_fitted, check_array
 
 from .base import BaseRegressor
 from .exceptions import PopulationEmptyWarning
@@ -23,18 +22,6 @@
 
 
 class SupRB(BaseRegressor):
-    def __sklearn_tags__(self):
-        tags = super().__sklearn_tags__()
-        tags.target_tags.single_output = False
-        tags.non_deterministic = True
-        return tags
-
-    def _more_tags(self):
-        # additional or override tags
-        return {
-            # 'some_tag': True,
-        }
-
     """The multi-solution batch learning LCS developed by the Organic Computing group at Universität Augsburg.
 
     Parameters
@@ -163,7 +150,8 @@ def fit(self, X: np.ndarray, y: np.ndarray, cleanup=False):
         self.elitist_.complexity_ = 99999
 
         # Check that x and y have correct shape
-        X, y = validate_data(self, X, y, ensure_2d=True)
+        X, y = check_X_y(X, y, dtype="float64", y_numeric=True)
+        y = check_array(y, ensure_2d=False, dtype="float64")
 
         # Init sklearn interface
         self.n_features_in_ = X.shape[1]
@@ -198,6 +186,7 @@ def fit(self, X: np.ndarray, y: np.ndarray, cleanup=False):
         if self.n_initial_rules > 0:
             if self._catch_errors(self._discover_rules, X, y, self.n_initial_rules):
                 return self
+            self._log_to_stdout(f"{len(self.pool_)} initial rules discovered before first step.")
 
         # Main loop
         for self.step_ in range(self.n_iter):
@@ -256,9 +245,18 @@ def _discover_rules(self, X: np.ndarray, y: np.ndarray, n_rules: int):
 
         self._log_to_stdout(f"Generating {n_rules} rules", priority=4)
 
+
         # Update the current elitist
-        self.rule_discovery_.elitist_ = self.solution_composition_.elitist()
+        # try catch block needed for n_initial_rules as elitist is None Type without it
+        elit = None
+        try:
+            elit = self.solution_composition_.elitist()
+        except AttributeError:
+            pass
+        if elit is None:
+            elit = self.elitist_ #Initially dummy elitist: Solution([0, 0, 0], [0, 0, 0], ErrorExperienceHeuristic(), PseudoBIC())
 
+        self.rule_discovery_.elitist_ = elit
         # Update the random state
         self.rule_discovery_.random_state = self.rule_discovery_seeds_[self.step_]
 
@@ -285,9 +283,11 @@ def _compose_solution(self, X: np.ndarray, y: np.ndarray):
         # Optimize
         self.solution_composition_.optimize(X, y)
 
-    def predict(self, X):
-        check_is_fitted(self)
-        X = validate_data(self, X, ensure_2d=True, reset=False)
+    def predict(self, X: np.ndarray):
+        # Check is fit had been called
+        check_is_fitted(self, ["is_fitted_"])
+        # Input validation
+        X = check_array(X)
 
         if hasattr(self, "is_error_") and self.is_error_:
             return [0] * len(X)
diff --git a/suprb/warmup_suprb.py b/suprb/warmup_suprb.py
new file mode 100644
index 00000000..008f6774
--- /dev/null
+++ b/suprb/warmup_suprb.py
@@ -0,0 +1,196 @@
+import numpy as np
+from sklearn.utils import check_X_y
+from sklearn.utils.validation import check_array
+
+from . import SupRB
+from .solution import Solution
+from .solution.mixing_model import ErrorExperienceHeuristic
+from .solution.fitness import PseudoBIC
+from .optimizer.rule.es import ES1xLambda
+from .optimizer.solution.ga import GeneticAlgorithm
+from .rule.matching import OrderedBound
+from .logging import DefaultLogger
+from .utils import check_random_state
+
+
+class WarmupSupRB(SupRB):
+    """
+    SupRB variant that performs several Rule-Discovery-only warm-up cycles
+    before the first Solution Composition.
+
+    DEPRECATED. Functionality is already implemented in SupRB via n_initial_rules.
+    """
+
+    def __init__(
+        self,
+        rule_discovery=None,
+        solution_composition=None,
+        matching_type=None,
+        n_iter=32,
+        n_initial_rules=0,
+        n_rules=4,
+        random_state=None,
+        verbose=1,
+        logger=None,
+        n_jobs=1,
+        early_stopping_patience=-1,
+        early_stopping_delta=0,
+        # Warmup params
+        warmup_strategy="fixed",
+        warmup_rd_steps=0,               # fixed
+        warmup_max_steps=16,             # auto
+        warmup_pool_target=None,         # auto
+        warmup_patience=3,               # auto
+        warmup_delta=1,                  # auto
+    ):
+        super().__init__(
+            rule_discovery=rule_discovery,
+            solution_composition=solution_composition,
+            matching_type=matching_type,
+            n_iter=n_iter,
+            n_initial_rules=n_initial_rules,
+            n_rules=n_rules,
+            random_state=random_state,
+            verbose=verbose,
+            logger=logger,
+            n_jobs=n_jobs,
+            early_stopping_patience=early_stopping_patience,
+            early_stopping_delta=early_stopping_delta,
+        )
+        self.warmup_strategy = warmup_strategy
+        self.warmup_rd_steps = warmup_rd_steps
+        self.warmup_max_steps = warmup_max_steps
+        self.warmup_pool_target = warmup_pool_target
+        self.warmup_patience = warmup_patience
+        self.warmup_delta = warmup_delta
+
+
+    def fit(self, X, y, cleanup=False):
+        self.early_stopping_counter_ = 0
+        self.previous_fitness_ = 0
+        self.is_error_ = False
+
+        self.elitist_ = Solution([0, 0, 0], [0, 0, 0], ErrorExperienceHeuristic(), PseudoBIC())
+        self.elitist_.fitness_ = 0
+        self.elitist_.error_ = 99999
+        self.elitist_.complexity_ = 99999
+
+        X, y = check_X_y(X, y, dtype="float64", y_numeric=True)
+        y = check_array(y, ensure_2d=False, dtype="float64")
+        self.n_features_in_ = X.shape[1]
+
+        self.random_state_ = check_random_state(self.random_state)
+        total_pairs = (self.warmup_max_steps + self.n_iter) * 2
+        seeds = np.random.SeedSequence(self.random_state).spawn(total_pairs)
+        self.rule_discovery_seeds_ = seeds[::2]
+        self.solution_composition_seeds_ = seeds[1::2]
+
+        self.pool_ = []
+        self._validate_rule_discovery(default=ES1xLambda())
+        self._validate_solution_composition(default=GeneticAlgorithm())
+        self._validate_matching_type(default=OrderedBound(np.array([])))
+        self._validate_logger(default=DefaultLogger())
+        self._propagate_component_parameters()
+        self._init_bounds(X)
+        self._init_matching_type()
+
+        self.solution_composition_.pool_ = self.pool_
+        self.rule_discovery_.pool_ = self.pool_
+
+        self.solution_composition_.init.fitness.max_genome_length_ = (
+            self.n_rules * (self.n_iter + self.warmup_max_steps) + self.n_initial_rules
+        )
+
+        self.logger_.log_init(X, y, self)
+
+        if self.n_initial_rules > 0:
+            if self._catch_errors(self._discover_rules, X, y, self.n_initial_rules):
+                return self
+
+        # Warmup
+        warmup_done = 0
+        patience_ctr = 0
+        self.step_ = 0
+
+        if self.warmup_strategy not in ("fixed", "auto"):
+            raise ValueError("warmup_strategy must be 'fixed' or 'auto'.")
+
+        def one_rd_cycle():
+            before = len(self.pool_)
+            if self._catch_errors(self._discover_rules, X, y, self.n_rules):
+                return True, 0
+            after = len(self.pool_)
+            growth = after - before
+            if self.verbose >= 2:
+                print(f"[Warm-up RD] Added {growth} rules (pool={after})")
+            return False, growth
+
+        if self.warmup_strategy == "fixed":
+            target = max(0, int(self.warmup_rd_steps))
+            for _ in range(target):
+                err, _growth = one_rd_cycle()
+                if err:
+                    return self
+                warmup_done += 1
+                self.step_ += 1
+        else:  # auto
+            for _ in range(max(0, int(self.warmup_max_steps))):
+                err, growth = one_rd_cycle()
+                if err:
+                    return self
+                warmup_done += 1
+                self.step_ += 1
+
+                if self.warmup_pool_target is not None and len(self.pool_) >= int(self.warmup_pool_target):
+                    break
+                if growth < int(self.warmup_delta):
+                    patience_ctr += 1
+                    if patience_ctr >= int(self.warmup_patience):
+                        break
+                else:
+                    patience_ctr = 0
+
+        self.solution_composition_.init.fitness.max_genome_length_ = (
+            self.n_rules * (self.n_iter + warmup_done) + self.n_initial_rules
+        )
+
+        # Main loop
+        end_step = self.step_ + self.n_iter
+        while self.step_ < end_step:
+            if self._catch_errors(self._discover_rules, X, y, self.n_rules):
+                return self
+            if self._catch_errors(self._compose_solution, X, y, False):
+                return self
+
+            self.logger_.log_iteration(X, y, self, iteration=self.step_)
+            if self.check_early_stopping():
+                break
+
+            self.previous_fitness_ = self.solution_composition_.elitist().fitness_
+            self.step_ += 1
+
+        self.elitist_ = self.solution_composition_.elitist().clone()
+        self.is_fitted_ = True
+        self.logger_.log_final(X, y, self)
+
+        if cleanup:
+            self._cleanup()
+
+        return self
+
+    def _discover_rules(self, X, y, n_rules):
+        self._log_to_stdout(f"Generating {n_rules} rules", priority=4)
+
+        elit = None
+        try:
+            elit = self.solution_composition_.elitist()
+        except Exception:
+            pass
+        if elit is None:
+            elit = self.elitist_
+
+        self.rule_discovery_.elitist_ = elit
+        self.rule_discovery_.random_state = self.rule_discovery_seeds_[self.step_]
+        new_rules = self.rule_discovery_.optimize(X, y, n_rules=n_rules)
+        self.pool_.extend(new_rules)
+
diff --git a/tests/test_solution.py b/tests/test_solution.py
index c3983199..fafb9366 100644
--- a/tests/test_solution.py
+++ b/tests/test_solution.py
@@ -1,6 +1,6 @@
 import unittest
 
-from sklearn.utils.estimator_checks import check_estimator, _regression_dataset
+from sklearn.utils.estimator_checks import check_estimator
 
 import suprb
 import suprb.logging.stdout
@@ -18,69 +18,55 @@ class TestSolution(unittest.TestCase):
 
     def test_check_ga(self):
         estimator = suprb.SupRB(
-            n_iter=4,
+            n_iter=1,
             rule_discovery=ES1xLambda(n_iter=4, lmbda=1, delay=2),
             solution_composition=GeneticAlgorithm(n_iter=2, population_size=2),
             logger=suprb.logging.stdout.StdoutLogger(),
             verbose=10,
         )
 
-        X, y = _regression_dataset()
-        estimator.fit(X, y)
-
         check_estimator(estimator)
 
     def test_check_saga1(self):
         estimator = suprb.SupRB(
-            n_iter=4,
+            n_iter=1,
             rule_discovery=ES1xLambda(n_iter=4, lmbda=1, delay=2),
             solution_composition=SelfAdaptingGeneticAlgorithm1(n_iter=2, population_size=2),
             logger=suprb.logging.stdout.StdoutLogger(),
             verbose=10,
         )
-        X, y = _regression_dataset()
-        estimator.fit(X, y)
 
         check_estimator(estimator)
 
     def test_check_saga2(self):
         estimator = suprb.SupRB(
-            n_iter=4,
+            n_iter=1,
             rule_discovery=ES1xLambda(n_iter=4, lmbda=1, delay=2),
             solution_composition=SelfAdaptingGeneticAlgorithm2(n_iter=2, population_size=2),
             logger=suprb.logging.stdout.StdoutLogger(),
             verbose=10,
         )
 
-        X, y = _regression_dataset()
-        estimator.fit(X, y)
-
         check_estimator(estimator)
 
     def test_check_saga3(self):
         estimator = suprb.SupRB(
-            n_iter=4,
+            n_iter=1,
             rule_discovery=ES1xLambda(n_iter=4, lmbda=1, delay=2),
             solution_composition=SelfAdaptingGeneticAlgorithm3(n_iter=2, population_size=2),
             logger=suprb.logging.stdout.StdoutLogger(),
             verbose=10,
         )
 
-        X, y = _regression_dataset()
-        estimator.fit(X, y)
-
         check_estimator(estimator)
 
     def test_check_sas(self):
         estimator = suprb.SupRB(
-            n_iter=4,
+            n_iter=1,
             rule_discovery=ES1xLambda(n_iter=4, lmbda=1, delay=2),
             solution_composition=SasGeneticAlgorithm(n_iter=2, initial_population_size=2),
             logger=suprb.logging.stdout.StdoutLogger(),
             verbose=10,
         )
 
-        X, y = _regression_dataset()
-        estimator.fit(X, y)
-
         check_estimator(estimator)
diff --git a/tests/test_suprb.py b/tests/test_suprb.py
index b8b8d191..5affa92b 100644
--- a/tests/test_suprb.py
+++ b/tests/test_suprb.py
@@ -1,7 +1,7 @@
 import unittest
 import numpy as np
 
-from sklearn.utils.estimator_checks import check_estimator, _regression_dataset
+from sklearn.utils.estimator_checks import check_estimator
 
 import suprb
 import suprb.logging.stdout
@@ -35,16 +35,13 @@ def test_check_estimator(self):
 
         # Low n_iter for speed. Still takes forever though.
         estimator = suprb.SupRB(
-            n_iter=4,
+            n_iter=1,
             rule_discovery=ES1xLambda(n_iter=4, lmbda=1, delay=2),
             solution_composition=suprb.optimizer.solution.ga.GeneticAlgorithm(n_iter=2, population_size=2),
             logger=suprb.logging.stdout.StdoutLogger(),
             verbose=10,
         )
 
-        X, y = _regression_dataset()
-        estimator.fit(X, y)
-
         check_estimator(estimator)
 
     def test_early_stopping(self):