From 0eaef845473ce397a4a8c88c8ccd7516aaab4f94 Mon Sep 17 00:00:00 2001 From: RomanSraj Date: Fri, 13 Jun 2025 14:24:34 +0200 Subject: [PATCH 1/3] Adapt code and requirements for latest sklearn --- examples/example_1.py | 1 - examples/example_2.py | 1 - examples/example_3.py | 1 - examples/example_4.py | 5 +-- requirements.txt | 95 ++++++++++++++++++++++++++++++++++++------ suprb/base.py | 2 +- suprb/suprb.py | 34 +++++++++++---- tests/test_solution.py | 26 +++++++++--- tests/test_suprb.py | 7 +++- 9 files changed, 137 insertions(+), 35 deletions(-) diff --git a/examples/example_1.py b/examples/example_1.py index b87b5f5c..98f16323 100644 --- a/examples/example_1.py +++ b/examples/example_1.py @@ -60,7 +60,6 @@ def create_plot(scores): verbose=10, scoring=["r2", "neg_mean_squared_error"], return_estimator=True, - fit_params={"cleanup": True}, ) create_plot(scores) diff --git a/examples/example_2.py b/examples/example_2.py index 03541564..ff043e33 100644 --- a/examples/example_2.py +++ b/examples/example_2.py @@ -50,7 +50,6 @@ verbose=10, scoring=["r2", "neg_mean_squared_error"], return_estimator=True, - fit_params={"cleanup": True}, ) log_scores(scores) diff --git a/examples/example_3.py b/examples/example_3.py index c8bfe50e..ae674ad9 100644 --- a/examples/example_3.py +++ b/examples/example_3.py @@ -61,7 +61,6 @@ verbose=10, scoring=["r2", "neg_mean_squared_error"], return_estimator=True, - fit_params={"cleanup": True}, ) log_scores(scores) diff --git a/examples/example_4.py b/examples/example_4.py index b0187744..7948b39f 100644 --- a/examples/example_4.py +++ b/examples/example_4.py @@ -50,8 +50,8 @@ elitist_ratio=0.2, random_state=random_state, n_jobs=1, - mutation=suprb.optimizer.solution.ga.mutation.BitFlips(mutation_rate=0.1), - crossover=suprb.optimizer.solution.ga.crossover.NPoint(crossover_rate=0.9, n=2), + mutation=suprb.optimizer.solution.ga.mutation.BitFlips(), + crossover=suprb.optimizer.solution.ga.crossover.NPoint(n=2), selection=suprb.optimizer.solution.ga.selection.Tournament(k=6), init=suprb.solution.initialization.RandomInit( mixing=suprb.solution.mixing_model.ErrorExperienceHeuristic(), @@ -71,7 +71,6 @@ verbose=10, scoring=["r2", "neg_mean_squared_error"], return_estimator=True, - fit_params={"cleanup": True}, ) log_scores(scores) diff --git a/requirements.txt b/requirements.txt index 0822148f..084bbcf9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,12 +1,83 @@ -numpy~=1.22.4 -scipy~=1.7.1 -matplotlib~=3.4.3 -scikit-learn~=1.0.1 -scikit-optimize~=0.9 -pandas~=1.3.4 -mlflow~=1.21.0 -hypothesis~=6.24.2 -joblib~=1.1.0 -tqdm~=4.62.3 -pytest~=6.2.5 -protobuf~=3.20.0 +alembic==1.16.1 +annotated-types==0.7.0 +anyio==4.9.0 +attrs==25.3.0 +blinker==1.9.0 +cachetools==5.5.2 +certifi==2025.4.26 +charset-normalizer==3.4.2 +click==8.2.1 +cloudpickle==3.1.1 +contourpy==1.3.2 +cycler==0.12.1 +databricks-sdk==0.57.0 +docker==7.1.0 +exceptiongroup==1.3.0 +fastapi==0.115.12 +Flask==3.1.1 +fonttools==4.58.2 +gitdb==4.0.12 +GitPython==3.1.44 +google-auth==2.40.3 +graphene==3.4.3 +graphql-core==3.2.6 +graphql-relay==3.2.0 +greenlet==3.2.3 +gunicorn==23.0.0 +h11==0.16.0 +hypothesis==6.135.9 +idna==3.10 +importlib_metadata==8.7.0 +iniconfig==2.1.0 +itsdangerous==2.2.0 +Jinja2==3.1.6 +joblib==1.5.1 +kiwisolver==1.4.8 +Mako==1.3.10 +MarkupSafe==3.0.2 +matplotlib==3.10.3 +mlflow==3.1.0 +mlflow-skinny==3.1.0 +numpy==2.2.6 +opentelemetry-api==1.34.1 +opentelemetry-sdk==1.34.1 +opentelemetry-semantic-conventions==0.55b1 +packaging==25.0 +pandas==2.3.0 +pillow==11.2.1 +pluggy==1.6.0 +protobuf==6.31.1 +pyaml==25.5.0 +pyarrow==20.0.0 +pyasn1==0.6.1 +pyasn1_modules==0.4.2 +pydantic==2.11.6 +pydantic_core==2.33.2 +Pygments==2.19.1 +pyparsing==3.2.3 +pytest==8.4.0 +python-dateutil==2.9.0.post0 +pytz==2025.2 +PyYAML==6.0.2 +requests==2.32.4 +rsa==4.9.1 +scikit-learn==1.7.0 +scikit-optimize==0.10.2 +scipy==1.15.3 +six==1.17.0 +smmap==5.0.2 +sniffio==1.3.1 +sortedcontainers==2.4.0 +SQLAlchemy==2.0.41 +sqlparse==0.5.3 +starlette==0.46.2 +threadpoolctl==3.6.0 +tomli==2.2.1 +tqdm==4.67.1 +typing-inspection==0.4.1 +typing_extensions==4.14.0 +tzdata==2025.2 +urllib3==2.4.0 +uvicorn==0.34.3 +Werkzeug==3.1.3 +zipp==3.23.0 diff --git a/suprb/base.py b/suprb/base.py index 748c81c2..d223dc38 100644 --- a/suprb/base.py +++ b/suprb/base.py @@ -51,7 +51,7 @@ def _more_str_attributes(self) -> dict: return {} -class BaseRegressor(BaseEstimator, RegressorMixin, metaclass=ABCMeta): +class BaseRegressor(RegressorMixin, BaseEstimator, metaclass=ABCMeta): """A base (composite) Regressor.""" is_fitted_: bool diff --git a/suprb/suprb.py b/suprb/suprb.py index c6db2ebd..f9275e2f 100644 --- a/suprb/suprb.py +++ b/suprb/suprb.py @@ -22,6 +22,18 @@ class SupRB(BaseRegressor): + def __sklearn_tags__(self): + tags = super().__sklearn_tags__() + tags.target_tags.single_output = False + tags.non_deterministic = True + return tags + + def _more_tags(self): + # additional or override tags + return { + # 'some_tag': True, + } + """The multi-solution batch learning LCS developed by the Organic Computing group at Universität Augsburg. Parameters @@ -149,9 +161,13 @@ def fit(self, X: np.ndarray, y: np.ndarray, cleanup=False): self.elitist_.error_ = 99999 self.elitist_.complexity_ = 99999 - # Check that x and y have correct shape - X, y = check_X_y(X, y, dtype="float64", y_numeric=True) - y = check_array(y, ensure_2d=False, dtype="float64") + from sklearn.utils.validation import validate_data, check_is_fitted + + X, y = validate_data(self, X, y, ensure_2d=True) + + # # Check that x and y have correct shape + # X, y = check_X_y(X, y, dtype="float64", y_numeric=True) + # y = check_array(y, ensure_2d=False, dtype="float64") # Init sklearn interface self.n_features_in_ = X.shape[1] @@ -273,12 +289,14 @@ def _compose_solution(self, X: np.ndarray, y: np.ndarray): # Optimize self.solution_composition_.optimize(X, y) - def predict(self, X: np.ndarray): - # Check is fit had been called - check_is_fitted(self, ["is_fitted_"]) - # Input validation - X = check_array(X) + def predict(self, X): + from sklearn.utils.validation import validate_data, check_is_fitted + + check_is_fitted(self) + # validiere Input, ohne n_features_in_ zu resetten (reset=False) + X = validate_data(self, X, ensure_2d=True, reset=False) + # Falls is_error_ gesetzt, gib Dummy-Ausgabe if hasattr(self, "is_error_") and self.is_error_: return [0] * len(X) else: diff --git a/tests/test_solution.py b/tests/test_solution.py index fafb9366..c3983199 100644 --- a/tests/test_solution.py +++ b/tests/test_solution.py @@ -1,6 +1,6 @@ import unittest -from sklearn.utils.estimator_checks import check_estimator +from sklearn.utils.estimator_checks import check_estimator, _regression_dataset import suprb import suprb.logging.stdout @@ -18,55 +18,69 @@ class TestSolution(unittest.TestCase): def test_check_ga(self): estimator = suprb.SupRB( - n_iter=1, + n_iter=4, rule_discovery=ES1xLambda(n_iter=4, lmbda=1, delay=2), solution_composition=GeneticAlgorithm(n_iter=2, population_size=2), logger=suprb.logging.stdout.StdoutLogger(), verbose=10, ) + X, y = _regression_dataset() + estimator.fit(X, y) + check_estimator(estimator) def test_check_saga1(self): estimator = suprb.SupRB( - n_iter=1, + n_iter=4, rule_discovery=ES1xLambda(n_iter=4, lmbda=1, delay=2), solution_composition=SelfAdaptingGeneticAlgorithm1(n_iter=2, population_size=2), logger=suprb.logging.stdout.StdoutLogger(), verbose=10, ) + X, y = _regression_dataset() + estimator.fit(X, y) check_estimator(estimator) def test_check_saga2(self): estimator = suprb.SupRB( - n_iter=1, + n_iter=4, rule_discovery=ES1xLambda(n_iter=4, lmbda=1, delay=2), solution_composition=SelfAdaptingGeneticAlgorithm2(n_iter=2, population_size=2), logger=suprb.logging.stdout.StdoutLogger(), verbose=10, ) + X, y = _regression_dataset() + estimator.fit(X, y) + check_estimator(estimator) def test_check_saga3(self): estimator = suprb.SupRB( - n_iter=1, + n_iter=4, rule_discovery=ES1xLambda(n_iter=4, lmbda=1, delay=2), solution_composition=SelfAdaptingGeneticAlgorithm3(n_iter=2, population_size=2), logger=suprb.logging.stdout.StdoutLogger(), verbose=10, ) + X, y = _regression_dataset() + estimator.fit(X, y) + check_estimator(estimator) def test_check_sas(self): estimator = suprb.SupRB( - n_iter=1, + n_iter=4, rule_discovery=ES1xLambda(n_iter=4, lmbda=1, delay=2), solution_composition=SasGeneticAlgorithm(n_iter=2, initial_population_size=2), logger=suprb.logging.stdout.StdoutLogger(), verbose=10, ) + X, y = _regression_dataset() + estimator.fit(X, y) + check_estimator(estimator) diff --git a/tests/test_suprb.py b/tests/test_suprb.py index 5affa92b..b8b8d191 100644 --- a/tests/test_suprb.py +++ b/tests/test_suprb.py @@ -1,7 +1,7 @@ import unittest import numpy as np -from sklearn.utils.estimator_checks import check_estimator +from sklearn.utils.estimator_checks import check_estimator, _regression_dataset import suprb import suprb.logging.stdout @@ -35,13 +35,16 @@ def test_check_estimator(self): # Low n_iter for speed. Still takes forever though. estimator = suprb.SupRB( - n_iter=1, + n_iter=4, rule_discovery=ES1xLambda(n_iter=4, lmbda=1, delay=2), solution_composition=suprb.optimizer.solution.ga.GeneticAlgorithm(n_iter=2, population_size=2), logger=suprb.logging.stdout.StdoutLogger(), verbose=10, ) + X, y = _regression_dataset() + estimator.fit(X, y) + check_estimator(estimator) def test_early_stopping(self): From ef1782c6bd0a462d5f8d7342b9e2685ffb80fb89 Mon Sep 17 00:00:00 2001 From: RomanSraj Date: Fri, 13 Jun 2025 14:31:29 +0200 Subject: [PATCH 2/3] Small refactoring --- suprb/suprb.py | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/suprb/suprb.py b/suprb/suprb.py index f9275e2f..7b9869a7 100644 --- a/suprb/suprb.py +++ b/suprb/suprb.py @@ -4,7 +4,8 @@ import numpy as np from sklearn import clone from sklearn.utils import check_X_y -from sklearn.utils.validation import check_is_fitted, check_array +from sklearn.utils.validation import check_is_fitted, check_array, validate_data + from .base import BaseRegressor from .exceptions import PopulationEmptyWarning @@ -161,14 +162,9 @@ def fit(self, X: np.ndarray, y: np.ndarray, cleanup=False): self.elitist_.error_ = 99999 self.elitist_.complexity_ = 99999 - from sklearn.utils.validation import validate_data, check_is_fitted - + # Check that x and y have correct shape X, y = validate_data(self, X, y, ensure_2d=True) - # # Check that x and y have correct shape - # X, y = check_X_y(X, y, dtype="float64", y_numeric=True) - # y = check_array(y, ensure_2d=False, dtype="float64") - # Init sklearn interface self.n_features_in_ = X.shape[1] @@ -290,13 +286,9 @@ def _compose_solution(self, X: np.ndarray, y: np.ndarray): self.solution_composition_.optimize(X, y) def predict(self, X): - from sklearn.utils.validation import validate_data, check_is_fitted - check_is_fitted(self) - - # validiere Input, ohne n_features_in_ zu resetten (reset=False) X = validate_data(self, X, ensure_2d=True, reset=False) - # Falls is_error_ gesetzt, gib Dummy-Ausgabe + if hasattr(self, "is_error_") and self.is_error_: return [0] * len(X) else: From decde83a71ca59d648a0db235783ee7f72ccc989 Mon Sep 17 00:00:00 2001 From: RomanSraj Date: Mon, 23 Jun 2025 11:28:17 +0200 Subject: [PATCH 3/3] Only use essential requirements in requirements.txt --- requirements.txt | 80 +----------------------------------------------- 1 file changed, 1 insertion(+), 79 deletions(-) diff --git a/requirements.txt b/requirements.txt index 084bbcf9..6326ee92 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,83 +1,5 @@ -alembic==1.16.1 -annotated-types==0.7.0 -anyio==4.9.0 -attrs==25.3.0 -blinker==1.9.0 -cachetools==5.5.2 -certifi==2025.4.26 -charset-normalizer==3.4.2 -click==8.2.1 -cloudpickle==3.1.1 -contourpy==1.3.2 -cycler==0.12.1 -databricks-sdk==0.57.0 -docker==7.1.0 -exceptiongroup==1.3.0 -fastapi==0.115.12 -Flask==3.1.1 -fonttools==4.58.2 -gitdb==4.0.12 -GitPython==3.1.44 -google-auth==2.40.3 -graphene==3.4.3 -graphql-core==3.2.6 -graphql-relay==3.2.0 -greenlet==3.2.3 -gunicorn==23.0.0 -h11==0.16.0 -hypothesis==6.135.9 -idna==3.10 -importlib_metadata==8.7.0 -iniconfig==2.1.0 -itsdangerous==2.2.0 -Jinja2==3.1.6 -joblib==1.5.1 -kiwisolver==1.4.8 -Mako==1.3.10 -MarkupSafe==3.0.2 matplotlib==3.10.3 -mlflow==3.1.0 -mlflow-skinny==3.1.0 -numpy==2.2.6 -opentelemetry-api==1.34.1 -opentelemetry-sdk==1.34.1 -opentelemetry-semantic-conventions==0.55b1 -packaging==25.0 pandas==2.3.0 -pillow==11.2.1 -pluggy==1.6.0 -protobuf==6.31.1 -pyaml==25.5.0 -pyarrow==20.0.0 -pyasn1==0.6.1 -pyasn1_modules==0.4.2 -pydantic==2.11.6 -pydantic_core==2.33.2 -Pygments==2.19.1 -pyparsing==3.2.3 -pytest==8.4.0 -python-dateutil==2.9.0.post0 -pytz==2025.2 -PyYAML==6.0.2 -requests==2.32.4 -rsa==4.9.1 +pytest==8.4.1 scikit-learn==1.7.0 -scikit-optimize==0.10.2 -scipy==1.15.3 -six==1.17.0 -smmap==5.0.2 -sniffio==1.3.1 -sortedcontainers==2.4.0 -SQLAlchemy==2.0.41 -sqlparse==0.5.3 -starlette==0.46.2 -threadpoolctl==3.6.0 -tomli==2.2.1 tqdm==4.67.1 -typing-inspection==0.4.1 -typing_extensions==4.14.0 -tzdata==2025.2 -urllib3==2.4.0 -uvicorn==0.34.3 -Werkzeug==3.1.3 -zipp==3.23.0