Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
2af2ce5
Fix json (de-)serialization by adding matching type to all rules
wehrfabi Sep 8, 2024
5d04bb4
Merge branch 'heidmic:master' into master
wehrfabi Dec 2, 2024
f8f3115
added BaseSupervised
wehrfabi Dec 5, 2024
f576237
allow ClassifierMixin as param model
wehrfabi Dec 12, 2024
8a6847e
using negative accuracy as score for Classifiers
wehrfabi Dec 12, 2024
956a23a
special case for single class in match set
wehrfabi Dec 16, 2024
76b9ae7
solution now supports accuracy_score
wehrfabi Dec 16, 2024
0209264
fix: negative accuracy_score in solution for proper optimization
wehrfabi Dec 16, 2024
1548e9b
fix: force pred to int on ErrorExperienceHeuristic
wehrfabi Dec 16, 2024
17eda3c
pseudoaccuracy for class now as expected
wehrfabi Dec 17, 2024
0354ab6
Comments: beautified
wehrfabi Dec 17, 2024
1a3e51c
fix: updated solution fitness to support class accuracy
wehrfabi Dec 17, 2024
4a543b6
fix: solution fitness functions now using acc in [0,1] for class
wehrfabi Dec 17, 2024
0304c33
added classification example using the iris dataset
wehrfabi Dec 17, 2024
2e75575
acceptance for classification
wehrfabi Jan 6, 2025
f7cef4a
check if pool is empty before attempting to read it
wehrfabi Jan 6, 2025
8333b90
classification error is now 1 - accuracy
wehrfabi Jan 6, 2025
4a84c9d
added MixingModel for Classification
wehrfabi Jan 7, 2025
61fe839
fix: no 0-prediction from ErrorExpClass in case of mismatch
wehrfabi Jan 9, 2025
f371bfc
accuracy to error in rule acceptance
wehrfabi Jan 21, 2025
80d422d
added score to Solution
wehrfabi Jan 21, 2025
e988f0e
Merge branch 'class' of https://github.com/wehrfabi/suprb into class
wehrfabi Jan 21, 2025
4cd21cb
default_score 0.0 deafult_error 9999
wehrfabi Jan 21, 2025
2ce7531
added bool isClass(ification)
wehrfabi Jan 21, 2025
97cb3ac
isClass in solution fitness
wehrfabi Jan 21, 2025
8822b73
fix acceptance
wehrfabi Jan 21, 2025
92a083f
added score and model_swap
wehrfabi Jan 23, 2025
878a92b
added score and model_swap
wehrfabi Jan 23, 2025
a59fcb4
Merge branch 'class' of https://github.com/wehrfabi/suprb into class
wehrfabi Jan 23, 2025
0f8cd36
fix isClass
wehrfabi Jan 23, 2025
e9324c5
fix solution fit
wehrfabi Jan 25, 2025
87b6e84
use sklearn.base to check task_type
wehrfabi Jan 25, 2025
f1259b0
dont fit suprb if is_fitted_
wehrfabi Jan 28, 2025
a448d99
fix: handle binary matching of rules in volume_fitness
wehrfabi Jan 30, 2025
ce8bb79
apply thesholds
wehrfabi Feb 22, 2025
19f6ba4
expanded logger
wehrfabi Feb 25, 2025
9d5fcba
fix circular import
wehrfabi Mar 5, 2025
dc32e8c
fix model swapping
wehrfabi Mar 6, 2025
70bbcba
cleanup BaseSupervised
wehrfabi Mar 10, 2025
0c9db61
naming isClassifier
wehrfabi Mar 12, 2025
24add37
rename json
wehrfabi Mar 12, 2025
ae9e40d
comply with rename
wehrfabi Mar 12, 2025
92291ba
fix: rename
wehrfabi Mar 12, 2025
8e6bbbb
fix: faulty f1-score logging
wehrfabi Mar 12, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 66 additions & 0 deletions examples/classification_smoke.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
from ucimlrepo import fetch_ucirepo
import sklearn
import numpy as np
import pandas as pd

from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder
from sklearn.model_selection import cross_validate, train_test_split
from sklearn.linear_model import Ridge, LogisticRegression
from sklearn.utils import shuffle

import suprb
from sklearn.compose import make_column_transformer
from suprb.utils import check_random_state
from suprb.optimizer.rule.es import ES1xLambda
from suprb.optimizer.rule.acceptance import MaxError
from suprb.optimizer.solution.ga import GeneticAlgorithm
from suprb.wrapper import SupRBWrapper

from utils import log_scores


if __name__ == '__main__':
    # Smoke example: cross-validate a SupRBWrapper on a classification task.
    random_state = 125

    # Model handed to rule generation through `rule_generation__init__model`.
    local_model = LogisticRegression(penalty='l1', C=0.1, random_state=random_state,
                                     solver='saga', tol=0.001, max_iter=1000)

    # CLASSIFICATION
    # Fetch the dataset (UCI ML repository id 53).
    iris = fetch_ucirepo(id=53)
    X = iris.data.features.to_numpy()
    y = iris.data.targets.to_numpy()
    X, y = shuffle(X, y, random_state=random_state)

    # Conversion of targets to int is required for mixing.
    # Similar to sklearn.preprocessing.OrdinalEncoder, except that the
    # integer labels assigned here start at 1.
    unique = np.unique(y)
    label_to_int = dict(zip(unique, range(1, len(unique) + 1)))
    # Each row of `y` holds a single label, hence the `row[0]` lookup.
    y = [label_to_int[row[0]] for row in y]
    X = MinMaxScaler(feature_range=(-1, 1)).fit_transform(X)

    # No separate train/test split is made here: cross_validate below does
    # its own splitting on the full (X, y).

    # Comparable with examples/example_2.py
    model = SupRBWrapper(print_config=True,
                         # n_iter=10,
                         ## RULE GENERATION ##
                         rule_generation=ES1xLambda(),
                         rule_generation__n_iter=16,
                         rule_generation__lmbda=16,
                         rule_generation__operator='+',
                         rule_generation__delay=10,
                         rule_generation__random_state=random_state,
                         rule_generation__n_jobs=4,
                         rule_generation__init__model=local_model,

                         ## SOLUTION COMPOSITION ##
                         solution_composition=GeneticAlgorithm(),
                         solution_composition__init__mixing=suprb.solution.mixing_model.ErrorExperienceClassification(),
                         solution_composition__n_iter=32,
                         solution_composition__population_size=32,
                         solution_composition__elitist_ratio=0.2,
                         solution_composition__random_state=random_state,
                         solution_composition__n_jobs=4)

    # 4-fold cross-validation scored by accuracy; the fitted estimators are
    # kept so they can be inspected afterwards.
    scores = cross_validate(model, X, y, cv=4, n_jobs=4, verbose=10,
                            scoring=['accuracy'],
                            return_estimator=True, fit_params={'cleanup': True})

    log_scores(scores)
6 changes: 4 additions & 2 deletions examples/wrapper_example_1.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import cross_validate, train_test_split
from sklearn.linear_model import Ridge


from suprb.utils import check_random_state
Expand Down Expand Up @@ -49,16 +50,17 @@ def load_higdon_gramacy_lee(n_samples=1000, noise=0, random_state=None):
rule_generation__delay=150,
rule_generation__random_state=random_state,
rule_generation__n_jobs=1,
rule_generation__init__model=Ridge(),

## SOLUTION COMPOSITION ##
solution_composition=GeneticAlgorithm(),
solution_composition__n_iter=32,
solution_composition__n_iter=10,
solution_composition__population_size=32,
solution_composition__elitist_ratio=0.2,
solution_composition__random_state=random_state,
solution_composition__n_jobs=1)

scores = cross_validate(model, X, y, cv=4, n_jobs=1, verbose=10,
scores = cross_validate(model, X, y, cv=4, n_jobs=4, verbose=10,
scoring=['r2', 'neg_mean_squared_error'],
return_estimator=True, fit_params={'cleanup': True})

Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ joblib~=1.1.0
tqdm~=4.62.3
pytest~=6.2.5
protobuf~=3.20.0
ucimlrepo
49 changes: 43 additions & 6 deletions suprb/base.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from __future__ import annotations

from abc import abstractmethod, ABCMeta
from abc import abstractmethod, ABCMeta, ABC

import numpy as np
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.base import BaseEstimator, RegressorMixin, ClassifierMixin


class BaseComponent(BaseEstimator, metaclass=ABCMeta):
Expand Down Expand Up @@ -47,13 +47,13 @@ def _more_str_attributes(self) -> dict:
return {}


class BaseRegressor(BaseEstimator, RegressorMixin, metaclass=ABCMeta):
"""A base (composite) Regressor."""
class BaseSupervised(BaseEstimator, metaclass=ABCMeta):
"""A base (composite) Estimator for supervised learning."""

is_fitted_: bool

@abstractmethod
def fit(self, X: np.ndarray, y: np.ndarray) -> BaseRegressor:
def fit(self, X: np.ndarray, y: np.ndarray) -> BaseSupervised:
""" A reference implementation of a fitting function.

Parameters
Expand All @@ -68,7 +68,6 @@ def fit(self, X: np.ndarray, y: np.ndarray) -> BaseRegressor:
self : BaseEstimator
Returns self.
"""

pass

@abstractmethod
Expand All @@ -85,5 +84,43 @@ def predict(self, X: np.ndarray):
y : np.ndarray
Returns the estimation with shape (n_samples,).
"""
pass

@abstractmethod
def score(self, X, y, sample_weight=None):
pass

class BaseRegressor(BaseSupervised, RegressorMixin, metaclass=ABCMeta):
    """A base (composite) Regressor."""

    # NOTE(review): BaseSupervised is listed before RegressorMixin, so its
    # abstract `score` takes precedence over RegressorMixin.score in the MRO.
    # Confirm that concrete subclasses are meant to implement `score` themselves.

    # Declared for type checkers only; nothing in this class assigns it.
    is_fitted_: bool

    @abstractmethod
    def fit(self, X: np.ndarray, y: np.ndarray) -> BaseRegressor:
        """Fit the regressor on `X` and `y`; subclasses must override this."""

    @abstractmethod
    def predict(self, X: np.ndarray):
        """Predict for `X`; subclasses must override this."""


class BaseClassifier(BaseSupervised, ClassifierMixin, metaclass=ABCMeta):
    """A base (composite) Classifier."""

    # NOTE(review): BaseSupervised is listed before ClassifierMixin, so its
    # abstract `score` takes precedence over ClassifierMixin.score in the MRO.
    # Confirm that concrete subclasses are meant to implement `score` themselves.

    # Declared for type checkers only; nothing in this class assigns it.
    is_fitted_: bool

    @abstractmethod
    def fit(self, X: np.ndarray, y: np.ndarray) -> BaseClassifier:
        """Fit the classifier on `X` and `y`; subclasses must override this."""

    @abstractmethod
    def predict(self, X: np.ndarray):
        """Predict for `X`; subclasses must override this."""

class SupervisedMixin(ABC):
    """Abstract interface requiring a ``score`` method.

    ``RegressorMixin`` and ``ClassifierMixin`` are registered as virtual
    subclasses directly below this class definition.
    """

    @abstractmethod
    def score(self, X, y, sample_weight=None):
        """Score on `X` and `y`; the base implementation returns ``None``."""
        return None

SupervisedMixin.register(RegressorMixin)
SupervisedMixin.register(ClassifierMixin)
8 changes: 8 additions & 0 deletions suprb/fitness.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,14 @@ def pseudo_accuracy(error: float, beta=2) -> float:
assert beta > 0
return np.exp(-beta * error)

def pseudo_error(accuracy: float) -> float:
    """Return the complement of `accuracy`, i.e. ``1 - accuracy``."""
    complement = 1 - accuracy
    return complement

def actual_accuracy(error: float) -> float:
    """Return ``1 - error``; intended for classification."""
    accuracy = 1 - error
    return accuracy

def emary(alpha: float, x1: float, x2: float) -> float:
"""
Expand Down
9 changes: 7 additions & 2 deletions suprb/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,11 @@ def _save_config(suprb, json_config):
elif isinstance(value, primitive):
json_config["config"][key] = value
else:
json_config["config"][key] = _get_full_class_name(value)
cname = _get_full_class_name(value)
if cname == "NoneType":
json_config["config"][key] = None
else:
json_config["config"][key] = cname


def _save_pool(pool, json_config):
Expand All @@ -113,6 +117,7 @@ def _convert_rule_to_json(rule):
return {"error_": rule.error_,
"experience_": rule.experience_,
"match": _convert_dict_to_json(vars(rule.match)),
"matching_type": _get_full_class_name(rule.match),
"is_fitted_": rule.is_fitted_,
"model": {"coef_": _convert_to_json_format(getattr(rule.model, "coef_")),
"intercept_": getattr(rule.model, "intercept_")}}
Expand Down Expand Up @@ -194,7 +199,7 @@ def _load_pool(json_dict, suprb):

def _convert_json_to_rule(json_rule, json_dict):

rule = Rule(_convert_matching_type(json_rule["match"], json_dict["config"]["matching_type"]),
rule = Rule(_convert_matching_type(json_rule["match"], json_rule["matching_type"]),
_convert_from_json_to_array(json_dict["input_space"]),
_convert_model(json_rule["model"], json_dict["config"]["rule_generation__init__model"]),
_get_class(json_dict["config"]["rule_generation__init__fitness"]))
Expand Down
10 changes: 5 additions & 5 deletions suprb/logging/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,28 +2,28 @@

import numpy as np

from suprb.base import BaseComponent, BaseRegressor
from suprb.base import BaseComponent, BaseSupervised


class BaseLogger(BaseComponent):
"""The base class for loggers."""

@abstractmethod
def log_init(self, X: np.ndarray, y: np.ndarray, estimator: BaseRegressor):
def log_init(self, X: np.ndarray, y: np.ndarray, estimator: BaseSupervised):
"""Logs initial parameters, before any iteration or fitting has taken place."""
pass

@abstractmethod
def log_iteration(self, X: np.ndarray, y: np.ndarray, estimator: BaseRegressor, iteration: int):
def log_iteration(self, X: np.ndarray, y: np.ndarray, estimator: BaseSupervised, iteration: int):
"""Logs an iteration of the estimator. May actually compute errors or scores, depending on the state."""
pass

@abstractmethod
def log_final(self, X: np.ndarray, y: np.ndarray, estimator: BaseRegressor):
def log_final(self, X: np.ndarray, y: np.ndarray, estimator: BaseSupervised):
"""Log the final state of the estimator. It is assumed that the fitting process is already completed."""
pass

@abstractmethod
def get_elitist(self, estimator: BaseRegressor):
def get_elitist(self, estimator: BaseSupervised):
"""Log the final elitist"""
pass
10 changes: 5 additions & 5 deletions suprb/logging/combination.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import numpy as np
from sklearn import clone

from suprb.base import BaseRegressor
from suprb.base import BaseSupervised
from . import BaseLogger


Expand All @@ -16,7 +16,7 @@ def __init__(self, loggers: list[tuple[str, BaseLogger]]):
"""An unique name for every logger must be supplied, such that the parameter get/set are well-defined."""
self.loggers = loggers

def log_init(self, X: np.ndarray, y: np.ndarray, estimator: BaseRegressor):
def log_init(self, X: np.ndarray, y: np.ndarray, estimator: BaseSupervised):
if any(map(lambda logger: isinstance(logger, CombinedLogger), self.loggers)):
warnings.warn("Nesting loggers is not recommended. Please add all loggers to this top-level logger.")

Expand All @@ -25,11 +25,11 @@ def log_init(self, X: np.ndarray, y: np.ndarray, estimator: BaseRegressor):
for _, logger in self.loggers_:
logger.log_init(X=X, y=y, estimator=estimator)

def log_iteration(self, X: np.ndarray, y: np.ndarray, estimator: BaseRegressor, iteration: int):
def log_iteration(self, X: np.ndarray, y: np.ndarray, estimator: BaseSupervised, iteration: int):
for _, logger in self.loggers_:
logger.log_iteration(X=X, y=y, estimator=estimator, iteration=iteration)

def log_final(self, X: np.ndarray, y: np.ndarray, estimator: BaseRegressor):
def log_final(self, X: np.ndarray, y: np.ndarray, estimator: BaseSupervised):
for _, logger in self.loggers_:
logger.log_final(X=X, y=y, estimator=estimator)

Expand Down Expand Up @@ -71,5 +71,5 @@ def _replace_logger(self, name: str, new_val: BaseLogger):
break
self.loggers = new_loggers

def get_elitist(self, estimator: BaseRegressor):
def get_elitist(self, estimator: BaseSupervised):
pass
18 changes: 10 additions & 8 deletions suprb/logging/default.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@

from . import BaseLogger
from .metrics import matched_training_samples, genome_diversity
from .. import json as suprb_json
from suprb.base import BaseRegressor
from .. import json
from suprb.base import BaseSupervised


class DefaultLogger(BaseLogger):
Expand All @@ -25,13 +25,13 @@ def log_params(self, **kwargs):
for key, value in kwargs.items():
self.log_param(key=key, value=value)

def log_init(self, X: np.ndarray, y: np.ndarray, estimator: BaseRegressor):
def log_init(self, X: np.ndarray, y: np.ndarray, estimator: BaseSupervised):
self.params_ = {}
self.metrics_ = defaultdict(dict)

self.log_params(**estimator.get_params())

def log_iteration(self, X: np.ndarray, y: np.ndarray, estimator: BaseRegressor, iteration: int):
def log_iteration(self, X: np.ndarray, y: np.ndarray, estimator: BaseSupervised, iteration: int):
def log_metric(key, value):
self.log_metric(key=key, value=value, step=estimator.step_)

Expand Down Expand Up @@ -61,17 +61,19 @@ def log_metric_min_max_mean(metric_name: str, attribute_name: str, lst: list):
elitist = estimator.solution_composition_.elitist()
log_metric("elitist_fitness", elitist.fitness_)
log_metric("elitist_error", elitist.error_)
if elitist.isClassifier:
log_metric("elitist_accuracy", elitist.score(X, y))
log_metric("elitist_complexity", elitist.complexity_)
log_metric("elitist_matched", matched_training_samples(elitist.subpopulation))
# log_metric("elitist_rules", elitist.pool)
log_metric("elitist_rules", elitist.pool)

# Log performance
log_metric("training_score", elitist.score(X, y))

def get_elitist(self, estimator: BaseRegressor):
def get_elitist(self, estimator: BaseSupervised):
json_data = {}
suprb_json._save_pool(estimator.solution_composition_.elitist().pool, json_data)
json._save_pool(estimator.solution_composition_.elitist().pool, json_data)
return json_data

def log_final(self, X: np.ndarray, y: np.ndarray, estimator: BaseRegressor):
def log_final(self, X: np.ndarray, y: np.ndarray, estimator: BaseSupervised):
pass
33 changes: 33 additions & 0 deletions suprb/optimizer/rule/acceptance.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,11 @@

import numpy as np

from sklearn.metrics import precision_score, recall_score, f1_score

from suprb.base import BaseComponent
from suprb.rule import Rule
from suprb.fitness import pseudo_error


class RuleAcceptance(BaseComponent, metaclass=ABCMeta):
Expand Down Expand Up @@ -38,4 +41,34 @@ def __call__(self, rule: Rule, X: np.ndarray, y: np.ndarray) -> bool:
return False
local_y = y[rule.match_set_]
default_error = np.sum(local_y ** 2) / (len(local_y) * self.beta)
if rule.isClassifier:
# default error is the trivial solution of always choosing the most common label
local_y = [round(y) for y in local_y]
default_accuracy = np.bincount(local_y).max() / (len(local_y) * self.beta)
default_error = pseudo_error(default_accuracy)
return rule.error_ <= default_error


class Precision(RuleAcceptance):
    """Accept a rule when its macro-averaged precision reaches a threshold."""

    def __init__(self, min_precission: float = 0.9):
        # The parameter keeps its original (misspelled) public name.
        self.min_precission = min_precission

    def __call__(self, rule: Rule, X: np.ndarray, y: np.ndarray) -> bool:
        """Return whether `rule` is precise enough on the samples it matches.

        Rules with an experience below 1 are rejected without being scored.
        """
        if rule.experience_ < 1:
            return False

        # Score only on the samples selected by the rule's match set.
        targets = y[rule.match_set_]
        predictions = rule.predict(X[rule.match_set_])
        precision = precision_score(targets, predictions, average='macro', zero_division=np.nan)
        return precision >= self.min_precission

class F1_Score(RuleAcceptance):
    """Insert if the rule has a f1_score greater or equal to a threshold"""

    def __init__(self, min_f1: float = 0.9):
        # Store the threshold under the same name as the init parameter:
        # `__call__` reads `self.min_f1`, and attribute-by-parameter-name
        # lookup (as in scikit-learn's get_params) expects it there too.
        self.min_f1 = min_f1

    def __call__(self, rule: Rule, X: np.ndarray, y: np.ndarray) -> bool:
        """Return whether `rule` reaches the F1 threshold on its matched samples.

        Rules with an experience below 1 are rejected without being scored.
        """
        if rule.experience_ < 1:
            return False
        local_y = y[rule.match_set_]
        # NOTE(review): f1_score is called with its default averaging here,
        # while Precision in this module passes average='macro' — confirm
        # which averaging is intended for multi-class targets.
        return f1_score(local_y, rule.predict(X[rule.match_set_])) >= self.min_f1
Loading