-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathregression.py
More file actions
126 lines (119 loc) · 4.43 KB
/
regression.py
File metadata and controls
126 lines (119 loc) · 4.43 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
#importing necessary library
import warnings
from numpy import mean
from numpy import std
from sklearn.datasets import make_regression
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import HuberRegressor
from sklearn.linear_model import Lars
from sklearn.linear_model import LassoLars
from sklearn.linear_model import PassiveAggressiveRegressor
from sklearn.linear_model import RANSACRegressor
from sklearn.linear_model import SGDRegressor
from sklearn.linear_model import TheilSenRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.tree import ExtraTreeRegressor
from sklearn.svm import SVR
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import BaggingRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.ensemble import GradientBoostingRegressor
import crayons
# create a dict of standard models to evaluate {name:object}
def get_models(models=None):
	"""Build a dictionary of standard regression models to evaluate.

	Parameters
	----------
	models : dict or None
		Optional existing mapping of name -> estimator to extend in
		place. Defaults to None (a fresh dict is created per call);
		the original `models=dict()` was a mutable default argument,
		shared and accumulated across calls.

	Returns
	-------
	dict
		Mapping of model name -> unfitted sklearn estimator.
	"""
	if models is None:
		models = dict()
	# linear models
	models['lr'] = LinearRegression()
	alpha = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
	for a in alpha:
		models['lasso-'+str(a)] = Lasso(alpha=a, random_state = 0)
	for a in alpha:
		models['ridge-'+str(a)] = Ridge(alpha=a, random_state = 0)
	for a1 in alpha:
		for a2 in alpha:
			name = 'en-' + str(a1) + '-' + str(a2)
			# keyword args: recent sklearn makes estimator parameters
			# keyword-only, so ElasticNet(a1, a2) raises TypeError
			models[name] = ElasticNet(alpha=a1, l1_ratio=a2)
	alpha = [0.001, 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 1.0]
	for a in alpha:
		models['huber-'+str(a)] = HuberRegressor(alpha = a)
	models['lars'] = Lars()
	models['llars'] = LassoLars()
	models['pa'] = PassiveAggressiveRegressor(max_iter=1000, tol=1e-3)
	# NOTE(review): key 'ranscac' looks like a typo for 'ransac', but it
	# is a runtime output label, so it is preserved as-is
	models['ranscac'] = RANSACRegressor()
	models['sgd'] = SGDRegressor(max_iter=1000, tol=1e-3)
	models['theil'] = TheilSenRegressor()
	# non-linear models
	n_neighbors = range(1, 21)
	for k in n_neighbors:
		models['knn-'+str(k)] = KNeighborsRegressor(n_neighbors=k)
	models['cart'] = DecisionTreeRegressor()
	models['extra'] = ExtraTreeRegressor()
	models['svml'] = SVR(kernel='linear')
	models['svmp'] = SVR(kernel='poly')
	c_values = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
	for c in c_values:
		models['svmr'+str(c)] = SVR(C=c)
	# ensemble models
	n_trees = 100
	models['ada'] = AdaBoostRegressor(n_estimators=n_trees)
	models['bag'] = BaggingRegressor(n_estimators=n_trees)
	models['rf'] = RandomForestRegressor(n_estimators=n_trees)
	models['et'] = ExtraTreesRegressor(n_estimators=n_trees)
	models['gbm'] = GradientBoostingRegressor(n_estimators=n_trees)
	print('Defined %d models' % len(models))
	return models
#evaluate a dict of models {name:object}, returns {name:score}
def evaluate_models(X, y, models, folds=10, metric='neg_mean_squared_error'):
	"""Evaluate a dict of models {name: estimator}; return {name: scores}.

	Parameters
	----------
	X, y : array-like
		Feature matrix and regression targets.
	models : dict
		Mapping of name -> unfitted estimator (as from get_models()).
	folds : int
		Number of cross-validation folds.
	metric : str
		sklearn scoring string. The previous default, 'accuracy', is a
		classification scorer and raises for every regressor here, so
		every model was silently reported as an error; a valid
		regression scorer is now the default.

	Returns
	-------
	dict
		Name -> per-fold score array, for models that evaluated cleanly.
	"""
	results = dict()
	for name, model in models.items():
		# evaluate the model; returns None on failure
		scores = robust_evaluate_model(X, y, model, folds, metric)
		if scores is not None:
			# store and report the result
			results[name] = scores
			mean_score, std_score = mean(scores), std(scores)
			print(crayons.blue(f'\t[*] NAME => {name}', bold=True))
			print(crayons.yellow(f'\t[*] Mean Score => {round(mean_score,3)}', bold = True))
			print(crayons.red(f'\t[*] Std_Score => (+/-){round(std_score,3)}', bold=True))
			print("\n")
		else:
			print('>%s: error' % name)
	return results
# create a feature preparation pipeline for a model
def make_pipeline(model):
	"""Wrap *model* in the shared feature-preparation pipeline.

	Features are standardized (zero mean, unit variance) and then
	rescaled into [0, 1] before reaching the model.
	"""
	preparation_steps = [
		('standardize', StandardScaler()),
		('normalize', MinMaxScaler()),
		('model', model),
	]
	return Pipeline(steps=preparation_steps)
# evaluate a single model
def evaluate_model(X, y, model, folds, metric):
	"""Cross-validate *model* and return the per-fold scores.

	The model is first wrapped in the standard preprocessing
	pipeline; evaluation uses all CPU cores (n_jobs=-1).
	"""
	wrapped = make_pipeline(model)
	return cross_val_score(wrapped, X, y, scoring=metric, cv=folds, n_jobs=-1)
# evaluate a model and try to trap errors and hide warnings
def robust_evaluate_model(X, y, model, folds, metric):
	"""Evaluate a model, trapping errors and hiding warnings.

	Returns the per-fold score array, or None if evaluation raised.
	The original used a bare `except:`, which also swallowed
	KeyboardInterrupt and SystemExit; only Exception is caught now,
	preserving the deliberate best-effort behavior for model failures.
	"""
	scores = None
	try:
		with warnings.catch_warnings():
			warnings.filterwarnings("ignore")
			scores = evaluate_model(X, y, model, folds, metric)
	except Exception:
		scores = None
	return scores