forked from zhuww/ubc_AI
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathclassifier.py
More file actions
313 lines (275 loc) · 11 KB
/
classifier.py
File metadata and controls
313 lines (275 loc) · 11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
import random
import numpy as np
from sklearn.decomposition import RandomizedPCA as PCA
class combinedAI(object):
    """
    Combine several trained classifiers into a single meta-classifier.

    strategy:
        'union' -- predict 1 if ANY member classifier predicts 1
        'vote'  -- predict 1 if at least `nvote` members predict 1
    """
    def __init__(self, list_of_AIs, strategy='vote', nvote=None):
        """
        inputs
        list_of_AIs: list of classifiers (each with fit/predict methods)
        strategy: one of ['union', 'vote']
        nvote: number of positive votes needed to predict 1.
        Note: if nvote=None, we determine the best nvote value during self.fit
        """
        self.list_of_AIs = list_of_AIs
        self.strategy = strategy
        self.nvote = nvote

    def fit(self, pfds, target, **kwds):
        """
        args: [list of pfd instances], target

        Fits every member classifier. If strategy == 'vote' and nvote is
        None, also chooses the vote threshold (minimum number of positive
        votes) that maximizes training-set accuracy.
        """
        target = np.asarray(target)
        train_preds = []
        for clf in self.list_of_AIs:
            clf.fit(pfds, target, **kwds)
            if self.strategy == 'vote' and self.nvote is None:
                train_preds.append(clf.predict(pfds))  # nclassifiers x nsamples
        if self.strategy == 'vote' and self.nvote is None:
            # number of positive votes per sample
            votes = np.array(train_preds).sum(axis=0)
            best_score = -1.
            for i in range(len(self.list_of_AIs)):
                # candidate threshold: at least i+1 positive votes ==> predict 1
                # (the original compared the raw vote count to the fraction
                # (i+1)/m, which made the search degenerate)
                avepred = np.where(votes >= i + 1, 1, 0)
                this_score = np.mean(avepred == target)
                if this_score > best_score:
                    self.nvote = i + 1
                    best_score = this_score

    def predict(self, test_pfds, pred_mat=False):
        """
        args: [list of test pfd instances]
        optionally: pred_mat = True returns the [nclassifiers x nsamples]
                    prediction matrix so you can run your own prediction
                    combo schemes (default False)
        """
        if not isinstance(test_pfds, (list, np.ndarray)):
            print("warning: changing test_pfds from type %s to list" % (type(test_pfds)))
            test_pfds = [test_pfds]
        # nclassifiers x nsamples matrix of member predictions
        predict = np.array([clf.predict(test_pfds) for clf in self.list_of_AIs])
        self.list_of_predicts = list(predict)
        votes = predict.sum(axis=0)  # positive votes per sample
        if self.strategy == 'union':
            # positive if any member votes positive
            self.predictions = np.where(votes > 0, 1, 0)
        elif self.strategy == 'vote':
            # positive if at least nvote members vote positive
            # (integer count comparison avoids the py2 floor-division and
            # random tie-breaking fuzz of the original)
            self.predictions = np.where(votes >= self.nvote, 1, 0)
        if pred_mat:
            return predict  # [nclassifiers x nsamples]
        else:
            return self.predictions

    def predict_proba(self, pfds):
        """
        predict_proba(self, pfds) classifier method
        Compute the likelihoods of each possible outcome for the samples.
        The member models need probability information computed at training
        time: fit with attribute `probability` set to True.
        Parameters
        ----------
        pfds : array-like, shape = [n_samples, n_features]
        Returns
        -------
        array-like : the class-1 probability for each sample, averaged
        over all member classifiers.
        """
        probs = np.array([clf.predict_proba(pfds) for clf in self.list_of_AIs])
        return probs.sum(axis=0) / len(self.list_of_AIs)

    def score(self, pfds, target, F1=True):
        """
        If F1 is False: return the mean of the success array
        [1,0,0,1,...,1], where 1 is being right and 0 is being wrong.
        Otherwise return the F1 score (harmonic mean of precision and
        recall), or 0.0 when there are no positive predictions/targets.
        """
        predict = np.asarray(self.predict(pfds))
        target = np.asarray(target)
        if not F1:
            return np.mean(predict == target)
        # F1 is symmetric in precision/recall, so the P/R naming below
        # does not affect the result
        P = np.mean(predict[target == 1])
        R = np.mean(target[predict == 1])
        if P + R == 0:
            return 0.0  # guard against division by zero
        return 2 * P * R / (P + R)
class classifier(object):
    """
    A class designed to be mixed in with a scikit-learn style classifier
    class, giving it a `feature` property to specify which feature to
    extract from pfd instances.
    Usage:
        class svmclf(classifier, svm.SVC):
            orig_class = svm.SVC
    When initializing the classifier, remember to specify the feature like:
        clf1 = svmclf(gamma=0.1, C=0.8, scale_C=False, feature={'phasebins':32})
    `feature` has to be a dict like {'phasebins':32}, where 'phasebins' is
    the name of the feature and 32 is its size.

    Implementation note: fit/predict/predict_proba/score temporarily set
    self.__class__ to self.orig_class so the delegated call resolves to the
    real classifier's method instead of recursing into this mix-in.
    """
    def __init__(self, feature=None, use_pca=False, n_comp=12, *args, **kwds):
        if feature is None:
            # the original raised a bare string, which is a TypeError at
            # runtime; raise a proper exception so callers see the message
            raise ValueError("must specify the feature used by this classifier!")
        self.feature = feature
        self.use_pca = use_pca
        self.n_components = n_comp
        super(classifier, self).__init__(*args, **kwds)

    def fit(self, pfds, target):
        """
        args: pfds, target
        pfds: the training pfds
        target: the training targets
        """
        data = np.array([pfd.getdata(**self.feature) for pfd in pfds])
        current_class = self.__class__
        self.__class__ = self.orig_class
        try:
            if self.use_pca:
                self.pca = PCA(n_components=self.n_components).fit(data)
                data = self.pca.transform(data)
            results = self.fit(data, target)
        finally:
            # always restore the mix-in class, even if fit raises,
            # so the instance is not left permanently re-classed
            self.__class__ = current_class
        return results

    def predict(self, pfds):
        """
        args: pfds
        pfds: the testing pfds
        """
        data = np.array([pfd.getdata(**self.feature) for pfd in pfds])
        current_class = self.__class__
        self.__class__ = self.orig_class
        try:
            if self.use_pca:
                data = self.pca.transform(data)
            results = self.predict(data)
        finally:
            self.__class__ = current_class
        return results

    def predict_proba(self, pfds):
        """
        predict_proba(self, pfds) classifier method
        Compute the likelihoods of each possible outcome for the samples.
        The model needs probability information computed at training
        time: fit with attribute `probability` set to True.
        Parameters
        ----------
        pfds : the pfd instances to classify
        Returns
        -------
        array-like, shape = [n_samples]
            The probability of class 1 for each sample (column 1 of the
            underlying classifier's probability matrix).
        """
        data = np.array([pfd.getdata(**self.feature) for pfd in pfds])
        current_class = self.__class__
        self.__class__ = self.orig_class
        try:
            if self.use_pca:
                data = self.pca.transform(data)
            results = self.predict_proba(data)
        finally:
            self.__class__ = current_class
        return results[..., 1]

    def score(self, pfds, target, F1=True):
        """
        args: pfds, target
        pfds: the testing pfds
        target: the testing targets
        Returns plain accuracy when F1 is False, otherwise the F1 score
        (0.0 when there are no positive predictions/targets).
        """
        data = np.array([pfd.getdata(**self.feature) for pfd in pfds])
        current_class = self.__class__
        self.__class__ = self.orig_class
        try:
            if self.use_pca:
                data = self.pca.transform(data)
            predict = self.predict(data)
            if not F1:
                F1score = np.mean(np.where(predict == target, 1, 0))
            else:
                # F1 is symmetric in precision/recall, so the P/R naming
                # does not affect the result
                P = np.mean(predict[target == 1])
                R = np.mean(target[predict == 1])
                if P + R == 0:
                    F1score = 0.0  # guard against division by zero
                else:
                    F1score = 2 * P * R / (P + R)
        finally:
            self.__class__ = current_class
        return F1score
from sklearn import svm, linear_model
class svmclf(classifier, svm.SVC):
    """
    Mix-in class combining the feature-extracting `classifier` with svm.SVC.
    """
    orig_class = svm.SVC
class LRclf(classifier, linear_model.LogisticRegression):
    """
    the mix-in class for linear_model.LogisticRegression
    (docstring previously misstated svm.SVC)
    """
    orig_class = linear_model.LogisticRegression
from ubc_AI import pulsar_nnetwork as pnn
class pnnclf(classifier, pnn.NeuralNetwork):
    """
    Mix-in class combining the feature-extracting `classifier` with
    pnn.NeuralNetwork.
    """
    orig_class = pnn.NeuralNetwork
from sklearn.tree import DecisionTreeClassifier
class dtreeclf(classifier, DecisionTreeClassifier):
    """
    Mix-in class combining the feature-extracting `classifier` with
    DecisionTreeClassifier.
    """
    orig_class = DecisionTreeClassifier