9 """Collection of classifiers to ease the exploration.
10 """
11
12 __docformat__ = 'restructuredtext'

import operator


from mvpa.clfs.meta import FeatureSelectionClassifier, SplitClassifier, \
     MulticlassClassifier
from mvpa.clfs.smlr import SMLR
from mvpa.clfs.knn import kNN
from mvpa.clfs.gnb import GNB
from mvpa.clfs.kernel import KernelLinear, KernelSquaredExponential


from mvpa.base import externals, cfg
from mvpa.measures.anova import OneWayAnova
from mvpa.misc.transformers import Absolute
from mvpa.clfs.smlr import SMLRWeights
from mvpa.featsel.helpers import FractionTailSelector, \
     FixedNElementTailSelector, RangeElementSelector

from mvpa.featsel.base import SensitivityBasedFeatureSelection

_KNOWN_INTERNALS = [ 'knn', 'binary', 'svm', 'linear',
    'smlr', 'does_feature_selection', 'has_sensitivity',
    'multiclass', 'non-linear', 'kernel-based', 'lars',
    'regression', 'libsvm', 'sg', 'meta', 'retrainable', 'gpr',
    'notrain2predict', 'ridge', 'blr', 'gnpp', 'enet', 'glmnet',
    'gnb', 'plr']
42 """Class to keep known instantiated classifiers
43
44 Should provide easy ways to select classifiers of needed kind:
45 clfswh['linear', 'svm'] should return all linear SVMs
46 clfswh['linear', 'multiclass'] should return all linear classifiers
47 capable of doing multiclass classification
48 """

    def __init__(self, known_tags=None, matches=None):
        """Initialize warehouse

        :Parameters:
          known_tags : list of basestring
            List of known tags
          matches : dict
            Optional dictionary of additional matches. E.g. since any
            regression can be used as a binary classifier,
            matches={'binary': ['regression']} would allow regressions
            to be returned whenever 'binary' classifiers are requested.
        """
        self._known_tags = set(known_tags)
        self.__items = []
        self.__keys = set()
        if matches is None:
            matches = {}
        self.__matches = matches

    def __getitem__(self, *args):
        if isinstance(args[0], tuple):
            args = args[0]

        # an empty slice (warehouse[:]) selects everything
        if args == (slice(None),):
            args = []

        # strip the optional '!' negation prefix before validating tags
        dargs = set([str(x).lstrip('!') for x in args]).difference(
            self._known_tags)

        if len(dargs) > 0:
            raise ValueError, "Unknown internals %s requested. Known are %s" % \
                  (list(dargs), list(self._known_tags))

        result = []

        for item in self.__items:
            good = True
            for arg in args:
                # '!tag' rejects items carrying that tag
                if arg.startswith('!'):
                    if (arg[1:] in item._clf_internals):
                        good = False
                        break
                    else:
                        continue
                # otherwise require the tag itself or any of its matches
                found = False
                for arg in [arg] + self.__matches.get(arg, []):
                    if (arg in item._clf_internals):
                        found = True
                        break
                good = found
                if not good:
                    break
            if good:
                result.append(item)
        return result

    def __iadd__(self, item):
        if operator.isSequenceType(item):
            for item_ in item:
                self.__iadd__(item_)
        else:
            if not hasattr(item, '_clf_internals'):
                raise ValueError, "Cannot register %s " % item + \
                      "which has no _clf_internals defined"
            if len(item._clf_internals) == 0:
                raise ValueError, "Cannot register %s " % item + \
                      "which has empty _clf_internals"
            clf_internals = set(item._clf_internals)
            if clf_internals.issubset(self._known_tags):
                self.__items.append(item)
                self.__keys |= clf_internals
            else:
                raise ValueError, 'Unknown clf internal(s) %s' % \
                      clf_internals.difference(self._known_tags)
        return self

    @property
    def internals(self):
        """Known internal tags of the classifiers
        """
        return self.__keys

    def listing(self):
        """Listing (description + internals) of registered items
        """
        return [(x.descr, x._clf_internals) for x in self.__items]

    @property
    def items(self):
        """Registered items
        """
        return self.__items

clfswh = Warehouse(known_tags=_KNOWN_INTERNALS)   # classifier warehouse
regrswh = Warehouse(known_tags=_KNOWN_INTERNALS)  # regression warehouse
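
# Both warehouses accept a single classifier or a (possibly nested) sequence
# of classifiers via '+=' (see Warehouse.__iadd__ above); each registered item
# must advertise its tags in _clf_internals.  Illustration only:
#
#   clfswh += SMLR(lm=1.0)                  # register one classifier
#   clfswh += [kNN(k=1), kNN(k=5)]          # ... or a whole list at once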

clfswh += [ SMLR(lm=0.1, implementation="C", descr="SMLR(lm=0.1)"),
            SMLR(lm=1.0, implementation="C", descr="SMLR(lm=1.0)"),
            ]

clfswh += \
    [ MulticlassClassifier(clfswh['smlr'][0],
                           descr='Pairs+maxvote multiclass on ' + \
                           clfswh['smlr'][0].descr) ]

if externals.exists('libsvm'):
    from mvpa.clfs import libsvmc as libsvm
    clfswh._known_tags.update(libsvm.SVM._KNOWN_IMPLEMENTATIONS.keys())
    clfswh += [libsvm.SVM(descr="libsvm.LinSVM(C=def)", probability=1),
               libsvm.SVM(
                   C=-10.0, descr="libsvm.LinSVM(C=10*def)", probability=1),
               libsvm.SVM(
                   C=1.0, descr="libsvm.LinSVM(C=1)", probability=1),
               libsvm.SVM(svm_impl='NU_SVC',
                          descr="libsvm.LinNuSVM(nu=def)", probability=1)
               ]
    clfswh += [libsvm.SVM(kernel_type='RBF', descr="libsvm.RbfSVM()"),
               libsvm.SVM(kernel_type='RBF', svm_impl='NU_SVC',
                          descr="libsvm.RbfNuSVM(nu=def)"),
               libsvm.SVM(kernel_type='poly',
                          descr='libsvm.PolySVM()', probability=1),
               ]

    regrswh._known_tags.update(['EPSILON_SVR', 'NU_SVR'])
    regrswh += [libsvm.SVM(svm_impl='EPSILON_SVR', descr='libsvm epsilon-SVR',
                           regression=True),
                libsvm.SVM(svm_impl='NU_SVR', descr='libsvm nu-SVR',
                           regression=True)]

if externals.exists('shogun'):
    from mvpa.clfs import sg
    clfswh._known_tags.update(sg.SVM._KNOWN_IMPLEMENTATIONS)

    # implementations that get skipped below
    bad_classifiers = [
        'mpd',
        'gpbt',
        'gmnp',
        'svrlight',
        'krr',
        ]
    if not externals.exists('sg_fixedcachesize'):
        bad_classifiers.append('gnpp')

    for impl in sg.SVM._KNOWN_IMPLEMENTATIONS:
        if impl in bad_classifiers:
            continue
        clfswh += [
            sg.SVM(
                descr="sg.LinSVM(C=def)/%s" % impl, svm_impl=impl),
            sg.SVM(
                C=-10.0, descr="sg.LinSVM(C=10*def)/%s" % impl, svm_impl=impl),
            sg.SVM(
                C=1.0, descr="sg.LinSVM(C=1)/%s" % impl, svm_impl=impl),
            ]
        clfswh += [
            sg.SVM(kernel_type='RBF',
                   descr="sg.RbfSVM()/%s" % impl, svm_impl=impl),
            ]

    _optional_regressions = []
    if externals.exists('shogun.krr'):
        _optional_regressions += ['krr']
    for impl in ['libsvr'] + _optional_regressions:
        regrswh._known_tags.update([impl])
        regrswh += [ sg.SVM(svm_impl=impl, descr='sg.LinSVMR()/%s' % impl,
                            regression=True),
                     ]

if len(clfswh['svm', 'linear']) > 0:
    # if any SVM implementation is available, import the default SVM classes
    # (e.g. LinearCSVMC, RbfCSVMC, used further below)
    from mvpa.clfs.svm import *


if externals.exists('lars'):
    import mvpa.clfs.lars as lars
    from mvpa.clfs.lars import LARS
    for model in lars.known_models:
        # classification version
        lars_clf = LARS(descr="LARS(%s)" % model, model_type=model)
        clfswh += lars_clf

        # regression version
        lars_regr = LARS(descr="_LARS(%s, regression=True)" % model,
                         regression=True, model_type=model)
        regrswh += lars_regr


if externals.exists('glmnet'):
    from mvpa.clfs.glmnet import GLMNET_C, GLMNET_R
    clfswh += GLMNET_C(descr="GLMNET_C()")
    regrswh += GLMNET_R(descr="GLMNET_R()")


clfswh += kNN(k=5, descr="kNN(k=5)")
clfswh += kNN(k=5, voting='majority', descr="kNN(k=5, voting='majority')")

clfswh += \
    FeatureSelectionClassifier(
        kNN(),
        SensitivityBasedFeatureSelection(
           SMLRWeights(SMLR(lm=1.0, implementation="C")),
           RangeElementSelector(mode='select')),
        descr="kNN on SMLR(lm=1) non-0")

clfswh += \
    FeatureSelectionClassifier(
        kNN(),
        SensitivityBasedFeatureSelection(
           OneWayAnova(),
           FractionTailSelector(0.05, mode='select', tail='upper')),
        descr="kNN on 5%(ANOVA)")

clfswh += \
    FeatureSelectionClassifier(
        kNN(),
        SensitivityBasedFeatureSelection(
           OneWayAnova(),
           FixedNElementTailSelector(50, mode='select', tail='upper')),
        descr="kNN on 50(ANOVA)")


clfswh += GNB(descr="GNB()")
clfswh += GNB(common_variance=True, descr="GNB(common_variance=True)")
clfswh += GNB(prior='uniform', descr="GNB(prior='uniform')")
clfswh += \
    FeatureSelectionClassifier(
        GNB(),
        SensitivityBasedFeatureSelection(
           OneWayAnova(),
           FractionTailSelector(0.05, mode='select', tail='upper')),
        descr="GNB on 5%(ANOVA)")


# GPR
if externals.exists('scipy'):
    from mvpa.clfs.gpr import GPR

    clfswh += GPR(kernel=KernelLinear(), descr="GPR(kernel='linear')")
    clfswh += GPR(kernel=KernelSquaredExponential(),
                  descr="GPR(kernel='sqexp')")

    # BLR
    from mvpa.clfs.blr import BLR
    clfswh += BLR(descr="BLR()")

# PLR
from mvpa.clfs.plr import PLR
clfswh += PLR(descr="PLR()")
if externals.exists('scipy'):
    clfswh += PLR(reduced=0.05, descr="PLR(reduced=0.05)")


if len(clfswh['linear', 'svm']) > 0:
    # pick the linear SVM of the configured backend as the base classifier
    linearSVMC = clfswh['linear', 'svm',
                         cfg.get('svm', 'backend', default='libsvm').lower()
                         ][0]

    clfswh += \
        FeatureSelectionClassifier(
            linearSVMC.clone(),
            SensitivityBasedFeatureSelection(
               SMLRWeights(SMLR(lm=0.1, implementation="C")),
               RangeElementSelector(mode='select')),
            descr="LinSVM on SMLR(lm=0.1) non-0")

    clfswh += \
        FeatureSelectionClassifier(
            linearSVMC.clone(),
            SensitivityBasedFeatureSelection(
               SMLRWeights(SMLR(lm=1.0, implementation="C")),
               RangeElementSelector(mode='select')),
            descr="LinSVM on SMLR(lm=1) non-0")

    clfswh += \
        FeatureSelectionClassifier(
            RbfCSVMC(),
            SensitivityBasedFeatureSelection(
               SMLRWeights(SMLR(lm=1.0, implementation="C")),
               RangeElementSelector(mode='select')),
            descr="RbfSVM on SMLR(lm=1) non-0")

    clfswh += \
        FeatureSelectionClassifier(
            linearSVMC.clone(),
            SensitivityBasedFeatureSelection(
               OneWayAnova(),
               FractionTailSelector(0.05, mode='select', tail='upper')),
            descr="LinSVM on 5%(ANOVA)")

    clfswh += \
        FeatureSelectionClassifier(
            linearSVMC.clone(),
            SensitivityBasedFeatureSelection(
               OneWayAnova(),
               FixedNElementTailSelector(50, mode='select', tail='upper')),
            descr="LinSVM on 50(ANOVA)")

    clfswh += \
        FeatureSelectionClassifier(
            linearSVMC.clone(),
            SensitivityBasedFeatureSelection(
               linearSVMC.getSensitivityAnalyzer(transformer=Absolute),
               FractionTailSelector(0.05, mode='select', tail='upper')),
            descr="LinSVM on 5%(SVM)")

    clfswh += \
        FeatureSelectionClassifier(
            linearSVMC.clone(),
            SensitivityBasedFeatureSelection(
               linearSVMC.getSensitivityAnalyzer(transformer=Absolute),
               FixedNElementTailSelector(50, mode='select', tail='upper')),
            descr="LinSVM on 50(SVM)")

    # split-classifier wrapper around the default linear SVM
    rfesvm_split = SplitClassifier(linearSVMC)

    rfesvm = LinearCSVMC()
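
# A minimal smoke-test sketch: when this module is run as a script, list a few
# tag-based selections from the classifier warehouse.  The tag combinations
# below are only examples; selection itself is provided by
# Warehouse.__getitem__ above, with '!' excluding a tag.
if __name__ == '__main__':
    for tags in [('linear', 'svm'), ('smlr',), ('!meta',)]:
        print "%s:" % (tags,)
        for clf in clfswh[tags]:
            print "   ", clf.descr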