1
2
3
4
5
6
7
8
9 """Unit tests for PyMVPA SplittingSensitivityAnalyzer"""
10
11 from mvpa.base import externals
12 from mvpa.featsel.base import FeatureSelectionPipeline, \
13 SensitivityBasedFeatureSelection, CombinedFeatureSelection
14 from mvpa.clfs.transerror import TransferError
15 from mvpa.algorithms.cvtranserror import CrossValidatedTransferError
16 from mvpa.featsel.helpers import FixedNElementTailSelector, \
17 FractionTailSelector, RangeElementSelector
18
19 from mvpa.featsel.rfe import RFE
20
21 from mvpa.clfs.meta import SplitClassifier, MulticlassClassifier, \
22 FeatureSelectionClassifier
23 from mvpa.clfs.smlr import SMLR, SMLRWeights
24 from mvpa.misc.transformers import Absolute
25 from mvpa.datasets.splitters import NFoldSplitter, NoneSplitter
26
27 from mvpa.misc.transformers import Absolute, FirstAxisMean, \
28 SecondAxisSumOfAbs, DistPValue
29
30 from mvpa.measures.base import SplitFeaturewiseDatasetMeasure
31 from mvpa.measures.anova import OneWayAnova, CompoundOneWayAnova
32 from mvpa.measures.irelief import IterativeRelief, IterativeReliefOnline, \
33 IterativeRelief_Devel, IterativeReliefOnline_Devel
34
35 from tests_warehouse import *
36 from tests_warehouse_clfs import *
37
# Featurewise dataset measures that the tests below iterate over via
# @sweepargs.  CorrCoef is imported and appended only when scipy is
# available, since it depends on it.
# NOTE(review): this dump embeds original line numbers at the start of
# every line and strips indentation, so the module is not runnable as-is.
38 _MEASURES_2_SWEEP = [ OneWayAnova(),
39 CompoundOneWayAnova(combiner=SecondAxisSumOfAbs),
40 IterativeRelief(), IterativeReliefOnline(),
41 IterativeRelief_Devel(), IterativeReliefOnline_Devel()
42 ]
43 if externals.exists('scipy'):
44 from mvpa.measures.corrcoef import CorrCoef
# NOTE(review): the empty slots inside this list literal suggest that
# additional measure entries were removed from this dump -- confirm
# against the upstream test module.
45 _MEASURES_2_SWEEP += [ CorrCoef(),
46
47
48 ]
51
54
55
# NOTE(review): the embedded numbering jumps 56 -> 58 here, so the
# decorated test method's 'def' line (name, signature, indentation) is
# missing from this dump; the body below is orphaned and not
# syntactically complete as shown.
# What the visible body does: applies a featurewise measure `dsm` to the
# 'dumbinv' dataset, then asserts that (a) the input samples were not
# modified by the measure, (b) the result has one score per feature
# (shape (4,)), (c) feature 1 scores ~0 while feature 0 scores > 0.1,
# and (d) no score is NaN.
56 @sweepargs(dsm=_MEASURES_2_SWEEP)
58 data = datasets['dumbinv']
59
# keep a copy so we can verify the measure has no side effect on samples
60 datass = data.samples.copy()
61
62
63 f = dsm(data)
64
65
66 self.failUnless(N.all(data.samples == datass))
67 self.failUnless(f.shape == (4,))
68 self.failUnless(abs(f[1]) <= 1e-12,
69 msg="Failed test with value %g instead of != 0.0" % f[1])
70 self.failUnless(f[0] > 0.1)
71
72
73 self.failUnless(not N.any(N.isnan(f)))
74
75
76
77
78
# NOTE(review): numbering jumps 79 -> 81, so this test method's 'def'
# line is missing from this dump; the orphaned body below exercises
# sensitivity analyzers of a SplitClassifier.
79 @sweepargs(clf=clfswh['has_sensitivity'])
81 """Test analyzers in split classifier
82 """
83
# skip stepwise LARS variants -- presumably known to misbehave here;
# TODO(review): confirm the reason against upstream history.
84 _sclf = str(clf)
85 if 'LARS(' in _sclf and "type='stepwise'" in _sclf:
86 return
87
88
89 mclf = SplitClassifier(clf=clf,
90 enable_states=['training_confusion',
91 'confusion'])
92 sana = mclf.getSensitivityAnalyzer(transformer=Absolute,
93 enable_states=["sensitivities"])
94
# explicit transformer should be honoured; combiner falls back to the
# default FirstAxisMean
95
96 self.failUnless(sana.transformer is Absolute)
97 self.failUnless(sana.combiner is FirstAxisMean)
98
99
100
# running the analyzer must yield one sensitivity value per feature
101 map_ = sana(self.dataset)
102 self.failUnlessEqual(len(map_), self.dataset.nfeatures)
103
# only under 'labile' test config: require >75% accuracy on training
# and per-split confusion matrices
104 if cfg.getboolean('tests', 'labile', default='yes'):
105 for conf_matrix in [sana.clf.training_confusion] \
106 + sana.clf.confusion.matrices:
107 self.failUnless(
108 conf_matrix.percentCorrect>75,
109 msg="We must have trained on each one more or " \
110 "less correctly. Got %f%% correct on %d labels" %
111 (conf_matrix.percentCorrect,
112 len(self.dataset.uniquelabels)))
113
# NOTE(review): `errors` is computed but unused in the visible span --
# the consuming code was probably removed from this dump (embedded
# lines 116-130 are blank here); confirm against upstream.
114 errors = [x.percentCorrect
115 for x in sana.clf.confusion.matrices]
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# meta classifiers may yield degenerate (near-empty) sensitivity maps;
# bail out rather than asserting on feature selection in that case
131 if 'meta' in clf._clf_internals and len(map_.nonzero()[0])<2:
132
133 return
# select the strongest features and, under 'labile' config, require
# exactly the known non-bogus features to be recovered
134 for map__ in [map_]:
135 selected = FixedNElementTailSelector(
136 self.dataset.nfeatures -
137 len(self.dataset.nonbogus_features))(map__)
138 if cfg.getboolean('tests', 'labile', default='yes'):
139 self.failUnlessEqual(
140 list(selected),
141 list(self.dataset.nonbogus_features),
142 msg="At the end we should have selected the right features")
143
144
# NOTE(review): three @sweepargs-decorated test methods follow, but
# their 'def' lines and entire bodies (embedded lines 146-164, 169-179
# and 183-193) are missing from this dump -- only the decorators
# survive.  Restore from the upstream test module.
145 @sweepargs(clf=clfswh['has_sensitivity'])
165
166
167
168 @sweepargs(svm=clfswh['linear', 'svm'])
180
181
182 @sweepargs(svm=clfswh['linear', 'svm'])
194
195
196
197
# NOTE(review): the embedded numbering jumps 198 -> 200, so this test
# method's 'def' line is missing from this dump.  The orphaned body
# compares per-class split SVM weights against the full sensitivity
# vector.
198 @sweepargs(svm=clfswh['linear', 'svm', 'libsvm', '!sg', '!meta'])
200
201 kwargs = dict(combiner=None, transformer=None,
202 enable_states=["sensitivities"])
# one analyzer yielding per-class split weights, one yielding the
# combined full weights (reusing the already-trained classifier)
203 sana_split = svm.getSensitivityAnalyzer(
204 split_weights=True, **kwargs)
205 sana_full = svm.getSensitivityAnalyzer(
206 force_training=False, **kwargs)
207
208
# build a binary problem: zscore against labels 2/3, then keep only
# labels 0 and 1
209 ds2 = datasets['uni4large'].copy()
210 ds2.zscore(baselinelabels = [2, 3])
211 ds2 = ds2['labels', [0,1]]
212
213 map_split = sana_split(ds2)
214 map_full = sana_full(ds2)
215
# split map: one column per class; full map: a single vector
216 self.failUnlessEqual(map_split.shape, (ds2.nfeatures, 2))
217 self.failUnlessEqual(map_full.shape, (ds2.nfeatures, ))
218
219
220
# the difference of the two per-class columns must reconstruct the
# full sensitivity vector (up to numerical noise)
221 dmap = (-1*map_split[:, 1] + map_split[:, 0]) - map_full
222 self.failUnless((N.abs(dmap) <= 1e-10).all())
223
224
225
226
227
228
# split weights are only defined for binary problems -- a 3-class
# dataset must raise NotImplementedError
229 self.failUnlessRaises(NotImplementedError,
230 sana_split, datasets['uni3medium'])
231
232
# NOTE(review): the 'def' line for this test (embedded line 233) is
# missing from this dump; the numbering from 234 through 301 is
# contiguous, so the three sections below appear to belong to a single
# orphaned test-method body exercising SplitFeaturewiseDatasetMeasure.
# Section 1: NFold splitting -- expect one sensitivity slice per chunk,
# shaped (nchunks, nfeatures, nlabels).
234 ds = datasets['uni3small']
235 sana = SplitFeaturewiseDatasetMeasure(
236 analyzer=SMLR(
237 fit_all_weights=True).getSensitivityAnalyzer(combiner=None),
238 splitter=NFoldSplitter(),
239 combiner=None)
240
241 sens = sana(ds)
242
243 self.failUnless(sens.shape == (
244 len(ds.uniquechunks), ds.nfeatures, len(ds.uniquelabels)))
245
246
# Section 2: NoneSplitter with 25% of samples per label, 2 runs --
# expect 2 sensitivity slices, differing splits and differing results.
247
248 ds = datasets['uni3medium']
249 sana = SplitFeaturewiseDatasetMeasure(
250 analyzer=SMLR(
251 fit_all_weights=True).getSensitivityAnalyzer(combiner=None),
252 splitter=NoneSplitter(nperlabel=0.25, mode='first',
253 nrunspersplit=2),
254 combiner=None,
255 enable_states=['splits', 'sensitivities'])
256 sens = sana(ds)
257
258 self.failUnless(sens.shape == (2, ds.nfeatures, 3))
259 splits = sana.splits
260 self.failUnlessEqual(len(splits), 2)
# each first-half split should carry a quarter of the samples
261 self.failUnless(N.all([s[0].nsamples == ds.nsamples/4 for s in splits]))
262
# the two runs must not have drawn identical sample sets ...
263 self.failUnless(N.any([splits[0][0].origids != splits[1][0].origids]))
264
# ... and hence must not produce identical sensitivities
265 self.failUnless(N.any(sens[0] != sens[1]))
266
267
# Section 3 (scipy only): DistPValue-transformed SVM sensitivities,
# plain vs. split-boosted, driven through a feature-selection
# classifier under cross-validation.
268 if not externals.exists('scipy'):
269 return
270
271 ds = datasets['uni2medium']
272 plain_sana = SVM().getSensitivityAnalyzer(
273 combiner=None, transformer=DistPValue())
274 boosted_sana = SplitFeaturewiseDatasetMeasure(
275 analyzer=SVM().getSensitivityAnalyzer(
276 combiner=None, transformer=DistPValue(fpp=0.05)),
277 splitter=NoneSplitter(nperlabel=0.8, mode='first', nrunspersplit=2),
278 combiner=FirstAxisMean,
279 enable_states=['splits', 'sensitivities'])
280
# select both tails of the p-value distribution
281 fsel = RangeElementSelector(upper=0.1, lower=0.9, inclusive=True)
282
283 sanas = dict(plain=plain_sana, boosted=boosted_sana)
284 for k,sana in sanas.iteritems():
285 clf = FeatureSelectionClassifier(SVM(),
286 SensitivityBasedFeatureSelection(sana, fsel),
287 descr='SVM on p=0.2(both tails) using %s' % k)
288 ce = CrossValidatedTransferError(TransferError(clf),
289 NFoldSplitter())
# NOTE(review): `error` is never asserted on in the visible span --
# possibly only a smoke test, or assertions were removed from this dump.
290 error = ce(ds)
291
292 sens = boosted_sana(ds)
293 sens_plain = plain_sana(ds)
294
295
296
297
298
299
300
301
# NOTE(review): the test decorated below is gone -- its 'def' and body
# (embedded lines 303-324) are missing from this dump.  The body that
# follows (from embedded 328) belongs to a *different* test whose own
# 'def' line (embedded 326) is also missing: it exercises
# CombinedFeatureSelection with 'union' and 'intersection' combiners.
302 @sweepargs(basic_clf=clfswh['has_sensitivity'])
325
327
# two independent sensitivity-based selections: top 5% by ANOVA, and a
# range selection on C-implementation SMLR weights
328 fss = [SensitivityBasedFeatureSelection(
329 OneWayAnova(),
330 FractionTailSelector(0.05, mode='select', tail='upper')),
331 SensitivityBasedFeatureSelection(
332 SMLRWeights(SMLR(lm=1, implementation="C")),
333 RangeElementSelector(mode='select'))]
334
335 fs = CombinedFeatureSelection(fss, combiner='union',
336 enable_states=['selected_ids',
337 'selections_ids'])
338
339 od, otd = fs(self.dataset)
340
341 self.failUnless(fs.combiner == 'union')
342 self.failUnless(len(fs.selections_ids))
343 self.failUnless(len(fs.selections_ids) <= self.dataset.nfeatures)
# one id-list per child selection
344
345 self.failUnless(len(fs.selections_ids) == len(fss))
# under 'union', each child's selection is a subset of the combined one
346
347 for s in fs.selections_ids:
348 self.failUnless(len(s) <= len(fs.selected_ids))
# output dataset columns must map 1:1 onto the selected feature ids
349
350 self.failUnless(od.nfeatures == len(fs.selected_ids))
351 for i, id in enumerate(fs.selected_ids):
352 self.failUnless((od.samples[:,i]
353 == self.dataset.samples[:,id]).all())
354
355
# same setup with 'intersection' combiner
356 fs = CombinedFeatureSelection(fss, combiner='intersection',
357 enable_states=['selected_ids',
358 'selections_ids'])
359
# NOTE(review): numbering jumps 361 -> 366 -- the assertions for the
# intersection case were removed from this dump.
360 od, otd = fs(self.dataset)
361
366
367
# Standard PyMVPA test entry point.
# NOTE(review): the dump ends right after the import -- the original
# presumably invoked the runner (e.g. a run() call) on a following
# line that is missing here.
368 if __name__ == '__main__':
369 import runner
370