9 """Base class for data measures: algorithms that quantify properties of
10 datasets.
11
12 Besides the `DatasetMeasure` base class this module also provides the
13 (abstract) `FeaturewiseDatasetMeasure` class. The difference between a general
14 measure and the output of the `FeaturewiseDatasetMeasure` is that the latter
15 returns a 1d map (one value per feature in the dataset). In contrast there are
16 no restrictions on the returned value of `DatasetMeasure` except for that it
17 has to be in some iterable container.
18
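For example (a sketch; `OneWayAnova` is one featurewise measure shipped with
this package)::

  from mvpa.measures.anova import OneWayAnova

  aov = OneWayAnova()
  fscores = aov(dataset)  # 1d array with one F-score per feature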
19 """
20
21 __docformat__ = 'restructuredtext'
22
23 import numpy as N
24 import mvpa.support.copy as copy
25
26 from mvpa.misc.state import StateVariable, ClassWithCollections
27 from mvpa.misc.args import group_kwargs
28 from mvpa.misc.transformers import FirstAxisMean, SecondAxisSumOfAbs
29 from mvpa.base.dochelpers import enhancedDocString
30 from mvpa.base import externals, warning
31 from mvpa.clfs.stats import autoNullDist
32
33 if __debug__:
34 from mvpa.base import debug


class DatasetMeasure(ClassWithCollections):
38 """A measure computed from a `Dataset`
39
40 All dataset measures support arbitrary transformation of the measure
41 after it has been computed. Transformation are done by processing the
42 measure with a functor that is specified via the `transformer` keyword
43 argument of the constructor. Upon request, the raw measure (before
44 transformations are applied) is stored in the `raw_results` state variable.
45
46 Additionally all dataset measures support the estimation of the
47 probabilit(y,ies) of a measure under some distribution. Typically this will
48 be the NULL distribution (no signal), that can be estimated with
49 permutation tests. If a distribution estimator instance is passed to the
50 `null_dist` keyword argument of the constructor the respective
51 probabilities are automatically computed and stored in the `null_prob`
52 state variable.
53
54 .. note::
55 For developers: All subclasses shall get all necessary parameters via
56 their constructor, so it is possible to get the same type of measure for
57 multiple datasets by passing them to the __call__() method successively.
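
    For example (a sketch; `OneWayAnova` is one concrete measure, `MCNullDist`
    a Monte-Carlo permutation estimator -- the exact arguments shown here are
    illustrative)::

      from mvpa.measures.anova import OneWayAnova
      from mvpa.clfs.stats import MCNullDist

      aov = OneWayAnova(transformer=N.abs, null_dist=MCNullDist())
      fscores = aov(dataset)
      pvals = aov.states.null_prob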
58 """
59
60 raw_results = StateVariable(enabled=False,
61 doc="Computed results before applying any " +
62 "transformation algorithm")
63 null_prob = StateVariable(enabled=True)
64 """Stores the probability of a measure under the NULL hypothesis"""
65 null_t = StateVariable(enabled=False)
66 """Stores the t-score corresponding to null_prob under assumption
67 of Normal distribution"""

    def __init__(self, transformer=None, null_dist=None, **kwargs):
        """Does nothing special.

        :Parameters:
          transformer: Functor
            This functor is called in `__call__()` to perform a final
            processing step on the dataset measure to be returned. If None,
            nothing is called.
          null_dist: instance of distribution estimator
            The estimated distribution is used to assign a probability for a
            certain value of the computed measure.
        """
        ClassWithCollections.__init__(self, **kwargs)

        self.__transformer = transformer
        """Functor to be called in the return statement of all subclass
        __call__() methods."""
        null_dist_ = autoNullDist(null_dist)
        if __debug__:
            debug('SA', 'Assigning null_dist %s whenever original given was %s'
                  % (null_dist_, null_dist))
        self.__null_dist = null_dist_


    __doc__ = enhancedDocString('DatasetMeasure', locals(), ClassWithCollections)

    def __call__(self, dataset):
97 """Compute measure on a given `Dataset`.
98
99 Each implementation has to handle a single arguments: the source
100 dataset.
101
102 Returns the computed measure in some iterable (list-like)
103 container applying transformer if such is defined
104 """
105 result = self._call(dataset)
106 result = self._postcall(dataset, result)
107 return result
108

    def _call(self, dataset):
        """Actually compute measure on a given `Dataset`.

        Each implementation has to handle a single argument: the source
        dataset.

        Returns the computed measure in some iterable (list-like) container.
        """
        raise NotImplementedError


    def _postcall(self, dataset, result):
        """Some postprocessing on the result: apply the transformer and
        estimate the probability under the NULL distribution, if requested.
        """
        self.states.raw_results = result
        if self.__transformer is not None:
            if __debug__:
                debug("SA_", "Applying transformer %s" % self.__transformer)
            result = self.__transformer(result)

        if self.__null_dist is not None:
            if __debug__:
                debug("SA_", "Estimating NULL distribution using %s"
                      % self.__null_dist)

            # fit the distribution estimator with a copy of this measure;
            # the copy gets its own null_dist disabled to prevent infinite
            # recursion during the permutation runs
            measure = copy.copy(self)
            measure.__null_dist = None
            self.__null_dist.fit(measure, dataset)

            if self.states.isEnabled('null_t'):
                # to compute null_t both the probability and the tail it was
                # computed from are needed
                null_prob, null_right_tail = \
                           self.__null_dist.p(result, return_tails=True)
                self.states.null_prob = null_prob

                externals.exists('scipy', raiseException=True)
                from scipy.stats import norm

                # map the p-values onto CDF values appropriate for the tail
                # the distribution was estimated from
                tail = self.null_dist.tail
                if tail == 'left':
                    acdf = N.abs(null_prob)
                elif tail == 'right':
                    acdf = 1.0 - N.abs(null_prob)
                elif tail in ['any', 'both']:
                    acdf = 1.0 - N.clip(N.abs(null_prob), 0, 0.5)
                else:
                    raise RuntimeError, 'Unhandled tail %s' % tail

                # clip to avoid infinities from the inverse CDF at exactly
                # 0.0 or 1.0
                clip = 1e-16
                null_t = norm.ppf(N.clip(acdf, clip, 1.0 - clip))
                # assure that we are dealing with arrays
                null_t = N.array(null_t, ndmin=1, copy=False)
                # revert the sign for results in the left tail
                null_t[~null_right_tail] *= -1.0
                self.states.null_t = null_t
            else:
                # just get the probability of the result under the NULL
                # hypothesis, without any tail information
                self.states.null_prob = self.__null_dist.p(result)

        return result


    def __repr__(self, prefixes=[]):
186 """String representation of DatasetMeasure
187
188 Includes only arguments which differ from default ones
189 """
190 prefixes = prefixes[:]
191 if self.__transformer is not None:
192 prefixes.append("transformer=%s" % self.__transformer)
193 if self.__null_dist is not None:
194 prefixes.append("null_dist=%s" % self.__null_dist)
195 return super(DatasetMeasure, self).__repr__(prefixes=prefixes)
196
198 """'Untraining' Measure
199
200 Some derived classes might used classifiers, so we need to
201 untrain those
202 """
203 pass
204
    @property
    def null_dist(self):
        """Return the NULL distribution estimator"""
        return self.__null_dist

    @property
    def transformer(self):
        """Return the transformer"""
        return self.__transformer


class FeaturewiseDatasetMeasure(DatasetMeasure):
217 """A per-feature-measure computed from a `Dataset` (base class).
218
219 Should behave like a DatasetMeasure.
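
    A subclass only needs to implement `_call()`. A minimal sketch
    (`RangeMeasure` is a made-up example returning one value per feature)::

      class RangeMeasure(FeaturewiseDatasetMeasure):
          def _call(self, dataset):
              s = dataset.samples
              return s.max(axis=0) - s.min(axis=0)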
220 """
221
222 base_sensitivities = StateVariable(enabled=False,
223 doc="Stores basic sensitivities if the sensitivity " +
224 "relies on combining multiple ones")
225
    def __init__(self, combiner=SecondAxisSumOfAbs, **kwargs):
239 """Initialize
240
241 :Parameters:
242 combiner : Functor
243 The combiner is only applied if the computed featurewise dataset
244 measure is more than one-dimensional. This is different from a
245 `transformer`, which is always applied. By default, the sum of
246 absolute values along the second axis is computed.
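
        For example (a sketch of the intended default behavior), a 2d raw
        result is reduced to one value per feature::

          combined = SecondAxisSumOfAbs(raw)  # i.e. N.abs(raw).sum(axis=1)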
247 """
248 DatasetMeasure.__init__(self, **kwargs)
249
250 self.__combiner = combiner
251
259
260
    def _call(self, dataset):
        """Computes a per-feature-measure on a given `Dataset`.

        Behaves like a `DatasetMeasure`, but computes and returns a 1d ndarray
        with one value per feature.
        """
        raise NotImplementedError


    def _postcall(self, dataset, result):
        """Adjusts the per-feature-measure for the computed `result`.

        TODO: overlaps heavily in what it does with
          CombinedSensitivityAnalyzer, thus this one might make use of
          CombinedSensitivityAnalyzer, yoh thinks, and here
          base_sensitivities doesn't sound appropriate.
          MH: There is indeed some overlap, but also significant differences.
          This one operates on a single sensana and combines over the second
          axis, CombinedFeaturewiseDatasetMeasure uses the first axis.
          Additionally, the 'Sensitivity' base class is a
          FeaturewiseDatasetMeasure, which would have to be changed to
          CombinedFeaturewiseDatasetMeasure to deal with stuff like
          SMLRWeights that return multiple sensitivity values by default.
          Not sure if unification of both (and/or removal of the functionality
          here) does not lead to an overall more complicated situation,
          without any real gain -- after all this one works ;-)
        """
        # ensure there is at least one dimension, then remove degenerate ones
        result = N.atleast_1d(result)
        result_sq = result.squeeze()
        # but keep scalars as 1-item arrays
        result_sq = N.atleast_1d(result_sq)

        if len(result_sq.shape) > 1:
            n_base = result.shape[1]
            """Number of base sensitivities"""
            if self.states.isEnabled('base_sensitivities'):
                b_sensitivities = []
                if not self.states.isKnown('biases'):
                    biases = None
                else:
                    biases = self.states.biases
                    if len(self.states.biases) != n_base:
                        warning("Number of biases %d differs from number "
                                "of base sensitivities %d which could happen "
                                "when measure is collapsed across labels."
                                % (len(self.states.biases), n_base))
                for i in xrange(n_base):
                    if biases is not None:
                        if n_base > 1 and len(biases) == 1:
                            # the same bias is shared by all base
                            # sensitivities
                            bias = biases[0]
                        else:
                            bias = biases[i]
                    else:
                        bias = None
                    b_sensitivities.append(
                        StaticDatasetMeasure(
                            measure=result[:, i],
                            bias=bias))
                self.states.base_sensitivities = b_sensitivities

            # combine the sensitivities if a combiner was provided
            if self.__combiner is not None:
                result = self.__combiner(result)
        else:
            # a 1d result cannot be combined, so return the version with
            # degenerate dimensions removed
            result = result_sq

        # call base class postcall to handle transformer and null_dist
        result = DatasetMeasure._postcall(self, dataset, result)

        return result


    @property
    def combiner(self):
        """Return the combiner"""
        return self.__combiner


class StaticDatasetMeasure(DatasetMeasure):
348 """A static (assigned) sensitivity measure.
349
350 Since implementation is generic it might be per feature or
351 per whole dataset
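
    A minimal usage sketch (the values are made up)::

      sens = StaticDatasetMeasure(measure=N.array([0.5, 1.5]), bias=0.0)
      sens(dataset)  # returns the assigned measure, ignoring the dataset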
352 """
353
354 - def __init__(self, measure=None, bias=None, *args, **kwargs):
355 """Initialize.
356
357 :Parameters:
358 measure
359 actual sensitivity to be returned
360 bias
361 optionally available bias
362 """
363 DatasetMeasure.__init__(self, *args, **kwargs)
364 if measure is None:
365 raise ValueError, "Sensitivity measure has to be provided"
366 self.__measure = measure
367 self.__bias = bias
368
369 - def _call(self, dataset):
370 """Returns assigned sensitivity
371 """
372 return self.__measure
373
374
375 bias = property(fget=lambda self:self.__bias)
376

class Sensitivity(FeaturewiseDatasetMeasure):
    """Base class for all sensitivity analyzers that rely on a classifier."""

    _LEGAL_CLFS = []
    """If Sensitivity is classifier specific, classes of classifiers
    should be listed in the list
    """

    def __init__(self, clf, force_training=True, **kwargs):
        """Initialize the analyzer with the classifier it shall use.

        :Parameters:
          clf : :class:`Classifier`
            classifier to use.
          force_training : Bool
            if True (default), the classifier is (re)trained on the dataset
            passed to `__call__()`, even if it was trained before
        """
        FeaturewiseDatasetMeasure.__init__(self, **kwargs)

        _LEGAL_CLFS = self._LEGAL_CLFS
        if len(_LEGAL_CLFS) > 0:
            found = False
            for clf_class in _LEGAL_CLFS:
                if isinstance(clf, clf_class):
                    found = True
                    break
            if not found:
                raise ValueError, \
                      "Classifier %s has to be of allowed class (%s), but is %s" \
                      % (clf, _LEGAL_CLFS, `type(clf)`)

        self.__clf = clf
        """Classifier used to compute sensitivity"""

        self._force_training = force_training
        """Flag whether to force training of the classifier"""

    def __repr__(self, prefixes=None):
        if prefixes is None:
            prefixes = []
        prefixes.append("clf=%s" % repr(self.clf))
        if not self._force_training:
            prefixes.append("force_training=%s" % self._force_training)
        return super(Sensitivity, self).__repr__(prefixes=prefixes)


    def __call__(self, dataset=None):
        """Train classifier on `dataset` and then compute actual sensitivity.

        If the classifier is already trained it is possible to extract the
        sensitivities without passing a dataset.
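
        For example (a sketch; `SMLR` is one classifier whose weights provide
        sensitivities, and classifiers can construct a matching analyzer via
        `getSensitivityAnalyzer()`)::

          from mvpa.clfs.smlr import SMLR

          sana = SMLR().getSensitivityAnalyzer()
          sens = sana(dataset)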
434 """
435
436 clf = self.__clf
437 if not clf.trained or self._force_training:
438 if dataset is None:
439 raise ValueError, \
440 "Training classifier to compute sensitivities requires " \
441 "a dataset."
442 if __debug__:
443 debug("SA", "Training classifier %s %s" %
444 (`clf`,
445 {False: "since it wasn't yet trained",
446 True: "although it was trained previousely"}
447 [clf.trained]))
448 clf.train(dataset)
449
450 return FeaturewiseDatasetMeasure.__call__(self, dataset)
451

    def _setClassifier(self, clf):
        self.__clf = clf

    def untrain(self):
        """Untrain the corresponding classifier for this Sensitivity
        """
        if self.__clf is not None:
            self.__clf.untrain()

    @property
    def feature_ids(self):
        """Return feature_ids used by the underlying classifier
        """
        return self.__clf._getFeatureIds()

    clf = property(fget=lambda self: self.__clf,
                   fset=_setClassifier)


class CombinedFeaturewiseDatasetMeasure(FeaturewiseDatasetMeasure):
    """Set sensitivity analyzers to be merged into a single output"""

    sensitivities = StateVariable(enabled=False,
        doc="Sensitivities produced by each analyzer")

    def __init__(self, analyzers=None,
                 combiner=None,
                 **kwargs):
        """Initialize CombinedFeaturewiseDatasetMeasure

        :Parameters:
          analyzers : list or None
            List of analyzers to be used. There is no logic to populate
            such a list in __call__, so it must be either provided to
            the constructor or assigned to .analyzers prior to calling
        if analyzers is None:
            analyzers = []

        FeaturewiseDatasetMeasure.__init__(self, **kwargs)
        self.__analyzers = analyzers
        """List of analyzers to use"""

        self.__combiner = combiner
        """Which functor to use to combine all sensitivities"""


    def _call(self, dataset):

    def untrain(self):
        """Untrain CombinedFDM
        """
        if self.__analyzers is not None:
            for anal in self.__analyzers:
                anal.untrain()

537 """Set the analyzers
538 """
539 self.__analyzers = analyzers
540 """Analyzers to use"""
541
542 analyzers = property(fget=lambda x:x.__analyzers,
543 fset=_setAnalyzers,
544 doc="Used analyzers")


class SplitFeaturewiseDatasetMeasure(FeaturewiseDatasetMeasure):
    """Compute measures across splits for a specific analyzer"""

    sensitivities = StateVariable(enabled=False,
        doc="Sensitivities produced for each split")

    splits = StateVariable(enabled=False, doc=
        """Store the actual splits of the data. Can be memory expensive""")

    def __init__(self, splitter, analyzer,
                 insplit_index=0, combiner=None, **kwargs):
        """Initialize SplitFeaturewiseDatasetMeasure

        :Parameters:
          splitter : Splitter
            Splitter to use to split the dataset
          analyzer : DatasetMeasure
            Measure to be used. Could be an analyzer as well (XXX)
          insplit_index : int
            the splitter generates tuples of datasets on each iteration
            (usually 0th for training, 1st for testing).
            On what split index in that tuple to operate.
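
        For example (a sketch; `NFoldSplitter` is one splitter shipped in
        `mvpa.datasets.splitters`)::

          sana = SplitFeaturewiseDatasetMeasure(
                     splitter=NFoldSplitter(),
                     analyzer=OneWayAnova())
          sens_per_split = sana(dataset)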
576 """
577
578
579
580
581
582
583
584
585 FeaturewiseDatasetMeasure.__init__(self, combiner=None, **kwargs)
586
587 self.__analyzer = analyzer
588 """Analyzer to use per split"""
589
590 self.__combiner = combiner
591 """Which functor to use to combine all sensitivities"""
592
593 self.__splitter = splitter
594 """Splitter to be used on the dataset"""
595
596 self.__insplit_index = insplit_index
597

    def untrain(self):
        """Untrain SplitFeaturewiseDatasetMeasure
        """
        if self.__analyzer is not None:
            self.__analyzer.untrain()

    def _call(self, dataset):

640 """Set sensitivity analyzers to be merged into a single output"""
641
642
643
644 @group_kwargs(prefixes=['slave_'], assign=True)
645 - def __init__(self,
646 clf,
647 analyzer=None,
648 combined_analyzer=None,
649 slave_kwargs={},
650 **kwargs):
651 """Initialize Sensitivity Analyzer for `BoostedClassifier`
652
653 :Parameters:
654 clf : `BoostedClassifier`
655 Classifier to be used
656 analyzer : analyzer
657 Is used to populate combined_analyzer
658 slave_*
659 Arguments to pass to created analyzer if analyzer is None
660 """
661 Sensitivity.__init__(self, clf, **kwargs)
662 if combined_analyzer is None:
663
664 kwargs.pop('force_training', None)
665 combined_analyzer = CombinedFeaturewiseDatasetMeasure(**kwargs)
666 self.__combined_analyzer = combined_analyzer
667 """Combined analyzer to use"""
668
669 if analyzer is not None and len(self._slave_kwargs):
670 raise ValueError, \
671 "Provide either analyzer of slave_* arguments, not both"
672 self.__analyzer = analyzer
673 """Analyzer to use for basic classifiers within boosted classifier"""

    def untrain(self):
        """Untrain BoostedClassifierSensitivityAnalyzer
        """
        if self.__analyzer is not None:
            self.__analyzer.untrain()
        self.__combined_analyzer.untrain()

    def _call(self, dataset):

    combined_analyzer = property(fget=lambda x: x.__combined_analyzer)

720 """Set sensitivity analyzer output just to pass through"""
721
722 clf_sensitivities = StateVariable(enabled=False,
723 doc="Stores sensitivities of the proxied classifier")
724
725
726 @group_kwargs(prefixes=['slave_'], assign=True)
727 - def __init__(self,
728 clf,
729 analyzer=None,
730 **kwargs):
731 """Initialize Sensitivity Analyzer for `BoostedClassifier`
732 """
733 Sensitivity.__init__(self, clf, **kwargs)
734
735 if analyzer is not None and len(self._slave_kwargs):
736 raise ValueError, \
737 "Provide either analyzer of slave_* arguments, not both"
738
739 self.__analyzer = analyzer
740 """Analyzer to use for basic classifiers within boosted classifier"""
741

    def _call(self, dataset):

    analyzer = property(fget=lambda x: x.__analyzer)

784 """Set sensitivity analyzer output be reverse mapped using mapper of the
785 slave classifier"""
786
787 - def _call(self, dataset):
795
798 """Set sensitivity analyzer output be reverse mapped using mapper of the
799 slave classifier"""
800
801 - def _call(self, dataset):
809