Package mvpa :: Package measures :: Module anova
[hide private]
[frames] | no frames]

Source Code for Module mvpa.measures.anova

  1  # emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*- 
  2  # vi: set ft=python sts=4 ts=4 sw=4 et: 
  3  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
  4  # 
  5  #   See COPYING file distributed along with the PyMVPA package for the 
  6  #   copyright and license terms. 
  7  # 
  8  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
  9  """FeaturewiseDatasetMeasure performing a univariate ANOVA.""" 
 10   
 11  __docformat__ = 'restructuredtext' 
 12   
 13  import numpy as N 
 14   
 15  from mvpa.measures.base import FeaturewiseDatasetMeasure 
 16   
 17  # TODO: Extend with access to functionality from scipy.stats? 
 18  # For binary: 
  19  #  2-sample Kolmogorov-Smirnov test might be interesting 
 20  #   (scipy.stats.ks_2samp) to judge if two conditions are derived 
 21  #   from different distributions (take it as 'activity' vs 'rest'), 
 22  # 
 23  # For binary+multiclass: 
 24  #  kruskal-wallis H-test (scipy.stats.kruskal) 
 25  # 
  26  # and maybe some others 
 27   
class OneWayAnova(FeaturewiseDatasetMeasure):
    """`FeaturewiseDatasetMeasure` that performs a univariate ANOVA.

    F-scores are computed for each feature as the standard fraction of between
    and within group variances. Groups are defined by samples with unique
    labels.

    No statistical testing is performed, but raw F-scores are returned as a
    sensitivity map. As usual F-scores have a range of [0,inf] with greater
    values indicating higher sensitivity.
    """

    def _call(self, dataset, labels=None):
        """Computes featurewise F-scores.

        :Parameters:
          dataset
            Object providing `samples` (summed along its first axis, and
            indexed along that axis by the label masks), `nsamples` and
            `labels`.
          labels
            Optional sequence of labels that defines the groups. If `None`,
            `dataset.labels` is used.

        :Returns:
          Array of F-scores (one value per position along the remaining
          axes of `samples`). NaN scores are replaced with 0.
        """
        # This code is based on SciPy's stats.f_oneway()
        # Copyright (c) Gary Strangman. All rights reserved
        # License: BSD
        #
        # However, it got tweaked and optimized to better fit into PyMVPA.

        if labels is None:
            labels = dataset.labels
        # make sure `labels == l` below yields an elementwise boolean mask
        # even if a plain sequence (e.g. a list) was provided
        labels = N.asarray(labels)

        ul = N.unique(labels)

        # number of groups
        na = len(ul)
        bign = float(dataset.nsamples)
        alldata = dataset.samples
        # The in-place arithmetic below (`*=`, `/=`) operates on sums that
        # inherit the dtype of the samples. For non-floating (e.g. integer)
        # data that would truncate, overflow, or fail to cast, so promote
        # such data to float once. Floating point data is left untouched.
        if not N.issubdtype(alldata.dtype, N.inexact):
            alldata = alldata.astype(float)

        # total squares of sums
        sostot = N.sum(alldata, axis=0)
        sostot *= sostot
        sostot /= bign

        # total sum of squares
        sstot = N.sum(alldata * alldata, axis=0) - sostot

        # between group sum of squares
        ssbn = 0
        for l in ul:
            # all samples for the respective label
            d = alldata[labels == l]
            sos = N.sum(d, axis=0)
            sos *= sos
            ssbn += sos / float(len(d))

        ssbn -= sostot
        # within
        sswn = sstot - ssbn

        # degrees of freedom
        dfbn = na - 1
        dfwn = bign - na

        # mean sums of squares
        msb = ssbn / float(dfbn)
        msw = sswn / float(dfwn)
        f = msb / msw
        # assure no NaNs -- otherwise it leads instead of
        # sane unittest failure (check of NaNs) to crazy
        #   File "mtrand.pyx", line 1661, in mtrand.shuffle
        #  TypeError: object of type 'numpy.int64' has no len()
        # without any sane backtrace
        f[N.isnan(f)] = 0

        return f

        # XXX maybe also compute p-values?
        #prob = scipy.stats.fprob(dfbn, dfwn, f)
        #return prob


class CompoundOneWayAnova(OneWayAnova):
    """Compound comparisons via univariate ANOVA.

    Provides F-scores per each label if compared to the other labels.
    """

    def _call(self, dataset):
        """Computes featurewise f-scores using compound comparisons.

        For every entry of `dataset.uniquelabels` a two-group ANOVA is
        computed via `OneWayAnova._call`: samples carrying that label form
        one group, all remaining samples form the other.

        :Parameters:
          dataset
            Object providing `labels` and `uniquelabels`, plus whatever
            `OneWayAnova._call` reads from it.

        :Returns:
          Transposed array of the per-label results, i.e. features x labels.
        """
        orig_labels = N.asarray(dataset.labels)

        results = []
        for ul in dataset.uniquelabels:
            # Build a fresh integer group coding (1: current label, 2: rest)
            # instead of writing 1/2 into a copy of the original labels:
            # the latter coerces the codes to the dtype of the labels, which
            # e.g. for boolean labels maps both codes onto the same value.
            labels = N.where(orig_labels == ul, 1, 2)
            results.append(OneWayAnova._call(self, dataset, labels))

        # features x labels
        return N.array(results).T
120