"""Unit tests for PyMVPA stats helpers -- those requiring scipy"""

# Pulls in numpy (as N), the shared `datasets`, `cfg`, null-distribution
# classes and the rest of the common test machinery.
from test_stats import *

# Fail immediately with a clear exception if scipy is unavailable --
# every test in this module depends on it.
externals.exists('scipy', raiseException=True)

from scipy import signal
from mvpa.misc.stats import chisquare
class StatsTestsScipy(unittest.TestCase):
    """Unittests for various statistics which use scipy"""
22 """Test chi-square distribution"""
23
24 tbl = N.array([[5, 5], [5, 5]])
25 chi, p = chisquare(tbl)
26 self.failUnless( chi == 0.0 )
27 self.failUnless( p == 1.0 )
28
29
30 tbl = N.array([[4, 0], [0, 4]])
31 chi, p = chisquare(tbl)
32 self.failUnless(chi == 8.0)
33 self.failUnless(p < 0.05)
34
35
37 """Test 'any' tail statistics estimation"""
38 if not externals.exists('scipy'):
39 return
40
41
42 from mvpa.measures.corrcoef import CorrCoef
43 ds = datasets['uni2medium']
44
45 null = MCNullDist(permutations=20, tail='any')
46 null.fit(CorrCoef(), ds)
47
48
49
50 pm100 = null.p([-100] + [0]*(ds.nfeatures-1))
51 p100 = null.p([100] + [0]*(ds.nfeatures-1))
52 assert_array_almost_equal(pm100, p100)
53
54
55
56
57 self.failUnless(pm100[0] <= 0.1)
58 self.failUnless(p100[0] <= 0.1)
59
60 self.failUnless(N.all(pm100[1:] >= 0.1))
61 self.failUnless(N.all(pm100[1:] >= 0.1))
62
63
64 null.fit(CorrCoef(), ds.selectFeatures([0]))
65 self.failUnlessAlmostEqual(null.p(-100), null.p(100))
66 self.failUnless(null.p(100) <= 0.1)
67
68
69 @sweepargs(nd=nulldist_sweep)
71 """Test estimation of measures statistics"""
72 if not externals.exists('scipy'):
73
74 return
75
76 ds = datasets['uni2medium']
77
78 m = OneWayAnova(null_dist=nd, enable_states=['null_t'])
79 score = m(ds)
80
81 score_nonbogus = N.mean(score[ds.nonbogus_features])
82 score_bogus = N.mean(score[ds.bogus_features])
83
84 self.failUnless(score_bogus < score_nonbogus)
85
86 null_prob_nonbogus = m.null_prob[ds.nonbogus_features]
87 null_prob_bogus = m.null_prob[ds.bogus_features]
88
89 self.failUnless((null_prob_nonbogus < 0.05).all(),
90 msg="Nonbogus features should have a very unlikely value. Got %s"
91 % null_prob_nonbogus)
92
93
94 self.failUnless(N.mean(N.abs(null_prob_bogus)) >
95 N.mean(N.abs(null_prob_nonbogus)))
96
97 if isinstance(nd, MCNullDist):
98
99 return
100
101 if cfg.getboolean('tests', 'labile', default='yes'):
102
103
104 self.failUnless((N.abs(m.null_t[ds.nonbogus_features]) >= 5).all(),
105 msg="Nonbogus features should have high t-score. Got %s"
106 % (m.null_t[ds.nonbogus_features]))
107
108 bogus_min = min(N.abs(m.null_t[ds.bogus_features]))
109 self.failUnless(bogus_min < 4,
110 msg="Some bogus features should have low t-score of %g."
111 "Got (t,p,sens):%s"
112 % (bogus_min,
113 zip(m.null_t[ds.bogus_features],
114 m.null_prob[ds.bogus_features],
115 score[ds.bogus_features])))
116
117
119 """Basic testing of the sign in p and t scores
120 """
121 from mvpa.measures.base import FeaturewiseDatasetMeasure
122
123 class BogusMeasure(FeaturewiseDatasetMeasure):
124 """Just put high positive into first 2 features, and high
125 negative into 2nd two
126 """
127 def _call(self, dataset):
128 """just a little helper... pylint shut up!"""
129 res = N.random.normal(size=(dataset.nfeatures,))
130 res[0] = res[1] = 100
131 res[2] = res[3] = -100
132 return res
133
134 nd = FixedNullDist(scipy.stats.norm(0, 0.1), tail='any')
135 m = BogusMeasure(null_dist=nd, enable_states=['null_t'])
136 ds = datasets['uni2small']
137 score = m(ds)
138 t, p = m.null_t, m.null_prob
139 self.failUnless((p>=0).all())
140 self.failUnless((t[:2] > 0).all())
141 self.failUnless((t[2:4] < 0).all())
142
143
145 """Some really basic testing for matchDistribution
146 """
147 from mvpa.clfs.stats import matchDistribution, rv_semifrozen
148
149 ds = datasets['uni2medium']
150 data = ds.samples[:, ds.bogus_features[0]]
151
152
153
154
155 floc = rv_semifrozen(scipy.stats.norm, loc=0).fit(data)
156 self.failUnless(floc[0] == 0)
157
158 fscale = rv_semifrozen(scipy.stats.norm, scale=1.0).fit(data)
159 self.failUnless(fscale[1] == 1)
160
161 flocscale = rv_semifrozen(scipy.stats.norm, loc=0, scale=1.0).fit(data)
162 self.failUnless(flocscale[1] == 1 and flocscale[0] == 0)
163
164 full = scipy.stats.norm.fit(data)
165 for res in [floc, fscale, flocscale, full]:
166 self.failUnless(len(res) == 2)
167
168 data_mean = N.mean(data)
169 for loc in [None, data_mean]:
170 for test in ['p-roc', 'kstest']:
171
172 matched = matchDistribution(
173 data=data,
174 distributions = ['scipy',
175 ('norm',
176 {'name': 'norm_fixed',
177 'loc': 0.2,
178 'scale': 0.3})],
179 test=test, loc=loc, p=0.05)
180
181 names = [m[2] for m in matched]
182 if test == 'p-roc':
183 if cfg.getboolean('tests', 'labile', default='yes'):
184
185 self.failUnless('norm' in names)
186 self.failUnless('norm_fixed' in names)
187 inorm = names.index('norm_fixed')
188
189
190 self.failUnless(inorm <= 30)
191
193 """Test either rdist distribution performs nicely
194 """
195 try:
196
197 scipy.stats.rdist(1.32, 0, 1).pdf(-1.0+N.finfo(float).eps)
198 except Exception, e:
199 self.fail('Failed to compute rdist.pdf due to numeric'
200 ' loss of precision. Exception was %s' % e)
201
202 try:
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220 scipy.stats.rdist(1.32, 0, 1).cdf(-1.0+N.finfo(float).eps)
221 except IndexError, e:
222 self.fail('Failed due to bug which leads to InvalidIndex if only'
223 ' scalar is provided to cdf')
224 except Exception, e:
225 self.fail('Failed to compute rdist.cdf due to numeric'
226 ' loss of precision. Exception was %s' % e)
227
228 v = scipy.stats.rdist(10000, 0, 1).cdf([-0.1])
229 self.failUnless(v>=0, v<=1)
230
231
233 ds = datasets['uni2large']
234
235 fwm = OneWayAnova()
236 f = fwm(ds)
237
238 f_sp = f_oneway(ds['labels', [1]].samples,
239 ds['labels', [0]].samples)
240
241
242 assert_array_almost_equal(f, f_sp[0])
243
244
245
247 """Test GLM
248 """
249
250
251 hrf_x = N.linspace(0, 25, 250)
252 hrf = doubleGammaHRF(hrf_x) - singleGammaHRF(hrf_x, 0.8, 1, 0.05)
253
254
255 samples = 1800
256 fast_er_onsets = N.array([10, 200, 250, 500, 600, 900, 920, 1400])
257 fast_er = N.zeros(samples)
258 fast_er[fast_er_onsets] = 1
259
260
261 model_hr = N.convolve(fast_er, hrf)[:samples]
262
263
264 tr = 2.0
265 model_lr = signal.resample(model_hr,
266 int(samples / tr / 10),
267 window='ham')
268
269
270
271 baseline = 800.0
272 wsignal = baseline + 2 * model_lr + \
273 N.random.randn(int(samples / tr / 10)) * 0.2
274 nsignal = baseline + N.random.randn(int(samples / tr / 10)) * 0.5
275
276
277 X = N.array([model_lr, N.repeat(1, len(model_lr))]).T
278
279
280 data = Dataset(samples=N.array((wsignal, nsignal, nsignal)).T, labels=1)
281
282
283 glm = GLM(X, combiner=None)
284 betas = glm(data)
285
286
287 self.failUnless(betas.shape == (data.nfeatures, X.shape[1]))
288
289 self.failUnless(N.absolute(betas[:, 1] - baseline < 10).all(),
290 msg="baseline betas should be huge and around 800")
291
292 self.failUnless(betas[0][0] > betas[1, 0],
293 msg="feature (with signal) beta should be larger than for noise")
294
295 if cfg.getboolean('tests', 'labile', default='yes'):
296 self.failUnless(N.absolute(betas[1, 0]) < 0.5)
297 self.failUnless(N.absolute(betas[0, 0]) > 1.0)
298
299
300
301 glm = GLM(X, voi='zstat', combiner=None)
302 zstats = glm(data)
303
304 self.failUnless(zstats.shape == betas.shape)
305
306 self.failUnless((zstats[:, 1] > 1000).all(),
307 msg='constant zstats should be huge')
308
309 if cfg.getboolean('tests', 'labile', default='yes'):
310 self.failUnless(N.absolute(betas[0, 0]) > betas[1][0],
311 msg='with signal should have higher zstats')
312
313
315 """Test if binomial distribution works ok
316
317 after possibly a monkey patch
318 """
319 bdist = scipy.stats.binom(100, 0.5)
320 self.failUnless(bdist.ppf(1.0) == 100)
321 self.failUnless(bdist.ppf(0.9) <= 60)
322 self.failUnless(bdist.ppf(0.5) == 50)
323 self.failUnless(bdist.ppf(0) == -1)
324
def suite():
    """Return this module's tests for the PyMVPA test runner.

    NOTE(review): the original ``suite()`` lines were lost in extraction;
    reconstructed per the convention used by the sibling test modules --
    confirm against the repository.
    """
    return unittest.makeSuite(StatsTestsScipy)


if __name__ == '__main__':
    import runner