"""Unit tests for PyMVPA stats helpers -- those requiring scipy"""

# Pulls in numpy (as N), the shared `datasets`, `cfg`, null-distribution
# classes and the rest of the common test machinery.
from test_stats import *

# Fail immediately with a clear exception if scipy is unavailable --
# every test in this module depends on it.
externals.exists('scipy', raiseException=True)

from scipy import signal
from mvpa.misc.stats import chisquare
class StatsTestsScipy(unittest.TestCase):
    """Unittests for various statistics which use scipy"""
22 """Test chi-square distribution"""
23
24 tbl = N.array([[5, 5], [5, 5]])
25 chi, p = chisquare(tbl)
26 self.failUnless( chi == 0.0 )
27 self.failUnless( p == 1.0 )
28
29
30 tbl = N.array([[4, 0], [0, 4]])
31 chi, p = chisquare(tbl)
32 self.failUnless(chi == 8.0)
33 self.failUnless(p < 0.05)
34
35
37 """Test 'any' tail statistics estimation"""
38 if not externals.exists('scipy'):
39 return
40
41
42 from mvpa.measures.corrcoef import CorrCoef
43 ds = datasets['uni2medium']
44
45 null = MCNullDist(permutations=20, tail='any')
46 null.fit(CorrCoef(), ds)
47
48
49
50 pm100 = null.p([-100] + [0]*(ds.nfeatures-1))
51 p100 = null.p([100] + [0]*(ds.nfeatures-1))
52 assert_array_almost_equal(pm100, p100)
53
54
55
56
57 self.failUnless(pm100[0] <= 0.1)
58 self.failUnless(p100[0] <= 0.1)
59
60 self.failUnless(N.all(pm100[1:] >= 0.1))
61 self.failUnless(N.all(pm100[1:] >= 0.1))
62
63
64 null.fit(CorrCoef(), ds.selectFeatures([0]))
65 self.failUnlessAlmostEqual(null.p(-100), null.p(100))
66 self.failUnless(null.p(100) <= 0.1)
67
68
69 @sweepargs(nd=nulldist_sweep)
71 """Test estimation of measures statistics"""
72 if not externals.exists('scipy'):
73
74 return
75
76 ds = datasets['uni2medium']
77
78 m = OneWayAnova(null_dist=nd, enable_states=['null_t'])
79 score = m(ds)
80
81 score_nonbogus = N.mean(score[ds.nonbogus_features])
82 score_bogus = N.mean(score[ds.bogus_features])
83
84 self.failUnless(score_bogus < score_nonbogus)
85
86 null_prob_nonbogus = m.null_prob[ds.nonbogus_features]
87 null_prob_bogus = m.null_prob[ds.bogus_features]
88
89 self.failUnless((null_prob_nonbogus < 0.05).all(),
90 msg="Nonbogus features should have a very unlikely value. Got %s"
91 % null_prob_nonbogus)
92
93
94 self.failUnless(N.mean(N.abs(null_prob_bogus)) >
95 N.mean(N.abs(null_prob_nonbogus)))
96
97 if isinstance(nd, MCNullDist):
98
99 return
100
101 if cfg.getboolean('tests', 'labile', default='yes'):
102
103
104 self.failUnless((N.abs(m.null_t[ds.nonbogus_features]) >= 5).all(),
105 msg="Nonbogus features should have high t-score. Got %s"
106 % (m.null_t[ds.nonbogus_features]))
107
108 bogus_min = min(N.abs(m.null_t[ds.bogus_features]))
109 self.failUnless(bogus_min < 4,
110 msg="Some bogus features should have low t-score of %g."
111 "Got (t,p,sens):%s"
112 % (bogus_min,
113 zip(m.null_t[ds.bogus_features],
114 m.null_prob[ds.bogus_features],
115 score[ds.bogus_features])))
116
117
119 """Basic testing of the sign in p and t scores
120 """
121 from mvpa.measures.base import FeaturewiseDatasetMeasure
122
123 class BogusMeasure(FeaturewiseDatasetMeasure):
124 """Just put high positive into first 2 features, and high
125 negative into 2nd two
126 """
127 def _call(self, dataset):
128 """just a little helper... pylint shut up!"""
129 res = N.random.normal(size=(dataset.nfeatures,))
130 res[0] = res[1] = 100
131 res[2] = res[3] = -100
132 return res
133
134 nd = FixedNullDist(scipy.stats.norm(0, 0.1), tail='any')
135 m = BogusMeasure(null_dist=nd, enable_states=['null_t'])
136 ds = datasets['uni2small']
137 score = m(ds)
138 t, p = m.null_t, m.null_prob
139 self.failUnless((p>=0).all())
140 self.failUnless((t[:2] > 0).all())
141 self.failUnless((t[2:4] < 0).all())
142
143
145 """Some really basic testing for matchDistribution
146 """
147 from mvpa.clfs.stats import matchDistribution, rv_semifrozen
148
149 ds = datasets['uni2medium']
150 data = ds.samples[:, ds.bogus_features[0]]
151
152
153
154
155 floc = rv_semifrozen(scipy.stats.norm, loc=0).fit(data)
156 self.failUnless(floc[0] == 0)
157
158 fscale = rv_semifrozen(scipy.stats.norm, scale=1.0).fit(data)
159 self.failUnless(fscale[1] == 1)
160
161 flocscale = rv_semifrozen(scipy.stats.norm, loc=0, scale=1.0).fit(data)
162 self.failUnless(flocscale[1] == 1 and flocscale[0] == 0)
163
164 full = scipy.stats.norm.fit(data)
165 for res in [floc, fscale, flocscale, full]:
166 self.failUnless(len(res) == 2)
167
168 data_mean = N.mean(data)
169 for loc in [None, data_mean]:
170 for test in ['p-roc', 'kstest']:
171
172 matched = matchDistribution(
173 data=data,
174 distributions = ['scipy',
175 ('norm',
176 {'name': 'norm_fixed',
177 'loc': 0.2,
178 'scale': 0.3})],
179 test=test, loc=loc, p=0.05)
180
181 names = [m[2] for m in matched]
182 if test == 'p-roc':
183 if cfg.getboolean('tests', 'labile', default='yes'):
184
185 self.failUnless('norm' in names)
186 self.failUnless('norm_fixed' in names)
187 inorm = names.index('norm_fixed')
188
189
190 self.failUnless(inorm <= 30)
191
193 """Test either rdist distribution performs nicely
194 """
195 try:
196
197 scipy.stats.rdist(1.32, 0, 1).pdf(-1.0+N.finfo(float).eps)
198 except Exception, e:
199 self.fail('Failed to compute rdist.pdf due to numeric'
200 ' loss of precision. Exception was %s' % e)
201
202 try:
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220 scipy.stats.rdist(1.32, 0, 1).cdf(-1.0+N.finfo(float).eps)
221 except IndexError, e:
222 self.fail('Failed due to bug which leads to InvalidIndex if only'
223 ' scalar is provided to cdf')
224 except Exception, e:
225 self.fail('Failed to compute rdist.cdf due to numeric'
226 ' loss of precision. Exception was %s' % e)
227
228 v = scipy.stats.rdist(10000, 0, 1).cdf([-0.1])
229 self.failUnless(v>=0, v<=1)
230
231
233 ds = datasets['uni2large']
234
235 fwm = OneWayAnova()
236 f = fwm(ds)
237
238 f_sp = f_oneway(ds['labels', [1]].samples,
239 ds['labels', [0]].samples)
240
241
242 assert_array_almost_equal(f, f_sp[0])
243
244
245
247 """Test GLM
248 """
249
250
251 hrf_x = N.linspace(0, 25, 250)
252 hrf = doubleGammaHRF(hrf_x) - singleGammaHRF(hrf_x, 0.8, 1, 0.05)
253
254
255 samples = 1800
256 fast_er_onsets = N.array([10, 200, 250, 500, 600, 900, 920, 1400])
257 fast_er = N.zeros(samples)
258 fast_er[fast_er_onsets] = 1
259
260
261 model_hr = N.convolve(fast_er, hrf)[:samples]
262
263
264 tr = 2.0
265 model_lr = signal.resample(model_hr,
266 int(samples / tr / 10),
267 window='ham')
268
269
270
271 baseline = 800.0
272 wsignal = baseline + 2 * model_lr + \
273 N.random.randn(int(samples / tr / 10)) * 0.2
274 nsignal = baseline + N.random.randn(int(samples / tr / 10)) * 0.5
275
276
277 X = N.array([model_lr, N.repeat(1, len(model_lr))]).T
278
279
280 data = Dataset(samples=N.array((wsignal, nsignal, nsignal)).T, labels=1)
281
282
283 glm = GLM(X, combiner=None)
284 betas = glm(data)
285
286
287 self.failUnless(betas.shape == (data.nfeatures, X.shape[1]))
288
289 self.failUnless(N.absolute(betas[:, 1] - baseline < 10).all(),
290 msg="baseline betas should be huge and around 800")
291
292 self.failUnless(betas[0][0] > betas[1, 0],
293 msg="feature (with signal) beta should be larger than for noise")
294
295 if cfg.getboolean('tests', 'labile', default='yes'):
296 self.failUnless(N.absolute(betas[1, 0]) < 0.5)
297 self.failUnless(N.absolute(betas[0, 0]) > 1.0)
298
299
300
301 glm = GLM(X, voi='zstat', combiner=None)
302 zstats = glm(data)
303
304 self.failUnless(zstats.shape == betas.shape)
305
306 self.failUnless((zstats[:, 1] > 1000).all(),
307 msg='constant zstats should be huge')
308
309 if cfg.getboolean('tests', 'labile', default='yes'):
310 self.failUnless(N.absolute(betas[0, 0]) > betas[1][0],
311 msg='with signal should have higher zstats')
312
313
315 """Test if binomial distribution works ok
316
317 after possibly a monkey patch
318 """
319 bdist = scipy.stats.binom(100, 0.5)
320 self.failUnless(bdist.ppf(1.0) == 100)
321 self.failUnless(bdist.ppf(0.9) <= 60)
322 self.failUnless(bdist.ppf(0.5) == 50)
323 self.failUnless(bdist.ppf(0) == -1)
324
def suite():
    """Return this module's tests for the PyMVPA test runner.

    NOTE(review): the original ``suite()`` lines were lost in extraction;
    reconstructed per the convention used by the sibling test modules --
    confirm against the repository.
    """
    return unittest.makeSuite(StatsTestsScipy)


if __name__ == '__main__':
    import runner