1
2
3
4
5
6
7
8
"""Simple functors that transform something."""
10
11 _DEV_DOC = """
Follow the convention that functions start with lower case, and classes
13 with uppercase letter.
14 """
15
16 __docformat__ = 'restructuredtext'
17
18
19 import numpy as N
20
21 from mvpa.base import externals, warning
22 from mvpa.misc.state import StateVariable, ClassWithCollections
23
24 if __debug__:
25 from mvpa.base import debug
26
27
def Absolute(x):
    """Return the elementwise absolute of any argument.

    :Parameter:
      x: scalar | sequence
    """
    return N.absolute(x)
37
38
def OneMinus(x):
    """Return elementwise ``1 - x`` of any argument."""
    return 1 - x
42
43
def Identity(x):
    """Return whatever it was called with, unchanged."""
    return x
47
48
def FirstAxisMean(x):
    """Mean computed along the first axis."""
    return N.mean(x, axis=0)
52
def FirstAxisSumNotZero(x):
    """Count, along the first axis, how many values are not equal to zero."""
    return (N.asarray(x) != 0).sum(axis=0)
57
58
def SecondAxisMean(x):
    """Mean across the 2nd axis.

    Use cases:
     - to combine multiple sensitivities to get sense about
       mean sensitivity across splits
    """
    return N.mean(x, axis=1)
67
68
def SecondAxisSumOfAbs(x):
    """Sum of absolute values along the 2nd axis.

    Use cases:
     - to combine multiple sensitivities to get sense about
       what features are most influential
    """
    return N.abs(x).sum(axis=1)
77
78
def SecondAxisMaxOfAbs(x):
    """Max of absolute values along the 2nd axis."""
    return N.abs(x).max(axis=1)
83
84
def GrandMean(x):
    """Mean over all elements -- just what the name suggests."""
    return N.mean(x)
88
89
def L2Normed(x, norm=1.0, reverse=False):
    """Scale the values so that the L2 (Euclidean) norm becomes `norm`.

    More verbose: scale so that the sum of the squared elements of the
    returned vector becomes ``norm**2``.

    :Parameters:
      x : array-like
        Values to scale (must support multiplication by a scalar).
      norm : float
        Target L2 norm.
      reverse : bool
        Accepted for call-signature compatibility with sibling
        transformers; NOTE(review): currently unused in this body.
    """
    xnorm = N.linalg.norm(x)
    return x * (norm / xnorm)
98
def L1Normed(x, norm=1.0, reverse=False):
    """Scale the values so that the L1 norm (sum of ``|x|``) becomes `norm`.

    :Parameters:
      x : array-like
        Values to scale (must support multiplication by a scalar).
      norm : float
        Target L1 norm.
      reverse : bool
        Accepted for call-signature compatibility with sibling
        transformers; NOTE(review): currently unused in this body.
    """
    xnorm = N.sum(N.abs(x))
    return x * (norm / xnorm)
103
104
def RankOrder(x, reverse=False):
    """Rank-order by value. Highest gets 0.

    :Parameters:
      x : sequence
        Values to rank.
      reverse : bool
        If True, the *lowest* value gets rank 0 instead.

    :Returns: integer array where position i holds the rank of x[i].
    """
    nelements = len(x)
    indexes = N.arange(nelements)
    t_indexes = indexes
    if not reverse:
        t_indexes = indexes[::-1]
    # sorted() instead of list.sort() on zip's result: works on both
    # Python 2 (list) and Python 3 (iterator)
    tosort = sorted(zip(x, indexes))
    ztosort = list(zip(tosort, t_indexes))
    rankorder = N.empty(nelements, dtype=int)
    # scatter ranks back to the original positions of the values
    rankorder[[pair[0][1] for pair in ztosort]] = \
        [pair[1] for pair in ztosort]
    return rankorder
121
122
126
127
129 """Helper to apply transformer over specific axis
130 """
131
132 - def __init__(self, transformer, axis=None):
133 """Initialize transformer wrapper with an axis.
134
135 :Parameters:
136 transformer
137 A callable to be used
138 axis : None or int
139 If None -- apply transformer across all the data. If some
140 int -- over that axis
141 """
142 self.transformer = transformer
143
144 if not (axis is None or isinstance(axis, int)):
145 raise ValueError, "axis must be specified by integer"
146 self.axis = axis
147
148
149 - def __call__(self, x, *args, **kwargs):
150 transformer = self.transformer
151 axis = self.axis
152 if axis is None:
153 return transformer(x, *args, **kwargs)
154
155 x = N.asanyarray(x)
156 shape = x.shape
157 if axis >= len(shape):
158 raise ValueError, "Axis given in constructor %d is higher " \
159 "than dimensionality of the data of shape %s" % (axis, shape)
160
161
162
163
164
165
166
167 shape_sweep = shape[:axis] + shape[axis+1:]
168 shrinker = None
169 """Either transformer reduces the dimensionality of the data"""
170
171 for index_sweep in N.ndindex(shape_sweep):
172 if axis > 0:
173 index = index_sweep[:axis]
174 else:
175 index = ()
176 index = index + (slice(None),) + index_sweep[axis:]
177 x_sel = x[index]
178 x_t = transformer(x_sel, *args, **kwargs)
179 if shrinker is None:
180 if N.isscalar(x_t) or x_t.shape == shape_sweep:
181 results = N.empty(shape_sweep, dtype=x.dtype)
182 shrinker = True
183 elif x_t.shape == x_sel.shape:
184 results = N.empty(x.shape, dtype=x.dtype)
185 shrinker = False
186 else:
187 raise RuntimeError, 'Not handled by OverAxis kind of transformer'
188
189 if shrinker:
190 results[index_sweep] = x_t
191 else:
192 results[index] = x_t
193
194 return results
195
196
class DistPValue(ClassWithCollections):
    """Converts values into p-values under vague and non-scientific assumptions
    """

    # number of features treated as belonging to the null distribution
    nulldist_number = StateVariable(enabled=True,
        doc="Number of features within the estimated null-distribution")

    # number of features pruned away as tentative true positives
    positives_recovered = StateVariable(enabled=True,
        doc="Number of features considered to be positives and which were recovered")


    def __init__(self, sd=0, distribution='rdist', fpp=None, nbins=400, **kwargs):
        """L2-Norm the values, convert them to p-values of a given distribution.

        :Parameters:
          sd : int
            Samples dimension (if len(x.shape)>1) on which to operate
          distribution : string
            Which distribution to use. Known are: 'rdist' (later normal should
            be there as well)
          fpp : float
            At what p-value (both tails) if not None, to control for false
            positives. It would iteratively prune the tails (tentative real positives)
            until empirical p-value becomes less or equal to numerical.
          nbins : int
            Number of bins for the iterative pruning of positives

        WARNING: Highly experimental/slow/etc: no theoretical grounds have been
        presented in any paper, nor proven
        """
        externals.exists('scipy', raiseException=True)
        ClassWithCollections.__init__(self, **kwargs)

        self.sd = sd
        if not (distribution in ['rdist']):
            raise ValueError("Actually only rdist supported at the moment"
                             " got %s" % distribution)
        self.distribution = distribution
        self.fpp = fpp
        self.nbins = nbins


    def __call__(self, x):
        """Convert values in `x` into p-values.

        Operates row-wise over the samples dimension configured via `sd`
        (1-d input is promoted to a single row).  Each row is L2-normed
        and mapped through the CDF of an `rdist` distribution; when
        `fpp` is not None, tails are iteratively pruned first (see
        __init__ docstring).  Returns an array shaped like the 2-d view
        of the input, transposed back when sd == 0.
        """
        # local imports: scipy presence was asserted in __init__
        from mvpa.support.stats import scipy
        import scipy.stats as stats

        distribution = self.distribution
        sd = self.sd
        fpp = self.fpp
        nbins = self.nbins

        x = N.asanyarray(x)
        shape_orig = x.shape
        ndims = len(shape_orig)

        # numpy < 1.1 returned bin centers from histogram(); newer
        # versions return bin edges, handled below
        numpy_center_points = externals.versions['numpy'] < (1, 1)

        if ndims > 2:
            raise NotImplementedError("TODO: add support for more than 2 dimensions")
        elif ndims == 1:
            # promote to 2-d with a single feature column
            x, sd = x[:, N.newaxis], 0

        # always iterate over rows: transpose if samples dim is 0
        if sd == 0:
            x = x.T

        pvalues = N.zeros(x.shape)
        nulldist_number, positives_recovered = [], []

        nd = x.shape[1]
        if __debug__:
            if nd < x.shape[0]:
                warning("Number of features in DistPValue lower than number of"
                        " items -- may be incorrect sd=%d was provided" % sd)
        for i, xx in enumerate(x):
            dist = stats.rdist(nd-1, 0, 1)
            # L2-norm the row in place before mapping through the CDF
            xx /= N.linalg.norm(xx)

            if fpp is not None:
                if __debug__:
                    debug('TRAN_', "starting adaptive adjustment i=%d" % i)

                Nxx, xxx, pN_emp_prev = len(xx), xx, 1.0
                Nxxx = Nxx
                # indexes of features still belonging to the null distribution
                indexes = N.arange(Nxx)
                while True:
                    hist, bins = N.histogram(xxx, bins=nbins, normed=False)
                    pdf = hist.astype(float)/Nxxx
                    if not numpy_center_points:
                        # newer numpy returns edges -- convert to centers
                        bins = 0.5 * (bins[0:-1] + bins[1:])
                    # NOTE(review): unused below -- left over from
                    # mangled original; retained not to alter behavior
                    bins_halfstep = (bins[1] - bins[2])/2.0

                    dist_cdf = dist.cdf(bins)

                    # empirical CDF accumulated bin by bin
                    cdf = N.zeros(nbins, dtype=float)

                    dist_prevv = cdf_prevv = 0.0
                    for j in range(nbins):
                        cdf_prevv = cdf[j] = cdf_prevv + pdf[j]

                    # bins falling into the two tails at significance fpp
                    p = (0.5 - N.abs(dist_cdf - 0.5) < fpp/2.0)

                    # empirical mass within those tail bins
                    pN_emp = N.sum(pdf[p])

                    if __debug__:
                        debug('TRAN_', "empirical p=%.3f for theoretical p=%.3f"
                              % (pN_emp, fpp))

                    if pN_emp <= fpp:
                        # tails are thin enough -- done
                        break

                    if pN_emp > pN_emp_prev:
                        if __debug__:
                            debug('TRAN_', "Diverging -- thus keeping last result "
                                  "with p=%.3f" % pN_emp_prev)
                        # roll back to the previous (better) state
                        indexes, xxx, dist = indexes_prev, xxx_prev, dist_prev
                        break

                    pN_emp_prev = pN_emp

                    # prune the extreme bins (tentative positives)
                    keep = N.logical_and(xxx > bins[0],
                                         xxx < bins[-1])
                    if __debug__:
                        debug('TRAN_', "Keeping %d out of %d elements" %
                              (N.sum(keep), Nxxx))

                    # remember current state for possible rollback
                    indexes_prev, xxx_prev, dist_prev = indexes, xxx, dist

                    xxx, indexes = xxx[keep], indexes[keep]

                    # renorm and refit distribution on the pruned set
                    xxx = xxx / N.linalg.norm(xxx)
                    Nxxx = len(xxx)
                    dist = stats.rdist(Nxxx-1, 0, 1)

                Nindexes = len(indexes)
                Nrecovered = Nxx - Nindexes

                nulldist_number += [Nindexes]
                positives_recovered += [Nrecovered]

                if __debug__:
                    if distribution == 'rdist':
                        assert(dist.args[0] == Nindexes-1)
                    debug('TRAN', "Positives recovery finished with %d out of %d "
                          "entries in Null-distribution, thus %d positives "
                          "were recovered" % (Nindexes, Nxx, Nrecovered))

            # map the (full, normed) row through the final distribution
            pvalues[i, :] = dist.cdf(xx)

        result = pvalues

        # expose per-row statistics via state variables
        self.nulldist_number = nulldist_number
        self.positives_recovered = positives_recovered

        # undo the earlier transpose
        if sd == 0:
            result = result.T

        return result
390