1
2
3
4
5
6
7
8
9 """Data mapper"""
10
11 __docformat__ = 'restructuredtext'
12
13 import numpy as N
14
15 from mvpa.mappers.metric import Metric
16
17 from mvpa.datasets import Dataset
18 from mvpa.misc.vproperty import VProperty
19 from mvpa.base.dochelpers import enhancedDocString
20
21 if __debug__:
22 from mvpa.base import warning
23 from mvpa.base import debug
24
25
27 """Interface to provide mapping between two spaces: IN and OUT.
28 Methods are prefixed correspondingly. forward/reverse operate
29 on the entire dataset. get(In|Out)Id[s] operate per element::
30
31 forward
32 --------->
33 IN OUT
34 <--------/
35 reverse
36 """
38 """
39 :Parameters:
40 metric : Metric
41 Optional metric
42 """
43 self.__metric = None
44 """Pylint happiness"""
45 self.setMetric(metric)
46 """Actually assign the metric"""
47
48
49
50
51
52
53
54
56 """Map data from the IN dataspace into OUT space.
57 """
58 raise NotImplementedError
59
60
62 """Reverse map data from OUT space into the IN space.
63 """
64 raise NotImplementedError
65
66
68 """Returns the size of the entity in input space"""
69 raise NotImplementedError
70
71
73 """Returns the size of the entity in output space"""
74 raise NotImplementedError
75
76
78 """Limit the OUT space to a certain set of features.
79
80 :Parameters:
81 outIds: sequence
82 Subset of ids of the current feature in OUT space to keep.
83 """
84 raise NotImplementedError
85
86
88 """Translate a feature id into a coordinate/index in input space.
89
90 Such a translation might not be meaningful or even possible for a
91 particular mapping algorithm and therefore cannot be relied upon.
92 """
93 raise NotImplementedError
94
95
96
97
98
99
101 """Validate feature id in OUT space.
102
103 Override if OUT space is not simply a 1D vector
104 """
105 return(outId >= 0 and outId < self.getOutSize())
106
107
109 """Validate id in IN space.
110
111 Override if IN space is not simply a 1D vector
112 """
113 return(inId >= 0 and inId < self.getInSize())
114
115
def train(self, dataset):
    """Bring the mapper into a state that allows it to perform its mapping.

    :Parameter:
      dataset: Dataset or subclass
        Training data. This base implementation does not look at it.

    .. note::
      The default behavior of this method is to do nothing.
    """
    # Intentionally a no-op: nothing is learned from `dataset` here.
    return None
129
130
132 """Get feature neighbors in input space, given an id in output space.
133
134 This method has to be reimplemented whenever a derived class does not
135 provide an implementation for :meth:`~mvpa.mappers.base.Mapper.getInId`.
136 """
137 if self.metric is None:
138 raise RuntimeError, "No metric was assigned to %s, thus no " \
139 "neighboring information is present" % self
140
141 if self.isValidOutId(outId):
142 inId = self.getInId(outId)
143 for inId in self.getNeighborIn(inId, *args, **kwargs):
144 yield self.getOutId(inId)
145
146
147
148
149
150
152 """Return the list of coordinates for the neighbors.
153
154 :Parameters:
155 inId
156 id (index) of an element in input dataspace.
157 *args, **kwargs
158 Any additional arguments are passed to the embedded metric of the
159 mapper.
160
161 XXX See TODO below: what to return -- list of arrays or list
162 of tuples?
163 """
164 if self.metric is None:
165 raise RuntimeError, "No metric was assigned to %s, thus no " \
166 "neighboring information is present" % self
167
168 isValidInId = self.isValidInId
169 if isValidInId(inId):
170 for neighbor in self.metric.getNeighbor(inId, *args, **kwargs):
171 if isValidInId(neighbor):
172 yield neighbor
173
174
176 """Return the list of coordinates for the neighbors.
177
178 By default it simply constructs the list based on
179 the generator returned by getNeighbor()
180 """
181 return [ x for x in self.getNeighbor(outId, *args, **kwargs) ]
182
183
185 if self.__metric is not None:
186 s = "metric=%s" % repr(self.__metric)
187 else:
188 s = ''
189 return "%s(%s)" % (self.__class__.__name__, s)
190
191
193 """Calls the mappers forward() method.
194 """
195 return self.forward(data)
196
197
199 """To make pylint happy"""
200 return self.__metric
201
202
204 """To make pylint happy"""
205 if metric is not None and not isinstance(metric, Metric):
206 raise ValueError, "metric for Mapper must be an " \
207 "instance of a Metric class . Got %s" \
208 % `type(metric)`
209 self.__metric = metric
210
211
212 metric = property(fget=getMetric, fset=setMetric)
213 nfeatures = VProperty(fget=getOutSize)
214
215
216
218 """Linear mapping between multidimensional spaces.
219
220 This class cannot be used directly. Sub-classes have to implement
221 the `_train()` method, which has to compute the projection matrix
222 `_proj` and optionally offset vectors `_offset_in` and
223 `_offset_out` (if initialized with demean=True, which is default)
224 given a dataset (see `_train()` docstring for more information).
225
226 Once the projection matrix is available, this class provides
227 functionality to perform forward and backwards linear mapping of
228 data, the latter by default using pseudo-inverse (but could be
229 altered in subclasses, like hermitian (conjugate) transpose in
230 case of SVD). Additionally, `ProjectionMapper` supports optional
231 selection of arbitrary component (i.e. columns of the projection
232 matrix) of the projection.
233
234 Forward and back-projection matrices (a.k.a. *projection* and
235 *reconstruction*) are available via the `proj` and `recon`
236 properties.
237 """
238
239 _DEV__doc__ = """Think about renaming `demean`, may be `translation`?"""
240
def __init__(self, selector=None, demean=True):
    """Initialize the ProjectionMapper

    :Parameters:
      selector: None | list
        Which components (i.e. columns of the projection matrix)
        should be used for mapping. If `selector` is `None` all
        components are used. If a list is provided, all list
        elements are treated as component ids and the respective
        components are selected (all others are discarded).
      demean: bool
        Whether data should be demeaned while computing
        projections, with the offset applied back while doing reverse()
    """
    Mapper.__init__(self)

    # Configuration supplied by the caller.
    # Component ids to keep once training is done (None keeps all).
    self._selector = selector
    # Flag whether to demean the to-be-projected data prior to projection.
    self._demean = demean

    # State that is only filled in later; everything starts out unset.
    # Forward projection matrix.
    self._proj = None
    # Reverse projection (reconstruction) matrix.
    self._recon = None
    # Offset (most often just mean) in the input space.
    self._offset_in = None
    # Offset (most often just mean) in the output space.
    self._offset_out = None
269
270 __doc__ = enhancedDocString('ProjectionMapper', locals(), Mapper)
271
272
def train(self, dataset, *args, **kwargs):
    """Determine the projection matrix.

    Stores the feature-wise mean of the training samples as the input
    offset, lets the subclass compute the projection via `_train()`, and
    finally applies the component selection given at construction time.

    :Parameters:
      dataset : Dataset
        Dataset to operate on. Anything that is not a `Dataset` is used
        directly as the samples array.
      *args
        Optional positional arguments to pass to _train
        of subclass
      **kwargs
        Optional keyword arguments to pass to _train
        of subclass
    """
    # Accept either a Dataset (use its sample matrix) or a bare array.
    if isinstance(dataset, Dataset):
        samples = dataset.samples
    else:
        samples = dataset
    # Feature-wise mean of the training samples.
    self._offset_in = samples.mean(axis=0)

    # The reconstruction matrix is computed lazily and cached. Drop any
    # copy left over from a previous training run so that it cannot
    # outlive the projection it was derived from.
    self._recon = None

    # Compute the projection matrix with the subclass logic.
    self._train(dataset, *args, **kwargs)

    # Perform component selection, if one was requested.
    if self._selector is not None:
        self.selectOut(self._selector)
301
302
304 """Helper which optionally demeans
305 """
306 if self._demean:
307
308 data = data - self._offset_in
309
310 if __debug__ and "MAP_" in debug.active:
311 debug("MAP_",
312 "%s: Mean of data in input space %s was subtracted" %
313 (self.__class__.__name__, self._offset_in))
314 return data
315
316
318 """Worker method. Needs to be implemented by subclass.
319
320 This method has to train the mapper and store the resulting
321 transformation matrix in `self._proj`.
322 """
323 raise NotImplementedError
324
325
def forward(self, data, demean=None):
    """Perform forward projection.

    :Parameters:
      data: ndarray
        Data array to map
      demean: boolean | None
        Override demean setting for this method call. `None` falls
        back to the setting the mapper was created with.

    :Returns:
      NumPy array

    :Raises:
      RuntimeError
        If no projection matrix is available yet.
    """
    # Fall back to the instance-wide setting unless explicitly overridden.
    if demean is None:
        demean = self._demean

    if self._proj is None:
        raise RuntimeError("Mapper needs to be trained before used.")

    # Matrix type so that `*` below means matrix multiplication.
    d = N.asmatrix(data)

    # Remove the input offset, if requested and present.
    if demean and self._offset_in is not None:
        d = d - self._offset_in

    # Do the forward projection; `.A` converts back to a plain ndarray.
    res = (d * self._proj).A

    # Add the output offset, if requested and present.
    if demean and self._offset_out is not None:
        res += self._offset_out

    return res
359
360
362 """Reproject (reconstruct) data into the original feature space.
363
364 :Returns:
365 NumPy array
366 """
367 if self._proj is None:
368 raise RuntimeError, "Mapper needs to be trained before used."
369 d = N.asmatrix(data)
370
371 if self._demean and self._offset_out is not None:
372 d = d - self._offset_out
373
374
375 res = (d * self.recon).A
376
377
378 if self._demean and self._offset_in is not None:
379 res += self._offset_in
380
381 return res
382
384 """Given that a projection is present -- compute reconstruction matrix.
385 By default -- pseudoinverse of projection matrix. Might be overridden
386 in derived classes for efficiency.
387 """
388 return N.linalg.pinv(self._proj)
389
391 """Compute (if necessary) and return reconstruction matrix
392 """
393
394 recon = self._recon
395 if recon is None:
396 self._recon = recon = self._computeRecon()
397 return recon
398
399
401 """Returns the number of original features."""
402 return self._proj.shape[0]
403
404
406 """Returns the number of components to project on."""
407 return self._proj.shape[1]
408
409
411 """Choose a subset of components (and remove all others)."""
412 self._proj = self._proj[:, outIds]
413 if self._offset_out is not None:
414 self._offset_out = self._offset_out[outIds]
415
416 self._recon = None
417
418 proj = property(fget=lambda self: self._proj, doc="Projection matrix")
419 recon = property(fget=_getRecon, doc="Backprojection matrix")
420
421
422
424 """Meta mapper that combines several embedded mappers.
425
426 This mapper can be used to map from several input dataspaces into a common
427 output dataspace. When :meth:`~mvpa.mappers.base.CombinedMapper.forward`
428 is called with a sequence of data, each element in that sequence is passed
429 to the corresponding mapper, which in turn forward-maps the data. The
430 output of all mappers is finally stacked (horizontally or column or
431 feature-wise) into a single large 2D matrix (nsamples x nfeatures).
432
433 .. note::
434 This mapper can only embed mappers that transform data into a 2D
435 (nsamples x nfeatures) representation. For mappers not supporting this
436 transformation, consider wrapping them in a
437 :class:`~mvpa.mappers.base.ChainMapper` with an appropriate
438 post-processing mapper.
439
440 CombinedMapper fully supports forward and backward mapping, training,
441 runtime selection of a feature subset (in output dataspace) and retrieval
442 of neighborhood information.
443 """
445 """
446 :Parameters:
447 mappers: list of Mapper instances
448 The order of the mappers in the list is important, as it will define
449 the order in which data snippets have to be passed to
450 :meth:`~mvpa.mappers.base.CombinedMapper.forward`.
451 **kwargs
452 All additional arguments are passed to the base-class constructor.
453 """
454 Mapper.__init__(self, **kwargs)
455
456 if not len(mappers):
457 raise ValueError, \
458 'CombinedMapper needs at least one embedded mapper.'
459
460 self._mappers = mappers
461
462
464 """Map data from the IN spaces into the common OUT space.
465
466 :Parameter:
467 data: sequence
468 Each element in the `data` sequence is passed to the corresponding
469 embedded mapper and is mapped individually by it. The number of
470 elements in `data` has to match the number of embedded mappers. Each
471 element in `data` has to provide the same number of samples
472 (first dimension).
473
474 :Returns:
475 array: nsamples x nfeatures
476 Horizontally stacked array of all embedded mapper outputs.
477 """
478 if not len(data) == len(self._mappers):
479 raise ValueError, \
480 "CombinedMapper needs a sequence with data for each " \
481 "Mapper"
482
483
484
485 try:
486 return N.hstack(
487 [self._mappers[i].forward(d) for i, d in enumerate(data)])
488 except ValueError:
489 raise ValueError, \
490 "Embedded mappers do not generate same number of samples. " \
491 "Check input data."
492
493
495 """Reverse map data from OUT space into the IN spaces.
496
497 :Parameter:
498 data: array
499 Single data array to be reverse mapped into a sequence of data
500 snippets in their individual IN spaces.
501
502 :Returns:
503 list
504 """
505
506
507
508 data = N.asanyarray(data).T
509
510 if not len(data) == self.getOutSize():
511 raise ValueError, \
512 "Data shape does match mapper reverse mapping properties."
513
514 result = []
515 fsum = 0
516 for m in self._mappers:
517
518 fsum_new = fsum + m.getOutSize()
519
520 result.append(m.reverse(data[fsum:fsum_new].T))
521
522 fsum = fsum_new
523
524 return result
525
526
def train(self, dataset):
    """Trains all embedded mappers.

    The provided training dataset is split appropriately and the
    corresponding pieces are passed to the
    :meth:`~mvpa.mappers.base.Mapper.train` method of each embedded mapper.

    :Parameter:
      dataset: :class:`~mvpa.datasets.base.Dataset` or subclass
        A dataset with the number of features matching the `outSize` of the
        `CombinedMapper`.

    :Raises:
      ValueError
        If the number of features in `dataset` differs from the output
        size of the mapper.
    """
    if dataset.nfeatures != self.getOutSize():
        raise ValueError("Training dataset does not match the mapper "
                         "properties.")

    # Walk over the mappers, handing each one the consecutive block of
    # features that corresponds to its own output size.
    fsum = 0
    for m in self._mappers:
        fsum_new = fsum + m.getOutSize()
        m.train(dataset.selectFeatures(range(fsum, fsum_new)))
        fsum = fsum_new
549
550
552 """Returns the size of the entity in input space"""
553 return N.sum(m.getInSize() for m in self._mappers)
554
555
557 """Returns the size of the entity in output space"""
558 return N.sum(m.getOutSize() for m in self._mappers)
559
560
562 """Remove some elements and leave only ids in 'out'/feature space.
563
564 .. note::
565 The subset selection is done inplace
566
567 :Parameter:
568 outIds: sequence
569 All output feature ids to be selected/kept.
570 """
571
572
573 ids = N.asanyarray(outIds)
574 fsum = 0
575 for m in self._mappers:
576
577 selector = N.logical_and(ids < fsum + m.getOutSize(), ids >= fsum)
578
579 selected = ids[selector] - fsum
580 fsum += m.getOutSize()
581
582 m.selectOut(selected)
583
584
586 """Get the ids of the neighbors of a single feature in output dataspace.
587
588 :Parameters:
589 outId: int
590 Single id of a feature in output space, whose neighbors should be
591 determined.
592 *args, **kwargs
593 Additional arguments are passed to the metric of the embedded
594 mapper, that is responsible for the corresponding feature.
595
596 Returns a list of outIds
597 """
598 fsum = 0
599 for m in self._mappers:
600 fsum_new = fsum + m.getOutSize()
601 if outId >= fsum and outId < fsum_new:
602 return m.getNeighbor(outId - fsum, *args, **kwargs)
603 fsum = fsum_new
604
605 raise ValueError, "Invalid outId passed to CombinedMapper.getNeighbor()"
606
607
609 s = Mapper.__repr__(self).rstrip(' )')
610
611 if not s[-1] == '(':
612 s += ' '
613 s += 'mappers=[%s])' % ', '.join([m.__repr__() for m in self._mappers])
614 return s
615
616
617
619 """Meta mapper that embeds a chain of other mappers.
620
621 Each mapper in the chain is called successively to perform forward or
622 reverse mapping.
623
624 .. note::
625
626 In its current implementation the `ChainMapper` treats all but the last
627 mapper as simple pre-processing (in forward()) or post-processing (in
628 reverse()) steps. All other capabilities, e.g. training and neighbor
629 metrics are provided by or affect *only the last mapper in the chain*.
630
631 With respect to neighbor metrics this means that they are determined
632 based on the input space of the *last mapper* in the chain and *not* on
633 the input dataspace of the `ChainMapper` as a whole
634 """
636 """
637 :Parameters:
638 mappers: list of Mapper instances
639 **kwargs
640 All additional arguments are passed to the base-class constructor.
641 """
642 Mapper.__init__(self, **kwargs)
643
644 if not len(mappers):
645 raise ValueError, 'ChainMapper needs at least one embedded mapper.'
646
647 self._mappers = mappers
648
649
651 """Calls all mappers in the chain successively.
652
653 :Parameter:
654 data
655 data to be chain-mapped.
656 """
657 mp = data
658 for m in self._mappers:
659 mp = m.forward(mp)
660
661 return mp
662
663
665 """Calls all mappers in the chain successively, in reversed order.
666
667 :Parameter:
668 data: array
669 data array to be reverse mapped into the original dataspace.
670 """
671 mp = data
672 for m in reversed(self._mappers):
673 mp = m.reverse(mp)
674
675 return mp
676
677
def train(self, dataset):
    """Trains the *last* mapper in the chain.

    :Parameter:
      dataset: :class:`~mvpa.datasets.base.Dataset` or subclass
        A dataset with the number of features matching the `outSize` of the
        last mapper in the chain (which is identical to the one of the
        `ChainMapper` itself).

    :Raises:
      ValueError
        If the number of features in `dataset` differs from the output
        size of the mapper.
    """
    if dataset.nfeatures != self.getOutSize():
        raise ValueError("Training dataset does not match the mapper "
                         "properties.")

    # Only the final mapper is trained; the others are left untouched.
    self._mappers[-1].train(dataset)
692
693
695 """Returns the size of the entity in input space"""
696 return self._mappers[0].getInSize()
697
698
700 """Returns the size of the entity in output space"""
701 return self._mappers[-1].getOutSize()
702
703
705 """Remove some elements from the *last* mapper in the chain.
706
707 :Parameter:
708 outIds: sequence
709 All output feature ids to be selected/kept.
710 """
711 self._mappers[-1].selectOut(outIds)
712
713
715 """Get the ids of the neighbors of a single feature in output dataspace.
716
717 .. note::
718
719 The neighbors are determined based on the input space of the *last
720 mapper* in the chain and *not* on the input dataspace of the
721 `ChainMapper` as a whole!
722
723 :Parameters:
724 outId: int
725 Single id of a feature in output space, whose neighbors should be
726 determined.
727 *args, **kwargs
728 Additional arguments are passed to the metric of the embedded
729 mapper, that is responsible for the corresponding feature.
730
731 Returns a list of outIds
732 """
733 return self._mappers[-1].getNeighbor(outId, *args, **kwargs)
734
735
737 s = Mapper.__repr__(self).rstrip(' )')
738
739 if not s[-1] == '(':
740 s += ' '
741 s += 'mappers=[%s])' % ', '.join([m.__repr__() for m in self._mappers])
742 return s
743