Package mvpa :: Package mappers :: Module base
[hide private]
[frames] | no frames]

Source Code for Module mvpa.mappers.base

  1  # emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*- 
  2  # vi: set ft=python sts=4 ts=4 sw=4 et: 
  3  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
  4  # 
  5  #   See COPYING file distributed along with the PyMVPA package for the 
  6  #   copyright and license terms. 
  7  # 
  8  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
  9  """Data mapper""" 
 10   
 11  __docformat__ = 'restructuredtext' 
 12   
 13  import numpy as N 
 14   
 15  from mvpa.mappers.metric import Metric 
 16   
 17  from mvpa.datasets import Dataset 
 18  from mvpa.misc.vproperty import VProperty 
 19  from mvpa.base.dochelpers import enhancedDocString 
 20   
 21  if __debug__: 
 22      from mvpa.base import warning 
 23      from mvpa.base import debug 
 24   
 25   
class Mapper(object):
    """Interface to provide mapping between two spaces: IN and OUT.

    Methods are prefixed correspondingly. forward/reverse operate
    on the entire dataset. get(In|Out)Id[s] operate per element::

              forward
             --------->
         IN              OUT
             <--------/
               reverse
    """
    def __init__(self, metric=None):
        """
        :Parameters:
          metric : Metric
            Optional metric. Validated by `setMetric()`.
        """
        # define the attribute first (pylint happiness) ...
        self.__metric = None
        # ... and assign it through the validating setter
        self.setMetric(metric)

    #
    # The following methods are abstract and merely define the intended
    # interface of a mapper and have to be implemented in derived classes. See
    # the docstrings of the respective methods for details about what they
    # should do.
    #

    def forward(self, data):
        """Map data from the IN dataspace into OUT space.
        """
        raise NotImplementedError


    def reverse(self, data):
        """Reverse map data from OUT space into the IN space.
        """
        raise NotImplementedError


    def getInSize(self):
        """Returns the size of the entity in input space"""
        raise NotImplementedError


    def getOutSize(self):
        """Returns the size of the entity in output space"""
        raise NotImplementedError


    def selectOut(self, outIds):
        """Limit the OUT space to a certain set of features.

        :Parameters:
          outIds: sequence
            Subset of ids of the current feature in OUT space to keep.
        """
        raise NotImplementedError


    def getInId(self, outId):
        """Translate a feature id into a coordinate/index in input space.

        Such a translation might not be meaningful or even possible for a
        particular mapping algorithm and therefore cannot be relied upon.
        """
        raise NotImplementedError


    def getOutId(self, inId):
        """Translate a coordinate/index in input space into a feature id.

        This is the counterpart of `getInId()` and is used by the default
        implementation of `getNeighbor()`. As with `getInId()`, such a
        translation might not be possible for a particular mapping
        algorithm.
        """
        raise NotImplementedError


    #
    # The following methods are candidates for reimplementation in derived
    # classes, in cases where the provided default behavior is not appropriate.
    #
    def isValidOutId(self, outId):
        """Validate feature id in OUT space.

        Override if OUT space is not simply a 1D vector
        """
        return outId >= 0 and outId < self.getOutSize()


    def isValidInId(self, inId):
        """Validate id in IN space.

        Override if IN space is not simply a 1D vector
        """
        return inId >= 0 and inId < self.getInSize()


    def train(self, dataset):
        """Perform training of the mapper.

        This method is called to put the mapper in a state that allows it to
        perform the intended mapping.

        :Parameter:
          dataset: Dataset or subclass

        .. note::
          The default behavior of this method is to do nothing.
        """
        pass


    def getNeighbor(self, outId, *args, **kwargs):
        """Get feature neighbors in input space, given an id in output space.

        This is a generator: the neighbors of `outId` are looked up in
        input space via `getNeighborIn()` and each of them is translated
        back with `getOutId()`. Nothing is yielded if `outId` is not
        valid according to `isValidOutId()`.

        This method has to be reimplemented whenever a derived class does not
        provide an implementation for :meth:`~mvpa.mappers.base.Mapper.getInId`.

        :Raises:
          RuntimeError
            If no metric is assigned. Because this is a generator the
            exception is raised when iteration starts, not at call time.
        """
        if self.metric is None:
            raise RuntimeError("No metric was assigned to %s, thus no "
                               "neighboring information is present" % self)

        if self.isValidOutId(outId):
            inId = self.getInId(outId)
            for neighborInId in self.getNeighborIn(inId, *args, **kwargs):
                yield self.getOutId(neighborInId)


    #
    # The following methods provide common functionality for all mappers
    # and there should be no immediate need to reimplement them
    #
    def getNeighborIn(self, inId, *args, **kwargs):
        """Return the list of coordinates for the neighbors.

        Generator yielding those neighbors reported by the metric which
        pass `isValidInId()`. Nothing is yielded if `inId` itself is not
        valid.

        :Parameters:
          inId
            id (index) of an element in input dataspace.
          *args, **kwargs
            Any additional arguments are passed to the embedded metric of the
            mapper.

        :Raises:
          RuntimeError
            If no metric is assigned (raised when iteration starts).

        XXX See TODO below: what to return -- list of arrays or list
        of tuples?
        """
        if self.metric is None:
            raise RuntimeError("No metric was assigned to %s, thus no "
                               "neighboring information is present" % self)

        # bind once, it is called for every candidate neighbor
        isValidInId = self.isValidInId
        if isValidInId(inId):
            for neighbor in self.metric.getNeighbor(inId, *args, **kwargs):
                if isValidInId(neighbor):
                    yield neighbor


    def getNeighbors(self, outId, *args, **kwargs):
        """Return the list of coordinates for the neighbors.

        By default it simply constructs the list based on
        the generator returned by getNeighbor()
        """
        return list(self.getNeighbor(outId, *args, **kwargs))


    def __repr__(self):
        """Return ``ClassName(metric=...)`` or ``ClassName()`` without metric.
        """
        if self.__metric is not None:
            s = "metric=%s" % repr(self.__metric)
        else:
            s = ''
        return "%s(%s)" % (self.__class__.__name__, s)


    def __call__(self, data):
        """Calls the mappers forward() method.
        """
        return self.forward(data)


    def getMetric(self):
        """Return the assigned metric (or None)."""
        return self.__metric


    def setMetric(self, metric):
        """Assign a metric to the mapper.

        :Parameters:
          metric : Metric or None

        :Raises:
          ValueError
            If `metric` is neither None nor a `Metric` instance.
        """
        if metric is not None and not isinstance(metric, Metric):
            raise ValueError("metric for Mapper must be an "
                             "instance of a Metric class . Got %s"
                             % repr(type(metric)))
        self.__metric = metric


    metric = property(fget=getMetric, fset=setMetric)
    nfeatures = VProperty(fget=getOutSize)
214 215 216
class ProjectionMapper(Mapper):
    """Linear mapping between multidimensional spaces.

    This class cannot be used directly. Sub-classes have to implement
    the `_train()` method, which has to compute the projection matrix
    `_proj` and optionally offset vectors `_offset_in` and
    `_offset_out` (if initialized with demean=True, which is default)
    given a dataset (see `_train()` docstring for more information).

    Once the projection matrix is available, this class provides
    functionality to perform forward and backwards linear mapping of
    data, the latter by default using pseudo-inverse (but could be
    altered in subclasses, like hermitian (conjugate) transpose in
    case of SVD). Additionally, `ProjectionMapper` supports optional
    selection of arbitrary component (i.e. columns of the projection
    matrix) of the projection.

    Forward and back-projection matrices (a.k.a. *projection* and
    *reconstruction*) are available via the `proj` and `recon`
    properties.
    """

    _DEV__doc__ = """Think about renaming `demean`, may be `translation`?"""

    def __init__(self, selector=None, demean=True):
        """Initialize the ProjectionMapper

        :Parameters:
          selector: None | list
            Which components (i.e. columns of the projection matrix)
            should be used for mapping. If `selector` is `None` all
            components are used. If a list is provided, all list
            elements are treated as component ids and the respective
            components are selected (all others are discarded).
          demean: bool
            Either data should be demeaned while computing
            projections and applied back while doing reverse()
        """
        Mapper.__init__(self)

        self._selector = selector
        # Forward projection matrix.
        self._proj = None
        # Reverse projection (reconstruction) matrix; computed lazily
        # by `_getRecon()`.
        self._recon = None
        # Flag whether to demean the to be projected data, prior to
        # projection.
        self._demean = demean
        # Offset (most often just mean) in the input space
        self._offset_in = None
        # Offset (most often just mean) in the output space
        self._offset_out = None

    __doc__ = enhancedDocString('ProjectionMapper', locals(), Mapper)


    def train(self, dataset, *args, **kwargs):
        """Determine the projection matrix.

        :Parameters:
          dataset : Dataset
            Dataset to operate on
          *args
            Optional positional arguments to pass to _train
            of subclass
          **kwargs
            Optional keyword arguments to pass to _train
            of subclass
        """
        # store the feature wise mean
        if isinstance(dataset, Dataset):
            samples = dataset.samples
        else:
            samples = dataset
        self._offset_in = samples.mean(axis=0)
        # ??? Setting of _offset_out is to be done in a child
        # class

        # Drop a reconstruction matrix cached from a previous training;
        # otherwise reverse() would keep using the inverse of the old
        # projection. Done *before* _train() so that a subclass can
        # still assign `_recon` itself.
        self._recon = None

        # compute projection matrix with subclass logic
        self._train(dataset, *args, **kwargs)

        # perform component selection
        if self._selector is not None:
            self.selectOut(self._selector)


    def _demeanData(self, data):
        """Helper which optionally demeans

        Returns `data` with `_offset_in` subtracted if the mapper was
        configured with demean=True, otherwise `data` unchanged.
        """
        if self._demean:
            # demean the training data
            data = data - self._offset_in

            if __debug__ and "MAP_" in debug.active:
                debug("MAP_",
                      "%s: Mean of data in input space %s was subtracted" %
                      (self.__class__.__name__, self._offset_in))
        return data


    def _train(self, dataset):
        """Worker method. Needs to be implemented by subclass.

        This method has to train the mapper and store the resulting
        transformation matrix in `self._proj`.
        """
        raise NotImplementedError


    def forward(self, data, demean=None):
        """Perform forward projection.

        :Parameters:
          data: ndarray
            Data array to map
          demean: boolean | None
            Override demean setting for this method call.

        :Returns:
          NumPy array

        :Raises:
          RuntimeError
            If no projection matrix is available yet.
        """
        # let arg overwrite instance flag
        if demean is None:
            demean = self._demean

        if self._proj is None:
            raise RuntimeError("Mapper needs to be train before used.")

        d = N.asmatrix(data)

        # Remove input offset if present
        if demean and self._offset_in is not None:
            d = d - self._offset_in

        # Do forward projection; `.A` converts the matrix product back
        # into a plain array
        res = (d * self._proj).A

        # Add output offset if present
        if demean and self._offset_out is not None:
            res += self._offset_out

        return res


    def reverse(self, data):
        """Reproject (reconstruct) data into the original feature space.

        :Returns:
          NumPy array

        :Raises:
          RuntimeError
            If no projection matrix is available yet.
        """
        if self._proj is None:
            raise RuntimeError("Mapper needs to be trained before used.")
        d = N.asmatrix(data)
        # Remove offset if present in output space
        if self._demean and self._offset_out is not None:
            d = d - self._offset_out

        # Do reverse projection
        res = (d * self.recon).A

        # Add offset in input space
        if self._demean and self._offset_in is not None:
            res += self._offset_in

        return res

    def _computeRecon(self):
        """Given that a projection is present -- compute reconstruction matrix.
        By default -- pseudoinverse of projection matrix.  Might be overridden
        in derived classes for efficiency.
        """
        return N.linalg.pinv(self._proj)

    def _getRecon(self):
        """Compute (if necessary) and return reconstruction matrix
        """
        # (re)build reconstruction matrix
        recon = self._recon
        if recon is None:
            self._recon = recon = self._computeRecon()
        return recon


    def getInSize(self):
        """Returns the number of original features."""
        return self._proj.shape[0]


    def getOutSize(self):
        """Returns the number of components to project on."""
        return self._proj.shape[1]


    def selectOut(self, outIds):
        """Choose a subset of components (and remove all others)."""
        self._proj = self._proj[:, outIds]
        if self._offset_out is not None:
            self._offset_out = self._offset_out[outIds]
        # invalidate reconstruction matrix
        self._recon = None

    proj  = property(fget=lambda self: self._proj, doc="Projection matrix")
    recon = property(fget=_getRecon, doc="Backprojection matrix")
420 421 422
class CombinedMapper(Mapper):
    """Meta mapper that combines several embedded mappers.

    This mapper can be used to map from several input dataspaces into a
    common output dataspace. When
    :meth:`~mvpa.mappers.base.CombinedMapper.forward` is called with a
    sequence of data, each element in that sequence is passed to the
    corresponding mapper, which in turn forward-maps the data. The
    output of all mappers is finally stacked (horizontally or column or
    feature-wise) into a single large 2D matrix (nsamples x nfeatures).

    .. note::
      This mapper can only embed mappers that transform data into a 2D
      (nsamples x nfeatures) representation. For mappers not supporting this
      transformation, consider wrapping them in a
      :class:`~mvpa.mappers.base.ChainMapper` with an appropriate
      post-processing mapper.

    CombinedMapper fully supports forward and backward mapping, training,
    runtime selection of a feature subset (in output dataspace) and retrieval
    of neighborhood information.
    """
    def __init__(self, mappers, **kwargs):
        """
        :Parameters:
          mappers: list of Mapper instances
            The order of the mappers in the list is important, as it will define
            the order in which data snippets have to be passed to
            :meth:`~mvpa.mappers.base.CombinedMapper.forward`.
          **kwargs
            All additional arguments are passed to the base-class constructor.

        :Raises:
          ValueError
            If `mappers` is empty.
        """
        Mapper.__init__(self, **kwargs)

        if not len(mappers):
            raise ValueError(
                  'CombinedMapper needs at least one embedded mapper.')

        self._mappers = mappers


    def forward(self, data):
        """Map data from the IN spaces into the common OUT space.

        :Parameter:
          data: sequence
            Each element in the `data` sequence is passed to the corresponding
            embedded mapper and is mapped individually by it. The number of
            elements in `data` has to match the number of embedded mappers. Each
            element in `data` has to provide the same number of samples
            (first dimension).

        :Returns:
          array: nsamples x nfeatures
            Horizontally stacked array of all embedded mapper outputs.

        :Raises:
          ValueError
            If the number of data elements does not match the number of
            mappers, or if mapping/stacking raises a ValueError.
        """
        if not len(data) == len(self._mappers):
            raise ValueError(
                  "CombinedMapper needs a sequence with data for each "
                  "Mapper")

        # return a big array for the result of the forward mapped data
        # of each embedded mapper
        try:
            return N.hstack(
                    [m.forward(d) for m, d in zip(self._mappers, data)])
        except ValueError:
            raise ValueError(
                  "Embedded mappers do not generate same number of samples. "
                  "Check input data.")


    def reverse(self, data):
        """Reverse map data from OUT space into the IN spaces.

        :Parameter:
          data: array
            Single data array to be reverse mapped into a sequence of data
            snippets in their individual IN spaces.

        :Returns:
          list
            One reverse-mapped element per embedded mapper, in mapper order.

        :Raises:
          ValueError
            If the number of features in `data` differs from `getOutSize()`.
        """
        # assure array and transpose
        # i.e. transpose of 1D does nothing, but of 2D puts features
        # along first dimension
        data = N.asanyarray(data).T

        if not len(data) == self.getOutSize():
            raise ValueError(
                  "Data shape does not match mapper reverse mapping "
                  "properties.")

        result = []
        fsum = 0
        for m in self._mappers:
            # calculate upper border
            fsum_new = fsum + m.getOutSize()

            # hand each mapper its own slice of features, transposed back
            result.append(m.reverse(data[fsum:fsum_new].T))

            fsum = fsum_new

        return result


    def train(self, dataset):
        """Trains all embedded mappers.

        The provided training dataset is split appropriately and the
        corresponding pieces are passed to the
        :meth:`~mvpa.mappers.base.Mapper.train` method of each embedded mapper.

        :Parameter:
          dataset: :class:`~mvpa.datasets.base.Dataset` or subclass
            A dataset with the number of features matching the `outSize` of the
            `CombinedMapper`.

        :Raises:
          ValueError
            If `dataset.nfeatures` differs from `getOutSize()`.
        """
        if dataset.nfeatures != self.getOutSize():
            raise ValueError("Training dataset does not match the mapper "
                             "properties.")

        fsum = 0
        for m in self._mappers:
            # need to split the dataset
            fsum_new = fsum + m.getOutSize()
            m.train(dataset.selectFeatures(list(range(fsum, fsum_new))))
            fsum = fsum_new


    def getInSize(self):
        """Returns the size of the entity in input space"""
        # builtin sum: passing a generator to N.sum is deprecated
        return sum(m.getInSize() for m in self._mappers)


    def getOutSize(self):
        """Returns the size of the entity in output space"""
        # builtin sum: passing a generator to N.sum is deprecated
        return sum(m.getOutSize() for m in self._mappers)


    def selectOut(self, outIds):
        """Remove some elements and leave only ids in 'out'/feature space.

        .. note::
          The subset selection is done inplace

        :Parameter:
          outIds: sequence
            All output feature ids to be selected/kept.
        """
        # determine which features belong to what mapper
        # and call its selectOut() accordingly
        ids = N.asanyarray(outIds)
        fsum = 0
        for m in self._mappers:
            # query the size *before* selecting, since selectOut() is
            # expected to change it
            msize = m.getOutSize()
            # bool which meta feature ids belongs to this mapper
            selector = N.logical_and(ids < fsum + msize, ids >= fsum)
            # make feature ids relative to this dataset
            selected = ids[selector] - fsum
            fsum += msize
            # finally apply to mapper
            m.selectOut(selected)


    def getNeighbor(self, outId, *args, **kwargs):
        """Get the ids of the neighbors of a single feature in output dataspace.

        :Parameters:
          outId: int
            Single id of a feature in output space, whose neighbors should be
            determined.
          *args, **kwargs
            Additional arguments are passed to the metric of the embedded
            mapper, that is responsible for the corresponding feature.

        Returns whatever the responsible embedded mapper's `getNeighbor()`
        returns.

        :Raises:
          ValueError
            If `outId` does not belong to any embedded mapper.
        """
        fsum = 0
        for m in self._mappers:
            fsum_new = fsum + m.getOutSize()
            if outId >= fsum and outId < fsum_new:
                # NOTE(review): the result is passed through untouched, so
                # the ids appear to be relative to the embedded mapper's
                # own output space, not offset by `fsum` into the combined
                # space -- confirm whether callers expect that.
                return m.getNeighbor(outId - fsum, *args, **kwargs)
            fsum = fsum_new

        raise ValueError("Invalid outId passed to CombinedMapper.getNeighbor()")


    def __repr__(self):
        """Return ``ClassName([metric=..., ]mappers=[...])``."""
        # Build the argument list directly instead of stripping the
        # base-class repr: rstrip(' )') would also eat closing
        # parentheses that belong to the metric's own repr.
        args = []
        metric = self.metric
        if metric is not None:
            args.append("metric=%s" % repr(metric))
        args.append('mappers=[%s]'
                    % ', '.join([m.__repr__() for m in self._mappers]))
        return "%s(%s)" % (self.__class__.__name__, ', '.join(args))
615 616 617
class ChainMapper(Mapper):
    """Meta mapper that embeds a chain of other mappers.

    Each mapper in the chain is called successively to perform forward or
    reverse mapping.

    .. note::

      In its current implementation the `ChainMapper` treats all but the last
      mapper as simple pre-processing (in forward()) or post-processing (in
      reverse()) steps. All other capabilities, e.g. training and neighbor
      metrics are provided by or affect *only the last mapper in the chain*.

      With respect to neighbor metrics this means that they are determined
      based on the input space of the *last mapper* in the chain and *not* on
      the input dataspace of the `ChainMapper` as a whole
    """
    def __init__(self, mappers, **kwargs):
        """
        :Parameters:
          mappers: list of Mapper instances
          **kwargs
            All additional arguments are passed to the base-class constructor.

        :Raises:
          ValueError
            If `mappers` is empty.
        """
        Mapper.__init__(self, **kwargs)

        if not len(mappers):
            raise ValueError('ChainMapper needs at least one embedded mapper.')

        self._mappers = mappers


    def forward(self, data):
        """Calls all mappers in the chain successively.

        :Parameter:
          data
            data to be chain-mapped.

        Returns the output of the last mapper in the chain.
        """
        mp = data
        for m in self._mappers:
            mp = m.forward(mp)

        return mp


    def reverse(self, data):
        """Calls all mappers in the chain successively, in reversed order.

        :Parameter:
          data: array
            data array to be reverse mapped into the original dataspace.

        Returns the output of the first mapper's `reverse()`.
        """
        mp = data
        for m in reversed(self._mappers):
            mp = m.reverse(mp)

        return mp


    def train(self, dataset):
        """Trains the *last* mapper in the chain.

        :Parameter:
          dataset: :class:`~mvpa.datasets.base.Dataset` or subclass
            A dataset with the number of features matching the `outSize` of the
            last mapper in the chain (which is identical to the one of the
            `ChainMapper` itself).

        :Raises:
          ValueError
            If `dataset.nfeatures` differs from `getOutSize()`.
        """
        if dataset.nfeatures != self.getOutSize():
            raise ValueError("Training dataset does not match the mapper "
                             "properties.")

        self._mappers[-1].train(dataset)


    def getInSize(self):
        """Returns the size of the entity in input space"""
        # input space of the chain is the input space of its first mapper
        return self._mappers[0].getInSize()


    def getOutSize(self):
        """Returns the size of the entity in output space"""
        # output space of the chain is the output space of its last mapper
        return self._mappers[-1].getOutSize()


    def selectOut(self, outIds):
        """Remove some elements from the *last* mapper in the chain.

        :Parameter:
          outIds: sequence
            All output feature ids to be selected/kept.
        """
        self._mappers[-1].selectOut(outIds)


    def getNeighbor(self, outId, *args, **kwargs):
        """Get the ids of the neighbors of a single feature in output dataspace.

        .. note::

          The neighbors are determined based on the input space of the *last
          mapper* in the chain and *not* on the input dataspace of the
          `ChainMapper` as a whole!

        :Parameters:
          outId: int
            Single id of a feature in output space, whose neighbors should be
            determined.
          *args, **kwargs
            Additional arguments are passed on to `getNeighbor()` of the
            last mapper in the chain.

        Returns whatever `getNeighbor()` of the last mapper returns.
        """
        return self._mappers[-1].getNeighbor(outId, *args, **kwargs)


    def __repr__(self):
        """Return ``ClassName([metric=..., ]mappers=[...])``."""
        # Build the argument list directly instead of stripping the
        # base-class repr: rstrip(' )') would also eat closing
        # parentheses that belong to the metric's own repr.
        args = []
        metric = self.metric
        if metric is not None:
            args.append("metric=%s" % repr(metric))
        args.append('mappers=[%s]'
                    % ', '.join([m.__repr__() for m in self._mappers]))
        return "%s(%s)" % (self.__class__.__name__, ', '.join(args))
743