# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
# vi: set ft=python sts=4 ts=4 sw=4 et:
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
#
#   See COPYING file distributed along with the PyMVPA package for the
#   copyright and license terms.
#
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
"""Dataset container"""

__docformat__ = 'restructuredtext'

import numpy as N

import random
from mvpa.datasets.mapped import MappedDataset


if __debug__:
    from mvpa.base import debug, warning


class MetaDataset(object):
    """Dataset container

    The class is useful to combine several Datasets of different origin and
    type and bind them together. Such a combined dataset can then be used,
    e.g. passed to a classifier.

    MetaDataset does not permanently duplicate data stored in the datasets it
    contains. The combined samples matrix is built on demand, and samples
    attribute access is redirected to the first dataset in the container.

    Currently, operations other than sample or feature selection are not fully
    supported, e.g. passing a MetaDataset to detrend() will initially result in
    a detrended MetaDataset, but the combined and detrended samples matrix will
    be lost after the next call to selectSamples() or selectFeatures(), which
    freshly pulls samples from all datasets in the container.
    """

    # This class is intentionally _not_ implemented as a subclass of Dataset.
    # IMHO Dataset contains too much unnecessary logic.
    # XXX implement MappedMetaDataset along with a MetaMapper that simply calls
    # the mappers in the datasets in the container; or maybe just add a flag to
    # MetaDataset to behave like a MappedDataset

    def __init__(self, datasets):
        """Initialize dataset instance

        :Parameters:
          datasets : list
        """
        # XXX Maybe add checks that all datasets have identical samples
        # attributes
        self.__datasets = datasets

        # contains the combined samples matrix for caching
        self.__samples = None


    def rebuildSamples(self):
        """Update the combined samples matrix from all underlying datasets.
        """
        # note that hstack will make a copy of _all_ data
        self.__samples = N.hstack([ds.samples for ds in self.__datasets])


    def __getattr__(self, name):
        """Implemented to redirect access to underlying datasets.
        """
        if name == 'samples':
            # do something to combine (and cache) samples arrays
            if self.__samples is None:
                self.rebuildSamples()
            return self.__samples

        else:
            # redirect all others to the first dataset
            # ??? maybe limit to some specific supported ones
            return self.__datasets[0].__getattribute__(name)


    def selectFeatures(self, ids):
        """Do feature selection on all underlying datasets at once.
        """
        # determine which features belong to what dataset
        # and call its selectFeatures() accordingly
        ids = N.asanyarray(ids)
        result = []
        fsum = 0
        for ds in self.__datasets:
            # boolean mask of the meta feature ids that belong to this dataset
            selector = N.logical_and(ids < fsum + ds.nfeatures, ids >= fsum)
            # make feature ids relative to this dataset
            selected = ids[selector] - fsum
            # do feature selection on underlying dataset
            # XXX not sure if we should keep empty datasets? (probably)
            result.append(ds.selectFeatures(selected))
            fsum += ds.nfeatures

        return MetaDataset(result)


    def applyMapper(self, *args, **kwargs):
        """Apply a mapper on all underlying datasets.
        """
        return MetaDataset([ds.applyMapper(*args, **kwargs)
                            for ds in self.__datasets])
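
    # The sample-wise operations below all delegate to every contained
    # dataset: selectSamples() forwards its arguments verbatim and wraps the
    # per-dataset results in a fresh MetaDataset, permuteLabels() permutes
    # the first dataset and mirrors the outcome into the others so all parts
    # stay aligned, and getRandomSamples() draws one common sample subset
    # that is applied to each dataset.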
112 """ 113 return MetaDataset([ds.selectSamples(*args, **kwargs) \ 114 for ds in self.__datasets])115 116118 """Toggle label permutation. 119 """ 120 # permute on first 121 self.__datasets[0].permuteLabels(*args, **kwargs) 122 123 # and apply to all others 124 for ds in self.__datasets[1:]: 125 ds.samples[:] = self.__datasets[0].samples126 127129 """Return a MetaDataset with a random subset of samples. 130 """ 131 # if interger is given take this value for all classes 132 if isinstance(nperlabel, int): 133 nperlabel = [ nperlabel for i in self.__datasets[0].uniquelabels ] 134 135 sample = [] 136 # for each available class 137 for i, r in enumerate(self.__datasets[0].uniquelabels): 138 # get the list of pattern ids for this class 139 sample += \ 140 random.sample((self.__datasets[0].labels == r).nonzero()[0], 141 nperlabel[i] ) 142 143 return MetaDataset([ds.selectSamples(sample) \ 144 for ds in self.__datasets])145 146 151 152154 """Number of features per sample. 155 """ 156 return N.sum([ds.nfeatures for ds in self.__datasets])157 158160 """Set the data type of the samples array. 161 """ 162 # reset samples 163 self.__samples = None 164 165 for ds in self.__datasets: 166 if ds.samples.dtype != dtype: 167 ds.samples = ds.samples.astype(dtype)168 169171 """Perform reverse mapping 172 173 :Return: 174 List of results per each used mapper and the corresponding part of 175 the provided `val`. 176 """ 177 # assure array and transpose for easy slicing 178 # i.e. transpose of 1D does nothing, but of 2D puts features 179 # along first dimension 180 val = N.asanyarray(val).T 181 182 # do we have multiple or just one 183 mflag = len(val.shape) > 1 184 185 result = [] 186 fsum = 0 187 for ds in self.__datasets: 188 # calculate upper border 189 fsum_new = fsum + ds.nfeatures 190 191 # now map back if mapper is present, otherwise just store 192 # need to pass transposed!! 193 if isinstance(ds, MappedDataset): 194 result.append(ds.mapReverse(val[fsum:fsum_new].T)) 195 else: 196 result.append(val[fsum:fsum_new].T) 197 198 fsum = fsum_new 199 200 return result201 202 203 # read-only class properties 204 nsamples = property(fget=getNSamples) 205 nfeatures = property(fget=getNFeatures) 206 datasets = property(fget=lambda self: self.__datasets)