Package mvpa :: Package datasets :: Module event
[hide private]
[frames] | no frames]

Source Code for Module mvpa.datasets.event

  1  # emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*- 
  2  # vi: set ft=python sts=4 ts=4 sw=4 et: 
  3  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
  4  # 
  5  #   See COPYING file distributed along with the PyMVPA package for the 
  6  #   copyright and license terms. 
  7  # 
  8  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
  9  """Event-based dataset type""" 
 10   
 11  __docformat__ = 'restructuredtext' 
 12   
 13   
 14  import numpy as N 
 15   
 16  from mvpa.mappers.array import DenseArrayMapper 
 17  from mvpa.mappers.boxcar import BoxcarMapper 
 18  from mvpa.mappers.mask import MaskMapper 
 19  from mvpa.datasets.base import Dataset 
 20  from mvpa.datasets.mapped import MappedDataset 
 21  from mvpa.mappers.base import ChainMapper, CombinedMapper 
 22  from mvpa.base import warning 
 23   
class EventDataset(MappedDataset):
    """Event-based dataset

    This dataset type can be used to segment 'raw' data input into meaningful
    boxcar-shaped samples, by simply defining a list of events
    (see :class:`~mvpa.misc.support.Event`).

    Additionally, it can be used to add arbitrary information (as features)
    to each event-sample (extracted from the event list itself). An
    appropriate mapper is automatically constructed, that merges original
    samples and additional features into a common feature space and also
    separates them again during reverse-mapping. Otherwise, this dataset type
    is a regular dataset (in contrast to `MetaDataset`).

    The properties of an :class:`~mvpa.misc.support.Event` supported/required
    by this class are:

    `onset`
      An integer indicating the startpoint of an event as the sample
      index in the input data.

    `duration`
      How many input data samples following the onset sample should be
      considered for an event. The embedded
      :class:`~mvpa.mappers.boxcar.BoxcarMapper` will use the maximum boxlength
      (i.e., `duration`) of all defined events to create a regular-shaped data
      array.

    `label`
      The corresponding label of that event (numeric or literal).

    `chunk`
      An optional chunk id.

    `features`
      A list with an arbitrary number of features values (floats), that will
      be added to the feature vector of the corresponding sample.
    """

    def __init__(self, samples=None, events=None, mask=None, bcshape=None,
                 dametric=None, **kwargs):
        """
        :Parameters:
          samples: ndarray
            'Raw' input data from which boxcar-shaped samples will be extracted.
          events: sequence of `Event` instances
            Both an events `onset` and `duration` are assumed to be provided
            as #samples. The boxlength will be determined by the maximum
            duration of all events. If `None`, the constructor only forwards
            `samples` and `**kwargs` to the base class (copy constructor
            mode).
          mask: boolean array
            Only features corresponding to non-zero mask elements will be
            considered for the final dataset. The mask shape either has to match
            the shape of the generated boxcar-samples, or the shape of the 'raw'
            input samples. In the latter case, the mask is automatically
            expanded to cover the whole boxcar. If no mask is provided, a
            full mask will be constructed automatically.
          bcshape: tuple
            Shape of the boxcar samples generated by the embedded boxcar mapper.
            If not provided this is determined automatically. However, this
            requires an extra mapping step.
          dametric: Metric
            Custom metric to be used by the embedded DenseArrayMapper.
          **kwargs
            All additional arguments are passed to the base class.

        :Raises:
          ValueError
            If `events` is empty, if any event lacks a `duration` or a
            `label`, if only some of the events provide `features`, or if
            the events provide an unequal number of `features`.
          KeyError
            If any event lacks an `onset` (propagated unchanged from the
            event lookup).
        """
        # check if we are in copy constructor mode
        if events is None:
            MappedDataset.__init__(self, samples=samples, **kwargs)
            return

        #
        # otherwise we really want to freshly prepare a dataset
        #

        # loop over events and extract all meaningful information to charge
        # a boxcar mapper
        startpoints = [e['onset'] for e in events]
        try:
            durations = [e['duration'] for e in events]
        except KeyError:
            raise ValueError("Each event must have a `duration`!")

        # without this guard max() below fails with an unrelated-looking
        # 'max() arg is an empty sequence' ValueError
        if not durations:
            raise ValueError("At least one event is required to build an "
                             "EventDataset.")

        # we need a regular array, so all events must have a common
        # boxlength
        boxlength = max(durations)
        if __debug__ and boxlength != min(durations):
            warning('Boxcar mapper will use maximum boxlength (%i) of all '
                    'provided Events.' % boxlength)

        # now look for stuff we need for the dataset itself
        try:
            labels = [e['label'] for e in events]
        except KeyError:
            raise ValueError("Each event must have a `label`!")

        # chunks are optional
        # NOTE(review): events without a `chunk` are silently skipped here, so
        # a partially chunked event list yields fewer chunks than labels;
        # presumably the base class rejects that -- confirm.
        chunks = [e['chunk'] for e in events if 'chunk' in e]
        if not chunks:
            chunks = None

        # optional stuff
        # extract additional features for each event
        features = [e['features'] for e in events if 'features' in e]

        # sanity check for extra features
        if not features:
            extrafeatures = None
        elif len(features) != len(startpoints):
            # only a subset of the events carries extra features
            raise ValueError('Each event has to provide the same number of '
                             'extra features.')
        else:
            try:
                extrafeatures = N.asanyarray(features)
            except ValueError:
                raise ValueError('Unequal number of extra features per event')
            # some NumPy versions do not raise for ragged input but build a
            # 1-d object array instead; catch that here rather than failing
            # with an IndexError on `extrafeatures.shape[1]` further down
            if extrafeatures.ndim < 2:
                raise ValueError('Unequal number of extra features per event')

        # now build the mapper
        # we know the properties of the boxcar mapper, so now use it
        # to determine its output size unless it is already provided
        bcmapper = BoxcarMapper(startpoints, boxlength)

        # determine array mapper input shape, as a fail-safe procedure
        # in case no mask provided, and to check the mask sanity if we have one
        if bcshape is None:
            # map the data and look at the shape of the first sample
            # to determine the properties of the array mapper
            bcshape = bcmapper(samples)[0].shape

        # expand the mask if necessary (i.e. if provided in raw sample space
        # and not in boxcar space)
        if mask is not None:
            if len(mask.shape) < len(bcshape) - 1:
                # complement needed dimensions
                mshape = mask.shape
                missing_dims = len(bcshape) - 1 - len(mshape)
                mask = mask.reshape((1,) * missing_dims + mshape)
            if len(mask.shape) == len(bcshape) - 1:
                # replicate per each boxcar element
                mask = N.array([mask] * bcshape[0])

        # now we can build the array mapper, using the optionally provided
        # custom metric
        amapper = DenseArrayMapper(mask=mask, shape=bcshape, metric=dametric)

        # now compose the full mapper for the main samples
        mapper = ChainMapper([bcmapper, amapper])

        # if we have extra features, we need to combine them with the rest
        if extrafeatures is not None:
            # first half for main samples, second half simple mask mapper
            # for unstructured additional features
            mapper = CombinedMapper(
                (mapper,
                 MaskMapper(mask=N.ones(extrafeatures.shape[1]))))

            # add extra features to the samples
            samples = (samples, extrafeatures)

        # finally init baseclass
        MappedDataset.__init__(self,
                               samples=samples,
                               labels=labels,
                               chunks=chunks,
                               mapper=mapper,
                               **kwargs)
193