mvpa.mappers.procrustean

25 """Mapper to project from one space to another using Procrustean 26 transformation (shift + scaling + rotation) 27 """ 28 29 _DEV__doc__ = """Possibly revert back to inherit from ProjectionMapper""" 30

31 - def __init__(self, scaling=True, reflection=True, reduction=True, 32 oblique=False, oblique_rcond=-1, **kwargs):

33 """Initialize the ProcrusteanMapper 34 35 :Parameters: 36 scaling: bool 37 Scale data for the transformation (no longer rigid body 38 transformation) 39 reflection: bool 40 Allow for the data to be reflected (so it might not be a rotation). 41 Effective only for non-oblique transformations 42 reduction: bool 43 If true, it is allowed to map into lower-dimensional 44 space. Forward transformation might be suboptimal then and reverse 45 transformation might not recover all original variance 46 oblique: bool 47 Either to allow non-orthogonal transformation -- might heavily overfit 48 the data if there is less samples than dimensions. Use `oblique_rcond`. 49 oblique_rcond: float 50 Cutoff for 'small' singular values to regularize the inverse. See 51 :class:`~numpy.linalg.lstsq` for more information. 52 """ 53 ProjectionMapper.__init__(self, **kwargs) 54 55 self._scaling = scaling 56 """Either to determine the scaling factor""" 57 58 self._reduction = reduction 59 self._reflection = reflection 60 self._oblique = oblique 61 self._oblique_rcond = oblique_rcond 62 self._scale = None 63 """Estimated scale"""

64 65 __doc__ = enhancedDocString('ProcrusteanMapper', locals(), ProjectionMapper) 66 67 # XXX we should just use beautiful ClassWithCollections everywhere... makes 68 # life so easier... for now -- manual

69 - def __repr__(self):

70 s = ProjectionMapper.__repr__(self).rstrip(' )') 71 if not s[-1] == '(': s += ', ' 72 s += "scaling=%d, reflection=%d, reduction=%d, " \ 73 "oblique=%s, oblique_rcond=%g)" % \ 74 (self._scaling, self._reflection, self._reduction, 75 self._oblique, self._oblique_rcond) 76 return s

77 78 # XXX we have to override train since now we have multiple datasets 79 # alternative way is to assign target to the labels of the source 80 # dataset

81 - def _train(self, source, target=None):

82 """Train Procrustean transformation 83 84 :Parameters: 85 source : dataset or ndarray 86 Source space for determining the transformation. If target 87 is None, then labels of 'source' dataset are taken as the target 88 target : dataset or ndarray or Null 89 Target space for determining the transformation 90 """ 91 92 # Since it is unsupervised, we don't care about labels 93 datas = () 94 odatas = () 95 means = () 96 shapes = () 97 98 assess_residuals = __debug__ and 'MAP_' in debug.active 99 100 if target is None: 101 target = source.labels 102 103 for i, ds in enumerate((source, target)): 104 if isinstance(ds, Dataset): 105 data = N.asarray(ds.samples) 106 else: 107 data = ds 108 if assess_residuals: 109 odatas += (data,) 110 if i == 0: 111 mean = self._offset_in 112 else: 113 mean = data.mean(axis=0) 114 data = data - mean 115 means += (mean,) 116 datas += (data,) 117 shapes += (data.shape,) 118 119 # shortcuts for sizes 120 sn, sm = shapes[0] 121 tn, tm = shapes[1] 122 123 # Check the sizes 124 if sn != tn: 125 raise ValueError, "Data for both spaces should have the same " \ 126 "number of samples. Got %d in source and %d in target space" \ 127 % (sn, tn) 128 129 # Sums of squares 130 ssqs = [N.sum(d**2, axis=0) for d in datas] 131 132 # XXX check for being invariant? 133 # needs to be tuned up properly and not raise but handle 134 for i in xrange(2): 135 if N.all(ssqs[i] <= N.abs((N.finfo(datas[i].dtype).eps 136 * sn * means[i] )**2)): 137 raise ValueError, "For now do not handle invariant in time datasets" 138 139 norms = [ N.sqrt(N.sum(ssq)) for ssq in ssqs ] 140 normed = [ data/norm for (data, norm) in zip(datas, norms) ] 141 142 # add new blank dimensions to source space if needed 143 if sm < tm: 144 normed[0] = N.hstack( (normed[0], N.zeros((sn, tm-sm))) ) 145 146 if sm > tm: 147 if self._reduction: 148 normed[1] = N.hstack( (normed[1], N.zeros((sn, sm-tm))) ) 149 else: 150 raise ValueError, "reduction=False, so mapping from " \ 151 "higher dimensionality " \ 152 "source space is not supported. Source space had %d " \ 153 "while target %d dimensions (features)" % (sm, tm) 154 155 source, target = normed 156 if self._oblique: 157 # Just do silly linear system of equations ;) or naive 158 # inverse problem 159 if sn == sm and tm == 1: 160 T = N.linalg.solve(source, target) 161 else: 162 T = N.linalg.lstsq(source, target, rcond=self._oblique_rcond)[0] 163 ss = 1.0 164 else: 165 # Orthogonal transformation 166 # figure out optimal rotation 167 U, s, Vh = N.linalg.svd(N.dot(target.T, source), 168 full_matrices=False) 169 T = N.dot(Vh.T, U.T) 170 171 if not self._reflection: 172 # then we need to assure that it is only rotation 173 # "recipe" from 174 # http://en.wikipedia.org/wiki/Orthogonal_Procrustes_problem 175 # for more and info and original references, see 176 # http://dx.doi.org/10.1007%2FBF02289451 177 nsv = len(s) 178 s[:-1] = 1 179 s[-1] = N.linalg.det(T) 180 T = N.dot(U[:, :nsv] * s, Vh) 181 182 # figure out scale and final translation 183 # XXX with reflection False -- not sure if here or there or anywhere... 184 ss = sum(s) 185 186 # if we were to collect standardized distance 187 # std_d = 1 - sD**2 188 189 # select out only relevant dimensions 190 if sm != tm: 191 T = T[:sm, :tm] 192 193 self._scale = scale = ss * norms[1] / norms[0] 194 # Assign projection 195 if self._scaling: 196 proj = scale * T 197 else: 198 proj = T 199 self._proj = proj 200 201 if self._demean: 202 self._offset_out = means[1] 203 204 if __debug__ and 'MAP_' in debug.active: 205 # compute the residuals 206 res_f = self.forward(odatas[0]) 207 d_f = N.linalg.norm(odatas[1] - res_f)/N.linalg.norm(odatas[1]) 208 res_r = self.reverse(odatas[1]) 209 d_r = N.linalg.norm(odatas[0] - res_r)/N.linalg.norm(odatas[0]) 210 debug('MAP_', "%s, residuals are forward: %g," 211 " reverse: %g" % (repr(self), d_f, d_r))

Source Code for Module mvpa.mappers.procrustean