Package mvpa :: Package mappers :: Module som
[hide private]
[frames] | [no frames]

Source Code for Module mvpa.mappers.som

  1  #emacs: -*- mode: python-mode; py-indent-offset: 4; indent-tabs-mode: nil -*- 
  2  #ex: set sts=4 ts=4 sw=4 et: 
  3  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
  4  # 
  5  #   See COPYING file distributed along with the PyMVPA package for the 
  6  #   copyright and license terms. 
  7  # 
  8  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
  9  """Self-organizing map (SOM) mapper.""" 
 10   
 11  __docformat__ = 'restructuredtext' 
 12   
 13   
 14  import numpy as N 
 15  from mvpa.mappers.base import Mapper 
 16   
 17  if __debug__: 
 18      from mvpa.base import debug 
 19   
class SimpleSOMMapper(Mapper):
    """Mapper using a self-organizing map (SOM) for dimensionality reduction.

    This mapper provides a simple, but pretty fast implementation of a
    self-organizing map using an unsupervised training algorithm. It performs a
    ND -> 2D mapping, which can, for example, be used for visualization of
    high-dimensional data.

    This SOM implementation uses squared Euclidean distance to determine
    the best matching Kohonen unit and a Gaussian neighborhood influence
    kernel.
    """
    def __init__(self, kshape, niter, learning_rate=0.005,
                 iradius=None):
        """
        :Parameters:
          kshape: (int, int)
            Shape of the internal Kohonen layer. Currently, only 2D Kohonen
            layers are supported, although the length of an axis might be set
            to 1.
          niter: int
            Number of iterations during network training.
          learning_rate: float
            Initial learning rate, which will be continuously decreased during
            network training.
          iradius: float | None
            Initial radius of the Gaussian neighborhood kernel, which will be
            continuously decreased during network training. If `None`
            (default) the radius is set equal to the longest edge of the
            Kohonen layer.
        """
        # init base class
        Mapper.__init__(self)

        self.kshape = N.array(kshape, dtype='int')

        if iradius is None:
            self.radius = self.kshape.max()
        else:
            self.radius = iradius

        # learning rate
        self.lrate = learning_rate

        # number of training iterations
        self.niter = niter

        # precompute whatever can be done
        # scalar for decay of learning rate and radius across all iterations
        # NOTE(review): a radius of exactly 1 makes log(radius) zero and this
        # a division by zero -- presumably callers use radii > 1; verify
        self.iter_scale = self.niter / N.log(self.radius)

        # the internal kohonen layer; created lazily by train()
        self._K = None


    def train(self, ds):
        """Perform network training.

        :Parameter:
          ds: Dataset
            All samples in the dataset will be used for unsupervised training of
            the SOM.
        """
        # XXX initialize with clever default, e.g. plain of first two PCA
        # components
        self._K = N.random.standard_normal(tuple(self.kshape) + (ds.nfeatures,))

        # units weight vector deltas for batch training
        # (height x width x #features)
        unit_deltas = N.zeros(self._K.shape, dtype='float')

        # precompute distance kernel between elements in the Kohonen layer
        # that will remain constant throughout the training
        # (just compute one quadrant, as the distances are symmetric)
        # XXX maybe do other than squared Euclidean?
        dqd = N.fromfunction(lambda x, y: (x**2 + y**2)**0.5,
                             self.kshape, dtype='float')

        # for all iterations
        # NOTE: `range` instead of Py2-only `xrange` -- identical behavior,
        # keeps the code usable on Python 3
        for it in range(1, self.niter + 1):
            # compute the neighborhood impact kernel for this iteration
            # has to be recomputed since kernel shrinks over time
            k = self._computeInfluenceKernel(it, dqd)

            # for all training vectors
            for s in ds.samples:
                # determine closest unit (as element coordinate)
                b = self._getBMU(s)

                # train all units at once by unfolding the kernel (from the
                # single quadrant that is precomputed), cutting it to the
                # right shape and simply multiply it to the difference of target
                # and all unit weights....
                infl = N.vstack((
                        N.hstack((
                            # upper left
                            k[b[0]:0:-1, b[1]:0:-1],
                            # upper right
                            k[b[0]:0:-1, :self.kshape[1] - b[1]])),
                        N.hstack((
                            # lower left
                            k[:self.kshape[0] - b[0], b[1]:0:-1],
                            # lower right
                            k[:self.kshape[0] - b[0], :self.kshape[1] - b[1]]))
                            ))
                unit_deltas += infl[:,:,N.newaxis] * (s - self._K)

            # apply cumulative unit deltas
            self._K += unit_deltas

            if __debug__:
                debug("SOM", "Iteration %d/%d done: ||unit_deltas||=%g" %
                      (it, self.niter, N.sqrt(N.sum(unit_deltas **2))))

            # reset unit deltas
            unit_deltas.fill(0.)


    def _computeInfluenceKernel(self, iter, dqd):
        """Compute the neighborhood kernel for some iteration.

        :Parameters:
          iter: int
            The iteration for which to compute the kernel.
          dqd: array (nrows x ncolumns)
            This is one quadrant of Euclidean distances between Kohonen unit
            locations.
        """
        # compute radius decay for this iteration
        curr_max_radius = self.radius * N.exp(-1.0 * iter / self.iter_scale)

        # same for learning rate
        curr_lrate = self.lrate * N.exp(-1.0 * iter / self.iter_scale)

        # compute Gaussian influence kernel
        # NOTE(review): this is not a textbook Gaussian -- the exponent uses
        # dqd (not dqd**2) and scales by `iter` as well as the radius; kept
        # as-is to preserve the trained maps, but worth confirming against the
        # intended kernel exp(-dqd**2 / (2 * sigma**2))
        infl = N.exp((-1.0 * dqd) / (2 * curr_max_radius * iter))
        infl *= curr_lrate

        # hard-limit kernel to max radius
        # XXX is this really necessary?
        infl[dqd > curr_max_radius] = 0.

        return infl


    def _getBMU(self, sample):
        """Returns the ID of the best matching unit.

        'best' is determined as minimal squared Euclidean distance between
        any units weight vector and some given target `sample`

        :Parameters:
          sample: array
            Target sample.

        :Returns:
          tuple: (row, column)
        """
        # TODO expose distance function as parameter
        loc = N.argmin(((self.K - sample) ** 2).sum(axis=2))

        # assumes 2D Kohonen layer
        # use floor division: N.divide() performs true (float) division on
        # modern NumPy, which would yield a float row coordinate and break
        # the integer indexing done in reverse()
        return (loc // self.kshape[1], loc % self.kshape[1])


    def forward(self, data):
        """Map data from the IN dataspace into OUT space.

        Mapping is performed by simply determining the best matching Kohonen
        unit for each data sample.
        """
        return N.array([self._getBMU(d) for d in data])


    def reverse(self, data):
        """Reverse map data from OUT space into the IN space.
        """
        # simple transform into appropriate array slicing and
        # return the associated Kohonen unit weights
        return self.K[tuple(N.transpose(data))]


    def getInSize(self):
        """Returns the size of the entity in input space"""
        return self.K.shape[-1]


    def getOutSize(self):
        """Returns the size of the entity in output space"""
        return self.K.shape[:-1]


    def selectOut(self, outIds):
        """Limit the OUT space to a certain set of features.

        This is currently not implemented. Moreover, although it is technically
        possible to implement this functionality, it is unsure whether it is
        meaningful in the context of SOMs.
        """
        raise NotImplementedError


    def getInId(self, outId):
        """Translate a feature id into a coordinate/index in input space.

        This is not meaningful in the context of SOMs.
        """
        raise NotImplementedError


    def isValidOutId(self, outId):
        """Validate feature id in OUT space.
        """
        return (outId >= 0).all() and (outId < self.kshape).all()


    def __repr__(self):
        s = Mapper.__repr__(self).rstrip(' )')
        # beautify
        if not s[-1] == '(':
            s += ' '
        s += 'kshape=%s, niter=%i, learning_rate=%f, iradius=%f)' \
             % (str(tuple(self.kshape)), self.niter, self.lrate,
                self.radius)
        return s


    def _accessKohonen(self):
        """Provide access to the Kohonen layer.

        With some care.
        """
        if self._K is None:
            # Py2/Py3-compatible raise syntax (was `raise RuntimeError, msg`)
            raise RuntimeError(
                'The SOM needs to be trained before access to the Kohonen '
                'layer is possible.')

        return self._K


    K = property(fget=_accessKohonen)
261