1
2
3
4
5
6
7
8
9 """Self-organizing map (SOM) mapper."""
10
11 __docformat__ = 'restructuredtext'
12
13
14 import numpy as N
15 from mvpa.mappers.base import Mapper
16
17 if __debug__:
18 from mvpa.base import debug
19
21 """Mapper using a self-organizing map (SOM) for dimensionality reduction.
22
23 This mapper provides a simple, but pretty fast implementation of a
24 self-organizing map using an unsupervised training algorithm. It performs a
25 ND -> 2D mapping, which can for, example, be used for visualization of
26 high-dimensional data.
27
28 This SOM implementation uses squared Euclidean distance to determine
29 the best matching Kohonen unit and a Gaussian neighborhood influence
30 kernel.
31 """
32 - def __init__(self, kshape, niter, learning_rate=0.005,
33 iradius=None):
34 """
35 :Parameters:
36 kshape: (int, int)
37 Shape of the internal Kohonen layer. Currently, only 2D Kohonen
38 layers are supported, although the length of an axis might be set
39 to 1.
40 niter: int
41 Number of iteration during network training.
42 learning_rate: float
43 Initial learning rate, which will continuously decreased during
44 network training.
45 iradius: float | None
46 Initial radius of the Gaussian neighborhood kernel radius, which
47 will continuously decreased during network training. If `None`
48 (default) the radius is set equal to the longest edge of the
49 Kohonen layer.
50 """
51
52 Mapper.__init__(self)
53
54 self.kshape = N.array(kshape, dtype='int')
55
56 if iradius is None:
57 self.radius = self.kshape.max()
58 else:
59 self.radius = iradius
60
61
62 self.lrate = learning_rate
63
64
65 self.niter = niter
66
67
68
69 self.iter_scale = self.niter / N.log(self.radius)
70
71
72 self._K = None
73
74
76 """Perform network training.
77
78 :Parameter:
79 ds: Dataset
80 All samples in the dataset will be used for unsupervised training of
81 the SOM.
82 """
83
84
85 self._K = N.random.standard_normal(tuple(self.kshape) + (ds.nfeatures,))
86
87
88
89 unit_deltas = N.zeros(self._K.shape, dtype='float')
90
91
92
93
94
95 dqd = N.fromfunction(lambda x, y: (x**2 + y**2)**0.5,
96 self.kshape, dtype='float')
97
98
99 for it in xrange(1, self.niter + 1):
100
101
102 k = self._computeInfluenceKernel(it, dqd)
103
104
105 for s in ds.samples:
106
107 b = self._getBMU(s)
108
109
110
111
112
113 infl = N.vstack((
114 N.hstack((
115
116 k[b[0]:0:-1, b[1]:0:-1],
117
118 k[b[0]:0:-1, :self.kshape[1] - b[1]])),
119 N.hstack((
120
121 k[:self.kshape[0] - b[0], b[1]:0:-1],
122
123 k[:self.kshape[0] - b[0], :self.kshape[1] - b[1]]))
124 ))
125 unit_deltas += infl[:,:,N.newaxis] * (s - self._K)
126
127
128 self._K += unit_deltas
129
130 if __debug__:
131 debug("SOM", "Iteration %d/%d done: ||unit_deltas||=%g" %
132 (it, self.niter, N.sqrt(N.sum(unit_deltas **2))))
133
134
135 unit_deltas.fill(0.)
136
137
139 """Compute the neighborhood kernel for some iteration.
140
141 :Parameters:
142 iter: int
143 The iteration for which to compute the kernel.
144 dqd: array (nrows x ncolumns)
145 This is one quadrant of Euclidean distances between Kohonen unit
146 locations.
147 """
148
149 curr_max_radius = self.radius * N.exp(-1.0 * iter / self.iter_scale)
150
151
152 curr_lrate = self.lrate * N.exp(-1.0 * iter / self.iter_scale)
153
154
155 infl = N.exp((-1.0 * dqd) / (2 * curr_max_radius * iter))
156 infl *= curr_lrate
157
158
159
160 infl[dqd > curr_max_radius] = 0.
161
162 return infl
163
164
166 """Returns the ID of the best matching unit.
167
168 'best' is determined as minimal squared Euclidean distance between
169 any units weight vector and some given target `sample`
170
171 :Parameters:
172 sample: array
173 Target sample.
174
175 :Returns:
176 tuple: (row, column)
177 """
178
179 loc = N.argmin(((self.K - sample) ** 2).sum(axis=2))
180
181
182 return (N.divide(loc, self.kshape[1]), loc % self.kshape[1])
183
184
186 """Map data from the IN dataspace into OUT space.
187
188 Mapping is performs by simple determining the best matching Kohonen
189 unit for each data sample.
190 """
191 return N.array([self._getBMU(d) for d in data])
192
193
195 """Reverse map data from OUT space into the IN space.
196 """
197
198
199 return self.K[tuple(N.transpose(data))]
200
201
203 """Returns the size of the entity in input space"""
204 return self.K.shape[-1]
205
206
208 """Returns the size of the entity in output space"""
209 return self.K.shape[:-1]
210
211
213 """Limit the OUT space to a certain set of features.
214
215 This is currently not implemented. Moreover, although it is technically
216 possible to implement this functionality, it is unsure whether it is
217 meaningful in the context of SOMs.
218 """
219 raise NotImplementedError
220
221
223 """Translate a feature id into a coordinate/index in input space.
224
225 This is not meaningful in the context of SOMs.
226 """
227 raise NotImplementedError
228
229
231 """Validate feature id in OUT space.
232 """
233 return (outId >= 0).all() and (outId < self.kshape).all()
234
235
237 s = Mapper.__repr__(self).rstrip(' )')
238
239 if not s[-1] == '(':
240 s += ' '
241 s += 'kshape=%s, niter=%i, learning_rate=%f, iradius=%f)' \
242 % (str(tuple(self.kshape)), self.niter, self.lrate,
243 self.radius)
244 return s
245
246
248 """Provide access to the Kohonen layer.
249
250 With some care.
251 """
252 if self._K is None:
253 raise RuntimeError, \
254 'The SOM needs to be trained before access to the Kohonen ' \
255 'layer is possible.'
256
257 return self._K
258
259
260 K = property(fget=_accessKohonen)
261