1
2
3
4
5
6
7
8
9
10 """FeaturewiseDatasetMeasure performing multivariate Iterative RELIEF
11 (I-RELIEF) algorithm.
12 See : Y. Sun, Iterative RELIEF for Feature Weighting: Algorithms, Theories,
13 and Applications, IEEE Trans. on Pattern Analysis and Machine Intelligence
14 (TPAMI), vol. 29, no. 6, pp. 1035-1051, June 2007."""
15
16
17 __docformat__ = 'restructuredtext'
18
19 import numpy as N
20
21 from mvpa.measures.base import FeaturewiseDatasetMeasure
22 from mvpa.clfs.kernel import KernelSquaredExponential, KernelExponential, \
23 KernelMatern_3_2, KernelMatern_5_2
24 from mvpa.clfs.distance import pnorm_w
25
26 if __debug__:
27 from mvpa.base import debug
28
29
31 """`FeaturewiseDatasetMeasure` that performs multivariate I-RELIEF
32 algorithm. Batch version allowing various kernels.
33
34     UNDER DEVELOPMENT.
35
36 Batch I-RELIEF-2 feature weighting algorithm. Works for binary or
37 multiclass class-labels. Batch version with complexity O(T*N^2*I),
38 where T is the number of iterations, N the number of instances, I
39 the number of features.
40
41 See: Y. Sun, Iterative RELIEF for Feature Weighting: Algorithms,
42 Theories, and Applications, IEEE Trans. on Pattern Analysis and
43 Machine Intelligence (TPAMI), vol. 29, no. 6, pp. 1035-1051, June
44 2007. http://plaza.ufl.edu/sunyijun/Paper/PAMI_1.pdf
45
46 Note that current implementation allows to use only
47 exponential-like kernels. Support for linear kernel will be
48 added later.
49 """
50 - def __init__(self, threshold = 1.0e-2, kernel = None, kernel_width = 1.0,
51 w_guess = None, **kwargs):
68
69
71 """Compute hit/miss dictionaries.
72
73 For each instance compute the set of indices having the same
74 class label and different class label.
75
76 Note that this computation is independent of the number of
77 features.
78 """
79
80 M = {}
81 H = {}
82 for i in range(label.size):
83 M[i] = N.where(label != label[i])[0]
84 tmp = (N.where(label == label[i])[0]).tolist()
85 tmp.remove(i)
86
87 assert(tmp != [])
88 H[i] = N.array(tmp)
89
90 return M, H
91
92
93 - def _call(self, dataset):
94 """Computes featurewise I-RELIEF weights."""
95 samples = dataset.samples
96 NS, NF = samples.shape[:2]
97 if self.w_guess == None:
98 self.w = N.ones(NF, 'd')
99
100 self.w = self.w/(self.w**2).sum()
101
102 M, H = self.compute_M_H(dataset.labels)
103
104 while True:
105 self.k = self.kernel(length_scale = self.kernel_width/self.w)
106 d_w_k = self.k.compute(samples)
107
108
109
110
111 d_w_k[N.abs(d_w_k-1.0) < 1.0e-15] = 0.0
112 ni = N.zeros(NF, 'd')
113 for n in range(NS):
114
115 gamma_n = 1.0 - N.nan_to_num(d_w_k[n, M[n]].sum() \
116 / (d_w_k[n, :].sum()-d_w_k[n, n]))
117 alpha_n = N.nan_to_num(d_w_k[n, M[n]]/(d_w_k[n, M[n]].sum()))
118 beta_n = N.nan_to_num(d_w_k[n, H[n]]/(d_w_k[n, H[n]].sum()))
119
120 m_n = (N.abs(samples[n, :] - samples[M[n], :]) \
121 * alpha_n[:, None]).sum(0)
122 h_n = (N.abs(samples[n, :] - samples[H[n], :]) \
123 * beta_n[:, None]).sum(0)
124 ni += gamma_n*(m_n-h_n)
125 ni = ni/NS
126
127 ni_plus = N.clip(ni, 0.0, N.inf)
128 w_new = N.nan_to_num(ni_plus/(N.sqrt((ni_plus**2).sum())))
129 change = N.abs(w_new-self.w).sum()
130 if __debug__ and 'IRELIEF' in debug.active:
131 debug('IRELIEF',
132 "change=%.4f max=%f min=%.4f mean=%.4f std=%.4f #nan=%d"
133 % (change, w_new.max(), w_new.min(), w_new.mean(),
134 w_new.std(), N.isnan(w_new).sum()))
135
136
137 self.w = w_new
138 if change < self.threshold:
139 break
140
141 return self.w
142
143
145 """`FeaturewiseDatasetMeasure` that performs multivariate I-RELIEF
146 algorithm. Online version.
147
148 UNDER DEVELOPMENT
149
150 Online version with complexity O(T*N*I),
151 where N is the number of instances and I the number of features.
152
153 See: Y. Sun, Iterative RELIEF for Feature Weighting: Algorithms,
154 Theories, and Applications, IEEE Trans. on Pattern Analysis and
155 Machine Intelligence (TPAMI), vol. 29, no. 6, pp. 1035-1051, June
156 2007. http://plaza.ufl.edu/sunyijun/Paper/PAMI_1.pdf
157
158 Note that this implementation is not fully online, since hit and
159 miss dictionaries (H,M) are computed once at the beginning using
160 full access to all labels. This can be easily corrected to a full
161 online implementation. But this is not mandatory now since the
162 major goal of this current online implementation is reduction of
163 computational complexity.
164 """
165
166 - def __init__(self, a=5.0, permute=True, max_iter=3, **kwargs):
167 """Constructor of the IRELIEF class.
168
169 """
170
171 IterativeRelief_Devel.__init__(self, **kwargs)
172
173 self.a = a
174 self.permute = permute
175 self.max_iter = max_iter
176
177
178 - def _call(self, dataset):
179 """Computes featurewise I-RELIEF-2 weights. Online version."""
180 NS = dataset.samples.shape[0]
181 NF = dataset.samples.shape[1]
182 if self.w_guess == None:
183 self.w = N.ones(NF, 'd')
184
185 self.w = self.w/(self.w**2).sum()
186
187 M, H = self.compute_M_H(dataset.labels)
188
189 ni = N.zeros(NF, 'd')
190 pi = N.zeros(NF, 'd')
191
192 if self.permute:
193
194 random_sequence = N.random.permutation(NS)
195 else:
196 random_sequence = N.arange(NS)
197
198 change = self.threshold + 1.0
199 iteration = 0
200 counter = 0.0
201 while change > self.threshold and iteration < self.max_iter:
202 if __debug__:
203 debug('IRELIEF', "Iteration %d" % iteration)
204
205 for t in range(NS):
206 counter += 1.0
207 n = random_sequence[t]
208
209 self.k = self.kernel(length_scale = self.kernel_width/self.w)
210 d_w_k_xn_Mn = self.k.compute(dataset.samples[None, n, :],
211 dataset.samples[M[n], :]).squeeze()
212 d_w_k_xn_Mn_sum = d_w_k_xn_Mn.sum()
213 d_w_k_xn_x = self.k.compute(dataset.samples[None, n, :],
214 dataset.samples).squeeze()
215 gamma_n = 1.0 - d_w_k_xn_Mn_sum / d_w_k_xn_x.sum()
216 alpha_n = d_w_k_xn_Mn / d_w_k_xn_Mn_sum
217
218 d_w_k_xn_Hn = self.k.compute(dataset.samples[None, n, :],
219 dataset.samples[H[n], :]).squeeze()
220 beta_n = d_w_k_xn_Hn / d_w_k_xn_Hn.sum()
221
222 m_n = (N.abs(dataset.samples[n, :] - dataset.samples[M[n], :]) \
223 * alpha_n[:, N.newaxis]).sum(0)
224 h_n = (N.abs(dataset.samples[n, :] - dataset.samples[H[n], :]) \
225 * beta_n[:, N.newaxis]).sum(0)
226 pi = gamma_n * (m_n-h_n)
227 learning_rate = 1.0 / (counter * self.a + 1.0)
228 ni_new = ni + learning_rate * (pi - ni)
229 ni = ni_new
230
231
232 ni_plus = N.clip(ni, 0.0, N.inf)
233 w_new = N.nan_to_num(ni_plus / (N.sqrt((ni_plus ** 2).sum())))
234 change = N.abs(w_new - self.w).sum()
235 if t % 10 == 0 and __debug__ and 'IRELIEF' in debug.active:
236 debug('IRELIEF',
237 "t=%d change=%.4f max=%f min=%.4f mean=%.4f std=%.4f"
238 " #nan=%d" %
239 (t, change, w_new.max(), w_new.min(), w_new.mean(),
240 w_new.std(), N.isnan(w_new).sum()))
241
242 self.w = w_new
243
244 if change < self.threshold and iteration > 0:
245 break
246
247 iteration += 1
248
249 return self.w
250
251
252
254 """`FeaturewiseDatasetMeasure` that performs multivariate I-RELIEF
255 algorithm. Batch version.
256
257 Batch I-RELIEF-2 feature weighting algorithm. Works for binary or
258 multiclass class-labels. Batch version with complexity O(T*N^2*I),
259 where T is the number of iterations, N the number of instances, I
260 the number of features.
261
262 See: Y. Sun, Iterative RELIEF for Feature Weighting: Algorithms,
263 Theories, and Applications, IEEE Trans. on Pattern Analysis and
264 Machine Intelligence (TPAMI), vol. 29, no. 6, pp. 1035-1051, June
265 2007. http://plaza.ufl.edu/sunyijun/Paper/PAMI_1.pdf
266
267 Note that current implementation allows to use only
268 exponential-like kernels. Support for linear kernel will be
269 added later.
270 """
271 - def __init__(self, threshold=1.0e-2, kernel_width=1.0,
272 w_guess=None, **kwargs):
284
285
287 """Compute hit/miss dictionaries.
288
289 For each instance compute the set of indices having the same
290 class label and different class label.
291
292 Note that this computation is independent of the number of
293 features.
294
295 XXX should it be some generic function since it doesn't use self
296 """
297
298 M = {}
299 H = {}
300 for i in range(label.size):
301 M[i] = N.where(label != label[i])[0]
302 tmp = (N.where(label == label[i])[0]).tolist()
303 tmp.remove(i)
304
305 assert(tmp != [])
306 H[i] = N.array(tmp)
307
308 return M, H
309
310
311 - def k(self, distances):
312 """Exponential kernel."""
313 kd = N.exp(-distances/self.kernel_width)
314
315 kd[N.abs(distances) < 1.0e-15] = 0.0
316 return kd
317
318
319 - def _call(self, dataset):
320 """Computes featurewise I-RELIEF weights."""
321 samples = dataset.samples
322 NS, NF = samples.shape[:2]
323
324 if self.w_guess == None:
325 w = N.ones(NF, 'd')
326
327 w /= (w ** 2).sum()
328
329 M, H = self.compute_M_H(dataset.labels)
330
331 while True:
332 d_w_k = self.k(pnorm_w(data1=samples, weight=w, p=1))
333 ni = N.zeros(NF, 'd')
334 for n in range(NS):
335
336 gamma_n = 1.0 - N.nan_to_num(d_w_k[n, M[n]].sum() \
337 / (d_w_k[n, :].sum() - d_w_k[n, n]))
338 alpha_n = N.nan_to_num(d_w_k[n, M[n]] / (d_w_k[n, M[n]].sum()))
339 beta_n = N.nan_to_num(d_w_k[n, H[n]] / (d_w_k[n, H[n]].sum()))
340
341 m_n = (N.abs(samples[n, :] - samples[M[n], :]) \
342 * alpha_n[:, None]).sum(0)
343 h_n = (N.abs(samples[n, :] - samples[H[n], :]) \
344 * beta_n[:, None]).sum(0)
345 ni += gamma_n*(m_n - h_n)
346
347 ni = ni / NS
348
349 ni_plus = N.clip(ni, 0.0, N.inf)
350 w_new = N.nan_to_num(ni_plus / (N.sqrt((ni_plus**2).sum())))
351 change = N.abs(w_new - w).sum()
352 if __debug__ and 'IRELIEF' in debug.active:
353 debug('IRELIEF',
354 "change=%.4f max=%f min=%.4f mean=%.4f std=%.4f #nan=%d" \
355 % (change, w_new.max(), w_new.min(), w_new.mean(),
356 w_new.std(), N.isnan(w_new).sum()))
357
358
359 w = w_new
360 if change < self.threshold:
361 break
362
363 self.w = w
364 return w
365
366
368 """`FeaturewiseDatasetMeasure` that performs multivariate I-RELIEF
369 algorithm. Online version.
370
371 This algorithm is exactly the one in the referenced paper
372 (I-RELIEF-2 online), using weighted 1-norm and Exponential
373 Kernel.
374 """
375
376 - def __init__(self, a=10.0, permute=True, max_iter=3, **kwargs):
377 """Constructor of the IRELIEF class.
378
379 """
380
381 IterativeRelief.__init__(self, **kwargs)
382
383 self.a = a
384 self.permute = permute
385 self.max_iter = max_iter
386
387
388 - def _call(self, dataset):
389 """Computes featurewise I-RELIEF-2 weights. Online version."""
390
391 samples = dataset.samples
392 NS, NF = samples.shape[:2]
393 threshold = self.threshold
394 a = self.a
395
396 if self.w_guess == None:
397 w = N.ones(NF, 'd')
398
399
400 w /= (w ** 2).sum()
401
402 M, H = self.compute_M_H(dataset.labels)
403
404 ni = N.zeros(NF, 'd')
405 pi = N.zeros(NF, 'd')
406
407 if self.permute:
408
409 random_sequence = N.random.permutation(NS)
410 else:
411 random_sequence = N.arange(NS)
412
413 change = threshold + 1.0
414 iteration = 0
415 counter = 0.0
416 while change > threshold and iteration < self.max_iter:
417 if __debug__:
418 debug('IRELIEF', "Iteration %d" % iteration)
419 for t in range(NS):
420 counter += 1.0
421 n = random_sequence[t]
422
423 d_xn_x = N.abs(samples[n, :] - samples)
424 d_w_k_xn_x = self.k((d_xn_x * w).sum(1))
425
426 d_w_k_xn_Mn = d_w_k_xn_x[M[n]]
427 d_w_k_xn_Mn_sum = d_w_k_xn_Mn.sum()
428
429 gamma_n = 1.0 - d_w_k_xn_Mn_sum / d_w_k_xn_x.sum()
430 alpha_n = d_w_k_xn_Mn / d_w_k_xn_Mn_sum
431
432 d_w_k_xn_Hn = d_w_k_xn_x[H[n]]
433 beta_n = d_w_k_xn_Hn / d_w_k_xn_Hn.sum()
434
435 m_n = (d_xn_x[M[n], :] * alpha_n[:, None]).sum(0)
436 h_n = (d_xn_x[H[n], :] * beta_n[:, None]).sum(0)
437 pi = gamma_n * (m_n - h_n)
438 learning_rate = 1.0 / (counter * a + 1.0)
439 ni_new = ni + learning_rate * (pi - ni)
440 ni = ni_new
441
442
443 ni_plus = N.clip(ni, 0.0, N.inf)
444 w_new = N.nan_to_num(ni_plus / (N.sqrt((ni_plus ** 2).sum())))
445 change = N.abs(w_new - w).sum()
446 if t % 10 == 0 and __debug__ and 'IRELIEF' in debug.active:
447 debug('IRELIEF',
448 "t=%d change=%.4f max=%f min=%.4f mean=%.4f std=%.4f"
449 " #nan=%d" %
450 (t, change, w_new.max(), w_new.min(), w_new.mean(),
451 w_new.std(), N.isnan(w_new).sum()))
452
453 w = w_new
454
455 if change < threshold and iteration > 0:
456 break
457
458 iteration += 1
459
460 self.w = w
461 return w
462