9 """Sparse Multinomial Logistic Regression classifier."""
10
11 __docformat__ = 'restructuredtext'
12
13 import numpy as N
14
15 from mvpa.base import warning, externals
16 from mvpa.clfs.base import Classifier
17 from mvpa.measures.base import Sensitivity
18 from mvpa.misc.exceptions import ConvergenceError
19 from mvpa.misc.param import Parameter
20 from mvpa.misc.state import StateVariable
21 from mvpa.misc.transformers import SecondAxisMaxOfAbs


_DEFAULT_IMPLEMENTATION = "Python"
if externals.exists('ctypes'):
    # use the fast C implementation of stepwise regression if available
    from mvpa.clfs.libsmlrc import stepwise_regression as _cStepwiseRegression
    _DEFAULT_IMPLEMENTATION = "C"
else:
    _cStepwiseRegression = None
    warning("The SMLR implementation without ctypes is overwhelmingly slow."
            " You are strongly advised to install python-ctypes")

if __debug__:
    from mvpa.base import debug


def _label2oneofm(labels, ulabels):
    """Convert labels to one-of-M form.

    TODO: Might be useful elsewhere so could migrate into misc/
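
    Example: ``labels=[1, 2, 1]`` with ``ulabels=[1, 2]`` yields
    ``[[1, 0], [0, 1], [1, 0]]``.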
41 """
42
43
44 new_labels = N.zeros((len(labels), len(ulabels)))
45
46
47 for i, c in enumerate(ulabels):
48 new_labels[labels == c, i] = 1
49
50 return new_labels


class SMLR(Classifier):
    """Sparse Multinomial Logistic Regression `Classifier`.

    This is an implementation of the SMLR algorithm published in
    :ref:`Krishnapuram et al., 2005 <KCF+05>` (2005, IEEE Transactions
    on Pattern Analysis and Machine Intelligence). Be sure to cite
    that article if you use this classifier for your work.
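
    A minimal usage sketch (``dataset`` here stands for a hypothetical
    PyMVPA `Dataset`, not defined in this module)::

      clf = SMLR(lm=0.1)
      clf.train(dataset)
      predictions = clf.predict(dataset.samples)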
61 """
62
63 _clf_internals = [ 'smlr', 'linear', 'has_sensitivity', 'binary',
64 'multiclass', 'does_feature_selection' ]
65
66
    lm = Parameter(.1, min=1e-10, allowedtype='float',
             doc="""The penalty term lambda. Larger values will give rise to
             more sparsification.""")

    convergence_tol = Parameter(1e-3, min=1e-10, max=1.0, allowedtype='float',
             doc="""When the weight change for each cycle drops below this
             value the regression is considered converged. Smaller values
             lead to tighter convergence.""")

    resamp_decay = Parameter(0.5, allowedtype='float', min=0.0, max=1.0,
             doc="""Decay rate for the probability of resampling a zero
             weight. A value of 1.0 drops the probability from 1.0 to
             min_resamp immediately; 0.0 keeps it at 1.0 forever.""")

    min_resamp = Parameter(0.001, allowedtype='float', min=1e-10, max=1.0,
             doc="Minimum resampling probability for zeroed weights")

    maxiter = Parameter(10000, allowedtype='int', min=1,
             doc="""Maximum number of iterations before stopping if not
             converged.""")

    has_bias = Parameter(True, allowedtype='bool',
             doc="""Whether to add a bias term to allow fits to data not
             passing through zero""")

    fit_all_weights = Parameter(True, allowedtype='bool',
             doc="""Whether to fit weights for all classes or only for the
             number of classes minus one. Both should give nearly identical
             results, but fitting all weights takes a little longer and
             yields weights that are fully analyzable for each class. Note
             that the convergence rate may differ, but the convergence point
             is the same.""")

    implementation = Parameter(_DEFAULT_IMPLEMENTATION,
             allowedtype='basestring',
             choices=["C", "Python"],
             doc="""Use C or Python as the implementation of
             stepwise_regression. The C version brings a significant speedup
             and is therefore the default when available.""")

    seed = Parameter(None, allowedtype='None or int',
             doc="""Seed used to initialize the random number generator;
             set it to replicate a run""")

    unsparsify = Parameter(False, allowedtype='bool',
             doc="""***EXPERIMENTAL*** Whether to unsparsify the weights via
             regression. Note that it likely leads to worse classifier
             performance, but more interpretable weights.""")

    std_to_keep = Parameter(2.0, allowedtype='float',
             doc="""Standard deviation threshold of weights to keep when
             unsparsifying.""")

    def __init__(self, **kwargs):
        """Initialize an SMLR classifier.
        """

        """
        TODO:
        # Add in likelihood calculation
        # Add kernels, not just direct methods.
        """

        Classifier.__init__(self, **kwargs)

        if _cStepwiseRegression is None and self.implementation == 'C':
            warning('SMLR: C implementation is not available.'
                    ' Using pure Python one')
            self.implementation = 'Python'

        # pylint-friendly initializations
        self.__ulabels = None
        """Unique labels from the training set."""
        self.__weights_all = None
        """Contains all weights including bias values"""
        self.__weights = None
        """Just the weights, without the biases"""
        self.__biases = None
        """The biases; remain None if has_bias is False"""

    def _pythonStepwiseRegression(self, w, X, XY, Xw, E,
                                  auto_corr,
                                  lambda_over_2_auto_corr,
                                  S,
                                  M,
                                  maxiter,
                                  convergence_tol,
                                  resamp_decay,
                                  min_resamp,
                                  verbose,
                                  seed = None):
        """The (much slower) Python version of the stepwise
        regression. I'm keeping this around for now so that we can
        compare results."""

        # get the data dimensions
        ns, nd = X.shape

        # initialize the iterative optimization
        converged = False
        incr = N.finfo(N.float).max
        non_zero, basis, m, wasted_basis, cycles = 0, 0, 0, 0, 0
        sum2_w_diff, sum2_w_old, w_diff = 0.0, 0.0, 0.0
        p_resamp = N.ones(w.shape, dtype=N.float)

        if seed is not None:
            # set the random seed to make runs replicable
            N.random.seed(seed)

        if __debug__:
            debug("SMLR_", "random seed=%s" % seed)

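        # The loop below sweeps over (feature, class) pairs and performs a
        # component-wise update with a Laplacian (sparsity) prior: each
        # tentative weight w_old + grad/auto_corr is soft-thresholded by
        # lambda_over_2_auto_corr, i.e. shrunk toward zero and zeroed
        # outright if it falls inside the threshold band.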
        while not converged and cycles < maxiter:
            # get the starting weight
            w_old = w[basis, m]

            # only touch a zeroed weight with probability p_resamp
            if (w_old != 0) or N.random.rand() < p_resamp[basis, m]:

                # get the probability for the current class
                P = E[:, m]/S

                # get the gradient for this weight
                grad = XY[basis, m] - N.dot(X[:, basis], P)

                # calculate the new weight with the Laplacian prior
                w_new = w_old + grad/auto_corr[basis]

                # soft-threshold the new weight
                if w_new > lambda_over_2_auto_corr[basis]:
                    w_new -= lambda_over_2_auto_corr[basis]
                    changed = True
                    # unmark from being zero if necessary
                    if w_old == 0:
                        non_zero += 1
                        # reset the resampling probability
                        p_resamp[basis, m] = 1.0
                elif w_new < -lambda_over_2_auto_corr[basis]:
                    w_new += lambda_over_2_auto_corr[basis]
                    changed = True
                    # unmark from being zero if necessary
                    if w_old == 0:
                        non_zero += 1
                        # reset the resampling probability
                        p_resamp[basis, m] = 1.0
                else:
                    # the weight falls within the threshold band, so zero it
                    w_new = 0.0

                    # decay the resampling probability
                    p_resamp[basis, m] -= (p_resamp[basis, m] - \
                                           min_resamp) * resamp_decay

                    if w_old == 0:
                        changed = False
                        wasted_basis += 1
                    else:
                        changed = True
                        non_zero -= 1

                # handle the consequences of the update
                if changed:
                    # incrementally update the class scores
                    w_diff = w_new - w_old
                    Xw[:, m] = Xw[:, m] + X[:, basis]*w_diff
                    E_new_m = N.exp(Xw[:, m])
                    S += E_new_m - E[:, m]
                    E[:, m] = E_new_m

                    # update the weight
                    w[basis, m] = w_new

                    # keep track of the sum of squared weight changes
                    sum2_w_diff += w_diff*w_diff

                # add to the old weight norm no matter what
                sum2_w_old += w_old*w_old

            # advance to the next class, then the next basis (feature)
            m = N.mod(m+1, w.shape[1])
            if m == 0:
                # we completed a cycle of labels
                basis = N.mod(basis+1, nd)
                if basis == 0:
                    # we completed a cycle of features
                    cycles += 1

                    # assess convergence via the relative weight change
                    incr = N.sqrt(sum2_w_diff) / \
                           (N.sqrt(sum2_w_old)+N.finfo(N.float).eps)

                    converged = incr < convergence_tol

                    if __debug__:
                        debug("SMLR_", \
                              "cycle=%d ; incr=%g ; non_zero=%d ; " %
                              (cycles, incr, non_zero) +
                              "wasted_basis=%d ; " %
                              (wasted_basis) +
                              "sum2_w_old=%g ; sum2_w_diff=%g" %
                              (sum2_w_old, sum2_w_diff))

                    # reset the accumulators for the next cycle
                    sum2_w_diff = 0.0
                    sum2_w_old = 0.0
                    wasted_basis = 0

        if not converged:
            raise ConvergenceError, \
                  "More than %d iterations without convergence" % \
                  (maxiter)

        return cycles

    def _train(self, dataset):
        """Train the classifier using `dataset` (`Dataset`).
        """
        # turn the labels into a one-of-M encoding
        labels = _label2oneofm(dataset.labels, dataset.uniquelabels)
        self.__ulabels = dataset.uniquelabels.copy()

        Y = labels
        M = len(self.__ulabels)

        # get the data into easy vars
        X = dataset.samples

        # see if we are adding a bias term
        if self.params.has_bias:
            if __debug__:
                debug("SMLR_", "hstacking 1s for bias")

            # append the bias column of ones to the data
            X = N.hstack((X, N.ones((X.shape[0], 1), dtype=X.dtype)))

        if self.params.implementation.upper() == 'C':
            _stepwise_regression = _cStepwiseRegression

            # the C code requires a contiguous, aligned array
            if not (X.flags['C_CONTIGUOUS'] and X.flags['ALIGNED']):
                if __debug__:
                    debug("SMLR_",
                          "Copying data to get it C_CONTIGUOUS/ALIGNED")
                X = N.array(X, copy=True, dtype=N.double, order='C')

            # currently the data must be double for the C code
            if X.dtype != N.double:
                if __debug__:
                    debug("SMLR_", "Converting data to double")
                X = X.astype(N.double)

        elif self.params.implementation.upper() == 'PYTHON':
            _stepwise_regression = self._pythonStepwiseRegression
        else:
            raise ValueError, \
                  "Unknown implementation %s of stepwise_regression" % \
                  self.params.implementation

        # get the dimensions
        ns, nd = X.shape

        # decide the number of weights to fit
        if self.params.fit_all_weights:
            c_to_fit = M
        else:
            c_to_fit = M-1

        # precompute the quantities reused on every step
        auto_corr = ((M-1.)/(2.*M))*(N.sum(X*X, 0))
        XY = N.dot(X.T, Y[:, :c_to_fit])
        lambda_over_2_auto_corr = (self.params.lm/2.)/auto_corr

        # set the starting values
        w = N.zeros((nd, c_to_fit), dtype=N.double)
        Xw = N.zeros((ns, c_to_fit), dtype=N.double)
        E = N.ones((ns, c_to_fit), dtype=N.double)
        S = M*N.ones(ns, dtype=N.double)

        # set the verbosity
        if __debug__:
            verbosity = int("SMLR_" in debug.active)
            debug('SMLR_',
                  'Calling stepwise_regression. Seed %s' % self.params.seed)
        else:
            verbosity = 0

        # run the chosen implementation of stepwise_regression
        cycles = _stepwise_regression(w,
                                      X,
                                      XY,
                                      Xw,
                                      E,
                                      auto_corr,
                                      lambda_over_2_auto_corr,
                                      S,
                                      M,
                                      self.params.maxiter,
                                      self.params.convergence_tol,
                                      self.params.resamp_decay,
                                      self.params.min_resamp,
                                      verbosity,
                                      self.params.seed)

        if cycles >= self.params.maxiter:
            # did not converge
            raise ConvergenceError, \
                  "More than %d iterations without convergence" % \
                  (self.params.maxiter)

        # see if we need to unsparsify the weights
        if self.params.unsparsify:
            # unsparsify via least squares regression
            w = self._unsparsify_weights(X, w)

        # save the weights, with and without the bias row
        self.__weights_all = w
        self.__weights = w[:dataset.nfeatures, :]

        if self.states.isEnabled('feature_ids'):
            self.feature_ids = N.where(N.max(N.abs(w[:dataset.nfeatures, :]),
                                             axis=1)>0)[0]

        # and the biases
        if self.params.has_bias:
            self.__biases = w[-1, :]

        if __debug__:
            debug('SMLR', "train finished in %d cycles on data.shape=%s " %
                  (cycles, X.shape) +
                  "min:max(data)=%f:%f, got min:max(w)=%f:%f" %
                  (N.min(X), N.max(X), N.min(w), N.max(w)))

    def _unsparsify_weights(self, samples, weights):
        """Unsparsify weights via least squares regression."""
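        # Overview: for each class, regress all features onto the features
        # carrying nonzero weights, redistribute each sparse weight across
        # the features according to that least-squares mapping (scaled by
        # per-feature R^2), then re-zero small resulting weights.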
        new_weights = N.zeros(weights.shape, dtype=N.double)

        # compute the total sum of squares per feature
        b = samples
        sst = N.power(b - b.mean(0),2).sum(0)

        # process each class separately
        for i in range(weights.shape[1]):
            w = weights[:,i]

            # the features with nonzero weights
            ind = w!=0

            # restrict to the selected features
            a = b[:,ind]

            # regress all features onto the selected ones
            betas = N.linalg.lstsq(a,b)[0]

            # compute the per-feature R^2 of the fit
            f = N.dot(a,betas)
            sse = N.power((b-f),2).sum(0)
            rsquare = N.zeros(sse.shape,dtype=sse.dtype)
            gind = sst>0
            rsquare[gind] = 1-(sse[gind]/sst[gind])

            # redistribute the sparse weights, scaled by R^2
            new_weights[:,i] = N.dot(w[ind],betas)*rsquare

        # zero out weights below the threshold
        tozero = N.abs(new_weights) < self.params.std_to_keep*N.std(new_weights)
        orig_zero = weights==0.0
        if orig_zero.sum() < tozero.sum():
            # make sure we do not zero out fewer weights than before
            tozero = orig_zero
        new_weights[tozero] = 0.0

        if __debug__:
            debug('SMLR_', "Start nonzero: %d; Finish nonzero: %d" % \
                  ((weights!=0).sum(), (new_weights!=0).sum()))

        return new_weights

    def _getFeatureIds(self):
        """Return ids of the used features
        """
        return N.where(N.max(N.abs(self.__weights), axis=1)>0)[0]

    def _predict(self, data):
        """Predict the output for the provided data.
        """
        if self.params.has_bias:
            # append the bias column, just as during training
            data = N.hstack((data,
                             N.ones((data.shape[0], 1), dtype=data.dtype)))

        # append zero weights for the left-out class if necessary
        if self.params.fit_all_weights:
            w = self.__weights_all
        else:
            w = N.hstack((self.__weights_all,
                          N.zeros((self.__weights_all.shape[0], 1))))

        # determine the per-class exponentiated scores
        dot_prod = N.dot(data, w)
        E = N.exp(dot_prod)
        S = N.sum(E, 1)

        if __debug__:
            debug('SMLR', "predict on data.shape=%s min:max(data)=%f:%f " %
                  (`data.shape`, N.min(data), N.max(data)) +
                  "min:max(w)=%f:%f min:max(dot_prod)=%f:%f min:max(E)=%f:%f" %
                  (N.min(w), N.max(w), N.min(dot_prod), N.max(dot_prod),
                   N.min(E), N.max(E)))

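        # softmax over classes:
        # values[s, c] = exp(x_s . w_c) / sum_k exp(x_s . w_k)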
        values = E / S[:, N.newaxis].repeat(E.shape[1], axis=1)
        self.values = values

        # pick the class with the largest probability
        predictions = N.asarray([self.__ulabels[N.argmax(vals)]
                                 for vals in values])

        return predictions

    def getSensitivityAnalyzer(self, **kwargs):
        """Returns a sensitivity analyzer for SMLR."""
        kwargs.setdefault('transformer', SecondAxisMaxOfAbs)
        return SMLRWeights(self, **kwargs)

    biases = property(lambda self: self.__biases)
    weights = property(lambda self: self.__weights)


class SMLRWeights(Sensitivity):
    """`SensitivityAnalyzer` that reports the weights SMLR trained
    on a given `Dataset`.

    SMLR provides multiple weights per feature (one per label in the
    training dataset). By default, all weights are combined into a single
    sensitivity value. See the `FeaturewiseDatasetMeasure` constructor
    arguments for how to customize this behavior.
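
    A usage sketch (``dataset`` is a hypothetical `Dataset`; the variable
    names are illustrative only)::

      clf = SMLR()
      clf.train(dataset)
      sens = SMLRWeights(clf)
      weights = sens(dataset)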
525 """
526
527 biases = StateVariable(enabled=True,
528 doc="A 1-d ndarray of biases")
529
530 _LEGAL_CLFS = [ SMLR ]
531
532
    def _call(self, dataset=None):
        """Extract weights from SMLR classifier.

        SMLR always has weights available, so nothing has to be computed here.
        """
        clf = self.clf
        # take the per-feature weights (bias row excluded)
        weights = clf.weights

        if clf.has_bias:
            self.biases = clf.biases

        if __debug__:
            debug('SMLR',
                  "Extracting weights for %d-class SMLR" %
                  (weights.shape[1]+1) +
                  " Result: min=%f max=%f" %
                  (N.min(weights), N.max(weights)))

        return weights