
Source Code for Module mvpa.clfs.smlr

# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
# vi: set ft=python sts=4 ts=4 sw=4 et:
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
#
#   See COPYING file distributed along with the PyMVPA package for the
#   copyright and license terms.
#
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
"""Sparse Multinomial Logistic Regression classifier."""

__docformat__ = 'restructuredtext'

import numpy as N

from mvpa.base import warning, externals
from mvpa.clfs.base import Classifier
from mvpa.measures.base import Sensitivity
from mvpa.misc.exceptions import ConvergenceError
from mvpa.misc.param import Parameter
from mvpa.misc.state import StateVariable
from mvpa.misc.transformers import SecondAxisMaxOfAbs


_DEFAULT_IMPLEMENTATION = "Python"
if externals.exists('ctypes'):
    # Uber-fast C-version of the stepwise regression
    from mvpa.clfs.libsmlrc import stepwise_regression as _cStepwiseRegression
    _DEFAULT_IMPLEMENTATION = "C"
else:
    _cStepwiseRegression = None
    warning("SMLR implementation without ctypes is overwhelmingly slow."
            " You are strongly advised to install python-ctypes")

if __debug__:
    from mvpa.base import debug

def _label2oneofm(labels, ulabels):
    """Convert labels to one-of-M form.

    TODO: Might be useful elsewhere so could migrate into misc/
    """

    # allocate for the new one-of-M labels
    new_labels = N.zeros((len(labels), len(ulabels)))

    # loop and convert to one-of-M
    for i, c in enumerate(ulabels):
        new_labels[labels == c, i] = 1

    return new_labels
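
# For example, labels [1, 2, 1, 2] with ulabels [1, 2] are encoded as
#
#   [[1., 0.],
#    [0., 1.],
#    [1., 0.],
#    [0., 1.]]
#
# i.e. one column per unique label, with a 1 in the row of each matching sample.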


class SMLR(Classifier):
    """Sparse Multinomial Logistic Regression `Classifier`.

    This is an implementation of the SMLR algorithm published in
    :ref:`Krishnapuram et al., 2005 <KCF+05>` (2005, IEEE Transactions
    on Pattern Analysis and Machine Intelligence). Be sure to cite
    that article if you use this classifier for your work.
    """

    _clf_internals = [ 'smlr', 'linear', 'has_sensitivity', 'binary',
                       'multiclass', 'does_feature_selection' ]
    # XXX: later 'kernel-based'?

    lm = Parameter(.1, min=1e-10, allowedtype='float',
             doc="""The penalty term lambda. Larger values will give rise to
             more sparsification.""")

    convergence_tol = Parameter(1e-3, min=1e-10, max=1.0, allowedtype='float',
             doc="""When the weight change for each cycle drops below this value
             the regression is considered converged. Smaller values
             lead to tighter convergence.""")

    resamp_decay = Parameter(0.5, allowedtype='float', min=0.0, max=1.0,
             doc="""Decay rate of the probability of resampling a zero weight.
             A value of 1.0 drops the probability to min_resamp immediately,
             while 0.0 never decreases it from 1.0.""")

    min_resamp = Parameter(0.001, allowedtype='float', min=1e-10, max=1.0,
             doc="Minimum resampling probability for zeroed weights")

    maxiter = Parameter(10000, allowedtype='int', min=1,
             doc="""Maximum number of iterations before stopping if not
             converged.""")

    has_bias = Parameter(True, allowedtype='bool',
             doc="""Whether to add a bias term to allow fits to data not through
             zero""")

    fit_all_weights = Parameter(True, allowedtype='bool',
             doc="""Whether to fit weights for all classes or only for the
             number of classes minus one. Both should give nearly identical
             results, but setting fit_all_weights to True takes a little longer
             and yields weights that are fully analyzable for each class. Also,
             note that the convergence rate may differ, but the convergence
             point is the same.""")

    implementation = Parameter(_DEFAULT_IMPLEMENTATION,
             allowedtype='basestring',
             choices=["C", "Python"],
             doc="""Use C or Python as the implementation of
             stepwise_regression. The C version brings a significant speedup
             and is therefore the default.""")

    seed = Parameter(None, allowedtype='None or int',
             doc="""Seed used to initialize the random generator; may be used
             to replicate a run.""")

    unsparsify = Parameter(False, allowedtype='bool',
             doc="""***EXPERIMENTAL*** Whether to unsparsify the weights via
             regression. Note that it likely leads to worse classifier
             performance, but more interpretable weights.""")

    std_to_keep = Parameter(2.0, allowedtype='float',
             doc="""Standard deviation threshold of weights to keep when
             unsparsifying.""")

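    # Each Parameter above is exposed as a constructor keyword argument and is
    # available at runtime via `self.params`, e.g. (illustrative sketch):
    #
    #   >>> clf = SMLR(lm=1.0, fit_all_weights=False)
    #   >>> clf.params.lm
    #   1.0
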
    def __init__(self, **kwargs):
        """Initialize an SMLR classifier.
        """

        """
        TODO:
         # Add in likelihood calculation
         # Add kernels, not just direct methods.
        """
        # init base class first
        Classifier.__init__(self, **kwargs)

        if _cStepwiseRegression is None and self.implementation == 'C':
            warning('SMLR: C implementation is not available.'
                    ' Using pure Python one')
            self.implementation = 'Python'

        # pylint friendly initializations
        self.__ulabels = None
        """Unique labels from the training set."""
        self.__weights_all = None
        """Contains all weights including bias values"""
        self.__weights = None
        """Just the weights, without the biases"""
        self.__biases = None
        """The biases; will remain None if has_bias is False"""

    def _pythonStepwiseRegression(self, w, X, XY, Xw, E,
                                  auto_corr,
                                  lambda_over_2_auto_corr,
                                  S,
                                  M,
                                  maxiter,
                                  convergence_tol,
                                  resamp_decay,
                                  min_resamp,
                                  verbose,
                                  seed=None):
        """The (much slower) Python version of the stepwise
        regression. I'm keeping this around for now so that we can
        compare results."""

        # get the data information into easy vars
        ns, nd = X.shape

        # initialize the iterative optimization
        converged = False
        incr = N.finfo(N.float).max
        non_zero, basis, m, wasted_basis, cycles = 0, 0, 0, 0, 0
        sum2_w_diff, sum2_w_old, w_diff = 0.0, 0.0, 0.0
        p_resamp = N.ones(w.shape, dtype=N.float)

        if seed is not None:
            # set the random seed
            N.random.seed(seed)

            if __debug__:
                debug("SMLR_", "random seed=%s" % seed)

        # perform the optimization
        while not converged and cycles < maxiter:
            # get the starting weight
            w_old = w[basis, m]

            # see if we're gonna update
            if (w_old != 0) or N.random.rand() < p_resamp[basis, m]:
                # let's do it
                # get the probability
                P = E[:, m]/S

                # set the gradient
                grad = XY[basis, m] - N.dot(X[:, basis], P)

                # calculate the new weight with the Laplacian prior
                w_new = w_old + grad/auto_corr[basis]

                # keep weights within bounds
                if w_new > lambda_over_2_auto_corr[basis]:
                    w_new -= lambda_over_2_auto_corr[basis]
                    changed = True
                    # unmark from being zero if necessary
                    if w_old == 0:
                        non_zero += 1
                        # reset the prob of resampling
                        p_resamp[basis, m] = 1.0
                elif w_new < -lambda_over_2_auto_corr[basis]:
                    w_new += lambda_over_2_auto_corr[basis]
                    changed = True
                    # unmark from being zero if necessary
                    if w_old == 0:
                        non_zero += 1
                        # reset the prob of resampling
                        p_resamp[basis, m] = 1.0
                else:
                    # gonna zero it out
                    w_new = 0.0

                    # decrease the p_resamp
                    p_resamp[basis, m] -= (p_resamp[basis, m] -
                                           min_resamp) * resamp_decay

                    # set number of non-zero
                    if w_old == 0:
                        changed = False
                        wasted_basis += 1
                    else:
                        changed = True
                        non_zero -= 1

                # process any changes
                if changed:
                    #print "w[%d, %d] = %g" % (basis, m, w_new)
                    # update the expected values
                    w_diff = w_new - w_old
                    Xw[:, m] = Xw[:, m] + X[:, basis]*w_diff
                    E_new_m = N.exp(Xw[:, m])
                    S += E_new_m - E[:, m]
                    E[:, m] = E_new_m

                    # update the weight
                    w[basis, m] = w_new

                    # keep track of the sqrt sum squared diffs
                    sum2_w_diff += w_diff*w_diff

                # add to the old no matter what
                sum2_w_old += w_old*w_old

            # update the class and basis
            m = N.mod(m+1, w.shape[1])
            if m == 0:
                # we completed a cycle of labels
                basis = N.mod(basis+1, nd)
                if basis == 0:
                    # we completed a cycle of features
                    cycles += 1

                    # assess convergence
                    incr = N.sqrt(sum2_w_diff) / \
                           (N.sqrt(sum2_w_old) + N.finfo(N.float).eps)

                    # save the new weights
                    converged = incr < convergence_tol

                    if __debug__:
                        debug("SMLR_",
                              "cycle=%d ; incr=%g ; non_zero=%d ; " %
                              (cycles, incr, non_zero) +
                              "wasted_basis=%d ; " %
                              (wasted_basis) +
                              "sum2_w_old=%g ; sum2_w_diff=%g" %
                              (sum2_w_old, sum2_w_diff))

                    # reset the sum diffs and wasted_basis
                    sum2_w_diff = 0.0
                    sum2_w_old = 0.0
                    wasted_basis = 0


        if not converged:
            raise ConvergenceError(
                "More than %d iterations without convergence" % maxiter)

        # calculate the log likelihoods and posteriors for the training data
        #log_likelihood = x

        return cycles
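
    # Note on the update rule above: with t = lambda_over_2_auto_corr[basis]
    # = lm / (2 * auto_corr[basis]) and w_tmp = w_old + grad / auto_corr[basis],
    # the three branches implement the component-wise soft-threshold
    #
    #     w_new = sign(w_tmp) * max(|w_tmp| - t, 0)
    #
    # induced by the Laplacian prior: any weight whose unpenalized update lands
    # inside [-t, t] is set to exactly zero, which is what makes SMLR sparse.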

    def _train(self, dataset):
        """Train the classifier using `dataset` (`Dataset`).
        """
        # Process the labels to turn into 1 of N encoding
        labels = _label2oneofm(dataset.labels, dataset.uniquelabels)
        self.__ulabels = dataset.uniquelabels.copy()

        Y = labels
        M = len(self.__ulabels)

        # get the dataset information into easy vars
        X = dataset.samples

        # see if we are adding a bias term
        if self.params.has_bias:
            if __debug__:
                debug("SMLR_", "hstacking 1s for bias")

            # append the bias term to the features
            X = N.hstack((X, N.ones((X.shape[0], 1), dtype=X.dtype)))

        if self.params.implementation.upper() == 'C':
            _stepwise_regression = _cStepwiseRegression
            #
            # TODO: avoid copying to non-contig arrays, use strides in ctypes?
            if not (X.flags['C_CONTIGUOUS'] and X.flags['ALIGNED']):
                if __debug__:
                    debug("SMLR_",
                          "Copying data to get it C_CONTIGUOUS/ALIGNED")
                X = N.array(X, copy=True, dtype=N.double, order='C')

            # currently must be double for the C code
            if X.dtype != N.double:
                if __debug__:
                    debug("SMLR_", "Converting data to double")
                # must cast to double
                X = X.astype(N.double)

        elif self.params.implementation.upper() == 'PYTHON':
            _stepwise_regression = self._pythonStepwiseRegression
        else:
            raise ValueError(
                "Unknown implementation %s of stepwise_regression" %
                self.params.implementation)

        # set the feature dimensions
        ns, nd = X.shape

        # decide the size of weights based on num classes estimated
        if self.params.fit_all_weights:
            c_to_fit = M
        else:
            c_to_fit = M-1

        # Precompute what we can
        auto_corr = ((M-1.)/(2.*M))*(N.sum(X*X, 0))
        XY = N.dot(X.T, Y[:, :c_to_fit])
        lambda_over_2_auto_corr = (self.params.lm/2.)/auto_corr

        # set starting values
        w = N.zeros((nd, c_to_fit), dtype=N.double)
        Xw = N.zeros((ns, c_to_fit), dtype=N.double)
        E = N.ones((ns, c_to_fit), dtype=N.double)
        S = M*N.ones(ns, dtype=N.double)

        # set verbosity
        if __debug__:
            verbosity = int("SMLR_" in debug.active)
            debug('SMLR_',
                  'Calling stepwise_regression. Seed %s' % self.params.seed)
        else:
            verbosity = 0

        # call the chosen version of stepwise_regression
        cycles = _stepwise_regression(w,
                                      X,
                                      XY,
                                      Xw,
                                      E,
                                      auto_corr,
                                      lambda_over_2_auto_corr,
                                      S,
                                      M,
                                      self.params.maxiter,
                                      self.params.convergence_tol,
                                      self.params.resamp_decay,
                                      self.params.min_resamp,
                                      verbosity,
                                      self.params.seed)

        if cycles >= self.params.maxiter:
            # did not converge
            raise ConvergenceError(
                "More than %d iterations without convergence" %
                self.params.maxiter)

        # see if we should unsparsify the weights
        if self.params.unsparsify:
            # unsparsify
            w = self._unsparsify_weights(X, w)

        # save the weights
        self.__weights_all = w
        self.__weights = w[:dataset.nfeatures, :]

        if self.states.isEnabled('feature_ids'):
            self.feature_ids = N.where(N.max(N.abs(w[:dataset.nfeatures, :]),
                                             axis=1) > 0)[0]

        # and a bias
        if self.params.has_bias:
            self.__biases = w[-1, :]

        if __debug__:
            debug('SMLR', "train finished in %d cycles on data.shape=%s " %
                  (cycles, X.shape) +
                  "min:max(data)=%f:%f, got min:max(w)=%f:%f" %
                  (N.min(X), N.max(X), N.min(w), N.max(w)))
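
    # Minimal usage sketch (assumes a PyMVPA `Dataset` named `dataset` with
    # samples and labels already assigned):
    #
    #   >>> clf = SMLR(lm=0.1)
    #   >>> clf.train(dataset)                  # runs the stepwise regression
    #   >>> predictions = clf.predict(dataset.samples)
    #   >>> clf.weights.shape                   # (nfeatures, nweights)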

    def _unsparsify_weights(self, samples, weights):
        """Unsparsify weights via least squares regression."""
        # allocate for the new weights
        new_weights = N.zeros(weights.shape, dtype=N.double)

        # get the sample data we're predicting and the sum squared
        # total variance
        b = samples
        sst = N.power(b - b.mean(0), 2).sum(0)

        # loop over each column
        for i in range(weights.shape[1]):
            w = weights[:, i]

            # get the nonzero ind
            ind = w != 0

            # get the features with non-zero weights
            a = b[:, ind]

            # predict all the data with the non-zero features
            betas = N.linalg.lstsq(a, b)[0]

            # determine the R^2 for each feature based on the sum
            # squared prediction error
            f = N.dot(a, betas)
            sse = N.power((b - f), 2).sum(0)
            rsquare = N.zeros(sse.shape, dtype=sse.dtype)
            gind = sst > 0
            rsquare[gind] = 1 - (sse[gind]/sst[gind])

            # derive new weights by combining the betas and weights
            # scaled by the rsquare
            new_weights[:, i] = N.dot(w[ind], betas)*rsquare

        # take the tails
        tozero = N.abs(new_weights) < self.params.std_to_keep*N.std(new_weights)
        orig_zero = weights == 0.0
        if orig_zero.sum() < tozero.sum():
            # should not end up with fewer non-zero weights than we started with
            tozero = orig_zero
        new_weights[tozero] = 0.0

        if __debug__:
            debug('SMLR_', "Start nonzero: %d; Finish nonzero: %d" %
                  ((weights != 0).sum(), (new_weights != 0).sum()))

        return new_weights
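
    # Note on the unsparsification above: for each class, all features are
    # regressed onto the features that received non-zero weights, the sparse
    # weights are propagated through the resulting least-squares coefficients,
    # and each feature's new weight is scaled by its R^2, so features well
    # explained by the selected ones inherit proportionally larger weights.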

    def _getFeatureIds(self):
        """Return ids of the used features.
        """
        return N.where(N.max(N.abs(self.__weights), axis=1) > 0)[0]

    def _predict(self, data):
        """Predict the output for the provided data.
        """
        # see if we are adding a bias term
        if self.params.has_bias:
            # append the bias term to the features
            data = N.hstack((data,
                             N.ones((data.shape[0], 1), dtype=data.dtype)))

        # append the zeros column to the weights if necessary
        if self.params.fit_all_weights:
            w = self.__weights_all
        else:
            w = N.hstack((self.__weights_all,
                          N.zeros((self.__weights_all.shape[0], 1))))

        # determine the probability values for making the prediction
        dot_prod = N.dot(data, w)
        E = N.exp(dot_prod)
        S = N.sum(E, 1)

        if __debug__:
            debug('SMLR', "predict on data.shape=%s min:max(data)=%f:%f " %
                  (`data.shape`, N.min(data), N.max(data)) +
                  "min:max(w)=%f:%f min:max(dot_prod)=%f:%f min:max(E)=%f:%f" %
                  (N.min(w), N.max(w), N.min(dot_prod), N.max(dot_prod),
                   N.min(E), N.max(E)))

        values = E / S[:, N.newaxis].repeat(E.shape[1], axis=1)
        self.values = values

        # generate predictions
        predictions = N.asarray([self.__ulabels[N.argmax(vals)]
                                 for vals in values])
        # no need to assign state variable here -- would be done
        # in Classifier._postpredict anyway
        #self.predictions = predictions

        return predictions
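
    # Note: the `values` computed above are the multinomial logistic (softmax)
    # posteriors
    #
    #     P(class m | x) = exp(x . w_m) / sum_k exp(x . w_k)
    #
    # and the predicted label is simply the class with the largest posterior.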

    def getSensitivityAnalyzer(self, **kwargs):
        """Returns a sensitivity analyzer for SMLR."""
        kwargs.setdefault('combiner', SecondAxisMaxOfAbs)
        return SMLRWeights(self, **kwargs)


    biases = property(lambda self: self.__biases)
    weights = property(lambda self: self.__weights)
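

# Illustrative sketch of the sensitivity interface (assumes a PyMVPA `Dataset`
# named `dataset`):
#
#   >>> sens = SMLR(lm=0.1).getSensitivityAnalyzer()
#   >>> fsens = sens(dataset)   # one combined sensitivity value per feature
#
# With the default SecondAxisMaxOfAbs combiner the reported value per feature
# is the maximum absolute weight across classes.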


class SMLRWeights(Sensitivity):
    """`SensitivityAnalyzer` that reports the weights SMLR trained
    on a given `Dataset`.

    By default SMLR provides multiple weights per feature (one per label in
    the training dataset). By default, all weights are combined into a single
    sensitivity value. Please see the `FeaturewiseDatasetMeasure` constructor
    arguments for how to customize this behavior.
    """

    biases = StateVariable(enabled=True,
                           doc="A 1-d ndarray of biases")

    _LEGAL_CLFS = [ SMLR ]

    def _call(self, dataset=None):
        """Extract weights from SMLR classifier.

        SMLR always has weights available, so nothing has to be computed here.
        """
        clf = self.clf
        weights = clf.weights
        # XXX: MH: The following warning is inappropriate. In almost all cases
        # SMLR will return more than one weight per feature. Even in the case
        # of a binary problem it will fit both weights by default. So unless
        # you specify fit_all_weights=False manually this warning is always
        # there. Too much annoyance IMHO. I moved this information into the
        # docstring, as there is no technical problem here, as
        # FeaturewiseDatasetMeasure by default applies a combiner -- just that
        # people should know...
        # PLEASE ACKNOWLEDGE AND REMOVE
        #if weights.shape[1] != 1:
        #    warning("You are estimating sensitivity for SMLR %s with multiple"
        #            " sensitivities available %s. Make sure that it is what you"
        #            " intended to do" % (self, weights.shape) )

        if clf.has_bias:
            self.biases = clf.biases

        if __debug__:
            debug('SMLR',
                  "Extracting weights for %d-class SMLR" %
                  (weights.shape[1] + 1) +
                  ". Result: min=%f max=%f" %
                  (N.min(weights), N.max(weights)))

        return weights