9 """Sparse Multinomial Logistic Regression classifier."""
10
11 __docformat__ = 'restructuredtext'
12
13 import numpy as N
14
15 from mvpa.base import warning, externals
16 from mvpa.clfs.base import Classifier
17 from mvpa.measures.base import Sensitivity
18 from mvpa.misc.exceptions import ConvergenceError
19 from mvpa.misc.param import Parameter
20 from mvpa.misc.state import StateVariable
21 from mvpa.misc.transformers import SecondAxisMaxOfAbs


_DEFAULT_IMPLEMENTATION = "Python"
if externals.exists('ctypes'):
    # use the fast C implementation of stepwise regression if available
    from mvpa.clfs.libsmlrc import stepwise_regression as _cStepwiseRegression
    _DEFAULT_IMPLEMENTATION = "C"
else:
    _cStepwiseRegression = None
    warning("The SMLR implementation without ctypes is overwhelmingly slow."
            " You are strongly advised to install python-ctypes")

if __debug__:
    from mvpa.base import debug


def _label2oneofm(labels, ulabels):
    """Convert labels to one-of-M form.

    TODO: Might be useful elsewhere so could migrate into misc/
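
    Example: ``labels=[1, 2, 1]`` with ``ulabels=[1, 2]`` yields
    ``[[1, 0], [0, 1], [1, 0]]``.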
41 """
42
43
44 new_labels = N.zeros((len(labels), len(ulabels)))
45
46
47 for i, c in enumerate(ulabels):
48 new_labels[labels == c, i] = 1
49
50 return new_labels


class SMLR(Classifier):
    """Sparse Multinomial Logistic Regression `Classifier`.

    This is an implementation of the SMLR algorithm published in
    :ref:`Krishnapuram et al., 2005 <KCF+05>` (2005, IEEE Transactions
    on Pattern Analysis and Machine Intelligence). Be sure to cite
    that article if you use this classifier for your work.
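
    A minimal usage sketch (``dataset`` here stands for a hypothetical
    PyMVPA `Dataset`, not defined in this module)::

      clf = SMLR(lm=0.1)
      clf.train(dataset)
      predictions = clf.predict(dataset.samples)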
61 """
62
63 _clf_internals = [ 'smlr', 'linear', 'has_sensitivity', 'binary',
64 'multiclass', 'does_feature_selection' ]
65
66
    lm = Parameter(.1, min=1e-10, allowedtype='float',
             doc="""The penalty term lambda. Larger values will give rise to
             more sparsification.""")

    convergence_tol = Parameter(1e-3, min=1e-10, max=1.0, allowedtype='float',
             doc="""When the weight change for each cycle drops below this
             value the regression is considered converged. Smaller values
             lead to tighter convergence.""")

    resamp_decay = Parameter(0.5, allowedtype='float', min=0.0, max=1.0,
             doc="""Decay rate for the probability of resampling a zero
             weight. A value of 1.0 drops the probability from 1.0 to
             min_resamp immediately; 0.0 keeps it at 1.0 forever.""")

    min_resamp = Parameter(0.001, allowedtype='float', min=1e-10, max=1.0,
             doc="Minimum resampling probability for zeroed weights")

    maxiter = Parameter(10000, allowedtype='int', min=1,
             doc="""Maximum number of iterations before stopping if not
             converged.""")

    has_bias = Parameter(True, allowedtype='bool',
             doc="""Whether to add a bias term to allow fits to data not
             passing through zero""")

    fit_all_weights = Parameter(True, allowedtype='bool',
             doc="""Whether to fit weights for all classes or only for the
             number of classes minus one. Both should give nearly identical
             results, but fitting all weights takes a little longer and
             yields weights that are fully analyzable for each class. Note
             that the convergence rate may differ, but the convergence point
             is the same.""")

    implementation = Parameter(_DEFAULT_IMPLEMENTATION,
             allowedtype='basestring',
             choices=["C", "Python"],
             doc="""Use C or Python as the implementation of
             stepwise_regression. The C version brings a significant speedup
             and is therefore the default when available.""")

    seed = Parameter(None, allowedtype='None or int',
             doc="""Seed used to initialize the random number generator;
             set it to replicate a run""")

    unsparsify = Parameter(False, allowedtype='bool',
             doc="""***EXPERIMENTAL*** Whether to unsparsify the weights via
             regression. Note that it likely leads to worse classifier
             performance, but more interpretable weights.""")

    std_to_keep = Parameter(2.0, allowedtype='float',
             doc="""Standard deviation threshold of weights to keep when
             unsparsifying.""")

    def __init__(self, **kwargs):
        """Initialize an SMLR classifier.
        """

        """
        TODO:
        # Add in likelihood calculation
        # Add kernels, not just direct methods.
        """

        Classifier.__init__(self, **kwargs)

        if _cStepwiseRegression is None and self.implementation == 'C':
            warning('SMLR: C implementation is not available.'
                    ' Using pure Python one')
            self.implementation = 'Python'

        # pylint-friendly initializations
        self.__ulabels = None
        """Unique labels from the training set."""
        self.__weights_all = None
        """Contains all weights including bias values"""
        self.__weights = None
        """Just the weights, without the biases"""
        self.__biases = None
        """The biases; remain None if has_bias is False"""

    def _pythonStepwiseRegression(self, w, X, XY, Xw, E,
                                  auto_corr,
                                  lambda_over_2_auto_corr,
                                  S,
                                  M,
                                  maxiter,
                                  convergence_tol,
                                  resamp_decay,
                                  min_resamp,
                                  verbose,
                                  seed = None):
        """The (much slower) Python version of the stepwise
        regression. I'm keeping this around for now so that we can
        compare results."""

        # get the data dimensions
        ns, nd = X.shape

        # initialize the iterative optimization
        converged = False
        incr = N.finfo(N.float).max
        non_zero, basis, m, wasted_basis, cycles = 0, 0, 0, 0, 0
        sum2_w_diff, sum2_w_old, w_diff = 0.0, 0.0, 0.0
        p_resamp = N.ones(w.shape, dtype=N.float)

        if seed is not None:
            # set the random seed to make runs replicable
            N.random.seed(seed)

        if __debug__:
            debug("SMLR_", "random seed=%s" % seed)

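        # The loop below sweeps over (feature, class) pairs and performs a
        # component-wise update with a Laplacian (sparsity) prior: each
        # tentative weight w_old + grad/auto_corr is soft-thresholded by
        # lambda_over_2_auto_corr, i.e. shrunk toward zero and zeroed
        # outright if it falls inside the threshold band.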
        while not converged and cycles < maxiter:
            # get the starting weight
            w_old = w[basis, m]

            # only touch a zeroed weight with probability p_resamp
            if (w_old != 0) or N.random.rand() < p_resamp[basis, m]:

                # get the probability for the current class
                P = E[:, m]/S

                # get the gradient for this weight
                grad = XY[basis, m] - N.dot(X[:, basis], P)

                # calculate the new weight with the Laplacian prior
                w_new = w_old + grad/auto_corr[basis]

                # soft-threshold the new weight
                if w_new > lambda_over_2_auto_corr[basis]:
                    w_new -= lambda_over_2_auto_corr[basis]
                    changed = True
                    # unmark from being zero if necessary
                    if w_old == 0:
                        non_zero += 1
                        # reset the resampling probability
                        p_resamp[basis, m] = 1.0
                elif w_new < -lambda_over_2_auto_corr[basis]:
                    w_new += lambda_over_2_auto_corr[basis]
                    changed = True
                    # unmark from being zero if necessary
                    if w_old == 0:
                        non_zero += 1
                        # reset the resampling probability
                        p_resamp[basis, m] = 1.0
                else:
                    # the weight falls within the threshold band, so zero it
                    w_new = 0.0

                    # decay the resampling probability
                    p_resamp[basis, m] -= (p_resamp[basis, m] - \
                                           min_resamp) * resamp_decay

                    if w_old == 0:
                        changed = False
                        wasted_basis += 1
                    else:
                        changed = True
                        non_zero -= 1

                # handle the consequences of the update
                if changed:
                    # incrementally update the class scores
                    w_diff = w_new - w_old
                    Xw[:, m] = Xw[:, m] + X[:, basis]*w_diff
                    E_new_m = N.exp(Xw[:, m])
                    S += E_new_m - E[:, m]
                    E[:, m] = E_new_m

                    # update the weight
                    w[basis, m] = w_new

                    # keep track of the sum of squared weight changes
                    sum2_w_diff += w_diff*w_diff

                # add to the old weight norm no matter what
                sum2_w_old += w_old*w_old

            # advance to the next class, then the next basis (feature)
            m = N.mod(m+1, w.shape[1])
            if m == 0:
                # we completed a cycle of labels
                basis = N.mod(basis+1, nd)
                if basis == 0:
                    # we completed a cycle of features
                    cycles += 1

                    # assess convergence via the relative weight change
                    incr = N.sqrt(sum2_w_diff) / \
                           (N.sqrt(sum2_w_old)+N.finfo(N.float).eps)

                    converged = incr < convergence_tol

                    if __debug__:
                        debug("SMLR_", \
                              "cycle=%d ; incr=%g ; non_zero=%d ; " %
                              (cycles, incr, non_zero) +
                              "wasted_basis=%d ; " %
                              (wasted_basis) +
                              "sum2_w_old=%g ; sum2_w_diff=%g" %
                              (sum2_w_old, sum2_w_diff))

                    # reset the accumulators for the next cycle
                    sum2_w_diff = 0.0
                    sum2_w_old = 0.0
                    wasted_basis = 0

        if not converged:
            raise ConvergenceError, \
                  "More than %d iterations without convergence" % \
                  (maxiter)

        return cycles

    def _train(self, dataset):
        """Train the classifier using `dataset` (`Dataset`).
        """
        # turn the labels into a one-of-M encoding
        labels = _label2oneofm(dataset.labels, dataset.uniquelabels)
        self.__ulabels = dataset.uniquelabels.copy()

        Y = labels
        M = len(self.__ulabels)

        # get the data into easy vars
        X = dataset.samples

        # see if we are adding a bias term
        if self.params.has_bias:
            if __debug__:
                debug("SMLR_", "hstacking 1s for bias")

            # append the bias column of ones to the data
            X = N.hstack((X, N.ones((X.shape[0], 1), dtype=X.dtype)))

        if self.params.implementation.upper() == 'C':
            _stepwise_regression = _cStepwiseRegression

            # the C code requires a contiguous, aligned array
            if not (X.flags['C_CONTIGUOUS'] and X.flags['ALIGNED']):
                if __debug__:
                    debug("SMLR_",
                          "Copying data to get it C_CONTIGUOUS/ALIGNED")
                X = N.array(X, copy=True, dtype=N.double, order='C')

            # currently the data must be double for the C code
            if X.dtype != N.double:
                if __debug__:
                    debug("SMLR_", "Converting data to double")
                X = X.astype(N.double)

        elif self.params.implementation.upper() == 'PYTHON':
            _stepwise_regression = self._pythonStepwiseRegression
        else:
            raise ValueError, \
                  "Unknown implementation %s of stepwise_regression" % \
                  self.params.implementation

        # get the dimensions
        ns, nd = X.shape

        # decide the number of weights to fit
        if self.params.fit_all_weights:
            c_to_fit = M
        else:
            c_to_fit = M-1

        # precompute the quantities reused on every step
        auto_corr = ((M-1.)/(2.*M))*(N.sum(X*X, 0))
        XY = N.dot(X.T, Y[:, :c_to_fit])
        lambda_over_2_auto_corr = (self.params.lm/2.)/auto_corr

        # set the starting values
        w = N.zeros((nd, c_to_fit), dtype=N.double)
        Xw = N.zeros((ns, c_to_fit), dtype=N.double)
        E = N.ones((ns, c_to_fit), dtype=N.double)
        S = M*N.ones(ns, dtype=N.double)

        # set the verbosity
        if __debug__:
            verbosity = int("SMLR_" in debug.active)
            debug('SMLR_',
                  'Calling stepwise_regression. Seed %s' % self.params.seed)
        else:
            verbosity = 0

        # run the chosen implementation of stepwise_regression
        cycles = _stepwise_regression(w,
                                      X,
                                      XY,
                                      Xw,
                                      E,
                                      auto_corr,
                                      lambda_over_2_auto_corr,
                                      S,
                                      M,
                                      self.params.maxiter,
                                      self.params.convergence_tol,
                                      self.params.resamp_decay,
                                      self.params.min_resamp,
                                      verbosity,
                                      self.params.seed)

        if cycles >= self.params.maxiter:
            # did not converge
            raise ConvergenceError, \
                  "More than %d iterations without convergence" % \
                  (self.params.maxiter)

        # see if we need to unsparsify the weights
        if self.params.unsparsify:
            # unsparsify via least squares regression
            w = self._unsparsify_weights(X, w)

        # save the weights, with and without the bias row
        self.__weights_all = w
        self.__weights = w[:dataset.nfeatures, :]

        if self.states.isEnabled('feature_ids'):
            self.feature_ids = N.where(N.max(N.abs(w[:dataset.nfeatures, :]),
                                             axis=1)>0)[0]

        # and the biases
        if self.params.has_bias:
            self.__biases = w[-1, :]

        if __debug__:
            debug('SMLR', "train finished in %d cycles on data.shape=%s " %
                  (cycles, X.shape) +
                  "min:max(data)=%f:%f, got min:max(w)=%f:%f" %
                  (N.min(X), N.max(X), N.min(w), N.max(w)))

    def _unsparsify_weights(self, samples, weights):
        """Unsparsify weights via least squares regression."""
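        # Overview: for each class, regress all features onto the features
        # carrying nonzero weights, redistribute each sparse weight across
        # the features according to that least-squares mapping (scaled by
        # per-feature R^2), then re-zero small resulting weights.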
        new_weights = N.zeros(weights.shape, dtype=N.double)

        # compute the total sum of squares per feature
        b = samples
        sst = N.power(b - b.mean(0),2).sum(0)

        # process each class separately
        for i in range(weights.shape[1]):
            w = weights[:,i]

            # the features with nonzero weights
            ind = w!=0

            # restrict to the selected features
            a = b[:,ind]

            # regress all features onto the selected ones
            betas = N.linalg.lstsq(a,b)[0]

            # compute the per-feature R^2 of the fit
            f = N.dot(a,betas)
            sse = N.power((b-f),2).sum(0)
            rsquare = N.zeros(sse.shape,dtype=sse.dtype)
            gind = sst>0
            rsquare[gind] = 1-(sse[gind]/sst[gind])

            # redistribute the sparse weights, scaled by R^2
            new_weights[:,i] = N.dot(w[ind],betas)*rsquare

        # zero out weights below the threshold
        tozero = N.abs(new_weights) < self.params.std_to_keep*N.std(new_weights)
        orig_zero = weights==0.0
        if orig_zero.sum() < tozero.sum():
            # make sure we do not zero out fewer weights than before
            tozero = orig_zero
        new_weights[tozero] = 0.0

        if __debug__:
            debug('SMLR_', "Start nonzero: %d; Finish nonzero: %d" % \
                  ((weights!=0).sum(), (new_weights!=0).sum()))

        return new_weights

    def _getFeatureIds(self):
        """Return ids of the used features
        """
        return N.where(N.max(N.abs(self.__weights), axis=1)>0)[0]

    def _predict(self, data):
        """Predict the output for the provided data.
        """
        if self.params.has_bias:
            # append the bias column, just as during training
            data = N.hstack((data,
                             N.ones((data.shape[0], 1), dtype=data.dtype)))

        # append zero weights for the left-out class if necessary
        if self.params.fit_all_weights:
            w = self.__weights_all
        else:
            w = N.hstack((self.__weights_all,
                          N.zeros((self.__weights_all.shape[0], 1))))

        # determine the per-class exponentiated scores
        dot_prod = N.dot(data, w)
        E = N.exp(dot_prod)
        S = N.sum(E, 1)

        if __debug__:
            debug('SMLR', "predict on data.shape=%s min:max(data)=%f:%f " %
                  (`data.shape`, N.min(data), N.max(data)) +
                  "min:max(w)=%f:%f min:max(dot_prod)=%f:%f min:max(E)=%f:%f" %
                  (N.min(w), N.max(w), N.min(dot_prod), N.max(dot_prod),
                   N.min(E), N.max(E)))

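        # softmax over classes:
        # values[s, c] = exp(x_s . w_c) / sum_k exp(x_s . w_k)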
        values = E / S[:, N.newaxis].repeat(E.shape[1], axis=1)
        self.values = values

        # pick the class with the largest probability
        predictions = N.asarray([self.__ulabels[N.argmax(vals)]
                                 for vals in values])

        return predictions

    def getSensitivityAnalyzer(self, **kwargs):
        """Returns a sensitivity analyzer for SMLR."""
        kwargs.setdefault('transformer', SecondAxisMaxOfAbs)
        return SMLRWeights(self, **kwargs)

    biases = property(lambda self: self.__biases)
    weights = property(lambda self: self.__weights)


class SMLRWeights(Sensitivity):
    """`SensitivityAnalyzer` that reports the weights SMLR trained
    on a given `Dataset`.

    SMLR provides multiple weights per feature (one per label in the
    training dataset). By default, all weights are combined into a single
    sensitivity value. See the `FeaturewiseDatasetMeasure` constructor
    arguments for how to customize this behavior.
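
    A usage sketch (``dataset`` is a hypothetical `Dataset`; the variable
    names are illustrative only)::

      clf = SMLR()
      clf.train(dataset)
      sens = SMLRWeights(clf)
      weights = sens(dataset)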
525 """
526
527 biases = StateVariable(enabled=True,
528 doc="A 1-d ndarray of biases")
529
530 _LEGAL_CLFS = [ SMLR ]
531
532
    def _call(self, dataset=None):
        """Extract weights from SMLR classifier.

        SMLR always has weights available, so nothing has to be computed here.
        """
        clf = self.clf
        # take the per-feature weights (bias row excluded)
        weights = clf.weights

        if clf.has_bias:
            self.biases = clf.biases

        if __debug__:
            debug('SMLR',
                  "Extracting weights for %d-class SMLR" %
                  (weights.shape[1]+1) +
                  " Result: min=%f max=%f" %
                  (N.min(weights), N.max(weights)))

        return weights