
Source Code for Module mvpa.clfs.gpr

# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
# vi: set ft=python sts=4 ts=4 sw=4 et:
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
#
#   Copyright (c) 2008 Emanuele Olivetti <emanuele@relativita.com>
#   See COPYING file distributed along with the PyMVPA package for the
#   copyright and license terms.
#
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
"""Gaussian Process Regression (GPR)."""

__docformat__ = 'restructuredtext'


import numpy as N

from mvpa.base import externals

from mvpa.misc.state import StateVariable
from mvpa.clfs.base import Classifier
from mvpa.misc.param import Parameter
from mvpa.clfs.kernel import KernelSquaredExponential, KernelLinear
from mvpa.measures.base import Sensitivity
from mvpa.misc.exceptions import InvalidHyperparameterError
from mvpa.datasets import Dataset

if externals.exists("scipy", raiseException=True):
    from scipy.linalg import cho_solve as SLcho_solve
    from scipy.linalg import cholesky as SLcholesky
    import scipy.linalg as SL
    # Some local binding for bits of speed up
    SLAError = SL.basic.LinAlgError

if __debug__:
    from mvpa.base import debug

# Some local bindings for bits of speed up
Nlog = N.log
Ndot = N.dot
Ndiag = N.diag
NLAcholesky = N.linalg.cholesky
NLAsolve = N.linalg.solve
NLAError = N.linalg.linalg.LinAlgError
eps64 = N.finfo(N.float64).eps

# Some precomputed items. log is relatively expensive
_halflog2pi = 0.5 * Nlog(2 * N.pi)


class GPR(Classifier):
    """Gaussian Process Regression (GPR).

    """

    predicted_variances = StateVariable(enabled=False,
        doc="Variance per each predicted value")

    log_marginal_likelihood = StateVariable(enabled=False,
        doc="Log Marginal Likelihood")

    log_marginal_likelihood_gradient = StateVariable(enabled=False,
        doc="Log Marginal Likelihood Gradient")

    _clf_internals = [ 'gpr', 'regression', 'retrainable' ]


    # NOTE XXX Parameters of the classifier. Values available as
    # clf.parameter or clf.params.parameter, or as
    # clf.params['parameter'] (as the full Parameter object)
    #
    # __doc__ and __repr__ for the class are conveniently adjusted to
    # reflect values of those params

    # Kernel machines/classifiers should be refactored also to behave
    # the same and define kernel parameter appropriately... TODO, but SVMs
    # already kinda do it nicely ;-)

    sigma_noise = Parameter(0.001, allowedtype='float', min=1e-10,
        doc="the standard deviation of the gaussian noise.")

    # XXX For now I don't introduce kernel parameter since yet to unify
    # kernel machines
    #kernel = Parameter(None, allowedtype='Kernel',
    #    doc="Kernel object defining the covariance between instances. "
    #        "(Defaults to KernelSquaredExponential if None in arguments)")

    lm = Parameter(0.0, min=0.0, allowedtype='float',
        doc="""The regularization term lambda.
        Increase this when the kernel matrix is not positive definite.""")


    def __init__(self, kernel=None, **kwargs):
        """Initialize a GPR regression analysis.

        :Parameters:
          kernel : Kernel
            a kernel object defining the covariance between instances.
            (Defaults to KernelSquaredExponential if None in arguments)
        """
        # init base class first
        Classifier.__init__(self, **kwargs)

        # It does not make sense to calculate a confusion matrix for a GPR
        # XXX it does ;) it will be a RegressionStatistics actually ;-)
        #     So if someone desires -- let him have it
        # self.states.enable('training_confusion', False)

        # set kernel:
        if kernel is None:
            kernel = KernelSquaredExponential()
        self.__kernel = kernel

        # append proper clf_internal depending on the kernel
        # TODO: unify finally all kernel-based machines.
        #       make SMLR to use kernels
        if isinstance(kernel, KernelLinear):
            self._clf_internals += ['linear']
        else:
            self._clf_internals += ['non-linear']

        if externals.exists('openopt') \
               and not 'has_sensitivity' in self._clf_internals:
            self._clf_internals += ['has_sensitivity']

        # No need to initialize state variables. Unless they got set,
        # they would raise an exception:
        # self.predicted_variances = None
        # self.log_marginal_likelihood = None
        self._init_internals()
        pass


    def _init_internals(self):
        """Reset some internal variables to None.

        To be used in constructor and untrain()
        """
        self._train_fv = None
        self._labels = None
        self._km_train_train = None
        self._train_labels = None
        self._alpha = None
        self._L = None
        self._LL = None
        self.__kernel.reset()
        pass


    def __repr__(self):
        """String summary of the object
        """
        return super(GPR, self).__repr__(
            prefixes=['kernel=%s' % self.__kernel])


    def compute_log_marginal_likelihood(self):
        """
        Compute log marginal likelihood using self.train_fv and self.labels.
        """
        if __debug__:
            debug("GPR", "Computing log_marginal_likelihood")
        self.log_marginal_likelihood = \
            -0.5 * Ndot(self._train_labels, self._alpha) - \
            Nlog(self._L.diagonal()).sum() - \
            self._km_train_train.shape[0] * _halflog2pi
        return self.log_marginal_likelihood

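    # For reference, the expression above is the standard GP log marginal
    # likelihood (Rasmussen & Williams, "Gaussian Processes for Machine
    # Learning", Algorithm 2.1):
    #
    #   log p(y|X) = -1/2 * y^T * alpha - sum_i log(L_ii) - (n/2) * log(2*pi)
    #
    # where L is the lower Cholesky factor of K_y = K + sigma_noise^2 * I
    # computed in _train(), and alpha = K_y^-1 * y.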

    def compute_gradient_log_marginal_likelihood(self):
        """Compute gradient of the log marginal likelihood. This
        version uses a more compact formula provided by the Williams and
        Rasmussen book.
        """
        # XXX EO: check whether the precomputed self.alpha self.Kinv
        #     are actually the ones corresponding to the hyperparameters
        #     used to compute this gradient!
        # YYY EO: currently this is verified outside gpr.py but it is
        #     not an efficient solution.
        # XXX EO: Do some memoizing since it could happen that some
        #     hyperparameters are kept constant by user request, so we
        #     don't need (sometimes) to recompute the corresponding
        #     gradient again.

        # self.Kinv = N.linalg.inv(self._C)
        # Faster:
        Kinv = SLcho_solve(self._LL, N.eye(self._L.shape[0]))

        alphalphaT = N.dot(self._alpha[:,None], self._alpha[None,:])
        tmp = alphalphaT - Kinv
        # Pass tmp to __kernel and let it compute its gradient terms.
        # This scales up to huge number of hyperparameters:
        grad_LML_hypers = self.__kernel.compute_lml_gradient(
            tmp, self._train_fv)
        grad_K_sigma_n = 2.0*self.sigma_noise*N.eye(tmp.shape[0])
        # Add the term related to sigma_noise:
        # grad_LML_sigma_n = 0.5 * N.trace(N.dot(tmp,grad_K_sigma_n))
        # Faster formula: tr(AB) = (A*B.T).sum()
        grad_LML_sigma_n = 0.5 * (tmp * (grad_K_sigma_n).T).sum()
        lml_gradient = N.hstack([grad_LML_sigma_n, grad_LML_hypers])
        self.log_marginal_likelihood_gradient = lml_gradient
        return lml_gradient

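    # For reference, the gradient computed above follows Rasmussen &
    # Williams, Eq. 5.9:
    #
    #   d/dtheta_j log p(y|X) = 1/2 * tr( (alpha*alpha^T - K^-1) * dK/dtheta_j )
    #
    # where K here denotes the regularized train-train covariance self._C.
    # 'tmp' is (alpha*alpha^T - K^-1); the kernel object contributes
    # dK/dtheta_j for its own hyperparameters, while dK/dsigma_noise =
    # 2*sigma_noise*I is handled here explicitly.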

    def compute_gradient_log_marginal_likelihood_logscale(self):
        """Compute gradient of the log marginal likelihood when
        hyperparameters are in logscale. This version uses a more
        compact formula provided by the Williams and Rasmussen book.
        """
        # Kinv = N.linalg.inv(self._C)
        # Faster:
        Kinv = SLcho_solve(self._LL, N.eye(self._L.shape[0]))
        alphalphaT = N.dot(self._alpha[:,None], self._alpha[None,:])
        tmp = alphalphaT - Kinv
        grad_LML_log_hypers = \
            self.__kernel.compute_lml_gradient_logscale(tmp, self._train_fv)
        grad_K_log_sigma_n = 2.0 * self.sigma_noise ** 2 * N.eye(Kinv.shape[0])
        # Add the term related to sigma_noise:
        # grad_LML_log_sigma_n = 0.5 * N.trace(N.dot(tmp, grad_K_log_sigma_n))
        # Faster formula: tr(AB) = (A * B.T).sum()
        grad_LML_log_sigma_n = 0.5 * (tmp * (grad_K_log_sigma_n).T).sum()
        lml_gradient = N.hstack([grad_LML_log_sigma_n, grad_LML_log_hypers])
        self.log_marginal_likelihood_gradient = lml_gradient
        return lml_gradient

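    # The only difference from the non-logscale version is the chain-rule
    # factor: for a hyperparameter theta, d/d(log theta) = theta * d/dtheta,
    # so dK/d(log sigma_noise) = sigma_noise * 2*sigma_noise*I
    # = 2*sigma_noise^2*I, which is the 'grad_K_log_sigma_n' term above.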

    def getSensitivityAnalyzer(self, flavor='auto', **kwargs):
        """Returns a sensitivity analyzer for GPR.

        :Parameters:
          flavor : basestring
            What sensitivity to provide. Valid values are
            'linear', 'model_select', 'auto'.
            In case of 'auto' selects 'linear' for linear kernel
            and 'model_select' for the rest. 'linear' corresponds to
            GPRLinearWeights and 'model_select' to GPRWeights
        """
        # XXX The following two lines do not work since
        #     self.__kernel is instance of kernel.KernelLinear and not
        #     just KernelLinear. How to fix?
        # YYY yoh is not sure what is the problem... KernelLinear is actually
        #     kernel.KernelLinear so everything should be ok
        if flavor == 'auto':
            flavor = ('model_select', 'linear')\
                     [int(isinstance(self.__kernel, KernelLinear))]
        if __debug__:
            debug("GPR", "Returning '%s' sensitivity analyzer" % flavor)

        # Return proper sensitivity
        if flavor == 'linear':
            return GPRLinearWeights(self, **kwargs)
        elif flavor == 'model_select':
            # sanity check
            if not ('has_sensitivity' in self._clf_internals):
                raise ValueError, \
                    "model_select flavor is not available probably " \
                    "due to not available 'openopt' module"
            return GPRWeights(self, **kwargs)
        else:
            raise ValueError, "Flavor %s is not recognized" % flavor

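    # Illustrative usage sketch (not part of the original module; 'dataset'
    # is a placeholder for any labeled `Dataset`):
    #
    #   clf = GPR(kernel=KernelLinear())
    #   sens = clf.getSensitivityAnalyzer(flavor='auto')  # picks 'linear' here
    #   weights = sens(dataset)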

    def _train(self, data):
        """Train the classifier using `data` (`Dataset`).
        """

        # local bindings for faster lookup
        retrainable = self.params.retrainable
        if retrainable:
            newkernel = False
            newL = False
            _changedData = self._changedData

        self._train_fv = train_fv = data.samples
        self._train_labels = train_labels = data.labels

        if not retrainable or _changedData['traindata'] \
               or _changedData.get('kernel_params', False):
            if __debug__:
                debug("GPR", "Computing train train kernel matrix")
            self._km_train_train = km_train_train = self.__kernel.compute(train_fv)
            newkernel = True
            if retrainable:
                self._km_train_test = None # reset to facilitate recomputation
        else:
            if __debug__:
                debug("GPR", "Not recomputing kernel since retrainable and "
                      "nothing has changed")
            km_train_train = self._km_train_train # reuse

        if not retrainable or newkernel or _changedData['params']:
            if __debug__:
                debug("GPR", "Computing L. sigma_noise=%g" % self.sigma_noise)
            # XXX it seems that we do not need binding to object, but may be
            #     commented out code would return?
            self._C = km_train_train + \
                  self.sigma_noise**2 * N.identity(km_train_train.shape[0], 'd')
            # The following decomposition could raise
            # N.linalg.linalg.LinAlgError because of numerical
            # reasons, due to the too rapid decay of 'self._C'
            # eigenvalues. In that case we try adding a small constant
            # to self._C, e.g. epsilon=1.0e-20. It should be a form of
            # Tikhonov regularization. This is equivalent to adding
            # little white gaussian noise to data.
            #
            # XXX EO: how to choose epsilon?
            #
            # Cholesky decomposition is provided by three different
            # NumPy/SciPy routines (fastest first):
            # 1) self._LL = scipy.linalg.cho_factor(self._C, lower=True)
            #    self._L = L = N.tril(self._LL[0])
            # 2) self._L = scipy.linalg.cholesky(self._C, lower=True)
            # 3) self._L = numpy.linalg.cholesky(self._C)
            # Even though 1 is the fastest we choose 2 since 1 does
            # not return a clean lower-triangular matrix (see docstring).

            # PBS: I just made it so the KernelMatrix is regularized
            # all the time. I figured that if ever you were going to
            # use regularization, you would want to set it yourself
            # and use the same value for all folds of your data.
            try:
                # apply regularization
                epsilon = self.params.lm * N.eye(self._C.shape[0])
                self._L = SLcholesky(self._C + epsilon, lower=True)
                self._LL = (self._L, True)
            except SLAError:
                raise SLAError("Kernel matrix is not positive definite. " + \
                               "Try increasing the lm parameter.")
                pass
            newL = True
        else:
            if __debug__:
                debug("GPR", "Not computing L since kernel, data and params "
                      "stayed the same")
            L = self._L # reuse

        # XXX we leave _alpha being recomputed, although we could check
        #     if newL or _changedData['labels']
        #
        if __debug__:
            debug("GPR", "Computing alpha")
        # self._alpha = NLAsolve(L.transpose(),
        #                        NLAsolve(L, train_labels))
        # Faster:
        self._alpha = SLcho_solve(self._LL, train_labels)

        # compute only if the state is enabled
        if self.states.isEnabled('log_marginal_likelihood'):
            self.compute_log_marginal_likelihood()
            pass

        if retrainable:
            # we must assign it only if it is retrainable
            self.states.retrained = not newkernel or not newL

        if __debug__:
            debug("GPR", "Done training")

        pass

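    # For reference, training follows Rasmussen & Williams, Algorithm 2.1:
    #
    #   L = cholesky(K + sigma_noise^2 * I)     (plus the 'lm' ridge term)
    #   alpha = L^T \ (L \ y)                   (via cho_solve above)
    #
    # so that alpha = (K + sigma_noise^2 * I)^-1 * y is obtained without
    # ever forming the explicit inverse.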

    def _predict(self, data):
        """
        Predict the output for the provided data.
        """
        retrainable = self.params.retrainable

        if not retrainable or self._changedData['testdata'] \
               or self._km_train_test is None:
            if __debug__:
                debug('GPR', "Computing train test kernel matrix")
            km_train_test = self.__kernel.compute(self._train_fv, data)
            if retrainable:
                self._km_train_test = km_train_test
                self.states.repredicted = False
        else:
            if __debug__:
                debug('GPR', "Not recomputing train test kernel matrix")
            km_train_test = self._km_train_test
            self.states.repredicted = True


        predictions = Ndot(km_train_test.transpose(), self._alpha)

        if self.states.isEnabled('predicted_variances'):
            # do computation only if state variable was enabled
            if not retrainable or self._km_test_test is None \
                   or self._changedData['testdata']:
                if __debug__:
                    debug('GPR', "Computing test test kernel matrix")
                km_test_test = self.__kernel.compute(data)
                if retrainable:
                    self._km_test_test = km_test_test
            else:
                if __debug__:
                    debug('GPR', "Not recomputing test test kernel matrix")
                km_test_test = self._km_test_test

            if __debug__:
                debug("GPR", "Computing predicted variances")
            L = self._L
            # v = NLAsolve(L, km_train_test)
            # Faster:
            piv = N.arange(L.shape[0])
            v = SL.lu_solve((L.T, piv), km_train_test, trans=1)
            # self.predicted_variances = \
            #     Ndiag(km_test_test - Ndot(v.T, v)) \
            #     + self.sigma_noise**2
            # Faster formula: N.diag(Ndot(v.T, v)) = (v**2).sum(0):
            self.predicted_variances = Ndiag(km_test_test) - (v ** 2).sum(0) \
                                       + self.sigma_noise ** 2
            pass

        if __debug__:
            debug("GPR", "Done predicting")
        return predictions

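    # For reference, the predictive equations implemented above are
    # (Rasmussen & Williams, Algorithm 2.1):
    #
    #   mean      f*_bar = k_*^T * alpha
    #   variance  V[f*]  = k(x_*, x_*) - v^T * v,   with v = L \ k_*
    #
    # to which sigma_noise^2 is added to obtain the variance of the noisy
    # observations rather than of the latent function.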

    def _setRetrainable(self, value, force=False):
        """Internal function : need to set _km_test_test
        """
        super(GPR, self)._setRetrainable(value, force)
        if force or (value and value != self.params.retrainable):
            self._km_test_test = None


    def untrain(self):
        super(GPR, self).untrain()
        # XXX might need to take special care for retrainable. later
        self._init_internals()
        pass


    def set_hyperparameters(self, hyperparameter):
        """
        Set hyperparameters' values.

        Note that 'hyperparameter' is a sequence so the order of its
        values is important. First value must be sigma_noise, then
        other kernel's hyperparameters values follow in the exact
        order the kernel expects them to be.
        """
        if hyperparameter[0] < self.params['sigma_noise'].min:
            raise InvalidHyperparameterError()
        self.sigma_noise = hyperparameter[0]
        if hyperparameter.size > 1:
            self.__kernel.set_hyperparameters(hyperparameter[1:])
            pass
        return

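    # Illustrative sketch of the expected ordering (the example values and
    # the number of kernel hyperparameters are made up and depend on the
    # kernel actually in use):
    #
    #   clf = GPR(KernelSquaredExponential())
    #   # [sigma_noise, <kernel hyperparameters in the kernel's own order>]
    #   clf.set_hyperparameters(N.array([1.0e-3, 1.0, 0.5]))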

    kernel = property(fget=lambda self:self.__kernel)
    pass

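# Illustrative usage of GPR (a minimal sketch, not part of the original
# module; 'samples' and 'labels' are synthetic placeholders and assume a
# working PyMVPA installation):
#
#   samples = N.random.rand(20, 5)
#   labels = N.random.rand(20)
#   ds = Dataset(samples=samples, labels=labels)
#   clf = GPR(KernelSquaredExponential(), sigma_noise=0.01)
#   clf.states.enable('predicted_variances')
#   clf.train(ds)
#   predictions = clf.predict(samples)
#   variances = clf.predicted_variances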


class GPRLinearWeights(Sensitivity):
    """`SensitivityAnalyzer` that reports the weights of a GPR trained
    on a given `Dataset`.

    In case of KernelLinear it computes explicitly the coefficients
    of the linear regression, together with their variances (if
    requested).

    Note that the intercept is not computed.
    """

    variances = StateVariable(enabled=False,
        doc="Variances of the weights (for KernelLinear)")

    _LEGAL_CLFS = [ GPR ]


    def _call(self, dataset):
        """Extract weights from GPR
        """

        clf = self.clf
        kernel = clf.kernel
        train_fv = clf._train_fv

        weights = Ndot(kernel.Sigma_p,
                       Ndot(train_fv.T, clf._alpha))

        if self.states.isEnabled('variances'):
            # super ugly formulas that can be quite surely improved:
            tmp = N.linalg.inv(clf._L)
            Kyinv = Ndot(tmp.T, tmp)
            # XXX in such lengthy matrix manipulations you might better off
            #     using N.matrix where * is a matrix product
            self.states.variances = Ndiag(
                kernel.Sigma_p -
                Ndot(kernel.Sigma_p,
                     Ndot(train_fv.T,
                          Ndot(Kyinv,
                               Ndot(train_fv, kernel.Sigma_p)))))
        return weights

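    # For reference, with a linear kernel and prior covariance Sigma_p over
    # the weights, the posterior mean and covariance of the weights are
    #
    #   E[w]   = Sigma_p * X^T * (K + sigma_noise^2*I)^-1 * y
    #          = Sigma_p * X^T * alpha
    #   Cov[w] = Sigma_p - Sigma_p * X^T * (K + sigma_noise^2*I)^-1 * X * Sigma_p
    #
    # which is what the code above computes, Kyinv being the inverse of the
    # regularized train-train kernel matrix.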


if externals.exists('openopt'):

    from mvpa.clfs.model_selector import ModelSelector

    class GPRWeights(Sensitivity):
        """`SensitivityAnalyzer` that reports the weights of a GPR trained
        on a given `Dataset`.
        """

        _LEGAL_CLFS = [ GPR ]

        def _call(self, dataset):
            """Extract weights from GPR
            """

            clf = self.clf
            # normalize data:
            clf._train_labels = (clf._train_labels - clf._train_labels.mean()) \
                                / clf._train_labels.std()
            # clf._train_fv = (clf._train_fv-clf._train_fv.mean(0)) \
            #                 /clf._train_fv.std(0)
            dataset = Dataset(samples=clf._train_fv, labels=clf._train_labels)
            clf.states.enable("log_marginal_likelihood")
            ms = ModelSelector(clf, dataset)
            # Note that some kernels do not have a gradient yet!
            # XXX Make it initialize to clf's current hyperparameter values
            #     or maybe add ability to specify starting points in the constructor
            sigma_noise_initial = 1.0e-5
            sigma_f_initial = 1.0
            length_scale_initial = N.ones(dataset.nfeatures)*1.0e4
            # length_scale_initial = N.random.rand(dataset.nfeatures)*1.0e4
            hyp_initial_guess = N.hstack([sigma_noise_initial,
                                          sigma_f_initial,
                                          length_scale_initial])
            fixedHypers = N.array([0]*hyp_initial_guess.size, dtype=bool)
            fixedHypers = None
            problem = ms.max_log_marginal_likelihood(
                hyp_initial_guess=hyp_initial_guess,
                optimization_algorithm="scipy_lbfgsb",
                ftol=1.0e-3, fixedHypers=fixedHypers,
                use_gradient=True, logscale=True)
            if __debug__ and 'GPR_WEIGHTS' in debug.active:
                problem.iprint = 1
            lml = ms.solve()
            weights = 1.0/ms.hyperparameters_best[2:] # weight = 1/length_scale
            if __debug__:
                debug("GPR",
                      "%s, train: shape %s, labels %s, min:max %g:%g, "
                      "sigma_noise %g, sigma_f %g" %
                      (clf, clf._train_fv.shape, N.unique(clf._train_labels),
                       clf._train_fv.min(), clf._train_fv.max(),
                       ms.hyperparameters_best[0], ms.hyperparameters_best[1]))

            return weights

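        # Note on the returned sensitivities: with a squared-exponential
        # kernel and one length scale per feature (ARD), a small optimized
        # length scale means the feature strongly influences the covariance,
        # hence weight = 1/length_scale is used as the per-feature
        # sensitivity.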