# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
# vi: set ft=python sts=4 ts=4 sw=4 et:
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
#
#   Copyright (c) 2008 Emanuele Olivetti <emanuele@relativita.com>
#   See COPYING file distributed along with the PyMVPA package for the
#   copyright and license terms.
#
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
10 """Gaussian Process Regression (GPR)."""
11
12 __docformat__ = 'restructuredtext'
13
14
15 import numpy as N
16
17 from mvpa.base import externals
18
19 from mvpa.misc.state import StateVariable
20 from mvpa.clfs.base import Classifier
21 from mvpa.misc.param import Parameter
22 from mvpa.clfs.kernel import KernelSquaredExponential, KernelLinear
23 from mvpa.measures.base import Sensitivity
24 from mvpa.misc.exceptions import InvalidHyperparameterError
25 from mvpa.datasets import Dataset
26
if externals.exists("scipy", raiseException=True):
    from scipy.linalg import cho_solve as SLcho_solve
    from scipy.linalg import cholesky as SLcholesky
    import scipy.linalg as SL
    # Some local bindings for bits of speed up
    SLAError = SL.basic.LinAlgError

if __debug__:
    from mvpa.base import debug

# Some local bindings for bits of speed up
Nlog = N.log
Ndot = N.dot
Ndiag = N.diag
NLAcholesky = N.linalg.cholesky
NLAsolve = N.linalg.solve
NLAError = N.linalg.linalg.LinAlgError
eps64 = N.finfo(N.float64).eps

# Some precomputed items. log is relatively expensive
_halflog2pi = 0.5 * Nlog(2 * N.pi)


class GPR(Classifier):
    """Gaussian Process Regression (GPR).

    """

    predicted_variances = StateVariable(enabled=False,
        doc="Variance per each predicted value")

    log_marginal_likelihood = StateVariable(enabled=False,
        doc="Log Marginal Likelihood")

    log_marginal_likelihood_gradient = StateVariable(enabled=False,
        doc="Log Marginal Likelihood Gradient")

    _clf_internals = [ 'gpr', 'regression', 'retrainable' ]


    # NOTE XXX Parameters of the classifier. Values available as
    # clf.parameter or clf.params.parameter, or as
    # clf.params['parameter'] (as the full Parameter object)
    #
    # __doc__ and __repr__ for the class are conveniently adjusted to
    # reflect values of those params
    # Kernel machines/classifiers should be refactored also to behave
    # the same and define the kernel parameter appropriately... TODO, but SVMs
    # already kinda do it nicely ;-)

    sigma_noise = Parameter(0.001, allowedtype='float', min=1e-10,
        doc="the standard deviation of the Gaussian noise.")

    # XXX For now I don't introduce a kernel parameter since kernel
    #     machines are yet to be unified
    #kernel = Parameter(None, allowedtype='Kernel',
    #    doc="Kernel object defining the covariance between instances. "
    #        "(Defaults to KernelSquaredExponential if None in arguments)")

    lm = Parameter(0.0, min=0.0, allowedtype='float',
        doc="""The regularization term lambda.
        Increase this when the kernel matrix is not positive definite.""")


    def __init__(self, kernel=None, **kwargs):
        """Initialize a GPR regression analysis.

        :Parameters:
          kernel : Kernel
            a kernel object defining the covariance between instances.
            (Defaults to KernelSquaredExponential if None in arguments)
        """
        # init base class first
        Classifier.__init__(self, **kwargs)

        # It does not make sense to calculate a confusion matrix for a GPR
        # XXX it does ;) it will be a RegressionStatistics actually ;-)
        #     So if someone desires it -- let him have it
        # self.states.enable('training_confusion', False)

        # set kernel:
        if kernel is None:
            kernel = KernelSquaredExponential()
        self.__kernel = kernel

        # append proper clf_internal depending on the kernel
        # TODO: unify finally all kernel-based machines.
        #       make SMLR use kernels
        if isinstance(kernel, KernelLinear):
            self._clf_internals += ['linear']
        else:
            self._clf_internals += ['non-linear']

        if externals.exists('openopt') \
           and not 'has_sensitivity' in self._clf_internals:
            self._clf_internals += ['has_sensitivity']

        # No need to initialize state variables: unless they get set,
        # accessing them would raise an exception.
        # self.predicted_variances = None
        # self.log_marginal_likelihood = None
        self._init_internals()
        pass


    def _init_internals(self):
        """Reset some internal variables to None.

        To be used in the constructor and untrain()
        """
        self._train_fv = None
        self._labels = None
        self._km_train_train = None
        self._train_labels = None
        self._alpha = None
        self._L = None
        self._LL = None
        self.__kernel.reset()
        pass


    def __repr__(self):
        """String summary of the object
        """
        return super(GPR, self).__repr__(
            prefixes=['kernel=%s' % self.__kernel])


    def compute_log_marginal_likelihood(self):
        """
        Compute log marginal likelihood using self.train_fv and self.labels.
        """
        if __debug__:
            debug("GPR", "Computing log_marginal_likelihood")
        self.log_marginal_likelihood = \
            -0.5 * Ndot(self._train_labels, self._alpha) - \
            Nlog(self._L.diagonal()).sum() - \
            self._km_train_train.shape[0] * _halflog2pi
        return self.log_marginal_likelihood


    def compute_gradient_log_marginal_likelihood(self):
        """Compute the gradient of the log marginal likelihood. This
        version uses a more compact formula provided by the Williams
        and Rasmussen book.
        """
        # XXX EO: check whether the precomputed self.alpha self.Kinv
        #     are actually the ones corresponding to the hyperparameters
        #     used to compute this gradient!
        # YYY EO: currently this is verified outside gpr.py but it is
        #     not an efficient solution.
        # XXX EO: Do some memoizing since it could happen that some
        #     hyperparameters are kept constant by user request, so we
        #     don't need (sometimes) to recompute the corresponding
        #     gradient again.

        # self.Kinv = N.linalg.inv(self._C)
        # Faster:
        Kinv = SLcho_solve(self._LL, N.eye(self._L.shape[0]))

        alphalphaT = N.dot(self._alpha[:,None], self._alpha[None,:])
        tmp = alphalphaT - Kinv
        # Pass tmp to __kernel and let it compute its gradient terms.
        # This scales up to a huge number of hyperparameters:
        grad_LML_hypers = self.__kernel.compute_lml_gradient(
            tmp, self._train_fv)
        grad_K_sigma_n = 2.0*self.sigma_noise*N.eye(tmp.shape[0])
        # Add the term related to sigma_noise:
        # grad_LML_sigma_n = 0.5 * N.trace(N.dot(tmp,grad_K_sigma_n))
        # Faster formula: tr(AB) = (A*B.T).sum()
        grad_LML_sigma_n = 0.5 * (tmp * (grad_K_sigma_n).T).sum()
        lml_gradient = N.hstack([grad_LML_sigma_n, grad_LML_hypers])
        self.log_marginal_likelihood_gradient = lml_gradient
        return lml_gradient


    def compute_gradient_log_marginal_likelihood_logscale(self):
        """Compute the gradient of the log marginal likelihood when the
        hyperparameters are in logscale. This version uses a more compact
        formula provided by the Williams and Rasmussen book.
        """
        # Kinv = N.linalg.inv(self._C)
        # Faster:
        Kinv = SLcho_solve(self._LL, N.eye(self._L.shape[0]))
        alphalphaT = N.dot(self._alpha[:,None], self._alpha[None,:])
        tmp = alphalphaT - Kinv
        grad_LML_log_hypers = \
            self.__kernel.compute_lml_gradient_logscale(tmp, self._train_fv)
        grad_K_log_sigma_n = 2.0 * self.sigma_noise ** 2 * N.eye(Kinv.shape[0])
        # Add the term related to sigma_noise:
        # grad_LML_log_sigma_n = 0.5 * N.trace(N.dot(tmp, grad_K_log_sigma_n))
        # Faster formula: tr(AB) = (A * B.T).sum()
        grad_LML_log_sigma_n = 0.5 * (tmp * (grad_K_log_sigma_n).T).sum()
        lml_gradient = N.hstack([grad_LML_log_sigma_n, grad_LML_log_hypers])
        self.log_marginal_likelihood_gradient = lml_gradient
        return lml_gradient


    def getSensitivityAnalyzer(self, flavor='auto', **kwargs):
        """Returns a sensitivity analyzer for GPR.

        :Parameters:
          flavor : basestring
            What sensitivity to provide. Valid values are
            'linear', 'model_select', 'auto'.
            In case of 'auto', selects 'linear' for a linear kernel
            and 'model_select' for the rest. 'linear' corresponds to
            GPRLinearWeights and 'model_select' to GPRWeights.
        """
        # XXX The following two lines do not work since
        #     self.__kernel is an instance of kernel.KernelLinear and not
        #     just KernelLinear. How to fix?
        # YYY yoh is not sure what the problem is... KernelLinear is actually
        #     kernel.KernelLinear so everything should be ok
        if flavor == 'auto':
            flavor = ('model_select', 'linear')\
                     [int(isinstance(self.__kernel, KernelLinear))]
        if __debug__:
            debug("GPR", "Returning '%s' sensitivity analyzer" % flavor)

        # Return proper sensitivity
        if flavor == 'linear':
            return GPRLinearWeights(self, **kwargs)
        elif flavor == 'model_select':
            # sanity check
            if not ('has_sensitivity' in self._clf_internals):
                raise ValueError, \
                      "model_select flavor is not available, probably " \
                      "due to the missing 'openopt' module"
            return GPRWeights(self, **kwargs)
        else:
            raise ValueError, "Flavor %s is not recognized" % flavor


    def _train(self, data):
        """Train the classifier using `data` (`Dataset`).
        """

        # local bindings for faster lookup
        retrainable = self.params.retrainable
        if retrainable:
            newkernel = False
            newL = False
            _changedData = self._changedData

        self._train_fv = train_fv = data.samples
        self._train_labels = train_labels = data.labels

        if not retrainable or _changedData['traindata'] \
               or _changedData.get('kernel_params', False):
            if __debug__:
                debug("GPR", "Computing train train kernel matrix")
            self._km_train_train = km_train_train = self.__kernel.compute(train_fv)
            newkernel = True
            if retrainable:
                self._km_train_test = None # reset to facilitate recomputation
        else:
            if __debug__:
                debug("GPR", "Not recomputing kernel since retrainable and "
                      "nothing has changed")
            km_train_train = self._km_train_train # reuse

        if not retrainable or newkernel or _changedData['params']:
            if __debug__:
                debug("GPR", "Computing L. sigma_noise=%g" % self.sigma_noise)
            # XXX it seems that we do not need binding to the object, but maybe
            #     the commented out code will return?
            self._C = km_train_train + \
                  self.sigma_noise**2 * N.identity(km_train_train.shape[0], 'd')
            # The following decomposition could raise
            # N.linalg.linalg.LinAlgError for numerical reasons, due to
            # the too rapid decay of 'self._C' eigenvalues. In that case
            # we try adding a small constant to self._C, e.g.
            # epsilon=1.0e-20. This is a form of Tikhonov regularization,
            # equivalent to adding a little white Gaussian noise to the data.
            #
            # XXX EO: how to choose epsilon?
            #
            # Cholesky decomposition is provided by three different
            # NumPy/SciPy routines (fastest first):
            # 1) self._LL = scipy.linalg.cho_factor(self._C, lower=True)
            #    self._L = L = N.tril(self._LL[0])
            # 2) self._L = scipy.linalg.cholesky(self._C, lower=True)
            # 3) self._L = numpy.linalg.cholesky(self._C)
            # Even though 1 is the fastest we choose 2 since 1 does
            # not return a clean lower-triangular matrix (see docstring).

            # PBS: I just made it so the KernelMatrix is regularized
            #      all the time. I figured that if you were ever going to
            #      use regularization, you would want to set it yourself
            #      and use the same value for all folds of your data.
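            # In GP terms (Rasmussen & Williams, Algorithm 2.1) this branch
            # factorizes the (regularized) covariance
            #   C = K + sigma_noise^2 * I  (+ lm * I)  =  L L^T,
            # with L lower triangular; alpha = C^{-1} * y is then obtained
            # further below via cho_solve.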
            try:
                # apply regularization
                epsilon = self.params.lm * N.eye(self._C.shape[0])
                self._L = SLcholesky(self._C + epsilon, lower=True)
                self._LL = (self._L, True)
            except SLAError:
                raise SLAError("Kernel matrix is not positive definite. "
                               "Try increasing the lm parameter.")
                pass
            newL = True
        else:
            if __debug__:
                debug("GPR", "Not computing L since kernel, data and params "
                      "stayed the same")
            L = self._L # reuse

        # XXX we leave _alpha being recomputed, although we could check
        #     if newL or _changedData['labels']
        #
        if __debug__:
            debug("GPR", "Computing alpha")
        # self._alpha = NLAsolve(L.transpose(),
        #                        NLAsolve(L, train_labels))
        # Faster:
        self._alpha = SLcho_solve(self._LL, train_labels)

        # compute only if the state is enabled
        if self.states.isEnabled('log_marginal_likelihood'):
            self.compute_log_marginal_likelihood()
            pass

        if retrainable:
            # we must assign it only if it is retrainable
            self.states.retrained = not newkernel or not newL

        if __debug__:
            debug("GPR", "Done training")

        pass


    def _predict(self, data):
        """
        Predict the output for the provided data.
        """
        retrainable = self.params.retrainable

        if not retrainable or self._changedData['testdata'] \
               or self._km_train_test is None:
            if __debug__:
                debug('GPR', "Computing train test kernel matrix")
            km_train_test = self.__kernel.compute(self._train_fv, data)
            if retrainable:
                self._km_train_test = km_train_test
                self.states.repredicted = False
        else:
            if __debug__:
                debug('GPR', "Not recomputing train test kernel matrix")
            km_train_test = self._km_train_test
            self.states.repredicted = True


        predictions = Ndot(km_train_test.transpose(), self._alpha)

        if self.states.isEnabled('predicted_variances'):
            # do computation only if state variable was enabled
            if not retrainable or self._km_test_test is None \
                   or self._changedData['testdata']:
                if __debug__:
                    debug('GPR', "Computing test test kernel matrix")
                km_test_test = self.__kernel.compute(data)
                if retrainable:
                    self._km_test_test = km_test_test
            else:
                if __debug__:
                    debug('GPR', "Not recomputing test test kernel matrix")
                km_test_test = self._km_test_test

            if __debug__:
                debug("GPR", "Computing predicted variances")
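            # Predictive variances follow the standard GP expressions
            # (Rasmussen & Williams, Algorithm 2.1):
            #   v = L^{-1} * k_*                    (computed below via lu_solve)
            #   var(y_*) = diag(k(X_*, X_*)) - (v**2).sum(0) + sigma_noise^2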
            L = self._L
            # v = NLAsolve(L, km_train_test)
            # Faster:
            piv = N.arange(L.shape[0])
            v = SL.lu_solve((L.T, piv), km_train_test, trans=1)
            # self.predicted_variances = \
            #     Ndiag(km_test_test - Ndot(v.T, v)) \
            #     + self.sigma_noise**2
            # Faster formula: N.diag(Ndot(v.T, v)) = (v**2).sum(0):
            self.predicted_variances = Ndiag(km_test_test) - (v ** 2).sum(0) \
                                       + self.sigma_noise ** 2
            pass

        if __debug__:
            debug("GPR", "Done predicting")
        return predictions


    def _setRetrainable(self, value, force=False):
        """Internal function : need to set _km_test_test
        """
        super(GPR, self)._setRetrainable(value, force)
        if force or (value and value != self.params.retrainable):
            self._km_test_test = None


    def untrain(self):
        super(GPR, self).untrain()
        # XXX might need to take special care for retrainable. later
        self._init_internals()
        pass


    def set_hyperparameters(self, hyperparameter):
        """
        Set hyperparameters' values.

        Note that 'hyperparameter' is a sequence, so the order of its
        values is important. The first value must be sigma_noise; the
        other kernel hyperparameter values then follow in the exact
        order the kernel expects them.
        """
        if hyperparameter[0] < self.params['sigma_noise'].min:
            raise InvalidHyperparameterError()
        self.sigma_noise = hyperparameter[0]
        if hyperparameter.size > 1:
            self.__kernel.set_hyperparameters(hyperparameter[1:])
            pass
        return

    kernel = property(fget=lambda self:self.__kernel)
    pass


454 """`SensitivityAnalyzer` that reports the weights GPR trained
455 on a given `Dataset`.
456
457 In case of KernelLinear compute explicitly the coefficients
458 of the linear regression, together with their variances (if
459 requested).
460
461 Note that the intercept is not computed.
462 """

    variances = StateVariable(enabled=False,
        doc="Variances of the weights (for KernelLinear)")

    _LEGAL_CLFS = [ GPR ]


    def _call(self, dataset):
        """Extract weights from GPR
        """

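        # For a linear kernel k(x, x') = x * Sigma_p * x'^T the posterior mean
        # of the implicit weight vector is Sigma_p * X^T * alpha, with
        # covariance Sigma_p - Sigma_p X^T (K + sigma_noise^2 I)^{-1} X Sigma_p;
        # the code below returns the mean and, if enabled, the diagonal of
        # that covariance as 'variances'.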
        clf = self.clf
        kernel = clf.kernel
        train_fv = clf._train_fv

        weights = Ndot(kernel.Sigma_p,
                       Ndot(train_fv.T, clf._alpha))

        if self.states.isEnabled('variances'):
            # super ugly formulas that can quite surely be improved:
            tmp = N.linalg.inv(clf._L)
            Kyinv = Ndot(tmp.T, tmp)
            # XXX in such lengthy matrix manipulations you might be better off
            #     using N.matrix, where * is a matrix product
            self.states.variances = Ndiag(
                kernel.Sigma_p -
                Ndot(kernel.Sigma_p,
                     Ndot(train_fv.T,
                          Ndot(Kyinv,
                               Ndot(train_fv, kernel.Sigma_p)))))
        return weights


if externals.exists('openopt'):

    from mvpa.clfs.model_selector import ModelSelector

    class GPRWeights(Sensitivity):
        """`SensitivityAnalyzer` that reports the weights of a GPR trained
        on a given `Dataset`.
        """

        _LEGAL_CLFS = [ GPR ]

        def _call(self, dataset):
            """Extract weights from GPR
            """

            clf = self.clf
            # normalize data:
            clf._train_labels = (clf._train_labels - clf._train_labels.mean()) \
                                / clf._train_labels.std()
            # clf._train_fv = (clf._train_fv - clf._train_fv.mean(0)) \
            #                 / clf._train_fv.std(0)
            dataset = Dataset(samples=clf._train_fv, labels=clf._train_labels)
            clf.states.enable("log_marginal_likelihood")
            ms = ModelSelector(clf, dataset)
            # Note that some kernels do not have a gradient yet!
            # XXX Make it initialize to clf's current hyperparameter values,
            #     or maybe add the ability to specify starting points in the
            #     constructor
            sigma_noise_initial = 1.0e-5
            sigma_f_initial = 1.0
            length_scale_initial = N.ones(dataset.nfeatures)*1.0e4
            # length_scale_initial = N.random.rand(dataset.nfeatures)*1.0e4
            hyp_initial_guess = N.hstack([sigma_noise_initial,
                                          sigma_f_initial,
                                          length_scale_initial])
            fixedHypers = N.array([0]*hyp_initial_guess.size, dtype=bool)
            fixedHypers = None
            problem = ms.max_log_marginal_likelihood(
                hyp_initial_guess=hyp_initial_guess,
                optimization_algorithm="scipy_lbfgsb",
                ftol=1.0e-3, fixedHypers=fixedHypers,
                use_gradient=True, logscale=True)
            if __debug__ and 'GPR_WEIGHTS' in debug.active:
                problem.iprint = 1
            lml = ms.solve()
            weights = 1.0/ms.hyperparameters_best[2:] # weight = 1/length_scale
            if __debug__:
                debug("GPR",
                      "%s, train: shape %s, labels %s, min:max %g:%g, "
                      "sigma_noise %g, sigma_f %g" %
                      (clf, clf._train_fv.shape, N.unique(clf._train_labels),
                       clf._train_fv.min(), clf._train_fv.max(),
                       ms.hyperparameters_best[0], ms.hyperparameters_best[1]))

            return weights