# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
# vi: set ft=python sts=4 ts=4 sw=4 et:
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
#
#   Copyright (c) 2008 Emanuele Olivetti <emanuele@relativita.com>
#   See COPYING file distributed along with the PyMVPA package for the
#   copyright and license terms.
#
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
10 """Gaussian Process Regression (GPR)."""
11
12 __docformat__ = 'restructuredtext'
13
14
15 import numpy as N
16
17 from mvpa.base import externals
18
19 from mvpa.misc.state import StateVariable
20 from mvpa.clfs.base import Classifier
21 from mvpa.misc.param import Parameter
22 from mvpa.clfs.kernel import KernelSquaredExponential, KernelLinear
23 from mvpa.measures.base import Sensitivity
24 from mvpa.misc.exceptions import InvalidHyperparameterError
25 from mvpa.datasets import Dataset
26
if externals.exists("scipy", raiseException=True):
    from scipy.linalg import cho_solve as SLcho_solve
    from scipy.linalg import cholesky as SLcholesky
    import scipy.linalg as SL
    # Some local bindings for bits of speed up
    SLAError = SL.basic.LinAlgError

if __debug__:
    from mvpa.base import debug

# Some local bindings for bits of speed up
Nlog = N.log
Ndot = N.dot
Ndiag = N.diag
NLAcholesky = N.linalg.cholesky
NLAsolve = N.linalg.solve
NLAError = N.linalg.linalg.LinAlgError
eps64 = N.finfo(N.float64).eps

# Some precomputed items. log is relatively expensive
_halflog2pi = 0.5 * Nlog(2 * N.pi)


class GPR(Classifier):
    """Gaussian Process Regression (GPR).

    """

    predicted_variances = StateVariable(enabled=False,
        doc="Variance per each predicted value")

    log_marginal_likelihood = StateVariable(enabled=False,
        doc="Log Marginal Likelihood")

    log_marginal_likelihood_gradient = StateVariable(enabled=False,
        doc="Log Marginal Likelihood Gradient")

    _clf_internals = [ 'gpr', 'regression', 'retrainable' ]


    # NOTE XXX Parameters of the classifier. Values available as
    # clf.parameter or clf.params.parameter, or as
    # clf.params['parameter'] (as the full Parameter object)
    #
    # __doc__ and __repr__ for the class are conveniently adjusted to
    # reflect values of those params
    # Kernel machines/classifiers should be refactored also to behave
    # the same and define the kernel parameter appropriately... TODO, but SVMs
    # already kinda do it nicely ;-)

    sigma_noise = Parameter(0.001, allowedtype='float', min=1e-10,
        doc="the standard deviation of the Gaussian noise.")

    # XXX For now I don't introduce a kernel parameter since kernel
    #     machines are yet to be unified
    #kernel = Parameter(None, allowedtype='Kernel',
    #    doc="Kernel object defining the covariance between instances. "
    #        "(Defaults to KernelSquaredExponential if None in arguments)")

    lm = Parameter(0.0, min=0.0, allowedtype='float',
        doc="""The regularization term lambda.
        Increase this when the kernel matrix is not positive definite.""")


    def __init__(self, kernel=None, **kwargs):
        """Initialize a GPR regression analysis.

        :Parameters:
          kernel : Kernel
            a kernel object defining the covariance between instances.
            (Defaults to KernelSquaredExponential if None in arguments)
        """
        # init base class first
        Classifier.__init__(self, **kwargs)

        # It does not make sense to calculate a confusion matrix for a GPR
        # XXX it does ;) it will be a RegressionStatistics actually ;-)
        #     So if someone desires it -- let him have it
        # self.states.enable('training_confusion', False)

        # set kernel:
        if kernel is None:
            kernel = KernelSquaredExponential()
        self.__kernel = kernel

        # append proper clf_internal depending on the kernel
        # TODO: unify finally all kernel-based machines.
        #       make SMLR use kernels
        if isinstance(kernel, KernelLinear):
            self._clf_internals += ['linear']
        else:
            self._clf_internals += ['non-linear']

        if externals.exists('openopt') \
           and not 'has_sensitivity' in self._clf_internals:
            self._clf_internals += ['has_sensitivity']

        # No need to initialize state variables: unless they get set,
        # accessing them would raise an exception.
        # self.predicted_variances = None
        # self.log_marginal_likelihood = None
        self._init_internals()
        pass


    def _init_internals(self):
        """Reset some internal variables to None.

        To be used in the constructor and untrain()
        """
        self._train_fv = None
        self._labels = None
        self._km_train_train = None
        self._train_labels = None
        self._alpha = None
        self._L = None
        self._LL = None
        self.__kernel.reset()
        pass


    def __repr__(self):
        """String summary of the object
        """
        return super(GPR, self).__repr__(
            prefixes=['kernel=%s' % self.__kernel])


    def compute_log_marginal_likelihood(self):
        """
        Compute log marginal likelihood using self.train_fv and self.labels.
        """
        if __debug__:
            debug("GPR", "Computing log_marginal_likelihood")
        self.log_marginal_likelihood = \
            -0.5 * Ndot(self._train_labels, self._alpha) - \
            Nlog(self._L.diagonal()).sum() - \
            self._km_train_train.shape[0] * _halflog2pi
        return self.log_marginal_likelihood


    def compute_gradient_log_marginal_likelihood(self):
        """Compute the gradient of the log marginal likelihood. This
        version uses a more compact formula provided by the Williams
        and Rasmussen book.
        """
        # XXX EO: check whether the precomputed self.alpha self.Kinv
        #     are actually the ones corresponding to the hyperparameters
        #     used to compute this gradient!
        # YYY EO: currently this is verified outside gpr.py but it is
        #     not an efficient solution.
        # XXX EO: Do some memoizing since it could happen that some
        #     hyperparameters are kept constant by user request, so we
        #     don't need (sometimes) to recompute the corresponding
        #     gradient again.

        # self.Kinv = N.linalg.inv(self._C)
        # Faster:
        Kinv = SLcho_solve(self._LL, N.eye(self._L.shape[0]))

        alphalphaT = N.dot(self._alpha[:,None], self._alpha[None,:])
        tmp = alphalphaT - Kinv
        # Pass tmp to __kernel and let it compute its gradient terms.
        # This scales up to a huge number of hyperparameters:
        grad_LML_hypers = self.__kernel.compute_lml_gradient(
            tmp, self._train_fv)
        grad_K_sigma_n = 2.0*self.sigma_noise*N.eye(tmp.shape[0])
        # Add the term related to sigma_noise:
        # grad_LML_sigma_n = 0.5 * N.trace(N.dot(tmp,grad_K_sigma_n))
        # Faster formula: tr(AB) = (A*B.T).sum()
        grad_LML_sigma_n = 0.5 * (tmp * (grad_K_sigma_n).T).sum()
        lml_gradient = N.hstack([grad_LML_sigma_n, grad_LML_hypers])
        self.log_marginal_likelihood_gradient = lml_gradient
        return lml_gradient


    def compute_gradient_log_marginal_likelihood_logscale(self):
        """Compute the gradient of the log marginal likelihood when the
        hyperparameters are in logscale. This version uses a more compact
        formula provided by the Williams and Rasmussen book.
        """
        # Kinv = N.linalg.inv(self._C)
        # Faster:
        Kinv = SLcho_solve(self._LL, N.eye(self._L.shape[0]))
        alphalphaT = N.dot(self._alpha[:,None], self._alpha[None,:])
        tmp = alphalphaT - Kinv
        grad_LML_log_hypers = \
            self.__kernel.compute_lml_gradient_logscale(tmp, self._train_fv)
        grad_K_log_sigma_n = 2.0 * self.sigma_noise ** 2 * N.eye(Kinv.shape[0])
        # Add the term related to sigma_noise:
        # grad_LML_log_sigma_n = 0.5 * N.trace(N.dot(tmp, grad_K_log_sigma_n))
        # Faster formula: tr(AB) = (A * B.T).sum()
        grad_LML_log_sigma_n = 0.5 * (tmp * (grad_K_log_sigma_n).T).sum()
        lml_gradient = N.hstack([grad_LML_log_sigma_n, grad_LML_log_hypers])
        self.log_marginal_likelihood_gradient = lml_gradient
        return lml_gradient


    def getSensitivityAnalyzer(self, flavor='auto', **kwargs):
        """Returns a sensitivity analyzer for GPR.

        :Parameters:
          flavor : basestring
            What sensitivity to provide. Valid values are
            'linear', 'model_select', 'auto'.
            In case of 'auto', selects 'linear' for a linear kernel
            and 'model_select' for the rest. 'linear' corresponds to
            GPRLinearWeights and 'model_select' to GPRWeights.
        """
        # XXX The following two lines do not work since
        #     self.__kernel is an instance of kernel.KernelLinear and not
        #     just KernelLinear. How to fix?
        # YYY yoh is not sure what the problem is... KernelLinear is actually
        #     kernel.KernelLinear so everything should be ok
        if flavor == 'auto':
            flavor = ('model_select', 'linear')\
                     [int(isinstance(self.__kernel, KernelLinear))]
        if __debug__:
            debug("GPR", "Returning '%s' sensitivity analyzer" % flavor)

        # Return proper sensitivity
        if flavor == 'linear':
            return GPRLinearWeights(self, **kwargs)
        elif flavor == 'model_select':
            # sanity check
            if not ('has_sensitivity' in self._clf_internals):
                raise ValueError, \
                      "model_select flavor is not available, probably " \
                      "due to the missing 'openopt' module"
            return GPRWeights(self, **kwargs)
        else:
            raise ValueError, "Flavor %s is not recognized" % flavor


    def _train(self, data):
        """Train the classifier using `data` (`Dataset`).
        """

        # local bindings for faster lookup
        retrainable = self.params.retrainable
        if retrainable:
            newkernel = False
            newL = False
            _changedData = self._changedData

        self._train_fv = train_fv = data.samples
        self._train_labels = train_labels = data.labels

        if not retrainable or _changedData['traindata'] \
               or _changedData.get('kernel_params', False):
            if __debug__:
                debug("GPR", "Computing train train kernel matrix")
            self._km_train_train = km_train_train = self.__kernel.compute(train_fv)
            newkernel = True
            if retrainable:
                self._km_train_test = None # reset to facilitate recomputation
        else:
            if __debug__:
                debug("GPR", "Not recomputing kernel since retrainable and "
                      "nothing has changed")
            km_train_train = self._km_train_train # reuse

        if not retrainable or newkernel or _changedData['params']:
            if __debug__:
                debug("GPR", "Computing L. sigma_noise=%g" % self.sigma_noise)
            # XXX it seems that we do not need binding to the object, but maybe
            #     the commented out code will return?
            self._C = km_train_train + \
                  self.sigma_noise**2 * N.identity(km_train_train.shape[0], 'd')
            # The following decomposition could raise
            # N.linalg.linalg.LinAlgError for numerical reasons, due to
            # the too rapid decay of 'self._C' eigenvalues. In that case
            # we try adding a small constant to self._C, e.g.
            # epsilon=1.0e-20. This is a form of Tikhonov regularization,
            # equivalent to adding a little white Gaussian noise to the data.
            #
            # XXX EO: how to choose epsilon?
            #
            # Cholesky decomposition is provided by three different
            # NumPy/SciPy routines (fastest first):
            # 1) self._LL = scipy.linalg.cho_factor(self._C, lower=True)
            #    self._L = L = N.tril(self._LL[0])
            # 2) self._L = scipy.linalg.cholesky(self._C, lower=True)
            # 3) self._L = numpy.linalg.cholesky(self._C)
            # Even though 1 is the fastest we choose 2 since 1 does
            # not return a clean lower-triangular matrix (see docstring).

            # PBS: I just made it so the KernelMatrix is regularized
            #      all the time. I figured that if you were ever going to
            #      use regularization, you would want to set it yourself
            #      and use the same value for all folds of your data.
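            # In GP terms (Rasmussen & Williams, Algorithm 2.1) this branch
            # factorizes the (regularized) covariance
            #   C = K + sigma_noise^2 * I  (+ lm * I)  =  L L^T,
            # with L lower triangular; alpha = C^{-1} * y is then obtained
            # further below via cho_solve.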
            try:
                # apply regularization
                epsilon = self.params.lm * N.eye(self._C.shape[0])
                self._L = SLcholesky(self._C + epsilon, lower=True)
                self._LL = (self._L, True)
            except SLAError:
                raise SLAError("Kernel matrix is not positive definite. "
                               "Try increasing the lm parameter.")
                pass
            newL = True
        else:
            if __debug__:
                debug("GPR", "Not computing L since kernel, data and params "
                      "stayed the same")
            L = self._L # reuse

        # XXX we leave _alpha being recomputed, although we could check
        #     if newL or _changedData['labels']
        #
        if __debug__:
            debug("GPR", "Computing alpha")
        # self._alpha = NLAsolve(L.transpose(),
        #                        NLAsolve(L, train_labels))
        # Faster:
        self._alpha = SLcho_solve(self._LL, train_labels)

        # compute only if the state is enabled
        if self.states.isEnabled('log_marginal_likelihood'):
            self.compute_log_marginal_likelihood()
            pass

        if retrainable:
            # we must assign it only if it is retrainable
            self.states.retrained = not newkernel or not newL

        if __debug__:
            debug("GPR", "Done training")

        pass


    def _predict(self, data):
        """
        Predict the output for the provided data.
        """
        retrainable = self.params.retrainable

        if not retrainable or self._changedData['testdata'] \
               or self._km_train_test is None:
            if __debug__:
                debug('GPR', "Computing train test kernel matrix")
            km_train_test = self.__kernel.compute(self._train_fv, data)
            if retrainable:
                self._km_train_test = km_train_test
                self.states.repredicted = False
        else:
            if __debug__:
                debug('GPR', "Not recomputing train test kernel matrix")
            km_train_test = self._km_train_test
            self.states.repredicted = True


        predictions = Ndot(km_train_test.transpose(), self._alpha)

        if self.states.isEnabled('predicted_variances'):
            # do computation only if state variable was enabled
            if not retrainable or self._km_test_test is None \
                   or self._changedData['testdata']:
                if __debug__:
                    debug('GPR', "Computing test test kernel matrix")
                km_test_test = self.__kernel.compute(data)
                if retrainable:
                    self._km_test_test = km_test_test
            else:
                if __debug__:
                    debug('GPR', "Not recomputing test test kernel matrix")
                km_test_test = self._km_test_test

            if __debug__:
                debug("GPR", "Computing predicted variances")
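            # Predictive variances follow the standard GP expressions
            # (Rasmussen & Williams, Algorithm 2.1):
            #   v = L^{-1} * k_*                    (computed below via lu_solve)
            #   var(y_*) = diag(k(X_*, X_*)) - (v**2).sum(0) + sigma_noise^2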
            L = self._L
            # v = NLAsolve(L, km_train_test)
            # Faster:
            piv = N.arange(L.shape[0])
            v = SL.lu_solve((L.T, piv), km_train_test, trans=1)
            # self.predicted_variances = \
            #     Ndiag(km_test_test - Ndot(v.T, v)) \
            #     + self.sigma_noise**2
            # Faster formula: N.diag(Ndot(v.T, v)) = (v**2).sum(0):
            self.predicted_variances = Ndiag(km_test_test) - (v ** 2).sum(0) \
                                       + self.sigma_noise ** 2
            pass

        if __debug__:
            debug("GPR", "Done predicting")
        return predictions


    def _setRetrainable(self, value, force=False):
        """Internal function : need to set _km_test_test
        """
        super(GPR, self)._setRetrainable(value, force)
        if force or (value and value != self.params.retrainable):
            self._km_test_test = None


    def untrain(self):
        super(GPR, self).untrain()
        # XXX might need to take special care for retrainable. later
        self._init_internals()
        pass


    def set_hyperparameters(self, hyperparameter):
        """
        Set hyperparameters' values.

        Note that 'hyperparameter' is a sequence, so the order of its
        values is important. The first value must be sigma_noise; the
        other kernel hyperparameter values then follow in the exact
        order the kernel expects them.
        """
        if hyperparameter[0] < self.params['sigma_noise'].min:
            raise InvalidHyperparameterError()
        self.sigma_noise = hyperparameter[0]
        if hyperparameter.size > 1:
            self.__kernel.set_hyperparameters(hyperparameter[1:])
            pass
        return

    kernel = property(fget=lambda self:self.__kernel)
    pass


454 """`SensitivityAnalyzer` that reports the weights GPR trained
455 on a given `Dataset`.
456
457 In case of KernelLinear compute explicitly the coefficients
458 of the linear regression, together with their variances (if
459 requested).
460
461 Note that the intercept is not computed.
462 """

    variances = StateVariable(enabled=False,
        doc="Variances of the weights (for KernelLinear)")

    _LEGAL_CLFS = [ GPR ]


    def _call(self, dataset):
        """Extract weights from GPR
        """

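        # For a linear kernel k(x, x') = x * Sigma_p * x'^T the posterior mean
        # of the implicit weight vector is Sigma_p * X^T * alpha, with
        # covariance Sigma_p - Sigma_p X^T (K + sigma_noise^2 I)^{-1} X Sigma_p;
        # the code below returns the mean and, if enabled, the diagonal of
        # that covariance as 'variances'.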
        clf = self.clf
        kernel = clf.kernel
        train_fv = clf._train_fv

        weights = Ndot(kernel.Sigma_p,
                       Ndot(train_fv.T, clf._alpha))

        if self.states.isEnabled('variances'):
            # super ugly formulas that can quite surely be improved:
            tmp = N.linalg.inv(clf._L)
            Kyinv = Ndot(tmp.T, tmp)
            # XXX in such lengthy matrix manipulations you might be better off
            #     using N.matrix, where * is a matrix product
            self.states.variances = Ndiag(
                kernel.Sigma_p -
                Ndot(kernel.Sigma_p,
                     Ndot(train_fv.T,
                          Ndot(Kyinv,
                               Ndot(train_fv, kernel.Sigma_p)))))
        return weights


if externals.exists('openopt'):

    from mvpa.clfs.model_selector import ModelSelector

    class GPRWeights(Sensitivity):
        """`SensitivityAnalyzer` that reports the weights of a GPR trained
        on a given `Dataset`.
        """

        _LEGAL_CLFS = [ GPR ]

        def _call(self, dataset):
            """Extract weights from GPR
            """

            clf = self.clf
            # normalize data:
            clf._train_labels = (clf._train_labels - clf._train_labels.mean()) \
                                / clf._train_labels.std()
            # clf._train_fv = (clf._train_fv - clf._train_fv.mean(0)) \
            #                 / clf._train_fv.std(0)
            dataset = Dataset(samples=clf._train_fv, labels=clf._train_labels)
            clf.states.enable("log_marginal_likelihood")
            ms = ModelSelector(clf, dataset)
            # Note that some kernels do not have a gradient yet!
            # XXX Make it initialize to clf's current hyperparameter values,
            #     or maybe add the ability to specify starting points in the
            #     constructor
            sigma_noise_initial = 1.0e-5
            sigma_f_initial = 1.0
            length_scale_initial = N.ones(dataset.nfeatures)*1.0e4
            # length_scale_initial = N.random.rand(dataset.nfeatures)*1.0e4
            hyp_initial_guess = N.hstack([sigma_noise_initial,
                                          sigma_f_initial,
                                          length_scale_initial])
            fixedHypers = N.array([0]*hyp_initial_guess.size, dtype=bool)
            fixedHypers = None
            problem = ms.max_log_marginal_likelihood(
                hyp_initial_guess=hyp_initial_guess,
                optimization_algorithm="scipy_lbfgsb",
                ftol=1.0e-3, fixedHypers=fixedHypers,
                use_gradient=True, logscale=True)
            if __debug__ and 'GPR_WEIGHTS' in debug.active:
                problem.iprint = 1
            lml = ms.solve()
            weights = 1.0/ms.hyperparameters_best[2:] # weight = 1/length_scale
            if __debug__:
                debug("GPR",
                      "%s, train: shape %s, labels %s, min:max %g:%g, "
                      "sigma_noise %g, sigma_f %g" %
                      (clf, clf._train_fv.shape, N.unique(clf._train_labels),
                       clf._train_fv.min(), clf._train_fv.max(),
                       ms.hyperparameters_best[0], ms.hyperparameters_best[1]))

            return weights