1
2
3
4
5
6
7
8
9
10 """Kernels for Gaussian Process Regression and Classification."""
11
12
13 _DEV__DOC__ = """
14 Make use of Parameter Collections to keep parameters of the
15 kernels. Then we would get a uniform .reset() functionality. Now reset
16 is provided just for parts which are failing in the unittests, but
17 there are many more places where they are not reset properly if
18 classifier gets trained on some new data of different dimensionality
19 """
20
21 __docformat__ = 'restructuredtext'
22
23
24 import numpy as N
25
26 from mvpa.misc.exceptions import InvalidHyperparameterError
27 from mvpa.clfs.distance import squared_euclidean_distance
28
29 if __debug__:
30 from mvpa.base import debug, warning
31
32
34 """Kernel function base class.
35
36 """
37
40
43
44 - def compute(self, data1, data2=None):
45 raise NotImplementedError
46
48 """Resets the kernel dropping internal variables to the original values"""
49 pass
50
52 raise NotImplementedError
53
55 raise NotImplementedError
56
58 raise NotImplementedError
59
60 pass
61
63 """The constant kernel class.
64 """
def __init__(self, sigma_0=1.0, **kwargs):
    """Create a constant kernel.

    :Parameters:
      sigma_0 : float
        standard deviation of the Gaussian prior probability
        N(0,sigma_0**2) of the intercept of the constant regression.
        (Defaults to 1.0)
    """
    Kernel.__init__(self, **kwargs)
    # Store the hyperparameter; the kernel matrix itself is only built
    # on the first call to compute().
    self.sigma_0 = sigma_0
    self.kernel_matrix = None
79
81 return "%s(sigma_0=%s)" % (self.__class__.__name__, str(self.sigma_0))
82
def compute(self, data1, data2=None):
    """Compute the constant kernel matrix between `data1` and `data2`.

    Every entry of the result equals sigma_0**2, the prior variance of
    the regression intercept.

    :Parameters:
      data1 : numpy.ndarray
        data
      data2 : numpy.ndarray
        data
        (Defaults to None, in which case `data1` is used)
    """
    if data2 is None:
        data2 = data1
    shape = (data1.shape[0], data2.shape[0])
    self.kernel_matrix = N.ones(shape) * self.sigma_0 ** 2
    return self.kernel_matrix
99
105
107 K_grad_sigma_0 = 2*self.sigma_0
108
109
110
111 self.lml_gradient = 0.5*N.array(K_grad_sigma_0*alphaalphaT_Kinv.sum())
112 return self.lml_gradient
113
115 K_grad_sigma_0 = 2*self.sigma_0**2
116 self.lml_gradient = 0.5*N.array(K_grad_sigma_0*alphaalphaT_Kinv.sum())
117 return self.lml_gradient
118
119 pass
120
121
123 """The linear kernel class.
124 """
def __init__(self, Sigma_p=None, sigma_0=1.0, **kwargs):
    """Create a linear kernel.

    :Parameters:
      Sigma_p : numpy.ndarray
        Covariance matrix of the Gaussian prior probability N(0,Sigma_p)
        on the weights of the linear regression.
        (Defaults to None)
      sigma_0 : float
        the standard deviation of the Gaussian prior N(0,sigma_0**2)
        of the intercept of the linear regression.
        (Defaults to 1.0)
    """
    Kernel.__init__(self, **kwargs)
    # NOTE(review): this assignment presumably goes through the Sigma_p
    # property setter (which maps None to 1.0 and stores the original
    # value for reset) — confirm against the property definition.
    self.Sigma_p = Sigma_p
    self.sigma_0 = sigma_0
    self.kernel_matrix = None
145
146
148 return "%s(Sigma_p=%s, sigma_0=%s)" \
149 % (self.__class__.__name__, str(self.Sigma_p), str(self.sigma_0))
150
151
155
156
def compute(self, data1, data2=None):
    """Compute kernel matrix.

    Implements K = data1 * Sigma_p * data2.T + sigma_0**2, exploiting
    the structure of Sigma_p (scalar, diagonal given as a vector, or
    full matrix) to avoid unnecessary matrix products.

    :Parameters:
      data1 : numpy.ndarray
        data
      data2 : numpy.ndarray
        data
        (Defaults to None, in which case `data1` is used)
    """
    if data2 is None:
        data2 = data1

    Sigma_p = self.Sigma_p

    if N.isscalar(Sigma_p):
        if Sigma_p == 1.0:
            # Identity prior: no scaling needed.
            data2_sc = data2.T
        else:
            data2_sc = Sigma_p * data2.T
    elif len(Sigma_p.shape) == 1 and Sigma_p.shape[0] == data1.shape[1]:
        # Diagonal prior given as a vector of per-feature variances.
        # BUG FIX: scale data2, not data1 — scaling data1 yielded a
        # kernel of the wrong shape whenever data1 and data2 had
        # different numbers of samples.
        data2_sc = (Sigma_p * data2).T
    else:
        # Full covariance matrix.
        data2_sc = N.dot(Sigma_p, data2.T)

    self.kernel_matrix = N.dot(data1, data2_sc) + self.sigma_0 ** 2
    return self.kernel_matrix
203
205
206
207
208
209
210
211
212 if N.any(hyperparameter < 0):
213 raise InvalidHyperparameterError()
214 self.sigma_0 = N.array(hyperparameter[0])
215 self._Sigma_p = N.diagflat(hyperparameter[1:])
216 return
217
219 def lml_grad(K_grad_i):
220
221
222 return (alphaalphaT_Kinv*(K_grad_i.T)).sum()
223 lml_gradient = []
224 lml_gradient.append(2*self.sigma_0*alphaalphaT_Kinv.sum())
225 for i in range(self.Sigma_p.shape[0]):
226
227
228 K_grad_i = N.multiply.outer(data[:,i],data[:,i])
229 lml_gradient.append(lml_grad(K_grad_i))
230 pass
231 self.lml_gradient = lml_gradient = 0.5*N.array(lml_gradient)
232 return lml_gradient
233
235 def lml_grad(K_grad_i):
236
237
238 return (alphaalphaT_Kinv*(K_grad_i.T)).sum()
239 lml_gradient = []
240 lml_gradient.append(2*self.sigma_0**2*alphaalphaT_Kinv.sum())
241 Sigma_p = self.Sigma_p
242 for i in range(Sigma_p.shape[0]):
243
244
245 K_grad_log_i = Sigma_p[i,i]*N.multiply.outer(data[:,i],data[:,i])
246 lml_gradient.append(lml_grad(K_grad_log_i))
247 pass
248 self.lml_gradient = lml_gradient = 0.5*N.array(lml_gradient)
249 return lml_gradient
250
251
253 """Set Sigma_p value and store _orig for reset
254 """
255 if (v is None):
256
257 v = 1.0
258 self._Sigma_p_orig = self._Sigma_p = v
259
260 Sigma_p = property(fget=lambda x:x._Sigma_p,
261 fset=_setSigma_p)
262
263 pass
264
265
267 """The Exponential kernel class.
268
269 Note that it can handle a length scale for each dimension for
270 Automatic Relevance Determination.
271
272 """
def __init__(self, length_scale=1.0, sigma_f=1.0, **kwargs):
    """Create an Exponential kernel.

    :Parameters:
      length_scale : float OR numpy.ndarray
        the characteristic length-scale (or length-scales) of the
        phenomenon under investigation.
        (Defaults to 1.0)
      sigma_f : float
        Signal standard deviation.
        (Defaults to 1.0)
    """
    Kernel.__init__(self, **kwargs)
    self.length_scale = length_scale
    self.sigma_f = sigma_f
    self.kernel_matrix = None
291
292
294 return "%s(length_scale=%s, sigma_f=%s)" \
295 % (self.__class__.__name__, str(self.length_scale), str(self.sigma_f))
296
def compute(self, data1, data2=None):
    """Compute kernel matrix.

    K(x1,x2) = sigma_f**2 * exp(-d(x1,x2)), where d is the Euclidean
    distance weighted per-dimension by 1/length_scale**2.

    :Parameters:
      data1 : numpy.ndarray
        data
      data2 : numpy.ndarray
        data
        (Defaults to None)
    """
    # Weighted distance matrix, cached on self for reuse by the
    # gradient computations.
    weight = self.length_scale ** -2
    self.wdm = N.sqrt(
        squared_euclidean_distance(data1, data2, weight=weight))
    self.kernel_matrix = self.sigma_f ** 2 * N.exp(-self.wdm)
    return self.kernel_matrix
316
318 """Compute gradient of the kernel matrix. A must for fast
319 model selection with high-dimensional data.
320 """
321 raise NotImplementedError
322
324 """Set hyperparameters from a vector.
325
326 Used by model selection.
327 """
328 if N.any(hyperparameter < 0):
329 raise InvalidHyperparameterError()
330 self.sigma_f = hyperparameter[0]
331 self.length_scale = hyperparameter[1:]
332 return
333
335 """Compute gradient of the kernel and return the portion of
336 log marginal likelihood gradient due to the kernel.
337 Shorter formula. Allows vector of lengthscales (ARD)
338 BUT THIS LAST OPTION SEEMS NOT TO WORK FOR (CURRENTLY)
339 UNKNOWN REASONS.
340 """
341 self.lml_gradient = []
342 def lml_grad(K_grad_i):
343
344
345 return (alphaalphaT_Kinv*(K_grad_i.T)).sum()
346 grad_sigma_f = 2.0/self.sigma_f*self.kernel_matrix
347 self.lml_gradient.append(lml_grad(grad_sigma_f))
348 if N.isscalar(self.length_scale) or self.length_scale.size==1:
349
350 K_grad_l = self.wdm*self.kernel_matrix*(self.length_scale**-1)
351 self.lml_gradient.append(lml_grad(K_grad_l))
352 else:
353
354 for i in range(self.length_scale.size):
355 K_grad_i = (self.length_scale[i]**-3)*(self.wdm**-1)*self.kernel_matrix*N.subtract.outer(data[:,i],data[:,i])**2
356 self.lml_gradient.append(lml_grad(K_grad_i))
357 pass
358 pass
359 self.lml_gradient = 0.5*N.array(self.lml_gradient)
360 return self.lml_gradient
361
363 """Compute gradient of the kernel and return the portion of
364 log marginal likelihood gradient due to the kernel.
365 Shorter formula. Allows vector of lengthscales (ARD).
366 BUT THIS LAST OPTION SEEMS NOT TO WORK FOR (CURRENTLY)
367 UNKNOWN REASONS.
368 """
369 self.lml_gradient = []
370 def lml_grad(K_grad_i):
371
372
373 return (alphaalphaT_Kinv*(K_grad_i.T)).sum()
374 grad_log_sigma_f = 2.0*self.kernel_matrix
375 self.lml_gradient.append(lml_grad(grad_log_sigma_f))
376 if N.isscalar(self.length_scale) or self.length_scale.size==1:
377
378 K_grad_l = self.wdm*self.kernel_matrix
379 self.lml_gradient.append(lml_grad(K_grad_l))
380 else:
381
382 for i in range(self.length_scale.size):
383 K_grad_i = (self.length_scale[i]**-2)*(self.wdm**-1)*self.kernel_matrix*N.subtract.outer(data[:,i],data[:,i])**2
384 self.lml_gradient.append(lml_grad(K_grad_i))
385 pass
386 pass
387 self.lml_gradient = 0.5*N.array(self.lml_gradient)
388 return self.lml_gradient
389
390 pass
391
392
394 """The Squared Exponential kernel class.
395
396 Note that it can handle a length scale for each dimension for
397 Automatic Relevance Determination.
398
399 """
def __init__(self, length_scale=1.0, sigma_f=1.0, **kwargs):
    """Create a Squared Exponential kernel.

    :Parameters:
      length_scale : float OR numpy.ndarray
        the characteristic length-scale (or length-scales) of the
        phenomenon under investigation.
        (Defaults to 1.0)
      sigma_f : float
        Signal standard deviation.
        (Defaults to 1.0)
    """
    Kernel.__init__(self, **kwargs)
    self.length_scale = length_scale
    self.sigma_f = sigma_f
    self.kernel_matrix = None
418
419
423
424
426 return "%s(length_scale=%s, sigma_f=%s)" \
427 % (self.__class__.__name__, str(self.length_scale), str(self.sigma_f))
428
def compute(self, data1, data2=None):
    """Compute kernel matrix.

    K(x1,x2) = sigma_f**2 * exp(-0.5 * d2(x1,x2)), where d2 is the
    squared Euclidean distance weighted by 1/length_scale**2.

    :Parameters:
      data1 : numpy.ndarray
        data
      data2 : numpy.ndarray
        data
        (Defaults to None)
    """
    # Squared weighted distances, cached on self for reuse by the
    # gradient computations.
    weight = self.length_scale ** -2
    self.wdm2 = squared_euclidean_distance(data1, data2, weight=weight)
    self.kernel_matrix = self.sigma_f ** 2 * N.exp(-0.5 * self.wdm2)
    return self.kernel_matrix
447
449 """Set hyperparameters from a vector.
450
451 Used by model selection.
452 """
453 if N.any(hyperparameter < 0):
454 raise InvalidHyperparameterError()
455 self.sigma_f = hyperparameter[0]
456 self._length_scale = hyperparameter[1:]
457 return
458
460 """Compute gradient of the kernel and return the portion of
461 log marginal likelihood gradient due to the kernel.
462 Shorter formula. Allows vector of lengthscales (ARD).
463 """
464 self.lml_gradient = []
465 def lml_grad(K_grad_i):
466
467
468 return (alphaalphaT_Kinv*(K_grad_i.T)).sum()
469 grad_sigma_f = 2.0/self.sigma_f*self.kernel_matrix
470 self.lml_gradient.append(lml_grad(grad_sigma_f))
471 if N.isscalar(self.length_scale) or self.length_scale.size==1:
472
473 K_grad_l = self.wdm2*self.kernel_matrix*(1.0/self.length_scale)
474 self.lml_gradient.append(lml_grad(K_grad_l))
475 else:
476
477 for i in range(self.length_scale.size):
478 K_grad_i = 1.0/(self.length_scale[i]**3)*self.kernel_matrix*N.subtract.outer(data[:,i],data[:,i])**2
479 self.lml_gradient.append(lml_grad(K_grad_i))
480 pass
481 pass
482 self.lml_gradient = 0.5*N.array(self.lml_gradient)
483 return self.lml_gradient
484
486 """Compute gradient of the kernel and return the portion of
487 log marginal likelihood gradient due to the kernel.
488 Hyperparameters are in log scale which is sometimes more
489 stable. Shorter formula. Allows vector of lengthscales (ARD).
490 """
491 self.lml_gradient = []
492 def lml_grad(K_grad_i):
493
494
495 return (alphaalphaT_Kinv*(K_grad_i.T)).sum()
496 K_grad_log_sigma_f = 2.0*self.kernel_matrix
497 self.lml_gradient.append(lml_grad(K_grad_log_sigma_f))
498 if N.isscalar(self.length_scale) or self.length_scale.size==1:
499
500 K_grad_log_l = self.wdm2*self.kernel_matrix
501 self.lml_gradient.append(lml_grad(K_grad_log_l))
502 else:
503
504 for i in range(self.length_scale.size):
505 K_grad_log_l_i = 1.0/(self.length_scale[i]**2)*self.kernel_matrix*N.subtract.outer(data[:,i],data[:,i])**2
506 self.lml_gradient.append(lml_grad(K_grad_log_l_i))
507 pass
508 pass
509 self.lml_gradient = 0.5*N.array(self.lml_gradient)
510 return self.lml_gradient
511
513 """Set value of length_scale and its _orig
514 """
515 self._length_scale = self._length_scale_orig = v
516
517 length_scale = property(fget=lambda x:x._length_scale,
518 fset=_setlength_scale)
519 pass
520
522 """The Matern kernel class for the case ni=3/2 or ni=5/2.
523
524 Note that it can handle a length scale for each dimension for
525 Automatic Relevance Determination.
526
527 """
def __init__(self, length_scale=1.0, sigma_f=1.0, numerator=3.0, **kwargs):
    """Initialize a Matern kernel instance.

    (The previous docstring wrongly called this a "Squared Exponential"
    kernel.)

    :Parameters:
      length_scale : float OR numpy.ndarray
        the characteristic length-scale (or length-scales) of the
        phenomenon under investigation.
        (Defaults to 1.0)
      sigma_f : float
        Signal standard deviation.
        (Defaults to 1.0)
      numerator: float
        the numerator of parameter ni of Matern covariance functions.
        Currently only numerator=3.0 and numerator=5.0 are implemented.
        (Defaults to 3.0)

    :Raises:
      NotImplementedError : if `numerator` is neither 3.0 nor 5.0.
    """
    Kernel.__init__(self, **kwargs)
    self.length_scale = length_scale
    self.sigma_f = sigma_f
    self.kernel_matrix = None
    # Only ni=3/2 and ni=5/2 have closed forms implemented in compute().
    if numerator in (3.0, 5.0):
        self.numerator = numerator
    else:
        raise NotImplementedError
554
556 return "%s(length_scale=%s, ni=%d/2)" \
557 % (self.__class__.__name__, str(self.length_scale), self.numerator)
558
def compute(self, data1, data2=None):
    """Compute kernel matrix.

    Uses the closed-form Matern covariance for ni=3/2 or ni=5/2,
    selected by self.numerator (guaranteed by __init__ to be 3.0 or 5.0).

    :Parameters:
      data1 : numpy.ndarray
        data
      data2 : numpy.ndarray
        data
        (Defaults to None)
    """
    # Squared distances scaled by 1/(2*length_scale**2), as required by
    # the closed forms below.
    d2 = squared_euclidean_distance(
        data1, data2, weight=0.5 / (self.length_scale ** 2))
    if self.numerator == 3.0:
        r = N.sqrt(d2)
        sq3 = N.sqrt(3.0)
        self.kernel_matrix = \
            self.sigma_f ** 2 * (1.0 + sq3 * r) * N.exp(-sq3 * r)
    elif self.numerator == 5.0:
        r = N.sqrt(d2)
        sq5 = N.sqrt(5.0)
        self.kernel_matrix = \
            self.sigma_f ** 2 * (1.0 + sq5 * r + 5.0 / 3.0 * d2) \
            * N.exp(-sq5 * r)
    return self.kernel_matrix
582
584 """Compute gradient of the kernel matrix. A must for fast
585 model selection with high-dimensional data.
586 """
587
588
589
590 raise NotImplementedError
591
593 """Set hyperparameters from a vector.
594
595 Used by model selection.
596 Note: 'numerator' is not considered as an hyperparameter.
597 """
598 if N.any(hyperparameter < 0):
599 raise InvalidHyperparameterError()
600 self.sigma_f = hyperparameter[0]
601 self.length_scale = hyperparameter[1:]
602 return
603
604 pass
605
606
608 """The Matern kernel class for the case ni=5/2.
609
610 This kernel is just KernelMatern_3_2(numerator=5.0).
611 """
613 """Initialize a Matern (ni=5/2) kernel instance.
614
615 :Parameters:
616 length_scale : float OR numpy.ndarray
617 the characteristic length-scale (or length-scales) of the
618 phenomenon under investigation.
619 (Defaults to 1.0)
620 """
621 KernelMatern_3_2.__init__(self, numerator=5.0, **kwargs)
622 pass
623
624
626 """The Rational Quadratic (RQ) kernel class.
627
628 Note that it can handle a length scale for each dimension for
629 Automatic Relevance Determination.
630
631 """
def __init__(self, length_scale=1.0, sigma_f=1.0, alpha=0.5, **kwargs):
    """Initialize a Rational Quadratic kernel instance.

    (The previous docstring wrongly called this a "Squared Exponential"
    kernel and claimed alpha defaults to 2.0; the actual default is 0.5.)

    :Parameters:
      length_scale : float OR numpy.ndarray
        the characteristic length-scale (or length-scales) of the
        phenomenon under investigation.
        (Defaults to 1.0)
      sigma_f : float
        Signal standard deviation.
        (Defaults to 1.0)
      alpha: float
        The parameter of the RQ functions family.
        (Defaults to 0.5)
    """
    Kernel.__init__(self, **kwargs)
    self.length_scale = length_scale
    self.sigma_f = sigma_f
    self.kernel_matrix = None
    self.alpha = alpha
654
658
def compute(self, data1, data2=None):
    """Compute kernel matrix.

    K(x1,x2) = sigma_f**2 * (1 + d2/(2*alpha))**(-alpha), where d2 is
    the squared Euclidean distance weighted by 1/length_scale**2.

    :Parameters:
      data1 : numpy.ndarray
        data
      data2 : numpy.ndarray
        data
        (Defaults to None)
    """
    dist2 = squared_euclidean_distance(
        data1, data2, weight=1.0 / (self.length_scale ** 2))
    base = 1.0 + dist2 / (2.0 * self.alpha)
    self.kernel_matrix = self.sigma_f ** 2 * base ** -self.alpha
    return self.kernel_matrix
674
676 """Compute gradient of the kernel matrix. A must for fast
677 model selection with high-dimensional data.
678 """
679
680
681
682 raise NotImplementedError
683
685 """Set hyperparameters from a vector.
686
687 Used by model selection.
688 Note: 'alpha' is not considered as an hyperparameter.
689 """
690 if N.any(hyperparameter < 0):
691 raise InvalidHyperparameterError()
692 self.sigma_f = hyperparameter[0]
693 self.length_scale = hyperparameter[1:]
694 return
695
696 pass
697
698
699
# Map human-readable kernel names onto the classes implementing them,
# allowing a kernel to be selected by name.
kernel_dictionary = {
    'constant': KernelConstant,
    'linear': KernelLinear,
    'exponential': KernelExponential,
    'squared exponential': KernelSquaredExponential,
    'Matern ni=3/2': KernelMatern_3_2,
    'Matern ni=5/2': KernelMatern_5_2,
    'rational quadratic': KernelRationalQuadratic,
}
707