10 """Model selction."""

__docformat__ = 'restructuredtext'


import numpy as N
from mvpa.base import externals
from mvpa.misc.exceptions import InvalidHyperparameterError

if externals.exists("scipy", raiseException=True):
    import scipy.linalg as SL


if externals.exists("openopt", raiseException=True):
    try:
        from openopt import NLP
    except ImportError:
        from scikits.openopt import NLP

if __debug__:
    from mvpa.base import debug


def _openopt_debug():
    # Map PyMVPA debug targets to OpenOpt verbosity (iprint):
    # 1 = verbose, 0 = brief output, -1 = silent.
    if __debug__:
        da = debug.active
        if 'OPENOPT' in da:
            return 1
        elif 'MOD_SEL' in da:
            return 0
    return -1


45 """Model selection facility.
46
47 Select a model among multiple models (i.e., a parametric model,
48 parametrized by a set of hyperparamenters).
49 """
50
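    # Intended workflow (descriptive comment, not part of the original
    # source): construct a ModelSelector with a parametric model and a
    # dataset, call max_log_marginal_likelihood() to set up the OpenOpt
    # NLP problem, then call solve() to run the optimization and collect
    # the best hyperparameters.
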
    def __init__(self, parametric_model, dataset):
        """Store the parametric model to be optimized and the dataset
        on which it is trained and its log_marginal_likelihood computed.
        """
        self.parametric_model = parametric_model
        self.dataset = dataset
        self.hyperparameters_best = None
        self.log_marginal_likelihood_best = None
        self.problem = None
        pass

    def max_log_marginal_likelihood(self, hyp_initial_guess, maxiter=1,
                optimization_algorithm="scipy_cg", ftol=1.0e-3,
                fixedHypers=None, use_gradient=False, logscale=False):
        """
        Set up the optimization problem in order to maximize
        the log_marginal_likelihood.

        :Parameters:

          hyp_initial_guess : numpy.ndarray
            set of hyperparameters' initial values where to start
            optimization.

          maxiter : int
            maximum number of iterations allowed to the solver,
            which is mapped to OpenOpt NLP.maxiter.
            (Defaults to 1)

          optimization_algorithm : string
            actual name of the optimization algorithm. See
            http://scipy.org/scipy/scikits/wiki/NLP
            for a comprehensive/updated list of available NLP solvers.
            (Defaults to 'scipy_cg')

          ftol : float
            threshold for the stopping criterion of the solver,
            which is mapped to OpenOpt NLP.ftol.
            (Defaults to 1.0e-3)

          fixedHypers : numpy.ndarray (boolean array)
            boolean vector of the same size as hyp_initial_guess;
            'True' means that the corresponding hyperparameter is
            kept fixed (so not optimized).
            (Defaults to None, which means that all hyperparameters
            are optimized)

          use_gradient : bool
            whether to provide the analytical gradient of the
            log_marginal_likelihood to the solver.
            (Defaults to False)

          logscale : bool
            whether to optimize the hyperparameters in log scale.
            (Defaults to False)

        NOTE: the maximization of log_marginal_likelihood is a non-linear
        optimization problem (NLP). This fact is confirmed by Dmitrey,
        author of OpenOpt.
        """
        self.problem = None
        self.use_gradient = use_gradient
        self.logscale = logscale
        self.optimization_algorithm = optimization_algorithm
        self.hyp_initial_guess = N.array(hyp_initial_guess)
        self.hyp_initial_guess_log = N.log(self.hyp_initial_guess)
        if fixedHypers is None:
            fixedHypers = N.zeros(self.hyp_initial_guess.shape[0], dtype=bool)
            pass
        # hyperparameters not marked as fixed are the ones to optimize
        self.freeHypers = N.logical_not(fixedHypers)
        if self.logscale:
            self.hyp_running_guess = self.hyp_initial_guess_log.copy()
        else:
            self.hyp_running_guess = self.hyp_initial_guess.copy()
            pass
        # x of the last evaluation of f(), used by df() to detect whether
        # the model already holds the quantities computed for the same x
        self.f_last_x = None

        def f(x):
            """
            Wrapper to the log_marginal_likelihood to be
            maximized.
            """
            # Remember the point at which f() was last evaluated, so that
            # df() can tell whether the model has already been trained on
            # the very same x and skip recomputation.
            self.f_last_x = x.copy()
            self.hyp_running_guess[self.freeHypers] = x
            # set the hyperparameters in the model (exponentiating them
            # first when the search runs in log scale)
            try:
                if self.logscale:
                    self.parametric_model.set_hyperparameters(
                        N.exp(self.hyp_running_guess))
                else:
                    self.parametric_model.set_hyperparameters(
                        self.hyp_running_guess)
                    pass
            except InvalidHyperparameterError:
                if __debug__: debug("MOD_SEL", "WARNING: invalid hyperparameters!")
                return -N.inf
            try:
                self.parametric_model.train(self.dataset)
            except (N.linalg.linalg.LinAlgError, SL.basic.LinAlgError,
                    ValueError):
                # Numerical failure (e.g., Cholesky decomposition) means the
                # current hyperparameters are not acceptable.
                if __debug__: debug("MOD_SEL", "WARNING: Cholesky failed! Invalid hyperparameters!")
                return -N.inf
            log_marginal_likelihood = \
                self.parametric_model.compute_log_marginal_likelihood()
            return log_marginal_likelihood

        def df(x):
            """
            Proxy to the log_marginal_likelihood first
            derivative. Necessary for OpenOpt when using derivatives.
            """
            self.hyp_running_guess[self.freeHypers] = x
            # set the hyperparameters in the model, as in f()
            try:
                if self.logscale:
                    self.parametric_model.set_hyperparameters(
                        N.exp(self.hyp_running_guess))
                else:
                    self.parametric_model.set_hyperparameters(
                        self.hyp_running_guess)
                    pass
            except InvalidHyperparameterError:
                if __debug__: debug("MOD_SEL", "WARNING: invalid hyperparameters!")
                return -N.inf
            # If the gradient is requested at a point different from the one
            # where f() was last evaluated, the model must be re-trained.
            if N.any(x != self.f_last_x):
                if __debug__: debug("MOD_SEL", "UNEXPECTED: recomputing train+log_marginal_likelihood.")
                try:
                    self.parametric_model.train(self.dataset)
                except (N.linalg.linalg.LinAlgError, SL.basic.LinAlgError,
                        ValueError):
                    if __debug__: debug("MOD_SEL", "WARNING: Cholesky failed! Invalid hyperparameters!")
                    # return a null gradient when hyperparameters are invalid
                    return N.zeros(x.size)
                log_marginal_likelihood = \
                    self.parametric_model.compute_log_marginal_likelihood()
                pass
            if self.logscale:
                gradient_log_marginal_likelihood = \
                    self.parametric_model.compute_gradient_log_marginal_likelihood_logscale()
            else:
                gradient_log_marginal_likelihood = \
                    self.parametric_model.compute_gradient_log_marginal_likelihood()
                pass
            return gradient_log_marginal_likelihood[self.freeHypers]

        if self.logscale:
            # vector of hyperparameters' values where to start the search
            x0 = self.hyp_initial_guess_log[self.freeHypers]
        else:
            x0 = self.hyp_initial_guess[self.freeHypers]
            pass
        self.contol = 1.0e-20  # constraint tolerance level
        if self.use_gradient:
            # provide the analytical gradient to the solver
            self.problem = NLP(f, x0, df=df, contol=self.contol,
                               goal='maximum')
        else:
            # the solver will approximate the gradient numerically
            self.problem = NLP(f, x0, contol=self.contol, goal='maximum')
            pass
        self.problem.name = "Max LogMargLikelihood"
        if not self.logscale:
            # when not optimizing in log scale, keep the hyperparameters
            # strictly positive via a lower bound
            self.problem.lb = N.zeros(self.problem.n) + self.contol
            pass
        # maximum number of iterations for the solver
        self.problem.maxiter = maxiter
        # check whether the provided derivative of log_marginal_likelihood
        # is correct
        self.problem.checkdf = True
        # increment of log_marginal_likelihood under which the solver stops
        self.problem.ftol = ftol
        self.problem.iprint = _openopt_debug()
        return self.problem


    def solve(self, problem=None):
        """Solve the maximization problem, check outcome and collect results.
        """
        if N.all(self.freeHypers == False):
            # no free hyperparameters, so no optimization is needed: just
            # train the model on the initial guess and return its
            # log_marginal_likelihood
            self.hyperparameters_best = self.hyp_initial_guess.copy()
            try:
                self.parametric_model.set_hyperparameters(
                    self.hyperparameters_best)
            except InvalidHyperparameterError:
                if __debug__: debug("MOD_SEL", "WARNING: invalid hyperparameters!")
                self.log_marginal_likelihood_best = -N.inf
                return self.log_marginal_likelihood_best
            self.parametric_model.train(self.dataset)
            self.log_marginal_likelihood_best = \
                self.parametric_model.compute_log_marginal_likelihood()
            return self.log_marginal_likelihood_best

        result = self.problem.solve(self.optimization_algorithm)
        if result.stopcase == -1:
            # solver failed to converge
            print "Unable to find a maximum to log_marginal_likelihood"
        elif result.stopcase == 0:
            # user-defined limits (e.g., maxiter) exceeded
            print "Limits exceeded"
        elif result.stopcase == 1:
            # solver converged: collect the optimized hyperparameters
            self.hyperparameters_best = self.hyp_initial_guess.copy()
            if self.logscale:
                # remember to convert back from log scale
                self.hyperparameters_best[self.freeHypers] = N.exp(result.xf)
            else:
                self.hyperparameters_best[self.freeHypers] = result.xf
                pass
            self.log_marginal_likelihood_best = result.ff
            pass
        self.stopcase = result.stopcase
        return self.log_marginal_likelihood_best

    pass
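

# What follows is a minimal usage sketch, not part of the original module:
# it assumes PyMVPA's GPR classifier with its default squared-exponential
# kernel (hyperparameter vector assumed to be [sigma_noise, sigma_f,
# length_scale]) and the normalFeatureDataset toy-data generator; adapt
# the names and the hyperparameter layout to the actual model being
# selected.
if __name__ == "__main__":
    from mvpa.clfs.gpr import GPR
    from mvpa.misc.data_generators import normalFeatureDataset

    dataset = normalFeatureDataset(perlabel=20, nlabels=2,
                                   nfeatures=4, nchunks=2)
    model = GPR()
    ms = ModelSelector(model, dataset)
    # set up the NLP problem and run the solver
    ms.max_log_marginal_likelihood(hyp_initial_guess=[1.0, 1.0, 1.0],
                                   maxiter=100)
    lml_best = ms.solve()
    print "Best hyperparameters:", ms.hyperparameters_best
    print "Best log_marginal_likelihood:", lml_best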