1
2
3
4
5
6
7
8
9 """Python interface to the SWIG-wrapped libsvm"""
10
11 __docformat__ = 'restructuredtext'
12
13
14 from math import exp, fabs
15 import re, copy
16
17 import numpy as N
18
19 from mvpa.clfs.libsvmc import _svmc as svmc
20 from mvpa.clfs.libsvmc._svmc import C_SVC, NU_SVC, ONE_CLASS, EPSILON_SVR, \
21 NU_SVR, LINEAR, POLY, RBF, SIGMOID, \
22 PRECOMPUTED
23
24 if __debug__:
25 from mvpa.base import debug
35
45
50
55
59
63
66 """
67 SVMParameter class safe to be deepcopied.
68 """
69
70 default_parameters = {
71 'svm_type' : C_SVC,
72 'kernel_type' : RBF,
73 'degree' : 3,
74 'gamma' : 0,
75 'coef0' : 0,
76 'nu' : 0.5,
77 'cache_size' : 100,
78 'C' : 1,
79 'eps' : 1e-3,
80 'p' : 0.1,
81 'shrinking' : 1,
82 'nr_weight' : 0,
83 'weight_label' : [],
84 'weight' : [],
85 'probability' : 0
86 }
87
89 """Internal class to to avoid memory leaks returning away svmc's params"""
90
92 self.param = svmc.new_svm_parameter()
93 for attr, val in params.items():
94
95 if attr == 'weight_label':
96
97 val = intArray(val)
98
99
100 elif attr == 'weight':
101
102 val = doubleArray(val)
103
104
105
106 set_func = getattr(svmc, 'svm_parameter_%s_set' % (attr))
107 set_func(self.param, val)
108
115
116
118 self._orig_params = kw
119 self.untrain()
120
122 self._params = {}
123 self._params.update(self.default_parameters)
124 self._params.update(**self._orig_params)
125 self.__svmc_params = None
126 self.__svmc_recompute = False
127
130
132 return "SVMParameter: %s" % `self._params`
133
138
143
145 if not self.__svmc_params is None:
146 del self.__svmc_params
147 self.__svmc_params = None
148
149 @property
157
162
def _setParameter(self, key, value):
    """Store a single parameter value and invalidate the cached svmc params.

    :Parameters:
      key
        Name of the parameter; used as the key into ``self._params``.
      value
        New value stored under ``key``.

    NOTE(review): the original remark on this method hints that list-valued
    parameters are not handled properly -- presumably a list modified in
    place never passes through this setter, so the recompute flag would
    stay unset. Confirm against callers.
    """
    # Raise the flag first: any previously built svmc parameter structure
    # no longer reflects ``self._params`` once the value below is stored.
    self.__svmc_recompute = True
    self._params[key] = value
167
@classmethod
def _register_properties(cls):
    """Expose every key of ``default_parameters`` as a property of ``cls``.

    For each key a property is installed on the class whose getter reads
    ``self._params[key]`` and whose setter forwards to
    ``self._setParameter(key, value)``.

    The properties are attached with ``setattr`` rather than by executing a
    generated source string, so registration does not depend on the class
    name being resolvable in the current scope and works for subclasses.
    """
    def make_property(name):
        # A factory gives each property its own binding of ``name``;
        # closing over the loop variable directly would make every
        # property refer to the last key.
        def getter(self):
            return self._params[name]

        def setter(self, val):
            self._setParameter(name, val)

        return property(fget=getter, fset=setter)

    for key in cls.default_parameters:
        setattr(cls, key, make_property(key))
175
176
177 SVMParameter._register_properties()
def convert2SVMNode(x):
    """Convert a sequence or mapping to an SVMNode array.

    :Parameters:
      x : dict, ndarray or sequence
        For a mapping, the keys are used as node indices (in sorted order)
        and the mapped items as node values. For an ndarray or any other
        sequence, positions ``0 .. len(x)-1`` are used as indices.

    :Returns:
      The array created by ``svmc.svm_node_array`` with ``len(x) + 1``
      nodes; the extra last node is set to index -1 and value 0.0
      (presumably the end-of-vector marker libsvm expects -- confirm).

    :Raises:
      TypeError if `x` is neither a mapping, an ndarray nor a sequence.
    """
    import operator

    length = len(x)

    # Build two parallel collections: node indices and node values.
    # isinstance (not an exact type check) so subclasses are accepted too.
    if isinstance(x, dict):
        # sorted() returns the ordered keys; list.sort() sorts in place
        # and returns None, which would lose the indices altogether.
        iter_range = sorted(x)
        # Values must follow the order of the sorted keys, and asarray()
        # converts data whereas N.ndarray(...) treats its argument as a
        # shape.
        iter_values = N.asarray([x[key] for key in iter_range])
    elif isinstance(x, N.ndarray):
        iter_range = range(length)
        iter_values = x
    elif operator.isSequenceType(x):
        iter_range = range(length)
        iter_values = N.asarray(x)
    else:
        raise TypeError("data must be a mapping or an ndarray or a sequence")

    # One node more than the data needs: the trailing slot is filled
    # separately below.
    data = svmc.svm_node_array(length + 1)

    # Trailing node: index -1, value 0.0.
    svmc.svm_node_array_set(data, length, -1, 0.0)

    # Fill all data nodes in a single call.
    svmc.svm_node_array_set(data, iter_range, iter_values)

    return data
208
213 assert len(y) == len(x)
214 self.prob = prob = svmc.new_svm_problem()
215 self.size = size = len(y)
216
217 self.y_array = y_array = svmc.new_double(size)
218 for i in range(size):
219 svmc.double_setitem(y_array, i, y[i])
220
221 self.x_matrix = x_matrix = svmc.svm_node_matrix(size)
222 self.data = []
223 self.maxlen = 0
224 for i in range(size):
225 data = convert2SVMNode(x[i])
226 self.data.append(data)
227 svmc.svm_node_matrix_set(x_matrix, i, data)
228 if type(x[i]) == dict:
229 if (len(x[i]) > 0):
230 self.maxlen = max(self.maxlen, max(x[i].keys()))
231 else:
232 self.maxlen = max(self.maxlen, len(x[i]))
233
234 svmc.svm_problem_l_set(prob, size)
235 svmc.svm_problem_y_set(prob, y_array)
236 svmc.svm_problem_x_set(prob, x_matrix)
237
238
240 return "<SVMProblem: size = %s>" % (self.size)
241
242
252
282
283
285 """
286 Print string representation of the model or easier comprehension
287 and some statistics
288 """
289 ret = '<SVMModel:'
290 try:
291 ret += ' type = %s, ' % `self.svm_type`
292 ret += ' number of classes = %d (%s), ' \
293 % ( self.nr_class, `self.labels` )
294 except:
295 pass
296 return ret+'>'
297
298
304
305
308
309
311 if self.svm_type == NU_SVR \
312 or self.svm_type == EPSILON_SVR \
313 or self.svm_type == ONE_CLASS:
314 raise TypeError, "Unable to get label from a SVR/ONE_CLASS model"
315 return self.labels
316
317
318
319
320
321
322
333
334
350
351
375
376
378
def getSVRProbability(self):
    """Return the value reported by ``svmc.svm_get_svr_probability``.

    :Returns:
      The non-zero value obtained from ``svmc.svm_get_svr_probability``
      for ``self.model``.

    :Raises:
      TypeError if the reported value is 0, i.e. the model is not a
      regression model or probability information is not available.
    """
    ret = svmc.svm_get_svr_probability(self.model)
    # A result of 0 is treated as "no probability information".
    if ret == 0:
        raise TypeError("not a regression model or probability "
                        "information not available")
    return ret
384
385
387
388 sigma = self.getSVRProbability()
389 return lambda z: exp(-fabs(z)/sigma)/(2*sigma)
390
391
392 - def save(self, filename):
394
395
397 if __debug__:
398 debug('CLF_', 'Destroying libsvm.SVMModel %s' % (`self`))
399
400 try:
401 if svmc.__version__ < 300:
402 svmc.svm_destroy_model(self.model)
403 else:
404 svmc.svm_destroy_model_helper(self.model)
405 except:
406
407
408 pass
409
410
413
414
416 """Returns a list with the number of support vectors per class.
417 """
418 return [ svmc.int_getitem(svmc.svm_model_nSV_get( self.model ), i)
419 for i in range( self.nr_class ) ]
420
421
431
432
434 """Return coefficients for SVs... Needs to be used directly with caution!
435
436 Summary on what is happening in libsvm internals with sv_coef
437
438 svm_model's sv_coef (especially) are "cleverly" packed into a matrix
439 nr_class - 1 x #SVs_total which stores
440 coefficients for
441 nr_class x (nr_class-1) / 2
442 binary classifiers' SV coefficients.
443
444 For classifier i-vs-j
445 General packing rule can be described as:
446
447 i-th row contains sv_coefficients for SVs of class i it took
448 in all i-vs-j or j-vs-i classifiers.
449
450 Another useful excerpt from svm.cpp is
451
452 // classifier (i,j): coefficients with
453 // i are in sv_coef[j-1][nz_start[i]...],
454 // j are in sv_coef[i][nz_start[j]...]
455
456 It can also be described as j-th column lists coefficients for SV # j which
457 belongs to some class C, which it took (if it was an SV, ie != 0)
458 in classifiers i vs C (iff i<C), or C vs i+1 (iff i>C)
459
460 This way no byte of storage is wasted but imho such setup is quite convolved
461 """
462 return svmc.doubleppcarray2numpy_array(
463 svmc.svm_model_sv_coef_get(self.model),
464 self.nr_class - 1,
465 self.getTotalNSV())
466
467
469 """Return constant(s) in decision function(s) (if multi-class)"""
470 return doubleArray2List(svmc.svm_model_rho_get(self.model),
471 self.nr_class * (self.nr_class-1)/2)
472