
Source Code for Module mvpa.clfs.libsvmc.svm

# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
# vi: set ft=python sts=4 ts=4 sw=4 et:
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
#
#   See COPYING file distributed along with the PyMVPA package for the
#   copyright and license terms.
#
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
"""Wrap the libsvm package into a very simple class interface."""

__docformat__ = 'restructuredtext'

import numpy as N

import operator

from mvpa.base import warning
from mvpa.misc.state import StateVariable

from mvpa.clfs._svmbase import _SVM

from mvpa.clfs.libsvmc import _svm as svm
from sens import LinearSVMWeights

if __debug__:
    from mvpa.base import debug

# we better expose those since they are mentioned in docstrings,
# although pylint would not be happy
from mvpa.clfs.libsvmc._svmc import \
     C_SVC, NU_SVC, EPSILON_SVR, \
     NU_SVR, LINEAR, POLY, RBF, SIGMOID, \
     PRECOMPUTED, ONE_CLASS
class SVM(_SVM):
    """Support Vector Machine Classifier.

    This is a simple interface to the libSVM package.
    """

    # Since this is an internal feature of LibSVM, this state variable is
    # present here
    probabilities = StateVariable(enabled=False,
        doc="Estimates of sample probabilities as provided by LibSVM")

    _KERNELS = { "linear":  (svm.svmc.LINEAR, None, LinearSVMWeights),
                 "rbf":     (svm.svmc.RBF, ('gamma',), None),
                 "poly":    (svm.svmc.POLY, ('gamma', 'degree', 'coef0'), None),
                 "sigmoid": (svm.svmc.SIGMOID, ('gamma', 'coef0'), None),
                 }
    # TODO: Complete the list ;-)

    # TODO: p is specific to SVR
    _KNOWN_PARAMS = [ 'epsilon', 'probability', 'shrinking',
                      'weight_label', 'weight' ]

    _KNOWN_KERNEL_PARAMS = [ 'cache_size' ]

    _KNOWN_IMPLEMENTATIONS = {
        'C_SVC': (svm.svmc.C_SVC, ('C',),
                  ('binary', 'multiclass'), 'C-SVM classification'),
        'NU_SVC': (svm.svmc.NU_SVC, ('nu',),
                   ('binary', 'multiclass'), 'nu-SVM classification'),
        'ONE_CLASS': (svm.svmc.ONE_CLASS, (),
                      ('oneclass',), 'one-class-SVM'),
        'EPSILON_SVR': (svm.svmc.EPSILON_SVR, ('C', 'tube_epsilon'),
                        ('regression',), 'epsilon-SVM regression'),
        'NU_SVR': (svm.svmc.NU_SVR, ('nu', 'tube_epsilon'),
                   ('regression',), 'nu-SVM regression')
        }

    _clf_internals = _SVM._clf_internals + [ 'libsvm' ]
    def __init__(self,
                 kernel_type='linear',
                 **kwargs):
        # XXX Determine which parameters depend on each other and implement
        # safety/simplifying logic around them
        # already done for: nr_weight
        # thought: weight and weight_label should be a dict
        """Interface class to LIBSVM classifiers and regressions.

        The default implementation (C/nu/epsilon SVM) is chosen depending
        on the given parameters (C/nu/tube_epsilon).
        """

        svm_impl = kwargs.get('svm_impl', None)
        # Depending on given arguments, figure out the desired SVM
        # implementation
        if svm_impl is None:
            for arg, impl in [ ('tube_epsilon', 'EPSILON_SVR'),
                               ('C', 'C_SVC'),
                               ('nu', 'NU_SVC') ]:
                if kwargs.has_key(arg):
                    svm_impl = impl
                    if __debug__:
                        debug('SVM', 'No implementation was specified. Since '
                              '%s is given among arguments, assume %s' %
                              (arg, impl))
                    break
            if svm_impl is None:
                svm_impl = 'C_SVC'
                if __debug__:
                    debug('SVM', 'Assign C_SVC "by default"')
        kwargs['svm_impl'] = svm_impl

        # init base class
        _SVM.__init__(self, kernel_type, **kwargs)

        self._svm_type = self._KNOWN_IMPLEMENTATIONS[svm_impl][0]

        if 'nu' in self._KNOWN_PARAMS and 'epsilon' in self._KNOWN_PARAMS:
            # overwrite eps param with new default value (information
            # taken from libSVM docs)
            self.params['epsilon'].setDefault(0.001)

        self.__model = None
        """Holds the trained SVM."""
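    # Usage sketch (illustrative only; argument values are made up): how the
    # implicit svm_impl selection in __init__ above plays out for typical
    # constructor calls.
    #
    #   SVM()                          -> 'C_SVC' (fallback default)
    #   SVM(C=1.0)                     -> 'C_SVC'
    #   SVM(nu=0.5)                    -> 'NU_SVC'
    #   SVM(tube_epsilon=0.01)         -> 'EPSILON_SVR'
    #   SVM(svm_impl='NU_SVR', nu=0.5) -> explicit svm_impl always wins
    #
    # Since 'tube_epsilon' is checked before 'C' and 'nu', a call such as
    # SVM(C=1.0, tube_epsilon=0.01) resolves to 'EPSILON_SVR'.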
    def _train(self, dataset):
        """Train the SVM on the given dataset."""
        # libsvm needs doubles
        if dataset.samples.dtype == 'float64':
            src = dataset.samples
        else:
            src = dataset.samples.astype('double')

        svmprob = svm.SVMProblem( dataset.labels.tolist(), src )

        # Translate a few params
        TRANSLATEDICT = {'epsilon': 'eps',
                         'tube_epsilon': 'p'}
        args = []
        for paramname, param in self.params.items.items() \
                + self.kernel_params.items.items():
            if paramname in TRANSLATEDICT:
                argname = TRANSLATEDICT[paramname]
            elif paramname in svm.SVMParameter.default_parameters:
                argname = paramname
            else:
                if __debug__:
                    debug("SVM_", "Skipping parameter %s since it is not known "
                          "to libsvm" % paramname)
                continue
            args.append( (argname, param.value) )

        # ??? All those parameters should be fetched if present from
        # **kwargs and create appropriate parameters within .params or
        # .kernel_params
        libsvm_param = svm.SVMParameter(
            kernel_type=self._kernel_type,
            svm_type=self._svm_type,
            **dict(args))
        """Store SVM parameters in libSVM compatible format."""

        if self.params.isKnown('C'): #svm_type in [svm.svmc.C_SVC]:
            C = self.params.C
            if not operator.isSequenceType(C):
                # we were not given a tuple for balancing between classes
                C = [C]

            Cs = list(C[:])                 # copy
            for i in xrange(len(Cs)):
                if Cs[i] < 0:
                    Cs[i] = self._getDefaultC(dataset.samples)*abs(Cs[i])
                    if __debug__:
                        debug("SVM", "Default C for %s was computed to be %s" %
                              (C[i], Cs[i]))

            libsvm_param._setParameter('C', Cs[0])

            if len(Cs) > 1:
                C0 = abs(C[0])
                scale = 1.0/(C0) #*N.sqrt(C0))
                # so we got 1 C per label
                if len(Cs) != len(dataset.uniquelabels):
                    raise ValueError, "SVM was parametrized with %d Cs but " \
                          "there are %d labels in the dataset" % \
                          (len(Cs), len(dataset.uniquelabels))
                weight = [ c*scale for c in Cs ]
                libsvm_param._setParameter('weight', weight)

        self.__model = svm.SVMModel(svmprob, libsvm_param)
    def _predict(self, data):
        """Predict values for the data."""
        # libsvm needs doubles
        if data.dtype == 'float64':
            src = data
        else:
            src = data.astype('double')
        states = self.states

        predictions = [ self.model.predict(p) for p in src ]

        if states.isEnabled("values"):
            if self.regression:
                values = [ self.model.predictValuesRaw(p)[0] for p in src ]
            else:
                trained_labels = self.trained_labels
                nlabels = len(trained_labels)
                # XXX We do duplicate work. model.predict calls
                # predictValuesRaw internally and then does voting or
                # thresholding. So if speed becomes a factor we might
                # want to move the logic from libsvm over here to base
                # predictions on obtained values, or adjust libsvm to
                # spit out values from predict() as well
                if nlabels == 2:
                    # Apparently libsvm reorders labels, so we need to
                    # track (1,0) values instead of (0,1); thus just
                    # take the reversed pair
                    values = [ self.model.predictValues(p)[(trained_labels[1],
                                                            trained_labels[0])]
                               for p in src ]
                    if len(values) > 0:
                        if __debug__:
                            debug("SVM",
                                  "Forcing values to be ndarray and reshaping"
                                  " them into 1D vector")
                        values = N.asarray(values).reshape(len(values))
                else:
                    # In multiclass we return a dictionary for all pairs
                    # of labels, since libsvm does 1-vs-1 pairs
                    values = [ self.model.predictValues(p) for p in src ]
            states.values = values

        if states.isEnabled("probabilities"):
            # XXX Is this really necessary? yoh doesn't think so since
            # assignment to states is doing the same
            #self.probabilities = [ self.model.predictProbability(p)
            #                       for p in src ]
            try:
                states.probabilities = [ self.model.predictProbability(p)
                                         for p in src ]
            except TypeError:
                warning("Current SVM %s doesn't support probability "
                        "estimation" % self)
        return predictions
    def summary(self):
        """Provide a quick summary of the SVM classifier."""
        s = super(SVM, self).summary()
        if self.trained:
            s += '\n # of SVs: %d' % self.__model.getTotalNSV()
            try:
                prm = svm.svmc.svm_model_param_get(self.__model.model)
                C = svm.svmc.svm_parameter_C_get(prm)
                # extract information on how many SVs sit inside the margin,
                # i.e. so-called 'bounded SVs'
                inside_margin = N.sum(
                    # take 0.99 to avoid rounding issues
                    N.abs(self.__model.getSVCoef())
                          >= 0.99*svm.svmc.svm_parameter_C_get(prm))
                s += ' #bounded SVs:%d' % inside_margin
                s += ' used C:%5g' % C
            except:
                pass
        return s
    def untrain(self):
        """Untrain libsvm's SVM: forget the model
        """
        if __debug__:
            debug("SVM", "Untraining %s and destroying libsvm model" % self)
        super(SVM, self).untrain()
        del self.__model
        self.__model = None

    model = property(fget=lambda self: self.__model)
    """Access to the SVM model."""
#class LinearSVM(SVM):
#    """Base class of all linear SVM classifiers that make use of the libSVM
#    package. Still not meant to be used directly.
#    """
#
#    def __init__(self, svm_impl, **kwargs):
#        """The constructor arguments are virtually identical to the ones of
#        the SVM class, except that 'kernel_type' is set to LINEAR.
#        """
#        # init base class
#        SVM.__init__(self, kernel_type='linear',
#                     svm_impl=svm_impl, **kwargs)
#
#
#    def getSensitivityAnalyzer(self, **kwargs):
#        """Returns an appropriate SensitivityAnalyzer."""
#        return LibSVMLinearSVMWeights(self, **kwargs)
#
#

#class LinearNuSVMC(LinearSVM):
#    """Classifier for linear Nu-SVM classification.
#    """
#
#    def __init__(self, **kwargs):
#        """
#        """
#        # init base class
#        LinearSVM.__init__(self, svm_impl='NU_SVC', **kwargs)
#
#
#class LinearCSVMC(LinearSVM):
#    """Classifier for linear C-SVM classification.
#    """
#
#    def __init__(self, **kwargs):
#        """
#        """
#        # init base class
#        LinearSVM.__init__(self, svm_impl='C_SVC', **kwargs)
#
#
#
#class RbfNuSVMC(SVM):
#    """Nu-SVM classifier using a radial basis function kernel.
#    """
#
#    def __init__(self, **kwargs):
#        """
#        """
#        # init base class
#        SVM.__init__(self, kernel_type='rbf',
#                     svm_impl='NU_SVC', **kwargs)
#
#
#class RbfCSVMC(SVM):
#    """C-SVM classifier using a radial basis function kernel.
#    """
#
#    def __init__(self, **kwargs):
#        """
#        """
#        # init base class
#        SVM.__init__(self, kernel_type='rbf',
#                     svm_impl='C_SVC', **kwargs)
#

# try to configure libsvm 'noise reduction'. Due to circular imports,
# we can't check externals here since it would not work.
try:
    # if externals.exists('libsvm verbosity control'):
    if __debug__ and "LIBSVM" in debug.active:
        debug("LIBSVM", "Setting verbosity for libsvm to 255")
        svm.svmc.svm_set_verbosity(255)
    else:
        svm.svmc.svm_set_verbosity(0)
except AttributeError:
    warning("Available LIBSVM has no way to control verbosity of the output")

# Assign SVM class to limited set of LinearSVMWeights
LinearSVMWeights._LEGAL_CLFS = [SVM]
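# Usage sketch (illustrative; it relies on the generic train/predict interface
# inherited from the Classifier base class, and the dataset `ds` is assumed to
# be a PyMVPA dataset with .samples and .labels):
#
#   clf = SVM(kernel_type='rbf', C=-1.0)    # negative C: scale the default C
#   clf.train(ds)
#   predictions = clf.predict(ds.samples)
#   print clf.summary()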