
Source Code for Module mvpa.clfs.libsvmc.svm

# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
# vi: set ft=python sts=4 ts=4 sw=4 et:
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
#
#   See COPYING file distributed along with the PyMVPA package for the
#   copyright and license terms.
#
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
"""Wrap the libsvm package into a very simple class interface."""

__docformat__ = 'restructuredtext'

import numpy as N

import operator

from mvpa.base import warning
from mvpa.misc.state import StateVariable

from mvpa.clfs._svmbase import _SVM

from mvpa.clfs.libsvmc import _svm as svm
from sens import LinearSVMWeights

if __debug__:
    from mvpa.base import debug

# we better expose those since they are mentioned in docstrings,
# although pylint would not be happy
from mvpa.clfs.libsvmc._svmc import \
     C_SVC, NU_SVC, EPSILON_SVR, \
     NU_SVR, LINEAR, POLY, RBF, SIGMOID, \
     PRECOMPUTED, ONE_CLASS
class SVM(_SVM):
    """Support Vector Machine Classifier.

    This is a simple interface to the libSVM package.
    """

    # Since this is an internal feature of LibSVM, this state variable is
    # present here
    probabilities = StateVariable(enabled=False,
        doc="Estimates of sample probabilities as provided by LibSVM")

    _KERNELS = { "linear":  (svm.svmc.LINEAR, None, LinearSVMWeights),
                 "rbf":     (svm.svmc.RBF, ('gamma',), None),
                 "poly":    (svm.svmc.POLY, ('gamma', 'degree', 'coef0'), None),
                 "sigmoid": (svm.svmc.SIGMOID, ('gamma', 'coef0'), None),
                 }
    # TODO: Complete the list ;-)

    # TODO: p is specific to SVR
    _KNOWN_PARAMS = [ 'epsilon', 'probability', 'shrinking',
                      'weight_label', 'weight' ]

    _KNOWN_KERNEL_PARAMS = [ 'cache_size' ]

    _KNOWN_IMPLEMENTATIONS = {
        'C_SVC': (svm.svmc.C_SVC, ('C',),
                  ('binary', 'multiclass'), 'C-SVM classification'),
        'NU_SVC': (svm.svmc.NU_SVC, ('nu',),
                   ('binary', 'multiclass'), 'nu-SVM classification'),
        'ONE_CLASS': (svm.svmc.ONE_CLASS, (),
                      ('oneclass',), 'one-class-SVM'),
        'EPSILON_SVR': (svm.svmc.EPSILON_SVR, ('C', 'tube_epsilon'),
                        ('regression',), 'epsilon-SVM regression'),
        'NU_SVR': (svm.svmc.NU_SVR, ('nu', 'tube_epsilon'),
                   ('regression',), 'nu-SVM regression')
        }

    _clf_internals = _SVM._clf_internals + [ 'libsvm' ]
    def __init__(self,
                 kernel_type='linear',
                 **kwargs):
        # XXX Determine which parameters depend on each other and implement
        # safety/simplifying logic around them
        # already done for: nr_weight
        # thought: weight and weight_label should be a dict
        """Interface class to LIBSVM classifiers and regressions.

        The default implementation (C/nu/epsilon SVM) is chosen depending
        on the given parameters (C/nu/tube_epsilon).
        """

        svm_impl = kwargs.get('svm_impl', None)
        # Depending on given arguments, figure out the desired SVM
        # implementation
        if svm_impl is None:
            for arg, impl in [ ('tube_epsilon', 'EPSILON_SVR'),
                               ('C', 'C_SVC'),
                               ('nu', 'NU_SVC') ]:
                if kwargs.has_key(arg):
                    svm_impl = impl
                    if __debug__:
                        debug('SVM', 'No implementation was specified. Since '
                              '%s is given among arguments, assume %s' %
                              (arg, impl))
                    break
            if svm_impl is None:
                svm_impl = 'C_SVC'
                if __debug__:
                    debug('SVM', 'Assign C_SVC "by default"')
        kwargs['svm_impl'] = svm_impl

        # init base class
        _SVM.__init__(self, kernel_type, **kwargs)

        self._svm_type = self._KNOWN_IMPLEMENTATIONS[svm_impl][0]

        if 'nu' in self._KNOWN_PARAMS and 'epsilon' in self._KNOWN_PARAMS:
            # overwrite eps param with new default value (information
            # taken from libSVM docs)
            self.params['epsilon'].setDefault(0.001)

        self.__model = None
        """Holds the trained SVM."""
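    # Usage sketch (illustrative only; argument values are made up): how the
    # implicit svm_impl selection in __init__ above plays out for typical
    # constructor calls.
    #
    #   SVM()                          -> 'C_SVC' (fallback default)
    #   SVM(C=1.0)                     -> 'C_SVC'
    #   SVM(nu=0.5)                    -> 'NU_SVC'
    #   SVM(tube_epsilon=0.01)         -> 'EPSILON_SVR'
    #   SVM(svm_impl='NU_SVR', nu=0.5) -> explicit svm_impl always wins
    #
    # Since 'tube_epsilon' is checked before 'C' and 'nu', a call such as
    # SVM(C=1.0, tube_epsilon=0.01) resolves to 'EPSILON_SVR'.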
    def _train(self, dataset):
        """Train the SVM on the given dataset."""
        # libsvm needs doubles
        if dataset.samples.dtype == 'float64':
            src = dataset.samples
        else:
            src = dataset.samples.astype('double')

        svmprob = svm.SVMProblem( dataset.labels.tolist(), src )

        # Translate a few params
        TRANSLATEDICT = {'epsilon': 'eps',
                         'tube_epsilon': 'p'}
        args = []
        for paramname, param in self.params.items.items() \
                + self.kernel_params.items.items():
            if paramname in TRANSLATEDICT:
                argname = TRANSLATEDICT[paramname]
            elif paramname in svm.SVMParameter.default_parameters:
                argname = paramname
            else:
                if __debug__:
                    debug("SVM_", "Skipping parameter %s since it is not known "
                          "to libsvm" % paramname)
                continue
            args.append( (argname, param.value) )

        # ??? All those parameters should be fetched if present from
        # **kwargs and create appropriate parameters within .params or
        # .kernel_params
        libsvm_param = svm.SVMParameter(
            kernel_type=self._kernel_type,
            svm_type=self._svm_type,
            **dict(args))
        """Store SVM parameters in libSVM compatible format."""

        if self.params.isKnown('C'): #svm_type in [svm.svmc.C_SVC]:
            C = self.params.C
            if not operator.isSequenceType(C):
                # we were not given a tuple for balancing between classes
                C = [C]

            Cs = list(C[:])                 # copy
            for i in xrange(len(Cs)):
                if Cs[i] < 0:
                    Cs[i] = self._getDefaultC(dataset.samples)*abs(Cs[i])
                    if __debug__:
                        debug("SVM", "Default C for %s was computed to be %s" %
                              (C[i], Cs[i]))

            libsvm_param._setParameter('C', Cs[0])

            if len(Cs) > 1:
                C0 = abs(C[0])
                scale = 1.0/(C0) #*N.sqrt(C0))
                # so we got 1 C per label
                if len(Cs) != len(dataset.uniquelabels):
                    raise ValueError, "SVM was parametrized with %d Cs but " \
                          "there are %d labels in the dataset" % \
                          (len(Cs), len(dataset.uniquelabels))
                weight = [ c*scale for c in Cs ]
                libsvm_param._setParameter('weight', weight)

        self.__model = svm.SVMModel(svmprob, libsvm_param)
    def _predict(self, data):
        """Predict values for the data."""
        # libsvm needs doubles
        if data.dtype == 'float64':
            src = data
        else:
            src = data.astype('double')
        states = self.states

        predictions = [ self.model.predict(p) for p in src ]

        if states.isEnabled("values"):
            if self.regression:
                values = [ self.model.predictValuesRaw(p)[0] for p in src ]
            else:
                trained_labels = self.trained_labels
                nlabels = len(trained_labels)
                # XXX We do duplicate work. model.predict calls
                # predictValuesRaw internally and then does voting or
                # thresholding. So if speed becomes a factor we might
                # want to move the logic from libsvm over here to base
                # predictions on obtained values, or adjust libsvm to
                # spit out values from predict() as well
                if nlabels == 2:
                    # Apparently libsvm reorders labels, so we need to
                    # track (1,0) values instead of (0,1); thus just
                    # take the reversed pair
                    values = [ self.model.predictValues(p)[(trained_labels[1],
                                                            trained_labels[0])]
                               for p in src ]
                    if len(values) > 0:
                        if __debug__:
                            debug("SVM",
                                  "Forcing values to be ndarray and reshaping"
                                  " them into 1D vector")
                        values = N.asarray(values).reshape(len(values))
                else:
                    # In multiclass we return a dictionary for all pairs
                    # of labels, since libsvm does 1-vs-1 pairs
                    values = [ self.model.predictValues(p) for p in src ]
            states.values = values

        if states.isEnabled("probabilities"):
            # XXX Is this really necessary? yoh doesn't think so since
            # assignment to states is doing the same
            #self.probabilities = [ self.model.predictProbability(p)
            #                       for p in src ]
            try:
                states.probabilities = [ self.model.predictProbability(p)
                                         for p in src ]
            except TypeError:
                warning("Current SVM %s doesn't support probability "
                        "estimation" % self)
        return predictions
    def summary(self):
        """Provide a quick summary of the SVM classifier."""
        s = super(SVM, self).summary()
        if self.trained:
            s += '\n # of SVs: %d' % self.__model.getTotalNSV()
            try:
                prm = svm.svmc.svm_model_param_get(self.__model.model)
                C = svm.svmc.svm_parameter_C_get(prm)
                # extract information on how many SVs sit inside the margin,
                # i.e. so-called 'bounded SVs'
                inside_margin = N.sum(
                    # take 0.99 to avoid rounding issues
                    N.abs(self.__model.getSVCoef())
                          >= 0.99*svm.svmc.svm_parameter_C_get(prm))
                s += ' #bounded SVs:%d' % inside_margin
                s += ' used C:%5g' % C
            except:
                pass
        return s
    def untrain(self):
        """Untrain libsvm's SVM: forget the model
        """
        if __debug__:
            debug("SVM", "Untraining %s and destroying libsvm model" % self)
        super(SVM, self).untrain()
        del self.__model
        self.__model = None

    model = property(fget=lambda self: self.__model)
    """Access to the SVM model."""
#class LinearSVM(SVM):
#    """Base class of all linear SVM classifiers that make use of the libSVM
#    package. Still not meant to be used directly.
#    """
#
#    def __init__(self, svm_impl, **kwargs):
#        """The constructor arguments are virtually identical to the ones of
#        the SVM class, except that 'kernel_type' is set to LINEAR.
#        """
#        # init base class
#        SVM.__init__(self, kernel_type='linear',
#                     svm_impl=svm_impl, **kwargs)
#
#
#    def getSensitivityAnalyzer(self, **kwargs):
#        """Returns an appropriate SensitivityAnalyzer."""
#        return LibSVMLinearSVMWeights(self, **kwargs)
#
#

#class LinearNuSVMC(LinearSVM):
#    """Classifier for linear Nu-SVM classification.
#    """
#
#    def __init__(self, **kwargs):
#        """
#        """
#        # init base class
#        LinearSVM.__init__(self, svm_impl='NU_SVC', **kwargs)
#
#
#class LinearCSVMC(LinearSVM):
#    """Classifier for linear C-SVM classification.
#    """
#
#    def __init__(self, **kwargs):
#        """
#        """
#        # init base class
#        LinearSVM.__init__(self, svm_impl='C_SVC', **kwargs)
#
#
#
#class RbfNuSVMC(SVM):
#    """Nu-SVM classifier using a radial basis function kernel.
#    """
#
#    def __init__(self, **kwargs):
#        """
#        """
#        # init base class
#        SVM.__init__(self, kernel_type='rbf',
#                     svm_impl='NU_SVC', **kwargs)
#
#
#class RbfCSVMC(SVM):
#    """C-SVM classifier using a radial basis function kernel.
#    """
#
#    def __init__(self, **kwargs):
#        """
#        """
#        # init base class
#        SVM.__init__(self, kernel_type='rbf',
#                     svm_impl='C_SVC', **kwargs)
#

# try to configure libsvm 'noise reduction'. Due to circular imports,
# we can't check externals here since it would not work.
try:
    # if externals.exists('libsvm verbosity control'):
    if __debug__ and "LIBSVM" in debug.active:
        debug("LIBSVM", "Setting verbosity for libsvm to 255")
        svm.svmc.svm_set_verbosity(255)
    else:
        svm.svmc.svm_set_verbosity(0)
except AttributeError:
    warning("Available LIBSVM has no way to control verbosity of the output")

# Assign SVM class to limited set of LinearSVMWeights
LinearSVMWeights._LEGAL_CLFS = [SVM]
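# Usage sketch (illustrative; it relies on the generic train/predict interface
# inherited from the Classifier base class, and the dataset `ds` is assumed to
# be a PyMVPA dataset with .samples and .labels):
#
#   clf = SVM(kernel_type='rbf', C=-1.0)    # negative C: scale the default C
#   clf.train(ds)
#   predictions = clf.predict(ds.samples)
#   print clf.summary()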