Package mvpa :: Package featsel :: Module helpers
[hide private]
[frames] | no frames]

Source Code for Module mvpa.featsel.helpers

  1  # emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*- 
  2  # vi: set ft=python sts=4 ts=4 sw=4 et: 
  3  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
  4  # 
  5  #   See COPYING file distributed along with the PyMVPA package for the 
  6  #   copyright and license terms. 
  7  # 
  8  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
  9  """""" 
 10   
 11  __docformat__ = 'restructuredtext' 
 12   
 13  from math import floor 
 14  import numpy as N 
 15   
 16  from mvpa.misc.state import ClassWithCollections, StateVariable 
 17   
 18  if __debug__: 
 19      from mvpa.base import debug 
 20   
 21  # 
 22  # Functors to be used for FeatureSelection 
 23  # 
 24   
25 -class BestDetector(object):
26 """Determine whether the last value in a sequence is the best one given 27 some criterion. 28 """
29 - def __init__(self, func=min, lastminimum=False):
30 """Initialize with number of steps 31 32 :Parameters: 33 fun : functor 34 Functor to select the best results. Defaults to min 35 lastminimum : bool 36 Toggle whether the latest or the earliest minimum is used as 37 optimal value to determine the stopping criterion. 38 """ 39 self.__func = func 40 self.__lastminimum = lastminimum 41 self.__bestindex = None 42 """Stores the index of the last detected best value."""
43 44
45 - def __call__(self, errors):
46 """Returns True if the last value in `errors` is the best or False 47 otherwise. 48 """ 49 isbest = False 50 51 # just to prevent ValueError 52 if len(errors)==0: 53 return isbest 54 55 minerror = self.__func(errors) 56 57 if self.__lastminimum: 58 # make sure it is an array 59 errors = N.array(errors) 60 # to find out the location of the minimum but starting from the 61 # end! 62 minindex = N.array((errors == minerror).nonzero()).max() 63 else: 64 minindex = errors.index(minerror) 65 66 self.__bestindex = minindex 67 68 # if minimal is the last one reported -- it is the best 69 if minindex == len(errors)-1: 70 isbest = True 71 72 return isbest
73 74 bestindex = property(fget=lambda self:self.__bestindex)
75 76 77
78 -class StoppingCriterion(object):
79 """Base class for all functors to decide when to stop RFE (or may 80 be general optimization... so it probably will be moved out into 81 some other module 82 """ 83
84 - def __call__(self, errors):
85 """Instruct when to stop. 86 87 Every implementation should return `False` when an empty list is 88 passed as argument. 89 90 Returns tuple `stop`. 91 """ 92 raise NotImplementedError
93 94 95
96 -class MultiStopCrit(StoppingCriterion):
97 """Stop computation if the latest error drops below a certain threshold. 98 """
99 - def __init__(self, crits, mode='or'):
100 """ 101 :Parameters: 102 crits : list of StoppingCriterion instances 103 For each call to MultiStopCrit all of these criterions will 104 be evaluated. 105 mode : any of ('and', 'or') 106 Logical function to determine the multi criterion from the set 107 of base criteria. 108 """ 109 if not mode in ('and', 'or'): 110 raise ValueError, \ 111 "A mode '%s' is not supported." % `mode` 112 113 self.__mode = mode 114 self.__crits = crits
115 116
117 - def __call__(self, errors):
118 """Evaluate all criteria to determine the value of the multi criterion. 119 """ 120 # evaluate all crits 121 crits = [ c(errors) for c in self.__crits ] 122 123 if self.__mode == 'and': 124 return N.all(crits) 125 else: 126 return N.any(crits)
127 128 129
130 -class FixedErrorThresholdStopCrit(StoppingCriterion):
131 """Stop computation if the latest error drops below a certain threshold. 132 """
133 - def __init__(self, threshold):
134 """Initialize with threshold. 135 136 :Parameters: 137 threshold : float [0,1] 138 Error threshold. 139 """ 140 StoppingCriterion.__init__(self) 141 if threshold > 1.0 or threshold < 0.0: 142 raise ValueError, \ 143 "Threshold %f is out of a reasonable range [0,1]." \ 144 % `threshold` 145 self.__threshold = threshold
146 147
148 - def __call__(self, errors):
149 """Nothing special.""" 150 if len(errors)==0: 151 return False 152 if errors[-1] < self.__threshold: 153 return True 154 else: 155 return False
156 157 158 threshold = property(fget=lambda x:x.__threshold)
159 160 161
162 -class NStepsStopCrit(StoppingCriterion):
163 """Stop computation after a certain number of steps. 164 """
165 - def __init__(self, steps):
166 """Initialize with number of steps. 167 168 :Parameters: 169 steps : int 170 Number of steps after which to stop. 171 """ 172 StoppingCriterion.__init__(self) 173 if steps < 0: 174 raise ValueError, \ 175 "Number of steps %i is out of a reasonable range." \ 176 % `steps` 177 self.__steps = steps
178 179
180 - def __call__(self, errors):
181 """Nothing special.""" 182 if len(errors) >= self.__steps: 183 return True 184 else: 185 return False
186 187 188 steps = property(fget=lambda x:x.__steps)
189 190 191
192 -class NBackHistoryStopCrit(StoppingCriterion):
193 """Stop computation if for a number of steps error was increasing 194 """ 195
196 - def __init__(self, bestdetector=BestDetector(), steps=10):
197 """Initialize with number of steps 198 199 :Parameters: 200 bestdetector : BestDetector instance 201 used to determine where the best error is located. 202 steps : int 203 How many steps to check after optimal value. 204 """ 205 StoppingCriterion.__init__(self) 206 if steps < 0: 207 raise ValueError, \ 208 "Number of steps (got %d) should be non-negative" % steps 209 self.__bestdetector = bestdetector 210 self.__steps = steps
211 212
213 - def __call__(self, errors):
214 stop = False 215 216 # just to prevent ValueError 217 if len(errors)==0: 218 return stop 219 220 # charge best detector 221 self.__bestdetector(errors) 222 223 # if number of elements after the min >= len -- stop 224 if len(errors) - self.__bestdetector.bestindex > self.__steps: 225 stop = True 226 227 return stop
228 229 steps = property(fget=lambda x:x.__steps)
230 231 232
233 -class ElementSelector(ClassWithCollections):
234 """Base class to implement functors to select some elements based on a 235 sequence of values. 236 """ 237 238 ndiscarded = StateVariable(True, 239 doc="Store number of discarded elements.") 240
241 - def __init__(self, mode='discard', **kwargs):
242 """Cheap initialization. 243 244 :Parameters: 245 mode : ['discard', 'select'] 246 Decides whether to `select` or to `discard` features. 247 """ 248 ClassWithCollections.__init__(self, **kwargs) 249 250 self._setMode(mode) 251 """Flag whether to select or to discard elements."""
252 253
254 - def _setMode(self, mode):
255 """Choose `select` or `discard` mode.""" 256 257 if not mode in ['discard', 'select']: 258 raise ValueError, "Unkown selection mode [%s]. Can only be one " \ 259 "of 'select' or 'discard'." % mode 260 261 self.__mode = mode
262 263
264 - def __call__(self, seq):
265 """Implementations in derived classed have to return a list of selected 266 element IDs based on the given sequence. 267 """ 268 raise NotImplementedError
269 270 mode = property(fget=lambda self:self.__mode, fset=_setMode)
271 272
273 -class RangeElementSelector(ElementSelector):
274 """Select elements based on specified range of values""" 275
276 - def __init__(self, lower=None, upper=None, inclusive=False, 277 mode='select', **kwargs):
278 """Initialization `RangeElementSelector` 279 280 :Parameters: 281 lower 282 If not None -- select elements which are above of 283 specified value 284 upper 285 If not None -- select elements which are lower of 286 specified value 287 inclusive 288 Either to include end points 289 mode 290 overrides parent's default to be 'select' since it is more 291 native for RangeElementSelector 292 XXX TODO -- unify?? 293 294 `upper` could be lower than `lower` -- then selection is done 295 on values <= lower or >=upper (ie tails). This would produce 296 the same result if called with flipped values for mode and 297 inclusive. 298 299 If no upper no lower is set, assuming upper,lower=0, thus 300 outputing non-0 elements 301 """ 302 303 if lower is None and upper is None: 304 lower, upper = 0, 0 305 """Lets better return non-0 values if none of bounds is set""" 306 307 # init State before registering anything 308 ElementSelector.__init__(self, mode=mode, **kwargs) 309 310 self.__range = (lower, upper) 311 """Values on which to base selection""" 312 313 self.__inclusive = inclusive
314
315 - def __call__(self, seq):
316 """Returns selected IDs. 317 """ 318 lower, upper = self.__range 319 len_seq = len(seq) 320 if not lower is None: 321 if self.__inclusive: 322 selected = seq >= lower 323 else: 324 selected = seq > lower 325 else: 326 selected = N.ones( (len_seq), dtype=N.bool ) 327 328 if not upper is None: 329 if self.__inclusive: 330 selected_upper = seq <= upper 331 else: 332 selected_upper = seq < upper 333 if not lower is None: 334 if lower < upper: 335 # regular range 336 selected = N.logical_and(selected, selected_upper) 337 else: 338 # outside, though that would be similar to exclude 339 selected = N.logical_or(selected, selected_upper) 340 else: 341 selected = selected_upper 342 343 if self.mode == 'discard': 344 selected = N.logical_not(selected) 345 346 result = N.where(selected)[0] 347 348 if __debug__: 349 debug("ES", "Selected %d out of %d elements" % 350 (len(result), len_seq)) 351 return result
352 353
354 -class TailSelector(ElementSelector):
355 """Select elements from a tail of a distribution. 356 357 The default behaviour is to discard the lower tail of a given distribution. 358 """ 359 360 # TODO: 'both' to select from both tails
361 - def __init__(self, tail='lower', sort=True, **kwargs):
362 """Initialize TailSelector 363 364 :Parameters: 365 tail : ['lower', 'upper'] 366 Choose the tail to be processed. 367 sort : bool 368 Flag whether selected IDs will be sorted. Disable if not 369 necessary to save some CPU cycles. 370 371 """ 372 # init State before registering anything 373 ElementSelector.__init__(self, **kwargs) 374 375 self._setTail(tail) 376 """Know which tail to select.""" 377 378 self.__sort = sort
379 380
381 - def _setTail(self, tail):
382 """Set the tail to be processed.""" 383 if not tail in ['lower', 'upper']: 384 raise ValueError, "Unkown tail argument [%s]. Can only be one " \ 385 "of 'lower' or 'upper'." % tail 386 387 self.__tail = tail
388 389
390 - def _getNElements(self, seq):
391 """In derived classes has to return the number of elements to be 392 processed given a sequence values forming the distribution. 393 """ 394 raise NotImplementedError
395 396
397 - def __call__(self, seq):
398 """Returns selected IDs. 399 """ 400 # TODO: Think about selecting features which have equal values but 401 # some are selected and some are not 402 len_seq = len(seq) 403 # how many to select (cannot select more than available) 404 nelements = min(self._getNElements(seq), len_seq) 405 406 # make sure that data is ndarray and compute a sequence rank matrix 407 # lowest value is first 408 seqrank = N.array(seq).argsort() 409 410 if self.mode == 'discard' and self.__tail == 'upper': 411 good_ids = seqrank[:-1*nelements] 412 self.ndiscarded = nelements 413 elif self.mode == 'discard' and self.__tail == 'lower': 414 good_ids = seqrank[nelements:] 415 self.ndiscarded = nelements 416 elif self.mode == 'select' and self.__tail == 'upper': 417 good_ids = seqrank[-1*nelements:] 418 self.ndiscarded = len_seq - nelements 419 else: # select lower tail 420 good_ids = seqrank[:nelements] 421 self.ndiscarded = len_seq - nelements 422 423 # sort ids to keep order 424 # XXX should we do here are leave to other place 425 if self.__sort: 426 good_ids.sort() 427 428 return good_ids
429 430 431
432 -class FixedNElementTailSelector(TailSelector):
433 """Given a sequence, provide set of IDs for a fixed number of to be selected 434 elements. 435 """ 436
437 - def __init__(self, nelements, **kwargs):
438 """Cheap initialization. 439 440 :Parameters: 441 nelements : int 442 Number of elements to select/discard. 443 """ 444 TailSelector.__init__(self, **kwargs) 445 self.__nelements = None 446 self._setNElements(nelements)
447 448
449 - def __repr__(self):
450 return "%s number=%f" % ( 451 TailSelector.__repr__(self), self.nelements)
452 453
454 - def _getNElements(self, seq):
455 return self.__nelements
456 457
458 - def _setNElements(self, nelements):
459 if __debug__: 460 if nelements <= 0: 461 raise ValueError, "Number of elements less or equal to zero " \ 462 "does not make sense." 463 464 self.__nelements = nelements
465 466 467 nelements = property(fget=lambda x:x.__nelements, 468 fset=_setNElements)
469 470 471
472 -class FractionTailSelector(TailSelector):
473 """Given a sequence, provide Ids for a fraction of elements 474 """ 475
476 - def __init__(self, felements, **kwargs):
477 """Cheap initialization. 478 479 :Parameters: 480 felements : float (0,1.0] 481 Fraction of elements to select/discard. Note: Even when 0.0 is 482 specified at least one element will be selected. 483 """ 484 TailSelector.__init__(self, **kwargs) 485 self._setFElements(felements)
486 487
488 - def __repr__(self):
489 return "%s fraction=%f" % ( 490 TailSelector.__repr__(self), self.__felements)
491 492
493 - def _getNElements(self, seq):
494 num = int(floor(self.__felements * len(seq))) 495 num = max(1, num) # remove at least 1 496 # no need for checks as base class will do anyway 497 #return min(num, nselect) 498 return num
499 500
501 - def _setFElements(self, felements):
502 """What fraction to discard""" 503 if felements > 1.0 or felements < 0.0: 504 raise ValueError, \ 505 "Fraction (%f) cannot be outside of [0.0,1.0]" \ 506 % felements 507 508 self.__felements = felements
509 510 511 felements = property(fget=lambda x:x.__felements, 512 fset=_setFElements)
513