Package mvpa :: Package featsel :: Module ifs
[hide private]
[frames] | [no frames]

Source Code for Module mvpa.featsel.ifs

  1  # emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*- 
  2  # vi: set ft=python sts=4 ts=4 sw=4 et: 
  3  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
  4  # 
  5  #   See COPYING file distributed along with the PyMVPA package for the 
  6  #   copyright and license terms. 
  7  # 
  8  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
  9  """Incremental feature search (IFS). 
 10   
 11  Very similar to Recursive feature elimination (RFE), but instead of beginning 
 12  with all features and stripping some sequentially, start with an empty feature 
 13  set and include important features successively. 
 14  """ 
 15   
 16  __docformat__ = 'restructuredtext' 
 17   
 18  import numpy as N 
 19  from mvpa.support.copy import copy 
 20   
 21  from mvpa.featsel.base import FeatureSelection 
 22  from mvpa.featsel.helpers import NBackHistoryStopCrit, \ 
 23                                   FixedNElementTailSelector, \ 
 24                                   BestDetector 
 25   
 26  from mvpa.misc.state import StateVariable 
 27   
 28  if __debug__: 
 29      from mvpa.base import debug 
 30   
 31   
class IFS(FeatureSelection):
    """Incremental feature search.

    A scalar `DatasetMeasure` is computed multiple times on variations of a
    certain dataset. These measures are in turn used to incrementally select
    important features. Starting with an empty feature set the dataset measure
    is first computed for each single feature. A number of features is selected
    based on the resulting data measure map (using an `ElementSelector`).

    Next the dataset measure is computed again using each feature in addition
    to the already selected feature set. Again the `ElementSelector` is used to
    select more features.

    For each feature selection the transfer error on some testdataset is
    computed. This procedure is repeated until a given `StoppingCriterion`
    is reached.
    """

    # transfer error history over all incremental selection steps
    errors = StateVariable()

    def __init__(self,
                 data_measure,
                 transfer_error,
                 bestdetector=BestDetector(),
                 stopping_criterion=NBackHistoryStopCrit(BestDetector()),
                 feature_selector=FixedNElementTailSelector(1,
                                                            tail='upper',
                                                            mode='select'),
                 **kwargs
                 ):
        """Initialize incremental feature search

        :Parameters:
          data_measure : DatasetMeasure
            Computed for each candidate feature selection.
          transfer_error : TransferError
            Compute against a test dataset for each incremental feature
            set.
          bestdetector : Functor
            Given a list of error values it has to return a boolean that
            signals whether the latest error value is the total minimum.
          stopping_criterion : Functor
            Given a list of error values it has to return whether the
            criterion is fulfilled.
          feature_selector : Functor
            Given a sequence of data measure values it has to return the
            ids of the feature candidates that should be staged for
            selection in the current iteration.
        """
        # bases init first
        FeatureSelection.__init__(self, **kwargs)

        self.__data_measure = data_measure
        self.__transfer_error = transfer_error
        self.__feature_selector = feature_selector
        self.__bestdetector = bestdetector
        self.__stopping_criterion = stopping_criterion


    def __call__(self, dataset, testdataset):
        """Proceed and select the features recursively eliminating less
        important ones.

        :Parameters:
          `dataset`: `Dataset`
            used to select features and train classifiers to determine the
            transfer error.
          `testdataset`: `Dataset`
            used to test the trained classifier on a certain feature set
            to determine the transfer error.

        Returns a tuple with the dataset containing the feature subset of
        `dataset` that had the lowest transfer error of all tested sets until
        the stopping criterion was reached. The tuple also contains a dataset
        with the corresponding features from the `testdataset`.
        """
        # computed error for each tested feature set
        errors = []

        # feature candidates are all features in the pattern object;
        # use an explicit list so that removing staged candidates below
        # also works on Python 3, where `range` is not a list
        candidates = list(range(dataset.nfeatures))

        # initially empty list of selected features
        selected = []

        # results in here please
        results = None

        # as long as there are candidates left
        # the loop will most likely get broken earlier if the stopping
        # criterion is reached
        while len(candidates):
            # measures for all candidates
            measures = []

            # for all possible candidates
            for i, candidate in enumerate(candidates):
                if __debug__:
                    debug('IFSC', "Tested %i" % i, cr=True)

                # take the new candidate and all already selected features
                # select a new temporary feature subset from the dataset
                # XXX assume MappedDataset and issue plain=True ??
                tmp_dataset = \
                    dataset.selectFeatures(selected + [candidate])

                # compute data measure on this feature set
                measures.append(self.__data_measure(tmp_dataset))

            measures = [N.asscalar(m) for m in measures]
            # Select promising feature candidates (staging)
            # IDs are only applicable to the current set of feature candidates
            tmp_staging_ids = self.__feature_selector(measures)

            # translate into real candidate ids
            staging_ids = [candidates[i] for i in tmp_staging_ids]

            # mark them as selected and remove from candidates
            selected += staging_ids
            for i in staging_ids:
                candidates.remove(i)

            # compute transfer error for the new set
            # XXX assume MappedDataset and issue plain=True ??
            error = self.__transfer_error(testdataset.selectFeatures(selected),
                                          dataset.selectFeatures(selected))
            errors.append(error)

            # Check if it is time to stop and if we got
            # the best result
            stop = self.__stopping_criterion(errors)
            isthebest = self.__bestdetector(errors)

            if __debug__:
                debug('IFSC',
                      "nselected %i; error: %.4f " \
                      "best/stop=%d/%d\n" \
                      % (len(selected), errors[-1], isthebest, stop),
                      cr=True, lf=True)

            if isthebest:
                # do copy to survive later selections
                results = copy(selected)

            # leave the loop when the criterion is reached
            if stop:
                break

        # charge state
        self.errors = errors

        # best dataset ever is returned
        return dataset.selectFeatures(results), \
               testdataset.selectFeatures(results)
182