"""Incremental feature search (IFS).

Very similar to Recursive feature elimination (RFE), but instead of beginning
with all features and stripping some sequentially, start with an empty feature
set and include important features successively.
"""
15
16 __docformat__ = 'restructuredtext'
17
18 import numpy as N
19 from mvpa.support.copy import copy
20
21 from mvpa.featsel.base import FeatureSelection
22 from mvpa.featsel.helpers import NBackHistoryStopCrit, \
23 FixedNElementTailSelector, \
24 BestDetector
25
26 from mvpa.misc.state import StateVariable
27
28 if __debug__:
29 from mvpa.base import debug
30
31
32 -class IFS(FeatureSelection):
33 """Incremental feature search.
34
35 A scalar `DatasetMeasure` is computed multiple times on variations of a
36 certain dataset. These measures are in turn used to incrementally select
37 important features. Starting with an empty feature set the dataset measure
38 is first computed for each single feature. A number of features is selected
39 based on the resulting data measure map (using an `ElementSelector`).
40
41 Next the dataset measure is computed again using each feature in addition
42 to the already selected feature set. Again the `ElementSelector` is used to
43 select more features.
44
    For each feature selection the transfer error on some testdataset is
    computed. This procedure is repeated until a given `StoppingCriterion`
    is reached.
48 """
49
50 errors = StateVariable()
51
        """Initialize incremental feature search

        :Parameters:
          data_measure : DatasetMeasure
            Computed for each candidate feature selection.
          transfer_error : TransferError
            Compute against a test dataset for each incremental feature
            set.
          bestdetector : Functor
            Given a list of error values it has to return a boolean that
            signals whether the latest error value is the total minimum.
          stopping_criterion : Functor
            Given a list of error values it has to return whether the
            criterion is fulfilled.
          feature_selector : Functor
            Given the list of data measures for all candidates it has to
            return the indices of the candidates to add to the selected
            feature set (an `ElementSelector`, see class documentation).
        """
77
78 FeatureSelection.__init__(self, **kwargs)
79
80 self.__data_measure = data_measure
81 self.__transfer_error = transfer_error
82 self.__feature_selector = feature_selector
83 self.__bestdetector = bestdetector
84 self.__stopping_criterion = stopping_criterion
85
86
87 - def __call__(self, dataset, testdataset):
88 """Proceed and select the features recursively eliminating less
89 important ones.
90
91 :Parameters:
92 `dataset`: `Dataset`
93 used to select features and train classifiers to determine the
94 transfer error.
95 `testdataset`: `Dataset`
96 used to test the trained classifer on a certain feature set
97 to determine the transfer error.
98
99 Returns a tuple with the dataset containing the feature subset of
100 `dataset` that had the lowest transfer error of all tested sets until
101 the stopping criterion was reached. The tuple also contains a dataset
102 with the corrsponding features from the `testdataset`.
103 """
104 errors = []
105 """Computed error for each tested features set."""
106
107
108 candidates = range( dataset.nfeatures )
109
110
111 selected = []
112
113
114 results = None
115
116
117
118
119 while len( candidates ):
120
121 measures = []
122
123
124 for i, candidate in enumerate(candidates):
125 if __debug__:
126 debug('IFSC', "Tested %i" % i, cr=True)
127
128
129
130
131 tmp_dataset = \
132 dataset.selectFeatures(selected + [candidate])
133
134
135 measures.append(self.__data_measure(tmp_dataset))
136
137 measures = [N.asscalar(m) for m in measures]
138
139
140 tmp_staging_ids = self.__feature_selector(measures)
141
142
143 staging_ids = [ candidates[i] for i in tmp_staging_ids ]
144
145
146 selected += staging_ids
147 for i in staging_ids:
148 candidates.remove(i)
149
150
151
152 error = self.__transfer_error(testdataset.selectFeatures(selected),
153 dataset.selectFeatures(selected))
154 errors.append(error)
155
156
157
158 stop = self.__stopping_criterion(errors)
159 isthebest = self.__bestdetector(errors)
160
161 if __debug__:
162 debug('IFSC',
163 "nselected %i; error: %.4f " \
164 "best/stop=%d/%d\n" \
165 % (len(selected), errors[-1], isthebest, stop),
166 cr=True, lf=True)
167
168 if isthebest:
169
170 results = copy(selected)
171
172
173 if stop:
174 break
175
176
177 self.errors = errors
178
179
180 return dataset.selectFeatures(results), \
181 testdataset.selectFeatures(results)
182