1
2
3
4
5
6
7
8
9 """Feature selection base class, related base classes, and helpers."""
10
11 __docformat__ = 'restructuredtext'
12
13 from mvpa.featsel.helpers import FractionTailSelector
14 from mvpa.misc.state import StateVariable, ClassWithCollections
15
16 if __debug__:
17 from mvpa.base import debug
18
20 """Base class for any feature selection
21
22 Base class for Functors which implement feature selection on the
23 datasets.
24 """
25
26 selected_ids = StateVariable(enabled=False)
27
31
32
33 - def __call__(self, dataset, testdataset=None):
34 """Invocation of the feature selection
35
36 :Parameters:
37 dataset : Dataset
38 dataset used to select features
39 testdataset : Dataset
40 dataset that might be used to compute a stopping criterion
41
42 Returns a tuple with the dataset containing the selected features.
43 If a test dataset is given, the tuple also contains the selected
44 features of the test dataset. Derived classes must provide an interface
45 to access other information relevant to the feature selection process
46 (e.g. mask, elimination step (in RFE), etc). Raises NotImplementedError here.
47 """
48 raise NotImplementedError
49
50
52 """ 'Untrain' feature selection
53
54 Necessary for full 'untraining' of the classifiers. By default
55 does nothing; needs to be overridden in the corresponding feature
56 selections to pass the call on to their sensitivity analyzers.
57 """
58 pass
59
60
62 """Feature elimination.
63
64 A `FeaturewiseDatasetMeasure` is used to compute sensitivity maps given a certain
65 dataset. These sensitivity maps are in turn used to discard unimportant
66 features.
67 """
68
69 sensitivity = StateVariable(enabled=False)
70
76 """Initialize feature selection
77
78 :Parameters:
79 sensitivity_analyzer : FeaturewiseDatasetMeasure
80 sensitivity analyzer to come up with sensitivity
81 feature_selector : Functor
82 Given a sensitivity map it has to return the ids of those
83 features that should be kept.
84
85 """
86
87
88 FeatureSelection.__init__(self, **kwargs)
89
90 self.__sensitivity_analyzer = sensitivity_analyzer
91 """Sensitivity analyzer to use once"""
92
93 self.__feature_selector = feature_selector
94 """Functor which takes care about removing some features."""
95
96
98 if __debug__:
99 debug("FS_", "Untraining sensitivity-based FS: %s" % self)
100 self.__sensitivity_analyzer.untrain()
101
102
103 - def __call__(self, dataset, testdataset=None):
145
146
147 sensitivity_analyzer = property(fget=lambda self:self.__sensitivity_analyzer,
148 doc="Measure which was used to do selection")
149
150
152 """Feature elimination through a list of FeatureSelections.
153
154 Given a list of FeatureSelections, it applies them in turn.
155 """
156
157 nfeatures = StateVariable(
158 doc="Number of features before each step in pipeline")
159
160
161 - def __init__(self,
162 feature_selections,
163 **kwargs
164 ):
165 """Initialize feature selection pipeline
166
167 :Parameters:
168 feature_selections : list of FeatureSelection
169 Selections to apply in turn. Order matters. Extra kwargs go to the base class.
170 """
171
172 FeatureSelection.__init__(self, **kwargs)
173
174 self.__feature_selections = feature_selections
175 """Selectors to use in turn"""
176
177
179 if __debug__:
180 debug("FS_", "Untraining FS pipeline: %s" % self)
181 for fs in self.__feature_selections:
182 fs.untrain()
183
184
185 - def __call__(self, dataset, testdataset=None, **kwargs):
218
219 feature_selections = property(fget=lambda self:self.__feature_selections,
220 doc="List of `FeatureSelections`")
221
222
223
225 """Meta feature selection utilizing several embedded selection methods.
226
227 Each embedded feature selection method is computed individually. Afterwards
228 all feature sets are combined by either taking the union or intersection of
229 all sets.
230
231 The individual feature sets of all embedded methods are optionally available
232 from the `selections_ids` state variable.
233 """
234 selections_ids = StateVariable(
235 doc="List of feature id sets for each performed method.")
236
237 - def __init__(self, feature_selections, combiner, **kwargs):
238 """
239 :Parameters:
240 feature_selections: list
241 FeatureSelection instances to run. Order is not important.
242 combiner: 'union', 'intersection'
243 Method used to combine the feature sets selected by all the
244 embedded methods. Remaining kwargs go to FeatureSelection.__init__.
245 """
246 FeatureSelection.__init__(self, **kwargs)
247
248 self.__feature_selections = feature_selections
249 self.__combiner = combiner
250
251
253 if __debug__:
254 debug("FS_", "Untraining combined FS: %s" % self)
255 for fs in self.__feature_selections:
256 fs.untrain()
257
258
259 - def __call__(self, dataset, testdataset=None):
308
309
310 feature_selections = property(fget=lambda self:self.__feature_selections,
311 doc="List of `FeatureSelections`")
312 combiner = property(fget=lambda self:self.__combiner,
313 doc="Selection set combination method.")
314