1
2
3
4
5
6
7
8
9 """Least angle regression (LARS) classifier."""
10
11 __docformat__ = 'restructuredtext'
12
13
14 import numpy as N
15
16 import mvpa.base.externals as externals
17
18
19 if externals.exists('rpy', raiseException=True) and \
20 externals.exists('lars', raiseException=True):
21 import rpy
22 rpy.r.library('lars')
23
24
25
26 from mvpa.clfs.base import Classifier, FailedToTrainError
27 from mvpa.measures.base import Sensitivity
28
29 from mvpa.base import warning
30 if __debug__:
31 from mvpa.base import debug
32
# Model types accepted by R's lars() `type` argument; validated in LARS.__init__.
known_models = ('lasso', 'stepwise', 'lar', 'forward.stagewise')
34
35 -class LARS(Classifier):
36 """Least angle regression (LARS) `Classifier`.
37
38 LARS is the model selection algorithm from:
39
40 Bradley Efron, Trevor Hastie, Iain Johnstone and Robert
41 Tibshirani, Least Angle Regression Annals of Statistics (with
42 discussion) (2004) 32(2), 407-499. A new method for variable
43 subset selection, with the lasso and 'epsilon' forward stagewise
44 methods as special cases.
45
46 Similar to SMLR, it performs a feature selection while performing
47 classification, but instead of starting with all features, it
48 starts with none and adds them in, which is similar to boosting.
49
50 This classifier behaves more like a ridge regression in that it
51 returns prediction values and it treats the training labels as
52 continuous.
53
54 In the true nature of the PyMVPA framework, this algorithm is
55 actually implemented in R by Trevor Hastie and wrapped via RPy.
56 To make use of LARS, you must have R and RPy installed as well as
57 the LARS contributed package. You can install the R and RPy with
58 the following command on Debian-based machines:
59
60 sudo aptitude install python-rpy python-rpy-doc r-base-dev
61
62 You can then install the LARS package by running R as root and
63 calling:
64
65 install.packages()
66
67 """
68
69
70 _clf_internals = [ 'lars', 'regression', 'linear', 'has_sensitivity',
71 'does_feature_selection',
72 ]
73 - def __init__(self, model_type="lasso", trace=False, normalize=True,
74 intercept=True, max_steps=None, use_Gram=False, **kwargs):
75 """
76 Initialize LARS.
77
78 See the help in R for further details on the following parameters:
79
80 :Parameters:
81 model_type : string
82 Type of LARS to run. Can be one of ('lasso', 'lar',
83 'forward.stagewise', 'stepwise').
84 trace : boolean
85 Whether to print progress in R as it works.
86 normalize : boolean
87 Whether to normalize the L2 Norm.
88 intercept : boolean
89 Whether to add a non-penalized intercept to the model.
90 max_steps : None or int
91 If not None, specify the total number of iterations to run. Each
92 iteration adds a feature, but leaving it none will add until
93 convergence.
94 use_Gram : boolean
95 Whether to compute the Gram matrix (this should be false if you
96 have more features than samples.)
97 """
98
99 Classifier.__init__(self, **kwargs)
100
101 if not model_type in known_models:
102 raise ValueError('Unknown model %s for LARS is specified. Known' %
103 model_type + 'are %s' % `known_models`)
104
105
106 self.__type = model_type
107 self.__normalize = normalize
108 self.__intercept = intercept
109 self.__trace = trace
110 self.__max_steps = max_steps
111 self.__use_Gram = use_Gram
112
113
114 self.__lowest_Cp_step = None
115 self.__weights = None
116 """The beta weights for each feature."""
117 self.__trained_model = None
118 """The model object after training that will be used for
119 predictions."""
120
121
122
123
124
125
127 """String summary of the object
128 """
129 return "LARS(type='%s', normalize=%s, intercept=%s, trace=%s, " \
130 "max_steps=%s, use_Gram=%s, regression=%s, " \
131 "enable_states=%s)" % \
132 (self.__type,
133 self.__normalize,
134 self.__intercept,
135 self.__trace,
136 self.__max_steps,
137 self.__use_Gram,
138 self.regression,
139 str(self.states.enabled))
140
141
143 """Train the classifier using `data` (`Dataset`).
144 """
145 if self.__max_steps is None:
146
147 trained_model = rpy.r.lars(data.samples,
148 data.labels[:,N.newaxis],
149 type=self.__type,
150 normalize=self.__normalize,
151 intercept=self.__intercept,
152 trace=self.__trace,
153 use_Gram=self.__use_Gram)
154 else:
155
156 trained_model = rpy.r.lars(data.samples,
157 data.labels[:,N.newaxis],
158 type=self.__type,
159 normalize=self.__normalize,
160 intercept=self.__intercept,
161 trace=self.__trace,
162 use_Gram=self.__use_Gram,
163 max_steps=self.__max_steps)
164
165
166
167
168 try:
169 Cp = trained_model['Cp']
170 if '0' in Cp:
171
172 Cp_vals = N.asarray([Cp[str(x)] for x in range(len(Cp))])
173 else:
174 Cp_vals = None
175 except TypeError, e:
176 raise FailedToTrainError, \
177 "Failed to train %s on %s. Got '%s' while trying to access " \
178 "trained model %s" % (self, data, e, trained_model)
179
180 if Cp_vals is None:
181
182 lowest_Cp_step = 0
183 elif N.isnan(Cp_vals[0]):
184
185 lowest_Cp_step = len(Cp_vals)-1
186 else:
187
188 lowest_Cp_step = Cp_vals.argmin()
189
190 self.__lowest_Cp_step = lowest_Cp_step
191
192 self.__weights = trained_model['beta'][lowest_Cp_step, :]
193
194 self.__trained_model = trained_model
195
196
197
198
200 """
201 Predict the output for the provided data.
202 """
203
204
205 try:
206 res = rpy.r.predict_lars(self.__trained_model,
207 data,
208 mode='step',
209 s=self.__lowest_Cp_step)
210
211 fit = N.atleast_1d(res['fit'])
212 except rpy.RPyRException, e:
213 warning("Failed to obtain predictions using %s on %s."
214 "Re-raising exception." % (self, data))
215 raise
216
217 self.values = fit
218 return fit
219
220
222 """Return ids of the used features
223 """
224 return N.where(N.abs(self.__weights)>0)[0]
225
226
227
229 """Returns a sensitivity analyzer for LARS."""
230 return LARSWeights(self, **kwargs)
231
232 weights = property(lambda self: self.__weights)
233
234
235
class LARSWeights(Sensitivity):
    """`SensitivityAnalyzer` that reports the weights LARS trained
    on a given `Dataset`.
    """

    # only LARS classifiers may be bound to this analyzer
    _LEGAL_CLFS = [ LARS ]

    def _call(self, dataset=None):
        """Extract weights from LARS classifier.

        LARS always has weights available, so nothing has to be computed here.
        """
        clf = self.clf
        weights = clf.weights

        if __debug__:
            debug('LARS',
                  "Extracting weights for LARS - "
                  "Result: min=%f max=%f" % (N.min(weights), N.max(weights)))

        return weights
258