Package mvpa :: Package clfs :: Module lars
[hide private]
[frames] | [no frames]

Source Code for Module mvpa.clfs.lars

  1  # emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*- 
  2  # vi: set ft=python sts=4 ts=4 sw=4 et: 
  3  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
  4  # 
  5  #   See COPYING file distributed along with the PyMVPA package for the 
  6  #   copyright and license terms. 
  7  # 
  8  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
  9  """Least angle regression (LARS) classifier.""" 
 10   
 11  __docformat__ = 'restructuredtext' 
 12   
 13  # system imports 
 14  import numpy as N 
 15   
 16  import mvpa.base.externals as externals 
 17   
 18  # do conditional to be able to build module reference 
 19  if externals.exists('rpy', raiseException=True) and \ 
 20     externals.exists('lars', raiseException=True): 
 21      import rpy 
 22      rpy.r.library('lars') 
 23   
 24   
 25  # local imports 
 26  from mvpa.clfs.base import Classifier, FailedToTrainError 
 27  from mvpa.measures.base import Sensitivity 
 28   
 29  from mvpa.base import warning 
 30  if __debug__: 
 31      from mvpa.base import debug 
 32   
 33  known_models = ('lasso', 'stepwise', 'lar', 'forward.stagewise') 
 34   
class LARS(Classifier):
    """Least angle regression (LARS) `Classifier`.

    LARS is the model selection algorithm from:

    Bradley Efron, Trevor Hastie, Iain Johnstone and Robert
    Tibshirani, Least Angle Regression Annals of Statistics (with
    discussion) (2004) 32(2), 407-499. A new method for variable
    subset selection, with the lasso and 'epsilon' forward stagewise
    methods as special cases.

    Similar to SMLR, it performs a feature selection while performing
    classification, but instead of starting with all features, it
    starts with none and adds them in, which is similar to boosting.

    This classifier behaves more like a ridge regression in that it
    returns prediction values and it treats the training labels as
    continuous.

    In the true nature of the PyMVPA framework, this algorithm is
    actually implemented in R by Trevor Hastie and wrapped via RPy.
    To make use of LARS, you must have R and RPy installed as well as
    the LARS contributed package. You can install the R and RPy with
    the following command on Debian-based machines:

    sudo aptitude install python-rpy python-rpy-doc r-base-dev

    You can then install the LARS package by running R as root and
    calling:

    install.packages()

    """

    # XXX from yoh: it is linear, isn't it?
    _clf_internals = [ 'lars', 'regression', 'linear', 'has_sensitivity',
                       'does_feature_selection',
                       ]

    def __init__(self, model_type="lasso", trace=False, normalize=True,
                 intercept=True, max_steps=None, use_Gram=False, **kwargs):
        """
        Initialize LARS.

        See the help in R for further details on the following parameters:

        :Parameters:
          model_type : string
            Type of LARS to run. Can be one of ('lasso', 'lar',
            'forward.stagewise', 'stepwise').
          trace : boolean
            Whether to print progress in R as it works.
          normalize : boolean
            Whether to normalize the L2 Norm.
          intercept : boolean
            Whether to add a non-penalized intercept to the model.
          max_steps : None or int
            If not None, specify the total number of iterations to run. Each
            iteration adds a feature, but leaving it none will add until
            convergence.
          use_Gram : boolean
            Whether to compute the Gram matrix (this should be false if you
            have more features than samples.)
        """
        # init base class first
        Classifier.__init__(self, **kwargs)

        if model_type not in known_models:
            # NB: original message concatenated 'Known' + 'are' without a
            # space, yielding the garbled word "Knownare" -- fixed here
            raise ValueError("Unknown model %s for LARS is specified. "
                             "Known are %s" % (model_type, repr(known_models)))

        # set up the params
        self.__type = model_type
        self.__normalize = normalize
        self.__intercept = intercept
        self.__trace = trace
        self.__max_steps = max_steps
        self.__use_Gram = use_Gram

        # pylint friendly initializations
        # index of the step with the lowest Cp (risk); chosen during _train
        self.__lowest_Cp_step = None
        # the beta weights for each feature (row of 'beta' at the chosen step)
        self.__weights = None
        # the R model object after training, used by _predict
        self.__trained_model = None

        # It does not make sense to calculate a confusion matrix for a
        # regression
        # YOH: we do have summary statistics for regressions
        #self.states.enable('training_confusion', False)

    def __repr__(self):
        """String summary of the object
        """
        return "LARS(type='%s', normalize=%s, intercept=%s, trace=%s, " \
               "max_steps=%s, use_Gram=%s, regression=%s, " \
               "enable_states=%s)" % \
               (self.__type,
                self.__normalize,
                self.__intercept,
                self.__trace,
                self.__max_steps,
                self.__use_Gram,
                self.regression,
                str(self.states.enabled))

    def _train(self, data):
        """Train the classifier using `data` (`Dataset`).
        """
        # build the R call once; max_steps is only passed when explicitly
        # requested (R's lars() runs until convergence otherwise)
        lars_kwargs = dict(type=self.__type,
                           normalize=self.__normalize,
                           intercept=self.__intercept,
                           trace=self.__trace,
                           use_Gram=self.__use_Gram)
        if self.__max_steps is not None:
            lars_kwargs['max_steps'] = self.__max_steps

        trained_model = rpy.r.lars(data.samples,
                                   data.labels[:, N.newaxis],
                                   **lars_kwargs)

        # find the step with the lowest Cp (risk)
        # it is often the last step if you set a max_steps
        # must first convert dictionary to array
        try:
            Cp = trained_model['Cp']
            if '0' in Cp:
                # If there was any -- collect Cp values in step order
                # (rpy returns them keyed by the step number as a string)
                Cp_vals = N.asarray([Cp[str(x)] for x in range(len(Cp))])
            else:
                Cp_vals = None
        except TypeError as e:
            raise FailedToTrainError(
                "Failed to train %s on %s. Got '%s' while trying to access "
                "trained model %s" % (self, data, e, trained_model))

        if Cp_vals is None:
            # if there were no any -- just choose 0th
            lowest_Cp_step = 0
        elif N.isnan(Cp_vals[0]):
            # sometimes may come back nan, so just pick the last one
            lowest_Cp_step = len(Cp_vals) - 1
        else:
            # determine the lowest
            lowest_Cp_step = Cp_vals.argmin()

        self.__lowest_Cp_step = lowest_Cp_step
        # set the weights to the lowest Cp step
        self.__weights = trained_model['beta'][lowest_Cp_step, :]

        self.__trained_model = trained_model # bind to an instance
#        # set the weights to the final state
#        self.__weights = self.__trained_model['beta'][-1,:]

    def _predict(self, data):
        """
        Predict the output for the provided data.
        """
        # predict with the lowest Cp step (i.e., the step chosen in _train,
        # not necessarily the final one)
        try:
            res = rpy.r.predict_lars(self.__trained_model,
                                     data,
                                     mode='step',
                                     s=self.__lowest_Cp_step)
                                     #s=self.__trained_model['beta'].shape[0])
            fit = N.atleast_1d(res['fit'])
        except rpy.RPyRException:
            warning("Failed to obtain predictions using %s on %s."
                    "Re-raising exception." % (self, data))
            raise

        self.values = fit
        return fit

    def _getFeatureIds(self):
        """Return ids of the used features
        """
        # features with a non-zero beta weight were selected by LARS
        return N.where(N.abs(self.__weights) > 0)[0]

    def getSensitivityAnalyzer(self, **kwargs):
        """Returns a sensitivity analyzer for LARS."""
        return LARSWeights(self, **kwargs)

    weights = property(lambda self: self.__weights)
class LARSWeights(Sensitivity):
    """`SensitivityAnalyzer` that reports the weights LARS trained
    on a given `Dataset`.
    """

    _LEGAL_CLFS = [ LARS ]

    def _call(self, dataset=None):
        """Extract weights from LARS classifier.

        LARS always has weights available, so nothing has to be computed here.
        """
        beta = self.clf.weights

        if __debug__:
            debug('LARS',
                  "Extracting weights for LARS - "
                  "Result: min=%f max=%f" % (N.min(beta), N.max(beta)))

        return beta
258