
Source Code for Module mvpa.clfs.enet

# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
# vi: set ft=python sts=4 ts=4 sw=4 et:
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
#
#   See COPYING file distributed along with the PyMVPA package for the
#   copyright and license terms.
#
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
"""Elastic-Net (ENET) regression classifier."""

__docformat__ = 'restructuredtext'

# system imports
import numpy as N

import mvpa.base.externals as externals

# do conditional to be able to build module reference
if externals.exists('rpy', raiseException=True) and \
   externals.exists('elasticnet', raiseException=True):
    import rpy
    rpy.r.library('elasticnet')


# local imports
from mvpa.clfs.base import Classifier
from mvpa.measures.base import Sensitivity

if __debug__:
    from mvpa.base import debug


class ENET(Classifier):
    """Elastic-Net regression (ENET) `Classifier`.

    Elastic-Net is the model selection algorithm from:

    :ref:`Zou and Hastie (2005) <ZH05>` 'Regularization and Variable
    Selection via the Elastic Net' Journal of the Royal Statistical
    Society, Series B, 67, 301-320.

    Similar to SMLR, it performs feature selection while performing
    classification, but instead of starting with all features, it
    starts with none and adds them in, which is similar to boosting.

    Unlike LARS, it has both L1 and L2 regularization (instead of just
    L1).  This means that while it tries to sparsify the features, it
    also tries to keep redundant features, which may be very good for
    fMRI classification.

    In the true nature of the PyMVPA framework, this algorithm was
    actually implemented in R by Zou and Hastie and is wrapped via RPy.
    To make use of ENET, you must have R and RPy installed as well as
    both the lars and elasticnet contributed packages.  You can install
    R and RPy with the following command on Debian-based machines:

    sudo aptitude install python-rpy python-rpy-doc r-base-dev

    You can then install the lars and elasticnet packages by running R
    as root and calling:

    install.packages()

    A brief usage sketch appears at the end of this listing.
    """

    _clf_internals = [ 'enet', 'regression', 'linear', 'has_sensitivity',
                       'does_feature_selection'
                       ]

    def __init__(self, lm=1.0, trace=False, normalize=True,
                 intercept=True, max_steps=None, **kwargs):
        """
        Initialize ENET.

        See the help in R for further details on the following parameters:

        :Parameters:
          lm : float
            Penalty parameter.  0 will perform LARS with no ridge regression.
            Default is 1.0.
          trace : boolean
            Whether to print progress in R as it works.
          normalize : boolean
            Whether to normalize the L2 Norm.
          intercept : boolean
            Whether to add a non-penalized intercept to the model.
          max_steps : None or int
            If not None, specifies the total number of iterations to run.
            Each iteration adds a feature; leaving it None will add features
            until convergence.
        """
        # init base class first
        Classifier.__init__(self, **kwargs)

        # set up the params
        self.__lm = lm
        self.__normalize = normalize
        self.__intercept = intercept
        self.__trace = trace
        self.__max_steps = max_steps

        # pylint friendly initializations
        self.__weights = None
        """The beta weights for each feature."""
        self.__trained_model = None
        """The model object after training that will be used for
        predictions."""

        # It does not make sense to calculate a confusion matrix for a
        # regression
        self.states.enable('training_confusion', False)

    def __repr__(self):
        """String summary of the object
        """
        return """ENET(lm=%s, normalize=%s, intercept=%s, trace=%s, max_steps=%s, enable_states=%s)""" % \
               (self.__lm,
                self.__normalize,
                self.__intercept,
                self.__trace,
                self.__max_steps,
                str(self.states.enabled))


    def _train(self, data):
        """Train the classifier using `data` (`Dataset`).
        """
        if self.__max_steps is None:
            # train without specifying max_steps
            self.__trained_model = rpy.r.enet(data.samples,
                                              data.labels[:, N.newaxis],
                                              self.__lm,
                                              normalize=self.__normalize,
                                              intercept=self.__intercept,
                                              trace=self.__trace)
        else:
            # train with specifying max_steps
            self.__trained_model = rpy.r.enet(data.samples,
                                              data.labels[:, N.newaxis],
                                              self.__lm,
                                              normalize=self.__normalize,
                                              intercept=self.__intercept,
                                              trace=self.__trace,
                                              max_steps=self.__max_steps)

        # find the step with the lowest Cp (risk)
        # it is often the last step if you set a max_steps
        # must first convert dictionary to array
        # Cp_vals = N.asarray([self.__trained_model['Cp'][str(x)]
        #                      for x in range(len(self.__trained_model['Cp']))])
        # self.__lowest_Cp_step = Cp_vals.argmin()

        # set the weights to the last step
        self.__weights = N.zeros(data.nfeatures,
                                 dtype=self.__trained_model['beta.pure'].dtype)
        ind = N.asarray(self.__trained_model['allset']) - 1
        self.__weights[ind] = self.__trained_model['beta.pure'][-1, :]

        # # set the weights to the final state
        # self.__weights = self.__trained_model['beta'][-1,:]

    def _predict(self, data):
        """Predict the output for the provided data.
        """
        # predict with the final state (i.e., the last step)
        res = rpy.r.predict_enet(self.__trained_model,
                                 data,
                                 mode='step',
                                 type='fit',
                                 s=self.__trained_model['beta.pure'].shape[0])
                                 #s=self.__lowest_Cp_step)

        fit = N.asarray(res['fit'])
        if len(fit.shape) == 0:
            # if we just got 1 sample with a scalar
            fit = fit.reshape( (1,) )
        return fit


    def _getFeatureIds(self):
        """Return ids of the used features
        """
        return N.where(N.abs(self.__weights) > 0)[0]

    def getSensitivityAnalyzer(self, **kwargs):
        """Returns a sensitivity analyzer for ENET."""
        return ENETWeights(self, **kwargs)


    weights = property(lambda self: self.__weights)


class ENETWeights(Sensitivity):
    """`SensitivityAnalyzer` that reports the weights ENET trained
    on a given `Dataset`.
    """

    _LEGAL_CLFS = [ ENET ]

    def _call(self, dataset=None):
        """Extract weights from ENET classifier.

        ENET always has weights available, so nothing has to be computed here.
        """
        clf = self.clf
        weights = clf.weights

        if __debug__:
            debug('ENET',
                  "Extracting weights for ENET - " +
                  "Result: min=%f max=%f" %
                  (N.min(weights), N.max(weights)))

        return weights
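
A minimal usage sketch follows, showing how the classifier and its sensitivity analyzer might be driven. It is not part of the original module: it assumes the PyMVPA 0.4-style `Dataset` class (taken here to be importable as `mvpa.datasets.base.Dataset` with `samples`/`labels` keyword arguments), a working R/RPy/elasticnet installation, and purely synthetic data.

if __name__ == '__main__':
    # Usage sketch only (assumptions as stated above); the data below is
    # synthetic and the Dataset import path reflects the PyMVPA 0.4 layout.
    from mvpa.datasets.base import Dataset

    # tiny synthetic regression problem: 20 samples, 5 features,
    # with targets generated from a sparse set of true weights
    samples = N.random.randn(20, 5)
    labels = N.dot(samples, N.array([1.0, 0.0, -2.0, 0.0, 0.5]))
    dataset = Dataset(samples=samples, labels=labels)

    # train with the default penalty and inspect the fitted weights
    clf = ENET(lm=1.0)
    clf.train(dataset)
    print "non-zero feature ids:", N.where(N.abs(clf.weights) > 0)[0]
    print "predictions:", clf.predict(dataset.samples)

    # the sensitivity analyzer simply reports the trained weights
    sens = clf.getSensitivityAnalyzer()
    print "sensitivities:", sens(dataset)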