Package mvpa :: Package clfs :: Module plr
[hide private]
[frames] | [no frames]

Source Code for Module mvpa.clfs.plr

  1  # emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*- 
  2  # vi: set ft=python sts=4 ts=4 sw=4 et: 
  3  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
  4  # 
  5  #   See COPYING file distributed along with the PyMVPA package for the 
  6  #   copyright and license terms. 
  7  # 
  8  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
  9  """Penalized logistic regression classifier.""" 
 10   
 11  __docformat__ = 'restructuredtext' 
 12   
 13   
 14  import numpy as N 
 15   
 16  from mvpa.misc.exceptions import ConvergenceError 
 17  from mvpa.clfs.base import Classifier, FailedToTrainError 
 18   
 19  if __debug__: 
 20      from mvpa.base import debug 
 21   
 22   
class PLR(Classifier):
    """Penalized logistic regression `Classifier`.

    Fits a binary logistic model with an L2 (ridge) penalty ``lm`` on the
    weights, using Newton/Fisher-scoring iterations (IRLS).  Labels must be
    coded as 0/1.  After training, ``self.w`` holds the feature weights and
    ``self.offset`` the bias term.
    """

    _clf_internals = [ 'plr', 'binary', 'linear' ]

    def __init__(self, lm=1, criterion=1, reduced=0.0, maxiter=20, **kwargs):
        """
        Initialize a penalized logistic regression analysis

        :Parameters:
          lm : int
            the penalty term lambda.
          criterion : int
            the criterion applied to judge convergence (upper bound on the
            squared norm of the Newton step).
          reduced : float
            if not 0, the rank of the data is reduced before
            performing the calculations. In that case, reduce is taken
            as the fraction of the first singular value, at which a
            dimension is not considered significant anymore. A
            reasonable criterion is reduced=0.01
          maxiter : int
            maximum number of iterations. If no convergence occurs
            after this number of iterations, an exception is raised.
        """
        # init base class first
        Classifier.__init__(self, **kwargs)

        self.__lm = lm
        self.__criterion = criterion
        self.__reduced = reduced
        self.__maxiter = maxiter


    def __repr__(self):
        """String summary over the object
        """
        return """PLR(lm=%f, criterion=%d, reduced=%s, maxiter=%d, enable_states=%s)""" % \
               (self.__lm, self.__criterion, self.__reduced, self.__maxiter,
                str(self.states.enabled))


    def _train(self, data):
        """Train the classifier using `data` (`Dataset`).

        :Raises:
          ValueError
            if the dataset labels are not exactly {0, 1}.
          FailedToTrainError
            if rank reduction is requested but the data are degenerate.
          ConvergenceError
            if no convergence within ``maxiter`` Newton steps.
        """
        # Set up the environment for fitting the data
        X = data.samples.T
        d = data.labels
        if set(d) != set([0, 1]):
            # use call-form raise: valid in both Python 2 and 3
            raise ValueError(
                "Regressors for logistic regression should be [0,1]. Got %s"
                % (set(d),))

        if self.__reduced != 0:
            # Data have reduced rank
            from scipy.linalg import svd

            # Compensate for reduced rank:
            # Select only the n largest eigenvectors
            U, S, V = svd(X.T)
            if S[0] == 0:
                raise FailedToTrainError(
                    "Data provided to PLR seems to be degenerate -- "
                    "0-th singular value is 0")
            # normalize singular values so `reduced` acts as a fraction
            # of the largest one
            S /= S[0]
            # NOTE(review): scipy's svd returns V transposed (Vh); the
            # column-slicing below matches the historical implementation --
            # confirm orientation against the back-projection at the end
            V = N.matrix(V[:, :N.max(N.where(S > self.__reduced)) + 1])
            # Map Data to the subspace spanned by the eigenvectors
            X = (X.T * V).T

        nfeatures, npatterns = X.shape

        # Weighting vector (last element is the bias term)
        w = N.matrix(N.zeros((nfeatures + 1, 1), 'd'))
        # Error for convergence criterion
        dw = N.matrix(N.ones((nfeatures + 1, 1), 'd'))
        # Patterns of interest in the columns; append a constant row of
        # ones so the bias is estimated jointly with the weights
        X = N.matrix( \
                N.concatenate((X, N.ones((1, npatterns), 'd')), 0) \
                )
        p = N.matrix(N.zeros((1, npatterns), 'd'))
        # Matrix implementation of penalty term; the bias (last diagonal
        # entry) is deliberately left unpenalized
        Lambda = self.__lm * N.identity(nfeatures + 1, 'd')
        Lambda[nfeatures, nfeatures] = 0
        # Gradient
        g = N.matrix(N.zeros((nfeatures + 1, 1), 'd'))
        # Fisher information matrix
        H = N.matrix(N.identity(nfeatures + 1, 'd'))

        # Optimize: Newton/Fisher-scoring until the squared step norm
        # drops below the convergence criterion
        k = 0
        while N.sum(N.ravel(dw.A ** 2)) > self.__criterion:
            p[:, :] = self.__f(w.T * X)
            g[:, :] = X * (d - p).T - Lambda * w
            H[:, :] = X * N.diag(p.A1 * (1 - p.A1)) * X.T + Lambda
            dw[:, :] = H.I * g
            w += dw
            k += 1
            if k > self.__maxiter:
                raise ConvergenceError(
                    "More than %d Iterations without convergence"
                    % (self.__maxiter))

        if __debug__:
            debug("PLR",
                  "PLR converged after %d steps. Error: %g"
                  % (k, N.sum(N.ravel(dw.A ** 2))))

        if self.__reduced:
            # We have computed in rank reduced space ->
            # Project to original space
            self.w = V * w[:-1]
        else:
            self.w = w[:-1]
        # bias term is the same in either case
        self.offset = w[-1]


    def __f(self, y):
        """This is the logistic function f, that is used for determination of
        the vector w"""
        return 1. / (1 + N.exp(-y))


    def _predict(self, data):
        """
        Predict the class labels for the provided data

        Returns a list of class labels
        """
        # make sure the data are in matrix form
        data = N.matrix(N.asarray(data))

        # get the values and then predictions
        values = N.ravel(self.__f(self.offset + data * self.w))
        predictions = values > 0.5

        # save the state if desired, relying on State._setitem_ to
        # decide if we will actually save the values
        self.predictions = predictions
        self.values = values

        return predictions
166