mvpa.misc.errorfx

1 # emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*- 2 # vi: set ft=python sts=4 ts=4 sw=4 et: 3 ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 4 # 5 # See COPYING file distributed along with the PyMVPA package for the 6 # copyright and license terms. 7 # 8 ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 9 """Error functions helpers. 10 11 PyMVPA can use arbitrary function which takes 2 arguments: predictions 12 and targets and spits out a scalar value. Functions below are for the 13 convenience, and they conform to the agreement that 'smaller' is 'better'""" 14 15 __docformat__ = 'restructuredtext' 16 17 18 import numpy as N 19 from numpy import trapz 20 21 from mvpa.base import externals 22 23 # Various helper functions

def meanPowerFx(data):
    """Return the mean of the squared values of `data`.

    Comparable to the variance, except that the mean is not subtracted
    before squaring.
    """
    # asanyarray lets array subclasses pass through untouched
    values = N.asanyarray(data)
    squared = values ** 2
    return N.mean(squared)

30

def rootMeanPowerFx(data):
    """Return the square root of the mean power of `data`.

    The result is on the same scale as an RMSE, so the two can be
    compared directly.
    """
    mean_power = meanPowerFx(data)
    return N.sqrt(mean_power)

37 38

class _ErrorFx(object):
    """Common error function interface, computing the difference between
    some target and some predicted values.

    Subclasses override `__call__(predicted, target)` and return a scalar.
    """

    # XXX there is no reason to keep this class around imho -- it is
    # just the skeleton for all the _ErrorFxs -- interface they
    # must conform... and there is no reason to have all those ErrorFx
    # as classes... may be they should be just functions?

    def __str__(self):
        """Return the class name as the string form."""
        return self.__class__.__name__

    def __repr__(self):
        """Return the class name followed by an empty argument list."""
        return self.__class__.__name__ + "()"

    def __call__(self, predicted, target):
        """Compute some error value from the given target and predicted
        values (both sequences).

        :Raises:
          NotImplementedError
            Always; concrete error functions must override this method.
        """
        # NotImplemented is a comparison sentinel, not an exception;
        # raising it produces a confusing TypeError instead.
        raise NotImplementedError

64 65

class RMSErrorFx(_ErrorFx):
    """Root mean squared error between predicted and target values."""

    def __call__(self, predicted, target):
        """Return the RMSE of `predicted` against `target`.

        Both arguments may be scalars or sequences, but must have the
        same length.
        """
        residuals = N.subtract(predicted, target)
        mean_squared = N.mean(residuals ** 2)
        return N.sqrt(mean_squared)

75 76

class MeanMismatchErrorFx(_ErrorFx):
    """Computes the fraction of mismatches between some target and some
    predicted values.

    The result lies in [0, 1]: 0 means every prediction matches its
    target, 1 means none does.
    """

    def __call__(self, predicted, target):
        """Both 'predicted' and 'target' can be either scalars or sequences,
        but have to be of the same length.

        :Returns:
          One minus the mean of the element-wise equality of the two
          inputs.
        """
        # Coerce to arrays first: `==` on plain lists/tuples yields a
        # single bool for the whole sequence rather than an element-wise
        # comparison, which would collapse the result to 0.0 or 1.0.
        matches = N.asanyarray(predicted) == N.asanyarray(target)
        return 1 - N.mean(matches)

86 87

class AUCErrorFx(_ErrorFx):
    """Computes the area under the ROC for the given the
    target and predicted to make the prediction.

    The intermediate curves of the most recent call are kept on the
    instance as `t` (sorted boolean targets), `tp` and `fp`.
    """

    def __call__(self, predicted, target):
        """Requires all arguments.

        :Returns:
          Trapezoidal integral of `tp` over `fp`.
        """
        # sort the target in descending order based on the predicted and
        # set to boolean (values > 0 count as positive)
        order = N.argsort(predicted)[::-1]
        self.t = t = N.asanyarray(target)[order] > 0

        # Builtin float as the accumulator dtype forces true division;
        # it is exactly what the removed `N.float` alias referred to.
        # calculate the true positives
        self.tp = tp = N.concatenate(
            ([0], N.cumsum(t)/t.sum(dtype=float), [1]))

        # calculate the false positives
        self.fp = fp = N.concatenate(
            ([0], N.cumsum(~t)/(~t).sum(dtype=float), [1]))

        return trapz(tp, fp)

if externals.exists('scipy'):
    from scipy.stats import pearsonr

    class CorrErrorFx(_ErrorFx):
        """Correlation-based error between predicted and target values.

        The value returned is 1 - correlation coefficient, so that
        minimizing it leads to the best value (at 0).
        """

        def __call__(self, predicted, target):
            """Requires all arguments."""
            pearson = pearsonr(predicted, target)
            return 1.0 - pearson[0]


    class CorrErrorPFx(_ErrorFx):
        """p-value of the correlation between the target and the
        predicted values.
        """

        def __call__(self, predicted, target):
            """Requires all arguments."""
            pearson = pearsonr(predicted, target)
            return pearson[1]

else:
    # slower(?) and bogus (p-value) implementations for non-scipy users
    # TODO: implement them more or less correctly with numpy
    #       functionality

    class CorrErrorFx(_ErrorFx):
        """Correlation-based error between predicted and target values,
        computed with numpy only. Returns 1 - CC.
        """

        def __call__(self, predicted, target):
            """Requires all arguments."""
            # both inputs are reshaped to 1-d vectors of len(predicted)
            n_values = len(predicted)
            flat_predicted = N.reshape(predicted, n_values)
            flat_target = N.reshape(target, n_values)
            cc_matrix = N.corrcoef(flat_predicted, flat_target)
            return 1.0 - cc_matrix[0, 1]


    class CorrErrorPFx(_ErrorFx):
        """Placeholder for the p-value of the correlation between the
        target and the predicted values when scipy is unavailable.
        """

        def __call__(self, predicted, target):
            """Requires all arguments. Warns and returns -1.0."""
            from mvpa.base import warning
            warning("p-value for correlation is implemented only when scipy is "
                    "available. Bogus value -1.0 is returned otherwise")
            return -1.0

158 159

class RelativeRMSErrorFx(_ErrorFx):
    """RMSE divided by the root mean power of the target.

    This is a scaled RMSE: a perfect reconstruction gives values near 0,
    no reconstruction gives values around 1.0. Note that it is not
    commutative -- swapping predicted and target might give a completely
    different answer.
    """

    def __call__(self, predicted, target):
        """Return RMSE(predicted, target) / root mean power of target."""
        rmse = RMSErrorFx()(predicted, target)
        target_scale = rootMeanPowerFx(target)
        return rmse / target_scale

170 171

class Variance1SVFx(_ErrorFx):
    """Ratio of variance described by the first singular value component.

    Of limited use -- left for the sake of not wasting it
    """

    def __call__(self, predicted, target):
        """Return s[0]**2 / sum(s**2) for the singular values `s` of the
        demeaned, column-stacked (predicted, target) data.
        """
        stacked = N.vstack((predicted, target)).T
        # remove the per-column mean
        centered = stacked - N.mean(stacked, axis=0)
        _, singular_values, _ = N.linalg.svd(centered, full_matrices=0)
        # make sure the values are in descending order
        descending = N.sort(singular_values)[::-1]
        total_power = N.sum(descending ** 2)
        return descending[0] ** 2 / total_power

188

Source Code for Module mvpa.misc.errorfx