mvpa.misc.stats

50 """DSMatrix allows for the creation of dissilimarity matrices using 51 arbitrary distance metrics. 52 """ 53 54 # metric is a string

55 - def __init__(self, data_vectors, metric='spearman'):

56 """Initialize DSMatrix 57 58 :Parameters: 59 data_vectors : ndarray 60 m x n collection of vectors, where m is the number of exemplars 61 and n is the number of features per exemplar 62 metric : string 63 Distance metric to use (e.g., 'euclidean', 'spearman', 'pearson', 64 'confusion') 65 """ 66 # init members 67 self.full_matrix = [] 68 self.u_triangle = None 69 self.vector_form = None 70 71 # this one we know straight away, so set it 72 self.metric = metric 73 74 # size of dataset (checking if we're dealing with a column vector only) 75 num_exem = N.shape(data_vectors)[0] 76 flag_1d = False 77 # changed 4/26/09 to new way of figuring out if array is 1-D 78 #if (isinstance(data_vectors, N.ndarray)): 79 if (not(num_exem == N.size(data_vectors))): 80 num_features = N.shape(data_vectors)[1] 81 else: 82 flag_1d = True 83 num_features = 1 84 85 # generate output (dissimilarity) matrix 86 dsmatrix = N.mat(N.zeros((num_exem, num_exem))) 87 88 if (metric == 'euclidean'): 89 #print 'Using Euclidean distance metric...' 90 # down rows 91 for i in range(num_exem): 92 # across columns 93 for j in range(num_exem): 94 if (not(flag_1d)): 95 dsmatrix[i, j] = N.linalg.norm( 96 data_vectors[i, :] - data_vectors[j, :]) 97 else: 98 dsmatrix[i, j] = N.linalg.norm( 99 data_vectors[i] - data_vectors[j]) 100 101 elif (metric == 'spearman'): 102 #print 'Using Spearman rank-correlation metric...' 103 # down rows 104 for i in range(num_exem): 105 # across columns 106 for j in range(num_exem): 107 dsmatrix[i, j] = 1 - stats.spearmanr( 108 data_vectors[i,:], data_vectors[j,:])[0] 109 110 elif (metric == 'pearson'): 111 #print 'Using Pearson correlation metric...' 112 # down rows 113 for i in range(num_exem): 114 # across columns 115 for j in range(num_exem): 116 dsmatrix[i, j] = 1 - stats.pearsonr( 117 data_vectors[i,:], data_vectors[j,:])[0] 118 119 elif (metric == 'confusion'): 120 #print 'Using confusion correlation metric...' 121 # down rows 122 for i in range(num_exem): 123 # across columns 124 for j in range(num_exem): 125 if (not(flag_1d)): 126 dsmatrix[i, j] = 1 - int( 127 N.floor(N.sum(( 128 data_vectors[i, :] == data_vectors[j, :] 129 ).astype(N.int32)) / num_features)) 130 else: 131 dsmatrix[i, j] = 1 - int( 132 data_vectors[i] == data_vectors[j]) 133 134 self.full_matrix = dsmatrix

135

136 - def getTriangle(self):

137 # if we need to create the u_triangle representation, do so 138 if (self.u_triangle is None): 139 self.u_triangle = N.triu(self.full_matrix) 140 141 return self.u_triangle

142 143 # create the dissimilarity matrix on the (upper) triangle of the two 144 # two dissimilarity matrices; we can just reuse the same dissimilarity 145 # matrix code, but since it will return a matrix, we need to pick out 146 # either dsm[0,1] or dsm[1,0] 147 # note: this is a bit of a kludge right now, but it's the only way to solve 148 # certain problems: 149 # 1. Set all 0-valued elements in the original matrix to -1 (an impossible 150 # value for a dissimilarity matrix) 151 # 2. Find the upper triangle of the matrix 152 # 3. Create a vector from the upper triangle, but only with the 153 # elements whose absolute value is greater than 0 -- this 154 # will keep everything from the original matrix that wasn't 155 # part of the zero'ed-out portion when we took the upper 156 # triangle 157 # 4. Set all the -1-valued elements in the vector to 0 (their 158 # original value) 159 # 5. Cast to numpy array

160 - def getVectorForm(self):

161 if (self.vector_form is not None): 162 return self.vector_form 163 164 orig_dsmatrix = copy.deepcopy(self.getFullMatrix()) 165 166 orig_dsmatrix[orig_dsmatrix == 0] = -1 167 168 orig_tri = N.triu(orig_dsmatrix) 169 170 vector_form = orig_tri[abs(orig_tri) > 0] 171 172 vector_form[vector_form == -1] = 0 173 174 vector_form = N.asarray(vector_form) 175 self.vector_form = vector_form[0] 176 177 return self.vector_form

178 179 # XXX is there any reason to have these get* methods 180 # instead of plain access to full_matrix and method?

181 - def getFullMatrix(self):

182 return self.full_matrix

183

184 - def getMetric(self):

185 return self.metric

Source Code for Module mvpa.misc.stats