1
2
3
4
5
6
7
8
9 """Little statistics helper"""
10
11 __docformat__ = 'restructuredtext'
12
13 from mvpa.base import externals
14
15 if externals.exists('scipy', raiseException=True):
16 import scipy.stats as stats
17
18 import numpy as N
19 import copy
20
def chisquare(obs, exp=None):
    """Compute the chisquare value of a contingency table with arbitrary
    dimensions.

    If no expected frequencies are supplied, the total N is assumed to be
    equally distributed across all cells.

    :Parameters:
      obs : array-like
        Observed frequencies; a table with any number of dimensions.
      exp : array-like or None
        Expected frequencies, broadcastable against `obs`.  If None, the
        sum of `obs` is spread evenly over all cells.

    Returns: chisquare-stats, associated p-value (upper tail)
    """
    obs = N.asarray(obs)

    # `is None` rather than `== None`: comparing an ndarray against None
    # with `==` is evaluated element-wise and cannot be used in an `if`
    if exp is None:
        # no expectation given: every cell gets the same share of the total
        exp = N.ones(obs.shape) * N.sum(obs) / N.prod(obs.shape)

    # force floating point (and accept plain lists) so that the division
    # below is never an integer division
    exp = N.asarray(exp, dtype=float)

    chisq = N.sum((obs - exp) ** 2 / exp)

    # degrees of freedom: number of cells minus one.  chi2.sf is the
    # upper-tail probability, i.e. what stats.chisqprob used to compute
    # before it was removed from SciPy.
    return chisq, stats.chi2.sf(chisq, N.prod(obs.shape) - 1)
47
48
50 """DSMatrix allows for the creation of dissimilarity matrices using
51 arbitrary distance metrics.
52 """
53
54
def __init__(self, data_vectors, metric='spearman'):
    """Initialize DSMatrix

    :Parameters:
      data_vectors : ndarray
        m x n collection of vectors, where m is the number of exemplars
        and n is the number of features per exemplar.  Any array-like
        (e.g. a nested list) is accepted.  If the collection holds
        exactly one value per exemplar, each exemplar is treated as
        having a single feature.
      metric : string
        Distance metric to use: 'euclidean', 'spearman', 'pearson' or
        'confusion'.  'spearman' and 'pearson' index every exemplar as
        a row (``data_vectors[i, :]``) and therefore need 2-D input.

    :Raises:
      ValueError
        If `metric` is not one of the four supported names.
    """
    # representations derived from the full matrix; not computed yet
    self.full_matrix = []
    self.u_triangle = None
    self.vector_form = None

    # this one is known straight away
    self.metric = metric

    # Accept nested lists as well; arrays (and subclasses) pass through
    # unchanged.
    data_vectors = N.asanyarray(data_vectors)

    # number of exemplars, and number of features per exemplar: one value
    # per exemplar means a single feature
    num_exem = N.shape(data_vectors)[0]
    if num_exem == N.size(data_vectors):
        num_features = 1
    else:
        num_features = N.shape(data_vectors)[1]

    # Pick the pairwise dissimilarity once, instead of repeating the
    # double loop for every metric.
    if metric == 'euclidean':
        def dissimilarity(i, j):
            # straight-line distance between the two exemplars
            return N.linalg.norm(data_vectors[i] - data_vectors[j])
    elif metric == 'spearman':
        def dissimilarity(i, j):
            # 1 - rank correlation
            return 1 - stats.spearmanr(
                data_vectors[i, :], data_vectors[j, :])[0]
    elif metric == 'pearson':
        def dissimilarity(i, j):
            # 1 - linear correlation
            return 1 - stats.pearsonr(
                data_vectors[i, :], data_vectors[j, :])[0]
    elif metric == 'confusion':
        def dissimilarity(i, j):
            # 0 if every feature of the two exemplars matches, 1 if not
            # (floor of the fraction of matching features, subtracted
            # from one)
            n_equal = N.sum(data_vectors[i] == data_vectors[j])
            return 1 - int(n_equal // num_features)
    else:
        # An unsupported name used to yield an all-zero matrix silently.
        raise ValueError(
            "Unknown metric %r; expected one of 'euclidean', 'spearman', "
            "'pearson', 'confusion'" % (metric,))

    # N.asmatrix is what the N.mat alias pointed to; the alias itself was
    # removed in NumPy 2.0.
    dsmatrix = N.asmatrix(N.zeros((num_exem, num_exem)))

    # fill every cell, both triangles and the diagonal
    for i in range(num_exem):
        for j in range(num_exem):
            dsmatrix[i, j] = dissimilarity(i, j)

    self.full_matrix = dsmatrix
135
137
# Build the upper-triangular form only on first request: N.triu keeps the
# main diagonal and everything above it and zeroes the entries below.
# The result is stored in self.u_triangle so later calls reuse it.
138 if (self.u_triangle is None):
139 self.u_triangle = N.triu(self.full_matrix)
140
141 return self.u_triangle
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
178
179
180
# Hand back the complete dissimilarity matrix stored on the instance
# (both triangles and the diagonal).
182 return self.full_matrix
183
186