1
2
3
4
5
6
7
8
9 """Misc. plotting helpers."""
10
11 __docformat__ = 'restructuredtext'
12
13 import pylab as P
14 import numpy as N
15
16 from mvpa.datasets.splitters import NFoldSplitter
17 from mvpa.clfs.distance import squared_euclidean_distance
18
19
20
def plotErrLine(data, x=None, errtype='ste', curves=None, linestyle='--',
                fmt='o', perc_sigchg=False, baseline=None):
    """Make a line plot with errorbars on the data points.

    The mean across the first axis of `data` is plotted against `x`, with
    an errorbar per data point.

    :Parameters:
      data: sequence of sequences
        First axis separates samples and second axis will appear as
        x-axis in the plot. A one-dimensional sequence is treated as a
        single sample.
      x: sequence
        Values to be used as 'x-values' corresponding to the elements of
        the 2nd axis of `data`. If `None`, a sequence of ascending integers
        will be generated.
      errtype: 'ste' | 'std'
        Type of error value to be computed per datapoint.
          'ste': standard error of the mean
          'std': standard deviation
      curves: None | list of tuple(x, y)
        Each tuple represents an additional curve, with x and y coordinates
        of each point on the curve.
      linestyle: str
        matplotlib linestyle argument. Applied to the additional curves if
        any are given, otherwise to the line connecting the datapoints
        (the datapoints are left unconnected when `curves` is given). Set
        to 'None' to disable the line completely.
      fmt: str
        matplotlib plot style argument to be applied to the data points
        and errorbars.
      perc_sigchg: bool
        If `True` the plot will show percent signal changes relative to a
        baseline.
      baseline: float | None
        Baseline used for converting values into percent signal changes.
        If `None` and `perc_sigchg` is `True`, the absolute of the mean of
        the first feature (i.e. [:,0]) will be used as a baseline.

    :Raises:
      ValueError
        If the length of `x` does not match the 2nd axis of `data`, or if
        `errtype` is neither 'ste' nor 'std'. Both checks happen before
        anything is drawn.

    :Example:

      Make dataset with 20 samples from a full sinus wave period,
      computed 30 times with individual noise pattern.

        >>> x = N.linspace(0, N.pi * 2, 20)
        >>> data = N.vstack([N.sin(x)] * 30)
        >>> data += N.random.normal(size=data.shape)

      Now, plot mean data points with error bars, plus a high-res
      version of the original sinus wave.

        >>> x = N.linspace(0, N.pi * 2, 200)
        >>> plotErrLine(data, curves=[(x, N.sin(x))])
        >>> #P.show()
    """
    data = N.asanyarray(data)
    if len(data.shape) < 2:
        data = N.atleast_2d(data)

    # mean across samples: one value per position on the x-axis
    md = data.mean(axis=0)

    if perc_sigchg:
        # the baseline is only needed (and only derived) for this conversion
        if baseline is None:
            baseline = N.abs(md[0])

        # true_divide keeps the ratio fractional even for integer data with
        # an integer baseline (plain `/` truncates there under Python 2)
        md = (N.true_divide(md, baseline) - 1.0) * 100.0

        # the spread is unaffected by the constant shift, so the raw data
        # only needs the same scaling as the mean
        data = N.true_divide(data, baseline) * 100.0

    if x is None:
        x = N.arange(len(md))
    elif len(x) != len(md):
        raise ValueError("The length of `x` (%i) has to match the 2nd "
                         "axis of the data array (%i)" % (len(x), len(md)))

    # compute the error measure (and reject unknown types) before drawing,
    # so that an invalid argument cannot leave a partially drawn figure
    if errtype == 'ste':
        err = data.std(axis=0) / N.sqrt(len(data))
    elif errtype == 'std':
        err = data.std(axis=0)
    else:
        raise ValueError("Unknown error type '%s'" % (errtype,))

    if curves is not None:
        for xc, yc in curves:
            P.plot(xc, yc, linestyle=linestyle)

        # the extra curves carry the line; do not connect the data points
        linestyle = 'None'

    P.errorbar(x, md, err, fmt=fmt, linestyle=linestyle)
119
120
def plotFeatureHist(dataset, xlim=None, noticks=True, perchunk=False,
                    **kwargs):
    """Plot histograms of the feature values, one per label.

    :Parameters:
      dataset: Dataset
        Source of the samples; its `uniquelabels` and `uniquechunks`
        determine the subplot layout.
      xlim: None | 2-tuple
        Common x-axis limits for all histograms.
      noticks: boolean
        If True, no axis ticks will be plotted. This is useful to save
        space in large plots.
      perchunk: boolean
        If True, one histogram will be plotted per label and chunk,
        resulting in a histogram grid (labels x chunks). Otherwise a single
        row with one histogram per label is drawn.
      **kwargs:
        Any additional arguments are passed to matplotlib's hist().
    """
    # NOTE(review): both splitters presumably yield 2-tuples of datasets,
    # one per unique label / chunk -- only the second element is used here.
    label_splitter = NFoldSplitter(1, attr='labels')
    chunk_splitter = NFoldSplitter(1, attr='chunks')

    n_labels = len(dataset.uniquelabels)
    n_chunks = len(dataset.uniquechunks)

    def render(values):
        """Draw `values` as a histogram and apply the shared axis options."""
        P.hist(values, **kwargs)
        if xlim is not None:
            P.xlim(xlim)
        if noticks:
            P.yticks([])
            P.xticks([])

    # running (1-based) subplot index
    panel = 1

    for row, (_, label_ds) in enumerate(label_splitter(dataset)):
        if not perchunk:
            # one panel per label, all in a single row
            P.subplot(1, n_labels, panel)
            render(label_ds.samples)
            P.title('L:' + str(label_ds.uniquelabels[0]))
            panel += 1
            continue

        for col, (_, cell_ds) in enumerate(chunk_splitter(label_ds)):
            P.subplot(n_labels, n_chunks, panel)
            render(cell_ds.samples.ravel())

            # chunk names head the first row, label names the first column
            if row == 0:
                P.title('C:' + str(cell_ds.uniquechunks[0]))
            if col == 0:
                P.ylabel('L:' + str(cell_ds.uniquelabels[0]))

            panel += 1
177
178
180 """Plot the euclidean distances between all samples of a dataset.
181
182 :Parameters:
183 dataset: Dataset
184 Providing the samples.
185 sortbyattr: None | str
186 If None, the samples distances will be in the same order as their
187 appearance in the dataset. Alternatively, the name of a samples
188 attribute can be given, which wil then be used to sort/group the
189 samples, e.g. to investigate the similarity samples by label or by
190 chunks.
191 """
192 if sortbyattr is not None:
193 slicer = []
194 for attr in dataset.__getattribute__('unique' + sortbyattr):
195 slicer += \
196 dataset.__getattribute__('idsby' + sortbyattr)(attr).tolist()
197 samples = dataset.samples[slicer]
198 else:
199 samples = dataset.samples
200
201 ed = N.sqrt(squared_euclidean_distance(samples))
202
203 P.imshow(ed)
204 P.colorbar()
205
206
def plotBars(data, labels=None, title=None, ylim=None, ylabel=None,
             width=0.2, offset=0.2, color='0.6', distance=1.0,
             yerr='ste', **kwargs):
    """Make bar plots with automatically computed error bars.

    Candlestick plot (multiple interleaved barplots) can be done,
    by calling this function multiple times with appropriately modified
    `offset` argument.

    :Parameters:
      data: array (nbars x nobservations) | other sequence type
        Source data for the barplot. The bar height is the mean and the
        error measure is computed along the second axis.
      labels: list | None
        If not None, a label from this list is placed on each bar.
      title: str
        An optional title of the barplot.
      ylim: 2-tuple
        Y-axis range.
      ylabel: str
        An optional label for the y-axis.
      width: float
        Width of a bar. The value should be in a reasonable relation to
        `distance`.
      offset: float
        Constant offset of all bars along the x-axis. Can be used to create
        candlestick plots.
      color: matplotlib color spec
        Color of the bars.
      distance: float
        Distance of two adjacent bars.
      yerr: 'ste' | 'std' | None
        Type of error for the errorbars. If `None` no errorbars are plotted.
        Any other value is handed to `bar()` unchanged.
      **kwargs:
        Any additional arguments are passed to matplotlib's `bar()` function.

    :Returns:
      Whatever matplotlib's `bar()` returns for the plotted bars.
    """
    # x-position of every bar
    xlocations = (N.arange(len(data)) * distance) + offset

    if yerr == 'ste':
        yerr = [N.std(d) / N.sqrt(len(d)) for d in data]
    elif yerr == 'std':
        yerr = [N.std(d) for d in data]
    # anything else (e.g. None or precomputed values) goes to bar() as-is

    plot = P.bar(xlocations,
                 [N.mean(d) for d in data],
                 yerr=yerr,
                 width=width,
                 color=color,
                 ecolor='black',
                 **kwargs)

    # explicit None/length tests: plain truth-testing fails for array inputs
    if ylim is not None and len(ylim):
        P.ylim(*ylim)
    if title:
        P.title(title)

    if labels is not None and len(labels):
        # 2.0 keeps the half-width fractional for an integer `width`
        # (plain `/ 2` truncates under Python 2)
        # NOTE(review): centering at +width/2 assumes left-aligned bars --
        # confirm against the default `align` of the matplotlib in use
        P.xticks(xlocations + width / 2.0, labels)

    if ylabel:
        P.ylabel(ylabel)

    # leave the same margin right of the last bar as left of the first one;
    # nothing to scale to when there are no bars at all
    if len(xlocations):
        P.xlim(0, xlocations[-1] + width + offset)

    return plot
278
279
281 """Create a new colormap from the named colormap, where it got reversed
282
283 """
284 import matplotlib._cm as _cm
285 import matplotlib as mpl
286 try:
287 cmap_data = eval('_cm._%s_data' % cmap_name)
288 except:
289 raise ValueError, "Cannot obtain data for the colormap %s" % cmap_name
290 new_data = dict( [(k, [(v[i][0], v[-(i+1)][1], v[-(i+1)][2])
291 for i in xrange(len(v))])
292 for k,v in cmap_data.iteritems()] )
293 return mpl.colors.LinearSegmentedColormap('%s_rev' % cmap_name,
294 new_data, _cm.LUTSIZE)
295
296
298 """Quick plot to see chunk sctructure in dataset with 2 features
299
300 if clf_labels is provided for the predicted labels, then
301 incorrectly labeled samples will have 'x' in them
302 """
303 if ds.nfeatures != 2:
304 raise ValueError, "Can plot only in 2D, ie for datasets with 2 features"
305 if P.matplotlib.get_backend() == 'TkAgg':
306 P.ioff()
307 if clf_labels is not None and len(clf_labels) != ds.nsamples:
308 clf_labels = None
309 colors = ('b', 'g', 'r', 'c', 'm', 'y', 'k', 'w')
310 labels = ds.uniquelabels
311 labels_map = dict(zip(labels, colors[:len(labels)]))
312 for chunk in ds.uniquechunks:
313 chunk_text = str(chunk)
314 ids = ds.where(chunks=chunk)
315 ds_chunk = ds[ids]
316 for i in xrange(ds_chunk.nsamples):
317 s = ds_chunk.samples[i]
318 l = ds_chunk.labels[i]
319 format = ''
320 if clf_labels != None:
321 if clf_labels[i] != ds_chunk.labels[i]:
322 P.plot([s[0]], [s[1]], 'x' + labels_map[l])
323 P.text(s[0], s[1], chunk_text, color=labels_map[l],
324 horizontalalignment='center',
325 verticalalignment='center',
326 )
327 dss = ds.samples
328 P.axis((1.1 * N.min(dss[:, 0]),
329 1.1 * N.max(dss[:, 1]),
330 1.1 * N.max(dss[:, 0]),
331 1.1 * N.min(dss[:, 1])))
332 P.draw()
333 P.ion()
334