9 """Miscelaneous functions/datasets to be used in the unit tests"""
10
11 __docformat__ = 'restructuredtext'
12
13 from os import environ
14
15 import unittest, traceback, sys
16 import numpy as N
17
18 from mvpa import cfg
19 from mvpa.datasets import Dataset
20 from mvpa.datasets.splitters import OddEvenSplitter
21 from mvpa.datasets.masked import MaskedDataset
22 from mvpa.clfs.base import Classifier
23 from mvpa.misc.state import ClassWithCollections
24 from mvpa.misc.data_generators import *
25
26 __all__ = [ 'datasets', 'sweepargs', 'N', 'unittest', '_all_states_enabled' ]
27
28 if __debug__:
29 from mvpa.base import debug
30 __all__.append('debug')
31
32 _all_states_enabled = 'ENFORCE_STATES_ENABLED' in debug.active
33 else:
34 _all_states_enabled = False
35


def sweepargs(**kwargs):
    """Decorator function to sweep over a given set of classifiers

    :Parameters:
      clfs : list of `Classifier`
        List of classifiers to run the method on

    Often some unittest method can be run on multiple classifiers.
    This decorator automates doing so (a usage sketch follows the
    function definition below).
    """
    def unittest_method(method):
        def do_sweep(*args_, **kwargs_):
            def untrain_clf(argvalue):
                """Little helper"""
                if isinstance(argvalue, Classifier):
                    # untrain the classifier after its use, just to be safe
                    argvalue.retrainable = False
                    argvalue.untrain()

            failed_tests = {}
            for argname in kwargs.keys():
                for argvalue in kwargs[argname]:
                    if isinstance(argvalue, Classifier):
                        # clear the classifier before its use
                        argvalue.untrain()
                    if isinstance(argvalue, ClassWithCollections):
                        argvalue.states.reset()
                    # assign the current sweep value and run the test method
                    kwargs_[argname] = argvalue

                    try:
                        if __debug__:
                            debug('TEST', 'Running %s on args=%s and kwargs=%s' %
                                  (method.__name__, repr(args_), repr(kwargs_)))
                        method(*args_, **kwargs_)
                    except AssertionError, e:
                        estr = str(e)
                        etype, value, tb = sys.exc_info()
                        # literal representation of the traceback with
                        # uninformative frames filtered out, used to group
                        # failures which originate from the same place
                        eidstr = ' '.join(
                            [l for l in traceback.format_exception(etype, value, tb)
                             if not ('do_sweep' in l or 'unittest.py' in l
                                     or 'AssertionError' in l or 'Traceback (most' in l)])

                        # store the failure for later reporting
                        if not eidstr in failed_tests:
                            failed_tests[eidstr] = []

                        failed_tests[eidstr].append(
                            # skip the top-most frame (do_sweep itself)
                            (argname, repr(argvalue), tb.tb_next, estr))

                        if __debug__:
                            msg = "%s on %s=%s" % (estr, argname, repr(argvalue))
                            debug('TEST', 'Failed unittest: %s\n%s' % (eidstr, msg))
                    untrain_clf(argvalue)

                    if cfg.getboolean('tests', 'quick', False):
                        # for quick testing run the method only on the first
                        # entry of the swept list
                        break

            if len(failed_tests):
                # compose a single AssertionError which incorporates all
                # registered failures
                multiple = len(failed_tests) != 1

                estr = ""
                cestr = "lead to failures of unittest %s" % method.__name__
                if multiple:
                    estr += "\n Different scenarios %s (specific tracebacks are below):" % cestr
                else:
                    estr += "\n Single scenario %s:" % cestr
                for ek, els in failed_tests.iteritems():
                    estr += '\n'
                    if multiple:
                        estr += ek
                    estr += " on\n %s" % (" ".join(
                        ["%s=%s%s\n" % (ea, eav,
                                        # append the corresponding assertion message
                                        ":\n ".join([x for x in [' ', es] if x != '']))
                         for ea, eav, etb, es in els]))
                    # keep a traceback to attach to the raised exception
                    etb = els[0][2]
                raise AssertionError(estr), None, etb

        do_sweep.func_name = method.func_name
        return do_sweep

    # sweeping over more than one keyword argument is not supported
    if len(kwargs) > 1:
        raise NotImplementedError
    return unittest_method
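
# Usage sketch for `sweepargs` (the classifier instances below are
# hypothetical placeholders, not part of this module): the decorated test
# method is invoked once per entry of the single swept keyword argument,
# e.g.
#
#     @sweepargs(clf=[SomeLinearClf(), SomeNonlinearClf()])
#     def testSomething(self, clf):
#         clf.train(datasets['uni2small'])
#         ...
#
# Sweeping over more than one keyword argument at a time raises
# NotImplementedError, as enforced above.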


# Define datasets to be used throughout the unit tests
snr_scale = cfg.getAsDType('tests', 'snr scale', float, default=1.0)

specs = {'large' : { 'perlabel': 99, 'nchunks': 11, 'nfeatures': 20, 'snr': 8 * snr_scale},
         'medium' :{ 'perlabel': 24, 'nchunks': 6, 'nfeatures': 14, 'snr': 8 * snr_scale},
         'small' : { 'perlabel': 12, 'nchunks': 4, 'nfeatures': 6, 'snr' : 14 * snr_scale} }
nonbogus_pool = [0, 1, 3, 5]

datasets = {}

for kind, spec in specs.iteritems():
    # datasets with 2, 3 and 4 labels for each size
    for nlabels in [2, 3, 4]:
        basename = 'uni%d%s' % (nlabels, kind)
        nonbogus_features = nonbogus_pool[:nlabels]
        bogus_features = filter(lambda x: not x in nonbogus_features,
                                range(spec['nfeatures']))

        dataset = normalFeatureDataset(
            nlabels=nlabels,
            nonbogus_features=nonbogus_features,
            **spec)
        dataset.nonbogus_features = nonbogus_features
        dataset.bogus_features = bogus_features
        oes = OddEvenSplitter()
        splits = [(train, test) for (train, test) in oes(dataset)]
        for i, replication in enumerate(['test', 'train']):
            dataset_ = splits[0][i]
            dataset_.nonbogus_features = nonbogus_features
            dataset_.bogus_features = bogus_features
            datasets["%s_%s" % (basename, replication)] = dataset_

        # full dataset
        datasets[basename] = dataset

    # simple 3D dataset with a mask knocking out two voxels
    total = 2*spec['perlabel']
    nchunks = spec['nchunks']
    data = N.random.standard_normal(( total, 3, 6, 6 ))
    labels = N.concatenate( ( N.repeat( 0, spec['perlabel'] ),
                              N.repeat( 1, spec['perlabel'] ) ) )
    chunks = N.asarray(range(nchunks)*(total/nchunks))
    mask = N.ones( (3, 6, 6) )
    mask[0,0,0] = 0
    mask[1,3,2] = 0
    datasets['3d%s' % kind] = MaskedDataset(samples=data, labels=labels,
                                            chunks=chunks, mask=mask)


# a few additional simple datasets
datasets['dumb2'] = dumbFeatureBinaryDataset()
datasets['dumb'] = dumbFeatureDataset()

# dataset with invariant (all-zero and all-one) features appended
_dsinv = dumbFeatureDataset()
_dsinv.samples = N.hstack((_dsinv.samples,
                           N.zeros((_dsinv.nsamples, 1)),
                           N.ones((_dsinv.nsamples, 1))))
datasets['dumbinv'] = _dsinv

# datasets with continuous targets for testing regressions
datasets['sin_modulated'] = multipleChunks(sinModulated, 4, 30, 1)
datasets['sin_modulated_test'] = sinModulated(30, 1, flat=True)

# chirp-based datasets
datasets['chirp_linear'] = multipleChunks(chirpLinear, 6, 50, 10, 2, 0.3, 0.1)
datasets['chirp_linear_test'] = chirpLinear(20, 5, 2, 0.4, 0.1)

datasets['wr1996'] = multipleChunks(wr1996, 4, 50)
datasets['wr1996_test'] = wr1996(50)
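# Typical access pattern from a unit test (a sketch only; the dataset keys
# and the nonbogus_features attribute are defined above in this module,
# the assertion itself is purely illustrative):
#
#     ds = datasets['uni2small']
#     assert len(ds.nonbogus_features) == 2
#     ds_train = datasets['uni2small_train']
#     ds_test = datasets['uni2small_test']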