
Source Code for Module mvpa.tests.tests_warehouse

# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
# vi: set ft=python sts=4 ts=4 sw=4 et:
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
#
#   See COPYING file distributed along with the PyMVPA package for the
#   copyright and license terms.
#
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
  9  """Miscelaneous functions/datasets to be used in the unit tests""" 

__docformat__ = 'restructuredtext'

from os import environ

import unittest, traceback, sys
import numpy as N

from mvpa import cfg
from mvpa.datasets import Dataset
from mvpa.datasets.splitters import OddEvenSplitter
from mvpa.datasets.masked import MaskedDataset
from mvpa.clfs.base import Classifier
from mvpa.misc.state import ClassWithCollections
from mvpa.misc.data_generators import *

__all__ = [ 'datasets', 'sweepargs', 'N', 'unittest', '_all_states_enabled' ]

if __debug__:
    from mvpa.base import debug
    __all__.append('debug')

    _all_states_enabled = 'ENFORCE_STATES_ENABLED' in debug.active
else:
    _all_states_enabled = False
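
# Debug targets such as ENFORCE_STATES_ENABLED are activated through PyMVPA's
# MVPA_DEBUG environment variable, so a run with all states enforced-enabled
# could look like this (the test runner name is just for illustration):
#
#     MVPA_DEBUG=ENFORCE_STATES_ENABLED python main.py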


def sweepargs(**kwargs):
    """Decorator function to sweep over a given set of classifiers

    :Parameters:
      clfs : list of `Classifier`
        List of classifiers to run method on

    Often some unittest method can be run on multiple classifiers.
    So this decorator aims to do that
    """
    def unittest_method(method):
        def do_sweep(*args_, **kwargs_):
            def untrain_clf(argvalue):
                """Little helper"""
                if isinstance(argvalue, Classifier):
                    # clear classifier after its use -- just to be sure ;-)
                    argvalue.retrainable = False
                    argvalue.untrain()

            failed_tests = {}
            for argname in kwargs.keys():
                for argvalue in kwargs[argname]:
                    if isinstance(argvalue, Classifier):
                        # clear classifier before its use
                        argvalue.untrain()
                    if isinstance(argvalue, ClassWithCollections):
                        argvalue.states.reset()
                    # update kwargs_
                    kwargs_[argname] = argvalue
                    # do actual call
                    try:
                        if __debug__:
                            debug('TEST', 'Running %s on args=%s and kwargs=%s' %
                                  (method.__name__, `args_`, `kwargs_`))
                        method(*args_, **kwargs_)
                    except AssertionError, e:
                        estr = str(e)
                        etype, value, tb = sys.exc_info()
                        # literal representation of exception tb, so
                        # we could group them later on
                        eidstr = ' '.join(
                            [l for l in traceback.format_exception(etype, value, tb)
                             if not ('do_sweep' in l or 'unittest.py' in l
                                     or 'AssertionError' in l or 'Traceback (most' in l)])

                        # Store exception information for later grouping
                        if not eidstr in failed_tests:
                            failed_tests[eidstr] = []

                        failed_tests[eidstr].append(
                            # skip top-most tb in sweep_args
                            (argname, `argvalue`, tb.tb_next, estr))

                        if __debug__:
                            msg = "%s on %s=%s" % (estr, argname, `argvalue`)
                            debug('TEST', 'Failed unittest: %s\n%s' % (eidstr, msg))
                    untrain_clf(argvalue)
                    # TODO: handle different levels of unittests properly
                    if cfg.getboolean('tests', 'quick', False):
                        # on TESTQUICK just run the test for the 1st entry in
                        # the list, the rest are omitted
                        # TODO: proper partitioning of unittests
                        break

            if len(failed_tests):
                # Let's now create a single AssertionError exception which would
                # nicely incorporate all failed exceptions
                multiple = len(failed_tests) != 1  # is it unique?
                # if so, we don't need to reinclude the traceback since it
                # would be spit out anyway below
                estr = ""
                cestr = "lead to failures of unittest %s" % method.__name__
                if multiple:
                    estr += "\n Different scenarios %s (specific tracebacks are below):" % cestr
                else:
                    estr += "\n Single scenario %s:" % cestr
                for ek, els in failed_tests.iteritems():
                    estr += '\n'
                    if multiple: estr += ek
                    estr += " on\n %s" % (" ".join(
                        ["%s=%s%s\n" % (ea, eav,
                                        # Why didn't I just do regular for loop? ;)
                                        ":\n ".join([x for x in [' ', es] if x != '']))
                         for ea, eav, etb, es in els]))
                    etb = els[0][2]  # take first one... they all should be identical
                raise AssertionError(estr), None, etb

        do_sweep.func_name = method.func_name
        return do_sweep

    if len(kwargs) > 1:
        raise NotImplementedError
    return unittest_method
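
# Example usage (an illustrative sketch, not part of this module: the
# TestCase subclass and the `some_clfs` list of classifier instances are
# hypothetical stand-ins):
#
#     class ExampleTests(unittest.TestCase):
#
#         @sweepargs(clf=some_clfs)
#         def testOnEveryClf(self, clf):
#             # run once per classifier; failures across classifiers get
#             # grouped into a single AssertionError by do_sweep above
#             clf.train(datasets['uni2small'])
#
# Note that only a single keyword argument may be swept over -- passing more
# raises NotImplementedError.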


# Define datasets to be used all over. Split-half later on is used to
# split into training/testing
#
snr_scale = cfg.getAsDType('tests', 'snr scale', float, default=1.0)

specs = {'large'  : {'perlabel': 99, 'nchunks': 11, 'nfeatures': 20, 'snr':  8 * snr_scale},
         'medium' : {'perlabel': 24, 'nchunks':  6, 'nfeatures': 14, 'snr':  8 * snr_scale},
         'small'  : {'perlabel': 12, 'nchunks':  4, 'nfeatures':  6, 'snr': 14 * snr_scale}}
nonbogus_pool = [0, 1, 3, 5]

datasets = {}

for kind, spec in specs.iteritems():
    # set of univariate datasets
    for nlabels in [ 2, 3, 4 ]:
        basename = 'uni%d%s' % (nlabels, kind)
        nonbogus_features = nonbogus_pool[:nlabels]
        bogus_features = filter(lambda x: not x in nonbogus_features,
                                range(spec['nfeatures']))

        dataset = normalFeatureDataset(
            nlabels=nlabels,
            nonbogus_features=nonbogus_features,
            **spec)
        dataset.nonbogus_features = nonbogus_features
        dataset.bogus_features = bogus_features
        oes = OddEvenSplitter()
        splits = [(train, test) for (train, test) in oes(dataset)]
        for i, replication in enumerate( ['test', 'train'] ):
            dataset_ = splits[0][i]
            dataset_.nonbogus_features = nonbogus_features
            dataset_.bogus_features = bogus_features
            datasets["%s_%s" % (basename, replication)] = dataset_

        # full dataset
        datasets[basename] = dataset

    # sample 3D
    total = 2*spec['perlabel']
    nchunks = spec['nchunks']
    data = N.random.standard_normal(( total, 3, 6, 6 ))
    labels = N.concatenate( ( N.repeat( 0, spec['perlabel'] ),
                              N.repeat( 1, spec['perlabel'] ) ) )
    chunks = N.asarray(range(nchunks)*(total/nchunks))
    mask = N.ones( (3, 6, 6) )
    mask[0,0,0] = 0
    mask[1,3,2] = 0
    datasets['3d%s' % kind] = MaskedDataset(samples=data, labels=labels,
                                            chunks=chunks, mask=mask)

# some additional datasets
datasets['dumb2'] = dumbFeatureBinaryDataset()
datasets['dumb'] = dumbFeatureDataset()
# dataset with a few invariant features
_dsinv = dumbFeatureDataset()
_dsinv.samples = N.hstack((_dsinv.samples,
                           N.zeros((_dsinv.nsamples, 1)),
                           N.ones((_dsinv.nsamples, 1))))
datasets['dumbinv'] = _dsinv

# Datasets for regression testing
datasets['sin_modulated'] = multipleChunks(sinModulated, 4, 30, 1)
datasets['sin_modulated_test'] = sinModulated(30, 1, flat=True)

# simple signal for linear regressors
datasets['chirp_linear'] = multipleChunks(chirpLinear, 6, 50, 10, 2, 0.3, 0.1)
datasets['chirp_linear_test'] = chirpLinear(20, 5, 2, 0.4, 0.1)

datasets['wr1996'] = multipleChunks(wr1996, 4, 50)
datasets['wr1996_test'] = wr1996(50)
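
# A minimal sketch of how the warehouse is typically consumed from a test
# module (illustrative only; the TestCase name is hypothetical, the expected
# values follow from the 'small' spec above):
#
#     from tests_warehouse import *
#
#     class WarehouseChecks(unittest.TestCase):
#         def testSmallUni2(self):
#             ds = datasets['uni2small']
#             self.failUnlessEqual(ds.nfeatures, 6)
#             # first two features carry the signal for the 2-label problem
#             self.failUnlessEqual(ds.nonbogus_features, [0, 1])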