Source Code for Module mvpa.tests.test_dataset

# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
# vi: set ft=python sts=4 ts=4 sw=4 et:
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
#
#   See COPYING file distributed along with the PyMVPA package for the
#   copyright and license terms.
#
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
"""Unit tests for PyMVPA dataset handling"""

import unittest
import random
import numpy as N
from mvpa.datasets import Dataset
from mvpa.datasets.miscfx import zscore, aggregateFeatures
from mvpa.mappers.mask import MaskMapper
from mvpa.misc.exceptions import DatasetError
from mvpa.support import copy

from tests_warehouse import datasets
class DatasetTests(unittest.TestCase):

    def testAddPatterns(self):
        """Test composition of new datasets by addition of existing ones
        """
        data = Dataset(samples=range(5), labels=1, chunks=1)

        self.failUnlessEqual(
            data.uniquelabels, [1],
            msg="uniquelabels must be correctly recomputed")

        # simple sequence has to be a single pattern
        self.failUnlessEqual(data.nsamples, 1)
        # check correct pattern layout (1x5)
        self.failUnless(
            (data.samples == N.array([[0, 1, 2, 3, 4]])).all())

        # check for single labels and origin
        self.failUnless((data.labels == N.array([1])).all())
        self.failUnless((data.chunks == N.array([1])).all())

        # now try adding pattern with wrong shape
        self.failUnlessRaises(DatasetError,
                              data.__iadd__,
                              Dataset(samples=N.ones((2, 3)),
                                      labels=1,
                                      chunks=1))

        # now add two real patterns
        dss = datasets['uni2large'].samples
        data += Dataset(samples=dss[:2, :5], labels=2, chunks=2)
        self.failUnlessEqual(data.nfeatures, 5)
        self.failUnless((data.labels == N.array([1, 2, 2])).all())
        self.failUnless((data.chunks == N.array([1, 2, 2])).all())

        # test automatic origins
        data += Dataset(samples=dss[3:5, :5], labels=3)
        self.failUnless((data.chunks == N.array([1, 2, 2, 0, 1])).all())

        # test unique class labels
        self.failUnless((data.uniquelabels == N.array([1, 2, 3])).all())

        # test wrong label length
        self.failUnlessRaises(DatasetError,
                              Dataset,
                              samples=dss[:4, :5],
                              labels=[1, 2, 3],
                              chunks=2)

        # test wrong origin length
        self.failUnlessRaises(DatasetError,
                              Dataset,
                              samples=dss[:4, :5],
                              labels=[1, 2, 3, 4],
                              chunks=[2, 2, 2])

    def testFeatureSelection(self):
        """Testing feature selection: sorted/not sorted, feature groups
        """
        origdata = datasets['uni2large'].samples[:10, :20]
        data = Dataset(samples=origdata, labels=2, chunks=2)

        # define some feature groups
        data.defineFeatureGroups(N.repeat(range(4), 5))

        unmasked = data.samples.copy()

        # default must be no mask
        self.failUnless(data.nfeatures == 20)

        features_to_select = [3, 0, 17]
        features_to_select_copy = copy.deepcopy(features_to_select)
        features_to_select_sorted = copy.deepcopy(features_to_select)
        features_to_select_sorted.sort()

        bsel = N.array([False] * 20)
        bsel[features_to_select] = True
        # check selection with feature list
        for sel, issorted in \
            [(data.selectFeatures(features_to_select, sort=False), False),
             (data.selectFeatures(features_to_select, sort=True), True),
             (data.select(slice(None), features_to_select), True),
             (data.select(slice(None), N.array(features_to_select)), True),
             (data.select(slice(None), bsel), True)]:
            self.failUnless(sel.nfeatures == 3)

            # check size of the masked patterns
            self.failUnless(sel.samples.shape == (10, 3))

            # check that the right features are selected
            fts = (features_to_select, features_to_select_sorted)[int(issorted)]
            self.failUnless((unmasked[:, fts] == sel.samples).all())

            # check grouping information
            self.failUnless((sel._dsattr['featuregroups'] == [0, 0, 3]).all())

            # check side effect on features_to_select parameter:
            self.failUnless(features_to_select == features_to_select_copy)

        # check selection by feature group id
        gsel = data.selectFeatures(groups=[2, 3])
        self.failUnless(gsel.nfeatures == 10)
        self.failUnless(set(gsel._dsattr['featuregroups']) == set([2, 3]))

    def testSampleSelection(self):
        origdata = datasets['uni2large'].samples[:100, :10].T
        data = Dataset(samples=origdata, labels=2, chunks=2)

        self.failUnless(data.nsamples == 10)

        # set single pattern to enabled
        for sel in [data.selectSamples(5),
                    data.select(5),
                    data.select(slice(5, 6)),
                    ]:
            self.failUnless(sel.nsamples == 1)
            self.failUnless(data.nfeatures == 100)
            self.failUnless(sel.origids == [5])

        # check duplicate selections
        for sel in [data.selectSamples([5, 5]),
                    # Following ones would fail since select removes
                    # repetitions (XXX)
                    #data.select([5, 5]),
                    #data.select([5, 5], 'all'),
                    #data.select([5, 5], slice(None)),
                    ]:
            self.failUnless(sel.nsamples == 2)
            self.failUnless((sel.samples[0] == data.samples[5]).all())
            self.failUnless((sel.samples[0] == sel.samples[1]).all())
            self.failUnless(len(sel.labels) == 2)
            self.failUnless(len(sel.chunks) == 2)
            self.failUnless((sel.origids == [5, 5]).all())

            self.failUnless(sel.samples.shape == (2, 100))

        # check selection by labels
        for sel in [data.selectSamples(data.idsbylabels(2)),
                    data.select(labels=2),
                    data.select('labels', 2),
                    data.select('labels', [2]),
                    data['labels', [2]],
                    data['labels':[2], 'labels':2],
                    data['labels':[2]],
                    ]:
            self.failUnless(sel.nsamples == data.nsamples)
            self.failUnless(N.all(sel.samples == data.samples))
        # not present label
        for sel in [data.selectSamples(data.idsbylabels(3)),
                    data.select(labels=3),
                    data.select('labels', 3),
                    data.select('labels', [3]),
                    ]:
            self.failUnless(sel.nsamples == 0)

        data = Dataset(samples=origdata,
                       labels=[8, 9, 4, 3, 3, 3, 4, 2, 8, 9],
                       chunks=2)
        for sel in [data.selectSamples(data.idsbylabels([2, 3])),
                    data.select('labels', [2, 3]),
                    data.select('labels', [2, 3], labels=[1, 2, 3, 4]),
                    data.select('labels', [2, 3], chunks=[1, 2, 3, 4]),
                    data['labels':[2, 3], 'chunks':[1, 2, 3, 4]],
                    data['chunks':[1, 2, 3, 4], 'labels':[2, 3]],
                    ]:
            self.failUnless(N.all(sel.origids == [3., 4., 5., 7.]))

        # lets cause it to compute unique labels
        self.failUnless((data.uniquelabels == [2, 3, 4, 8, 9]).all())

        # select some samples removing some labels completely
        sel = data.selectSamples(data.idsbylabels([3, 4, 8, 9]))
        self.failUnlessEqual(set(sel.uniquelabels), set([3, 4, 8, 9]))
        self.failUnless((sel.origids == [0, 1, 2, 3, 4, 5, 6, 8, 9]).all())
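
    # Sample selection in a nutshell, mirroring the equivalent spellings
    # tested above (sketch under the same API assumptions):
    #
    #     ds = Dataset(samples=N.eye(4), labels=[1, 1, 2, 2], chunks=1)
    #     assert ds.select(labels=2).nsamples == 2
    #     assert ds['labels', [1]].nsamples == 2
    #     assert (ds.selectSamples([0]).samples == ds.samples[[0]]).all()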

    def testEvilSelects(self):
        """Test some obscure selections of samples via select() or __getitem__
        """
        origdata = datasets['uni2large'].samples[:100, :10].T
        data = Dataset(samples=origdata,
                       #        0  1  2  3  4  5  6  7  8  9
                       labels=[8, 9, 4, 3, 3, 3, 3, 2, 8, 9],
                       chunks=[1, 2, 3, 2, 3, 1, 5, 6, 3, 6])

        # malformed getitem
        if __debug__:
            # check is enforced only in __debug__
            self.failUnlessRaises(ValueError, data.__getitem__,
                                  'labels', 'featu')

        # too many indices
        self.failUnlessRaises(ValueError, data.__getitem__, 1, 1, 1)

        # various getitems which should carry the same result
        for sel in [data.select('chunks', [2, 6], labels=[3, 2],
                                features=slice(None)),
                    data.select('all', 'all', labels=[2, 3], chunks=[2, 6]),
                    data['chunks', [2, 6], 'labels', [3, 2]],
                    data[:, :, 'chunks', [2, 6], 'labels', [3, 2]],
                    # get warnings but should work as the rest for now
                    data[3:8, 'chunks', [2, 6, 2, 6], 'labels', [3, 2]],
                    ]:
            self.failUnless(N.all(sel.origids == [3, 7]))
            self.failUnless(sel.nfeatures == 100)
            self.failUnless(N.all(sel.samples == origdata[[3, 7]]))

        target = origdata[[3, 7]]
        target = target[:, [1, 3]]
        # various getitems which should carry the same result
        for sel in [data.select('all', [1, 3],
                                'chunks', [2, 6], labels=[3, 2]),
                    data[:, [1, 3], 'chunks', [2, 6], 'labels', [3, 2]],
                    data[:, [1, 3], 'chunks', [2, 6], 'labels', [3, 2]],
                    # get warnings but should work as the rest for now
                    data[3:8, [1, 1, 3, 1],
                         'chunks', [2, 6, 2, 6], 'labels', [3, 2]],
                    ]:
            self.failUnless(N.all(sel.origids == [3, 7]))
            self.failUnless(sel.nfeatures == 2)
            self.failUnless(N.all(sel.samples == target))

        # Check if we get empty selection if requesting impossible
        self.failUnless(data.select(chunks=[23]).nsamples == 0)

        # Check .where()
        self.failUnless(N.all(data.where(chunks=[2, 6]) == [1, 3, 7, 9]))
        self.failUnless(N.all(data.where(chunks=[2, 6], labels=[22, 3]) == [3]))
        # both samples and features
        idx = data.where('all', [1, 3, 10], labels=[2, 3, 4])
        self.failUnless(N.all(idx[1] == [1, 3, 10]))
        self.failUnless(N.all(idx[0] == range(2, 8)))
        # empty query
        self.failUnless(data.where() is None)
        # empty result
        self.failUnless(data.where(labels=[123]) == [])

    def testCombinedPatternAndFeatureMasking(self):
        data = Dataset(samples=N.arange(20).reshape((4, 5)),
                       labels=1,
                       chunks=1)

        self.failUnless(data.nsamples == 4)
        self.failUnless(data.nfeatures == 5)
        fsel = data.selectFeatures([1, 2])
        fpsel = fsel.selectSamples([0, 3])
        self.failUnless(fpsel.nsamples == 2)
        self.failUnless(fpsel.nfeatures == 2)

        self.failUnless((fpsel.samples == [[1, 2], [16, 17]]).all())

    def testPatternMerge(self):
        data1 = Dataset(samples=N.ones((5, 5)), labels=1, chunks=1)
        data2 = Dataset(samples=N.ones((3, 5)), labels=2, chunks=1)

        merged = data1 + data2

        self.failUnless(merged.nfeatures == 5)
        l12 = [1] * 5 + [2] * 3
        l1 = [1] * 8
        self.failUnless((merged.labels == l12).all())
        self.failUnless((merged.chunks == l1).all())

        data1 += data2

        self.failUnless(data1.nfeatures == 5)
        self.failUnless((data1.labels == l12).all())
        self.failUnless((data1.chunks == l1).all())

    def testLabelRandomizationAndSampling(self):
        """Test permuteLabels() and getRandomSamples()
        """
        data = Dataset(samples=N.ones((5, 1)), labels=range(5), chunks=1)
        data += Dataset(samples=N.ones((5, 1)) + 1, labels=range(5), chunks=2)
        data += Dataset(samples=N.ones((5, 1)) + 2, labels=range(5), chunks=3)
        data += Dataset(samples=N.ones((5, 1)) + 3, labels=range(5), chunks=4)
        data += Dataset(samples=N.ones((5, 1)) + 4, labels=range(5), chunks=5)
        self.failUnless(data.samplesperlabel == {0: 5, 1: 5, 2: 5, 3: 5, 4: 5})

        sample = data.getRandomSamples(2)
        self.failUnless(sample.samplesperlabel.values() == [2, 2, 2, 2, 2])

        self.failUnless((data.uniquechunks == range(1, 6)).all())

        # store the old labels
        origlabels = data.labels.copy()

        data.permuteLabels(True)

        self.failIf((data.labels == origlabels).all())

        data.permuteLabels(False)

        self.failUnless((data.labels == origlabels).all())

        # now try another object with the same data
        data2 = Dataset(samples=data.samples,
                        labels=data.labels,
                        chunks=data.chunks)

        # labels are the same as the originals
        self.failUnless((data2.labels == origlabels).all())

        # now permute in the new object
        data2.permuteLabels(True)

        # must not affect the old one
        self.failUnless((data.labels == origlabels).all())
        # but only the new one
        self.failIf((data2.labels == origlabels).all())

    def testAttributes(self):
        """Test adding custom attributes to a dataset
        """
        #class BlobbyDataset(Dataset):
        #    pass
        # TODO: we can't assign attributes to those for now...
        ds = Dataset(samples=range(5), labels=1, chunks=1)
        self.failUnlessRaises(AttributeError, lambda x: x.blobs, ds)
        """Dataset.blobs should fail since .blobs wasn't yet registered"""

        # register new attribute, but it would alter only new instances
        Dataset._registerAttribute("blobs", "_data", hasunique=True)
        ds = Dataset(samples=range(5), labels=1, chunks=1)
        self.failUnless(not ds.blobs != [0],
                        msg="By default new attributes are supposed to get 0 "
                            "as the value")

        try:
            ds.blobs = [1, 2]
            self.fail(msg="Dataset.blobs=[1,2] should fail since "
                          "there are 5 samples")
        except ValueError:
            pass

        try:
            ds.blobs = [1]
        except:
            self.fail(msg="We must be able to assign the attribute")

        # Dataset still shouldn't have blobs... just BlobbyDataset
        #self.failUnlessRaises(AttributeError, lambda x: x.blobs,
        #                      Dataset(samples=range(5), labels=1, chunks=1))

    def testRequiredAttributes(self):
        """Verify that we have required attributes
        """
        self.failUnlessRaises(DatasetError, Dataset)
        self.failUnlessRaises(DatasetError, Dataset, samples=[1])
        self.failUnlessRaises(DatasetError, Dataset, labels=[1])
        try:
            ds = Dataset(samples=[1], labels=[1])
        except:
            self.fail(msg="samples and labels are 2 required parameters")
        assert(ds is not None)  # silence pylint

    def testZScoring(self):
        """Test z-scoring transformation
        """
        # dataset: mean=2, std=1
        samples = N.array((0, 1, 3, 4, 2, 2, 3, 1, 1, 3, 3, 1, 2, 2, 2, 2)).\
                  reshape((16, 1))
        data = Dataset(samples=samples,
                       labels=range(16), chunks=[0] * 16)
        self.failUnlessEqual(data.samples.mean(), 2.0)
        self.failUnlessEqual(data.samples.std(), 1.0)
        zscore(data, perchunk=True)

        # check z-scoring
        check = N.array([-2, -1, 1, 2, 0, 0, 1, -1, -1, 1, 1, -1, 0, 0, 0, 0],
                        dtype='float64').reshape(16, 1)
        self.failUnless((data.samples == check).all())

        data = Dataset(samples=samples,
                       labels=range(16), chunks=[0] * 16)
        zscore(data, perchunk=False)
        self.failUnless((data.samples == check).all())

        # check z-scoring taking a set of labels as a baseline
        data = Dataset(samples=samples,
                       labels=[0, 2, 2, 2, 1] + [2] * 11,
                       chunks=[0] * 16)
        zscore(data, baselinelabels=[0, 1])
        self.failUnless((samples == data.samples + 1.0).all())

    def testZScoringWithScalarMeanStd(self):
        # (method name is a reconstruction -- the original def line was lost)
        dataset = Dataset(samples=N.arange(20).reshape((4, 5)),
                          labels=1,
                          chunks=1)
        zscore(dataset, mean=0, std=1,  # N.ones(dataset.nfeatures),
               perchunk=True, pervoxel=True,
               targetdtype="float32")

    def testAggregation(self):
        data = Dataset(samples=N.arange(20).reshape((4, 5)),
                       labels=1,
                       chunks=1)

        ag_data = aggregateFeatures(data, N.mean)

        self.failUnless(ag_data.nsamples == 4)
        self.failUnless(ag_data.nfeatures == 1)
        self.failUnless((ag_data.samples[:, 0] == [2, 7, 12, 17]).all())
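
    # aggregateFeatures() collapses the feature axis with the supplied
    # function; a sketch under the same assumptions:
    #
    #     ds = Dataset(samples=N.array([[1, 3], [5, 7]]), labels=1, chunks=1)
    #     agg = aggregateFeatures(ds, N.mean)
    #     assert (agg.samples[:, 0] == [2, 6]).all()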

    def testApplyMapper(self):
        """Test creation of new dataset by applying a mapper"""
        mapper = MaskMapper(N.array([1, 0, 1]))
        dataset = Dataset(samples=N.arange(12).reshape((4, 3)),
                          labels=1,
                          chunks=1)
        seldataset = dataset.applyMapper(featuresmapper=mapper)
        self.failUnless((dataset.selectFeatures([0, 2]).samples
                         == seldataset.samples).all())

        # Let's do a simple test on maskmapper reverse since it seems to
        # do evil things. Those checks are done only in __debug__
        if __debug__:
            # should fail since the mask keeps just 2 features now
            self.failUnlessRaises(ValueError, mapper.reverse, [10, 20, 30])
            self.failUnlessRaises(ValueError, mapper.forward, [10, 20])

        # XXX: the intended test is added as SampleGroupMapper test
        #self.failUnlessRaises(NotImplementedError,
        #                      dataset.applyMapper, None, [1])
        #"""We don't yet have implementation for samplesmapper --
        #if we get one -- remove this check and place a test"""
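
    # MaskMapper in brief (sketch, same imports assumed): forward() keeps the
    # features where the mask is non-zero, reverse() embeds the reduced
    # feature vector back into the full feature space:
    #
    #     m = MaskMapper(N.array([1, 0, 1]))
    #     assert (m.forward([10, 20, 30]) == [10, 30]).all()
    #     assert (m.reverse([10, 30]) == [10, 0, 30]).all()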

    def testId(self):
        """Test that Dataset.idhash changes whenever samples, labels,
        or chunks change
        """
        dataset = Dataset(samples=N.arange(12).reshape((4, 3)),
                          labels=1,
                          chunks=1)
        origid = dataset.idhash
        dataset.labels = [3, 1, 2, 3]   # change all labels
        self.failUnless(origid != dataset.idhash,
                        msg="Changing all labels should alter dataset's idhash")

        origid = dataset.idhash

        z = dataset.labels[1]
        self.failUnlessEqual(origid, dataset.idhash,
                             msg="Accessing shouldn't change idhash")
        z = dataset.chunks
        self.failUnlessEqual(origid, dataset.idhash,
                             msg="Accessing shouldn't change idhash")
        z[2] = 333
        self.failUnless(origid != dataset.idhash,
                        msg="Changing value in attribute should change idhash")

        origid = dataset.idhash
        dataset.samples[1, 1] = 1000
        self.failUnless(origid != dataset.idhash,
                        msg="Changing value in data should change idhash")

        origid = dataset.idhash
        dataset.permuteLabels(True)
        self.failUnless(origid != dataset.idhash,
                        msg="Permutation also changes idhash")

        dataset.permuteLabels(False)
        self.failUnless(origid == dataset.idhash,
                        msg="idhash should be restored after "
                            "permuteLabels(False)")

    def testFeatureMaskConversion(self):
        dataset = Dataset(samples=N.arange(12).reshape((4, 3)),
                          labels=1,
                          chunks=1)

        mask = dataset.convertFeatureIds2FeatureMask(range(dataset.nfeatures))
        self.failUnless(len(mask) == dataset.nfeatures)
        self.failUnless((mask == True).all())

        self.failUnless(
            (dataset.convertFeatureMask2FeatureIds(mask) == range(3)).all())

        mask[1] = False

        self.failUnless(
            (dataset.convertFeatureMask2FeatureIds(mask) == [0, 2]).all())

    def testSummary(self):
        """Dummy test"""
        ds = datasets['uni2large']
        ds = ds[N.random.permutation(range(ds.nsamples))[:20]]
        summary = ds.summary()
        self.failUnless(len(summary) > 40)

    def testLabelsMapping(self):
        """Test mapping of the labels from strings to numericals
        """
        od = {'apple': 0, 'orange': 1}
        samples = [[3], [2], [3]]
        labels_l = ['apple', 'orange', 'apple']

        # test broadcasting of the label
        ds = Dataset(samples=samples, labels='orange')
        self.failUnless(N.all(ds.labels == ['orange'] * 3))

        # Test basic mapping of literal labels
        for ds in [Dataset(samples=samples, labels=labels_l, labels_map=od),
                   # Figure out the mapping automatically
                   Dataset(samples=samples, labels=labels_l, labels_map=True)]:
            self.failUnless(N.all(ds.labels == [0, 1, 0]))
            self.failUnless(ds.labels_map == od)
            ds_ = ds[1]
            self.failUnless(ds_.labels_map == od,
                            msg='selectSamples should preserve the full mapping')

        # We should complain about an insufficient mapping
        self.failUnlessRaises(ValueError, Dataset, samples=samples,
                              labels=labels_l, labels_map={'apple': 0})

        # Conformance to older behavior -- if labels are given as
        # strings, no mapping occurs by default
        ds2 = Dataset(samples=samples, labels=labels_l)
        self.failUnlessEqual(ds2.labels_map, None)

        # We should map numerical labels as well, if requested:
        od3 = {1: 100, 2: 101, 3: 100}
        ds3 = Dataset(samples=samples, labels=[1, 2, 3],
                      labels_map=od3)
        self.failUnlessEqual(ds3.labels_map, od3)
        self.failUnless(N.all(ds3.labels == [100, 101, 100]))

        ds3_ = ds3[1]
        self.failUnlessEqual(ds3_.labels_map, od3)

        ds4 = Dataset(samples=samples, labels=labels_l)

        # Lets check setting the labels map
        ds = Dataset(samples=samples, labels=labels_l, labels_map=od)

        self.failUnlessRaises(ValueError, ds.setLabelsMap,
                              {'orange': 1, 'nonorange': 3})
        new_map = {'tasty': 0, 'crappy': 1}
        ds.labels_map = new_map.copy()
        self.failUnlessEqual(ds.labels_map, new_map)

    def testLabelsMappingAddDataset(self):
        """Adding datasets needs special care whenever labels mapping
        is used."""
        samples = [[3], [2], [3]]
        l1 = ['a', 'b', 'a']
        l2 = ['b', 'a', 'c']
        ds1 = Dataset(samples=samples, labels=l1,
                      labels_map={'a': 1, 'b': 2})
        ds2 = Dataset(samples=samples, labels=l2,
                      labels_map={'c': 1, 'a': 4, 'b': 2})

        # some dataset without a mapping
        ds0 = Dataset(samples=samples, labels=l2)

        # original mappings
        lm1 = ds1.labels_map.copy()
        lm2 = ds2.labels_map.copy()

        ds3 = ds1 + ds2
        self.failUnless(N.all(ds3.labels ==
                              N.hstack((ds1.labels, [2, 1, 5]))))
        self.failUnless(ds1.labels_map == lm1)
        self.failUnless(ds2.labels_map == lm2)

        # check iadd
        ds1 += ds2
        self.failUnless(N.all(ds1.labels == ds3.labels))

        # it should be deterministic
        self.failUnless(N.all(ds1.labels_map == ds3.labels_map))

        # don't allow adding datasets when only one of them has a labels_map
        self.failUnlessRaises(ValueError, ds1.__add__, ds0)
        self.failUnlessRaises(ValueError, ds1.__iadd__, ds0)

    def testCopy(self):
        # lets use some instance of a somewhat evolved dataset
        ds = datasets['uni2small']
        # Clone the beast
        ds_ = ds.copy()
        # verify that we have the same data
        self.failUnless(N.all(ds.samples == ds_.samples))
        self.failUnless(N.all(ds.labels == ds_.labels))
        self.failUnless(N.all(ds.chunks == ds_.chunks))

        # modify and verify that we don't change data in the original one
        ds_.samples[0, 0] = 1234
        self.failUnless(N.any(ds.samples != ds_.samples))
        self.failUnless(N.all(ds.labels == ds_.labels))
        self.failUnless(N.all(ds.chunks == ds_.chunks))

        ds_.labels = N.hstack(([123], ds_.labels[1:]))
        self.failUnless(N.any(ds.samples != ds_.samples))
        self.failUnless(N.any(ds.labels != ds_.labels))
        self.failUnless(N.all(ds.chunks == ds_.chunks))

        ds_.chunks = N.hstack(([1234], ds_.chunks[1:]))
        self.failUnless(N.any(ds.samples != ds_.samples))
        self.failUnless(N.any(ds.labels != ds_.labels))
        self.failUnless(N.any(ds.chunks != ds_.chunks))

        self.failUnless(N.any(ds.uniquelabels != ds_.uniquelabels))
        self.failUnless(N.any(ds.uniquechunks != ds_.uniquechunks))

    def testIdsonboundaries(self):
        """Test detection of transition points

        Shame on Yarik -- he didn't create unittests right away... damn me
        """
        ds = Dataset(samples=N.array(range(10), ndmin=2).T,
                     labels=[0, 0, 1, 1, 0, 0, 1, 1, 0, 0],
                     chunks=[0, 0, 0, 0, 0, 1, 1, 1, 1, 1])
        self.failUnless(ds.idsonboundaries() == [0, 2, 4, 5, 6, 8],
                        "We should have got ids whenever either chunk or "
                        "label changes")
        self.failUnless(ds.idsonboundaries(attributes_to_track=['chunks'])
                        == [0, 5])
        # Preceding samples
        self.failUnless(ds.idsonboundaries(prior=1, post=-1,
                                           attributes_to_track=['chunks'])
                        == [4, 9])
        self.failUnless(ds.idsonboundaries(prior=2, post=-1,
                                           attributes_to_track=['chunks'])
                        == [3, 4, 8, 9])
        self.failUnless(ds.idsonboundaries(prior=2, post=-1,
                                           attributes_to_track=['chunks'],
                                           revert=True)
                        == [0, 1, 2, 5, 6, 7])
        self.failUnless(ds.idsonboundaries(prior=1, post=1,
                                           attributes_to_track=['chunks'])
                        == [0, 1, 4, 5, 6, 9])
        # all should be there
        self.failUnless(ds.idsonboundaries(prior=2) == range(10))


def suite():
    return unittest.makeSuite(DatasetTests)


if __name__ == '__main__':
    import runner