# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
# vi: set ft=python sts=4 ts=4 sw=4 et:
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
#
#   See COPYING file distributed along with the PyMVPA package for the
#   copyright and license terms.
#
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
"""Unit tests for PyMVPA basic Classifiers"""

import unittest

import numpy as N

from mvpa.support.copy import deepcopy
from mvpa.base import externals

from mvpa.datasets import Dataset
from mvpa.mappers.mask import MaskMapper
from mvpa.datasets.splitters import NFoldSplitter, OddEvenSplitter

from mvpa.misc.exceptions import UnknownStateError

from mvpa.clfs.base import DegenerateInputError, FailedToTrainError
from mvpa.clfs.meta import CombinedClassifier, \
     BinaryClassifier, MulticlassClassifier, \
     SplitClassifier, MappedClassifier, FeatureSelectionClassifier, \
     TreeClassifier
from mvpa.clfs.transerror import TransferError
from mvpa.algorithms.cvtranserror import CrossValidatedTransferError

from tests_warehouse import *
from tests_warehouse_clfs import *

# What exceptions to allow while testing degenerate cases.
# If the classifier pukes -- that is OK -- the user will notice that
# something is wrong
_degenerate_allowed_exceptions = [DegenerateInputError, FailedToTrainError]
if externals.exists('rpy'):
    import rpy
    _degenerate_allowed_exceptions += [rpy.RPyRException]
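# (rpy-backed classifiers may surface failures from within R as
#  rpy.RPyRException, hence its inclusion above; the degenerate-case test
#  below consumes the whole list via a pattern like
#
#    try:
#        clf.train(degenerate_ds)
#    except tuple(_degenerate_allowed_exceptions):
#        pass                            # a "good puke"
# )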


class ClassifiersTests(unittest.TestCase):

    def setUp(self):
        self.clf_sign = SameSignClassifier()
        self.clf_less1 = Less1Classifier()

        # simple binary dataset
        self.data_bin_1 = Dataset(
            samples=[[0, 0], [-10, -1], [1, 0.1], [1, -1], [-1, 1]],
            labels=[1, 1, 1, -1, -1],   # labels
            chunks=[0, 1, 2, 2, 3])     # chunks

    def testDummy(self):
        clf = SameSignClassifier(enable_states=['training_confusion'])
        clf.train(self.data_bin_1)
        self.failUnlessRaises(UnknownStateError, clf.states.__getattribute__,
                              "predictions")
        """Should have no predictions after training alone -- the
        predictions state only gets set by predict()"""

        if not _all_states_enabled:
            self.failUnlessRaises(UnknownStateError,
                                  clf.states.__getattribute__,
                                  "trained_dataset")

        self.failUnlessEqual(clf.training_confusion.percentCorrect,
                             100,
                             msg="Dummy clf should train perfectly")
        self.failUnlessEqual(clf.predict(self.data_bin_1.samples),
                             list(self.data_bin_1.labels))

        self.failUnlessEqual(len(clf.predictions), self.data_bin_1.nsamples,
            msg="Trained classifier stores predictions by default")

        clf = SameSignClassifier(enable_states=['trained_dataset'])
        clf.train(self.data_bin_1)
        self.failUnless((clf.trained_dataset.samples ==
                         self.data_bin_1.samples).all())
        self.failUnless((clf.trained_dataset.labels ==
                         self.data_bin_1.labels).all())

    def testBoosted(self):
        # a silly test: check whether we get the same result with a boosted
        # classifier as with a single one
        bclf = CombinedClassifier(clfs=[self.clf_sign.clone(),
                                        self.clf_sign.clone()])

        self.failUnlessEqual(list(bclf.predict(self.data_bin_1.samples)),
                             list(self.data_bin_1.labels),
                             msg="Boosted classifier should work")
        self.failUnlessEqual(bclf.predict(self.data_bin_1.samples),
                             self.clf_sign.predict(self.data_bin_1.samples),
                             msg="Boosted classifier should give the same "
                                 "predictions as the regular one")

    def testBoostedStatePropagation(self):
        bclf = CombinedClassifier(clfs=[self.clf_sign.clone(),
                                        self.clf_sign.clone()],
                                  enable_states=['feature_ids'])

        # check that enabling states propagates to the slave classifiers
        self.failUnlessEqual(self.clf_sign.states.isEnabled('feature_ids'),
                             _all_states_enabled)
        self.failUnlessEqual(bclf.clfs[0].states.isEnabled('feature_ids'),
                             True)

        bclf2 = CombinedClassifier(clfs=[self.clf_sign.clone(),
                                         self.clf_sign.clone()],
                                   propagate_states=False,
                                   enable_states=['feature_ids'])

        self.failUnlessEqual(self.clf_sign.states.isEnabled('feature_ids'),
                             _all_states_enabled)
        self.failUnlessEqual(bclf2.clfs[0].states.isEnabled('feature_ids'),
                             _all_states_enabled)

    def testBinaryDecorator(self):
        ds = Dataset(samples=[[0, 0], [0, 1], [1, 100], [-1, 0], [-1, -3],
                              [0, -10]],
                     labels=['sp', 'sp', 'sp', 'dn', 'sn', 'dp'])
        testdata = [[0, 0], [10, 10], [-10, -1], [0.1, -0.1], [-0.2, 0.2]]
        # labels encode [s]ame/[d]ifferent (sign), and [p]ositive/[n]egative
        # first element
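        # e.g. [1, 100]  -> 'sp' (same signs, positive first element),
        #      [-1, -3]  -> 'sn' (same signs, negative first element),
        #      [-1, 0]   -> 'dn' (different signs, negative first element)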

        clf = SameSignClassifier()
        # let's create a classifier to discriminate only between
        # same/different, which is the primary task of SameSignClassifier
        bclf1 = BinaryClassifier(clf=clf,
                                 poslabels=['sp', 'sn'],
                                 neglabels=['dp', 'dn'])

        orig_labels = ds.labels[:]
        bclf1.train(ds)

        self.failUnless(bclf1.predict(testdata) ==
                        [['sp', 'sn'], ['sp', 'sn'], ['sp', 'sn'],
                         ['dn', 'dp'], ['dn', 'dp']])

        self.failUnless((ds.labels == orig_labels).all(),
                        msg="BinaryClassifier should not alter labels")

    @sweepargs(clf=clfswh['binary'])
    def testClassifierGeneralization(self, clf):
        """Simple test whether classifiers can generalize OK on simple data
        """
        te = CrossValidatedTransferError(TransferError(clf), NFoldSplitter())
        cve = te(datasets['uni2medium'])
        if cfg.getboolean('tests', 'labile', default='yes'):
            self.failUnless(cve < 0.25,
                            msg="Got transfer error %g" % (cve))

    @sweepargs(clf=clfswh[:] + regrswh[:])
    def testSummary(self, clf):
        """Basic testing of the clf summary
        """
        summary1 = clf.summary()
        self.failUnless('not yet trained' in summary1)
        clf.train(datasets['uni2small'])
        summary = clf.summary()
        # It should get bigger ;)
        self.failUnless(len(summary) > len(summary1))
        self.failUnless('not yet trained' not in summary)

    @sweepargs(clf=clfswh[:] + regrswh[:])
    def testDegenerateUsage(self, clf):
        """Test how clf handles degenerate cases
        """
        # Whenever we have only 1 feature with only 0s in it
        ds1 = datasets['uni2small'][:, [0]]
        # XXX this very line breaks LARS in many other unittests --
        # very interesting effect. but screw it -- for now it will be
        # this way
        ds1.samples[:] = 0.0            # all 0s

        #ds2 = datasets['uni2small'][[0], :]
        #ds2.samples[:] = 0.0           # all 0s

        clf.states._changeTemporarily(
            enable_states=['values', 'training_confusion'])

        # Good pukes are good ;-)
        # TODO XXX add
        #  - ", ds2):" to test a degenerate ds with 1 sample
        #  - ds1 but without 0s -- just 1 feature... feature selections
        #    might lead to 'surprises' due to magic in combiners etc
        for ds in (ds1, ):
            try:
                clf.train(ds)           # should not crash or stall
                # could we still get those?
                summary = clf.summary()
                cm = clf.states.training_confusion
                # If training/predicting (via training_confusion) succeeded
                # without an error -- the results had better be at "chance".
                # XXX the chance checks below are currently disabled
                continue
                if 'ACC' in cm.stats:
                    self.failUnlessEqual(cm.stats['ACC'], 0.5)
                else:
                    self.failUnless(N.isnan(cm.stats['CCe']))
            except tuple(_degenerate_allowed_exceptions):
                pass
        clf.states._resetEnabledTemporarily()

    # TODO: sg - remove our limitations; meta -- also
    @sweepargs(clf=clfswh['!sg', '!plr', '!meta'])
    def testSingleClass(self, clf):
        """Test whether binary and multiclass classifiers can handle
        single-class training/testing
        """
        ds = datasets['uni2small']['labels', (0,)]
        try:
            err = TransferError(clf)(
                datasets['uni2small_test']['labels', (0,)],
                datasets['uni2small_train']['labels', (0,)])
        except Exception, e:
            self.fail(str(e))
        self.failUnless(err == 0.)

    # TODO: validate for regressions as well!!!
    def testSplitClassifier(self):
        ds = self.data_bin_1
        clf = SplitClassifier(clf=SameSignClassifier(),
                              splitter=NFoldSplitter(1),
                              enable_states=['confusion',
                                             'training_confusion',
                                             'feature_ids'])
        clf.train(ds)                   # train the beast
        error = clf.confusion.error
        tr_error = clf.training_confusion.error

        clf2 = clf.clone()
        cv = CrossValidatedTransferError(
            TransferError(clf2),
            NFoldSplitter(),
            enable_states=['confusion', 'training_confusion'])
        cverror = cv(ds)
        tr_cverror = cv.training_confusion.error

        self.failUnlessEqual(error, cverror,
            msg="We should get the same error using a split classifier as"
                " using CrossValidatedTransferError. Got %s and %s"
                % (error, cverror))

        self.failUnlessEqual(tr_error, tr_cverror,
            msg="We should get the same training error using a split"
                " classifier as using CrossValidatedTransferError."
                " Got %s and %s" % (tr_error, tr_cverror))

        self.failUnlessEqual(clf.confusion.percentCorrect,
                             100,
                             msg="Dummy clf should train perfectly")
        self.failUnlessEqual(len(clf.confusion.sets),
                             len(ds.uniquechunks),
                             msg="Should have 1 confusion per split")
        self.failUnlessEqual(len(clf.clfs), len(ds.uniquechunks),
            msg="Should have as many classifiers as splits")
        self.failUnlessEqual(clf.predict(ds.samples), list(ds.labels),
                             msg="Should classify correctly")

        # feature_ids used to be a list of lists, and since no
        # feature-selecting classifier is used here, all features would be
        # expected in each entry.
        #  NOT ANYMORE -- for BoostedClassifier we now have the union of
        #  the features used across the slave classifiers. That makes the
        #  semantics clear. If you need to dig deeper -- use the harvesting
        #  facility (see testHarvesting below) ;-)
        # self.failUnlessEqual(len(clf.feature_ids), len(ds.uniquechunks))
        # self.failUnless(N.array([len(ids)==ds.nfeatures
        #                          for ids in clf.feature_ids]).all())
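        # A commented-out sketch of a union-semantics check instead
        # (assuming the dummy slaves still report every feature as used):
        # self.failUnlessEqual(len(clf.feature_ids), ds.nfeatures)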

        # Just check if we get it at all ;-)
        summary = clf.summary()

    @sweepargs(clf_=clfswh['binary', '!meta'])
    def testSplitClassifierExtended(self, clf_):
        clf2 = clf_.clone()
        ds = datasets['uni2medium']     #self.data_bin_1
        clf = SplitClassifier(clf=clf_, #SameSignClassifier(),
                              splitter=NFoldSplitter(1),
                              enable_states=['confusion', 'feature_ids'])
        clf.train(ds)                   # train the beast
        error = clf.confusion.error

        cv = CrossValidatedTransferError(
            TransferError(clf2),
            NFoldSplitter(),
            enable_states=['confusion', 'training_confusion'])
        cverror = cv(ds)

        self.failUnless(abs(error - cverror) < 0.01,
            msg="We should get the same error using a split classifier as"
                " using CrossValidatedTransferError. Got %s and %s"
                % (error, cverror))

        if cfg.getboolean('tests', 'labile', default='yes'):
            self.failUnless(error < 0.25,
                            msg="clf should generalize more or less fine. "
                                "Got error %s" % error)
        self.failUnlessEqual(len(clf.confusion.sets), len(ds.uniquechunks),
                             msg="Should have 1 confusion per split")
        self.failUnlessEqual(len(clf.clfs), len(ds.uniquechunks),
            msg="Should have as many classifiers as splits")
        #self.failUnlessEqual(clf.predict(ds.samples), list(ds.labels),
        #                     msg="Should classify correctly")

    def testHarvesting(self):
        """Basic testing of harvesting based on SplitClassifier
        """
        ds = self.data_bin_1
        clf = SplitClassifier(clf=SameSignClassifier(),
                              splitter=NFoldSplitter(1),
                              enable_states=['confusion',
                                             'training_confusion',
                                             'feature_ids'],
                              harvest_attribs=['clf.feature_ids',
                                               'clf.training_time'],
                              descr="DESCR")
        clf.train(ds)                   # train the beast
        # Number of harvested items should equal the number of chunks
        self.failUnlessEqual(len(clf.harvested['clf.feature_ids']),
                             len(ds.uniquechunks))
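        # clf.harvested is keyed by the harvest_attribs strings, so an
        # analogous check could be made for the collected training times:
        # self.failUnlessEqual(len(clf.harvested['clf.training_time']),
        #                      len(ds.uniquechunks))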
        # check that descr survives the multiple-inheritance
        # ClassWithCollections.__init__ chain
        self.failUnlessEqual(clf.descr, "DESCR")

    def testMappedClassifier(self):
        samples = N.array([[0, 0, -1], [1, 0, 1], [-1, -1, 1],
                           [-1, 0, 1], [1, -1, 1]])
        testdata3 = Dataset(samples=samples, labels=1)
        res110 = [1, 1, 1, -1, -1]
        res101 = [-1, 1, -1, -1, 1]
        res011 = [-1, 1, -1, 1, -1]

        clf110 = MappedClassifier(clf=self.clf_sign,
                                  mapper=MaskMapper(N.array([1, 1, 0])))
        clf101 = MappedClassifier(clf=self.clf_sign,
                                  mapper=MaskMapper(N.array([1, 0, 1])))
        clf011 = MappedClassifier(clf=self.clf_sign,
                                  mapper=MaskMapper(N.array([0, 1, 1])))

        self.failUnlessEqual(clf110.predict(samples), res110)
        self.failUnlessEqual(clf101.predict(samples), res101)
        self.failUnlessEqual(clf011.predict(samples), res011)

    def testFeatureSelectionClassifier(self):
        from test_rfe import SillySensitivityAnalyzer
        from mvpa.featsel.base import \
             SensitivityBasedFeatureSelection
        from mvpa.featsel.helpers import \
             FixedNElementTailSelector

        # should give the lowest weight to the feature with the lowest index
        sens_ana = SillySensitivityAnalyzer()
        # should give the lowest weight to the feature with the highest index
        sens_ana_rev = SillySensitivityAnalyzer(mult=-1)

        # corresponding feature selections
        feat_sel = SensitivityBasedFeatureSelection(sens_ana,
            FixedNElementTailSelector(1, mode='discard'))

        feat_sel_rev = SensitivityBasedFeatureSelection(sens_ana_rev,
            FixedNElementTailSelector(1))

        samples = N.array([[0, 0, -1], [1, 0, 1], [-1, -1, 1],
                           [-1, 0, 1], [1, -1, 1]])

        testdata3 = Dataset(samples=samples, labels=1)
        # dummy train data so a proper mapper gets created
        traindata = Dataset(samples=N.array([[0, 0, -1], [1, 0, 1]]),
                            labels=[1, 2])

        # targets
        res110 = [1, 1, 1, -1, -1]
        res011 = [-1, 1, -1, 1, -1]

        # first classifier -- the 0th feature should be discarded
        clf011 = FeatureSelectionClassifier(self.clf_sign, feat_sel,
                    enable_states=['feature_ids'])

        self.clf_sign.states._changeTemporarily(enable_states=['values'])
        clf011.train(traindata)

        self.failUnlessEqual(clf011.predict(testdata3.samples), res011)
        # just a silly test whether we get values assigned in the
        # 'ProxyClassifier'
        self.failUnless(len(clf011.values) == len(res110),
                        msg="We need to pass values into ProxyClassifier")
        self.clf_sign.states._resetEnabledTemporarily()

        self.failUnlessEqual(len(clf011.feature_ids), 2)
        """Feature selection classifier had to be trained on 2 features"""

        # second classifier -- the last feature should be discarded
        clf011 = FeatureSelectionClassifier(self.clf_sign, feat_sel_rev)
        clf011.train(traindata)
        self.failUnlessEqual(clf011.predict(testdata3.samples), res110)

    def testFeatureSelectionClassifierWithRegression(self):
        from test_rfe import SillySensitivityAnalyzer
        from mvpa.featsel.base import \
             SensitivityBasedFeatureSelection
        from mvpa.featsel.helpers import \
             FixedNElementTailSelector
        if sample_clf_reg is None:
            # no regression classifier was found, so there is nothing to test
            return
        # should give the lowest weight to the feature with the lowest index
        sens_ana = SillySensitivityAnalyzer()

        # corresponding feature selection
        feat_sel = SensitivityBasedFeatureSelection(sens_ana,
            FixedNElementTailSelector(1, mode='discard'))

        # now test with a regression-based classifier. The problem used to
        # be that predictions were derived from values twice, with the
        # values then being overwritten by those results -- so in the end
        # the values were actually the predictions...
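        # a sketch of that (formerly) buggy flow:
        #   values = clf.values          # continuous regression outputs
        #   predictions = f(values)      # thresholded labels
        #   values = predictions         # bug: values get clobbered
        # hence the failIf() below on values coinciding with predictions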
        dat = Dataset(samples=N.random.randn(4, 10),
                      labels=[-1, -1, 1, 1])
        clf_reg = FeatureSelectionClassifier(sample_clf_reg, feat_sel)
        clf_reg.train(dat)
        res = clf_reg.predict(dat.samples)
        self.failIf((N.array(clf_reg.values)
                     - clf_reg.predictions).sum() == 0,
                    msg="Values were set to the predictions in %s." %
                        sample_clf_reg)

    def testTreeClassifier(self):
        """Basic tests for TreeClassifier
        """
        ds = datasets['uni4medium']
        # excluding PLR since that one can deal only with 0,1 labels ATM
        clfs = clfswh['binary', '!plr']  # pool of classifiers
        # Let's permute so each time we try some different combination
        # of the classifiers
        clfs = [clfs[i] for i in N.random.permutation(len(clfs))]
        # Test conflicting definition
        tclf = TreeClassifier(clfs[0], {
            'L0+2' : (('L0', 'L2'), clfs[1]),
            'L2+3' : ((2, 3),       clfs[2])})
        self.failUnlessRaises(ValueError, tclf.train, ds)
        """Should raise an exception since label 2 is in both groups"""

        # Test insufficient definition
        tclf = TreeClassifier(clfs[0], {
            'L0+5' : (('L0', 'L5'), clfs[1]),
            'L2+3' : ((2, 3),       clfs[2])})
        self.failUnlessRaises(ValueError, tclf.train, ds)
        """Should raise an exception since there is no group for L1"""

        # proper definition now
        tclf = TreeClassifier(clfs[0], {
            'L0+1' : (('L0', 1), clfs[1]),
            'L2+3' : ((2, 3),    clfs[2])})

        # Let's test a train/test cycle using CVTE
        cv = CrossValidatedTransferError(
            TransferError(tclf),
            OddEvenSplitter(),
            enable_states=['confusion', 'training_confusion'])
        cverror = cv(ds)
        try:
            rtclf = repr(tclf)
        except:
            self.fail(msg="Could not obtain repr for TreeClassifier")

        # Test accessibility of .clfs
        self.failUnless(tclf.clfs['L0+1'] is clfs[1])
        self.failUnless(tclf.clfs['L2+3'] is clfs[2])

        cvtrc = cv.training_confusion
        cvtc = cv.confusion
        if cfg.getboolean('tests', 'labile', default='yes'):
            # just a dummy check to make sure everything is working
            self.failUnless(cvtrc != cvtc)
            self.failUnless(cverror < 0.3,
                            msg="Got too high an error = %s using %s"
                                % (cverror, tclf))

        # Test trailing nodes with no classifier
        tclf = TreeClassifier(clfs[0], {
            'L0'     : ((0,),      None),
            'L1+2+3' : ((1, 2, 3), clfswh['multiclass'][0])})
        cv = CrossValidatedTransferError(
            TransferError(tclf),
            OddEvenSplitter(),
            enable_states=['confusion', 'training_confusion'])
        cverror = cv(ds)
        if cfg.getboolean('tests', 'labile', default='yes'):
            self.failUnless(cverror < 0.3,
                            msg="Got too high an error = %s using %s"
                                % (cverror, tclf))

    @sweepargs(clf=clfswh[:])
    def testValues(self, clf):
        if isinstance(clf, MulticlassClassifier):
            # TODO: handle those values correctly
            return
        ds = datasets['uni2small']
        clf.states._changeTemporarily(enable_states=['values'])
        cv = CrossValidatedTransferError(
            TransferError(clf),
            OddEvenSplitter(),
            enable_states=['confusion', 'training_confusion'])
        cverror = cv(ds)
        #print clf.descr, clf.values[0]
        # basic test: we should get one value entry per tested sample
        # (the last split covers half of the dataset)
        self.failUnlessEqual(len(clf.values), ds.nsamples / 2)

        clf.states._resetEnabledTemporarily()

    @sweepargs(clf=clfswh['linear', 'svm', 'libsvm', '!meta'])
    def testMulticlassClassifier(self, clf):
        oldC = None
        # XXX somewhat ugly way to force a non-data-specific C value.
        # Otherwise the multiclass libsvm builtin and our
        # MulticlassClassifier would differ in their results
        if clf.params.isKnown('C') and clf.C < 0:
            oldC = clf.C
            clf.C = 1.0                 # reset C to be 1

        svm, svm2 = clf, clf.clone()
        svm2.states.enable(['training_confusion'])

        mclf = MulticlassClassifier(clf=svm,
                                    enable_states=['training_confusion'])

        svm2.train(datasets['uni2small_train'])
        mclf.train(datasets['uni2small_train'])
        s1 = str(mclf.training_confusion)
        s2 = str(svm2.training_confusion)
        self.failUnlessEqual(s1, s2,
            msg="Multiclass clf should provide the same results as the "
                "built-in libsvm's %s. Got %s and %s" % (svm2, s1, s2))

        svm2.untrain()

        self.failUnless(svm2.trained == False,
                        msg="Untrained SVM should be untrained")

        self.failUnless(N.array([x.trained for x in mclf.clfs]).all(),
            msg="Trained boosted classifier should have all primary "
                "classifiers trained")
        self.failUnless(mclf.trained,
            msg="Trained boosted classifier should be marked as trained")

        mclf.untrain()

        self.failUnless(not mclf.trained,
            msg="Untrained boosted classifier should not be trained")
        self.failUnless(not N.array([x.trained for x in mclf.clfs]).any(),
            msg="Untrained boosted classifier should have no primary "
                "classifiers trained")

        if oldC is not None:
            clf.C = oldC

    # XXX meta should also work, but that is a TODO
    @sweepargs(clf=clfswh['svm', '!meta'])
    def testSVMs(self, clf):
        knows_probabilities = \
            'probabilities' in clf.states.names and clf.params.probability
        enable_states = ['values']
        if knows_probabilities:
            enable_states += ['probabilities']

        clf.states._changeTemporarily(enable_states=enable_states)
        for traindata, testdata in [
            (datasets['uni2small_train'], datasets['uni2small_test'])]:
            clf.train(traindata)
            predicts = clf.predict(testdata.samples)
            # values should differ from predictions for the SVMs we have
            self.failUnless((predicts != clf.values).any())
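            # (for a binary SVM the values are continuous decision-function
            #  outputs, i.e. signed distances to the separating hyperplane,
            #  while predictions are the thresholded labels -- hence the
            #  inequality above)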

            if knows_probabilities and clf.states.isSet('probabilities'):
                # XXX test more thoroughly what we are getting here ;-)
                self.failUnlessEqual(len(clf.probabilities),
                                     len(testdata.samples))
        clf.states._resetEnabledTemporarily()

    @sweepargs(clf=clfswh['retrainable'])
    def testRetrainables(self, clf):
        # we need a copy since we will tune its internals later on
        clf = clf.clone()
        clf.states._changeTemporarily(enable_states=['values'],
                                      # ensure that it does do predictions
                                      # while training
                                      disable_states=['training_confusion'])
        clf_re = clf.clone()
        # TODO: .retrainable must have a callback to call smth like
        # _setRetrainable
        clf_re._setRetrainable(True)

        # need to have high SNR so we don't have to 'cope' with problematic
        # datasets, since otherwise the unittests would fail
        dsargs = {'perlabel': 50, 'nlabels': 2, 'nfeatures': 5, 'nchunks': 1,
                  'nonbogus_features': [2, 4], 'snr': 5.0}
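        # NB these kwargs appear to match the signature of
        # mvpa.misc.data_generators.normalFeatureDataset, so an equally
        # easy dataset could presumably be generated directly via
        # normalFeatureDataset(**dsargs)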
591
592 ## !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
593 # NB datasets will be changed by the end of testing, so if
594 # are to change to use generic datasets - make sure to copy
595 # them here
596 dstrain = deepcopy(datasets['uni2large_train'])
597 dstest = deepcopy(datasets['uni2large_test'])
598
599 clf.untrain()
600 clf_re.untrain()
601 trerr, trerr_re = TransferError(clf), \
602 TransferError(clf_re, disable_states=['training_confusion'])
603
604 # Just check for correctness of retraining
605 err_1 = trerr(dstest, dstrain)
606 self.failUnless(err_1<0.3,
607 msg="We should test here on easy dataset. Got error of %s" % err_1)
608 values_1 = clf.values[:]
609 # some times retraining gets into deeper optimization ;-)
610 eps = 0.05
611 corrcoef_eps = 0.85 # just to get no failures... usually > 0.95
612
613

        def batch_test(retrain=True, retest=True, closer=True):
            err = trerr(dstest, dstrain)
            err_re = trerr_re(dstest, dstrain)
            corr = N.corrcoef(clf.values, clf_re.values)[0, 1]
            corr_old = N.corrcoef(values_1, clf_re.values)[0, 1]
            if __debug__:
                debug('TEST', "Retraining stats: errors %g %g corr %g "
                              "with old error %g corr %g" %
                              (err, err_re, corr, err_1, corr_old))
            self.failUnless(clf_re.states.retrained == retrain,
                            ("Must fully train",
                             "Must retrain instead of fully training")[retrain])
            self.failUnless(clf_re.states.repredicted == retest,
                            ("Must fully test",
                             "Must retest instead of fully testing")[retest])
            self.failUnless(corr > corrcoef_eps,
                msg="Result must be close to the one without retraining."
                    " Got corrcoef=%s" % (corr))
            if closer:
                self.failUnless(corr >= corr_old,
                    msg="Result must be closer to the current one without"
                        " retraining than to the old one. Got corrcoef=%s"
                        % (corr_old))

        # Check sequential retraining/retesting
        for i in xrange(3):
            flag = bool(i != 0)
            # ok -- on the 1st call we should train/test, then retrain/retest
            # and we can't compare for closeness to the old result since
            # we are working on the same data/classifier
            batch_test(retrain=flag, retest=flag, closer=False)

        # should retrain nicely if we change a parameter
        if 'C' in clf.params.names:
            clf.params.C *= 0.1
            clf_re.params.C *= 0.1
            batch_test()
        elif 'sigma_noise' in clf.params.names:
            clf.params.sigma_noise *= 100
            clf_re.params.sigma_noise *= 100
            batch_test()
        else:
            raise RuntimeError, \
                  'Please implement testing while changing some of the ' \
                  'params for clf %s' % clf

        # should retrain nicely if we change a kernel parameter
        if hasattr(clf, 'kernel_params') and len(clf.kernel_params.names):
            clf.kernel_params.gamma = 0.1
            clf_re.kernel_params.gamma = 0.1
            # retest is False since the kernel got recomputed, thus
            # we can't expect to use the same kernel
            batch_test(retest=not('gamma' in clf.kernel_params.names))

        # should retrain nicely if we change the labels
        oldlabels = dstrain.labels[:]
        dstrain.permuteLabels(status=True, assure_permute=True)
        self.failUnless((oldlabels != dstrain.labels).any(),
            msg="We should succeed at permuting -- got the same labels")
        batch_test()

        # change labels in testing
        oldlabels = dstest.labels[:]
        dstest.permuteLabels(status=True, assure_permute=True)
        self.failUnless((oldlabels != dstest.labels).any(),
            msg="We should succeed at permuting -- got the same labels")
        batch_test()

        # should re-train if we change the data;
        # reuse the trained SVM and its 'final' optimization point
        if not clf.__class__.__name__ in ['GPR']:
            # (on GPR everything depends on the data ;-))
            oldsamples = dstrain.samples.copy()
            dstrain.samples[:] += dstrain.samples * 0.05
            self.failUnless((oldsamples != dstrain.samples).any())
            batch_test(retest=False)
        clf.states._resetEnabledTemporarily()

        # test retrain()
        # TODO XXX -- check validity
        clf_re.retrain(dstrain)
        self.failUnless(clf_re.states.retrained)
        clf_re.retrain(dstrain, labels=True)
        self.failUnless(clf_re.states.retrained)
        clf_re.retrain(dstrain, traindataset=True)
        self.failUnless(clf_re.states.retrained)

        # test repredict()
        clf_re.repredict(dstest.samples)
        self.failUnless(clf_re.states.repredicted)
        self.failUnlessRaises(RuntimeError, clf_re.repredict,
                              dstest.samples, labels=True)
        """for now, retesting with anything changed makes no sense"""
        clf_re._setRetrainable(False)

    def testGenericTests(self):
        """Test all classifiers for conformant behavior
        """
        for clf_, traindata in \
                [(clfswh['binary'], datasets['dumb2']),
                 (clfswh['multiclass'], datasets['dumb'])]:
            traindata_copy = deepcopy(traindata)  # full copy of the dataset
            for clf in clf_:
                clf.train(traindata)
                self.failUnless(
                    (traindata.samples == traindata_copy.samples).all(),
                    "Training of a classifier shouldn't change the original "
                    "dataset")

            # TODO: enforce a uniform return type from predict??
            #predicted = clf.predict(traindata.samples)
            #self.failUnless(isinstance(predicted, N.ndarray))

        # Just a simple test that they all are printable/representable
        self.failUnless(str(clf) != "")
        self.failUnless(repr(clf) != "")

        # TODO: unify str and repr for all classifiers

    # XXX TODO: should work on smlr, knn, ridgereg, and lars as well!
    # but for now they fail to train
    # GNB -- cannot train since 1 sample isn't sufficient to assess variance
    @sweepargs(clf=clfswh['!smlr', '!knn', '!gnb', '!lars', '!meta', '!ridge'])
    def testCorrectDimensionsOrder(self, clf):
        """Check that known/present classifiers work properly
        with samples being the first dimension. Started to worry about
        possible problems while looking at sg, where samples are the 2nd
        dimension
        """
        # specially crafted dataset -- if the dimensions are flipped over
        # the same storage, the problem becomes inseparable. Like in this
        # case, an incorrect order of dimensions leads to equal samples
        # [0, 1, 0]
        traindatas = [
            Dataset(samples=N.array([[0, 0, 1.0],
                                     [1, 0, 0]]), labels=[0, 1]),
            Dataset(samples=N.array([[0, 0.0],
                                     [1, 1]]), labels=[0, 1])]

        clf.states._changeTemporarily(enable_states=['training_confusion'])
        for traindata in traindatas:
            clf.train(traindata)
            self.failUnlessEqual(clf.training_confusion.percentCorrect, 100.0,
                "Classifier %s must have 100%% correct learning on %s. Has %f" %
                (`clf`, traindata.samples, clf.training_confusion.percentCorrect))

            # and thus we must be able to predict every original sample
            for i in xrange(traindata.nsamples):
                sample = traindata.samples[i, :]
                predicted = clf.predict([sample])
                self.failUnlessEqual([predicted], traindata.labels[i],
                    "We must be able to predict sample %s using " % sample +
                    "classifier %s" % `clf`)
        clf.states._resetEnabledTemporarily()


def suite():
    return unittest.makeSuite(ClassifiersTests)


if __name__ == '__main__':
    import runner