Package mvpa :: Package misc :: Module state
[hide private]
[frames | no frames]

Source Code for Module mvpa.misc.state

   1  # emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*- 
   2  # vi: set ft=python sts=4 ts=4 sw=4 et: 
   3  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
   4  # 
   5  #   See COPYING file distributed along with the PyMVPA package for the 
   6  #   copyright and license terms. 
   7  # 
   8  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
   9  """Classes to control and store state information. 
  10   
  11  It was devised to provide conditional storage of state variables.
  12  """ 
  13   
  14  # XXX: MH: The use of `index` as variable name confuses me. IMHO `index` refers 
  15  #          to a position in a container (i.e. list access). However, in this 
  16  #          file it is mostly used in the context of a `key` for dictionary 
  17  #          access. Can we refactor that? 
  18  __docformat__ = 'restructuredtext' 
  19   
  20  import operator, copy 
  21  from textwrap import TextWrapper 
  22   
  23  import numpy as N 
  24   
  25  from mvpa.misc.vproperty import VProperty 
  26  from mvpa.misc.exceptions import UnknownStateError 
  27  from mvpa.misc.attributes import CollectableAttribute, StateVariable 
  28  from mvpa.base.dochelpers import enhancedDocString 
  29   
  30  from mvpa.base import externals 
  31   
  32  if __debug__: 
  33      from mvpa.base import debug 
  34   
  35   
# Result of the externals check named 'running ipython env'
# (presumably true inside an IPython session -- confirm against
# mvpa.base.externals).
_in_ipython = externals.exists('running ipython env')
# Separators around definitions, needed for ReST, but bogus for
# interactive sessions: a backtick when the check above is false,
# an empty string when it is true.
_def_sep = ('`', '')[int(_in_ipython)]

# Aliases of the plain `object` attribute hooks.  The classes below
# override __getattribute__/__setattr__ and call these to reach the
# default behavior.
_object_getattribute = object.__getattribute__
_object_setattr = object.__setattr__
  43   
  44   
  45  ################################################################### 
  46  # Collections 
  47  # 
  48  # TODO: refactor into collections.py. state.py now has 
  49  #       little in common with the main part of this file 
  50  # 
class Collection(object):
    """Container of some CollectableAttributes.

    Items are kept in a dictionary keyed by item name.  Attribute access
    on the collection (``col.somename``) is redirected to the ``value`` of
    the item registered under that name; item access (``col['somename']``)
    returns the item object itself.

    :Groups:
     - `Public Access Functions`: `isKnown`
     - `Access Implementors`: `_getListing`, `_getNames`
     - `Mutators`: `__init__`
     - `R/O Properties`: `listing`, `names`, `items`

    XXX Seems to be not used and duplicating functionality: `_getListing`
    (thus `listing` property)
    """

    def __init__(self, items=None, owner=None, name=None):
        """Initialize the Collection

        :Parameters:
          items : dict of CollectableAttribute's
            items to initialize with
          owner : object
            an object to which collection belongs
          name : basestring
            name of the collection (as seen in the owner, e.g. 'states')
        """
        self.__owner = owner

        if items is None:
            items = {}
        # Dictionary of registered items: name -> CollectableAttribute
        self._items = items
        self.__name = name


    def _setName(self, name):
        """Assign the name of the collection (setter of `name`)"""
        self.__name = name


    def __str__(self):
        """Short listing: optional name followed by up to 4 items in braces

        With the "ST" debug target active all items (up to 1000) and the
        owner's class name and id are included.
        """
        if __debug__ and "ST" in debug.active:
            maxnumber = 1000            # I guess all
        else:
            maxnumber = 4
        if self.__name is not None:
            res = self.__name
        else:
            res = ""
        # obtain the values once instead of once per printed element
        shown = list(self._items.values())[:maxnumber]
        res += "{" + " ".join([str(item) for item in shown])
        if len(self._items) > maxnumber:
            res += "..."
        res += "}"
        if __debug__:
            if "ST" in debug.active:
                res += " owner:%s#%s" % (self.__owner.__class__.__name__,
                                         id(self.__owner))
        return res


    def _cls_repr(self):
        """Collection specific part of __repr__ for a class containing
        it, ie a part of __repr__ for the owner object

        :Return:
          list of items to be appended within __repr__ after a .join()

        :Raise `NotImplementedError`: always -- derived classes override
        """
        # XXX For now we do not expect any pure non-specialized
        # collection , thus just override in derived classes
        raise NotImplementedError("Class %s should override _cls_repr"
                                  % self.__class__.__name__)


    def _is_initializable(self, index):
        """Checks if index could be assigned within collection via
        _initialize

        :Return: bool value for a given `index`

        It is to facilitate dynamic assignment of collections' items
        within derived classes' __init__ depending on the present
        collections in the class.
        """
        # XXX Each collection has to provide what indexes it allows
        #     to be set within constructor. Custom handling of some
        #     arguments (like (dis|en)able_states) is to be performed
        #     in _initialize
        # YYY lets just check if it is among the registered items
        return index in self._items


    def _initialize(self, index, value):
        """Initialize `index` (no check performed) with `value`
        """
        # by default we just set corresponding value
        self[index].value = value


    def __repr__(self):
        """Constructor-like string with item values and the owner

        Item values with a repr longer than 50 characters are abbreviated
        to their first and last 10 characters.  Items whose value cannot
        be obtained or represented are left out.
        """
        s = "%s(" % self.__class__.__name__
        items_s = ""
        sep = ""
        for item in self._items:
            try:
                itemvalue = "%s" % repr(self._items[item].value)
            except Exception:
                # best effort: skip items failing to provide a value
                continue
            if len(itemvalue) > 50:
                itemvalue = itemvalue[:10] + '...' + itemvalue[-10:]
            items_s += "%s'%s':%s" % (sep, item, itemvalue)
            sep = ', '
        if items_s != "":
            s += "items={%s}" % items_s
        if self.__owner is not None:
            s += "%sowner=%s" % (sep, repr(self.__owner))
        s += ")"
        return s


    # XXX TODO: figure out if there is a way to define proper
    #     __copy__'s for a hierarchy of classes. Probably we had
    #     to define __getinitargs__, etc... read more...
    # TODO: a generic _copy_states_ is not provided here, see
    #     StateCollection._copy_states_ for the specialized one


    def isKnown(self, index):
        """Returns `True` if state `index` is known at all"""
        return index in self._items


    def isSet(self, index=None):
        """If item (or any in the present or listed) was set

        :Parameters:
          index : None or basestring or list of basestring
            What items to check if they were set in the collection

        :Raise `KeyError`: if any requested name is not registered
        """
        _items = self._items
        if index is None:
            candidates = _items          # go through all the items
        elif isinstance(index, basestring):
            self._checkIndex(index)      # process just that single index
            return _items[index].isSet
        else:
            candidates = index           # assume that we got some list

        for name in candidates:
            self._checkIndex(name)
            if _items[name].isSet:
                return True
        return False


    def whichSet(self):
        """Return list of indexes which were set"""
        return [name for name, item in self._items.items() if item.isSet]


    def _checkIndex(self, index):
        """Verify that given `index` is a known/registered state.

        :Raise `KeyError`: if given `index` is not known
        """
        # OPT: lets not reuse isKnown, to don't incure 1 more function
        #      call
        if index not in self._items:
            raise KeyError("%s of %s has no key '%s' registered"
                           % (self.__class__.__name__,
                              self.__owner.__class__.__name__,
                              index))


    def add(self, item):
        """Add a new CollectableAttribute to the collection

        :Parameters:
          item : CollectableAttribute
            or of derived class. Must have 'name' assigned

        :Raise `ValueError`: if `item` is not a CollectableAttribute or
          has no name assigned

        TODO: we should make it stricter to don't add smth of
              wrong type into Collection since it might lead to problems

              Also we might convert to __setitem__
        """
        # check the type first: an arbitrary object might not even have
        # 'name', which would surface as an unrelated AttributeError
        if not isinstance(item, CollectableAttribute):
            raise ValueError(
                "Collection can add only instances of " +
                "CollectableAttribute-derived classes. Got %s" % repr(item))

        name = item.name
        if name is None:
            raise ValueError(
                "CollectableAttribute to be added %s must have 'name' set" %
                item)
        self._items[name] = item

        if self.__owner is not None:
            self._updateOwner(name)


    def remove(self, index):
        """Remove item from the collection

        :Raise `KeyError`: if `index` is not registered
        """
        self._checkIndex(index)
        # a collection without an owner has nothing to unregister from
        if self.__owner is not None:
            self._updateOwner(index, register=False)
        del self._items[index]


    def __getattribute__(self, index):
        """Return the value of item `index` if registered, else the
        regular attribute
        """
        # return all private and protected ones first since we will not have
        # collectable's with _ (we should not have!)
        if index[0] == '_':
            return _object_getattribute(self, index)
        _items = _object_getattribute(self, '_items')
        if index in _items:
            return _items[index].value
        return _object_getattribute(self, index)


    def __setattr__(self, index, value):
        """Assign the value of item `index` if registered, else the
        regular attribute
        """
        if index[0] == '_':
            return _object_setattr(self, index, value)
        _items = _object_getattribute(self, '_items')
        if index in _items:
            _items[index].value = value
        else:
            _object_setattr(self, index, value)


    def __getitem__(self, index):
        """Return the item object (not its value) registered as `index`

        :Raise `AttributeError`: if `index` is not registered
        """
        _items = _object_getattribute(self, '_items')
        if index in _items:
            return _items[index]
        raise AttributeError("State collection %s has no %s attribute"
                             % (self, index))


    # TODO: __setitem__ replacing a registered item (with re-registration
    #       within the owner) is not implemented -- enable if need arises


    def get(self, index, default):
        """Access the value by a given index.

        Mimiquing regular dictionary behavior, if value cannot be obtained
        (i.e. if any exception is caught) return default value.
        """
        try:
            return self[index].value
        except Exception:
            return default


    def _action(self, index, func, missingok=False, **kwargs):
        """Run specific func either on a single item or on all of them

        :Parameters:
          index : basestr or sequence of basestr
            Name of the state variable, the literal 'all' (case
            insensitive) for every registered item, or a sequence of
            names
          func
            Function (not bound) to call given an item, and **kwargs
          missingok : bool
            If True - do not complain about wrong index

        :Raise `KeyError`: for an unknown name unless `missingok`
        :Raise `ValueError`: if `index` is neither a string nor a sequence
        """
        if isinstance(index, basestring):
            if index.upper() == 'ALL':
                for index_ in list(self._items):
                    self._action(index_, func, missingok=missingok, **kwargs)
                return
            # only the lookup is forgiven by missingok; failures of func
            # itself must not be silently swallowed
            try:
                self._checkIndex(index)
            except KeyError:
                if missingok:
                    return
                raise
            func(self._items[index], **kwargs)
        elif operator.isSequenceType(index):
            for item in index:
                self._action(item, func, missingok=missingok, **kwargs)
        else:
            raise ValueError(
                "Don't know how to handle variable given by %s" % index)


    def reset(self, index=None):
        """Reset the state variable defined by `index`

        :Parameters:
          index : None or basestring or list of basestring
            What to reset.  If None -- all registered items are reset
        """
        if index is not None:
            indexes = [index]
        else:
            indexes = self._getNames()

        if not self._items:
            return

        # dispatch on each item, so items of different classes get
        # their own reset
        reset_item = lambda item: item.reset()
        for index_ in indexes:
            self._action(index_, reset_item, missingok=False)


    def _getListing(self):
        """Return a list of registered states along with the documentation"""
        # lets assure consistent litsting order
        return ["%s%s%s: %s" % (_def_sep, str(item), _def_sep, item.__doc__)
                for _, item in sorted(self._items.items())]


    def _getNames(self):
        """Return ids for all registered state variables"""
        return list(self._items.keys())


    def _getOwner(self):
        """Return the object the collection belongs to (or None)"""
        return self.__owner


    def _setOwner(self, owner):
        """Assign a new owner and (re-)register items within it

        Items registered within a previous owner get unregistered first.

        :Raise `ValueError`: if `owner` is not a ClassWithCollections
        """
        if not isinstance(owner, ClassWithCollections):
            raise ValueError(
                "Owner of the StateCollection must be ClassWithCollections object")
        if __debug__:
            # str() of a not yet fully initialized owner might fail
            try:
                strowner = str(owner)
            except Exception:
                strowner = "UNDEF: <%s#%s>" % (owner.__class__, id(owner))
            debug("ST", "Setting owner for %s to be %s" % (self, strowner))
        if self.__owner is not None:
            # Remove attributes which were registered to that owner previousely
            self._updateOwner(register=False)
        self.__owner = owner
        if self.__owner is not None:
            self._updateOwner(register=True)


    def _updateOwner(self, index=None, register=True):
        """Define an entry within owner's __dict__
         so ipython could easily complete it

         :Parameters:
           index : basestring or list of basestring
             Name of the attribute. If None -- all known get registered
           register : bool
             Register if True or unregister if False

         :Raise `ValueError`: if `index` is not registered here
         :Raise `RuntimeError`: if the owner or the collection already
           has an attribute of that name in its __dict__

         XXX Needs refactoring since we duplicate the logic of expansion of
         index value
        """
        if index is not None:
            if index not in self._items:
                raise ValueError(
                    "Attribute %s is not known to %s" % (index, self))
            indexes = [index]
        else:
            indexes = self._getNames()

        ownerdict = self.__owner.__dict__
        selfdict = self.__dict__
        owner_known = ownerdict['_known_attribs']
        for index_ in indexes:
            if register:
                # validate both targets before touching either one, so a
                # failure does not leave a half-registered attribute
                if index_ in ownerdict:
                    raise RuntimeError(
                        "Cannot register attribute %s within %s " %
                        (index_, self.__owner) + "since it has one already")
                if index_ in selfdict:
                    raise RuntimeError(
                        "Cannot register attribute %s within %s " %
                        (index_, self) + "since it has one already")
                ownerdict[index_] = self._items[index_]
                selfdict[index_] = self._items[index_]
                owner_known[index_] = self.__name
            else:
                if index_ in ownerdict:
                    # yoh doesn't think that we need to complain if False
                    ownerdict.pop(index_)
                    owner_known.pop(index_)
                if index_ in selfdict:
                    selfdict.pop(index_)


    # Properties
    names = property(fget=_getNames)
    items = property(fget=lambda x: x._items)
    owner = property(fget=_getOwner, fset=_setOwner)
    name = property(fget=lambda x: x.__name, fset=_setName)

    # Virtual properties
    listing = VProperty(fget=_getListing)
496 497 498
class ParameterCollection(Collection):
    """Container of Parameters for a stateful object.
    """

    def _cls_repr(self):
        """Part of __repr__ for the owner object

        :Return:
          list of 'name=value' strings for the parameters which do not
          hold their default value
        """
        prefixes = []
        for k in self.names:
            # list only params with not default values
            if self[k].isDefault:
                continue
            prefixes.append("%s=%s" % (k, self[k].value))
        return prefixes


    def resetvalue(self, index, missingok=False):
        """Reset parameter(s) given by `index` to default values

        :Parameters:
          index : basestring or list of basestring
            Name(s) of the parameters; 'all' addresses every parameter
          missingok : bool
            If True - do not complain about wrong index
        """
        # NOTE(review): implicit relative import as in the original;
        # presumably resolves to the sibling module of this package
        from param import Parameter
        # forward the caller's choice; it used to be overridden with False
        self._action(index, Parameter.resetvalue, missingok=missingok)
529 530
class SampleAttributesCollection(Collection):
    """Container for data and attributes of samples (ie data/labels/chunks/...)
    """

    def _cls_repr(self):
        """Contribution of this collection to the owner's __repr__

        :Return:
          an empty list -- nothing is listed for now
        """
        # TODO: return I guess samples/labels/chunks
        return list()
549 550 551
class StateCollection(Collection):
    """Container of StateVariables for a stateful object.

    :Groups:
     - `Public Access Functions`: `isKnown`, `isEnabled`, `isActive`
     - `Access Implementors`: `_getListing`, `_getNames`, `_getEnabled`
     - `Mutators`: `__init__`, `enable`, `disable`, `_setEnabled`
     - `R/O Properties`: `listing`, `names`, `items`
     - `R/W Properties`: `enabled`
    """

    def __init__(self, items=None, owner=None, name=None):
        """Initialize the state variables of a derived class

        :Parameters:
          items : dict
            dictionary of states
          owner : ClassWithCollections
            object which owns the collection
          name : basestring
            literal description. Usually just attribute name for the
            collection, e.g. 'states'
        """
        Collection.__init__(self, items=items, owner=owner, name=name)

        # Stack of lists of enabled states, stored by _changeTemporarily
        # and consumed by _resetEnabledTemporarily
        self.__storedTemporarily = []

    #
    # XXX TODO: figure out if there is a way to define proper
    #           __copy__'s for a hierarchy of classes. Probably we had
    #           to define __getinitargs__, etc... read more...
    #

    def _cls_repr(self):
        """Part of __repr__ for the owner object

        :Return:
          list with 'enable_states=[...]' and/or 'disable_states=[...]'
          entries for the states which differ from their default
        """
        prefixes = []
        for name, invert in (('enable', False), ('disable', True)):
            states = self._getEnabled(nondefault=False, invert=invert)
            if len(states):
                prefixes.append("%s_states=%s" % (name, str(states)))
        return prefixes


    def _is_initializable(self, index):
        """Checks if index could be assigned within collection via
        setvalue
        """
        return index in ['enable_states', 'disable_states']


    def _initialize(self, index, value):
        """Enable or disable states listed in `value`

        :Parameters:
          index : basestring
            Either 'enable_states' or 'disable_states'
          value : None or basestring or list of basestring
            States to act upon.  None is treated as an empty list

        :Raise `ValueError`: for any other `index`
        """
        if value is None:
            value = []
        if index == 'enable_states':
            self.enable(value, missingok=True)
        elif index == 'disable_states':
            self.disable(value)
        else:
            raise ValueError(
                "StateCollection can accept only enable_states "
                "and disable_states arguments for the initialization. "
                "Got %s" % index)


    def _copy_states_(self, fromstate, index=None, deep=False):
        """Copy known here states from `fromstate` object into current object

        :Parameters:
          fromstate : Collection or ClassWithCollections
            Source states to copy from
          index : None or list of basestring
            If not to copy all set state variables, index provides
            selection of what to copy
          deep : bool
            Optional control over the way to copy

        Crafted to overcome a problem mentioned above in the comment
        and is to be called from __copy__ of derived classes

        Probably sooner than later will get proper __getstate__,
        __setstate__
        """
        # TODO: FOR NOW NO TEST! But this beast needs to be fixed...
        if deep:
            operation = copy.deepcopy
        else:
            operation = copy.copy

        if isinstance(fromstate, ClassWithCollections):
            fromstate = fromstate.states

        _items, from_items = self._items, fromstate._items
        if index is None:
            # copy all set ones
            for name in fromstate.whichSet():
                _items[name] = operation(from_items[name])
        else:
            # copy the requested ones which are known to the source
            isKnown = fromstate.isKnown
            for name in index:
                if isKnown(name):
                    _items[name] = operation(from_items[name])


    def isEnabled(self, index):
        """Returns `True` if state `index` is enabled

        :Raise `KeyError`: if `index` is not registered
        """
        self._checkIndex(index)
        return self._items[index].isEnabled


    def isActive(self, index):
        """Returns `True` if state `index` is known and is enabled"""
        return self.isKnown(index) and self.isEnabled(index)


    def enable(self, index, value=True, missingok=False):
        """Enable state variable given in `index`

        :Parameters:
          index : basestring or list of basestring
            Name(s) of the states; 'all' addresses every state
          value : bool
            Enable if True, disable otherwise
          missingok : bool
            If True - do not complain about wrong index
        """
        self._action(index, StateVariable.enable, missingok=missingok,
                     value=value)


    def disable(self, index):
        """Disable state variable defined by `index` id"""
        self._action(index, StateVariable.enable, missingok=False, value=False)


    # TODO XXX think about some more generic way to grab temporary
    # snapshot of CollectableAttributes to be restored later on...
    def _changeTemporarily(self, enable_states=None,
                           disable_states=None, other=None):
        """Temporarily enable/disable needed states for computation

        Enable states listed in `enable_states` together with the ones
        which are enabled in `other` and known here; disable the ones
        listed in `disable_states`.  Use `_resetEnabledTemporarily` to
        reset to previous state of enabled.

        `other` can be a ClassWithCollections object or StateCollection
        """
        if enable_states is None:
            enable_states = []
        if disable_states is None:
            disable_states = []
        self.__storedTemporarily.append(self._getEnabled())
        other_ = other
        if isinstance(other, ClassWithCollections):
            other = other.states

        if other is not None:
            # lets take states which are enabled in other, are not yet
            # requested, and are known here
            add_enable_states = list(set(other.enabled).difference(
                 set(enable_states)).intersection(self._getNames()))
            if len(add_enable_states) > 0:
                if __debug__:
                    debug("ST",
                          "Adding states %s from %s to be enabled temporarily" %
                          (add_enable_states, other_) +
                          " since they are not enabled in %s" %
                          (self))
                # build a new list: `+=` would extend the caller's list
                enable_states = enable_states + add_enable_states

        # Lets go one by one enabling only disabled once... but could be as
        # simple as
        self.enable(enable_states)
        self.disable(disable_states)


    def _resetEnabledTemporarily(self):
        """Reset to previousely stored set of enabled states

        :Raise `ValueError`: if there is nothing stored to restore
        """
        if __debug__:
            debug("ST", "Resetting to previous set of enabled states")
        # what matters is whether something was stored, not whether any
        # state happens to be enabled at the moment
        if not self.__storedTemporarily:
            raise ValueError("Trying to restore not-stored list of enabled "
                             "states")
        self._setEnabled(self.__storedTemporarily.pop())


    def _getEnabled(self, nondefault=True, invert=False):
        """Return list of enabled states

        :Parameters:
          nondefault : bool
            Either to return also states which are enabled simply by default
          invert : bool
            Would invert the meaning, ie would return disabled states
        """
        selected = []
        for name in self._getNames():
            is_enabled = self.isEnabled(name)
            if invert:
                matches = not is_enabled
            else:
                matches = is_enabled
            if matches and not nondefault:
                # keep only those which deviate from their default
                matches = self._items[name]._defaultenabled != is_enabled
            if matches:
                selected.append(name)
        return selected


    def _setEnabled(self, indexlist):
        """Given `indexlist` make only those in the list enabled

        It might be handy to store set of enabled states and then to restore
        it later on. It can be easily accomplished now::

        >>> from mvpa.misc.state import ClassWithCollections, StateVariable
        >>> class Blah(ClassWithCollections):
        ...   bleh = StateVariable(enabled=False, doc='Example')
        ...
        >>> blah = Blah()
        >>> states_enabled = blah.states.enabled
        >>> blah.states.enabled = ['bleh']
        >>> blah.states.enabled = states_enabled
        """
        for index in list(self._items.keys()):
            self.enable(index, index in indexlist)


    # Properties
    enabled = property(fget=_getEnabled, fset=_setEnabled)
##################################################################
# Base classes (and metaclass) which use collections
#

# Helper tables for AttributesCollector.
#
# Class name of an attribute -> (name of the collection it is placed
# into, class implementing that collection)
_known_collections = {
    # Quite a generic one but mostly in classifiers
    'StateVariable': ("states", StateCollection),

    # For classifiers only
    'Parameter': ("params", ParameterCollection),
    'KernelParameter': ("kernel_params", ParameterCollection),

    # For datasets
    # XXX custom collections needed?
    'SampleAttribute': ("sa", SampleAttributesCollection),
    'FeatureAttribute': ("fa", SampleAttributesCollection),
    'DatasetAttribute': ("dsa", SampleAttributesCollection),
}

# Mapping from collection name into Collection class
_col2class = dict(_known_collections.values())

# Order in which collections contribute to the owner's __repr__
_COLLECTIONS_ORDER = [
    'sa', 'fa', 'dsa',
    'params', 'kernel_params', 'states',
]
class AttributesCollector(type):
    """Intended to collect and compose StateCollection for any child
    class of this metaclass
    """

    def __init__(cls, name, bases, dict):
        """Move CollectableAttributes of the class into collections

        Every CollectableAttribute found in the class body is removed
        from the class and placed into the collection assigned to its
        class name in `_known_collections`.  Items of the collection
        templates of base classes are merged in.  The result is stored
        as `_collections_template`; `_paramscols`, `_statesdoc` and
        `_paramsdoc` are assigned and the class docstring gets extended.

        :Raise `ValueError`: if `_ATTRIBUTE_COLLECTIONS` of the class
          requests an unknown collection
        """
        if __debug__:
            debug(
                "COLR",
                "AttributesCollector call for %s.%s, where bases=%s, dict=%s " \
                % (cls, name, bases, dict))

        super(AttributesCollector, cls).__init__(name, bases, dict)

        collections = {}
        for attrname, value in list(dict.items()):
            if isinstance(value, CollectableAttribute):
                baseclassname = value.__class__.__name__
                col = _known_collections[baseclassname][0]
                # XXX should we allow to throw exceptions here?
                collections.setdefault(col, {})[attrname] = value
                # and assign name if not yet was set
                if value.name is None:
                    value._setName(attrname)
                # !!! We do not keep copy of this attribute static in the
                #     class.  Due to below traversal of base classes, we
                #     should be able to construct proper collections even
                #     in derived classes
                delattr(cls, attrname)

        # XXX can we first collect parent's states and then populate with ours?
        # TODO take care about overriding one from super class: as it is,
        #      items of base classes replace same-named ones defined here

        for base in bases:
            if hasattr(base, "__metaclass__") and \
                   base.__metaclass__ == AttributesCollector:
                newcollections = base._collections_template
                if len(newcollections) == 0:
                    continue
                if __debug__:
                    debug("COLR",
                          "Collect collections %s for %s from %s" %
                          (newcollections, cls, base))
                for col, collection in newcollections.items():
                    newitems = collection._items
                    if col in collections:
                        collections[col].update(newitems)
                    else:
                        # take a copy: sharing the dictionary of the base
                        # template would let update() for a later base
                        # add foreign items to that base class
                        collections[col] = newitems.copy()

        if __debug__:
            debug("COLR",
                  "Creating StateCollection template %s with collections %s"
                  % (cls, list(collections.keys())))

        # if there is an explicit request for (possibly empty) collections
        if hasattr(cls, "_ATTRIBUTE_COLLECTIONS"):
            for col in cls._ATTRIBUTE_COLLECTIONS:
                if col not in _col2class:
                    raise ValueError(
                        "Requested collection %s is unknown to collector" %
                        col)
                if col not in collections:
                    collections[col] = None

        # TODO: check on conflict in names of Collections' items!  since
        #       otherwise even order is not definite since we use dict for
        #       collections.
        # XXX should we switch to tuple?

        # wrap gathered dictionaries into collections of proper classes
        for col in list(collections.keys()):
            collections[col] = _col2class[col](collections[col])

        setattr(cls, "_collections_template", collections)

        #
        # Expand documentation for the class based on the listed
        # parameters an if it is stateful
        #
        # TODO -- figure nice way on how to alter __init__ doc directly...

        # NOTE(review): runs of whitespace inside the string literals of
        # this method were collapsed in the copy of the source under
        # review; the indentation used below (2 spaces for an entry,
        # 4 for its description) is reconstructed -- confirm against
        # the upstream file.

        # Parameters
        paramsdoc = ""
        paramscols = []
        for col in ('params', 'kernel_params'):
            if col in collections:
                paramscols.append(col)
                # lets at least sort the parameters for consistent output
                col_items = collections[col]._items
                params = sorted([(v._instance_index, k)
                                 for k, v in col_items.items()])
                paramsdoc += '\n'.join(
                    [col_items[param].doc(indent='  ')
                     for index, param in params]) + '\n'

        # Parameters collection could be taked hash of to decide if
        # any were changed? XXX may be not needed at all?
        setattr(cls, "_paramscols", paramscols)

        # States doc
        statesdoc = ""
        if 'states' in collections:
            paramsdoc += """  enable_states : None or list of basestring
    Names of the state variables which should be enabled additionally
    to default ones
  disable_states : None or list of basestring
    Names of the state variables which should be disabled
"""
            statesdoc = "  * "
            statesdoc += '\n  * '.join(collections['states'].listing)
            statesdoc += "\n\n(States enabled by default are listed with `+`)"
            if __debug__:
                debug("COLR", "Assigning __statesdoc to be %s" % statesdoc)
            setattr(cls, "_statesdoc", statesdoc)

        if paramsdoc != "":
            if __debug__ and 'COLR' in debug.active:
                debug("COLR", "Assigning __paramsdoc to be %s" % paramsdoc)
            setattr(cls, "_paramsdoc", paramsdoc)

        if paramsdoc + statesdoc != "":
            cls.__doc__ = enhancedDocString(cls, *bases)
947 948 949
950 -class ClassWithCollections(object):
951 """Base class for objects which contain any known collection 952 953 Classes inherited from this class gain ability to access 954 collections and their items as simple attributes. Access to 955 collection items "internals" is done via <collection_name> attribute 956 and interface of a corresponding `Collection`. 957 """ 958 959 _DEV__doc__ = """ 960 TODO: rename 'descr'? -- it should simply 961 be 'doc' -- no need to drag classes docstring imho. 962 """ 963 964 __metaclass__ = AttributesCollector 965
966 - def __new__(cls, *args, **kwargs):
967 """Initialize ClassWithCollections object 968 969 :Parameters: 970 descr : basestring 971 Description of the instance 972 """ 973 self = super(ClassWithCollections, cls).__new__(cls) 974 975 s__dict__ = self.__dict__ 976 977 # init variable 978 # XXX: Added as pylint complained (rightfully) -- not sure if false 979 # is the proper default 980 self.__params_set = False 981 982 # need to check to avoid override of enabled states in the case 983 # of multiple inheritance, like both ClassWithCollectionsl and Harvestable 984 if not s__dict__.has_key('_collections'): 985 s__class__ = self.__class__ 986 987 collections = copy.deepcopy(s__class__._collections_template) 988 s__dict__['_collections'] = collections 989 s__dict__['_known_attribs'] = {} 990 """Dictionary to contain 'links' to the collections from each 991 known attribute. Is used to gain some speed up in lookup within 992 __getattribute__ and __setattr__ 993 """ 994 995 # Assign owner to all collections 996 for col, collection in collections.iteritems(): 997 if col in s__dict__: 998 raise ValueError, \ 999 "Object %s has already attribute %s" % \ 1000 (self, col) 1001 s__dict__[col] = collection 1002 collection.name = col 1003 collection.owner = self 1004 1005 self.__params_set = False 1006 1007 if __debug__: 1008 descr = kwargs.get('descr', None) 1009 debug("COL", "ClassWithCollections.__new__ was done " 1010 "for %s#%s with descr=%s" \ 1011 % (s__class__.__name__, id(self), descr)) 1012 1013 return self
1014 1015
1016 - def __init__(self, descr=None, **kwargs):
1017 1018 if not self.__params_set: 1019 self.__descr = descr 1020 """Set humane description for the object""" 1021 1022 # To avoid double initialization in case of multiple inheritance 1023 self.__params_set = True 1024 1025 collections = self._collections 1026 # Assign attributes values if they are given among 1027 # **kwargs 1028 for arg, argument in kwargs.items(): 1029 set = False 1030 for collection in collections.itervalues(): 1031 if collection._is_initializable(arg): 1032 collection._initialize(arg, argument) 1033 set = True 1034 break 1035 if set: 1036 trash = kwargs.pop(arg) 1037 else: 1038 known_params = reduce( 1039 lambda x,y:x+y, 1040 [x.items.keys() for x in collections.itervalues()], []) 1041 raise TypeError, \ 1042 "Unexpected keyword argument %s=%s for %s." \ 1043 % (arg, argument, self) \ 1044 + " Valid parameters are %s" % known_params 1045 1046 ## Initialize other base classes 1047 ## commented out since it seems to be of no use for now 1048 #if init_classes is not None: 1049 # # return back stateful arguments since they might be 1050 # # processed by underlying classes 1051 # kwargs.update(kwargs_stateful) 1052 # for cls in init_classes: 1053 # cls.__init__(self, **kwargs) 1054 #else: 1055 # if len(kwargs)>0: 1056 # known_params = reduce(lambda x, y: x + y, \ 1057 # [x.items.keys() for x in collections], 1058 # []) 1059 # raise TypeError, \ 1060 # "Unknown parameters %s for %s." % (kwargs.keys(), 1061 # self) \ 1062 # + " Valid parameters are %s" % known_params 1063 if __debug__: 1064 debug("COL", "ClassWithCollections.__init__ was done " 1065 "for %s#%s with descr=%s" \ 1066 % (self.__class__.__name__, id(self), descr))
1067 1068 1069 #__doc__ = enhancedDocString('ClassWithCollections', locals()) 1070 1071
def __getattribute__(self, index):
    """Resolve attribute `index`, giving precedence to collections

    Lookup order:
      1. names starting with an underscore are served by the plain
         object lookup
      2. a name of a collection returns that collection
      3. a name registered in `_known_attribs` returns the `value` of
         the corresponding item of the owning collection
      4. everything else falls back to the plain object lookup
    """
    # private names go first since smth like __dict__ might be
    # queried by copy before the instance is __init__ed
    if index[0] == '_':
        return _object_getattribute(self, index)

    instance_dict = _object_getattribute(self, '__dict__')

    # is it a collection itself?
    collections = instance_dict['_collections']
    if index in collections:
        return collections[index]

    # is it an item of some collection?
    registry = instance_dict['_known_attribs']
    if index not in registry:
        # just a generic attribute
        return _object_getattribute(self, index)

    owning_collection = collections[registry[index]]
    return owning_collection._items[index].value
1091 1092
def __setattr__(self, index, value):
    """Assign `value` to attribute `index`

    If `index` is registered in `_known_attribs`, the value is stored
    in the corresponding item of the owning collection and `value` is
    returned.  Names starting with an underscore and all unregistered
    names are handled by the plain object assignment.
    """
    if index[0] != '_':
        instance_dict = _object_getattribute(self, '__dict__')
        registry = instance_dict['_known_attribs']
        if index in registry:
            # part of a collection -- set the value of its item
            owning_collection = instance_dict['_collections'][registry[index]]
            owning_collection[index].value = value
            return value

    # private or generic attribute
    return _object_setattr(self, index, value)
1107 1108 1109 # XXX not sure if we shouldn't implement anything else...
def reset(self):
    """Reset all collections of the object

    Invokes `reset()` of every collection stored in `_collections`.
    """
    owned = self._collections.values()
    for each in owned:
        each.reset()
1113 1114
def __str__(self):
    """Return a short summary of the object

    The summary consists of the class name, the description (if it is
    not None) and, for every collection, the number of its items, its
    name and its own string representation.
    """
    parts = ["%s:" % (self.__class__.__name__)]

    descr = self.__descr
    if descr is not None:
        # wrap into a tuple so a tuple-valued description is printed
        # instead of being unpacked as formatting arguments
        parts.append("/%s " % (descr,))

    # _collections might be absent if the object was not fully created
    if hasattr(self, "_collections"):
        for col, collection in self._collections.items():
            parts.append(" %d %s:%s"
                         % (len(collection.items), col, str(collection)))

    return ''.join(parts)
1123 1124
def __repr__(self, prefixes=None, fullname=False):
    """String definition of the object of ClassWithCollections object

    :Parameters:
      fullname : bool
        Either to include full name of the module
      prefixes : list of strings
        What other prefixes to prepend to list of arguments
    """
    # operate on a copy, so the list of the caller stays intact
    if prefixes is None:
        arguments = []
    else:
        arguments = prefixes[:]

    # debug targets might force the module name and/or append the id
    id_suffix = ""
    if __debug__:
        if 'MODULE_IN_REPR' in debug.active:
            fullname = True
        if 'ID_IN_REPR' in debug.active:
            id_suffix = '#%s' % id(self)

    module_prefix = ""
    if fullname:
        module_name = '%s' % self.__class__.__module__
        # nothing to prepend for things defined in the running script
        if module_name != "__main__":
            module_prefix = "%s." % module_name

    # Attributes of the collections, in the order given by
    # _COLLECTIONS_ORDER; collections absent in this object are skipped
    available = self._collections
    for name in _COLLECTIONS_ORDER:
        collection = available.get(name, None)
        if collection is not None:
            arguments += collection._cls_repr()

    # Description goes last, if present
    description = self.__descr
    if description is not None:
        arguments.append("descr=%s" % repr(description))

    return "%s%s(%s)%s" % (module_prefix, self.__class__.__name__,
                           ', '.join(arguments), id_suffix)
# Read-only property: only a getter is provided, which returns the
# private description attribute of the instance
descr = property(fget=lambda self: self.__descr,
                 doc="Description of the object if any")
1170 1171 1172
class Harvestable(ClassWithCollections):
    """Classes inherited from this class intend to collect attributes
    within internal processing.

    Subclassing Harvestable we gain ability to collect any internal
    data from the processing which is especially important if an
    object performs something in loop and discards some intermediate
    possibly interesting results (like in case of
    CrossValidatedTransferError and states of the trained classifier
    or TransferError).

    """

    harvested = StateVariable(enabled=False, doc=
       """Store specified attributes of classifiers at each split""")

    _KNOWN_COPY_METHODS = [ None, 'copy', 'deepcopy' ]


    def __init__(self, harvest_attribs=None, copy_attribs='copy', **kwargs):
        """Initialize state of harvestable

        :Parameters:
          harvest_attribs : list of basestr or dicts
            What attributes of call to store and return within
            harvested state variable. If an item is a dictionary,
            following keys are used ['name', 'copy']
          copy_attribs : None or basestr
            Default copying. If None -- no copying, 'copy'
            - shallow copying, 'deepcopy' -- deepcopying

        """
        ClassWithCollections.__init__(self, **kwargs)

        # has to be assigned prior to _setAttribs, which uses it as the
        # default copy method
        self.__copy_attribs = copy_attribs

        self._setAttribs(harvest_attribs)


    def _setAttribs(self, attribs):
        """Set attributes to harvest

        Each attribute in self.__attribs must have following fields
         - name : functional (or arbitrary if 'obj' or 'attr' is set)
                  description of the thing to harvest,
                  e.g. 'transerror.clf.training_time'
         - obj : name of the object to harvest from (if empty,
                 'self' is assumed),
                 e.g 'transerror'
         - attr : attribute of 'obj' to harvest,
                 e.g. 'clf.training_time'
         - copy : None, 'copy' or 'deepcopy' - way to copy attribute

        :Parameters:
          attribs : list of basestr or dicts, or None
            Specification of what to harvest.  Strings are treated as
            'name'.  Dictionaries are copied, so the ones provided by
            the caller are not modified.  If empty or None, nothing
            gets harvested.

        :Raises ValueError: if a dictionary lacks 'name', or if the
          method to copy is not among _KNOWN_COPY_METHODS
        """
        # Collect into a local list first, so a failed validation does
        # not leave a partially processed specification behind
        processed = []
        if attribs:
            for attrib in attribs:
                if isinstance(attrib, dict):
                    if 'name' not in attrib:
                        raise ValueError(
                            "Harvestable: attribute must be a string or "
                            "a dictionary with 'name'")
                    # do not touch the dictionary of the caller
                    attrib = dict(attrib)
                else:
                    attrib = {'name': attrib}

                # assign default method to copy
                if 'copy' not in attrib:
                    attrib['copy'] = self.__copy_attribs

                # check copy method
                if attrib['copy'] not in self._KNOWN_COPY_METHODS:
                    raise ValueError(
                        "Unknown method %s. Known are %s"
                        % (attrib['copy'], self._KNOWN_COPY_METHODS))

                if not ('obj' in attrib or 'attr' in attrib):
                    # Process the item to harvest
                    # split into obj, attr. If obj is empty, then assume self
                    split = attrib['name'].split('.', 1)
                    if len(split) == 1:
                        obj, attr = split[0], None
                    else:
                        obj, attr = split
                    attrib.update({'obj': obj, 'attr': attr})
                else:
                    # only one of the two might be given -- fill in the
                    # other one, since _harvest accesses both
                    attrib.setdefault('obj', 'self')
                    attrib.setdefault('attr', None)

                if attrib['obj'] == '':
                    attrib['obj'] = 'self'

                # TODO: may be enabling of the states??

                processed.append(attrib)

            # force the state
            self.states.enable('harvested')

        # always a list, to make sure it is not None or 0
        self.__attribs = processed


    def _harvest(self, vars):
        """The harvesting function: must obtain dictionary of variables
        from the caller.

        :Parameters:
          vars : dict
            Dictionary of available data. Most often locals() could be
            passed as `vars`. Note that private attributes which are
            desired to be harvested better be bound locally to some
            variable

        :Returns:
          nothing
        """

        if not self.states.isEnabled('harvested') or not self.__attribs:
            return

        if not self.states.isSet('harvested'):
            self.harvested = dict([(a['name'], []) for a in self.__attribs])

        # dispatch for the known methods to copy; built once per call
        copiers = {'copy': copy.copy,
                   'deepcopy': copy.deepcopy,
                   None: lambda x: x}

        for attrib in self.__attribs:
            attrv = vars[attrib['obj']]

            # access particular attribute if needed
            if attrib['attr'] is not None:
                # NOTE(security): the 'attr' string is evaluated as
                # Python code, so harvesting specifications must come
                # from trusted code only.  eval is kept since 'attr'
                # might be an arbitrary expression after the dot.
                attrv = eval('attrv.%s' % attrib['attr'])

            # copy the value if needed
            attrv = copiers[attrib['copy']](attrv)

            self.harvested[attrib['name']].append(attrv)


    harvest_attribs = property(fget=lambda self:self.__attribs,
                               fset=_setAttribs)
1307