Package mvpa :: Package misc :: Module support
[hide private]
[frames] | [no frames]

Source Code for Module mvpa.misc.support

  1  # emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*- 
  2  # vi: set ft=python sts=4 ts=4 sw=4 et: 
  3  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
  4  # 
  5  #   See COPYING file distributed along with the PyMVPA package for the 
  6  #   copyright and license terms. 
  7  # 
  8  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
  9  """Support function -- little helpers in everyday life""" 
 10   
 11  __docformat__ = 'restructuredtext' 
 12   
 13  import numpy as N 
 14  import re, os 
 15   
 16  # for SmartVersion 
 17  from distutils.version import Version 
 18  from types import StringType, TupleType, ListType 
 19   
 20  from mvpa.base import warning 
 21  from mvpa.support.copy import copy, deepcopy 
 22  from operator import isSequenceType 
 23   
 24  if __debug__: 
 25      from mvpa.base import debug 
 26   
 27   
def reuseAbsolutePath(file1, file2, force=False):
    """Resolve `file2` relative to the directory of `file1`.

    If `file2` carries no absolute path (or `force` is enabled), it is
    joined onto the directory portion of `file1`.

    :Parameters:
      force : bool
        if True, rebase `file2` even if it starts with the path separator
    """
    if force or not file2.startswith(os.path.sep):
        # rebase file2 onto the directory file1 lives in
        return os.path.join(os.path.dirname(file1),
                            file2.lstrip(os.path.sep))
    return file2
41 42
def transformWithBoxcar(data, startpoints, boxlength, offset=0, fx=N.mean):
    """Extract boxcar windows from an array and collapse each of them.

    A boxcar is a window of `boxlength` elements along the first axis of
    `data`, anchored at a startpoint (optionally shifted by `offset`).
    A customizable function `fx` (default: averaging) is applied to every
    window individually.

    :param data: An array with an arbitrary number of dimensions.
    :type data: array
    :param startpoints: Boxcar startpoints as index along the first array axis
    :type startpoints: sequence
    :param boxlength: Length of the boxcar window in #array elements
    :type boxlength: int
    :param offset: Optional offset between the configured starting point and
      the actual beginning of the boxcar window.
    :type offset: int
    :rtype: array (len(startpoints) x data.shape[1:])
    """
    if boxlength < 1:
        raise ValueError("Boxlength lower than 1 makes no sense.")

    # validate every window before touching the data
    nsamples = len(data)
    for start in startpoints:
        begin = start + offset
        if begin < 0 or begin + boxlength > nsamples:
            raise ValueError('Illegal box: start: %i, offset: %i, length: %i'
                             % (start, offset, boxlength))

    # collapse each window with fx along the first axis
    collapsed = [fx(data[N.arange(start + offset,
                                  start + offset + boxlength)], axis=0)
                 for start in startpoints]

    return N.array(collapsed)
80 81
82 -def _getUniqueLengthNCombinations_lt3(data, n):
83 """Generates a list of lists containing all combinations of 84 elements of data of length 'n' without repetitions. 85 86 data: list 87 n: integer 88 89 This function is adapted from a Java version posted in some forum on 90 the web as an answer to the question 'How can I generate all possible 91 combinations of length n?'. Unfortunately I cannot remember which 92 forum it was. 93 94 NOTE: implementation is broken for n>2 95 """ 96 97 if n > 2: 98 raise ValueError, "_getUniqueLengthNCombinations_lt3 " \ 99 "is broken for n>2, hence should not be used directly." 100 101 # to be returned 102 combos = [] 103 104 # local function that will be called recursively to collect the 105 # combination elements 106 def take(data, occupied, depth, taken): 107 for i, d in enumerate(data): 108 # only do something if this element hasn't been touch yet 109 if occupied[i] == False: 110 # see whether will reached the desired length 111 if depth < n-1: 112 # flag the current element as touched 113 occupied[i] = True 114 # next level 115 take(data, occupied, depth+1, taken + [d]) 116 # if the current element would be set 'free', it would 117 # results in ALL combinations of elements (obeying order 118 # of elements) and not just in the unique sets of 119 # combinations (without order) 120 #occupied[i] = False 121 else: 122 # store the final combination 123 combos.append(taken + [d])
124 # some kind of bitset that stores the status of each element 125 # (contained in combination or not) 126 occupied = [False] * len(data) 127 # get the combinations 128 take(data, occupied, 0, []) 129 130 # return the result 131 return combos 132
def xuniqueCombinations(L, n):
    """Generator of unique combinations from a list `L` of objects in
    groups of size `n`.

    :Parameters:
      L : list
        list of unique ids
      n : int
        grouping size

    Adopted from Li Daobing
    http://code.activestate.com/recipes/190465/
    (MIT license, according to activestate.com's policy)
    """
    if n == 0:
        yield []
        return
    # fix the head element, recurse for the remaining n-1 on the tail
    for head_idx in range(len(L) - n + 1):
        head = L[head_idx]
        for tail in xuniqueCombinations(L[head_idx + 1:], n - 1):
            yield [head] + tail
155
156 -def _getUniqueLengthNCombinations_binary(L, n=None, sort=True):
157 """Find all subsets of data 158 159 :Parameters: 160 L : list 161 list of unique ids 162 n : None or int 163 If None, all possible subsets are returned. If n is specified (int), 164 then only the ones of the length n are returned 165 sort : bool 166 Either to sort the resultant sequence 167 168 Adopted from Alex Martelli: 169 http://mail.python.org/pipermail/python-list/2001-January/067815.html 170 """ 171 N = len(L) 172 if N > 20 and n == 1: 173 warning("getUniqueLengthNCombinations_binary should not be used for " 174 "large N") 175 result = [] 176 for X in range(2**N): 177 x = [ L[i] for i in range(N) if X & (1L<<i) ] 178 if n is None or len(x) == n: 179 # yield x # if we wanted to use it as a generator 180 result.append(x) 181 result.sort() 182 # if __debug__ and n is not None: 183 # # verify the result 184 # # would need scipy... screw it 185 # assert(len(result) == ...) 186 return result
187 188
def getUniqueLengthNCombinations(L, n=None, sort=True):
    """Find all subsets of data

    :Parameters:
      L : list
        list of unique ids
      n : None or int
        If None, all possible subsets are returned. If n is specified (int),
        then only the ones of the length n are returned
      sort : bool
        Either to sort the resultant sequence (only honored for n=None)

    TODO: work out single stable implementation -- probably just by fixing
    _getUniqueLengthNCombinations_lt3
    """
    if n == 1:
        return _getUniqueLengthNCombinations_lt3(L, n)
    elif n is None:
        # BUG FIX: forward the caller's `sort` flag instead of a
        # hardcoded True
        return _getUniqueLengthNCombinations_binary(L, n, sort=sort)
    else:
        # XXX EO: Is it safe to remove "list" and use generator
        #        directly to save memory for large number of
        #        combinations? It seems OK according to tests but
        #        I'd like a second opinion.
        # XXX EO: what about inserting a warning if number of
        #        combinations > 10000? No one would run it...
        return list(xuniqueCombinations(L, n))
214 215
def indentDoc(v):
    """Return `str(v)` with every continuation line indented by one space.

    Needed for a cleaner __repr__ output.
    `v` - arbitrary
    """
    # simple string replacement does the job; no regex machinery needed
    return str(v).replace('\n', '\n ')
223 224
def idhash(val):
    """Craft unique id+hash string for an object.

    Returns ``"<id>"`` or ``"<id>:<hash>"``; lists are hashed via their
    tuple equivalent.  Unhashable objects degrade to the bare id.
    """
    res = "%s" % id(val)
    if isinstance(val, list):
        # lists are unhashable -- hash the equivalent tuple instead
        val = tuple(val)
    try:
        # buffer() exists only on Python 2; failure falls back to hash()
        res += ":%s" % hash(buffer(val))
    except Exception:
        # BUG FIX: was a bare `except:` which also swallowed
        # KeyboardInterrupt/SystemExit
        try:
            res += ":%s" % hash(val)
        except Exception:
            # unhashable -- id alone has to do
            pass
    return res
240
def isSorted(items):
    """Check if listed items are in sorted order.

    :Parameters:
      `items`: iterable container

    :return: `True` if were sorted. Otherwise `False`
    """
    reference = deepcopy(items)
    reference.sort()
    verdict = reference == items
    # containers of array-likes may produce an elementwise comparison --
    # collapse it to a single boolean
    if hasattr(verdict, '__iter__'):
        verdict = N.all(verdict)
    return verdict
256 257
def isInVolume(coord, shape):
    """For given coord check if it is within a specified volume size.

    Returns True/False. Assumes that volume coordinates start at 0.
    No more generalization (arbitrary minimal coord) is done to save
    on performance
    """
    for axis, pos in enumerate(coord):
        if not (0 <= pos < shape[axis]):
            return False
    return True
269 270
def version_to_tuple(v):
    """Convert literal string into a tuple, if possible of ints

    Tuple of integers constructed by splitting at '.' or interleaves
    of numerics and alpha numbers
    """
    # string type compatible with both Python 2 (incl. unicode) and 3
    try:
        string_types = basestring
    except NameError:
        string_types = str

    if isinstance(v, string_types):
        v = v.split('.')
    elif isinstance(v, (tuple, list)):
        # assure tuple below; already a sequence
        pass
    else:
        raise ValueError("Do not know how to treat version '%s'" % str(v))

    # Try to convert items into ints
    vres = []

    regex = re.compile('(?P<numeric>[0-9]*)'
                       '(?P<alpha>[~+-]*[A-Za-z]*)(?P<suffix>.*)')
    for x in v:
        try:
            vres += [int(x)]
        except ValueError:
            # try to split into sequences of literals and numerics
            suffix = x
            while suffix != '':
                res = regex.search(suffix)
                if res:
                    resd = res.groupdict()
                    if resd['numeric'] != '':
                        vres += [int(resd['numeric'])]
                    if resd['alpha'] != '':
                        vres += [resd['alpha']]
                    if resd['suffix'] == suffix:
                        # BUG FIX: all-optional pattern can match without
                        # consuming anything (e.g. '_2'), which previously
                        # spun forever -- keep the remainder as-is and stop
                        vres += [suffix]
                        break
                    suffix = resd['suffix']
                else:
                    # We can't detect anything meaningful -- let it go as is
                    # BUG FIX: was `resd += [suffix]` (appending to a dict)
                    vres += [suffix]
                    break
    return tuple(vres)
312
class SmartVersion(Version):
    """A bit evolved comparison of versions

    The reason for not using python's distutils.version is that it
    seems to have no clue about somewhat common conventions of using
    '-dev' or 'dev' or 'rc' suffixes for upcoming releases (so major
    version does contain upcoming release already).

    So here is an ad-hoc and not as nice implementation

    Comparison relies on `version_to_tuple` to produce a mixed tuple of
    ints and strings which `__cmp__` walks element by element.
    """

    def parse(self, vstring):
        # Version.__init__ delegates to parse(); keep both the raw string
        # and the tuple form used for comparisons
        self.vstring = vstring
        self.version = version_to_tuple(vstring)

    def __str__(self):
        return self.vstring

    def __cmp__(self, other):
        # coerce `other` into a SmartVersion so both sides expose .version
        if isinstance(other, (StringType, TupleType, ListType)):
            other = SmartVersion(other)
        elif isinstance(other, SmartVersion):
            pass
        elif isinstance(other, Version):
            other = SmartVersion(other.vstring)
        else:
            raise ValueError("Do not know how to treat version %s"
                             % str(other))

        # Do ad-hoc comparison of strings
        i = 0
        s, o = self.version, other.version
        # markers that flag a pre-release suffix (case-insensitive)
        regex_prerelease = re.compile('~|-?dev|-?rc|-?svn|-?pre', re.I)
        # walk both tuples in lockstep, padding the shorter one with None
        for i in xrange(max(len(s), len(o))):
            if i < len(s): si = s[i]
            else: si = None
            if i < len(o): oi = o[i]
            else: oi = None

            if si == oi:
                continue

            # compare symmetrically: (si vs oi) then (oi vs si) with the
            # sign flipped via `mult`
            for x,y,mult in ((si, oi, 1), (oi, si, -1)):
                if x is None:
                    # one version ran out of components
                    if isinstance(y, int):
                        return -mult # we got '.1' suffix
                    if isinstance(y, StringType):
                        if (regex_prerelease.match(y)):
                            return mult # so we got something to signal
                                        # pre-release, so first one won
                        else:
                            # otherwise the other one wins
                            return -mult
                    else:
                        raise RuntimeError, "Should not have got here with %s" \
                              % y
                elif isinstance(x, int):
                    # ints sort before strings at the same position
                    if not isinstance(y, int):
                        return mult
                    return mult*cmp(x, y) # both are ints
                elif isinstance(x, StringType):
                    if isinstance(y, StringType):
                        return mult*cmp(x,y)
        # all components equal (or only incomparable leftovers)
        return 0
377
def getBreakPoints(items, contiguous=True):
    """Return a list of break points.

    :Parameters:
      items : iterable
        list of items, such as chunks
      contiguous : bool
        if `True` (default) then raise Value Error if items are not
        contiguous, i.e. a label occur in multiple contiguous sets

    :raises: ValueError

    :return: list of indexes for every new set of items
    """
    seen = []
    """Items encountered so far"""
    breakpoints = []
    """Resultant list"""
    previous = None  # pylint happiness event!
    for position, current in enumerate(items):
        if current not in seen:
            # a brand new set starts here
            seen.append(current)
            breakpoints.append(position)
        elif position > 0 and previous != current:
            # a known item shows up again after a different one
            if contiguous:
                raise ValueError(
                    "Item %s was already seen before" % str(current))
            breakpoints.append(position)
        previous = current
    return breakpoints
412 413
def RFEHistory2maps(history):
    """Convert history generated by RFE into the array of binary maps

    Example:
      history2maps(N.array( [ 3,2,1,0 ] ))
    results in
      array([[ 1.,  1.,  1.,  1.],
             [ 1.,  1.,  1.,  0.],
             [ 1.,  1.,  0.,  0.],
             [ 1.,  0.,  0.,  0.]])
    """

    # assure that it is an array
    history = N.array(history)
    hmin = history.min()
    nfeatures, steps = len(history), history.max() - hmin + 1
    history_maps = N.zeros((steps, nfeatures))

    for step in range(steps):
        # BUG FIX: threshold relative to the smallest history value --
        # `steps` is already offset by min(history), but the comparison
        # was not, so histories not starting at 0 produced duplicated
        # leading all-ones rows
        history_maps[step, history >= step + hmin] = 1

    return history_maps
435 436
class MapOverlap(object):
    """Compute some overlap stats from a sequence of binary maps.

    When called with a sequence of binary maps (e.g. lists or arrays) the
    fraction of mask elements that are non-zero in a customizable proportion
    of the maps is returned. By default this threshold is set to 1.0, i.e.
    such an element has to be non-zero in *all* maps.

    Three additional maps (same size as original) are computed:

      * overlap_map: binary map which is non-zero for each overlapping element.
      * spread_map:  binary map which is non-zero for each element that is
                     non-zero in any map, but does not exceed the overlap
                     threshold.
      * ovstats_map: map of float with the raw elementwise fraction of overlap.

    All maps are available via class members.
    """
    def __init__(self, overlap_threshold=1.0):
        """Nothing to be seen here.
        """
        self.__overlap_threshold = overlap_threshold

        # populated on every __call__() (pylint happiness block)
        self.overlap_map = None
        self.spread_map = None
        self.ovstats_map = None

    def __call__(self, maps):
        """Returns fraction of overlapping elements.
        """
        # elementwise fraction of maps in which an element is non-zero
        fractions = N.mean(maps, axis=0)
        threshold = self.__overlap_threshold

        self.overlap_map = (fractions >= threshold)
        self.spread_map = N.logical_and(fractions > 0.0,
                                        fractions < threshold)
        self.ovstats_map = fractions

        return N.mean(fractions >= threshold)
477 478
class Event(dict):
    """Simple class to define properties of an event.

    The class is basically a dictionary. Any properties can
    be passed as keyword arguments to the constructor, e.g.:

    >>> ev = Event(onset=12, duration=2.45)

    Conventions for keys:

    `onset`
      The onset of the event in some unit.
    `duration`
      The duration of the event in the same unit as `onset`.
    `label`
      E.g. the condition this event is part of.
    `chunk`
      Group this event is part of (if any), e.g. experimental run.
    `features`
      Any amount of additional features of the event. This might include
      things like physiological measures, stimulus intensity. Must be a mutable
      sequence (e.g. list), if present.
    """
    # keys every event must define
    _MUSTHAVE = ['onset']

    def __init__(self, **kwargs):
        # store everything
        dict.__init__(self, **kwargs)

        # basic checks (`in` instead of Py2-only has_key())
        for k in Event._MUSTHAVE:
            if k not in self:
                raise ValueError("Event must have '%s' defined." % k)

    def asDescreteTime(self, dt, storeoffset=False):
        """Convert `onset` and `duration` information into descrete timepoints.

        :Parameters:
          dt: float
            Temporal distance between two timepoints in the same unit as `onset`
            and `duration`.
          storeoffset: bool
            If True, the temporal offset between original `onset` and
            descretized `onset` is stored as an additional item in `features`.

        :Return:
          A copy of the original `Event` with `onset` and optionally `duration`
          replaced by their corresponding descrete timepoint. The new onset will
          correspond to the timepoint just before or exactly at the original
          onset. The new duration will be the number of timepoints covering the
          event from the computed onset timepoint till the timepoint exactly at
          the end, or just after the event.

          Note again, that the new values are expressed as #timepoint and not
          in their original unit!
        """
        dt = float(dt)
        onset = self['onset']
        out = deepcopy(self)

        # get the timepoint just prior the onset
        out['onset'] = int(N.floor(onset / dt))

        if storeoffset:
            # compute offset between original and discretized onset
            offset = onset - (out['onset'] * dt)

            if 'features' in out:
                out['features'].append(offset)
            else:
                out['features'] = [offset]

        if 'duration' in out:
            # how many timepoint cover the event (from computed onset
            # to the one timepoint just after the end of the event
            out['duration'] = int(N.ceil((onset + out['duration']) / dt)
                                  - out['onset'])

        return out
559 560 561
class HarvesterCall(object):
    def __init__(self, call, attribs=None, argfilter=None, expand_args=True,
                 copy_attribs=True):
        """Initialize

        :Parameters:
          expand_args : bool
            Either to expand the output of looper into a list of arguments for
            call
          attribs : list of basestr
            What attributes of call to store and return later on?
          copy_attribs : bool
            Force copying values of attributes
        """

        # the callable which gets invoked in the harvester
        self.call = call

        if attribs is None:
            attribs = []
        # validate that both sequence arguments really are sequences
        if not isSequenceType(attribs):
            raise ValueError("'attribs' have to specified as a sequence.")
        if not (argfilter is None or isSequenceType(argfilter)):
            raise ValueError("'argfilter' have to be a sequence or None.")

        # now give it to me...
        self.argfilter = argfilter
        self.expand_args = expand_args
        self.copy_attribs = copy_attribs
        self.attribs = attribs
593 594 595
class Harvester(object):
    """World domination helper: do whatever it is asked and accumulate results

    XXX Thinks about:
      - Might we need to deepcopy attributes values?
      - Might we need to specify what attribs to copy and which just to bind?
    """

    def __init__(self, source, calls, simplify_results=True):
        """Initialize

        :Parameters:
          source
            Generator which produce food for the calls.
          calls : sequence of HarvesterCall instances
            Calls which are processed in the loop. All calls are processed in
            order of apperance in the sequence.
          simplify_results: bool
            Remove unecessary overhead in results if possible (nested lists
            and dictionaries).
        """
        if not isSequenceType(calls):
            raise ValueError, "'calls' have to specified as a sequence."

        self.__source = source
        """Generator which feeds the harvester"""

        self.__calls = calls
        """Calls which gets called with each generated source"""

        self.__simplify_results = simplify_results

    def __call__(self, *args, **kwargs):
        """Run `source`, feed every produced item to every call and
        accumulate results (and requested call attributes) per call.
        """
        # prepare complex result structure for all calls and their respective
        # attributes: calls x dict(attributes x loop iterations)
        results = [dict([('result', [])] + [(a, []) for a in c.attribs]) \
                   for c in self.__calls]

        # Lets do it!
        for (i, X) in enumerate(self.__source(*args, **kwargs)):
            for (c, call) in enumerate(self.__calls):
                # sanity check: expansion needs a sequence; only verified
                # on the first produced item
                if i == 0 and call.expand_args and not isSequenceType(X):
                    raise RuntimeError, \
                          "Cannot expand non-sequence result from %s" % \
                          `self.__source`

                # apply argument filter (and reorder) if requested
                if call.argfilter:
                    filtered_args = [X[f] for f in call.argfilter]
                else:
                    filtered_args = X

                if call.expand_args:
                    result = call.call(*filtered_args)
                else:
                    result = call.call(filtered_args)

#                # XXX pylint doesn't like `` for some reason
#                if __debug__:
#                    debug("LOOP", "Iteration %i on call %s. Got result %s" %
#                          (i, `self.__call`, `result`))


                results[c]['result'].append(result)

                # harvest the requested attributes off the call object
                for attrib in call.attribs:
                    attrv = call.call.__getattribute__(attrib)

                    if call.copy_attribs:
                        attrv = copy(attrv)

                    results[c][attrib].append(attrv)

        # reduce results structure
        if self.__simplify_results:
            # get rid of dictionary if just the results are requested
            for (c, call) in enumerate(self.__calls):
                if not len(call.attribs):
                    results[c] = results[c]['result']

            # a single call needs no outer list either
            if len(self.__calls) == 1:
                results = results[0]

        return results
684 685 686 687 # XXX MH: this doesn't work in all cases, as you cannot have *args after a 688 # kwarg. 689 #def loop(looper, call, 690 # unroll=True, attribs=None, copy_attribs=True, *args, **kwargs): 691 # """XXX Loop twin brother 692 # 693 # Helper for those who just wants to do smth like 694 # loop(blah, bleh, grgr) 695 # instead of 696 # Loop(blah, bleh)(grgr) 697 # """ 698 # print looper, call, unroll, attribs, copy_attribs 699 # print args, kwargs 700 # return Loop(looper=looper, call=call, unroll=unroll, 701 # attribs=attribs, copy_attribs=copy_attribs)(*args, **kwargs) 702