9 """Support function -- little helpers in everyday life"""

__docformat__ = 'restructuredtext'

import numpy as N
import re, os

from distutils.version import Version
from types import StringType, TupleType, ListType

from mvpa.base import warning
from mvpa.support.copy import copy, deepcopy
from operator import isSequenceType

if __debug__:
    from mvpa.base import debug
29 """Use path to file1 as the path to file2 is no absolute
30 path is given for file2
31
32 :Parameters:
33 force : bool
34 if True, force it even if the file2 starts with /
35 """
36 if not file2.startswith(os.path.sep) or force:
37
38 return os.path.join(os.path.dirname(file1), file2.lstrip(os.path.sep))
39 else:
40 return file2
41
42
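
# Usage sketch (illustrative, not part of the original source; assumes a
# POSIX path separator):
#
#   >>> reuseAbsolutePath('/study/protocol/design.txt', 'events.txt')
#   '/study/protocol/events.txt'
#   >>> reuseAbsolutePath('/study/protocol/design.txt', '/tmp/events.txt')
#   '/tmp/events.txt'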
83 """Generates a list of lists containing all combinations of
84 elements of data of length 'n' without repetitions.
85
86 data: list
87 n: integer
88
89 This function is adapted from a Java version posted in some forum on
90 the web as an answer to the question 'How can I generate all possible
91 combinations of length n?'. Unfortunately I cannot remember which
92 forum it was.
93
94 NOTE: implementation is broken for n>2
95 """
96
97 if n > 2:
98 raise ValueError, "_getUniqueLengthNCombinations_lt3 " \
99 "is broken for n>2, hence should not be used directly."
100
101
102 combos = []
103
104
105
106 def take(data, occupied, depth, taken):
107 for i, d in enumerate(data):
108
109 if occupied[i] == False:
110
111 if depth < n-1:
112
113 occupied[i] = True
114
115 take(data, occupied, depth+1, taken + [d])
116
117
118
119
120
121 else:
122
123 combos.append(taken + [d])
124
125
126 occupied = [False] * len(data)
127
128 take(data, occupied, 0, [])
129
130
131 return combos


def xuniqueCombinations(L, n):
    """Generator of unique combinations from a list L of objects in
    groups of size n.

    # XXX EO: I guess they are already sorted.
    # XXX EO: It seems to work well for n>20 :)

    :Parameters:
      L : list
        list of unique ids
      n : int
        grouping size

    Adapted from Li Daobing
    http://code.activestate.com/recipes/190465/
    (MIT license, according to activestate.com's policy)
    """
    if n == 0:
        yield []
    else:
        for i in xrange(len(L)-n+1):
            for cc in xuniqueCombinations(L[i+1:], n-1):
                yield [L[i]] + cc
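
# Usage sketch (illustrative, not part of the original module): lazily
# generate all unique pairs from a list of ids.
#
#   >>> list(xuniqueCombinations([1, 2, 3], 2))
#   [[1, 2], [1, 3], [2, 3]]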
157 """Find all subsets of data
158
159 :Parameters:
160 L : list
161 list of unique ids
162 n : None or int
163 If None, all possible subsets are returned. If n is specified (int),
164 then only the ones of the length n are returned
165 sort : bool
166 Either to sort the resultant sequence
167
168 Adopted from Alex Martelli:
169 http://mail.python.org/pipermail/python-list/2001-January/067815.html
170 """
171 N = len(L)
172 if N > 20 and n == 1:
173 warning("getUniqueLengthNCombinations_binary should not be used for "
174 "large N")
175 result = []
176 for X in range(2**N):
177 x = [ L[i] for i in range(N) if X & (1L<<i) ]
178 if n is None or len(x) == n:
179
180 result.append(x)
181 result.sort()
182
183
184
185
186 return result
187
188
190 """Find all subsets of data
191
192 :Parameters:
193 L : list
194 list of unique ids
195 n : None or int
196 If None, all possible subsets are returned. If n is specified (int),
197 then only the ones of the length n are returned
198
199 TODO: work out single stable implementation -- probably just by fixing
200 _getUniqueLengthNCombinations_lt3
201 """
202 if n == 1:
203 return _getUniqueLengthNCombinations_lt3(L, n)
204 elif n==None:
205 return _getUniqueLengthNCombinations_binary(L, n, sort=True)
206 else:
207
208
209
210
211
212
213 return list(xuniqueCombinations(L, n))
214
215
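
# Usage sketch (illustrative, not part of the original module): either ask for
# combinations of a fixed length, or pass n=None to get all subsets.
#
#   >>> getUniqueLengthNCombinations([1, 2, 3], 2)
#   [[1, 2], [1, 3], [2, 3]]
#   >>> getUniqueLengthNCombinations([1, 2], None)
#   [[], [1], [1, 2], [2]]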
217 """Given a `value` returns a string where each line is indented
218
219 Needed for a cleaner __repr__ output
220 `v` - arbitrary
221 """
222 return re.sub('\n', '\n ', str(v))
223
224
226 """Craft unique id+hash for an object
227 """
228 res = "%s" % id(val)
229 if isinstance(val, list):
230 val = tuple(val)
231 try:
232 res += ":%s" % hash(buffer(val))
233 except:
234 try:
235 res += ":%s" % hash(val)
236 except:
237 pass
238 pass
239 return res
240
242 """Check if listed items are in sorted order.
243
244 :Parameters:
245 `items`: iterable container
246
247 :return: `True` if were sorted. Otherwise `False` + Warning
248 """
249 items_sorted = deepcopy(items)
250 items_sorted.sort()
251 equality = items_sorted == items
252
253 if hasattr(equality, '__iter__'):
254 equality = N.all(equality)
255 return equality
256
257
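
# Usage sketch (illustrative, not part of the original module):
#
#   >>> isSorted([1, 2, 3])
#   True
#   >>> isSorted([2, 1, 3])
#   False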
259 """For given coord check if it is within a specified volume size.
260
261 Returns True/False. Assumes that volume coordinates start at 0.
262 No more generalization (arbitrary minimal coord) is done to save
263 on performance
264 """
265 for i in xrange(len(coord)):
266 if coord[i] < 0 or coord[i] >= shape[i]:
267 return False
268 return True
269
270
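
# Usage sketch (illustrative, not part of the original module): check a voxel
# coordinate against the shape of a 3D volume.
#
#   >>> isInVolume((2, 0, 1), (3, 3, 3))
#   True
#   >>> isInVolume((3, 0, 1), (3, 3, 3))
#   False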
272 """Convert literal string into a tuple, if possible of ints
273
274 Tuple of integers constructed by splitting at '.' or interleaves
275 of numerics and alpha numbers
276 """
277 if isinstance(v, basestring):
278 v = v.split('.')
279 elif isinstance(v, tuple) or isinstance(v, list):
280
281 pass
282 else:
283 raise ValueError, "Do not know how to treat version '%s'" % str(v)
284
285
286 vres = []
287
288 regex = re.compile('(?P<numeric>[0-9]*)'
289 '(?P<alpha>[~+-]*[A-Za-z]*)(?P<suffix>.*)')
290 for x in v:
291 try:
292 vres += [int(x)]
293 except ValueError:
294
295 suffix = x
296 while suffix != '':
297 res = regex.search(suffix)
298 if res:
299 resd = res.groupdict()
300 if resd['numeric'] != '':
301 vres += [int(resd['numeric'])]
302 if resd['alpha'] != '':
303 vres += [resd['alpha']]
304 suffix = resd['suffix']
305 else:
306
307 resd += [suffix]
308 break
309 v = tuple(vres)
310
311 return v
312
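
# Usage sketch (illustrative, not part of the original module): numeric fields
# become ints, while release markers are kept as separate string elements.
#
#   >>> version_to_tuple('0.4.0')
#   (0, 4, 0)
#   >>> version_to_tuple('0.4.0-rc1')
#   (0, 4, 0, '-rc', 1)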
314 """A bit evolved comparison of versions
315
316 The reason for not using python's distutil.version is that it
317 seems to have no clue about somewhat common conventions of using
318 '-dev' or 'dev' or 'rc' suffixes for upcoming releases (so major
319 version does contain upcoming release already).
320
321 So here is an ad-hoc and not as nice implementation
322 """
323
324 - def parse(self, vstring):
327
330
332 if isinstance(other, (StringType, TupleType, ListType)):
333 other = SmartVersion(other)
334 elif isinstance(other, SmartVersion):
335 pass
336 elif isinstance(other, Version):
337 other = SmartVersion(other.vstring)
338 else:
339 raise ValueError("Do not know how to treat version %s"
340 % str(other))
341
342
343 i = 0
344 s, o = self.version, other.version
345 regex_prerelease = re.compile('~|-?dev|-?rc|-?svn|-?pre', re.I)
346 for i in xrange(max(len(s), len(o))):
347 if i < len(s): si = s[i]
348 else: si = None
349 if i < len(o): oi = o[i]
350 else: oi = None
351
352 if si == oi:
353 continue
354
355 for x,y,mult in ((si, oi, 1), (oi, si, -1)):
356 if x is None:
357 if isinstance(y, int):
358 return -mult
359 if isinstance(y, StringType):
360 if (regex_prerelease.match(y)):
361 return mult
362
363 else:
364
365 return -mult
366 else:
367 raise RuntimeError, "Should not have got here with %s" \
368 % y
369 elif isinstance(x, int):
370 if not isinstance(y, int):
371 return mult
372 return mult*cmp(x, y)
373 elif isinstance(x, StringType):
374 if isinstance(y, StringType):
375 return mult*cmp(x,y)
376 return 0
377
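
# Usage sketch (illustrative, not part of the original module; assumes `parse`
# populates self.version via version_to_tuple as sketched above): pre-release
# markers such as 'rc' or 'dev' rank below the corresponding release.
#
#   >>> SmartVersion('0.4.0') > SmartVersion('0.4.0-rc1')
#   True
#   >>> SmartVersion('0.4.0-rc1') > SmartVersion('0.4.0-dev')
#   True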
379 """Return a list of break points.
380
381 :Parameters:
382 items : iterable
383 list of items, such as chunks
384 contiguous : bool
385 if `True` (default) then raise Value Error if items are not
386 contiguous, i.e. a label occur in multiple contiguous sets
387
388 :raises: ValueError
389
390 :return: list of indexes for every new set of items
391 """
392 prev = None
393 known = []
394 """List of items which was already seen"""
395 result = []
396 """Resultant list"""
397 for index in xrange(len(items)):
398 item = items[index]
399 if item in known:
400 if index > 0:
401 if prev != item:
402 if contiguous:
403 raise ValueError, \
404 "Item %s was already seen before" % str(item)
405 else:
406 result.append(index)
407 else:
408 known.append(item)
409 result.append(index)
410 prev = item
411 return result
412
413
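
# Usage sketch (illustrative, not part of the original module): indexes at
# which a new group of items starts.
#
#   >>> getBreakPoints([1, 1, 2, 2, 3, 3, 3])
#   [0, 2, 4]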


def RFEHistory2maps(history):
    """Convert a history generated by RFE into an array of binary maps

    Example:
      RFEHistory2maps(N.array([3, 2, 1, 0]))
    results in
      array([[ 1.,  1.,  1.,  1.],
             [ 1.,  1.,  1.,  0.],
             [ 1.,  1.,  0.,  0.],
             [ 1.,  0.,  0.,  0.]])
    """
    # unify the input
    history = N.array(history)
    nfeatures, steps = len(history), max(history) - min(history) + 1
    history_maps = N.zeros((steps, nfeatures))

    # a feature is present in a step if it survived at least that many steps
    for step in xrange(steps):
        history_maps[step, history >= step] = 1

    return history_maps
438 """Compute some overlap stats from a sequence of binary maps.
439
440 When called with a sequence of binary maps (e.g. lists or arrays) the
441 fraction of mask elements that are non-zero in a customizable proportion
442 of the maps is returned. By default this threshold is set to 1.0, i.e.
443 such an element has to be non-zero in *all* maps.
444
445 Three additional maps (same size as original) are computed:
446
447 * overlap_map: binary map which is non-zero for each overlapping element.
448 * spread_map: binary map which is non-zero for each element that is
449 non-zero in any map, but does not exceed the overlap
450 threshold.
451 * ovstats_map: map of float with the raw elementwise fraction of overlap.
452
453 All maps are available via class members.
454 """
455 - def __init__(self, overlap_threshold=1.0):
456 """Nothing to be seen here.
457 """
458 self.__overlap_threshold = overlap_threshold
459
460
461 self.overlap_map = None
462 self.spread_map = None
463 self.ovstats_map = None
464
465
467 """Returns fraction of overlapping elements.
468 """
469 ovstats = N.mean(maps, axis=0)
470
471 self.overlap_map = (ovstats >= self.__overlap_threshold )
472 self.spread_map = N.logical_and(ovstats > 0.0,
473 ovstats < self.__overlap_threshold)
474 self.ovstats_map = ovstats
475
476 return N.mean(ovstats >= self.__overlap_threshold)
477
478
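
# Usage sketch (illustrative, not part of the original module): with two binary
# maps only the first element is non-zero in both, so a third of all elements
# overlap; the elementwise maps are then available as attributes.
#
#   >>> ol = MapOverlap()
#   >>> round(ol([[1, 1, 0], [1, 0, 0]]), 2)
#   0.33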
480 """Simple class to define properties of an event.
481
482 The class is basically a dictionary. Any properties can
483 be passed as keyword arguments to the constructor, e.g.:
484
485 >>> ev = Event(onset=12, duration=2.45)
486
487 Conventions for keys:
488
489 `onset`
490 The onset of the event in some unit.
491 `duration`
492 The duration of the event in the same unit as `onset`.
493 `label`
494 E.g. the condition this event is part of.
495 `chunk`
496 Group this event is part of (if any), e.g. experimental run.
497 `features`
498 Any amount of additional features of the event. This might include
499 things like physiological measures, stimulus intensity. Must be a mutable
500 sequence (e.g. list), if present.
501 """
502 _MUSTHAVE = ['onset']
503
505
506 dict.__init__(self, **kwargs)
507
508
509 for k in Event._MUSTHAVE:
510 if not self.has_key(k):
511 raise ValueError, "Event must have '%s' defined." % k
512
513
515 """Convert `onset` and `duration` information into descrete timepoints.
516
517 :Parameters:
518 dt: float
519 Temporal distance between two timepoints in the same unit as `onset`
520 and `duration`.
521 storeoffset: bool
522 If True, the temporal offset between original `onset` and
523 descretized `onset` is stored as an additional item in `features`.
524
525 :Return:
526 A copy of the original `Event` with `onset` and optionally `duration`
527 replaced by their corresponding descrete timepoint. The new onset will
528 correspond to the timepoint just before or exactly at the original
529 onset. The new duration will be the number of timepoints covering the
530 event from the computed onset timepoint till the timepoint exactly at
531 the end, or just after the event.
532
533 Note again, that the new values are expressed as #timepoint and not
534 in their original unit!
535 """
536 dt = float(dt)
537 onset = self['onset']
538 out = deepcopy(self)
539
540
541 out['onset'] = int(N.floor(onset / dt))
542
543 if storeoffset:
544
545 offset = onset - (out['onset'] * dt)
546
547 if out.has_key('features'):
548 out['features'].append(offset)
549 else:
550 out['features'] = [offset]
551
552 if out.has_key('duration'):
553
554
555 out['duration'] = int(N.ceil((onset + out['duration']) / dt) \
556 - out['onset'])
557
558 return out
559
560
561
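
# Usage sketch (illustrative, not part of the original module): discretize an
# event given in seconds into timepoints of 2.5s length.
#
#   >>> ev = Event(onset=1.7, duration=2.0, label='stim')
#   >>> dev = ev.asDescreteTime(dt=2.5)
#   >>> dev['onset'], dev['duration']
#   (0, 2)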


class HarvesterCall(object):
    def __init__(self, call, attribs=None, argfilter=None, expand_args=True,
                 copy_attribs=True):
        """Initialize

        :Parameters:
          call : callable
            Callable which gets invoked by the harvester
          attribs : list of basestr
            What attributes of call to store and return later on?
          argfilter : None or sequence
            Indexes selecting which elements of the source output to pass
            to `call`
          expand_args : bool
            Whether to expand the output of the looper into a list of
            arguments for `call`
          copy_attribs : bool
            Force copying values of attributes
        """
        self.call = call
        """Call which gets called in the harvester."""

        if attribs is None:
            attribs = []
        if not isSequenceType(attribs):
            raise ValueError, "'attribs' has to be specified as a sequence."

        if not (argfilter is None or isSequenceType(argfilter)):
            raise ValueError, "'argfilter' has to be a sequence or None."

        # store the configuration
        self.argfilter = argfilter
        self.expand_args = expand_args
        self.copy_attribs = copy_attribs
        self.attribs = attribs
597 """World domination helper: do whatever it is asked and accumulate results
598
599 XXX Thinks about:
600 - Might we need to deepcopy attributes values?
601 - Might we need to specify what attribs to copy and which just to bind?
602 """
603
604 - def __init__(self, source, calls, simplify_results=True):
605 """Initialize
606
607 :Parameters:
608 source
609 Generator which produce food for the calls.
610 calls : sequence of HarvesterCall instances
611 Calls which are processed in the loop. All calls are processed in
612 order of apperance in the sequence.
613 simplify_results: bool
614 Remove unecessary overhead in results if possible (nested lists
615 and dictionaries).
616 """
617 if not isSequenceType(calls):
618 raise ValueError, "'calls' have to specified as a sequence."
619
620 self.__source = source
621 """Generator which feeds the harvester"""
622
623 self.__calls = calls
624 """Calls which gets called with each generated source"""
625
626 self.__simplify_results = simplify_results
627
628
630 """
631 """
632
633
634 results = [dict([('result', [])] + [(a, []) for a in c.attribs]) \
635 for c in self.__calls]
636
637
638 for (i, X) in enumerate(self.__source(*args, **kwargs)):
639 for (c, call) in enumerate(self.__calls):
640
641 if i == 0 and call.expand_args and not isSequenceType(X):
642 raise RuntimeError, \
643 "Cannot expand non-sequence result from %s" % \
644 `self.__source`
645
646
647 if call.argfilter:
648 filtered_args = [X[f] for f in call.argfilter]
649 else:
650 filtered_args = X
651
652 if call.expand_args:
653 result = call.call(*filtered_args)
654 else:
655 result = call.call(filtered_args)
656
657
658
659
660
661
662
663 results[c]['result'].append(result)
664
665 for attrib in call.attribs:
666 attrv = call.call.__getattribute__(attrib)
667
668 if call.copy_attribs:
669 attrv = copy(attrv)
670
671 results[c][attrib].append(attrv)
672
673
674 if self.__simplify_results:
675
676 for (c, call) in enumerate(self.__calls):
677 if not len(call.attribs):
678 results[c] = results[c]['result']
679
680 if len(self.__calls) == 1:
681 results = results[0]
682
683 return results
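
# Usage sketch (illustrative, not part of the original module; `pairs` is a
# hypothetical source generator): sum up the value pairs produced by the
# source. With a single call and no stored attributes the simplified result
# is just the list of per-iteration sums.
#
#   >>> def pairs():
#   ...     for i in xrange(3):
#   ...         yield (i, i ** 2)
#   >>> harvester = Harvester(pairs, [HarvesterCall(lambda x, y: x + y)])
#   >>> harvester()
#   [0, 2, 6]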