9 """Dataset container"""
10
11 __docformat__ = 'restructuredtext'
12
13 import operator
14 import random
15 import mvpa.support.copy as copy
16 import numpy as N

from mvpa.misc.exceptions import DatasetError
from mvpa.misc.support import idhash as idhash_
from mvpa.base.dochelpers import enhancedDocString, table2string

from mvpa.base import warning

if __debug__:
    from mvpa.base import debug
33 """Helper function to validate that seq contains unique sorted values
34 """
35 if operator.isSequenceType(seq):
36 seq_unique = N.unique(seq)
37 if len(seq) != len(seq_unique):
38 warning("%s() operates only with indexes for %s without"
39 " repetitions. Repetitions were removed."
40 % (fname, item))
41 if N.any(N.sort(seq) != seq_unique):
42 warning("%s() does not guarantee the original order"
43 " of selected %ss. Use selectSamples() and "
44 " selectFeatures(sort=False) instead" % (fname, item))
45
49 """*The* Dataset.
50
51 This class provides a container to store all necessary data to
52 perform MVPA analyses. These are the data samples, as well as the
53 labels associated with the samples. Additionally, samples can be
54 grouped into chunks.
55
56 :Groups:
57 - `Creators`: `__init__`, `selectFeatures`, `selectSamples`,
58 `applyMapper`
59 - `Mutators`: `permuteLabels`
60
61 Important: labels assumed to be immutable, i.e. no one should modify
62 them externally by accessing indexed items, ie something like
63 ``dataset.labels[1] += 100`` should not be used. If a label has
64 to be modified, full copy of labels should be obtained, operated on,
65 and assigned back to the dataset, otherwise dataset.uniquelabels
66 would not work. The same applies to any other attribute which has
67 corresponding unique* access property.
68
69 """
70
71
72
73
74
75
76
77
78
79
80
81
82
83 _uniqueattributes = []
84 """Unique attributes associated with the data"""
85
86 _registeredattributes = []
87 """Registered attributes (stored in _data)"""
88
89 _requiredattributes = ['samples', 'labels']
90 """Attributes which have to be provided to __init__, or otherwise
91 no default values would be assumed and construction of the
92 instance would fail"""
93
94
95
96
97
98
99
100
101
102
    def __init__(self,
                 # for copy constructor mode
                 data=None,
                 dsattr=None,
                 # automatic dtype conversion
                 dtype=None,
                 # new dataset constructor mode
                 samples=None,
                 labels=None,
                 labels_map=None,
                 chunks=None,
                 origids=None,
                 # flags
                 check_data=True,
                 copy_samples=False,
                 copy_data=True,
                 copy_dsattr=True):
120 """Initialize dataset instance
121
122 There are basically two different way to create a dataset:
123
124 1. Create a new dataset from samples and sample attributes. In
125 this mode a two-dimensional `ndarray` has to be passed to the
126 `samples` keyword argument and the corresponding samples
127 attributes are provided via the `labels` and `chunks`
128 arguments.
129
130 2. Copy contructor mode
131 The second way is used internally to perform quick coyping
132 of datasets, e.g. when performing feature selection. In this
133 mode and the two dictionaries (`data` and `dsattr`) are
134 required. For performance reasons this mode bypasses most of
135 the sanity check performed by the previous mode, as for
136 internal operations data integrity is assumed.
137
138
139 :Parameters:
140 data : dict
141 Dictionary with an arbitrary number of entries. The value for
142 each key in the dict has to be an ndarray with the
143 same length as the number of rows in the samples array.
144 A special entry in this dictionary is 'samples', a 2d array
145 (samples x features). A shallow copy is stored in the object.
146 dsattr : dict
147 Dictionary of dataset attributes. An arbitrary number of
148 arbitrarily named and typed objects can be stored here. A
149 shallow copy of the dictionary is stored in the object.
150 dtype: type | None
151 If None -- do not change data type if samples
152 is an ndarray. Otherwise convert samples to dtype.
153
154
155 :Keywords:
156 samples : ndarray
157 2d array (samples x features)
158 labels
159 An array or scalar value defining labels for each samples.
160 Generally `labels` should be numeric, unless `labels_map`
161 is used
162 labels_map : None or bool or dict
163 Map original labels into numeric labels. If True, the
164 mapping is computed if labels are literal. If is False,
165 no mapping is computed. If dict instance -- provided
166 mapping is verified and applied. If you want to have
167 labels_map just be present given already numeric labels,
168 just assign labels_map dictionary to existing dataset
169 instance
170 chunks
171 An array or scalar value defining chunks for each sample
172
173 Each of the Keywords arguments overwrites what is/might be
174 already in the `data` container.
175
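
        Example: a minimal sketch of the first construction mode (the
        array values below are purely illustrative)::

            samples = N.random.randn(4, 2)      # 4 samples x 2 features
            ds = Dataset(samples=samples,
                         labels=[0, 0, 1, 1],
                         chunks=[0, 1, 0, 1])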
176 """

        # see if we were provided with data and/or dsattr dicts
        if data is None:
            data = {}
        if dsattr is None:
            dsattr = {}

        # copy the containers as requested -- otherwise chaos is possible
        # if data is modified behind the dataset's back
        if copy_data:
            # deep copy: copy values individually (cannot use copy.deepcopy,
            # since the samples dtype should be preserved)
            lcl_data = data.copy()
            for k, v in data.iteritems():
                # skip copying samples if not requested
                if k == 'samples' and not copy_samples:
                    continue
                lcl_data[k] = v.copy()
        else:
            # shallow copy of the container only
            lcl_data = data.copy()

        if copy_dsattr and len(dsattr) > 0:
            # deep copy of dataset attributes
            if __debug__:
                debug('DS', "Deep copying dsattr %s" % `dsattr`)
            lcl_dsattr = copy.deepcopy(dsattr)
        else:
            # shallow copy
            lcl_dsattr = copy.copy(dsattr)

        # bind the local containers to the instance
        self._data = lcl_data
        """What makes a dataset."""

        self._dsattr = lcl_dsattr
        """Dataset attributes."""

        # store samples (and possibly transform/reshape/retype them)
        if not samples is None:
            if __debug__:
                if lcl_data.has_key('samples'):
                    debug('DS',
                          "`Data` dict has `samples` (%s) but there is also" \
                          " __init__ parameter `samples` which overrides " \
                          " stored in `data`" % (`lcl_data['samples'].shape`))
            lcl_data['samples'] = self._shapeSamples(samples, dtype,
                                                     copy_samples)

        if not labels is None:
            if __debug__:
                if lcl_data.has_key('labels'):
                    debug('DS',
                          "`Data` dict has `labels` (%s) but there is also" \
                          " __init__ parameter `labels` which overrides " \
                          " stored in `data`" % (`lcl_data['labels']`))
            if lcl_data.has_key('samples'):
                lcl_data['labels'] = \
                    self._expandSampleAttribute(labels, 'labels')

        # check if we got all required attributes
        for attr in self._requiredattributes:
            if not lcl_data.has_key(attr):
                raise DatasetError, \
                      "Attribute %s is required to initialize dataset" % \
                      attr

        nsamples = self.nsamples

        # assign default chunks if necessary
        if chunks is not None:
            lcl_data['chunks'] = \
                self._expandSampleAttribute(chunks, 'chunks')
        elif not lcl_data.has_key('chunks'):
            # if no chunk information is given assume that every sample
            # is its own chunk
            lcl_data['chunks'] = N.arange(nsamples)

        # handle origids
        if not origids is None:
            # simply assign if provided
            lcl_data['origids'] = origids
        elif not lcl_data.has_key('origids'):
            # otherwise construct unique ones
            lcl_data['origids'] = N.arange(len(lcl_data['labels']))
        else:
            # assume origids were transferred within the `data` dict
            pass

        # initialize any remaining registered attribute
        for attr in self._registeredattributes:
            if not lcl_data.has_key(attr):
                if __debug__:
                    debug("DS", "Initializing attribute %s" % attr)
                lcl_data[attr] = N.zeros(nsamples)

        # labels might be provided as literals -- map them if requested
        labels_ = N.asarray(lcl_data['labels'])
        labels_map_known = lcl_dsattr.has_key('labels_map')
        if labels_map is True:
            # need to compute labels_map
            if labels_.dtype.char == 'S':
                # create the mapping label -> numeric id
                ulabels = list(set(labels_))
                ulabels.sort()
                labels_map = dict([(x[1], x[0]) for x in enumerate(ulabels)])
                if __debug__:
                    debug('DS', 'Mapping for the labels computed to be %s'
                          % labels_map)
            else:
                if __debug__:
                    debug('DS', 'Mapping of labels was requested but labels '
                          'are not strings. Skipped')
                labels_map = None
        elif labels_map is False:
            labels_map = None

        if isinstance(labels_map, dict):
            if labels_map_known:
                if __debug__:
                    debug('DS',
                          "`dsattr` dict has `labels_map` (%s) but there is also" \
                          " __init__ parameter `labels_map` (%s) which overrides " \
                          " stored in `dsattr`" % (lcl_dsattr['labels_map'], labels_map))

            lcl_dsattr['labels_map'] = labels_map
            # map labels if they are strings or remapping was explicitly asked
            if labels_.dtype.char == 'S' or not labels_map_known:
                if __debug__:
                    debug('DS_', "Remapping labels using mapping %s" % labels_map)
                try:
                    lcl_data['labels'] = N.array(
                        [labels_map[x] for x in lcl_data['labels']])
                except KeyError, e:
                    raise ValueError, "Provided labels_map %s is insufficient " \
                          "to map all the labels. Mapping for label %s is " \
                          "missing" % (labels_map, e)

        elif not lcl_dsattr.has_key('labels_map'):
            lcl_dsattr['labels_map'] = labels_map
        elif __debug__:
            debug('DS_', 'Not overriding labels_map in dsattr since it has one')

        if check_data:
            self._checkData()

        # reset unique* members whenever labels or chunks were (re)assigned
        # in this constructor call
        if not labels is None or not chunks is None:
            lcl_dsattr['__uniquereseted'] = False
            self._resetallunique(force=True)


    __doc__ = enhancedDocString('Dataset', locals())


    @property
    def idhash(self):
        """To verify if dataset is in the same state as when something else
        was done

        e.g. if a classifier was trained on the same dataset as in question
        """
        _data = self._data
        res = idhash_(_data)

        # process the keys in a deterministic order since the resultant
        # idhash would otherwise be order-dependent
        keys = _data.keys()
        keys.sort()
        for k in keys:
            res += idhash_(_data[k])
        return res


    def _resetallunique(self, force=False):
        """Set to None all unique* attributes of corresponding dictionary
        """
        _dsattr = self._dsattr

        if not force and _dsattr['__uniquereseted']:
            return

        _uniqueattributes = self._uniqueattributes

        if __debug__ and "DS_" in debug.active:
            debug("DS_", "Resetting all attributes %s for dataset %s"
                  % (_uniqueattributes,
                     self.summary(uniq=False, idhash=False,
                                  stats=False, lstats=False)))

        # set all unique* attributes to None to trigger lazy recomputation
        for k in _uniqueattributes:
            _dsattr[k] = None
        _dsattr['__uniquereseted'] = True


    def _getuniqueattr(self, attrib, dict_):
        """Provide common facility to return unique attributes

        XXX `dict_` can be simply replaced now with self._dsattr
        """
        # local bindings
        _dsattr = self._dsattr

        if not _dsattr.has_key(attrib) or _dsattr[attrib] is None:
            if __debug__ and 'DS_' in debug.active:
                debug("DS_", "Recomputing unique set for attrib %s within %s" %
                      (attrib, self.summary(uniq=False,
                                            stats=False, lstats=False)))
            # recompute; strip the 'unique' prefix to obtain the name of
            # the source attribute within dict_
            _dsattr[attrib] = N.unique(N.asanyarray(dict_[attrib[6:]]))
            assert(not _dsattr[attrib] is None)
            _dsattr['__uniquereseted'] = False

        return _dsattr[attrib]


    def _setdataattr(self, attrib, value):
        """Provide common facility to set attributes
        """
        if len(value) != self.nsamples:
            raise ValueError, \
                  "Provided %s have %d entries while there are %d samples" % \
                  (attrib, len(value), self.nsamples)
        self._data[attrib] = N.asarray(value)
        uniqueattr = "unique" + attrib

        # invalidate the cached unique values for this attribute
        _dsattr = self._dsattr
        if _dsattr.has_key(uniqueattr):
            _dsattr[uniqueattr] = None


    def _getNSamplesPerAttr(self, attrib='labels'):
        """Returns the number of samples per unique value of a sample
        attribute.
        """
        # local bindings
        _data = self._data

        # make sure the unique values are computed
        uniqueattr = self._getuniqueattr(attrib="unique" + attrib,
                                         dict_=_data)

        # use a dictionary to cope with arbitrary attribute values
        result = dict(zip(uniqueattr, [0] * len(uniqueattr)))
        for l in _data[attrib]:
            result[l] += 1

        return result


    def _getSampleIdsByAttr(self, values, attrib="labels", sort=True):
        """Return indices of samples given a list of attribute values
462 """

        if not operator.isSequenceType(values) \
               or isinstance(values, basestring):
            values = [values]

        # collect the ids matching any of the requested values
        sel = N.array([], dtype=N.int16)
        _data = self._data
        for value in values:
            sel = N.concatenate((
                sel, N.where(_data[attrib] == value)[0]))

        if sort:
            # place samples in the right order
            sel.sort()

        return sel


    def idsonboundaries(self, prior=0, post=0,
                        attributes_to_track=['labels', 'chunks'],
                        affected_labels=None,
                        revert=False):
487 """Find samples which are on the boundaries of the blocks
488
489 Such samples might need to be removed. By default (with
490 prior=0, post=0) ids of the first samples in a 'block' are
491 reported
492
493 :Parameters:
494 prior : int
495 how many samples prior to transition sample to include
496 post : int
497 how many samples post the transition sample to include
498 attributes_to_track : list of basestring
499 which attributes to track to decide on the boundary condition
500 affected_labels : list of basestring
501 for which labels to perform selection. If None - for all
502 revert : bool
503 either to revert the meaning and provide ids of samples which are found
504 to not to be boundary samples
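
        Example: a sketch, assuming a dataset ``ds`` with a block design::

            # ids of the first sample of every block plus the one after it
            ids = ds.idsonboundaries(prior=0, post=1)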
505 """
        # local bindings
        _data = self._data
        labels = self.labels
        nsamples = self.nsamples

        lastseen = none = [None for attr in attributes_to_track]
        transitions = []

        for i in xrange(nsamples + 1):
            if i < nsamples:
                current = [_data[attr][i] for attr in attributes_to_track]
            else:
                current = none
            if lastseen != current:
                # transition point
                new_transitions = range(max(0, i - prior),
                                        min(nsamples - 1, i + post) + 1)
                if affected_labels is not None:
                    new_transitions = [t for t in new_transitions
                                       if labels[t] in affected_labels]
                transitions += new_transitions
                lastseen = current

        transitions = set(transitions)
        if revert:
            transitions = set(range(nsamples)).difference(transitions)

        # postprocess
        transitions = N.array(list(transitions))
        transitions.sort()
        return list(transitions)


    def _shapeSamples(self, samples, dtype, copy):
        """Adapt different kinds of samples

        Handle all possible input value for 'samples' and transform
        them into a 2d (samples x feature) representation.
        """
        # put samples array into correct shape
        # 1d arrays or simple sequences are assumed to be a single pattern
        if (not isinstance(samples, N.ndarray)):
            # it is safe to provide dtype which defaults to None,
            # when N would choose appropriate dtype automagically
            samples = N.array(samples, ndmin=2, dtype=dtype, copy=copy)
        else:
            if samples.ndim < 2 \
                   or (not dtype is None and dtype != samples.dtype):
                if dtype is None:
                    dtype = samples.dtype
                samples = N.array(samples, ndmin=2, dtype=dtype, copy=copy)
            elif copy:
                samples = samples.copy()

        # only samples x features matrices are supported
        if len(samples.shape) > 2:
            raise DatasetError, "Only (samples x features) -> 2d sample " \
                  + "are supported (got %s shape of samples)." \
                  % (`samples.shape`) \
                  + " Consider MappedDataset if applicable."

        return samples


    def _checkData(self):
        """Checks `_data` members to have the same # of samples.
        """
        nsamples = self.nsamples
        _data = self._data

        for k, v in _data.iteritems():
            if not len(v) == nsamples:
                raise DatasetError, \
                      "Length of sample attribute '%s' [%i] does not " \
                      "match the number of samples in the dataset [%i]." \
                      % (k, len(v), nsamples)

        # check for unique origids
        uniques = N.unique(_data['origids'])
        uniques.sort()
        # need to copy to prevent sorting the original array
        sorted_ids = _data['origids'].copy()
        sorted_ids.sort()

        if not (uniques == sorted_ids).all():
            raise DatasetError, "Samples IDs are not unique."

        # check whether labels are literal and should better be mapped
        if N.asanyarray(_data['labels']).dtype.char == 'S':
            warning('Labels for dataset %s are literal, should be numeric. '
                    'You might like to use labels_map argument.' % self)

605 """If a sample attribute is given as a scalar expand/repeat it to a
606 length matching the number of samples in the dataset.
607 """
608 try:
609
610
611 if isinstance(attr, basestring):
612 raise TypeError
613 if len(attr) != self.nsamples:
614 raise DatasetError, \
615 "Length of sample attribute '%s' [%d]" \
616 % (attr_name, len(attr)) \
617 + " has to match the number of samples" \
618 + " [%d]." % self.nsamples
619
620 return N.array(attr)
621
622 except TypeError:
623
624
625 return N.repeat(attr, self.nsamples)
626

    @classmethod
    def _registerAttribute(cls, key, dictname="_data", abbr=None,
                           hasunique=False):
        """Register an attribute for any Dataset class.

        Creates property assigning getters/setters depending on the
        availability of corresponding _get, _set functions.
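
        Example: a sketch of the properties this creates, mirroring the
        module-level registration calls below::

            Dataset._registerAttribute("labels", "_data", abbr='L',
                                       hasunique=True)
            # now available: ds.labels, ds.L, ds.uniquelabels, ds.UL,
            # ds.samplesperlabel, ds.idsbylabels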
634 """
        classdict = cls.__dict__
        if not classdict.has_key(key):
            if __debug__:
                debug("DS", "Registering new attribute %s" % key)
            # define get function and use corresponding
            # _get<ATTR> if such is defined
            getter = '_get%s' % key
            if classdict.has_key(getter):
                getter = '%s.%s' % (cls.__name__, getter)
            else:
                getter = "lambda x: x.%s['%s']" % (dictname, key)

            # define set function and use corresponding
            # _set<ATTR> if such is defined
            setter = '_set%s' % key
            if classdict.has_key(setter):
                setter = '%s.%s' % (cls.__name__, setter)
            elif dictname == "_data":
                setter = "lambda self, x: self._setdataattr" + \
                         "(attrib='%s', value=x)" % (key)
            else:
                setter = None

            if __debug__:
                debug("DS", "Registering new property %s.%s" %
                      (cls.__name__, key))
            exec "%s.%s = property(fget=%s, fset=%s)" % \
                 (cls.__name__, key, getter, setter)

            if abbr is not None:
                exec "%s.%s = property(fget=%s, fset=%s)" % \
                     (cls.__name__, abbr, getter, setter)

            if hasunique:
                uniquekey = "unique%s" % key
                getter = '_get%s' % uniquekey
                if classdict.has_key(getter):
                    getter = '%s.%s' % (cls.__name__, getter)
                else:
                    getter = "lambda x: x._getuniqueattr" + \
                             "(attrib='%s', dict_=x.%s)" % (uniquekey, dictname)

                if __debug__:
                    debug("DS", "Registering new property %s.%s" %
                          (cls.__name__, uniquekey))

                exec "%s.%s = property(fget=%s)" % \
                     (cls.__name__, uniquekey, getter)
                if abbr is not None:
                    exec "%s.U%s = property(fget=%s)" % \
                         (cls.__name__, abbr, getter)

                # create samplesper<ATTR> property
                sampleskey = "samplesper%s" % key[:-1] # remove trailing 's'
                if __debug__:
                    debug("DS", "Registering new property %s.%s" %
                          (cls.__name__, sampleskey))

                exec "%s.%s = property(fget=%s)" % \
                     (cls.__name__, sampleskey,
                      "lambda x: x._getNSamplesPerAttr(attrib='%s')" % key)

                cls._uniqueattributes.append(uniquekey)

                # create idsby<ATTR> convenience method
                sampleskey = "idsby%s" % key
                if __debug__:
                    debug("DS", "Registering new property %s.%s" %
                          (cls.__name__, sampleskey))

                exec "%s.%s = %s" % (cls.__name__, sampleskey,
                                     "lambda self, x: " +
                                     "self._getSampleIdsByAttr(x, attrib='%s')" % key)

                cls._uniqueattributes.append(uniquekey)

            cls._registeredattributes.append(key)
        elif __debug__:
            warning('Trying to reregister attribute `%s`. For now ' % key +
                    'such capability is not present')


    def __str__(self):
        """String summary over the object
        """
        try:
            ssummary = self.summary(uniq=True,
                                    idhash=__debug__ and ('DS_ID' in debug.active),
                                    stats=__debug__ and ('DS_STATS' in debug.active),
                                    lstats=__debug__ and ('DS_STATS' in debug.active),
                                    )
        except (AttributeError, KeyError), e:
            # __str__ might be requested before the instance is fully
            # initialized
            ssummary = str(e)
        return ssummary


    def __repr__(self):
        return "<%s>" % str(self)


    def summary(self, uniq=True, stats=True, idhash=False, lstats=True,
                maxc=30, maxl=20):
        """String summary over the object

        :Parameters:
          uniq : bool
            Include summary over data attributes which have unique values
          idhash : bool
            Include idhash value for dataset and samples
          stats : bool
            Include some basic statistics (mean, std, var) over dataset samples
          lstats : bool
            Include statistics on chunks/labels
          maxc : int
            Maximal number of chunks when providing details on labels/chunks
          maxl : int
            Maximal number of labels when providing details on labels/chunks
        """
        # local bindings
        samples = self.samples
        _data = self._data
        _dsattr = self._dsattr

        if idhash:
            idhash_ds = "{%s}" % self.idhash
            idhash_samples = "{%s}" % idhash_(samples)
        else:
            idhash_ds = ""
            idhash_samples = ""

        s = """Dataset %s/ %s %d%s x %d""" % \
            (idhash_ds, samples.dtype,
             self.nsamples, idhash_samples, self.nfeatures)

        ssep = (' ', '\n')[lstats]
        if uniq:
            s += "%suniq:" % ssep
            for uattr in _dsattr.keys():
                if not uattr.startswith("unique"):
                    continue
                attr = uattr[6:]
                try:
                    value = self._getuniqueattr(attrib=uattr,
                                                dict_=_data)
                    s += " %d %s" % (len(value), attr)
                except:
                    pass

        if isinstance(self.labels_map, dict):
            s += ' labels_mapped'

        if stats:
            if self.nfeatures:
                s += "%sstats: mean=%g std=%g var=%g min=%g max=%g\n" % \
                     (ssep, N.mean(samples), N.std(samples),
                      N.var(samples), N.min(samples), N.max(samples))
            else:
                s += "%sstats: dataset has no features\n" % ssep

        if lstats:
            s += self.summary_labels(maxc=maxc, maxl=maxl)

        return s


    def summary_labels(self, maxc=30, maxl=20):
        """Provide summary statistics over the labels and chunks

        :Parameters:
          maxc : int
            Maximal number of chunks when providing details
          maxl : int
            Maximal number of labels when providing details
        """
        # local import to avoid circular imports
        from mvpa.datasets.miscfx import getSamplesPerChunkLabel
        spcl = getSamplesPerChunkLabel(self)

        ul = self.uniquelabels.tolist()
        uc = self.uniquechunks.tolist()
        s = ""
        if len(ul) < maxl and len(uc) < maxc:
            s += "\nCounts of labels in each chunk:"
            # first row -- labels, then one row per chunk
            table = [[' chunks\labels'] + ul]
            table += [[''] + ['---'] * len(ul)]
            for c, counts in zip(uc, spcl):
                table.append([str(c)] + counts.tolist())
            s += '\n' + table2string(table)
        else:
            s += "No details due to large number of labels or chunks. " \
                 "Increase maxc and maxl if desired"

        labels_map = self.labels_map
        if isinstance(labels_map, dict):
            s += "\nOriginal labels were mapped using following mapping:"
            s += '\n\t' + '\n\t'.join([':\t'.join(map(str, x))
                                       for x in labels_map.items()]) + '\n'

        def cl_stats(axis, u, name1, name2):
            """Compute statistics per label
            """
            stats = {'min': N.min(spcl, axis=axis),
                     'max': N.max(spcl, axis=axis),
                     'mean': N.mean(spcl, axis=axis),
                     'std': N.std(spcl, axis=axis),
                     '#%ss' % name2: N.sum(spcl > 0, axis=axis)}
            entries = [' ' + name1, 'mean', 'std', 'min', 'max', '#%ss' % name2]
            table = [entries]
            for i, l in enumerate(u):
                d = {' ' + name1: l}
                d.update(dict([(k, stats[k][i]) for k in stats.keys()]))
                table.append([('%.3g', '%s')[isinstance(d[e], basestring)]
                              % d[e] for e in entries])
            return '\nSummary per %s across %ss\n' % (name1, name2) \
                   + table2string(table)

        if len(ul) < maxl:
            s += cl_stats(0, ul, 'label', 'chunk')
        if len(uc) < maxc:
            s += cl_stats(1, uc, 'chunk', 'label')
        return s


    def __iadd__(self, other):
        """Merge the samples of one Dataset object to another (in-place).

        No dataset attributes, besides labels_map, will be merged!
        Additionally, a new set of unique `origids` will be generated.
        """
        # local bindings
        _data = self._data
        other_data = other._data

        if not self.nfeatures == other.nfeatures:
            raise DatasetError, "Cannot add Dataset, because the number of " \
                  "features do not match."

        # take care about labels_map
        slm = self.labels_map
        olm = other.labels_map
        if N.logical_xor(slm is None, olm is None):
            raise ValueError, "Cannot add datasets where only one of them " \
                  "has labels map assigned. If needed -- implement it"

        # concatenate all sample attributes
        for k, v in _data.iteritems():
            if k == 'origids':
                # special case: simply regenerate unique origids
                _data[k] = N.arange(len(v) + len(other_data[k]))

            elif k == 'labels' and slm is not None:
                # combine labels_maps and remap `other`'s labels accordingly
                nlm = slm.copy()
                # figure out the next free numeric id
                nextid = N.sort(nlm.values())[-1] + 1
                olabels = other.labels
                olabels_remap = {}
                for ol, olnum in olm.iteritems():
                    if not nlm.has_key(ol):
                        # check if other's id is not in use
                        if olnum in nlm.values():
                            nextid = N.sort(nlm.values() + olm.values())[-1] + 1
                        else:
                            nextid = olnum
                        olabels_remap[olnum] = nextid
                        nlm[ol] = nextid
                        nextid += 1
                    else:
                        olabels_remap[olnum] = nlm[ol]
                olabels = [olabels_remap[x] for x in olabels]
                # finally compose new labels
                _data['labels'] = N.concatenate((v, olabels), axis=0)
                # and reassign the merged mapping
                self._dsattr['labels_map'] = nlm

                if __debug__:
                    # check if we are not dealing with degenerate mappings
                    if (len(set(slm.keys())) != len(set(slm.values()))) or \
                       (len(set(olm.keys())) != len(set(olm.values()))):
                        warning("Adding datasets where multiple labels "
                                "mapped to the same ID is not recommended. "
                                "Please check the outcome. Original mappings "
                                "were %s and %s. Resultant is %s"
                                % (slm, olm, nlm))

            else:
                _data[k] = N.concatenate((v, other_data[k]), axis=0)

        # might be more sophisticated but for now just reset -- it is safer ;)
        self._resetallunique()

        return self


    def __add__(self, other):
        """Merge the samples of two Dataset objects.

        All data of both datasets is copied, concatenated and a new Dataset is
        returned.

        NOTE: This can be a costly operation (both memory and time). If
        performance is important consider the '+=' operator.
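
        Example: a sketch, assuming two compatible datasets ``ds1`` and
        ``ds2``::

            merged = ds1 + ds2      # copies both operands
            ds1 += ds2              # cheaper in-place merge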
954 """
        # create a new object of the same type it is now
        out = super(Dataset, self).__new__(self.__class__)

        # now init it: to make it work all Dataset constructors have to accept
        # Class(data=Dict, dsattr=Dict)
        out.__init__(data=self._data,
                     dsattr=self._dsattr,
                     copy_samples=True,
                     copy_data=True,
                     copy_dsattr=True)

        out += other

        return out


    def copy(self, deep=True):
        """Create a copy (clone) of the dataset, by fully copying current one

        :Keywords:
          deep : bool
            deep flag is provided to __init__ for
            copy_{samples,data,dsattr}. By default full copy is done.
        """
        # create a new object of the same type
        out = super(Dataset, self).__new__(self.__class__)

        # now init it: to make it work all Dataset constructors have to accept
        # Class(data=Dict, dsattr=Dict); honour the `deep` flag as documented
        out.__init__(data=self._data,
                     dsattr=self._dsattr,
                     copy_samples=deep,
                     copy_data=deep,
                     copy_dsattr=deep)

        return out


    def selectFeatures(self, ids=None, sort=True, groups=None):
        """Select a number of features from the current set.

        :Parameters:
          ids
            iterable container to select ids
          sort : bool
            whether to sort the ids. Order matters and `selectFeatures`
            assumes incremental order. If not sorted, in non-optimized
            code selectFeatures would verify the order and sort

        Returns a new Dataset object with a copy of corresponding features
        from the original samples array.

        WARNING: The order of ids determines the order of features in
        the returned dataset. This might be useful sometimes, but can
        also cause major headaches! Order is verified when
        running in non-optimized code (if __debug__)
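
        Example: a sketch, assuming a dataset ``ds``::

            ds2 = ds.selectFeatures([0, 2, 5])   # keep three features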
1011 """
        if ids is None and groups is None:
            raise ValueError, "No feature selection specified."

        # start with an empty list if no ids were specified (so just groups)
        if ids is None:
            ids = []

        if not groups is None:
            if not self._dsattr.has_key('featuregroups'):
                raise RuntimeError, \
                      "Dataset has no feature grouping information."

            for g in groups:
                ids += (self._dsattr['featuregroups'] == g).nonzero()[0].tolist()

        # sort, or at least verify the ordering in non-optimized mode
        if sort:
            ids = copy.deepcopy(ids)
            ids.sort()
        elif __debug__ and 'CHECK_DS_SORTED' in debug.active:
            from mvpa.misc.support import isSorted
            if not isSorted(ids):
                warning("IDs for selectFeatures must be provided " +
                        "in sorted order, otherwise major headache might occur")

        # shallow-copy all stuff from the current data dict
        new_data = self._data.copy()

        # assign the selected features -- data is still shared with
        # the current dataset
        new_data['samples'] = self._data['samples'][:, ids]

        # apply the selection to feature groups as well (if any)
        if self._dsattr.has_key('featuregroups'):
            new_dsattr = self._dsattr.copy()
            new_dsattr['featuregroups'] = self._dsattr['featuregroups'][ids]
        else:
            new_dsattr = self._dsattr

        # create a new object of the same type it is now
        dataset = super(Dataset, self).__new__(self.__class__)

        # now init it: to make it work all Dataset constructors have to accept
        # Class(data=Dict, dsattr=Dict)
        dataset.__init__(data=new_data,
                         dsattr=new_dsattr,
                         check_data=False,
                         copy_samples=False,
                         copy_data=False,
                         copy_dsattr=False
                         )

        return dataset


    def applyMapper(self, featuresmapper=None, samplesmapper=None,
                    train=True):
        """Obtain new dataset by applying mappers over features and/or samples.

        While featuresmappers leave the sample attributes information
        unchanged, as the number of samples in the dataset is invariant,
        samplesmappers are also applied to the samples attributes themselves!

        Applying a featuresmapper will destroy any feature grouping information.

        :Parameters:
          featuresmapper : Mapper
            `Mapper` to somehow transform each sample's features
          samplesmapper : Mapper
            `Mapper` to transform each feature across samples
          train : bool
            Flag whether to train the mapper with this dataset before applying
            it.

        TODO: selectFeatures is pretty much
              applyMapper(featuresmapper=MaskMapper(...))
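
        Example: a sketch, assuming a dataset ``ds`` and an already
        constructed features mapper ``mapper``::

            ds2 = ds.applyMapper(featuresmapper=mapper)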
1089 """
        # shallow-copy all stuff from the current data dict
        new_data = self._data.copy()

        # apply mappers
        if samplesmapper:
            if train:
                if __debug__:
                    debug("DS", "Training samplesmapper %s" % `samplesmapper`)
                samplesmapper.train(self)

            if __debug__:
                debug("DS", "Applying samplesmapper %s" % `samplesmapper` +
                      " to samples of dataset `%s`" % `self`)

            # remove origids since they are not immune to samplesmappers
            if new_data.has_key('origids'):
                del(new_data['origids'])

            # map samples and all remaining sample attributes
            for k in new_data.keys():
                new_data[k] = samplesmapper.forward(self._data[k])

        # feature mapping might affect dataset attributes
        new_dsattr = self._dsattr

        if featuresmapper:
            if train:
                if __debug__:
                    debug("DS", "Training featuresmapper %s" % `featuresmapper`)
                featuresmapper.train(self)

            if __debug__:
                debug("DS", "Applying featuresmapper %s" % `featuresmapper` +
                      " to samples of dataset `%s`" % `self`)
            new_data['samples'] = featuresmapper.forward(self._data['samples'])

            # applying a featuresmapper destroys feature grouping information
            if self._dsattr.has_key('featuregroups'):
                new_dsattr = self._dsattr.copy()
                del(new_dsattr['featuregroups'])
            else:
                new_dsattr = self._dsattr

        # create a new object of the same type it is now
        dataset = super(Dataset, self).__new__(self.__class__)

        # now init it: to make it work all Dataset constructors have to accept
        # Class(data=Dict, dsattr=Dict)
        dataset.__init__(data=new_data,
                         dsattr=new_dsattr,
                         check_data=False,
                         copy_samples=False,
                         copy_data=False,
                         copy_dsattr=False
                         )

        # sample attributes change under a samplesmapper -- recompute unique*
        if samplesmapper:
            dataset._resetallunique(force=True)

        return dataset


    def selectSamples(self, ids):
        """Choose a subset of samples defined by samples IDs.

        Returns a new dataset object containing the selected sample
        subset.

        TODO: yoh, we might need to sort the mask if the mask is a
              list of ids and is not ordered. Clarify with Michael what is
              our intent here!
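
        Example: a sketch, assuming a dataset ``ds``::

            ds2 = ds.selectSamples([0, 2, 4])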
1165 """
        # without a sequence as index the masked samples array would
        # lose its 2d layout
        if not operator.isSequenceType(ids):
            ids = [ids]

        # select the given ids from every sample attribute
        data = {}
        for k, v in self._data.iteritems():
            data[k] = v[ids, ]

        # create a new object of the same type it is now
        dataset = super(Dataset, self).__new__(self.__class__)

        # now init it: to make it work all Dataset constructors have to accept
        # Class(data=Dict, dsattr=Dict)
        dataset.__init__(data=data,
                         dsattr=self._dsattr,
                         check_data=False,
                         copy_samples=False,
                         copy_data=False,
                         copy_dsattr=False)

        dataset._resetallunique(force=True)
        return dataset


    def index(self, *args, **kwargs):
        """Universal indexer to obtain indexes of interesting samples/features.
        See .select() for more information

        :Return: tuple of (samples indexes, features indexes). Each
          item could also be None, if no selection on samples or
          features was requested (to discriminate between no selected
          items, and no selections)
        """
        s_indx = []                     # selections for samples
        f_indx = []                     # selections for features
        # popped so it is not treated as an attribute selection below
        return_dataset = kwargs.pop('return_dataset', False)
        largs = len(args)

        args = list(args)               # so we could override
        # figure out how many leading positional arguments are plain
        # samples/features selections (and not attribute/value pairs)
        largs_nonstring = 0
        for i in xrange(largs):
            l = args[i]
            if isinstance(l, basestring):
                if l.lower() == 'all':
                    # great -- select all
                    args[i] = slice(None)
                else:
                    break
            largs_nonstring += 1

        if largs_nonstring >= 1:
            s_indx.append(args[0])
            if __debug__ and 'CHECK_DS_SELECT' in debug.active:
                _validate_indexes_uniq_sorted(args[0], 'select', 'samples')
            if largs_nonstring == 2:
                f_indx.append(args[1])
                if __debug__ and 'CHECK_DS_SELECT' in debug.active:
                    _validate_indexes_uniq_sorted(args[1], 'select', 'features')
            elif largs_nonstring > 2:
                raise ValueError, "Only two positional arguments are allowed" \
                      ". 1st for samples, 2nd for features"

        # the remaining positional arguments must encode selections like
        # ('labels', [1, 2, 3]), thus they must come in pairs
        if (largs - largs_nonstring) % 2 != 0:
            raise ValueError, "Positional selections must come in pairs:" \
                  " e.g. ('labels', [1,2,3])"

        for i in xrange(largs_nonstring, largs, 2):
            k, v = args[i:i + 2]
            kwargs[k] = v

        # process keyword selections
        data_ = self._data
        for k, v in kwargs.iteritems():
            if k == 'samples':
                s_indx.append(v)
            elif k == 'features':
                f_indx.append(v)
            elif data_.has_key(k):
                # selection by the value of a sample attribute
                if __debug__:
                    if not N.any([isinstance(v, cls) for cls in
                                  [list, tuple, slice, int]]):
                        raise ValueError, "Trying to specify selection for %s " \
                              "based on unsupported '%s'" % (k, v)
                s_indx.append(self._getSampleIdsByAttr(v, attrib=k, sort=False))
            else:
                raise ValueError, 'Keyword "%s" is not known, thus ' \
                      'select() failed' % k

        def combine_indexes(indx, nelements):
            """Helper function: intersect selections given in indx

            :Parameters:
              indx : list of lists or slices
                selections of elements
              nelements : int
                number of elements total for deriving indexes from slices
            """
            indx_sel = None                 # pure list of ids for selection
            for s in indx:
                if isinstance(s, slice) or \
                   isinstance(s, N.ndarray) and s.dtype == bool:
                    # convert slices and boolean masks into indexes
                    all_indexes = N.arange(nelements)
                    s = all_indexes[s]
                elif not operator.isSequenceType(s):
                    s = [s]

                if indx_sel is None:
                    indx_sel = set(s)
                else:
                    # intersect with the previous selections
                    indx_sel = indx_sel.intersection(s)

            # if we got a set -- convert
            if isinstance(indx_sel, set):
                indx_sel = list(indx_sel)

            # sort for the sake of sanity
            indx_sel.sort()

            return indx_sel

        # select samples
        if len(s_indx) == 1 and isinstance(s_indx[0], slice) \
               and s_indx[0] == slice(None):
            # so no actual selection was requested among samples
            s_indx = s_indx[0]
        else:
            # we need to do the selection with combined indexes
            if len(s_indx) == 0:
                s_indx = None
            else:
                s_indx = combine_indexes(s_indx, self.nsamples)

        # select features
        if len(f_indx):
            f_indx = combine_indexes(f_indx, self.nfeatures)
        else:
            f_indx = None

        return s_indx, f_indx


    def select(self, *args, **kwargs):
        """Universal selector

        WARNING: if you need to select duplicate samples
        (e.g. samples=[5,5]), or the order of selected samples or features
        matters and must stay unsorted (e.g. samples=[3,2,1]),
        please use selectFeatures or selectSamples functions directly

        Examples:
          Mimic plain selectSamples::

            dataset.select([1,2,3])
            dataset[[1,2,3]]

          Mimic plain selectFeatures::

            dataset.select(slice(None), [1,2,3])
            dataset.select('all', [1,2,3])
            dataset[:, [1,2,3]]

          Mixed (select features and samples)::

            dataset.select([1,2,3], [1, 2])
            dataset[[1,2,3], [1, 2]]

          Select samples matching some attributes::

            dataset.select(labels=[1,2], chunks=[2,4])
            dataset.select('labels', [1,2], 'chunks', [2,4])
            dataset['labels', [1,2], 'chunks', [2,4]]

          Mixed -- out of first 100 samples, select only those with
          labels 1 or 2 and belonging to chunks 2 or 4, and select
          features 2 and 3::

            dataset.select(slice(0,100), [2,3], labels=[1,2], chunks=[2,4])
            dataset[:100, [2,3], 'labels', [1,2], 'chunks', [2,4]]

        """
        s_indx, f_indx = self.index(*args, **kwargs)

        # select samples
        if s_indx == slice(None):
            # so no actual selection -- just keep this dataset
            if __debug__:
                debug('DS', 'in select() not selecting samples')
            ds = self
        else:
            # else do the selection
            if __debug__:
                debug('DS', 'in select() selecting samples given selections'
                      + str(s_indx))
            ds = self.selectSamples(s_indx)

        # select features
        if f_indx is not None:
            if __debug__:
                debug('DS', 'in select() selecting features given selections'
                      + str(f_indx))
            ds = ds.selectFeatures(f_indx)

        return ds



    def where(self, *args, **kwargs):
        """Obtain indexes of interesting samples/features. See select()
        for more information

        XXX somewhat obsoletes idsby...
        """
        s_indx, f_indx = self.index(*args, **kwargs)
        if s_indx is not None and f_indx is not None:
            return s_indx, f_indx
        elif s_indx is not None:
            return s_indx
        else:
            return f_indx


    def __getitem__(self, *args):
        """Convenience dataset parts selection

        See select for more information
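
        Example: a sketch, assuming a dataset ``ds``::

            ds['labels', [1, 2]]        # samples with labels 1 or 2
            ds[:10, [0, 1]]             # first 10 samples, two features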
1423 """
        # uniformize: e.g. ds[1, 2] arrives as a single tuple argument
        if len(args) == 1 and isinstance(args[0], tuple):
            args = args[0]

        # expand string-keyed slices like ['chunks':[2,3]] into
        # ('chunks', [2,3]) pairs understood by select()
        args_, args = args, ()
        for a in args_:
            if isinstance(a, slice) and \
               isinstance(a.start, basestring):
                if a.stop is None or a.step is not None:
                    raise ValueError, \
                          "Selection must look like ['chunks':[2,3]]"
                args += (a.start, a.stop)
            else:
                args += (a,)
        return self.select(*args)


    def permuteLabels(self, status, perchunk=True, assure_permute=False):
        """Permute the labels.

        TODO: rename status into something closer in semantics.

        :Parameters:
          status : bool
            Calling this method with status set to True, the labels are
            permuted among all samples. If 'status' is False the
            original labels are restored.
          perchunk : bool
            If True permutation is limited to samples sharing the same
            chunk value. Therefore only the association of a certain
            sample with a label is permuted while keeping the absolute
            number of occurrences of each label value within a certain
            chunk constant.
          assure_permute : bool
            If True, assures that labels are permuted, i.e. any one is
            different from the original one
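
        Example: a sketch, assuming a dataset ``ds``::

            ds.permuteLabels(True)      # permute (e.g. for a null distribution)
            ds.permuteLabels(False)     # restore the original labels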
1461 """
        # local bindings
        _data = self._data

        if len(self.uniquelabels) < 2:
            raise RuntimeError, \
                  "Call to permuteLabels is bogus since there is an " \
                  "insufficient number of labels: %s" % self.uniquelabels

        if not status:
            # restore originals
            if _data.get('origlabels', None) is None:
                raise RuntimeError, 'Cannot restore labels. ' \
                                    'permuteLabels() has never been ' \
                                    'called with status == True.'
            self.labels = _data['origlabels']
            _data.pop('origlabels')
        else:
            # store the original labels, but only if not yet done, otherwise
            # multiple calls with status == True would destroy them
            if not _data.has_key('origlabels') \
                   or _data['origlabels'] is None:
                # bind old labels to origlabels
                _data['origlabels'] = _data['labels']
                # copy labels
                _data['labels'] = copy.copy(_data['labels'])

            labels = _data['labels']
            # now scramble
            if perchunk:
                for o in self.uniquechunks:
                    labels[self.chunks == o] = \
                        N.random.permutation(labels[self.chunks == o])
            else:
                labels = N.random.permutation(labels)

            self.labels = labels

            if assure_permute:
                if not (_data['labels'] != _data['origlabels']).any():
                    if not (assure_permute is True):
                        if assure_permute == 1:
                            raise RuntimeError, \
                                  "Cannot assure permutation of labels %s for " \
                                  "some reason with chunks %s and while " \
                                  "perchunk=%s . Should not happen" % \
                                  (self.labels, self.chunks, perchunk)
                    else:
                        assure_permute = 11 # make 10 attempts
                    if __debug__:
                        debug("DS",
                              "Recalling permute to assure different labels")
                    self.permuteLabels(status, perchunk=perchunk,
                                       assure_permute=assure_permute - 1)


    def randomSamples(self, nperlabel):
        """Select a random set of samples.

        If 'nperlabel' is an integer value, the specified number of samples is
        randomly chosen from the group of samples sharing a unique label
        value (total number of selected samples:
        nperlabel x len(uniquelabels)).

        If 'nperlabel' is a list, its length has to match the number of
        unique label values. In this case 'nperlabel' specifies the number of
        samples that shall be selected from the samples with the corresponding
        label.

        The method returns a Dataset object containing the selected
        samples.
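
        Example: a sketch, assuming a dataset ``ds``::

            ds2 = ds.randomSamples(10)     # 10 random samples per label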
1530 """
        # if an integer is given take this value for all classes
        if isinstance(nperlabel, int):
            nperlabel = [nperlabel for i in self.uniquelabels]

        sample = []
        # for each available class
        labels = self.labels
        for i, r in enumerate(self.uniquelabels):
            # get a random selection of pattern ids for this class
            sample += random.sample((labels == r).nonzero()[0],
                                    nperlabel[i])

        return self.selectSamples(sample)


1554 """Currently available number of patterns.
1555 """
1556 return self._data['samples'].shape[0]
1557
1558
1560 """Number of features per pattern.
1561 """
1562 return self._data['samples'].shape[1]
1563
1564
1566 """Stored labels map (if any)
1567 """
1568 return self._dsattr.get('labels_map', None)
1569
1570
1572 """Set labels map.
1573
1574 Checks for the validity of the mapping -- values should cover
1575 all existing labels in the dataset
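
        Example: a sketch, assuming a dataset ``ds`` with numeric labels
        0 and 1::

            ds.labels_map = {'rest': 0, 'task': 1}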
1576 """
        values = set(lm.values())
        labels = set(self.uniquelabels)
        if not values.issuperset(labels):
            raise ValueError, \
                  "Provided mapping %s has some existing labels (out of %s) " \
                  "missing from mapping" % (list(values), list(labels))
        self._dsattr['labels_map'] = lm

1587 """Set the data type of the samples array.
1588 """
1589
1590 _data = self._data
1591
1592 if _data['samples'].dtype != dtype:
1593 _data['samples'] = _data['samples'].astype(dtype)
1594
1595
1597 """Assign `definition` to featuregroups
1598
1599 XXX Feature-groups was not finished to be useful
1600 """
1601 if not len(definition) == self.nfeatures:
1602 raise ValueError, \
1603 "Length of feature group definition %i " \
1604 "does not match the number of features %i " \
1605 % (len(definition), self.nfeatures)
1606
1607 self._dsattr['featuregroups'] = N.array(definition)


    def convertFeatureIds2FeatureMask(self, ids):
        """Returns a boolean mask with all features in `ids` selected.

        :Parameters:
          ids : list or 1d array
            To be selected features ids.

        :Returns:
          ndarray : dtype='bool'
            All selected features are set to True; False otherwise.
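
        Example: a sketch, assuming a dataset ``ds``::

            mask = ds.convertFeatureIds2FeatureMask([0, 2])
            ids = ds.convertFeatureMask2FeatureIds(mask)   # back to [0, 2]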
1620 """
        fmask = N.repeat(False, self.nfeatures)
        fmask[ids] = True

        return fmask

1628 """Returns feature ids corresponding to non-zero elements in the mask.
1629
1630 :Parameters:
1631 mask: 1d ndarray
1632 Feature mask.
1633
1634 :Returns:
1635 ndarray: integer
1636 Ids of non-zero (non-False) mask elements.
1637 """
1638 return mask.nonzero()[0]
1639
1640
    @staticmethod
    def _checkCopyConstructorArgs(**kwargs):
        """Common sanity check for Dataset copy constructor calls."""
        # figure out where the samples were provided
        samples = None
        if kwargs.has_key('samples'):
            samples = kwargs['samples']
        if samples is None and kwargs.has_key('data') \
           and kwargs['data'].has_key('samples'):
            # samples were given within the data dictionary
            samples = kwargs['data']['samples']
        if samples is None:
            raise DatasetError, \
                  "`samples` must be provided to copy constructor call."

        if not len(samples.shape) == 2:
            raise DatasetError, \
                  "samples must be in 2D shape in copy constructor call."


    # convenience class properties
    nsamples = property(fget=getNSamples)
    nfeatures = property(fget=getNFeatures)
    labels_map = property(fget=getLabelsMap, fset=setLabelsMap)
1664
1666 """Decorator to easily bind functions to a Dataset class
1667 """
    if __debug__:
        debug("DS_", "Binding function %s to Dataset class" % func.func_name)

    # bind the function to the Dataset class
    setattr(Dataset, func.func_name, func)

    # return the original function untouched
    return func

# register the basic sample attributes with convenience abbreviations
Dataset._registerAttribute("samples", "_data", abbr='S', hasunique=False)
Dataset._registerAttribute("labels", "_data", abbr='L', hasunique=True)
Dataset._registerAttribute("chunks", "_data", abbr='C', hasunique=True)

# samples' origids
Dataset._registerAttribute("origids", "_data", abbr='I', hasunique=False)


from mvpa.misc.state import ClassWithCollections, Collection
from mvpa.misc.attributes import SampleAttribute, FeatureAttribute, \
     DatasetAttribute


class _Dataset(ClassWithCollections):
    """The successor of Dataset.
    """
    # placeholders for the attribute collections
    sa = None
    fa = None
    dsa = None

    # the sample data container
    samples = None

    def __init__(self, samples, sa=None, fa=None, dsa=None):
        """
        This is the generic internal constructor. Its main task is to allow
        for a maximum level of customization during dataset construction,
        including fast copy construction.

        Parameters
        ----------
        samples : ndarray
          Data samples.
        sa : Collection
          Samples attributes collection.
        fa : Collection
          Features attributes collection.
        dsa : Collection
          Dataset attributes collection.
        """
        ClassWithCollections.__init__(self)

        # bind the samples
        self.samples = samples

        # copy attributes from the source collections (if any were given)
        # into the corresponding target collections of this instance
        for scol, tcol in ((sa, self.sa),
                           (fa, self.fa),
                           (dsa, self.dsa)):
            # make sure there is a target collection to populate
            if tcol is None:
                tcol = Collection(owner=self)

            # transfer the attributes
            if not scol is None:
                for name, attr in scol.items.iteritems():
                    tcol.add(copy.copy(attr))


    @classmethod
    def from_basic(klass, samples, labels=None, chunks=None):
        """Create a dataset from a samples matrix and basic sample attributes.

        Parameters
        ----------
        samples : ndarray
          The two-dimensional samples matrix.
        labels : ndarray
        chunks : ndarray

        Returns
        -------
        A new dataset instance (of class `klass`) with `labels` and
        `chunks` wrapped into its samples attributes collection.

        Examples
        --------
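        A minimal sketch (array shapes and values are illustrative)::

          samples = N.random.randn(3, 2)
          ds = _Dataset.from_basic(samples,
                                   labels=N.array([0, 1, 1]),
                                   chunks=N.array([0, 0, 1]))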
1814 """
        # compile the samples attributes collection
        labels_ = SampleAttribute(name='labels')
        labels_.value = labels
        chunks_ = SampleAttribute(name='chunks')
        chunks_.value = chunks

        sa = Collection(items={'labels': labels_, 'chunks': chunks_})

        # feature and dataset attributes are not needed for this mode
        return klass(samples, sa=sa)


    def get_nsamples(self):
        """Currently available number of patterns.
        """
        return self.samples.shape[0]


    def get_nfeatures(self):
        """Number of features per pattern.
        """
        return self.samples.shape[1]