1from __future__ import absolute_import, print_function, division
2
3
4# standard library dependencies
5from itertools import islice, chain
6from collections import deque
7from itertools import count
8from petl.compat import izip, izip_longest, next, string_types, text_type
9
10
11# internal dependencies
12from petl.util.base import asindices, rowgetter, Record, Table
13
14
15import logging
16logger = logging.getLogger(__name__)
17warning = logger.warning
18info = logger.info
19debug = logger.debug
20
21
22def cut(table, *args, **kwargs):
23    """
24    Choose and/or re-order fields. E.g.::
25
26        >>> import petl as etl
27        >>> table1 = [['foo', 'bar', 'baz'],
28        ...           ['A', 1, 2.7],
29        ...           ['B', 2, 3.4],
30        ...           ['B', 3, 7.8],
31        ...           ['D', 42, 9.0],
32        ...           ['E', 12]]
33        >>> table2 = etl.cut(table1, 'foo', 'baz')
34        >>> table2
35        +-----+------+
36        | foo | baz  |
37        +=====+======+
38        | 'A' |  2.7 |
39        +-----+------+
40        | 'B' |  3.4 |
41        +-----+------+
42        | 'B' |  7.8 |
43        +-----+------+
44        | 'D' |  9.0 |
45        +-----+------+
46        | 'E' | None |
47        +-----+------+
48
49        >>> # fields can also be specified by index, starting from zero
50        ... table3 = etl.cut(table1, 0, 2)
51        >>> table3
52        +-----+------+
53        | foo | baz  |
54        +=====+======+
55        | 'A' |  2.7 |
56        +-----+------+
57        | 'B' |  3.4 |
58        +-----+------+
59        | 'B' |  7.8 |
60        +-----+------+
61        | 'D' |  9.0 |
62        +-----+------+
63        | 'E' | None |
64        +-----+------+
65
66        >>> # field names and indices can be mixed
67        ... table4 = etl.cut(table1, 'bar', 0)
68        >>> table4
69        +-----+-----+
70        | bar | foo |
71        +=====+=====+
72        |   1 | 'A' |
73        +-----+-----+
74        |   2 | 'B' |
75        +-----+-----+
76        |   3 | 'B' |
77        +-----+-----+
78        |  42 | 'D' |
79        +-----+-----+
80        |  12 | 'E' |
81        +-----+-----+
82
83        >>> # select a range of fields
84        ... table5 = etl.cut(table1, *range(0, 2))
85        >>> table5
86        +-----+-----+
87        | foo | bar |
88        +=====+=====+
89        | 'A' |   1 |
90        +-----+-----+
91        | 'B' |   2 |
92        +-----+-----+
93        | 'B' |   3 |
94        +-----+-----+
95        | 'D' |  42 |
96        +-----+-----+
97        | 'E' |  12 |
98        +-----+-----+
99
100    Note that any short rows will be padded with `None` values (or whatever is
101    provided via the `missing` keyword argument).
102
103    See also :func:`petl.transform.basics.cutout`.
104
105    """
106
107    # support passing a single list or tuple of fields
108    if len(args) == 1 and isinstance(args[0], (list, tuple)):
109        args = args[0]
110
111    return CutView(table, args, **kwargs)
112
113
114Table.cut = cut
115
116
117class CutView(Table):
118
119    def __init__(self, source, spec, missing=None):
120        self.source = source
121        self.spec = spec
122        self.missing = missing
123
124    def __iter__(self):
125        return itercut(self.source, self.spec, self.missing)
126
127
128def itercut(source, spec, missing=None):
129    it = iter(source)
130    spec = tuple(spec)  # make sure no-one can change midstream
131
132    # convert field selection into field indices
133    hdr = next(it)
134    indices = asindices(hdr, spec)
135
136    # define a function to transform each row in the source data
137    # according to the field selection
138    transform = rowgetter(*indices)
139
140    # yield the transformed header
141    yield transform(hdr)
142
143    # construct the transformed data
144    for row in it:
145        try:
146            yield transform(row)
147        except IndexError:
148            # row is short, let's be kind and fill in any missing fields
149            yield tuple(row[i] if i < len(row) else missing for i in indices)
150
151
152def cutout(table, *args, **kwargs):
153    """
154    Remove fields. E.g.::
155
156        >>> import petl as etl
157        >>> table1 = [['foo', 'bar', 'baz'],
158        ...           ['A', 1, 2.7],
159        ...           ['B', 2, 3.4],
160        ...           ['B', 3, 7.8],
161        ...           ['D', 42, 9.0],
162        ...           ['E', 12]]
163        >>> table2 = etl.cutout(table1, 'bar')
164        >>> table2
165        +-----+------+
166        | foo | baz  |
167        +=====+======+
168        | 'A' |  2.7 |
169        +-----+------+
170        | 'B' |  3.4 |
171        +-----+------+
172        | 'B' |  7.8 |
173        +-----+------+
174        | 'D' |  9.0 |
175        +-----+------+
176        | 'E' | None |
177        +-----+------+
178
179    See also :func:`petl.transform.basics.cut`.
180
181    """
182
183    return CutOutView(table, args, **kwargs)
184
185
186Table.cutout = cutout
187
188
189class CutOutView(Table):
190
191    def __init__(self, source, spec, missing=None):
192        self.source = source
193        self.spec = spec
194        self.missing = missing
195
196    def __iter__(self):
197        return itercutout(self.source, self.spec, self.missing)
198
199
200def itercutout(source, spec, missing=None):
201    it = iter(source)
202    spec = tuple(spec)  # make sure no-one can change midstream
203
204    # convert field selection into field indices
205    hdr = next(it)
206    indicesout = asindices(hdr, spec)
207    indices = [i for i in range(len(hdr)) if i not in indicesout]
208
209    # define a function to transform each row in the source data
210    # according to the field selection
211    transform = rowgetter(*indices)
212
213    # yield the transformed header
214    yield transform(hdr)
215
216    # construct the transformed data
217    for row in it:
218        try:
219            yield transform(row)
220        except IndexError:
221            # row is short, let's be kind and fill in any missing fields
222            yield tuple(row[i] if i < len(row) else missing for i in indices)
223
224
225def cat(*tables, **kwargs):
226    """
227    Concatenate tables. E.g.::
228
229        >>> import petl as etl
230        >>> table1 = [['foo', 'bar'],
231        ...           [1, 'A'],
232        ...           [2, 'B']]
233        >>> table2 = [['bar', 'baz'],
234        ...           ['C', True],
235        ...           ['D', False]]
236        >>> table3 = etl.cat(table1, table2)
237        >>> table3
238        +------+-----+-------+
239        | foo  | bar | baz   |
240        +======+=====+=======+
241        |    1 | 'A' | None  |
242        +------+-----+-------+
243        |    2 | 'B' | None  |
244        +------+-----+-------+
245        | None | 'C' | True  |
246        +------+-----+-------+
247        | None | 'D' | False |
248        +------+-----+-------+
249
250        >>> # can also be used to square up a single table with uneven rows
251        ... table4 = [['foo', 'bar', 'baz'],
252        ...           ['A', 1, 2],
253        ...           ['B', '2', '3.4'],
254        ...           [u'B', u'3', u'7.8', True],
255        ...           ['D', 'xyz', 9.0],
256        ...           ['E', None]]
257        >>> table5 = etl.cat(table4)
258        >>> table5
259        +-----+-------+-------+
260        | foo | bar   | baz   |
261        +=====+=======+=======+
262        | 'A' |     1 |     2 |
263        +-----+-------+-------+
264        | 'B' | '2'   | '3.4' |
265        +-----+-------+-------+
266        | 'B' | '3'   | '7.8' |
267        +-----+-------+-------+
268        | 'D' | 'xyz' |   9.0 |
269        +-----+-------+-------+
270        | 'E' | None  | None  |
271        +-----+-------+-------+
272
273        >>> # use the header keyword argument to specify a fixed set of fields
274        ... table6 = [['bar', 'foo'],
275        ...           ['A', 1],
276        ...           ['B', 2]]
277        >>> table7 = etl.cat(table6, header=['A', 'foo', 'B', 'bar', 'C'])
278        >>> table7
279        +------+-----+------+-----+------+
280        | A    | foo | B    | bar | C    |
281        +======+=====+======+=====+======+
282        | None |   1 | None | 'A' | None |
283        +------+-----+------+-----+------+
284        | None |   2 | None | 'B' | None |
285        +------+-----+------+-----+------+
286
287        >>> # using the header keyword argument with two input tables
288        ... table8 = [['bar', 'foo'],
289        ...           ['A', 1],
290        ...           ['B', 2]]
291        >>> table9 = [['bar', 'baz'],
292        ...           ['C', True],
293        ...           ['D', False]]
294        >>> table10 = etl.cat(table8, table9, header=['A', 'foo', 'B', 'bar', 'C'])
295        >>> table10
296        +------+------+------+-----+------+
297        | A    | foo  | B    | bar | C    |
298        +======+======+======+=====+======+
299        | None |    1 | None | 'A' | None |
300        +------+------+------+-----+------+
301        | None |    2 | None | 'B' | None |
302        +------+------+------+-----+------+
303        | None | None | None | 'C' | None |
304        +------+------+------+-----+------+
305        | None | None | None | 'D' | None |
306        +------+------+------+-----+------+
307
308    Note that the tables do not need to share exactly the same fields, any
309    missing fields will be padded with `None` or whatever is provided via the
310    `missing` keyword argument.
311
312    Note that this function can be used with a single table argument, in which
313    case it has the effect of ensuring all data rows are the same length as
314    the header row, truncating any long rows and padding any short rows with
315    the value of the `missing` keyword argument.
316
317    By default, the fields for the output table will be determined as the
318    union of all fields found in the input tables. Use the `header` keyword
319    argument to override this behaviour and specify a fixed set of fields for
320    the output table.
321
322    """
323
324    return CatView(tables, **kwargs)
325
326
327Table.cat = cat
328
329
330class CatView(Table):
331
332    def __init__(self, sources, missing=None, header=None):
333        self.sources = sources
334        self.missing = missing
335        self.header = header
336
337    def __iter__(self):
338        return itercat(self.sources, self.missing, self.header)
339
340
341def itercat(sources, missing, header):
342    its = [iter(t) for t in sources]
343    hdrs = [list(next(it)) for it in its]
344
345    if header is None:
346        # determine output fields by gathering all fields found in the sources
347        outhdr = list(hdrs[0])
348        for hdr in hdrs[1:]:
349            for h in hdr:
350                if h not in outhdr:
351                    # add any new fields as we find them
352                    outhdr.append(h)
353    else:
354        # predetermined output fields
355        outhdr = header
356    yield tuple(outhdr)
357
358    # output data rows
359    for hdr, it in zip(hdrs, its):
360
361        # now construct and yield the data rows
362        for row in it:
363            outrow = list()
364            for h in outhdr:
365                val = missing
366                try:
367                    val = row[hdr.index(h)]
368                except IndexError:
369                    # short row
370                    pass
371                except ValueError:
372                    # field not in table
373                    pass
374                outrow.append(val)
375            yield tuple(outrow)
376
377
378def stack(*tables, **kwargs):
379    """Concatenate tables, without trying to match headers. E.g.::
380
381        >>> import petl as etl
382        >>> table1 = [['foo', 'bar'],
383        ...           [1, 'A'],
384        ...           [2, 'B']]
385        >>> table2 = [['bar', 'baz'],
386        ...           ['C', True],
387        ...           ['D', False]]
388        >>> table3 = etl.stack(table1, table2)
389        >>> table3
390        +-----+-------+
391        | foo | bar   |
392        +=====+=======+
393        |   1 | 'A'   |
394        +-----+-------+
395        |   2 | 'B'   |
396        +-----+-------+
397        | 'C' | True  |
398        +-----+-------+
399        | 'D' | False |
400        +-----+-------+
401
402        >>> # can also be used to square up a single table with uneven rows
403        ... table4 = [['foo', 'bar', 'baz'],
404        ...           ['A', 1, 2],
405        ...           ['B', '2', '3.4'],
406        ...           [u'B', u'3', u'7.8', True],
407        ...           ['D', 'xyz', 9.0],
408        ...           ['E', None]]
409        >>> table5 = etl.stack(table4)
410        >>> table5
411        +-----+-------+-------+
412        | foo | bar   | baz   |
413        +=====+=======+=======+
414        | 'A' |     1 |     2 |
415        +-----+-------+-------+
416        | 'B' | '2'   | '3.4' |
417        +-----+-------+-------+
418        | 'B' | '3'   | '7.8' |
419        +-----+-------+-------+
420        | 'D' | 'xyz' |   9.0 |
421        +-----+-------+-------+
422        | 'E' | None  | None  |
423        +-----+-------+-------+
424
425    Similar to :func:`petl.transform.basics.cat` except that no attempt is
426    made to align fields from different tables. Data rows are simply emitted
427    in order, trimmed or padded to the length of the header row from the
428    first table.
429
430    .. versionadded:: 1.1.0
431
432    """
433
434    return StackView(tables, **kwargs)
435
436
437Table.stack = stack
438
439
440class StackView(Table):
441
442    def __init__(self, sources, missing=None, trim=True, pad=True):
443        self.sources = sources
444        self.missing = missing
445        self.trim = trim
446        self.pad = pad
447
448    def __iter__(self):
449        return iterstack(self.sources, self.missing, self.trim, self.pad)
450
451
452def iterstack(sources, missing, trim, pad):
453    its = [iter(t) for t in sources]
454    hdrs = [next(it) for it in its]
455    hdr = hdrs[0]
456    n = len(hdr)
457    yield tuple(hdr)
458    for it in its:
459        for row in it:
460            outrow = tuple(row)
461            if trim:
462                outrow = outrow[:n]
463            if pad and len(outrow) < n:
464                outrow += (missing,) * (n - len(outrow))
465            yield outrow
466
467
468def addfield(table, field, value=None, index=None, missing=None):
469    """
470    Add a field with a fixed or calculated value. E.g.::
471
472        >>> import petl as etl
473        >>> table1 = [['foo', 'bar'],
474        ...           ['M', 12],
475        ...           ['F', 34],
476        ...           ['-', 56]]
477        >>> # using a fixed value
478        ... table2 = etl.addfield(table1, 'baz', 42)
479        >>> table2
480        +-----+-----+-----+
481        | foo | bar | baz |
482        +=====+=====+=====+
483        | 'M' |  12 |  42 |
484        +-----+-----+-----+
485        | 'F' |  34 |  42 |
486        +-----+-----+-----+
487        | '-' |  56 |  42 |
488        +-----+-----+-----+
489
490        >>> # calculating the value
491        ... table2 = etl.addfield(table1, 'baz', lambda rec: rec['bar'] * 2)
492        >>> table2
493        +-----+-----+-----+
494        | foo | bar | baz |
495        +=====+=====+=====+
496        | 'M' |  12 |  24 |
497        +-----+-----+-----+
498        | 'F' |  34 |  68 |
499        +-----+-----+-----+
500        | '-' |  56 | 112 |
501        +-----+-----+-----+
502
503    Use the `index` parameter to control the position of the inserted field.
504
505    """
506
507    return AddFieldView(table, field, value=value, index=index,
508                        missing=missing)
509
510
511Table.addfield = addfield
512
513
514class AddFieldView(Table):
515
516    def __init__(self, source, field, value=None, index=None, missing=None):
517        # ensure rows are all the same length
518        self.source = stack(source, missing=missing)
519        self.field = field
520        self.value = value
521        self.index = index
522
523    def __iter__(self):
524        return iteraddfield(self.source, self.field, self.value, self.index)
525
526
527def iteraddfield(source, field, value, index):
528    it = iter(source)
529    hdr = next(it)
530    flds = list(map(text_type, hdr))
531
532    # determine index of new field
533    if index is None:
534        index = len(hdr)
535
536    # construct output fields
537    outhdr = list(hdr)
538    outhdr.insert(index, field)
539    yield tuple(outhdr)
540
541    if callable(value):
542        # wrap rows as records if using calculated value
543        it = (Record(row, flds) for row in it)
544        for row in it:
545            outrow = list(row)
546            v = value(row)
547            outrow.insert(index, v)
548            yield tuple(outrow)
549    else:
550        for row in it:
551            outrow = list(row)
552            outrow.insert(index, value)
553            yield tuple(outrow)
554
555
556def addfields(table, field_defs, missing=None):
557    """
558    Add fields with fixed or calculated values. E.g.::
559
560        >>> import petl as etl
561        >>> table1 = [['foo', 'bar'],
562        ...           ['M', 12],
563        ...           ['F', 34],
564        ...           ['-', 56]]
565        >>> # using a fixed value or a calculation
566        ... table2 = etl.addfields(table1,
567        ...                        [('baz', 42),
568        ...                         ('luhrmann', lambda rec: rec['bar'] * 2)])
569        >>> table2
570        +-----+-----+-----+----------+
571        | foo | bar | baz | luhrmann |
572        +=====+=====+=====+==========+
573        | 'M' |  12 |  42 |       24 |
574        +-----+-----+-----+----------+
575        | 'F' |  34 |  42 |       68 |
576        +-----+-----+-----+----------+
577        | '-' |  56 |  42 |      112 |
578        +-----+-----+-----+----------+
579
580        >>> # you can specify an index as a 3rd item in each tuple -- indicies
581        ... # are evaluated in order.
582        ... table2 = etl.addfields(table1,
583        ...                        [('baz', 42, 0),
584        ...                         ('luhrmann', lambda rec: rec['bar'] * 2, 0)])
585        >>> table2
586        +----------+-----+-----+-----+
587        | luhrmann | baz | foo | bar |
588        +==========+=====+=====+=====+
589        |       24 |  42 | 'M' |  12 |
590        +----------+-----+-----+-----+
591        |       68 |  42 | 'F' |  34 |
592        +----------+-----+-----+-----+
593        |      112 |  42 | '-' |  56 |
594        +----------+-----+-----+-----+
595
596    """
597
598    return AddFieldsView(table, field_defs, missing=missing)
599
600
601Table.addfields = addfields
602
603
604class AddFieldsView(Table):
605
606    def __init__(self, source, field_defs, missing=None):
607        # ensure rows are all the same length
608        self.source = stack(source, missing=missing)
609        # convert tuples to FieldDefinitions, if necessary
610        self.field_defs = field_defs
611
612    def __iter__(self):
613        return iteraddfields(self.source, self.field_defs)
614
615
616def iteraddfields(source, field_defs):
617    it = iter(source)
618    hdr = next(it)
619    flds = list(map(text_type, hdr))
620
621    # initialize output fields and indices
622    outhdr = list(hdr)
623    value_indexes = []
624
625    for fdef in field_defs:
626        # determine the defined field index
627        if len(fdef) == 2:
628            name, value = fdef
629            index = len(outhdr)
630        else:
631            name, value, index = fdef
632
633        # insert the name into the header at the appropriate index
634        outhdr.insert(index, name)
635
636        # remember the value/index pairs for later
637        value_indexes.append((value, index))
638    yield tuple(outhdr)
639
640    for row in it:
641        outrow = list(row)
642
643        # add each defined field into the row at the appropriate index
644        for value, index in value_indexes:
645            if callable(value):
646                # wrap row as record if using calculated value
647                row = Record(row, flds)
648                v = value(row)
649                outrow.insert(index, v)
650            else:
651                outrow.insert(index, value)
652
653        yield tuple(outrow)
654
655
656def rowslice(table, *sliceargs):
657    """
658    Choose a subsequence of data rows. E.g.::
659
660        >>> import petl as etl
661        >>> table1 = [['foo', 'bar'],
662        ...           ['a', 1],
663        ...           ['b', 2],
664        ...           ['c', 5],
665        ...           ['d', 7],
666        ...           ['f', 42]]
667        >>> table2 = etl.rowslice(table1, 2)
668        >>> table2
669        +-----+-----+
670        | foo | bar |
671        +=====+=====+
672        | 'a' |   1 |
673        +-----+-----+
674        | 'b' |   2 |
675        +-----+-----+
676
677        >>> table3 = etl.rowslice(table1, 1, 4)
678        >>> table3
679        +-----+-----+
680        | foo | bar |
681        +=====+=====+
682        | 'b' |   2 |
683        +-----+-----+
684        | 'c' |   5 |
685        +-----+-----+
686        | 'd' |   7 |
687        +-----+-----+
688
689        >>> table4 = etl.rowslice(table1, 0, 5, 2)
690        >>> table4
691        +-----+-----+
692        | foo | bar |
693        +=====+=====+
694        | 'a' |   1 |
695        +-----+-----+
696        | 'c' |   5 |
697        +-----+-----+
698        | 'f' |  42 |
699        +-----+-----+
700
701    Positional arguments are used to slice the data rows. The `sliceargs` are
702    passed through to :func:`itertools.islice`.
703
704    See also :func:`petl.transform.basics.head`,
705    :func:`petl.transform.basics.tail`.
706
707    """
708
709    return RowSliceView(table, *sliceargs)
710
711
712Table.rowslice = rowslice
713
714
715class RowSliceView(Table):
716
717    def __init__(self, source, *sliceargs):
718        self.source = source
719        if not sliceargs:
720            self.sliceargs = (None,)
721        else:
722            self.sliceargs = sliceargs
723
724    def __iter__(self):
725        return iterrowslice(self.source, self.sliceargs)
726
727
728def iterrowslice(source, sliceargs):
729    it = iter(source)
730    yield tuple(next(it))  # fields
731    for row in islice(it, *sliceargs):
732        yield tuple(row)
733
734
735def head(table, n=5):
736    """
737    Select the first `n` data rows. E.g.::
738
739        >>> import petl as etl
740        >>> table1 = [['foo', 'bar'],
741        ...           ['a', 1],
742        ...           ['b', 2],
743        ...           ['c', 5],
744        ...           ['d', 7],
745        ...           ['f', 42],
746        ...           ['f', 3],
747        ...           ['h', 90]]
748        >>> table2 = etl.head(table1, 4)
749        >>> table2
750        +-----+-----+
751        | foo | bar |
752        +=====+=====+
753        | 'a' |   1 |
754        +-----+-----+
755        | 'b' |   2 |
756        +-----+-----+
757        | 'c' |   5 |
758        +-----+-----+
759        | 'd' |   7 |
760        +-----+-----+
761
762    See also :func:`petl.transform.basics.tail`,
763    :func:`petl.transform.basics.rowslice`.
764
765    """
766
767    return rowslice(table, n)
768
769
770Table.head = head
771
772
773def tail(table, n=5):
774    """
775    Select the last `n` data rows. E.g.::
776
777        >>> import petl as etl
778        >>> table1 = [['foo', 'bar'],
779        ...           ['a', 1],
780        ...           ['b', 2],
781        ...           ['c', 5],
782        ...           ['d', 7],
783        ...           ['f', 42],
784        ...           ['f', 3],
785        ...           ['h', 90],
786        ...           ['k', 12],
787        ...           ['l', 77],
788        ...           ['q', 2]]
789        >>> table2 = etl.tail(table1, 4)
790        >>> table2
791        +-----+-----+
792        | foo | bar |
793        +=====+=====+
794        | 'h' |  90 |
795        +-----+-----+
796        | 'k' |  12 |
797        +-----+-----+
798        | 'l' |  77 |
799        +-----+-----+
800        | 'q' |   2 |
801        +-----+-----+
802
803    See also :func:`petl.transform.basics.head`,
804    :func:`petl.transform.basics.rowslice`.
805
806    """
807
808    return TailView(table, n)
809
810
811Table.tail = tail
812
813
814class TailView(Table):
815
816    def __init__(self, source, n):
817        self.source = source
818        self.n = n
819
820    def __iter__(self):
821        return itertail(self.source, self.n)
822
823
824def itertail(source, n):
825    it = iter(source)
826    yield tuple(next(it))  # fields
827    cache = deque()
828    for row in it:
829        cache.append(row)
830        if len(cache) > n:
831            cache.popleft()
832    for row in cache:
833        yield tuple(row)
834
835
836def skipcomments(table, prefix):
837    """
838    Skip any row where the first value is a string and starts with
839    `prefix`. E.g.::
840
841        >>> import petl as etl
842        >>> table1 = [['##aaa', 'bbb', 'ccc'],
843        ...           ['##mmm',],
844        ...           ['#foo', 'bar'],
845        ...           ['##nnn', 1],
846        ...           ['a', 1],
847        ...           ['b', 2]]
848        >>> table2 = etl.skipcomments(table1, '##')
849        >>> table2
850        +------+-----+
851        | #foo | bar |
852        +======+=====+
853        | 'a'  |   1 |
854        +------+-----+
855        | 'b'  |   2 |
856        +------+-----+
857
858    Use the `prefix` parameter to determine which string to consider as
859    indicating a comment.
860
861    """
862
863    return SkipCommentsView(table, prefix)
864
865
866Table.skipcomments = skipcomments
867
868
869class SkipCommentsView(Table):
870
871    def __init__(self, source, prefix):
872        self.source = source
873        self.prefix = prefix
874
875    def __iter__(self):
876        return iterskipcomments(self.source, self.prefix)
877
878
879def iterskipcomments(source, prefix):
880    return (row for row in source
881            if (len(row) > 0
882                and not(isinstance(row[0], string_types)
883                and row[0].startswith(prefix))))
884
885
886def movefield(table, field, index):
887    """
888    Move a field to a new position.
889
890    """
891
892    return MoveFieldView(table, field, index)
893
894
895Table.movefield = movefield
896
897
898class MoveFieldView(Table):
899
900    def __init__(self, table, field, index, missing=None):
901        self.table = table
902        self.field = field
903        self.index = index
904        self.missing = missing
905
906    def __iter__(self):
907        it = iter(self.table)
908
909        # determine output fields
910        hdr = next(it)
911        outhdr = [f for f in hdr if f != self.field]
912        outhdr.insert(self.index, self.field)
913        yield tuple(outhdr)
914
915        # define a function to transform each row in the source data
916        # according to the field selection
917        outflds = list(map(str, outhdr))
918        indices = asindices(hdr, outflds)
919        transform = rowgetter(*indices)
920
921        # construct the transformed data
922        for row in it:
923            try:
924                yield transform(row)
925            except IndexError:
926                # row is short, let's be kind and fill in any missing fields
927                yield tuple(row[i] if i < len(row) else self.missing
928                            for i in indices)
929
930
931def annex(*tables, **kwargs):
932    """
933    Join two or more tables by row order. E.g.::
934
935        >>> import petl as etl
936        >>> table1 = [['foo', 'bar'],
937        ...           ['A', 9],
938        ...           ['C', 2],
939        ...           ['F', 1]]
940        >>> table2 = [['foo', 'baz'],
941        ...           ['B', 3],
942        ...           ['D', 10]]
943        >>> table3 = etl.annex(table1, table2)
944        >>> table3
945        +-----+-----+------+------+
946        | foo | bar | foo  | baz  |
947        +=====+=====+======+======+
948        | 'A' |   9 | 'B'  |    3 |
949        +-----+-----+------+------+
950        | 'C' |   2 | 'D'  |   10 |
951        +-----+-----+------+------+
952        | 'F' |   1 | None | None |
953        +-----+-----+------+------+
954
955    See also :func:`petl.transform.joins.join`.
956
957    """
958
959    return AnnexView(tables, **kwargs)
960
961
962Table.annex = annex
963
964
965class AnnexView(Table):
966
967    def __init__(self, tables, missing=None):
968        self.tables = tables
969        self.missing = missing
970
971    def __iter__(self):
972        return iterannex(self.tables, self.missing)
973
974
975def iterannex(tables, missing):
976    its = [iter(t) for t in tables]
977    hdrs = [next(it) for it in its]
978    outhdr = tuple(chain(*hdrs))
979    yield outhdr
980    for rows in izip_longest(*its):
981        outrow = list()
982        for i, row in enumerate(rows):
983            lh = len(hdrs[i])
984            if row is None:  # handle uneven length tables
985                row = [missing] * len(hdrs[i])
986            else:
987                lr = len(row)
988                if lr < lh:  # handle short rows
989                    row = list(row)
990                    row.extend([missing] * (lh-lr))
991                elif lr > lh:  # handle long rows
992                    row = row[:lh]
993            outrow.extend(row)
994        yield tuple(outrow)
995
996
997def addrownumbers(table, start=1, step=1, field='row'):
998    """
999    Add a field of row numbers. E.g.::
1000
1001        >>> import petl as etl
1002        >>> table1 = [['foo', 'bar'],
1003        ...           ['A', 9],
1004        ...           ['C', 2],
1005        ...           ['F', 1]]
1006        >>> table2 = etl.addrownumbers(table1)
1007        >>> table2
1008        +-----+-----+-----+
1009        | row | foo | bar |
1010        +=====+=====+=====+
1011        |   1 | 'A' |   9 |
1012        +-----+-----+-----+
1013        |   2 | 'C' |   2 |
1014        +-----+-----+-----+
1015        |   3 | 'F' |   1 |
1016        +-----+-----+-----+
1017
1018    Parameters `start` and `step` control the numbering.
1019
1020    """
1021
1022    return AddRowNumbersView(table, start, step, field)
1023
1024
1025Table.addrownumbers = addrownumbers
1026
1027
1028class AddRowNumbersView(Table):
1029
1030    def __init__(self, table, start=1, step=1, field='row'):
1031        self.table = table
1032        self.start = start
1033        self.step = step
1034        self.field = field
1035
1036    def __iter__(self):
1037        return iteraddrownumbers(self.table, self.start, self.step, self.field)
1038
1039
1040def iteraddrownumbers(table, start, step, field):
1041    it = iter(table)
1042    hdr = next(it)
1043    outhdr = [field]
1044    outhdr.extend(hdr)
1045    yield tuple(outhdr)
1046    for row, n in izip(it, count(start, step)):
1047        outrow = [n]
1048        outrow.extend(row)
1049        yield tuple(outrow)
1050
1051
1052def addcolumn(table, field, col, index=None, missing=None):
1053    """
1054    Add a column of data to the table. E.g.::
1055
1056        >>> import petl as etl
1057        >>> table1 = [['foo', 'bar'],
1058        ...           ['A', 1],
1059        ...           ['B', 2]]
1060        >>> col = [True, False]
1061        >>> table2 = etl.addcolumn(table1, 'baz', col)
1062        >>> table2
1063        +-----+-----+-------+
1064        | foo | bar | baz   |
1065        +=====+=====+=======+
1066        | 'A' |   1 | True  |
1067        +-----+-----+-------+
1068        | 'B' |   2 | False |
1069        +-----+-----+-------+
1070
1071    Use the `index` parameter to control the position of the new column.
1072
1073    """
1074
1075    return AddColumnView(table, field, col, index=index, missing=missing)
1076
1077
1078Table.addcolumn = addcolumn
1079
1080
1081class AddColumnView(Table):
1082
1083    def __init__(self, table, field, col, index=None, missing=None):
1084        self._table = table
1085        self._field = field
1086        self._col = col
1087        self._index = index
1088        self._missing = missing
1089
1090    def __iter__(self):
1091        return iteraddcolumn(self._table, self._field, self._col,
1092                             self._index, self._missing)
1093
1094
1095def iteraddcolumn(table, field, col, index, missing):
1096    it = iter(table)
1097    hdr = next(it)
1098
1099    # determine position of new column
1100    if index is None:
1101        index = len(hdr)
1102
1103    # construct output header
1104    outhdr = list(hdr)
1105    outhdr.insert(index, field)
1106    yield tuple(outhdr)
1107
1108    # construct output data
1109    for row, val in izip_longest(it, col, fillvalue=missing):
1110        # run out of rows?
1111        if row == missing:
1112            row = [missing] * len(hdr)
1113        outrow = list(row)
1114        outrow.insert(index, val)
1115        yield tuple(outrow)
1116
1117
1118class TransformError(Exception):
1119    pass
1120
1121
1122def addfieldusingcontext(table, field, query):
1123    """
1124    Like :func:`petl.transform.basics.addfield` but the `query` function is
1125    passed the previous, current and next rows, so values may be calculated
1126    based on data in adjacent rows. E.g.::
1127
1128        >>> import petl as etl
1129        >>> table1 = [['foo', 'bar'],
1130        ...           ['A', 1],
1131        ...           ['B', 4],
1132        ...           ['C', 5],
1133        ...           ['D', 9]]
1134        >>> def upstream(prv, cur, nxt):
1135        ...     if prv is None:
1136        ...         return None
1137        ...     else:
1138        ...         return cur.bar - prv.bar
1139        ...
1140        >>> def downstream(prv, cur, nxt):
1141        ...     if nxt is None:
1142        ...         return None
1143        ...     else:
1144        ...         return nxt.bar - cur.bar
1145        ...
1146        >>> table2 = etl.addfieldusingcontext(table1, 'baz', upstream)
1147        >>> table3 = etl.addfieldusingcontext(table2, 'quux', downstream)
1148        >>> table3
1149        +-----+-----+------+------+
1150        | foo | bar | baz  | quux |
1151        +=====+=====+======+======+
1152        | 'A' |   1 | None |    3 |
1153        +-----+-----+------+------+
1154        | 'B' |   4 |    3 |    1 |
1155        +-----+-----+------+------+
1156        | 'C' |   5 |    1 |    4 |
1157        +-----+-----+------+------+
1158        | 'D' |   9 |    4 | None |
1159        +-----+-----+------+------+
1160
1161    The `field` parameter is the name of the field to be added. The `query`
1162    parameter is a function operating on the curent, previous and next rows
1163    and returning the value.
1164
1165    """
1166
1167    return AddFieldUsingContextView(table, field, query)
1168
1169
1170Table.addfieldusingcontext = addfieldusingcontext
1171
1172
1173class AddFieldUsingContextView(Table):
1174
1175    def __init__(self, table, field, query):
1176        self.table = table
1177        self.field = field
1178        self.query = query
1179
1180    def __iter__(self):
1181        return iteraddfieldusingcontext(self.table, self.field, self.query)
1182
1183
1184def iteraddfieldusingcontext(table, field, query):
1185    it = iter(table)
1186    hdr = tuple(next(it))
1187    flds = list(map(text_type, hdr))
1188    yield hdr + (field,)
1189    flds.append(field)
1190    it = (Record(row, flds) for row in it)
1191    prv = None
1192    cur = next(it)
1193    for nxt in it:
1194        v = query(prv, cur, nxt)
1195        yield tuple(cur) + (v,)
1196        prv = Record(tuple(cur) + (v,), flds)
1197        cur = nxt
1198    # handle last row
1199    v = query(prv, cur, None)
1200    yield tuple(cur) + (v,)
1201