1from __future__ import absolute_import, print_function, division 2 3 4# standard library dependencies 5from itertools import islice, chain 6from collections import deque 7from itertools import count 8from petl.compat import izip, izip_longest, next, string_types, text_type 9 10 11# internal dependencies 12from petl.util.base import asindices, rowgetter, Record, Table 13 14 15import logging 16logger = logging.getLogger(__name__) 17warning = logger.warning 18info = logger.info 19debug = logger.debug 20 21 22def cut(table, *args, **kwargs): 23 """ 24 Choose and/or re-order fields. E.g.:: 25 26 >>> import petl as etl 27 >>> table1 = [['foo', 'bar', 'baz'], 28 ... ['A', 1, 2.7], 29 ... ['B', 2, 3.4], 30 ... ['B', 3, 7.8], 31 ... ['D', 42, 9.0], 32 ... ['E', 12]] 33 >>> table2 = etl.cut(table1, 'foo', 'baz') 34 >>> table2 35 +-----+------+ 36 | foo | baz | 37 +=====+======+ 38 | 'A' | 2.7 | 39 +-----+------+ 40 | 'B' | 3.4 | 41 +-----+------+ 42 | 'B' | 7.8 | 43 +-----+------+ 44 | 'D' | 9.0 | 45 +-----+------+ 46 | 'E' | None | 47 +-----+------+ 48 49 >>> # fields can also be specified by index, starting from zero 50 ... table3 = etl.cut(table1, 0, 2) 51 >>> table3 52 +-----+------+ 53 | foo | baz | 54 +=====+======+ 55 | 'A' | 2.7 | 56 +-----+------+ 57 | 'B' | 3.4 | 58 +-----+------+ 59 | 'B' | 7.8 | 60 +-----+------+ 61 | 'D' | 9.0 | 62 +-----+------+ 63 | 'E' | None | 64 +-----+------+ 65 66 >>> # field names and indices can be mixed 67 ... table4 = etl.cut(table1, 'bar', 0) 68 >>> table4 69 +-----+-----+ 70 | bar | foo | 71 +=====+=====+ 72 | 1 | 'A' | 73 +-----+-----+ 74 | 2 | 'B' | 75 +-----+-----+ 76 | 3 | 'B' | 77 +-----+-----+ 78 | 42 | 'D' | 79 +-----+-----+ 80 | 12 | 'E' | 81 +-----+-----+ 82 83 >>> # select a range of fields 84 ... table5 = etl.cut(table1, *range(0, 2)) 85 >>> table5 86 +-----+-----+ 87 | foo | bar | 88 +=====+=====+ 89 | 'A' | 1 | 90 +-----+-----+ 91 | 'B' | 2 | 92 +-----+-----+ 93 | 'B' | 3 | 94 +-----+-----+ 95 | 'D' | 42 | 96 +-----+-----+ 97 | 'E' | 12 | 98 +-----+-----+ 99 100 Note that any short rows will be padded with `None` values (or whatever is 101 provided via the `missing` keyword argument). 102 103 See also :func:`petl.transform.basics.cutout`. 104 105 """ 106 107 # support passing a single list or tuple of fields 108 if len(args) == 1 and isinstance(args[0], (list, tuple)): 109 args = args[0] 110 111 return CutView(table, args, **kwargs) 112 113 114Table.cut = cut 115 116 117class CutView(Table): 118 119 def __init__(self, source, spec, missing=None): 120 self.source = source 121 self.spec = spec 122 self.missing = missing 123 124 def __iter__(self): 125 return itercut(self.source, self.spec, self.missing) 126 127 128def itercut(source, spec, missing=None): 129 it = iter(source) 130 spec = tuple(spec) # make sure no-one can change midstream 131 132 # convert field selection into field indices 133 hdr = next(it) 134 indices = asindices(hdr, spec) 135 136 # define a function to transform each row in the source data 137 # according to the field selection 138 transform = rowgetter(*indices) 139 140 # yield the transformed header 141 yield transform(hdr) 142 143 # construct the transformed data 144 for row in it: 145 try: 146 yield transform(row) 147 except IndexError: 148 # row is short, let's be kind and fill in any missing fields 149 yield tuple(row[i] if i < len(row) else missing for i in indices) 150 151 152def cutout(table, *args, **kwargs): 153 """ 154 Remove fields. E.g.:: 155 156 >>> import petl as etl 157 >>> table1 = [['foo', 'bar', 'baz'], 158 ... ['A', 1, 2.7], 159 ... ['B', 2, 3.4], 160 ... ['B', 3, 7.8], 161 ... ['D', 42, 9.0], 162 ... ['E', 12]] 163 >>> table2 = etl.cutout(table1, 'bar') 164 >>> table2 165 +-----+------+ 166 | foo | baz | 167 +=====+======+ 168 | 'A' | 2.7 | 169 +-----+------+ 170 | 'B' | 3.4 | 171 +-----+------+ 172 | 'B' | 7.8 | 173 +-----+------+ 174 | 'D' | 9.0 | 175 +-----+------+ 176 | 'E' | None | 177 +-----+------+ 178 179 See also :func:`petl.transform.basics.cut`. 180 181 """ 182 183 return CutOutView(table, args, **kwargs) 184 185 186Table.cutout = cutout 187 188 189class CutOutView(Table): 190 191 def __init__(self, source, spec, missing=None): 192 self.source = source 193 self.spec = spec 194 self.missing = missing 195 196 def __iter__(self): 197 return itercutout(self.source, self.spec, self.missing) 198 199 200def itercutout(source, spec, missing=None): 201 it = iter(source) 202 spec = tuple(spec) # make sure no-one can change midstream 203 204 # convert field selection into field indices 205 hdr = next(it) 206 indicesout = asindices(hdr, spec) 207 indices = [i for i in range(len(hdr)) if i not in indicesout] 208 209 # define a function to transform each row in the source data 210 # according to the field selection 211 transform = rowgetter(*indices) 212 213 # yield the transformed header 214 yield transform(hdr) 215 216 # construct the transformed data 217 for row in it: 218 try: 219 yield transform(row) 220 except IndexError: 221 # row is short, let's be kind and fill in any missing fields 222 yield tuple(row[i] if i < len(row) else missing for i in indices) 223 224 225def cat(*tables, **kwargs): 226 """ 227 Concatenate tables. E.g.:: 228 229 >>> import petl as etl 230 >>> table1 = [['foo', 'bar'], 231 ... [1, 'A'], 232 ... [2, 'B']] 233 >>> table2 = [['bar', 'baz'], 234 ... ['C', True], 235 ... ['D', False]] 236 >>> table3 = etl.cat(table1, table2) 237 >>> table3 238 +------+-----+-------+ 239 | foo | bar | baz | 240 +======+=====+=======+ 241 | 1 | 'A' | None | 242 +------+-----+-------+ 243 | 2 | 'B' | None | 244 +------+-----+-------+ 245 | None | 'C' | True | 246 +------+-----+-------+ 247 | None | 'D' | False | 248 +------+-----+-------+ 249 250 >>> # can also be used to square up a single table with uneven rows 251 ... table4 = [['foo', 'bar', 'baz'], 252 ... ['A', 1, 2], 253 ... ['B', '2', '3.4'], 254 ... [u'B', u'3', u'7.8', True], 255 ... ['D', 'xyz', 9.0], 256 ... ['E', None]] 257 >>> table5 = etl.cat(table4) 258 >>> table5 259 +-----+-------+-------+ 260 | foo | bar | baz | 261 +=====+=======+=======+ 262 | 'A' | 1 | 2 | 263 +-----+-------+-------+ 264 | 'B' | '2' | '3.4' | 265 +-----+-------+-------+ 266 | 'B' | '3' | '7.8' | 267 +-----+-------+-------+ 268 | 'D' | 'xyz' | 9.0 | 269 +-----+-------+-------+ 270 | 'E' | None | None | 271 +-----+-------+-------+ 272 273 >>> # use the header keyword argument to specify a fixed set of fields 274 ... table6 = [['bar', 'foo'], 275 ... ['A', 1], 276 ... ['B', 2]] 277 >>> table7 = etl.cat(table6, header=['A', 'foo', 'B', 'bar', 'C']) 278 >>> table7 279 +------+-----+------+-----+------+ 280 | A | foo | B | bar | C | 281 +======+=====+======+=====+======+ 282 | None | 1 | None | 'A' | None | 283 +------+-----+------+-----+------+ 284 | None | 2 | None | 'B' | None | 285 +------+-----+------+-----+------+ 286 287 >>> # using the header keyword argument with two input tables 288 ... table8 = [['bar', 'foo'], 289 ... ['A', 1], 290 ... ['B', 2]] 291 >>> table9 = [['bar', 'baz'], 292 ... ['C', True], 293 ... ['D', False]] 294 >>> table10 = etl.cat(table8, table9, header=['A', 'foo', 'B', 'bar', 'C']) 295 >>> table10 296 +------+------+------+-----+------+ 297 | A | foo | B | bar | C | 298 +======+======+======+=====+======+ 299 | None | 1 | None | 'A' | None | 300 +------+------+------+-----+------+ 301 | None | 2 | None | 'B' | None | 302 +------+------+------+-----+------+ 303 | None | None | None | 'C' | None | 304 +------+------+------+-----+------+ 305 | None | None | None | 'D' | None | 306 +------+------+------+-----+------+ 307 308 Note that the tables do not need to share exactly the same fields, any 309 missing fields will be padded with `None` or whatever is provided via the 310 `missing` keyword argument. 311 312 Note that this function can be used with a single table argument, in which 313 case it has the effect of ensuring all data rows are the same length as 314 the header row, truncating any long rows and padding any short rows with 315 the value of the `missing` keyword argument. 316 317 By default, the fields for the output table will be determined as the 318 union of all fields found in the input tables. Use the `header` keyword 319 argument to override this behaviour and specify a fixed set of fields for 320 the output table. 321 322 """ 323 324 return CatView(tables, **kwargs) 325 326 327Table.cat = cat 328 329 330class CatView(Table): 331 332 def __init__(self, sources, missing=None, header=None): 333 self.sources = sources 334 self.missing = missing 335 self.header = header 336 337 def __iter__(self): 338 return itercat(self.sources, self.missing, self.header) 339 340 341def itercat(sources, missing, header): 342 its = [iter(t) for t in sources] 343 hdrs = [list(next(it)) for it in its] 344 345 if header is None: 346 # determine output fields by gathering all fields found in the sources 347 outhdr = list(hdrs[0]) 348 for hdr in hdrs[1:]: 349 for h in hdr: 350 if h not in outhdr: 351 # add any new fields as we find them 352 outhdr.append(h) 353 else: 354 # predetermined output fields 355 outhdr = header 356 yield tuple(outhdr) 357 358 # output data rows 359 for hdr, it in zip(hdrs, its): 360 361 # now construct and yield the data rows 362 for row in it: 363 outrow = list() 364 for h in outhdr: 365 val = missing 366 try: 367 val = row[hdr.index(h)] 368 except IndexError: 369 # short row 370 pass 371 except ValueError: 372 # field not in table 373 pass 374 outrow.append(val) 375 yield tuple(outrow) 376 377 378def stack(*tables, **kwargs): 379 """Concatenate tables, without trying to match headers. E.g.:: 380 381 >>> import petl as etl 382 >>> table1 = [['foo', 'bar'], 383 ... [1, 'A'], 384 ... [2, 'B']] 385 >>> table2 = [['bar', 'baz'], 386 ... ['C', True], 387 ... ['D', False]] 388 >>> table3 = etl.stack(table1, table2) 389 >>> table3 390 +-----+-------+ 391 | foo | bar | 392 +=====+=======+ 393 | 1 | 'A' | 394 +-----+-------+ 395 | 2 | 'B' | 396 +-----+-------+ 397 | 'C' | True | 398 +-----+-------+ 399 | 'D' | False | 400 +-----+-------+ 401 402 >>> # can also be used to square up a single table with uneven rows 403 ... table4 = [['foo', 'bar', 'baz'], 404 ... ['A', 1, 2], 405 ... ['B', '2', '3.4'], 406 ... [u'B', u'3', u'7.8', True], 407 ... ['D', 'xyz', 9.0], 408 ... ['E', None]] 409 >>> table5 = etl.stack(table4) 410 >>> table5 411 +-----+-------+-------+ 412 | foo | bar | baz | 413 +=====+=======+=======+ 414 | 'A' | 1 | 2 | 415 +-----+-------+-------+ 416 | 'B' | '2' | '3.4' | 417 +-----+-------+-------+ 418 | 'B' | '3' | '7.8' | 419 +-----+-------+-------+ 420 | 'D' | 'xyz' | 9.0 | 421 +-----+-------+-------+ 422 | 'E' | None | None | 423 +-----+-------+-------+ 424 425 Similar to :func:`petl.transform.basics.cat` except that no attempt is 426 made to align fields from different tables. Data rows are simply emitted 427 in order, trimmed or padded to the length of the header row from the 428 first table. 429 430 .. versionadded:: 1.1.0 431 432 """ 433 434 return StackView(tables, **kwargs) 435 436 437Table.stack = stack 438 439 440class StackView(Table): 441 442 def __init__(self, sources, missing=None, trim=True, pad=True): 443 self.sources = sources 444 self.missing = missing 445 self.trim = trim 446 self.pad = pad 447 448 def __iter__(self): 449 return iterstack(self.sources, self.missing, self.trim, self.pad) 450 451 452def iterstack(sources, missing, trim, pad): 453 its = [iter(t) for t in sources] 454 hdrs = [next(it) for it in its] 455 hdr = hdrs[0] 456 n = len(hdr) 457 yield tuple(hdr) 458 for it in its: 459 for row in it: 460 outrow = tuple(row) 461 if trim: 462 outrow = outrow[:n] 463 if pad and len(outrow) < n: 464 outrow += (missing,) * (n - len(outrow)) 465 yield outrow 466 467 468def addfield(table, field, value=None, index=None, missing=None): 469 """ 470 Add a field with a fixed or calculated value. E.g.:: 471 472 >>> import petl as etl 473 >>> table1 = [['foo', 'bar'], 474 ... ['M', 12], 475 ... ['F', 34], 476 ... ['-', 56]] 477 >>> # using a fixed value 478 ... table2 = etl.addfield(table1, 'baz', 42) 479 >>> table2 480 +-----+-----+-----+ 481 | foo | bar | baz | 482 +=====+=====+=====+ 483 | 'M' | 12 | 42 | 484 +-----+-----+-----+ 485 | 'F' | 34 | 42 | 486 +-----+-----+-----+ 487 | '-' | 56 | 42 | 488 +-----+-----+-----+ 489 490 >>> # calculating the value 491 ... table2 = etl.addfield(table1, 'baz', lambda rec: rec['bar'] * 2) 492 >>> table2 493 +-----+-----+-----+ 494 | foo | bar | baz | 495 +=====+=====+=====+ 496 | 'M' | 12 | 24 | 497 +-----+-----+-----+ 498 | 'F' | 34 | 68 | 499 +-----+-----+-----+ 500 | '-' | 56 | 112 | 501 +-----+-----+-----+ 502 503 Use the `index` parameter to control the position of the inserted field. 504 505 """ 506 507 return AddFieldView(table, field, value=value, index=index, 508 missing=missing) 509 510 511Table.addfield = addfield 512 513 514class AddFieldView(Table): 515 516 def __init__(self, source, field, value=None, index=None, missing=None): 517 # ensure rows are all the same length 518 self.source = stack(source, missing=missing) 519 self.field = field 520 self.value = value 521 self.index = index 522 523 def __iter__(self): 524 return iteraddfield(self.source, self.field, self.value, self.index) 525 526 527def iteraddfield(source, field, value, index): 528 it = iter(source) 529 hdr = next(it) 530 flds = list(map(text_type, hdr)) 531 532 # determine index of new field 533 if index is None: 534 index = len(hdr) 535 536 # construct output fields 537 outhdr = list(hdr) 538 outhdr.insert(index, field) 539 yield tuple(outhdr) 540 541 if callable(value): 542 # wrap rows as records if using calculated value 543 it = (Record(row, flds) for row in it) 544 for row in it: 545 outrow = list(row) 546 v = value(row) 547 outrow.insert(index, v) 548 yield tuple(outrow) 549 else: 550 for row in it: 551 outrow = list(row) 552 outrow.insert(index, value) 553 yield tuple(outrow) 554 555 556def addfields(table, field_defs, missing=None): 557 """ 558 Add fields with fixed or calculated values. E.g.:: 559 560 >>> import petl as etl 561 >>> table1 = [['foo', 'bar'], 562 ... ['M', 12], 563 ... ['F', 34], 564 ... ['-', 56]] 565 >>> # using a fixed value or a calculation 566 ... table2 = etl.addfields(table1, 567 ... [('baz', 42), 568 ... ('luhrmann', lambda rec: rec['bar'] * 2)]) 569 >>> table2 570 +-----+-----+-----+----------+ 571 | foo | bar | baz | luhrmann | 572 +=====+=====+=====+==========+ 573 | 'M' | 12 | 42 | 24 | 574 +-----+-----+-----+----------+ 575 | 'F' | 34 | 42 | 68 | 576 +-----+-----+-----+----------+ 577 | '-' | 56 | 42 | 112 | 578 +-----+-----+-----+----------+ 579 580 >>> # you can specify an index as a 3rd item in each tuple -- indicies 581 ... # are evaluated in order. 582 ... table2 = etl.addfields(table1, 583 ... [('baz', 42, 0), 584 ... ('luhrmann', lambda rec: rec['bar'] * 2, 0)]) 585 >>> table2 586 +----------+-----+-----+-----+ 587 | luhrmann | baz | foo | bar | 588 +==========+=====+=====+=====+ 589 | 24 | 42 | 'M' | 12 | 590 +----------+-----+-----+-----+ 591 | 68 | 42 | 'F' | 34 | 592 +----------+-----+-----+-----+ 593 | 112 | 42 | '-' | 56 | 594 +----------+-----+-----+-----+ 595 596 """ 597 598 return AddFieldsView(table, field_defs, missing=missing) 599 600 601Table.addfields = addfields 602 603 604class AddFieldsView(Table): 605 606 def __init__(self, source, field_defs, missing=None): 607 # ensure rows are all the same length 608 self.source = stack(source, missing=missing) 609 # convert tuples to FieldDefinitions, if necessary 610 self.field_defs = field_defs 611 612 def __iter__(self): 613 return iteraddfields(self.source, self.field_defs) 614 615 616def iteraddfields(source, field_defs): 617 it = iter(source) 618 hdr = next(it) 619 flds = list(map(text_type, hdr)) 620 621 # initialize output fields and indices 622 outhdr = list(hdr) 623 value_indexes = [] 624 625 for fdef in field_defs: 626 # determine the defined field index 627 if len(fdef) == 2: 628 name, value = fdef 629 index = len(outhdr) 630 else: 631 name, value, index = fdef 632 633 # insert the name into the header at the appropriate index 634 outhdr.insert(index, name) 635 636 # remember the value/index pairs for later 637 value_indexes.append((value, index)) 638 yield tuple(outhdr) 639 640 for row in it: 641 outrow = list(row) 642 643 # add each defined field into the row at the appropriate index 644 for value, index in value_indexes: 645 if callable(value): 646 # wrap row as record if using calculated value 647 row = Record(row, flds) 648 v = value(row) 649 outrow.insert(index, v) 650 else: 651 outrow.insert(index, value) 652 653 yield tuple(outrow) 654 655 656def rowslice(table, *sliceargs): 657 """ 658 Choose a subsequence of data rows. E.g.:: 659 660 >>> import petl as etl 661 >>> table1 = [['foo', 'bar'], 662 ... ['a', 1], 663 ... ['b', 2], 664 ... ['c', 5], 665 ... ['d', 7], 666 ... ['f', 42]] 667 >>> table2 = etl.rowslice(table1, 2) 668 >>> table2 669 +-----+-----+ 670 | foo | bar | 671 +=====+=====+ 672 | 'a' | 1 | 673 +-----+-----+ 674 | 'b' | 2 | 675 +-----+-----+ 676 677 >>> table3 = etl.rowslice(table1, 1, 4) 678 >>> table3 679 +-----+-----+ 680 | foo | bar | 681 +=====+=====+ 682 | 'b' | 2 | 683 +-----+-----+ 684 | 'c' | 5 | 685 +-----+-----+ 686 | 'd' | 7 | 687 +-----+-----+ 688 689 >>> table4 = etl.rowslice(table1, 0, 5, 2) 690 >>> table4 691 +-----+-----+ 692 | foo | bar | 693 +=====+=====+ 694 | 'a' | 1 | 695 +-----+-----+ 696 | 'c' | 5 | 697 +-----+-----+ 698 | 'f' | 42 | 699 +-----+-----+ 700 701 Positional arguments are used to slice the data rows. The `sliceargs` are 702 passed through to :func:`itertools.islice`. 703 704 See also :func:`petl.transform.basics.head`, 705 :func:`petl.transform.basics.tail`. 706 707 """ 708 709 return RowSliceView(table, *sliceargs) 710 711 712Table.rowslice = rowslice 713 714 715class RowSliceView(Table): 716 717 def __init__(self, source, *sliceargs): 718 self.source = source 719 if not sliceargs: 720 self.sliceargs = (None,) 721 else: 722 self.sliceargs = sliceargs 723 724 def __iter__(self): 725 return iterrowslice(self.source, self.sliceargs) 726 727 728def iterrowslice(source, sliceargs): 729 it = iter(source) 730 yield tuple(next(it)) # fields 731 for row in islice(it, *sliceargs): 732 yield tuple(row) 733 734 735def head(table, n=5): 736 """ 737 Select the first `n` data rows. E.g.:: 738 739 >>> import petl as etl 740 >>> table1 = [['foo', 'bar'], 741 ... ['a', 1], 742 ... ['b', 2], 743 ... ['c', 5], 744 ... ['d', 7], 745 ... ['f', 42], 746 ... ['f', 3], 747 ... ['h', 90]] 748 >>> table2 = etl.head(table1, 4) 749 >>> table2 750 +-----+-----+ 751 | foo | bar | 752 +=====+=====+ 753 | 'a' | 1 | 754 +-----+-----+ 755 | 'b' | 2 | 756 +-----+-----+ 757 | 'c' | 5 | 758 +-----+-----+ 759 | 'd' | 7 | 760 +-----+-----+ 761 762 See also :func:`petl.transform.basics.tail`, 763 :func:`petl.transform.basics.rowslice`. 764 765 """ 766 767 return rowslice(table, n) 768 769 770Table.head = head 771 772 773def tail(table, n=5): 774 """ 775 Select the last `n` data rows. E.g.:: 776 777 >>> import petl as etl 778 >>> table1 = [['foo', 'bar'], 779 ... ['a', 1], 780 ... ['b', 2], 781 ... ['c', 5], 782 ... ['d', 7], 783 ... ['f', 42], 784 ... ['f', 3], 785 ... ['h', 90], 786 ... ['k', 12], 787 ... ['l', 77], 788 ... ['q', 2]] 789 >>> table2 = etl.tail(table1, 4) 790 >>> table2 791 +-----+-----+ 792 | foo | bar | 793 +=====+=====+ 794 | 'h' | 90 | 795 +-----+-----+ 796 | 'k' | 12 | 797 +-----+-----+ 798 | 'l' | 77 | 799 +-----+-----+ 800 | 'q' | 2 | 801 +-----+-----+ 802 803 See also :func:`petl.transform.basics.head`, 804 :func:`petl.transform.basics.rowslice`. 805 806 """ 807 808 return TailView(table, n) 809 810 811Table.tail = tail 812 813 814class TailView(Table): 815 816 def __init__(self, source, n): 817 self.source = source 818 self.n = n 819 820 def __iter__(self): 821 return itertail(self.source, self.n) 822 823 824def itertail(source, n): 825 it = iter(source) 826 yield tuple(next(it)) # fields 827 cache = deque() 828 for row in it: 829 cache.append(row) 830 if len(cache) > n: 831 cache.popleft() 832 for row in cache: 833 yield tuple(row) 834 835 836def skipcomments(table, prefix): 837 """ 838 Skip any row where the first value is a string and starts with 839 `prefix`. E.g.:: 840 841 >>> import petl as etl 842 >>> table1 = [['##aaa', 'bbb', 'ccc'], 843 ... ['##mmm',], 844 ... ['#foo', 'bar'], 845 ... ['##nnn', 1], 846 ... ['a', 1], 847 ... ['b', 2]] 848 >>> table2 = etl.skipcomments(table1, '##') 849 >>> table2 850 +------+-----+ 851 | #foo | bar | 852 +======+=====+ 853 | 'a' | 1 | 854 +------+-----+ 855 | 'b' | 2 | 856 +------+-----+ 857 858 Use the `prefix` parameter to determine which string to consider as 859 indicating a comment. 860 861 """ 862 863 return SkipCommentsView(table, prefix) 864 865 866Table.skipcomments = skipcomments 867 868 869class SkipCommentsView(Table): 870 871 def __init__(self, source, prefix): 872 self.source = source 873 self.prefix = prefix 874 875 def __iter__(self): 876 return iterskipcomments(self.source, self.prefix) 877 878 879def iterskipcomments(source, prefix): 880 return (row for row in source 881 if (len(row) > 0 882 and not(isinstance(row[0], string_types) 883 and row[0].startswith(prefix)))) 884 885 886def movefield(table, field, index): 887 """ 888 Move a field to a new position. 889 890 """ 891 892 return MoveFieldView(table, field, index) 893 894 895Table.movefield = movefield 896 897 898class MoveFieldView(Table): 899 900 def __init__(self, table, field, index, missing=None): 901 self.table = table 902 self.field = field 903 self.index = index 904 self.missing = missing 905 906 def __iter__(self): 907 it = iter(self.table) 908 909 # determine output fields 910 hdr = next(it) 911 outhdr = [f for f in hdr if f != self.field] 912 outhdr.insert(self.index, self.field) 913 yield tuple(outhdr) 914 915 # define a function to transform each row in the source data 916 # according to the field selection 917 outflds = list(map(str, outhdr)) 918 indices = asindices(hdr, outflds) 919 transform = rowgetter(*indices) 920 921 # construct the transformed data 922 for row in it: 923 try: 924 yield transform(row) 925 except IndexError: 926 # row is short, let's be kind and fill in any missing fields 927 yield tuple(row[i] if i < len(row) else self.missing 928 for i in indices) 929 930 931def annex(*tables, **kwargs): 932 """ 933 Join two or more tables by row order. E.g.:: 934 935 >>> import petl as etl 936 >>> table1 = [['foo', 'bar'], 937 ... ['A', 9], 938 ... ['C', 2], 939 ... ['F', 1]] 940 >>> table2 = [['foo', 'baz'], 941 ... ['B', 3], 942 ... ['D', 10]] 943 >>> table3 = etl.annex(table1, table2) 944 >>> table3 945 +-----+-----+------+------+ 946 | foo | bar | foo | baz | 947 +=====+=====+======+======+ 948 | 'A' | 9 | 'B' | 3 | 949 +-----+-----+------+------+ 950 | 'C' | 2 | 'D' | 10 | 951 +-----+-----+------+------+ 952 | 'F' | 1 | None | None | 953 +-----+-----+------+------+ 954 955 See also :func:`petl.transform.joins.join`. 956 957 """ 958 959 return AnnexView(tables, **kwargs) 960 961 962Table.annex = annex 963 964 965class AnnexView(Table): 966 967 def __init__(self, tables, missing=None): 968 self.tables = tables 969 self.missing = missing 970 971 def __iter__(self): 972 return iterannex(self.tables, self.missing) 973 974 975def iterannex(tables, missing): 976 its = [iter(t) for t in tables] 977 hdrs = [next(it) for it in its] 978 outhdr = tuple(chain(*hdrs)) 979 yield outhdr 980 for rows in izip_longest(*its): 981 outrow = list() 982 for i, row in enumerate(rows): 983 lh = len(hdrs[i]) 984 if row is None: # handle uneven length tables 985 row = [missing] * len(hdrs[i]) 986 else: 987 lr = len(row) 988 if lr < lh: # handle short rows 989 row = list(row) 990 row.extend([missing] * (lh-lr)) 991 elif lr > lh: # handle long rows 992 row = row[:lh] 993 outrow.extend(row) 994 yield tuple(outrow) 995 996 997def addrownumbers(table, start=1, step=1, field='row'): 998 """ 999 Add a field of row numbers. E.g.:: 1000 1001 >>> import petl as etl 1002 >>> table1 = [['foo', 'bar'], 1003 ... ['A', 9], 1004 ... ['C', 2], 1005 ... ['F', 1]] 1006 >>> table2 = etl.addrownumbers(table1) 1007 >>> table2 1008 +-----+-----+-----+ 1009 | row | foo | bar | 1010 +=====+=====+=====+ 1011 | 1 | 'A' | 9 | 1012 +-----+-----+-----+ 1013 | 2 | 'C' | 2 | 1014 +-----+-----+-----+ 1015 | 3 | 'F' | 1 | 1016 +-----+-----+-----+ 1017 1018 Parameters `start` and `step` control the numbering. 1019 1020 """ 1021 1022 return AddRowNumbersView(table, start, step, field) 1023 1024 1025Table.addrownumbers = addrownumbers 1026 1027 1028class AddRowNumbersView(Table): 1029 1030 def __init__(self, table, start=1, step=1, field='row'): 1031 self.table = table 1032 self.start = start 1033 self.step = step 1034 self.field = field 1035 1036 def __iter__(self): 1037 return iteraddrownumbers(self.table, self.start, self.step, self.field) 1038 1039 1040def iteraddrownumbers(table, start, step, field): 1041 it = iter(table) 1042 hdr = next(it) 1043 outhdr = [field] 1044 outhdr.extend(hdr) 1045 yield tuple(outhdr) 1046 for row, n in izip(it, count(start, step)): 1047 outrow = [n] 1048 outrow.extend(row) 1049 yield tuple(outrow) 1050 1051 1052def addcolumn(table, field, col, index=None, missing=None): 1053 """ 1054 Add a column of data to the table. E.g.:: 1055 1056 >>> import petl as etl 1057 >>> table1 = [['foo', 'bar'], 1058 ... ['A', 1], 1059 ... ['B', 2]] 1060 >>> col = [True, False] 1061 >>> table2 = etl.addcolumn(table1, 'baz', col) 1062 >>> table2 1063 +-----+-----+-------+ 1064 | foo | bar | baz | 1065 +=====+=====+=======+ 1066 | 'A' | 1 | True | 1067 +-----+-----+-------+ 1068 | 'B' | 2 | False | 1069 +-----+-----+-------+ 1070 1071 Use the `index` parameter to control the position of the new column. 1072 1073 """ 1074 1075 return AddColumnView(table, field, col, index=index, missing=missing) 1076 1077 1078Table.addcolumn = addcolumn 1079 1080 1081class AddColumnView(Table): 1082 1083 def __init__(self, table, field, col, index=None, missing=None): 1084 self._table = table 1085 self._field = field 1086 self._col = col 1087 self._index = index 1088 self._missing = missing 1089 1090 def __iter__(self): 1091 return iteraddcolumn(self._table, self._field, self._col, 1092 self._index, self._missing) 1093 1094 1095def iteraddcolumn(table, field, col, index, missing): 1096 it = iter(table) 1097 hdr = next(it) 1098 1099 # determine position of new column 1100 if index is None: 1101 index = len(hdr) 1102 1103 # construct output header 1104 outhdr = list(hdr) 1105 outhdr.insert(index, field) 1106 yield tuple(outhdr) 1107 1108 # construct output data 1109 for row, val in izip_longest(it, col, fillvalue=missing): 1110 # run out of rows? 1111 if row == missing: 1112 row = [missing] * len(hdr) 1113 outrow = list(row) 1114 outrow.insert(index, val) 1115 yield tuple(outrow) 1116 1117 1118class TransformError(Exception): 1119 pass 1120 1121 1122def addfieldusingcontext(table, field, query): 1123 """ 1124 Like :func:`petl.transform.basics.addfield` but the `query` function is 1125 passed the previous, current and next rows, so values may be calculated 1126 based on data in adjacent rows. E.g.:: 1127 1128 >>> import petl as etl 1129 >>> table1 = [['foo', 'bar'], 1130 ... ['A', 1], 1131 ... ['B', 4], 1132 ... ['C', 5], 1133 ... ['D', 9]] 1134 >>> def upstream(prv, cur, nxt): 1135 ... if prv is None: 1136 ... return None 1137 ... else: 1138 ... return cur.bar - prv.bar 1139 ... 1140 >>> def downstream(prv, cur, nxt): 1141 ... if nxt is None: 1142 ... return None 1143 ... else: 1144 ... return nxt.bar - cur.bar 1145 ... 1146 >>> table2 = etl.addfieldusingcontext(table1, 'baz', upstream) 1147 >>> table3 = etl.addfieldusingcontext(table2, 'quux', downstream) 1148 >>> table3 1149 +-----+-----+------+------+ 1150 | foo | bar | baz | quux | 1151 +=====+=====+======+======+ 1152 | 'A' | 1 | None | 3 | 1153 +-----+-----+------+------+ 1154 | 'B' | 4 | 3 | 1 | 1155 +-----+-----+------+------+ 1156 | 'C' | 5 | 1 | 4 | 1157 +-----+-----+------+------+ 1158 | 'D' | 9 | 4 | None | 1159 +-----+-----+------+------+ 1160 1161 The `field` parameter is the name of the field to be added. The `query` 1162 parameter is a function operating on the curent, previous and next rows 1163 and returning the value. 1164 1165 """ 1166 1167 return AddFieldUsingContextView(table, field, query) 1168 1169 1170Table.addfieldusingcontext = addfieldusingcontext 1171 1172 1173class AddFieldUsingContextView(Table): 1174 1175 def __init__(self, table, field, query): 1176 self.table = table 1177 self.field = field 1178 self.query = query 1179 1180 def __iter__(self): 1181 return iteraddfieldusingcontext(self.table, self.field, self.query) 1182 1183 1184def iteraddfieldusingcontext(table, field, query): 1185 it = iter(table) 1186 hdr = tuple(next(it)) 1187 flds = list(map(text_type, hdr)) 1188 yield hdr + (field,) 1189 flds.append(field) 1190 it = (Record(row, flds) for row in it) 1191 prv = None 1192 cur = next(it) 1193 for nxt in it: 1194 v = query(prv, cur, nxt) 1195 yield tuple(cur) + (v,) 1196 prv = Record(tuple(cur) + (v,), flds) 1197 cur = nxt 1198 # handle last row 1199 v = query(prv, cur, None) 1200 yield tuple(cur) + (v,) 1201