1package Spreadsheet::ParseXLSX; 2BEGIN { 3 $Spreadsheet::ParseXLSX::AUTHORITY = 'cpan:DOY'; 4} 5$Spreadsheet::ParseXLSX::VERSION = '0.16'; 6use strict; 7use warnings; 8# ABSTRACT: parse XLSX files 9 10use Archive::Zip; 11use Graphics::ColorUtils 'rgb2hls', 'hls2rgb'; 12use Scalar::Util 'openhandle'; 13use Spreadsheet::ParseExcel 0.61; 14use XML::Twig; 15 16 17 18sub new { 19 bless {}, shift; 20} 21 22 23sub parse { 24 my $self = shift; 25 my ($file, $formatter) = @_; 26 27 my $zip = Archive::Zip->new; 28 my $workbook = Spreadsheet::ParseExcel::Workbook->new; 29 if (openhandle($file)) { 30 bless $file, 'IO::File' if ref($file) eq 'GLOB'; # sigh 31 $zip->readFromFileHandle($file) == Archive::Zip::AZ_OK 32 or die "Can't open filehandle as a zip file"; 33 $workbook->{File} = undef; 34 } 35 elsif (!ref($file)) { 36 $zip->read($file) == Archive::Zip::AZ_OK 37 or die "Can't open file '$file' as a zip file"; 38 $workbook->{File} = $file; 39 } 40 else { 41 die "Argument to 'new' must be a filename or open filehandle"; 42 } 43 44 return $self->_parse_workbook($zip, $workbook, $formatter); 45} 46 47sub _parse_workbook { 48 my $self = shift; 49 my ($zip, $workbook, $formatter) = @_; 50 51 my $files = $self->_extract_files($zip); 52 53 my ($version) = $files->{workbook}->find_nodes('//fileVersion'); 54 my ($properties) = $files->{workbook}->find_nodes('//workbookPr'); 55 56 if ($version) { 57 $workbook->{Version} = $version->att('appName') 58 . ($version->att('lowestEdited') 59 ? ('-' . $version->att('lowestEdited')) 60 : ("")); 61 } 62 63 $workbook->{Flag1904} = $properties && $properties->att('date1904') ? 1 : 0; 64 65 $workbook->{FmtClass} = $formatter || Spreadsheet::ParseExcel::FmtDefault->new; 66 67 my $themes = $self->_parse_themes((values %{ $files->{themes} })[0]); # XXX 68 69 $workbook->{Color} = $themes->{Color}; 70 71 my $styles = $self->_parse_styles($workbook, $files->{styles}); 72 73 $workbook->{Format} = $styles->{Format}; 74 $workbook->{FormatStr} = $styles->{FormatStr}; 75 $workbook->{Font} = $styles->{Font}; 76 77 $workbook->{PkgStr} = $self->_parse_shared_strings($files->{strings}) 78 if $files->{strings}; 79 80 # $workbook->{StandardWidth} = ...; 81 82 # $workbook->{Author} = ...; 83 84 # $workbook->{PrintArea} = ...; 85 # $workbook->{PrintTitle} = ...; 86 87 my @sheets = map { 88 my $idx = $_->att('r:id'); 89 my $sheet = Spreadsheet::ParseExcel::Worksheet->new( 90 Name => $_->att('name'), 91 _Book => $workbook, 92 _SheetNo => $idx, 93 ); 94 $self->_parse_sheet($sheet, $files->{sheets}{$idx}); 95 $sheet 96 } $files->{workbook}->find_nodes('//sheets/sheet'); 97 98 $workbook->{Worksheet} = \@sheets; 99 $workbook->{SheetCount} = scalar(@sheets); 100 101 my ($node) = $files->{workbook}->find_nodes('//workbookView'); 102 my $selected = $node->att('activeTab'); 103 $workbook->{SelectedSheet} = defined($selected) ? 0+$selected : 0; 104 105 return $workbook; 106} 107 108sub _parse_sheet { 109 my $self = shift; 110 my ($sheet, $sheet_file) = @_; 111 112 $sheet->{MinRow} = 0; 113 $sheet->{MinCol} = 0; 114 $sheet->{MaxRow} = -1; 115 $sheet->{MaxCol} = -1; 116 $sheet->{Selection} = [ 0, 0 ]; 117 118 my @merged_cells; 119 120 my @column_formats; 121 my @column_widths; 122 my @row_heights; 123 124 my $default_row_height = 15; 125 my $default_column_width = 10; 126 127 my $sheet_xml = XML::Twig->new( 128 twig_roots => { 129 #XXX need a fallback here, the dimension tag is optional 130 'dimension' => sub { 131 my ($twig, $dimension) = @_; 132 133 my ($rmin, $cmin, $rmax, $cmax) = $self->_dimensions( 134 $dimension->att('ref') 135 ); 136 137 $sheet->{MinRow} = $rmin; 138 $sheet->{MinCol} = $cmin; 139 $sheet->{MaxRow} = $rmax; 140 $sheet->{MaxCol} = $cmax; 141 142 $twig->purge; 143 }, 144 145 'mergeCells/mergeCell' => sub { 146 my ( $twig, $merge_area ) = @_; 147 148 if (my $ref = $merge_area->att('ref')) { 149 my ($topleft, $bottomright) = $ref =~ /([^:]+):([^:]+)/; 150 151 my ($toprow, $leftcol) = $self->_cell_to_row_col($topleft); 152 my ($bottomrow, $rightcol) = $self->_cell_to_row_col($bottomright); 153 154 push @{ $sheet->{MergedArea} }, [ 155 $toprow, $leftcol, 156 $bottomrow, $rightcol, 157 ]; 158 for my $row ($toprow .. $bottomrow) { 159 for my $col ($leftcol .. $rightcol) { 160 push(@merged_cells, [$row, $col]); 161 } 162 } 163 } 164 165 $twig->purge; 166 }, 167 168 'sheetFormatPr' => sub { 169 my ( $twig, $format ) = @_; 170 171 $default_row_height //= $format->att('defaultRowHeight'); 172 $default_column_width //= $format->att('baseColWidth'); 173 174 $twig->purge; 175 }, 176 177 'col' => sub { 178 my ( $twig, $col ) = @_; 179 180 for my $colnum ($col->att('min')..$col->att('max')) { 181 $column_widths[$colnum - 1] = $col->att('width'); 182 $column_formats[$colnum - 1] = $col->att('style'); 183 } 184 185 $twig->purge; 186 }, 187 188 'row' => sub { 189 my ( $twig, $row ) = @_; 190 191 $row_heights[ $row->att('r') - 1 ] = $row->att('ht'); 192 193 $twig->purge; 194 }, 195 196 'selection' => sub { 197 my ( $twig, $selection ) = @_; 198 199 if (my $cell = $selection->att('activeCell')) { 200 $sheet->{Selection} = [ $self->_cell_to_row_col($cell) ]; 201 } 202 elsif (my $range = $selection->att('sqref')) { 203 my ($topleft, $bottomright) = $range =~ /([^:]+):([^:]+)/; 204 $sheet->{Selection} = [ 205 $self->_cell_to_row_col($topleft), 206 $self->_cell_to_row_col($bottomright), 207 ]; 208 } 209 210 $twig->purge; 211 }, 212 213 'sheetPr/tabColor' => sub { 214 my ( $twig, $tab_color ) = @_; 215 216 $sheet->{TabColor} = $self->_color($sheet->{_Book}{Color}, $tab_color); 217 218 $twig->purge; 219 }, 220 221 } 222 ); 223 224 $sheet_xml->parse( $sheet_file ); 225 226 # 2nd pass: cell/row building is dependent on having parsed the merge definitions 227 # beforehand. 228 229 $sheet_xml = XML::Twig->new( 230 twig_roots => { 231 'sheetData/row' => sub { 232 my ( $twig, $row_elt ) = @_; 233 234 for my $cell ( $row_elt->children('c') ){ 235 my ($row, $col) = $self->_cell_to_row_col($cell->att('r')); 236 my $type = $cell->att('t') || 'n'; 237 my $val_xml = $type eq 'inlineStr' 238 ? $cell->first_child('is')->first_child('t') 239 : $cell->first_child('v'); 240 my $val = $val_xml ? $val_xml->text : undef; 241 242 my $long_type; 243 if (!defined($val)) { 244 $long_type = 'Text'; 245 $val = ''; 246 } 247 elsif ($type eq 's') { 248 $long_type = 'Text'; 249 $val = $sheet->{_Book}{PkgStr}[$val]; 250 } 251 elsif ($type eq 'n') { 252 $long_type = 'Numeric'; 253 $val = defined($val) ? 0+$val : undef; 254 } 255 elsif ($type eq 'd') { 256 $long_type = 'Date'; 257 } 258 elsif ($type eq 'b') { 259 $long_type = 'Text'; 260 $val = $val ? "TRUE" : "FALSE"; 261 } 262 elsif ($type eq 'e') { 263 $long_type = 'Text'; 264 } 265 elsif ($type eq 'str' || $type eq 'inlineStr') { 266 $long_type = 'Text'; 267 } 268 else { 269 die "unimplemented type $type"; # XXX 270 } 271 272 my $format_idx = $cell->att('s') || 0; 273 my $format = $sheet->{_Book}{Format}[$format_idx]; 274 $format->{Merged} = !!grep { 275 $row == $_->[0] && $col == $_->[1] 276 } @merged_cells; 277 278 # see the list of built-in formats below in _parse_styles 279 # XXX probably should figure this out from the actual format string, 280 # but that's not entirely trivial 281 if (grep { $format->{FmtIdx} == $_ } 14..22, 45..47) { 282 $long_type = 'Date'; 283 } 284 285 my $cell = Spreadsheet::ParseExcel::Cell->new( 286 Val => $val, 287 Type => $long_type, 288 Merged => $format->{Merged}, 289 Format => $format, 290 FormatNo => $format_idx, 291 ($cell->first_child('f') 292 ? (Formula => $cell->first_child('f')->text) 293 : ()), 294 ); 295 $cell->{_Value} = $sheet->{_Book}{FmtClass}->ValFmt( 296 $cell, $sheet->{_Book} 297 ); 298 $sheet->{Cells}[$row][$col] = $cell; 299 } 300 301 $twig->purge; 302 }, 303 304 } 305 ); 306 307 $sheet_xml->parse( $sheet_file ); 308 309 if ( ! $sheet->{Cells} ){ 310 $sheet->{MaxRow} = $sheet->{MaxCol} = -1; 311 } 312 313 $sheet->{DefRowHeight} = 0+$default_row_height; 314 $sheet->{DefColWidth} = 0+$default_column_width; 315 $sheet->{RowHeight} = [ 316 map { defined $_ ? 0+$_ : 0+$default_row_height } @row_heights 317 ]; 318 $sheet->{ColWidth} = [ 319 map { defined $_ ? 0+$_ : 0+$default_column_width } @column_widths 320 ]; 321 $sheet->{ColFmtNo} = \@column_formats; 322 323} 324 325sub _parse_shared_strings { 326 my $self = shift; 327 my ($strings) = @_; 328 329 my $PkgStr = []; 330 331 if ($strings) { 332 my $xml = XML::Twig->new( 333 twig_handlers => { 334 'si' => sub { 335 my ( $twig, $si ) = @_; 336 337 # XXX this discards information about formatting within cells 338 # not sure how to represent that 339 push @$PkgStr, 340 join( '', map { $_->text } $si->find_nodes('.//t') ); 341 $twig->purge; 342 }, 343 } 344 ); 345 $xml->parse( $strings ); 346 } 347 return $PkgStr; 348} 349 350sub _parse_themes { 351 my $self = shift; 352 my ($themes) = @_; 353 354 return {} unless $themes; 355 356 my @color = map { 357 $_->name eq 'a:sysClr' ? $_->att('lastClr') : $_->att('val') 358 } $themes->find_nodes('//a:clrScheme/*/*'); 359 360 # this shouldn't be necessary, but the documentation is wrong here 361 # see http://stackoverflow.com/questions/2760976/theme-confusion-in-spreadsheetml 362 ($color[0], $color[1]) = ($color[1], $color[0]); 363 ($color[2], $color[3]) = ($color[3], $color[2]); 364 365 return { 366 Color => \@color, 367 } 368} 369 370sub _parse_styles { 371 my $self = shift; 372 my ($workbook, $styles) = @_; 373 374 my %halign = ( 375 center => 2, 376 centerContinuous => 6, 377 distributed => 7, 378 fill => 4, 379 general => 0, 380 justify => 5, 381 left => 1, 382 right => 3, 383 ); 384 385 my %valign = ( 386 bottom => 2, 387 center => 1, 388 distributed => 4, 389 justify => 3, 390 top => 0, 391 ); 392 393 my %border = ( 394 dashDot => 9, 395 dashDotDot => 11, 396 dashed => 3, 397 dotted => 4, 398 double => 6, 399 hair => 7, 400 medium => 2, 401 mediumDashDot => 10, 402 mediumDashDotDot => 12, 403 mediumDashed => 8, 404 none => 0, 405 slantDashDot => 13, 406 thick => 5, 407 thin => 1, 408 ); 409 410 my %fill = ( 411 darkDown => 7, 412 darkGray => 3, 413 darkGrid => 9, 414 darkHorizontal => 5, 415 darkTrellis => 10, 416 darkUp => 8, 417 darkVertical => 6, 418 gray0625 => 18, 419 gray125 => 17, 420 lightDown => 13, 421 lightGray => 4, 422 lightGrid => 15, 423 lightHorizontal => 11, 424 lightTrellis => 16, 425 lightUp => 14, 426 lightVertical => 12, 427 mediumGray => 2, 428 none => 0, 429 solid => 1, 430 ); 431 432 my @fills = map { 433 [ 434 $fill{$_->att('patternType')}, 435 $self->_color($workbook->{Color}, $_->first_child('fgColor'), 1), 436 $self->_color($workbook->{Color}, $_->first_child('bgColor'), 1), 437 ] 438 } $styles->find_nodes('//fills/fill/patternFill'); 439 440 my @borders = map { 441 my $border = $_; 442 # XXX specs say "begin" and "end" rather than "left" and "right", 443 # but... that's not what seems to be in the file itself (sigh) 444 { 445 colors => [ 446 map { 447 $self->_color( 448 $workbook->{Color}, 449 $border->first_child($_)->first_child('color') 450 ) 451 } qw(left right top bottom) 452 ], 453 styles => [ 454 map { 455 $border{$border->first_child($_)->att('style') || 'none'} 456 } qw(left right top bottom) 457 ], 458 diagonal => [ 459 0, # XXX ->att('diagonalDown') and ->att('diagonalUp') 460 0, # XXX ->att('style') 461 $self->_color( 462 $workbook->{Color}, 463 $border->first_child('diagonal')->first_child('color') 464 ), 465 ], 466 } 467 } $styles->find_nodes('//borders/border'); 468 469 # these defaults are from 470 # http://social.msdn.microsoft.com/Forums/en-US/oxmlsdk/thread/e27aaf16-b900-4654-8210-83c5774a179c 471 my %format_str = ( 472 0 => 'GENERAL', 473 1 => '0', 474 2 => '0.00', 475 3 => '#,##0', 476 4 => '#,##0.00', 477 5 => '$#,##0_);($#,##0)', 478 6 => '$#,##0_);[Red]($#,##0)', 479 7 => '$#,##0.00_);($#,##0.00)', 480 8 => '$#,##0.00_);[Red]($#,##0.00)', 481 9 => '0%', 482 10 => '0.00%', 483 11 => '0.00E+00', 484 12 => '# ?/?', 485 13 => '# ??/??', 486 14 => 'm/d/yyyy', 487 15 => 'd-mmm-yy', 488 16 => 'd-mmm', 489 17 => 'mmm-yy', 490 18 => 'h:mm AM/PM', 491 19 => 'h:mm:ss AM/PM', 492 20 => 'h:mm', 493 21 => 'h:mm:ss', 494 22 => 'm/d/yyyy h:mm', 495 37 => '#,##0_);(#,##0)', 496 38 => '#,##0_);[Red](#,##0)', 497 39 => '#,##0.00_);(#,##0.00)', 498 40 => '#,##0.00_);[Red](#,##0.00)', 499 45 => 'mm:ss', 500 46 => '[h]:mm:ss', 501 47 => 'mm:ss.0', 502 48 => '##0.0E+0', 503 49 => '@', 504 (map { 505 $_->att('numFmtId') => $_->att('formatCode') 506 } $styles->find_nodes('//numFmts/numFmt')), 507 ); 508 509 my @font = map { 510 my $vert = $_->first_child('vertAlign'); 511 my $under = $_->first_child('u'); 512 Spreadsheet::ParseExcel::Font->new( 513 Height => 0+$_->first_child('sz')->att('val'), 514 # Attr => $iAttr, 515 # XXX not sure if there's a better way to keep the indexing stuff 516 # intact rather than just going straight to #xxxxxx 517 # XXX also not sure what it means for the color tag to be missing, 518 # just assuming black for now 519 Color => ($_->first_child('color') 520 ? $self->_color( 521 $workbook->{Color}, 522 $_->first_child('color') 523 ) 524 : '#000000' 525 ), 526 Super => ($vert 527 ? ($vert->att('val') eq 'superscript' ? 1 528 : $vert->att('val') eq 'subscript' ? 2 529 : 0) 530 : 0 531 ), 532 # XXX not sure what the single accounting and double accounting 533 # underline styles map to in xlsx. also need to map the new 534 # underline styles 535 UnderlineStyle => ($under 536 # XXX sometimes style xml files can contain just <u/> with no 537 # val attribute. i think this means single underline, but not 538 # sure 539 ? (!$under->att('val') ? 1 540 : $under->att('val') eq 'single' ? 1 541 : $under->att('val') eq 'double' ? 2 542 : 0) 543 : 0 544 ), 545 Name => $_->first_child('name')->att('val'), 546 547 Bold => $_->has_child('b') ? 1 : 0, 548 Italic => $_->has_child('i') ? 1 : 0, 549 Underline => $_->has_child('u') ? 1 : 0, 550 Strikeout => $_->has_child('strike') ? 1 : 0, 551 ) 552 } $styles->find_nodes('//fonts/font'); 553 554 my @format = map { 555 my $alignment = $_->first_child('alignment'); 556 my $protection = $_->first_child('protection'); 557 Spreadsheet::ParseExcel::Format->new( 558 IgnoreFont => !$_->att('applyFont'), 559 IgnoreFill => !$_->att('applyFill'), 560 IgnoreBorder => !$_->att('applyBorder'), 561 IgnoreAlignment => !$_->att('applyAlignment'), 562 IgnoreNumberFormat => !$_->att('applyNumberFormat'), 563 IgnoreProtection => !$_->att('applyProtection'), 564 565 FontNo => 0+$_->att('fontId'), 566 Font => $font[$_->att('fontId')], 567 FmtIdx => 0+$_->att('numFmtId'), 568 569 Lock => $protection && defined $protection->att('locked') 570 ? $protection->att('locked') 571 : 1, 572 Hidden => $protection 573 ? $protection->att('hidden') 574 : 0, 575 # Style => $iStyle, 576 # Key123 => $i123, 577 AlignH => $alignment 578 ? $halign{$alignment->att('horizontal') || 'general'} 579 : 0, 580 Wrap => $alignment 581 ? $alignment->att('wrapText') 582 : 0, 583 AlignV => $alignment 584 ? $valign{$alignment->att('vertical') || 'bottom'} 585 : 2, 586 # JustLast => $iJustL, 587 # Rotate => $iRotate, 588 589 # Indent => $iInd, 590 # Shrink => $iShrink, 591 # Merge => $iMerge, 592 # ReadDir => $iReadDir, 593 594 BdrStyle => $borders[$_->att('borderId')]{styles}, 595 BdrColor => $borders[$_->att('borderId')]{colors}, 596 BdrDiag => $borders[$_->att('borderId')]{diagonal}, 597 Fill => $fills[$_->att('fillId')], 598 ) 599 } $styles->find_nodes('//cellXfs/xf'); 600 601 return { 602 FormatStr => \%format_str, 603 Font => \@font, 604 Format => \@format, 605 } 606} 607 608sub _extract_files { 609 my $self = shift; 610 my ($zip) = @_; 611 612 my $type_base = 613 'http://schemas.openxmlformats.org/officeDocument/2006/relationships'; 614 615 my $rels = $self->_parse_xml( 616 $zip, 617 $self->_rels_for('') 618 ); 619 my $wb_name = ($rels->find_nodes( 620 qq<//Relationship[\@Type="$type_base/officeDocument"]> 621 ))[0]->att('Target'); 622 my $wb_xml = $self->_parse_xml($zip, $wb_name); 623 624 my $path_base = $self->_base_path_for($wb_name); 625 my $wb_rels = $self->_parse_xml( 626 $zip, 627 $self->_rels_for($wb_name) 628 ); 629 630 my ($strings_xml) = map { 631 $zip->memberNamed($path_base . $_->att('Target'))->contents 632 } $wb_rels->find_nodes(qq<//Relationship[\@Type="$type_base/sharedStrings"]>); 633 634 my $styles_xml = $self->_parse_xml( 635 $zip, 636 $path_base . ($wb_rels->find_nodes( 637 qq<//Relationship[\@Type="$type_base/styles"]> 638 ))[0]->att('Target') 639 ); 640 641 my %worksheet_xml = map { 642 if ( my $sheetfile = $zip->memberNamed($path_base . $_->att('Target'))->contents ) { 643 ( $_->att('Id') => $sheetfile ); 644 } 645 } $wb_rels->find_nodes(qq<//Relationship[\@Type="$type_base/worksheet"]>); 646 647 my %themes_xml = map { 648 $_->att('Id') => $self->_parse_xml($zip, $path_base . $_->att('Target')) 649 } $wb_rels->find_nodes(qq<//Relationship[\@Type="$type_base/theme"]>); 650 651 return { 652 workbook => $wb_xml, 653 styles => $styles_xml, 654 sheets => \%worksheet_xml, 655 themes => \%themes_xml, 656 ($strings_xml 657 ? (strings => $strings_xml) 658 : ()), 659 }; 660} 661 662sub _parse_xml { 663 my $self = shift; 664 my ($zip, $subfile) = @_; 665 666 my $member = $zip->memberNamed($subfile); 667 die "no subfile named $subfile" unless $member; 668 669 my $xml = XML::Twig->new; 670 $xml->parse(scalar $member->contents); 671 672 return $xml; 673} 674 675sub _rels_for { 676 my $self = shift; 677 my ($file) = @_; 678 679 my @path = split '/', $file; 680 my $name = pop @path; 681 $name = '' unless defined $name; 682 push @path, '_rels'; 683 push @path, "$name.rels"; 684 685 return join '/', @path; 686} 687 688sub _base_path_for { 689 my $self = shift; 690 my ($file) = @_; 691 692 my @path = split '/', $file; 693 pop @path; 694 695 return join('/', @path) . '/'; 696} 697 698sub _dimensions { 699 my $self = shift; 700 my ($dim) = @_; 701 702 my ($topleft, $bottomright) = split ':', $dim; 703 $bottomright = $topleft unless defined $bottomright; 704 705 my ($rmin, $cmin) = $self->_cell_to_row_col($topleft); 706 my ($rmax, $cmax) = $self->_cell_to_row_col($bottomright); 707 708 return ($rmin, $cmin, $rmax, $cmax); 709} 710 711sub _cell_to_row_col { 712 my $self = shift; 713 my ($cell) = @_; 714 715 my ($col, $row) = $cell =~ /([A-Z]+)([0-9]+)/; 716 717 my $ncol = 0; 718 for my $char (split //, $col) { 719 $ncol *= 26; 720 $ncol += ord($char) - ord('A') + 1; 721 } 722 $ncol = $ncol - 1; 723 724 my $nrow = $row - 1; 725 726 return ($nrow, $ncol); 727} 728 729sub _color { 730 my $self = shift; 731 my ($colors, $color_node, $fill) = @_; 732 733 my $color; 734 if ($color_node && !$color_node->att('auto')) { 735 if (defined $color_node->att('indexed')) { 736 # see https://rt.cpan.org/Public/Bug/Display.html?id=93065 737 if ($fill && $color_node->att('indexed') == 64) { 738 return '#FFFFFF'; 739 } 740 else { 741 $color = '#' . Spreadsheet::ParseExcel->ColorIdxToRGB( 742 $color_node->att('indexed') 743 ); 744 } 745 } 746 elsif (defined $color_node->att('rgb')) { 747 $color = '#' . substr($color_node->att('rgb'), 2, 6); 748 } 749 elsif (defined $color_node->att('theme')) { 750 $color = '#' . $colors->[$color_node->att('theme')]; 751 } 752 753 $color = $self->_apply_tint($color, $color_node->att('tint')) 754 if $color_node->att('tint'); 755 } 756 757 return $color; 758} 759 760sub _apply_tint { 761 my $self = shift; 762 my ($color, $tint) = @_; 763 764 my ($r, $g, $b) = map { oct("0x$_") } $color =~ /#(..)(..)(..)/; 765 my ($h, $l, $s) = rgb2hls($r, $g, $b); 766 767 if ($tint < 0) { 768 $l = $l * (1.0 + $tint); 769 } 770 else { 771 $l = $l * (1.0 - $tint) + (1.0 - 1.0 * (1.0 - $tint)); 772 } 773 774 return scalar hls2rgb($h, $l, $s); 775} 776 777 7781; 779 780__END__ 781 782=pod 783 784=encoding UTF-8 785 786=head1 NAME 787 788Spreadsheet::ParseXLSX - parse XLSX files 789 790=head1 VERSION 791 792version 0.16 793 794=head1 SYNOPSIS 795 796 use Spreadsheet::ParseXLSX; 797 798 my $parser = Spreadsheet::ParseXLSX->new; 799 my $workbook = $parser->parse("file.xlsx"); 800 # see Spreadsheet::ParseExcel for further documentation 801 802=head1 DESCRIPTION 803 804This module is an adaptor for L<Spreadsheet::ParseExcel> that reads XLSX files. 805 806=head1 METHODS 807 808=head2 new 809 810Returns a new parser instance. Takes no parameters. 811 812=head2 parse($file, $formatter) 813 814Parses an XLSX file. Parsing errors throw an exception. C<$file> can be either 815a filename or an open filehandle. Returns a 816L<Spreadsheet::ParseExcel::Workbook> instance containing the parsed data. 817The C<$formatter> argument is an optional formatter class as described in L<Spreadsheet::ParseExcel>. 818 819=head1 INCOMPATIBILITIES 820 821This module returns data using classes from L<Spreadsheet::ParseExcel>, so for 822the most part, it should just be a drop-in replacement. That said, there are a 823couple areas where the data returned is intentionally different: 824 825=over 4 826 827=item Colors 828 829In Spreadsheet::ParseExcel, colors are represented by integers which index into 830the color table, and you have to use 831C<< Spreadsheet::ParseExcel->ColorIdxToRGB >> in order to get the actual value 832out. In Spreadsheet::ParseXLSX, while the color table still exists, cells are 833also allowed to specify their color directly rather than going through the 834color table. In order to avoid confusion, I normalize all color values in 835Spreadsheet::ParseXLSX to their string RGB format (C<"#0088ff">). This affects 836the C<Fill>, C<BdrColor>, and C<BdrDiag> properties of formats, and the 837C<Color> property of fonts. Note that the default color is represented by 838C<undef> (the same thing that C<ColorIdxToRGB> would return). 839 840=item Formulas 841 842Spreadsheet::ParseExcel doesn't support formulas. Spreadsheet::ParseXLSX 843provides basic formula support by returning the text of the formula as part of 844the cell data. You can access it via C<< $cell->{Formula} >>. Note that the 845restriction still holds that formula cell values aren't available unless they 846were explicitly provided when the spreadsheet was written. 847 848=back 849 850=head1 BUGS 851 852=over 4 853 854=item Large spreadsheets may cause segfaults on perl 5.14 and earlier 855 856This module internally uses XML::Twig, which makes it potentially subject to 857L<Bug #71636 for XML-Twig: Segfault with medium-sized document|https://rt.cpan.org/Public/Bug/Display.html?id=71636> 858on perl versions 5.14 and below (the underlying bug with perl weak references 859was fixed in perl 5.15.5). The larger and more complex the spreadsheet, the 860more likely to be affected, but the actual size at which it segfaults is 861platform dependent. On a 64-bit perl with 7.6gb memory, it was seen on 862spreadsheets about 300mb and above. You can work around this adding 863C<XML::Twig::_set_weakrefs(0)> to your code before parsing the spreadsheet, 864although this may have other consequences such as memory leaks. 865 866=item Worksheets without the C<dimension> tag are not supported 867 868=item Intra-cell formatting is discarded 869 870=item Diagonal border styles are ignored 871 872=back 873 874In addition, there are still a few areas which are not yet implemented (the 875XLSX spec is quite large). If you run into any of those, bug reports are quite 876welcome. 877 878Please report any bugs to GitHub Issues at 879L<https://github.com/doy/spreadsheet-parsexlsx/issues>. 880 881=head1 SEE ALSO 882 883L<Spreadsheet::ParseExcel>: The equivalent, for XLS files. 884 885L<Spreadsheet::XLSX>: An older, less robust and featureful implementation. 886 887=head1 SUPPORT 888 889You can find this documentation for this module with the perldoc command. 890 891 perldoc Spreadsheet::ParseXLSX 892 893You can also look for information at: 894 895=over 4 896 897=item * MetaCPAN 898 899L<https://metacpan.org/release/Spreadsheet-ParseXLSX> 900 901=item * RT: CPAN's request tracker 902 903L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=Spreadsheet-ParseXLSX> 904 905=item * Github 906 907L<https://github.com/doy/spreadsheet-parsexlsx> 908 909=item * CPAN Ratings 910 911L<http://cpanratings.perl.org/d/Spreadsheet-ParseXLSX> 912 913=back 914 915=head1 SPONSORS 916 917Parts of this code were paid for by 918 919=over 4 920 921=item Socialflow L<http://socialflow.com> 922 923=back 924 925=head1 AUTHOR 926 927Jesse Luehrs <doy@tozt.net> 928 929=head1 COPYRIGHT AND LICENSE 930 931This software is Copyright (c) 2014 by Jesse Luehrs. 932 933This is free software, licensed under: 934 935 The MIT (X11) License 936 937=cut 938