1<?php 2 3namespace PhpOffice\PhpSpreadsheet\Reader; 4 5use DateTime; 6use DateTimeZone; 7use PhpOffice\PhpSpreadsheet\Calculation\Calculation; 8use PhpOffice\PhpSpreadsheet\Cell\Coordinate; 9use PhpOffice\PhpSpreadsheet\Cell\DataType; 10use PhpOffice\PhpSpreadsheet\Reader\Ods\Properties as DocumentProperties; 11use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner; 12use PhpOffice\PhpSpreadsheet\RichText\RichText; 13use PhpOffice\PhpSpreadsheet\Settings; 14use PhpOffice\PhpSpreadsheet\Shared\Date; 15use PhpOffice\PhpSpreadsheet\Shared\File; 16use PhpOffice\PhpSpreadsheet\Spreadsheet; 17use PhpOffice\PhpSpreadsheet\Style\NumberFormat; 18use XMLReader; 19use ZipArchive; 20 21class Ods extends BaseReader 22{ 23 /** 24 * Create a new Ods Reader instance. 25 */ 26 public function __construct() 27 { 28 parent::__construct(); 29 $this->securityScanner = XmlScanner::getInstance($this); 30 } 31 32 /** 33 * Can the current IReader read the file? 34 * 35 * @param string $pFilename 36 * 37 * @throws Exception 38 * 39 * @return bool 40 */ 41 public function canRead($pFilename) 42 { 43 File::assertFile($pFilename); 44 45 $mimeType = 'UNKNOWN'; 46 47 // Load file 48 49 $zip = new ZipArchive(); 50 if ($zip->open($pFilename) === true) { 51 // check if it is an OOXML archive 52 $stat = $zip->statName('mimetype'); 53 if ($stat && ($stat['size'] <= 255)) { 54 $mimeType = $zip->getFromName($stat['name']); 55 } elseif ($zip->statName('META-INF/manifest.xml')) { 56 $xml = simplexml_load_string( 57 $this->securityScanner->scan($zip->getFromName('META-INF/manifest.xml')), 58 'SimpleXMLElement', 59 Settings::getLibXmlLoaderOptions() 60 ); 61 $namespacesContent = $xml->getNamespaces(true); 62 if (isset($namespacesContent['manifest'])) { 63 $manifest = $xml->children($namespacesContent['manifest']); 64 foreach ($manifest as $manifestDataSet) { 65 $manifestAttributes = $manifestDataSet->attributes($namespacesContent['manifest']); 66 if ($manifestAttributes->{'full-path'} == '/') { 67 $mimeType = (string) $manifestAttributes->{'media-type'}; 68 69 break; 70 } 71 } 72 } 73 } 74 75 $zip->close(); 76 77 return $mimeType === 'application/vnd.oasis.opendocument.spreadsheet'; 78 } 79 80 return false; 81 } 82 83 /** 84 * Reads names of the worksheets from a file, without parsing the whole file to a PhpSpreadsheet object. 85 * 86 * @param string $pFilename 87 * 88 * @throws Exception 89 * 90 * @return string[] 91 */ 92 public function listWorksheetNames($pFilename) 93 { 94 File::assertFile($pFilename); 95 96 $zip = new ZipArchive(); 97 if (!$zip->open($pFilename)) { 98 throw new Exception('Could not open ' . $pFilename . ' for reading! Error opening file.'); 99 } 100 101 $worksheetNames = []; 102 103 $xml = new XMLReader(); 104 $xml->xml( 105 $this->securityScanner->scanFile('zip://' . realpath($pFilename) . '#content.xml'), 106 null, 107 Settings::getLibXmlLoaderOptions() 108 ); 109 $xml->setParserProperty(2, true); 110 111 // Step into the first level of content of the XML 112 $xml->read(); 113 while ($xml->read()) { 114 // Quickly jump through to the office:body node 115 while ($xml->name !== 'office:body') { 116 if ($xml->isEmptyElement) { 117 $xml->read(); 118 } else { 119 $xml->next(); 120 } 121 } 122 // Now read each node until we find our first table:table node 123 while ($xml->read()) { 124 if ($xml->name == 'table:table' && $xml->nodeType == XMLReader::ELEMENT) { 125 // Loop through each table:table node reading the table:name attribute for each worksheet name 126 do { 127 $worksheetNames[] = $xml->getAttribute('table:name'); 128 $xml->next(); 129 } while ($xml->name == 'table:table' && $xml->nodeType == XMLReader::ELEMENT); 130 } 131 } 132 } 133 134 return $worksheetNames; 135 } 136 137 /** 138 * Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns). 139 * 140 * @param string $pFilename 141 * 142 * @throws Exception 143 * 144 * @return array 145 */ 146 public function listWorksheetInfo($pFilename) 147 { 148 File::assertFile($pFilename); 149 150 $worksheetInfo = []; 151 152 $zip = new ZipArchive(); 153 if (!$zip->open($pFilename)) { 154 throw new Exception('Could not open ' . $pFilename . ' for reading! Error opening file.'); 155 } 156 157 $xml = new XMLReader(); 158 $xml->xml( 159 $this->securityScanner->scanFile('zip://' . realpath($pFilename) . '#content.xml'), 160 null, 161 Settings::getLibXmlLoaderOptions() 162 ); 163 $xml->setParserProperty(2, true); 164 165 // Step into the first level of content of the XML 166 $xml->read(); 167 while ($xml->read()) { 168 // Quickly jump through to the office:body node 169 while ($xml->name !== 'office:body') { 170 if ($xml->isEmptyElement) { 171 $xml->read(); 172 } else { 173 $xml->next(); 174 } 175 } 176 // Now read each node until we find our first table:table node 177 while ($xml->read()) { 178 if ($xml->name == 'table:table' && $xml->nodeType == XMLReader::ELEMENT) { 179 $worksheetNames[] = $xml->getAttribute('table:name'); 180 181 $tmpInfo = [ 182 'worksheetName' => $xml->getAttribute('table:name'), 183 'lastColumnLetter' => 'A', 184 'lastColumnIndex' => 0, 185 'totalRows' => 0, 186 'totalColumns' => 0, 187 ]; 188 189 // Loop through each child node of the table:table element reading 190 $currCells = 0; 191 do { 192 $xml->read(); 193 if ($xml->name == 'table:table-row' && $xml->nodeType == XMLReader::ELEMENT) { 194 $rowspan = $xml->getAttribute('table:number-rows-repeated'); 195 $rowspan = empty($rowspan) ? 1 : $rowspan; 196 $tmpInfo['totalRows'] += $rowspan; 197 $tmpInfo['totalColumns'] = max($tmpInfo['totalColumns'], $currCells); 198 $currCells = 0; 199 // Step into the row 200 $xml->read(); 201 do { 202 if ($xml->name == 'table:table-cell' && $xml->nodeType == XMLReader::ELEMENT) { 203 if (!$xml->isEmptyElement) { 204 ++$currCells; 205 $xml->next(); 206 } else { 207 $xml->read(); 208 } 209 } elseif ($xml->name == 'table:covered-table-cell' && $xml->nodeType == XMLReader::ELEMENT) { 210 $mergeSize = $xml->getAttribute('table:number-columns-repeated'); 211 $currCells += (int) $mergeSize; 212 $xml->read(); 213 } else { 214 $xml->read(); 215 } 216 } while ($xml->name != 'table:table-row'); 217 } 218 } while ($xml->name != 'table:table'); 219 220 $tmpInfo['totalColumns'] = max($tmpInfo['totalColumns'], $currCells); 221 $tmpInfo['lastColumnIndex'] = $tmpInfo['totalColumns'] - 1; 222 $tmpInfo['lastColumnLetter'] = Coordinate::stringFromColumnIndex($tmpInfo['lastColumnIndex'] + 1); 223 $worksheetInfo[] = $tmpInfo; 224 } 225 } 226 } 227 228 return $worksheetInfo; 229 } 230 231 /** 232 * Loads PhpSpreadsheet from file. 233 * 234 * @param string $pFilename 235 * 236 * @throws Exception 237 * 238 * @return Spreadsheet 239 */ 240 public function load($pFilename) 241 { 242 // Create new Spreadsheet 243 $spreadsheet = new Spreadsheet(); 244 245 // Load into this instance 246 return $this->loadIntoExisting($pFilename, $spreadsheet); 247 } 248 249 /** 250 * Loads PhpSpreadsheet from file into PhpSpreadsheet instance. 251 * 252 * @param string $pFilename 253 * @param Spreadsheet $spreadsheet 254 * 255 * @throws Exception 256 * 257 * @return Spreadsheet 258 */ 259 public function loadIntoExisting($pFilename, Spreadsheet $spreadsheet) 260 { 261 File::assertFile($pFilename); 262 263 $timezoneObj = new DateTimeZone('Europe/London'); 264 $GMT = new \DateTimeZone('UTC'); 265 266 $zip = new ZipArchive(); 267 if (!$zip->open($pFilename)) { 268 throw new Exception("Could not open {$pFilename} for reading! Error opening file."); 269 } 270 271 // Meta 272 273 $xml = simplexml_load_string( 274 $this->securityScanner->scan($zip->getFromName('meta.xml')), 275 'SimpleXMLElement', 276 Settings::getLibXmlLoaderOptions() 277 ); 278 if ($xml === false) { 279 throw new Exception('Unable to read data from {$pFilename}'); 280 } 281 282 $namespacesMeta = $xml->getNamespaces(true); 283 284 (new DocumentProperties($spreadsheet))->load($xml, $namespacesMeta); 285 286 // Content 287 288 $dom = new \DOMDocument('1.01', 'UTF-8'); 289 $dom->loadXML( 290 $this->securityScanner->scan($zip->getFromName('content.xml')), 291 Settings::getLibXmlLoaderOptions() 292 ); 293 294 $officeNs = $dom->lookupNamespaceUri('office'); 295 $tableNs = $dom->lookupNamespaceUri('table'); 296 $textNs = $dom->lookupNamespaceUri('text'); 297 $xlinkNs = $dom->lookupNamespaceUri('xlink'); 298 299 $spreadsheets = $dom->getElementsByTagNameNS($officeNs, 'body') 300 ->item(0) 301 ->getElementsByTagNameNS($officeNs, 'spreadsheet'); 302 303 foreach ($spreadsheets as $workbookData) { 304 /** @var \DOMElement $workbookData */ 305 $tables = $workbookData->getElementsByTagNameNS($tableNs, 'table'); 306 307 $worksheetID = 0; 308 foreach ($tables as $worksheetDataSet) { 309 /** @var \DOMElement $worksheetDataSet */ 310 $worksheetName = $worksheetDataSet->getAttributeNS($tableNs, 'name'); 311 312 // Check loadSheetsOnly 313 if (isset($this->loadSheetsOnly) 314 && $worksheetName 315 && !in_array($worksheetName, $this->loadSheetsOnly)) { 316 continue; 317 } 318 319 // Create sheet 320 if ($worksheetID > 0) { 321 $spreadsheet->createSheet(); // First sheet is added by default 322 } 323 $spreadsheet->setActiveSheetIndex($worksheetID); 324 325 if ($worksheetName) { 326 // Use false for $updateFormulaCellReferences to prevent adjustment of worksheet references in 327 // formula cells... during the load, all formulae should be correct, and we're simply 328 // bringing the worksheet name in line with the formula, not the reverse 329 $spreadsheet->getActiveSheet()->setTitle($worksheetName, false, false); 330 } 331 332 // Go through every child of table element 333 $rowID = 1; 334 foreach ($worksheetDataSet->childNodes as $childNode) { 335 /** @var \DOMElement $childNode */ 336 337 // Filter elements which are not under the "table" ns 338 if ($childNode->namespaceURI != $tableNs) { 339 continue; 340 } 341 342 $key = $childNode->nodeName; 343 344 // Remove ns from node name 345 if (strpos($key, ':') !== false) { 346 $keyChunks = explode(':', $key); 347 $key = array_pop($keyChunks); 348 } 349 350 switch ($key) { 351 case 'table-header-rows': 352 /// TODO :: Figure this out. This is only a partial implementation I guess. 353 // ($rowData it's not used at all and I'm not sure that PHPExcel 354 // has an API for this) 355 356// foreach ($rowData as $keyRowData => $cellData) { 357// $rowData = $cellData; 358// break; 359// } 360 break; 361 case 'table-row': 362 if ($childNode->hasAttributeNS($tableNs, 'number-rows-repeated')) { 363 $rowRepeats = $childNode->getAttributeNS($tableNs, 'number-rows-repeated'); 364 } else { 365 $rowRepeats = 1; 366 } 367 368 $columnID = 'A'; 369 foreach ($childNode->childNodes as $key => $cellData) { 370 // @var \DOMElement $cellData 371 372 if ($this->getReadFilter() !== null) { 373 if (!$this->getReadFilter()->readCell($columnID, $rowID, $worksheetName)) { 374 ++$columnID; 375 376 continue; 377 } 378 } 379 380 // Initialize variables 381 $formatting = $hyperlink = null; 382 $hasCalculatedValue = false; 383 $cellDataFormula = ''; 384 385 if ($cellData->hasAttributeNS($tableNs, 'formula')) { 386 $cellDataFormula = $cellData->getAttributeNS($tableNs, 'formula'); 387 $hasCalculatedValue = true; 388 } 389 390 // Annotations 391 $annotation = $cellData->getElementsByTagNameNS($officeNs, 'annotation'); 392 393 if ($annotation->length > 0) { 394 $textNode = $annotation->item(0)->getElementsByTagNameNS($textNs, 'p'); 395 396 if ($textNode->length > 0) { 397 $text = $this->scanElementForText($textNode->item(0)); 398 399 $spreadsheet->getActiveSheet() 400 ->getComment($columnID . $rowID) 401 ->setText($this->parseRichText($text)); 402// ->setAuthor( $author ) 403 } 404 } 405 406 // Content 407 408 /** @var \DOMElement[] $paragraphs */ 409 $paragraphs = []; 410 411 foreach ($cellData->childNodes as $item) { 412 /** @var \DOMElement $item */ 413 414 // Filter text:p elements 415 if ($item->nodeName == 'text:p') { 416 $paragraphs[] = $item; 417 } 418 } 419 420 if (count($paragraphs) > 0) { 421 // Consolidate if there are multiple p records (maybe with spans as well) 422 $dataArray = []; 423 424 // Text can have multiple text:p and within those, multiple text:span. 425 // text:p newlines, but text:span does not. 426 // Also, here we assume there is no text data is span fields are specified, since 427 // we have no way of knowing proper positioning anyway. 428 429 foreach ($paragraphs as $pData) { 430 $dataArray[] = $this->scanElementForText($pData); 431 } 432 $allCellDataText = implode("\n", $dataArray); 433 434 $type = $cellData->getAttributeNS($officeNs, 'value-type'); 435 436 switch ($type) { 437 case 'string': 438 $type = DataType::TYPE_STRING; 439 $dataValue = $allCellDataText; 440 441 foreach ($paragraphs as $paragraph) { 442 $link = $paragraph->getElementsByTagNameNS($textNs, 'a'); 443 if ($link->length > 0) { 444 $hyperlink = $link->item(0)->getAttributeNS($xlinkNs, 'href'); 445 } 446 } 447 448 break; 449 case 'boolean': 450 $type = DataType::TYPE_BOOL; 451 $dataValue = ($allCellDataText == 'TRUE') ? true : false; 452 453 break; 454 case 'percentage': 455 $type = DataType::TYPE_NUMERIC; 456 $dataValue = (float) $cellData->getAttributeNS($officeNs, 'value'); 457 458 if (floor($dataValue) == $dataValue) { 459 $dataValue = (int) $dataValue; 460 } 461 $formatting = NumberFormat::FORMAT_PERCENTAGE_00; 462 463 break; 464 case 'currency': 465 $type = DataType::TYPE_NUMERIC; 466 $dataValue = (float) $cellData->getAttributeNS($officeNs, 'value'); 467 468 if (floor($dataValue) == $dataValue) { 469 $dataValue = (int) $dataValue; 470 } 471 $formatting = NumberFormat::FORMAT_CURRENCY_USD_SIMPLE; 472 473 break; 474 case 'float': 475 $type = DataType::TYPE_NUMERIC; 476 $dataValue = (float) $cellData->getAttributeNS($officeNs, 'value'); 477 478 if (floor($dataValue) == $dataValue) { 479 if ($dataValue == (int) $dataValue) { 480 $dataValue = (int) $dataValue; 481 } else { 482 $dataValue = (float) $dataValue; 483 } 484 } 485 486 break; 487 case 'date': 488 $type = DataType::TYPE_NUMERIC; 489 $value = $cellData->getAttributeNS($officeNs, 'date-value'); 490 491 $dateObj = new DateTime($value, $GMT); 492 $dateObj->setTimeZone($timezoneObj); 493 [$year, $month, $day, $hour, $minute, $second] = explode( 494 ' ', 495 $dateObj->format('Y m d H i s') 496 ); 497 498 $dataValue = Date::formattedPHPToExcel( 499 (int) $year, 500 (int) $month, 501 (int) $day, 502 (int) $hour, 503 (int) $minute, 504 (int) $second 505 ); 506 507 if ($dataValue != floor($dataValue)) { 508 $formatting = NumberFormat::FORMAT_DATE_XLSX15 509 . ' ' 510 . NumberFormat::FORMAT_DATE_TIME4; 511 } else { 512 $formatting = NumberFormat::FORMAT_DATE_XLSX15; 513 } 514 515 break; 516 case 'time': 517 $type = DataType::TYPE_NUMERIC; 518 519 $timeValue = $cellData->getAttributeNS($officeNs, 'time-value'); 520 521 $dataValue = Date::PHPToExcel( 522 strtotime( 523 '01-01-1970 ' . implode(':', sscanf($timeValue, 'PT%dH%dM%dS')) 524 ) 525 ); 526 $formatting = NumberFormat::FORMAT_DATE_TIME4; 527 528 break; 529 default: 530 $dataValue = null; 531 } 532 } else { 533 $type = DataType::TYPE_NULL; 534 $dataValue = null; 535 } 536 537 if ($hasCalculatedValue) { 538 $type = DataType::TYPE_FORMULA; 539 $cellDataFormula = substr($cellDataFormula, strpos($cellDataFormula, ':=') + 1); 540 $temp = explode('"', $cellDataFormula); 541 $tKey = false; 542 foreach ($temp as &$value) { 543 // Only replace in alternate array entries (i.e. non-quoted blocks) 544 if ($tKey = !$tKey) { 545 // Cell range reference in another sheet 546 $value = preg_replace('/\[([^\.]+)\.([^\.]+):\.([^\.]+)\]/U', '$1!$2:$3', $value); 547 548 // Cell reference in another sheet 549 $value = preg_replace('/\[([^\.]+)\.([^\.]+)\]/U', '$1!$2', $value); 550 551 // Cell range reference 552 $value = preg_replace('/\[\.([^\.]+):\.([^\.]+)\]/U', '$1:$2', $value); 553 554 // Simple cell reference 555 $value = preg_replace('/\[\.([^\.]+)\]/U', '$1', $value); 556 557 $value = Calculation::translateSeparator(';', ',', $value, $inBraces); 558 } 559 } 560 unset($value); 561 562 // Then rebuild the formula string 563 $cellDataFormula = implode('"', $temp); 564 } 565 566 if ($cellData->hasAttributeNS($tableNs, 'number-columns-repeated')) { 567 $colRepeats = (int) $cellData->getAttributeNS($tableNs, 'number-columns-repeated'); 568 } else { 569 $colRepeats = 1; 570 } 571 572 if ($type !== null) { 573 for ($i = 0; $i < $colRepeats; ++$i) { 574 if ($i > 0) { 575 ++$columnID; 576 } 577 578 if ($type !== DataType::TYPE_NULL) { 579 for ($rowAdjust = 0; $rowAdjust < $rowRepeats; ++$rowAdjust) { 580 $rID = $rowID + $rowAdjust; 581 582 $cell = $spreadsheet->getActiveSheet() 583 ->getCell($columnID . $rID); 584 585 // Set value 586 if ($hasCalculatedValue) { 587 $cell->setValueExplicit($cellDataFormula, $type); 588 } else { 589 $cell->setValueExplicit($dataValue, $type); 590 } 591 592 if ($hasCalculatedValue) { 593 $cell->setCalculatedValue($dataValue); 594 } 595 596 // Set other properties 597 if ($formatting !== null) { 598 $spreadsheet->getActiveSheet() 599 ->getStyle($columnID . $rID) 600 ->getNumberFormat() 601 ->setFormatCode($formatting); 602 } else { 603 $spreadsheet->getActiveSheet() 604 ->getStyle($columnID . $rID) 605 ->getNumberFormat() 606 ->setFormatCode(NumberFormat::FORMAT_GENERAL); 607 } 608 609 if ($hyperlink !== null) { 610 $cell->getHyperlink() 611 ->setUrl($hyperlink); 612 } 613 } 614 } 615 } 616 } 617 618 // Merged cells 619 if ($cellData->hasAttributeNS($tableNs, 'number-columns-spanned') 620 || $cellData->hasAttributeNS($tableNs, 'number-rows-spanned') 621 ) { 622 if (($type !== DataType::TYPE_NULL) || (!$this->readDataOnly)) { 623 $columnTo = $columnID; 624 625 if ($cellData->hasAttributeNS($tableNs, 'number-columns-spanned')) { 626 $columnIndex = Coordinate::columnIndexFromString($columnID); 627 $columnIndex += (int) $cellData->getAttributeNS($tableNs, 'number-columns-spanned'); 628 $columnIndex -= 2; 629 630 $columnTo = Coordinate::stringFromColumnIndex($columnIndex + 1); 631 } 632 633 $rowTo = $rowID; 634 635 if ($cellData->hasAttributeNS($tableNs, 'number-rows-spanned')) { 636 $rowTo = $rowTo + (int) $cellData->getAttributeNS($tableNs, 'number-rows-spanned') - 1; 637 } 638 639 $cellRange = $columnID . $rowID . ':' . $columnTo . $rowTo; 640 $spreadsheet->getActiveSheet()->mergeCells($cellRange); 641 } 642 } 643 644 ++$columnID; 645 } 646 $rowID += $rowRepeats; 647 648 break; 649 } 650 } 651 ++$worksheetID; 652 } 653 } 654 655 // Return 656 return $spreadsheet; 657 } 658 659 /** 660 * Recursively scan element. 661 * 662 * @param \DOMNode $element 663 * 664 * @return string 665 */ 666 protected function scanElementForText(\DOMNode $element) 667 { 668 $str = ''; 669 foreach ($element->childNodes as $child) { 670 /** @var \DOMNode $child */ 671 if ($child->nodeType == XML_TEXT_NODE) { 672 $str .= $child->nodeValue; 673 } elseif ($child->nodeType == XML_ELEMENT_NODE && $child->nodeName == 'text:s') { 674 // It's a space 675 676 // Multiple spaces? 677 /** @var \DOMAttr $cAttr */ 678 $cAttr = $child->attributes->getNamedItem('c'); 679 if ($cAttr) { 680 $multiplier = (int) $cAttr->nodeValue; 681 } else { 682 $multiplier = 1; 683 } 684 685 $str .= str_repeat(' ', $multiplier); 686 } 687 688 if ($child->hasChildNodes()) { 689 $str .= $this->scanElementForText($child); 690 } 691 } 692 693 return $str; 694 } 695 696 /** 697 * @param string $is 698 * 699 * @return RichText 700 */ 701 private function parseRichText($is) 702 { 703 $value = new RichText(); 704 $value->createText($is); 705 706 return $value; 707 } 708} 709