1<?php
2
3namespace PhpOffice\PhpSpreadsheet\Reader;
4
5use DateTime;
6use DateTimeZone;
7use PhpOffice\PhpSpreadsheet\Calculation\Calculation;
8use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
9use PhpOffice\PhpSpreadsheet\Cell\DataType;
10use PhpOffice\PhpSpreadsheet\Reader\Ods\Properties as DocumentProperties;
11use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner;
12use PhpOffice\PhpSpreadsheet\RichText\RichText;
13use PhpOffice\PhpSpreadsheet\Settings;
14use PhpOffice\PhpSpreadsheet\Shared\Date;
15use PhpOffice\PhpSpreadsheet\Shared\File;
16use PhpOffice\PhpSpreadsheet\Spreadsheet;
17use PhpOffice\PhpSpreadsheet\Style\NumberFormat;
18use XMLReader;
19use ZipArchive;
20
21class Ods extends BaseReader
22{
23    /**
24     * Create a new Ods Reader instance.
25     */
26    public function __construct()
27    {
28        parent::__construct();
29        $this->securityScanner = XmlScanner::getInstance($this);
30    }
31
32    /**
33     * Can the current IReader read the file?
34     *
35     * @param string $pFilename
36     *
37     * @throws Exception
38     *
39     * @return bool
40     */
41    public function canRead($pFilename)
42    {
43        File::assertFile($pFilename);
44
45        $mimeType = 'UNKNOWN';
46
47        // Load file
48
49        $zip = new ZipArchive();
50        if ($zip->open($pFilename) === true) {
51            // check if it is an OOXML archive
52            $stat = $zip->statName('mimetype');
53            if ($stat && ($stat['size'] <= 255)) {
54                $mimeType = $zip->getFromName($stat['name']);
55            } elseif ($zip->statName('META-INF/manifest.xml')) {
56                $xml = simplexml_load_string(
57                    $this->securityScanner->scan($zip->getFromName('META-INF/manifest.xml')),
58                    'SimpleXMLElement',
59                    Settings::getLibXmlLoaderOptions()
60                );
61                $namespacesContent = $xml->getNamespaces(true);
62                if (isset($namespacesContent['manifest'])) {
63                    $manifest = $xml->children($namespacesContent['manifest']);
64                    foreach ($manifest as $manifestDataSet) {
65                        $manifestAttributes = $manifestDataSet->attributes($namespacesContent['manifest']);
66                        if ($manifestAttributes->{'full-path'} == '/') {
67                            $mimeType = (string) $manifestAttributes->{'media-type'};
68
69                            break;
70                        }
71                    }
72                }
73            }
74
75            $zip->close();
76
77            return $mimeType === 'application/vnd.oasis.opendocument.spreadsheet';
78        }
79
80        return false;
81    }
82
83    /**
84     * Reads names of the worksheets from a file, without parsing the whole file to a PhpSpreadsheet object.
85     *
86     * @param string $pFilename
87     *
88     * @throws Exception
89     *
90     * @return string[]
91     */
92    public function listWorksheetNames($pFilename)
93    {
94        File::assertFile($pFilename);
95
96        $zip = new ZipArchive();
97        if (!$zip->open($pFilename)) {
98            throw new Exception('Could not open ' . $pFilename . ' for reading! Error opening file.');
99        }
100
101        $worksheetNames = [];
102
103        $xml = new XMLReader();
104        $xml->xml(
105            $this->securityScanner->scanFile('zip://' . realpath($pFilename) . '#content.xml'),
106            null,
107            Settings::getLibXmlLoaderOptions()
108        );
109        $xml->setParserProperty(2, true);
110
111        // Step into the first level of content of the XML
112        $xml->read();
113        while ($xml->read()) {
114            // Quickly jump through to the office:body node
115            while ($xml->name !== 'office:body') {
116                if ($xml->isEmptyElement) {
117                    $xml->read();
118                } else {
119                    $xml->next();
120                }
121            }
122            // Now read each node until we find our first table:table node
123            while ($xml->read()) {
124                if ($xml->name == 'table:table' && $xml->nodeType == XMLReader::ELEMENT) {
125                    // Loop through each table:table node reading the table:name attribute for each worksheet name
126                    do {
127                        $worksheetNames[] = $xml->getAttribute('table:name');
128                        $xml->next();
129                    } while ($xml->name == 'table:table' && $xml->nodeType == XMLReader::ELEMENT);
130                }
131            }
132        }
133
134        return $worksheetNames;
135    }
136
137    /**
138     * Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns).
139     *
140     * @param string $pFilename
141     *
142     * @throws Exception
143     *
144     * @return array
145     */
146    public function listWorksheetInfo($pFilename)
147    {
148        File::assertFile($pFilename);
149
150        $worksheetInfo = [];
151
152        $zip = new ZipArchive();
153        if (!$zip->open($pFilename)) {
154            throw new Exception('Could not open ' . $pFilename . ' for reading! Error opening file.');
155        }
156
157        $xml = new XMLReader();
158        $xml->xml(
159            $this->securityScanner->scanFile('zip://' . realpath($pFilename) . '#content.xml'),
160            null,
161            Settings::getLibXmlLoaderOptions()
162        );
163        $xml->setParserProperty(2, true);
164
165        // Step into the first level of content of the XML
166        $xml->read();
167        while ($xml->read()) {
168            // Quickly jump through to the office:body node
169            while ($xml->name !== 'office:body') {
170                if ($xml->isEmptyElement) {
171                    $xml->read();
172                } else {
173                    $xml->next();
174                }
175            }
176            // Now read each node until we find our first table:table node
177            while ($xml->read()) {
178                if ($xml->name == 'table:table' && $xml->nodeType == XMLReader::ELEMENT) {
179                    $worksheetNames[] = $xml->getAttribute('table:name');
180
181                    $tmpInfo = [
182                        'worksheetName' => $xml->getAttribute('table:name'),
183                        'lastColumnLetter' => 'A',
184                        'lastColumnIndex' => 0,
185                        'totalRows' => 0,
186                        'totalColumns' => 0,
187                    ];
188
189                    // Loop through each child node of the table:table element reading
190                    $currCells = 0;
191                    do {
192                        $xml->read();
193                        if ($xml->name == 'table:table-row' && $xml->nodeType == XMLReader::ELEMENT) {
194                            $rowspan = $xml->getAttribute('table:number-rows-repeated');
195                            $rowspan = empty($rowspan) ? 1 : $rowspan;
196                            $tmpInfo['totalRows'] += $rowspan;
197                            $tmpInfo['totalColumns'] = max($tmpInfo['totalColumns'], $currCells);
198                            $currCells = 0;
199                            // Step into the row
200                            $xml->read();
201                            do {
202                                if ($xml->name == 'table:table-cell' && $xml->nodeType == XMLReader::ELEMENT) {
203                                    if (!$xml->isEmptyElement) {
204                                        ++$currCells;
205                                        $xml->next();
206                                    } else {
207                                        $xml->read();
208                                    }
209                                } elseif ($xml->name == 'table:covered-table-cell' && $xml->nodeType == XMLReader::ELEMENT) {
210                                    $mergeSize = $xml->getAttribute('table:number-columns-repeated');
211                                    $currCells += (int) $mergeSize;
212                                    $xml->read();
213                                } else {
214                                    $xml->read();
215                                }
216                            } while ($xml->name != 'table:table-row');
217                        }
218                    } while ($xml->name != 'table:table');
219
220                    $tmpInfo['totalColumns'] = max($tmpInfo['totalColumns'], $currCells);
221                    $tmpInfo['lastColumnIndex'] = $tmpInfo['totalColumns'] - 1;
222                    $tmpInfo['lastColumnLetter'] = Coordinate::stringFromColumnIndex($tmpInfo['lastColumnIndex'] + 1);
223                    $worksheetInfo[] = $tmpInfo;
224                }
225            }
226        }
227
228        return $worksheetInfo;
229    }
230
231    /**
232     * Loads PhpSpreadsheet from file.
233     *
234     * @param string $pFilename
235     *
236     * @throws Exception
237     *
238     * @return Spreadsheet
239     */
240    public function load($pFilename)
241    {
242        // Create new Spreadsheet
243        $spreadsheet = new Spreadsheet();
244
245        // Load into this instance
246        return $this->loadIntoExisting($pFilename, $spreadsheet);
247    }
248
249    /**
250     * Loads PhpSpreadsheet from file into PhpSpreadsheet instance.
251     *
252     * @param string $pFilename
253     * @param Spreadsheet $spreadsheet
254     *
255     * @throws Exception
256     *
257     * @return Spreadsheet
258     */
259    public function loadIntoExisting($pFilename, Spreadsheet $spreadsheet)
260    {
261        File::assertFile($pFilename);
262
263        $timezoneObj = new DateTimeZone('Europe/London');
264        $GMT = new \DateTimeZone('UTC');
265
266        $zip = new ZipArchive();
267        if (!$zip->open($pFilename)) {
268            throw new Exception("Could not open {$pFilename} for reading! Error opening file.");
269        }
270
271        // Meta
272
273        $xml = simplexml_load_string(
274            $this->securityScanner->scan($zip->getFromName('meta.xml')),
275            'SimpleXMLElement',
276            Settings::getLibXmlLoaderOptions()
277        );
278        if ($xml === false) {
279            throw new Exception('Unable to read data from {$pFilename}');
280        }
281
282        $namespacesMeta = $xml->getNamespaces(true);
283
284        (new DocumentProperties($spreadsheet))->load($xml, $namespacesMeta);
285
286        // Content
287
288        $dom = new \DOMDocument('1.01', 'UTF-8');
289        $dom->loadXML(
290            $this->securityScanner->scan($zip->getFromName('content.xml')),
291            Settings::getLibXmlLoaderOptions()
292        );
293
294        $officeNs = $dom->lookupNamespaceUri('office');
295        $tableNs = $dom->lookupNamespaceUri('table');
296        $textNs = $dom->lookupNamespaceUri('text');
297        $xlinkNs = $dom->lookupNamespaceUri('xlink');
298
299        $spreadsheets = $dom->getElementsByTagNameNS($officeNs, 'body')
300            ->item(0)
301            ->getElementsByTagNameNS($officeNs, 'spreadsheet');
302
303        foreach ($spreadsheets as $workbookData) {
304            /** @var \DOMElement $workbookData */
305            $tables = $workbookData->getElementsByTagNameNS($tableNs, 'table');
306
307            $worksheetID = 0;
308            foreach ($tables as $worksheetDataSet) {
309                /** @var \DOMElement $worksheetDataSet */
310                $worksheetName = $worksheetDataSet->getAttributeNS($tableNs, 'name');
311
312                // Check loadSheetsOnly
313                if (isset($this->loadSheetsOnly)
314                    && $worksheetName
315                    && !in_array($worksheetName, $this->loadSheetsOnly)) {
316                    continue;
317                }
318
319                // Create sheet
320                if ($worksheetID > 0) {
321                    $spreadsheet->createSheet(); // First sheet is added by default
322                }
323                $spreadsheet->setActiveSheetIndex($worksheetID);
324
325                if ($worksheetName) {
326                    // Use false for $updateFormulaCellReferences to prevent adjustment of worksheet references in
327                    // formula cells... during the load, all formulae should be correct, and we're simply
328                    // bringing the worksheet name in line with the formula, not the reverse
329                    $spreadsheet->getActiveSheet()->setTitle($worksheetName, false, false);
330                }
331
332                // Go through every child of table element
333                $rowID = 1;
334                foreach ($worksheetDataSet->childNodes as $childNode) {
335                    /** @var \DOMElement $childNode */
336
337                    // Filter elements which are not under the "table" ns
338                    if ($childNode->namespaceURI != $tableNs) {
339                        continue;
340                    }
341
342                    $key = $childNode->nodeName;
343
344                    // Remove ns from node name
345                    if (strpos($key, ':') !== false) {
346                        $keyChunks = explode(':', $key);
347                        $key = array_pop($keyChunks);
348                    }
349
350                    switch ($key) {
351                        case 'table-header-rows':
352                            /// TODO :: Figure this out. This is only a partial implementation I guess.
353                            //          ($rowData it's not used at all and I'm not sure that PHPExcel
354                            //          has an API for this)
355
356//                            foreach ($rowData as $keyRowData => $cellData) {
357//                                $rowData = $cellData;
358//                                break;
359//                            }
360                            break;
361                        case 'table-row':
362                            if ($childNode->hasAttributeNS($tableNs, 'number-rows-repeated')) {
363                                $rowRepeats = $childNode->getAttributeNS($tableNs, 'number-rows-repeated');
364                            } else {
365                                $rowRepeats = 1;
366                            }
367
368                            $columnID = 'A';
369                            foreach ($childNode->childNodes as $key => $cellData) {
370                                // @var \DOMElement $cellData
371
372                                if ($this->getReadFilter() !== null) {
373                                    if (!$this->getReadFilter()->readCell($columnID, $rowID, $worksheetName)) {
374                                        ++$columnID;
375
376                                        continue;
377                                    }
378                                }
379
380                                // Initialize variables
381                                $formatting = $hyperlink = null;
382                                $hasCalculatedValue = false;
383                                $cellDataFormula = '';
384
385                                if ($cellData->hasAttributeNS($tableNs, 'formula')) {
386                                    $cellDataFormula = $cellData->getAttributeNS($tableNs, 'formula');
387                                    $hasCalculatedValue = true;
388                                }
389
390                                // Annotations
391                                $annotation = $cellData->getElementsByTagNameNS($officeNs, 'annotation');
392
393                                if ($annotation->length > 0) {
394                                    $textNode = $annotation->item(0)->getElementsByTagNameNS($textNs, 'p');
395
396                                    if ($textNode->length > 0) {
397                                        $text = $this->scanElementForText($textNode->item(0));
398
399                                        $spreadsheet->getActiveSheet()
400                                            ->getComment($columnID . $rowID)
401                                            ->setText($this->parseRichText($text));
402//                                                                    ->setAuthor( $author )
403                                    }
404                                }
405
406                                // Content
407
408                                /** @var \DOMElement[] $paragraphs */
409                                $paragraphs = [];
410
411                                foreach ($cellData->childNodes as $item) {
412                                    /** @var \DOMElement $item */
413
414                                    // Filter text:p elements
415                                    if ($item->nodeName == 'text:p') {
416                                        $paragraphs[] = $item;
417                                    }
418                                }
419
420                                if (count($paragraphs) > 0) {
421                                    // Consolidate if there are multiple p records (maybe with spans as well)
422                                    $dataArray = [];
423
424                                    // Text can have multiple text:p and within those, multiple text:span.
425                                    // text:p newlines, but text:span does not.
426                                    // Also, here we assume there is no text data is span fields are specified, since
427                                    // we have no way of knowing proper positioning anyway.
428
429                                    foreach ($paragraphs as $pData) {
430                                        $dataArray[] = $this->scanElementForText($pData);
431                                    }
432                                    $allCellDataText = implode("\n", $dataArray);
433
434                                    $type = $cellData->getAttributeNS($officeNs, 'value-type');
435
436                                    switch ($type) {
437                                        case 'string':
438                                            $type = DataType::TYPE_STRING;
439                                            $dataValue = $allCellDataText;
440
441                                            foreach ($paragraphs as $paragraph) {
442                                                $link = $paragraph->getElementsByTagNameNS($textNs, 'a');
443                                                if ($link->length > 0) {
444                                                    $hyperlink = $link->item(0)->getAttributeNS($xlinkNs, 'href');
445                                                }
446                                            }
447
448                                            break;
449                                        case 'boolean':
450                                            $type = DataType::TYPE_BOOL;
451                                            $dataValue = ($allCellDataText == 'TRUE') ? true : false;
452
453                                            break;
454                                        case 'percentage':
455                                            $type = DataType::TYPE_NUMERIC;
456                                            $dataValue = (float) $cellData->getAttributeNS($officeNs, 'value');
457
458                                            if (floor($dataValue) == $dataValue) {
459                                                $dataValue = (int) $dataValue;
460                                            }
461                                            $formatting = NumberFormat::FORMAT_PERCENTAGE_00;
462
463                                            break;
464                                        case 'currency':
465                                            $type = DataType::TYPE_NUMERIC;
466                                            $dataValue = (float) $cellData->getAttributeNS($officeNs, 'value');
467
468                                            if (floor($dataValue) == $dataValue) {
469                                                $dataValue = (int) $dataValue;
470                                            }
471                                            $formatting = NumberFormat::FORMAT_CURRENCY_USD_SIMPLE;
472
473                                            break;
474                                        case 'float':
475                                            $type = DataType::TYPE_NUMERIC;
476                                            $dataValue = (float) $cellData->getAttributeNS($officeNs, 'value');
477
478                                            if (floor($dataValue) == $dataValue) {
479                                                if ($dataValue == (int) $dataValue) {
480                                                    $dataValue = (int) $dataValue;
481                                                } else {
482                                                    $dataValue = (float) $dataValue;
483                                                }
484                                            }
485
486                                            break;
487                                        case 'date':
488                                            $type = DataType::TYPE_NUMERIC;
489                                            $value = $cellData->getAttributeNS($officeNs, 'date-value');
490
491                                            $dateObj = new DateTime($value, $GMT);
492                                            $dateObj->setTimeZone($timezoneObj);
493                                            [$year, $month, $day, $hour, $minute, $second] = explode(
494                                                ' ',
495                                                $dateObj->format('Y m d H i s')
496                                            );
497
498                                            $dataValue = Date::formattedPHPToExcel(
499                                                (int) $year,
500                                                (int) $month,
501                                                (int) $day,
502                                                (int) $hour,
503                                                (int) $minute,
504                                                (int) $second
505                                            );
506
507                                            if ($dataValue != floor($dataValue)) {
508                                                $formatting = NumberFormat::FORMAT_DATE_XLSX15
509                                                    . ' '
510                                                    . NumberFormat::FORMAT_DATE_TIME4;
511                                            } else {
512                                                $formatting = NumberFormat::FORMAT_DATE_XLSX15;
513                                            }
514
515                                            break;
516                                        case 'time':
517                                            $type = DataType::TYPE_NUMERIC;
518
519                                            $timeValue = $cellData->getAttributeNS($officeNs, 'time-value');
520
521                                            $dataValue = Date::PHPToExcel(
522                                                strtotime(
523                                                    '01-01-1970 ' . implode(':', sscanf($timeValue, 'PT%dH%dM%dS'))
524                                                )
525                                            );
526                                            $formatting = NumberFormat::FORMAT_DATE_TIME4;
527
528                                            break;
529                                        default:
530                                            $dataValue = null;
531                                    }
532                                } else {
533                                    $type = DataType::TYPE_NULL;
534                                    $dataValue = null;
535                                }
536
537                                if ($hasCalculatedValue) {
538                                    $type = DataType::TYPE_FORMULA;
539                                    $cellDataFormula = substr($cellDataFormula, strpos($cellDataFormula, ':=') + 1);
540                                    $temp = explode('"', $cellDataFormula);
541                                    $tKey = false;
542                                    foreach ($temp as &$value) {
543                                        // Only replace in alternate array entries (i.e. non-quoted blocks)
544                                        if ($tKey = !$tKey) {
545                                            // Cell range reference in another sheet
546                                            $value = preg_replace('/\[([^\.]+)\.([^\.]+):\.([^\.]+)\]/U', '$1!$2:$3', $value);
547
548                                            // Cell reference in another sheet
549                                            $value = preg_replace('/\[([^\.]+)\.([^\.]+)\]/U', '$1!$2', $value);
550
551                                            // Cell range reference
552                                            $value = preg_replace('/\[\.([^\.]+):\.([^\.]+)\]/U', '$1:$2', $value);
553
554                                            // Simple cell reference
555                                            $value = preg_replace('/\[\.([^\.]+)\]/U', '$1', $value);
556
557                                            $value = Calculation::translateSeparator(';', ',', $value, $inBraces);
558                                        }
559                                    }
560                                    unset($value);
561
562                                    // Then rebuild the formula string
563                                    $cellDataFormula = implode('"', $temp);
564                                }
565
566                                if ($cellData->hasAttributeNS($tableNs, 'number-columns-repeated')) {
567                                    $colRepeats = (int) $cellData->getAttributeNS($tableNs, 'number-columns-repeated');
568                                } else {
569                                    $colRepeats = 1;
570                                }
571
572                                if ($type !== null) {
573                                    for ($i = 0; $i < $colRepeats; ++$i) {
574                                        if ($i > 0) {
575                                            ++$columnID;
576                                        }
577
578                                        if ($type !== DataType::TYPE_NULL) {
579                                            for ($rowAdjust = 0; $rowAdjust < $rowRepeats; ++$rowAdjust) {
580                                                $rID = $rowID + $rowAdjust;
581
582                                                $cell = $spreadsheet->getActiveSheet()
583                                                    ->getCell($columnID . $rID);
584
585                                                // Set value
586                                                if ($hasCalculatedValue) {
587                                                    $cell->setValueExplicit($cellDataFormula, $type);
588                                                } else {
589                                                    $cell->setValueExplicit($dataValue, $type);
590                                                }
591
592                                                if ($hasCalculatedValue) {
593                                                    $cell->setCalculatedValue($dataValue);
594                                                }
595
596                                                // Set other properties
597                                                if ($formatting !== null) {
598                                                    $spreadsheet->getActiveSheet()
599                                                        ->getStyle($columnID . $rID)
600                                                        ->getNumberFormat()
601                                                        ->setFormatCode($formatting);
602                                                } else {
603                                                    $spreadsheet->getActiveSheet()
604                                                        ->getStyle($columnID . $rID)
605                                                        ->getNumberFormat()
606                                                        ->setFormatCode(NumberFormat::FORMAT_GENERAL);
607                                                }
608
609                                                if ($hyperlink !== null) {
610                                                    $cell->getHyperlink()
611                                                        ->setUrl($hyperlink);
612                                                }
613                                            }
614                                        }
615                                    }
616                                }
617
618                                // Merged cells
619                                if ($cellData->hasAttributeNS($tableNs, 'number-columns-spanned')
620                                    || $cellData->hasAttributeNS($tableNs, 'number-rows-spanned')
621                                ) {
622                                    if (($type !== DataType::TYPE_NULL) || (!$this->readDataOnly)) {
623                                        $columnTo = $columnID;
624
625                                        if ($cellData->hasAttributeNS($tableNs, 'number-columns-spanned')) {
626                                            $columnIndex = Coordinate::columnIndexFromString($columnID);
627                                            $columnIndex += (int) $cellData->getAttributeNS($tableNs, 'number-columns-spanned');
628                                            $columnIndex -= 2;
629
630                                            $columnTo = Coordinate::stringFromColumnIndex($columnIndex + 1);
631                                        }
632
633                                        $rowTo = $rowID;
634
635                                        if ($cellData->hasAttributeNS($tableNs, 'number-rows-spanned')) {
636                                            $rowTo = $rowTo + (int) $cellData->getAttributeNS($tableNs, 'number-rows-spanned') - 1;
637                                        }
638
639                                        $cellRange = $columnID . $rowID . ':' . $columnTo . $rowTo;
640                                        $spreadsheet->getActiveSheet()->mergeCells($cellRange);
641                                    }
642                                }
643
644                                ++$columnID;
645                            }
646                            $rowID += $rowRepeats;
647
648                            break;
649                    }
650                }
651                ++$worksheetID;
652            }
653        }
654
655        // Return
656        return $spreadsheet;
657    }
658
659    /**
660     * Recursively scan element.
661     *
662     * @param \DOMNode $element
663     *
664     * @return string
665     */
666    protected function scanElementForText(\DOMNode $element)
667    {
668        $str = '';
669        foreach ($element->childNodes as $child) {
670            /** @var \DOMNode $child */
671            if ($child->nodeType == XML_TEXT_NODE) {
672                $str .= $child->nodeValue;
673            } elseif ($child->nodeType == XML_ELEMENT_NODE && $child->nodeName == 'text:s') {
674                // It's a space
675
676                // Multiple spaces?
677                /** @var \DOMAttr $cAttr */
678                $cAttr = $child->attributes->getNamedItem('c');
679                if ($cAttr) {
680                    $multiplier = (int) $cAttr->nodeValue;
681                } else {
682                    $multiplier = 1;
683                }
684
685                $str .= str_repeat(' ', $multiplier);
686            }
687
688            if ($child->hasChildNodes()) {
689                $str .= $this->scanElementForText($child);
690            }
691        }
692
693        return $str;
694    }
695
696    /**
697     * @param string $is
698     *
699     * @return RichText
700     */
701    private function parseRichText($is)
702    {
703        $value = new RichText();
704        $value->createText($is);
705
706        return $value;
707    }
708}
709