1<?php
2
3namespace Box\Spout\Reader\ODS;
4
5use Box\Spout\Common\Entity\Cell;
6use Box\Spout\Common\Entity\Row;
7use Box\Spout\Common\Exception\IOException;
8use Box\Spout\Common\Manager\OptionsManagerInterface;
9use Box\Spout\Reader\Common\Entity\Options;
10use Box\Spout\Reader\Common\Manager\RowManager;
11use Box\Spout\Reader\Common\XMLProcessor;
12use Box\Spout\Reader\Exception\InvalidValueException;
13use Box\Spout\Reader\Exception\IteratorNotRewindableException;
14use Box\Spout\Reader\Exception\XMLProcessingException;
15use Box\Spout\Reader\IteratorInterface;
16use Box\Spout\Reader\ODS\Creator\InternalEntityFactory;
17use Box\Spout\Reader\ODS\Helper\CellValueFormatter;
18use Box\Spout\Reader\Wrapper\XMLReader;
19
/**
 * Class RowIterator
 *
 * Forward-only iterator over the rows of one ODS sheet.
 *
 * Rows are read lazily: the XML processor is run until one of the callbacks registered
 * in the constructor asks it to stop, which happens when a row is complete or when the
 * closing "</table:table>" node is reached. The resulting row is kept in a buffer and
 * served by current(). A row flagged with "table:number-rows-repeated" is served from
 * that same buffer the required number of times, without reading any more XML.
 */
class RowIterator implements IteratorInterface
{
    /** Definition of XML nodes names used to parse data */
    const XML_NODE_TABLE = 'table:table';
    const XML_NODE_ROW = 'table:table-row';
    const XML_NODE_CELL = 'table:table-cell';

    /** Number of columns at which a row is assumed to end with Excel's trailing empty cell repeater */
    const MAX_COLUMNS_EXCEL = 16384;

    /** Definition of XML attribute used to parse data */
    const XML_ATTRIBUTE_NUM_ROWS_REPEATED = 'table:number-rows-repeated';
    const XML_ATTRIBUTE_NUM_COLUMNS_REPEATED = 'table:number-columns-repeated';

    /** @var \Box\Spout\Reader\Wrapper\XMLReader The XMLReader object that will help read sheet's XML data */
    protected $xmlReader;

    /** @var \Box\Spout\Reader\Common\XMLProcessor Helper Object to process XML nodes */
    protected $xmlProcessor;

    /** @var bool Whether empty rows should be returned or skipped */
    protected $shouldPreserveEmptyRows;

    /** @var Helper\CellValueFormatter Helper to format cell values */
    protected $cellValueFormatter;

    /** @var RowManager Manages rows */
    protected $rowManager;

    /** @var InternalEntityFactory Factory to create entities */
    protected $entityFactory;

    /** @var bool Whether the iterator has already been rewound once */
    protected $hasAlreadyBeenRewound = false;

    /** @var Row The currently processed row */
    protected $currentlyProcessedRow;

    /** @var Row Buffer used to store the current row, while checking if there are more rows to read */
    protected $rowBuffer;

    /** @var bool Indicates whether all rows have been read */
    protected $hasReachedEndOfFile = false;

    /** @var int Last row index processed (one-based) */
    protected $lastRowIndexProcessed = 0;

    /** @var int Row index to be processed next (one-based) */
    protected $nextRowIndexToBeProcessed = 1;

    /** @var Cell|null Last processed cell (because when reading cell at column N+1, cell N is processed) */
    protected $lastProcessedCell;

    /** @var int Number of times the last processed row should be repeated */
    protected $numRowsRepeated = 1;

    /** @var int Number of times the last cell value should be copied to the cells on its right */
    protected $numColumnsRepeated = 1;

    /** @var bool Whether at least one cell has been read for the row currently being processed */
    protected $hasAlreadyReadOneCellInCurrentRow = false;

    /**
     * @param XMLReader $xmlReader XML Reader, positioned on the "<table:table>" element
     * @param OptionsManagerInterface $optionsManager Reader's options manager
     * @param CellValueFormatter $cellValueFormatter Helper to format cell values
     * @param XMLProcessor $xmlProcessor Helper to process XML files
     * @param RowManager $rowManager Manages rows
     * @param InternalEntityFactory $entityFactory Factory to create entities
     */
    public function __construct(
        XMLReader $xmlReader,
        OptionsManagerInterface $optionsManager,
        CellValueFormatter $cellValueFormatter,
        XMLProcessor $xmlProcessor,
        RowManager $rowManager,
        InternalEntityFactory $entityFactory
    ) {
        $this->xmlReader = $xmlReader;
        $this->shouldPreserveEmptyRows = $optionsManager->getOption(Options::SHOULD_PRESERVE_EMPTY_ROWS);
        $this->cellValueFormatter = $cellValueFormatter;
        $this->entityFactory = $entityFactory;
        $this->rowManager = $rowManager;

        // Register all callbacks to process different nodes when reading the XML file.
        // The callbacks are referenced by method name, so these methods must not be renamed.
        $this->xmlProcessor = $xmlProcessor;
        $this->xmlProcessor->registerCallback(self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_START, [$this, 'processRowStartingNode']);
        $this->xmlProcessor->registerCallback(self::XML_NODE_CELL, XMLProcessor::NODE_TYPE_START, [$this, 'processCellStartingNode']);
        $this->xmlProcessor->registerCallback(self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_END, [$this, 'processRowEndingNode']);
        $this->xmlProcessor->registerCallback(self::XML_NODE_TABLE, XMLProcessor::NODE_TYPE_END, [$this, 'processTableEndingNode']);
    }

    /**
     * Rewind the Iterator to the first element.
     * NOTE: It can only be done once, as it is not possible to read an XML file backwards.
     * @see http://php.net/manual/en/iterator.rewind.php
     *
     * @throws \Box\Spout\Reader\Exception\IteratorNotRewindableException If the iterator is rewound more than once
     * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
     * @return void
     */
    public function rewind()
    {
        // Because sheet and row data is located in the file, we can't rewind both the
        // sheet iterator and the row iterator, as XML file cannot be read backwards.
        // Therefore, rewinding the row iterator has been disabled.
        if ($this->hasAlreadyBeenRewound) {
            throw new IteratorNotRewindableException();
        }

        $this->hasAlreadyBeenRewound = true;
        $this->lastRowIndexProcessed = 0;
        $this->nextRowIndexToBeProcessed = 1;
        $this->rowBuffer = null;
        $this->hasReachedEndOfFile = false;

        // Load the first row so that valid()/current() can be used right after rewinding
        $this->next();
    }

    /**
     * Checks if current position is valid
     * @see http://php.net/manual/en/iterator.valid.php
     *
     * @return bool FALSE once the closing "</table:table>" node has been reached, TRUE otherwise
     */
    public function valid()
    {
        return (!$this->hasReachedEndOfFile);
    }

    /**
     * Move forward to next element. Empty rows are skipped unless the reader
     * was configured to preserve them.
     * @see http://php.net/manual/en/iterator.next.php
     *
     * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
     * @return void
     */
    public function next()
    {
        // While serving a repeated row, the buffer already holds the right data
        if ($this->doesNeedDataForNextRowToBeProcessed()) {
            $this->readDataForNextRow();
        }

        $this->lastRowIndexProcessed++;
    }

    /**
     * Returns whether we need data for the next row to be processed.
     * We DO need to read data if:
     *   - we have not read any rows yet
     *      OR
     *   - the next row to be processed immediately follows the last read row
     *
     * @return bool Whether we need data for the next row to be processed.
     */
    protected function doesNeedDataForNextRowToBeProcessed()
    {
        $hasReadAtLeastOneRow = ($this->lastRowIndexProcessed !== 0);

        return (
            !$hasReadAtLeastOneRow ||
            $this->lastRowIndexProcessed === $this->nextRowIndexToBeProcessed - 1
        );
    }

    /**
     * Runs the XML processor until a row has been fully read (or the end of the
     * table has been reached) and stores the result in the row buffer.
     *
     * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
     * @return void
     */
    protected function readDataForNextRow()
    {
        // Also covers the case where the end of the table is reached before any row node
        $this->currentlyProcessedRow = $this->entityFactory->createRow();

        try {
            $this->xmlProcessor->readUntilStopped();
        } catch (XMLProcessingException $exception) {
            // Keep the original exception as "previous" so the root cause is not lost
            throw new IOException("The sheet's data cannot be read. [{$exception->getMessage()}]", 0, $exception);
        }

        $this->rowBuffer = $this->currentlyProcessedRow;
    }

    /**
     * Resets all per-row state when a new "<table:table-row>" node starts.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<table:table-row>" starting node
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processRowStartingNode($xmlReader)
    {
        // Reset data from current row.
        // The row object itself must be reset too: several rows can be visited during a single
        // read when empty rows are skipped, and cells already added for a skipped row must not
        // end up at the beginning of the next returned row.
        $this->currentlyProcessedRow = $this->entityFactory->createRow();
        $this->hasAlreadyReadOneCellInCurrentRow = false;
        $this->lastProcessedCell = null;
        $this->numColumnsRepeated = 1;
        $this->numRowsRepeated = $this->getNumRowsRepeatedForCurrentNode($xmlReader);

        return XMLProcessor::PROCESSING_CONTINUE;
    }

    /**
     * Reads the cell of the current "<table:table-cell>" node and adds the previously
     * read cell (with its repetitions) to the row being processed.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<table:table-cell>" starting node
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processCellStartingNode($xmlReader)
    {
        $currentNumColumnsRepeated = $this->getNumColumnsRepeatedForCurrentNode($xmlReader);

        // NOTE: expand() will automatically decode all XML entities of the child nodes
        $node = $xmlReader->expand();
        $currentCell = $this->getCell($node);

        // Process cell N only after having read cell N+1: the last cell of a row needs
        // special treatment (trailing empty cell repeater), which is applied when the row
        // ends, and a cell is only known not to be the last one once the next cell is read.
        if ($this->hasAlreadyReadOneCellInCurrentRow) {
            for ($i = 0; $i < $this->numColumnsRepeated; $i++) {
                $this->currentlyProcessedRow->addCell($this->lastProcessedCell);
            }
        }

        $this->hasAlreadyReadOneCellInCurrentRow = true;
        $this->lastProcessedCell = $currentCell;
        $this->numColumnsRepeated = $currentNumColumnsRepeated;

        return XMLProcessor::PROCESSING_CONTINUE;
    }

    /**
     * Finalizes the row being processed when its "</table:table-row>" node is reached:
     * either skips it (empty row that should not be preserved) or adds the pending
     * last cell to it and stops the processor so that the row can be returned.
     *
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processRowEndingNode()
    {
        $isEmptyRow = $this->isEmptyRow($this->currentlyProcessedRow, $this->lastProcessedCell);

        // if the fetched row is empty and we don't want to preserve it...
        if (!$this->shouldPreserveEmptyRows && $isEmptyRow) {
            // ... skip it
            return XMLProcessor::PROCESSING_CONTINUE;
        }

        // if the row is empty, we don't want to return more than one cell
        $actualNumColumnsRepeated = (!$isEmptyRow) ? $this->numColumnsRepeated : 1;
        $numCellsInCurrentlyProcessedRow = $this->currentlyProcessedRow->getNumCells();

        // Only add the value if the last read cell is not a trailing empty cell repeater in Excel.
        // The current count of read columns is determined by counting the cells in "$this->currentlyProcessedRow".
        // This is to avoid creating a lot of empty cells, as Excel adds a last empty "<table:table-cell>"
        // with a number-columns-repeated value equals to the number of (supported columns - used columns).
        // In Excel, the number of supported columns is 16384, but we don't want to returns rows with
        // always 16384 cells.
        // A row without any "<table:table-cell>" child has no pending cell at all: nothing to add then.
        $hasPendingCell = ($this->lastProcessedCell !== null);
        $isTrailingEmptyCellRepeater = (($numCellsInCurrentlyProcessedRow + $actualNumColumnsRepeated) === self::MAX_COLUMNS_EXCEL);

        if ($hasPendingCell && !$isTrailingEmptyCellRepeater) {
            for ($i = 0; $i < $actualNumColumnsRepeated; $i++) {
                $this->currentlyProcessedRow->addCell($this->lastProcessedCell);
            }
        }

        // If we are processing row N and the row is repeated M times,
        // then the next row to be processed will be row (N+M).
        $this->nextRowIndexToBeProcessed += $this->numRowsRepeated;

        // at this point, we have all the data we need for the row
        // so that we can populate the buffer
        return XMLProcessor::PROCESSING_STOP;
    }

    /**
     * Flags the end of the data when the closing "</table:table>" node is reached.
     *
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processTableEndingNode()
    {
        // The closing "</table:table>" marks the end of the file
        $this->hasReachedEndOfFile = true;

        return XMLProcessor::PROCESSING_STOP;
    }

    /**
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<table:table-row>" starting node
     * @return int The value of "table:number-rows-repeated" attribute of the current node (never less than 1), or 1 if attribute missing
     */
    protected function getNumRowsRepeatedForCurrentNode($xmlReader)
    {
        $numRowsRepeated = $xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_ROWS_REPEATED);

        if ($numRowsRepeated === null) {
            return 1;
        }

        // A non-positive count (malformed file) would prevent "nextRowIndexToBeProcessed" from moving
        // past the last processed row: no more data would ever be read and the end of the file would
        // never be reached. Such a row is treated as a regular, non repeated row.
        return max(1, (int) $numRowsRepeated);
    }

    /**
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<table:table-cell>" starting node
     * @return int The value of "table:number-columns-repeated" attribute of the current node, or 1 if attribute missing
     */
    protected function getNumColumnsRepeatedForCurrentNode($xmlReader)
    {
        $numColumnsRepeated = $xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_COLUMNS_REPEATED);

        return ($numColumnsRepeated !== null) ? (int) $numColumnsRepeated : 1;
    }

    /**
     * Returns the cell holding the formatted value associated to the given XML node.
     * If the value cannot be formatted, the returned cell holds the invalid value
     * and is flagged as an error cell.
     *
     * @param \DOMNode $node
     * @return Cell The cell set with the value associated to the node
     */
    protected function getCell($node)
    {
        try {
            $cellValue = $this->cellValueFormatter->extractAndFormatNodeValue($node);
            $cell = $this->entityFactory->createCell($cellValue);
        } catch (InvalidValueException $exception) {
            $cell = $this->entityFactory->createCell($exception->getInvalidValue());
            $cell->setType(Cell::TYPE_ERROR);
        }

        return $cell;
    }

    /**
     * After finishing processing each cell, a row is considered empty if the row manager
     * reports the cells added so far as empty and if the last read cell is missing or empty.
     * After finishing processing each cell, the last read cell is not part of the
     * row data yet (as we still need to apply the "num-columns-repeated" attribute).
     *
     * @param Row $currentRow
     * @param Cell|null $lastReadCell The last read cell, if any
     * @return bool Whether the row is empty
     */
    protected function isEmptyRow($currentRow, $lastReadCell)
    {
        return (
            $this->rowManager->isEmpty($currentRow) &&
            (!isset($lastReadCell) || $lastReadCell->isEmpty())
        );
    }

    /**
     * Return the current element, from the buffer.
     * @see http://php.net/manual/en/iterator.current.php
     *
     * @return Row
     */
    public function current()
    {
        return $this->rowBuffer;
    }

    /**
     * Return the key of the current element
     * @see http://php.net/manual/en/iterator.key.php
     *
     * @return int One-based index of the last processed row
     */
    public function key()
    {
        return $this->lastRowIndexProcessed;
    }

    /**
     * Cleans up what was created to iterate over the object.
     *
     * @return void
     */
    public function end()
    {
        $this->xmlReader->close();
    }
}
385