<?php

namespace Box\Spout\Reader\ODS;

use Box\Spout\Common\Entity\Cell;
use Box\Spout\Common\Entity\Row;
use Box\Spout\Common\Exception\IOException;
use Box\Spout\Common\Manager\OptionsManagerInterface;
use Box\Spout\Reader\Common\Entity\Options;
use Box\Spout\Reader\Common\Manager\RowManager;
use Box\Spout\Reader\Common\XMLProcessor;
use Box\Spout\Reader\Exception\InvalidValueException;
use Box\Spout\Reader\Exception\IteratorNotRewindableException;
use Box\Spout\Reader\Exception\XMLProcessingException;
use Box\Spout\Reader\IteratorInterface;
use Box\Spout\Reader\ODS\Creator\InternalEntityFactory;
use Box\Spout\Reader\ODS\Helper\CellValueFormatter;
use Box\Spout\Reader\Wrapper\XMLReader;

/**
 * Class RowIterator
 *
 * Forward-only iterator over the rows of a sheet. Rows are assembled by callbacks
 * registered on the XML processor for the "<table:table-row>", "<table:table-cell>"
 * and "<table:table>" nodes. The iterator can be rewound only once.
 */
class RowIterator implements IteratorInterface
{
    /** Definition of XML nodes names used to parse data */
    const XML_NODE_TABLE = 'table:table';
    const XML_NODE_ROW = 'table:table-row';
    const XML_NODE_CELL = 'table:table-cell';

    /** Number of columns supported by Excel; used to detect Excel's trailing empty cell repeater */
    const MAX_COLUMNS_EXCEL = 16384;

    /** Definition of XML attribute used to parse data */
    const XML_ATTRIBUTE_NUM_ROWS_REPEATED = 'table:number-rows-repeated';
    const XML_ATTRIBUTE_NUM_COLUMNS_REPEATED = 'table:number-columns-repeated';

    /** @var \Box\Spout\Reader\Wrapper\XMLReader The XMLReader object that will help read sheet's XML data */
    protected $xmlReader;

    /** @var \Box\Spout\Reader\Common\XMLProcessor Helper Object to process XML nodes */
    protected $xmlProcessor;

    /** @var bool Whether empty rows should be returned or skipped */
    protected $shouldPreserveEmptyRows;

    /** @var Helper\CellValueFormatter Helper to format cell values */
    protected $cellValueFormatter;

    /** @var RowManager Manages rows */
    protected $rowManager;

    /** @var InternalEntityFactory Factory to create entities */
    protected $entityFactory;

    /** @var bool Whether the iterator has already been rewound once */
    protected $hasAlreadyBeenRewound = false;

    /** @var Row The currently processed row */
    protected $currentlyProcessedRow;

    /** @var Row Buffer used to store the current row, while checking if there are more rows to read */
    protected $rowBuffer;

    /** @var bool Indicates whether all rows have been read */
    protected $hasReachedEndOfFile = false;

    /** @var int Last row index processed (one-based) */
    protected $lastRowIndexProcessed = 0;

    /** @var int Row index to be processed next (one-based) */
    protected $nextRowIndexToBeProcessed = 1;

    /** @var Cell Last processed cell (because when reading cell at column N+1, cell N is processed) */
    protected $lastProcessedCell;

    /** @var int Number of times the last processed row should be repeated */
    protected $numRowsRepeated = 1;

    /** @var int Number of times the last cell value should be copied to the cells on its right */
    protected $numColumnsRepeated = 1;

    /** @var bool Whether at least one cell has been read for the row currently being processed */
    protected $hasAlreadyReadOneCellInCurrentRow = false;

    /**
     * @param XMLReader $xmlReader XML Reader, positioned on the "<table:table>" element
     * @param OptionsManagerInterface $optionsManager Reader's options manager
     * @param CellValueFormatter $cellValueFormatter Helper to format cell values
     * @param XMLProcessor $xmlProcessor Helper to process XML files
     * @param RowManager $rowManager Manages rows
     * @param InternalEntityFactory $entityFactory Factory to create entities
     */
    public function __construct(
        XMLReader $xmlReader,
        OptionsManagerInterface $optionsManager,
        CellValueFormatter $cellValueFormatter,
        XMLProcessor $xmlProcessor,
        RowManager $rowManager,
        InternalEntityFactory $entityFactory
    ) {
        $this->xmlReader = $xmlReader;
        $this->shouldPreserveEmptyRows = $optionsManager->getOption(Options::SHOULD_PRESERVE_EMPTY_ROWS);
        $this->cellValueFormatter = $cellValueFormatter;
        $this->entityFactory = $entityFactory;
        $this->rowManager = $rowManager;

        // Register all callbacks to process different nodes when reading the XML file
        $this->xmlProcessor = $xmlProcessor;
        $this->xmlProcessor->registerCallback(self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_START, [$this, 'processRowStartingNode']);
        $this->xmlProcessor->registerCallback(self::XML_NODE_CELL, XMLProcessor::NODE_TYPE_START, [$this, 'processCellStartingNode']);
        $this->xmlProcessor->registerCallback(self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_END, [$this, 'processRowEndingNode']);
        $this->xmlProcessor->registerCallback(self::XML_NODE_TABLE, XMLProcessor::NODE_TYPE_END, [$this, 'processTableEndingNode']);
    }

    /**
     * Rewind the Iterator to the first element.
     * NOTE: It can only be done once, as it is not possible to read an XML file backwards.
     * @see http://php.net/manual/en/iterator.rewind.php
     *
     * @throws \Box\Spout\Reader\Exception\IteratorNotRewindableException If the iterator is rewound more than once
     * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
     * @return void
     */
    public function rewind()
    {
        // Because sheet and row data are located in the same file, we can't rewind both the
        // sheet iterator and the row iterator, as an XML file cannot be read backwards.
        // Therefore, rewinding the row iterator more than once has been disabled.
        if ($this->hasAlreadyBeenRewound) {
            throw new IteratorNotRewindableException();
        }

        $this->hasAlreadyBeenRewound = true;
        $this->lastRowIndexProcessed = 0;
        $this->nextRowIndexToBeProcessed = 1;
        $this->rowBuffer = null;
        $this->hasReachedEndOfFile = false;

        // Load the first row so that current() is usable right after rewinding
        $this->next();
    }

    /**
     * Checks if current position is valid
     * @see http://php.net/manual/en/iterator.valid.php
     *
     * @return bool
     */
    public function valid()
    {
        return (!$this->hasReachedEndOfFile);
    }

    /**
     * Move forward to next element. Empty rows are skipped unless the reader
     * was configured to preserve them.
     * @see http://php.net/manual/en/iterator.next.php
     *
     * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
     * @return void
     */
    public function next()
    {
        // When the last read row is repeated, the buffered row is served again
        // without reading more XML, until all its repetitions have been returned.
        if ($this->doesNeedDataForNextRowToBeProcessed()) {
            $this->readDataForNextRow();
        }

        $this->lastRowIndexProcessed++;
    }

    /**
     * Returns whether we need data for the next row to be processed.
     * We DO need to read data if:
     *   - we have not read any rows yet
     *      OR
     *   - the next row to be processed immediately follows the last read row
     *
     * @return bool Whether we need data for the next row to be processed.
     */
    protected function doesNeedDataForNextRowToBeProcessed()
    {
        $hasReadAtLeastOneRow = ($this->lastRowIndexProcessed !== 0);

        return (
            !$hasReadAtLeastOneRow ||
            $this->lastRowIndexProcessed === $this->nextRowIndexToBeProcessed - 1
        );
    }

    /**
     * Reads the XML until the registered callbacks stop the processor (end of a row
     * to be returned, or end of the table) and stores the resulting row in the buffer.
     *
     * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
     * @return void
     */
    protected function readDataForNextRow()
    {
        $this->currentlyProcessedRow = $this->entityFactory->createRow();

        try {
            $this->xmlProcessor->readUntilStopped();
        } catch (XMLProcessingException $exception) {
            // Chain the original exception so that its stack trace is not lost
            throw new IOException("The sheet's data cannot be read. [{$exception->getMessage()}]", 0, $exception);
        }

        $this->rowBuffer = $this->currentlyProcessedRow;
    }

    /**
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<table:table-row>" starting node
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processRowStartingNode($xmlReader)
    {
        // Reset data from current row
        $this->hasAlreadyReadOneCellInCurrentRow = false;
        $this->lastProcessedCell = null;
        $this->numColumnsRepeated = 1;
        $this->numRowsRepeated = $this->getNumRowsRepeatedForCurrentNode($xmlReader);

        return XMLProcessor::PROCESSING_CONTINUE;
    }

    /**
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<table:table-cell>" starting node
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processCellStartingNode($xmlReader)
    {
        $currentNumColumnsRepeated = $this->getNumColumnsRepeatedForCurrentNode($xmlReader);

        // NOTE: expand() will automatically decode all XML entities of the child nodes
        $node = $xmlReader->expand();
        $currentCell = $this->getCell($node);

        // Process cell N only after having read cell N+1: the last cell of a row is handled
        // separately in processRowEndingNode(), where a trailing empty repeater may be dropped.
        if ($this->hasAlreadyReadOneCellInCurrentRow) {
            for ($i = 0; $i < $this->numColumnsRepeated; $i++) {
                $this->currentlyProcessedRow->addCell($this->lastProcessedCell);
            }
        }

        $this->hasAlreadyReadOneCellInCurrentRow = true;
        $this->lastProcessedCell = $currentCell;
        $this->numColumnsRepeated = $currentNumColumnsRepeated;

        return XMLProcessor::PROCESSING_CONTINUE;
    }

    /**
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processRowEndingNode()
    {
        $isEmptyRow = $this->isEmptyRow($this->currentlyProcessedRow, $this->lastProcessedCell);

        // if the fetched row is empty and we don't want to preserve it...
        if (!$this->shouldPreserveEmptyRows && $isEmptyRow) {
            // ... skip it. Start over with a fresh row, so that cells already collected for
            // the skipped row cannot leak into the next row read during the same pass.
            // NOTE(review): this matters if RowManager::isEmpty() reports a row holding only
            // empty cells as empty - confirm against RowManager.
            $this->currentlyProcessedRow = $this->entityFactory->createRow();

            return XMLProcessor::PROCESSING_CONTINUE;
        }

        // if the row is empty, we don't want to return more than one cell
        $actualNumColumnsRepeated = (!$isEmptyRow) ? $this->numColumnsRepeated : 1;
        $numCellsInCurrentlyProcessedRow = $this->currentlyProcessedRow->getNumCells();

        // Only add the value if the last read cell is not a trailing empty cell repeater in Excel.
        // The current count of read columns is determined by counting the cells in "$this->currentlyProcessedRow".
        // This is to avoid creating a lot of empty cells, as Excel adds a last empty "<table:table-cell>"
        // with a number-columns-repeated value equal to the number of (supported columns - used columns).
        // In Excel, the number of supported columns is 16384, but we don't want to return rows with
        // always 16384 cells.
        if (($numCellsInCurrentlyProcessedRow + $actualNumColumnsRepeated) !== self::MAX_COLUMNS_EXCEL) {
            for ($i = 0; $i < $actualNumColumnsRepeated; $i++) {
                $this->currentlyProcessedRow->addCell($this->lastProcessedCell);
            }
        }

        // If we are processing row N and the row is repeated M times,
        // then the next row to be processed will be row (N+M).
        $this->nextRowIndexToBeProcessed += $this->numRowsRepeated;

        // at this point, we have all the data we need for the row
        // so that we can populate the buffer
        return XMLProcessor::PROCESSING_STOP;
    }

    /**
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processTableEndingNode()
    {
        // The closing "</table:table>" marks the end of the file
        $this->hasReachedEndOfFile = true;

        return XMLProcessor::PROCESSING_STOP;
    }

    /**
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<table:table-row>" starting node
     * @return int The value of "table:number-rows-repeated" attribute of the current node, or 1 if attribute missing
     */
    protected function getNumRowsRepeatedForCurrentNode($xmlReader)
    {
        $numRowsRepeated = $xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_ROWS_REPEATED);

        return ($numRowsRepeated !== null) ? (int) $numRowsRepeated : 1;
    }

    /**
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<table:table-cell>" starting node
     * @return int The value of "table:number-columns-repeated" attribute of the current node, or 1 if attribute missing
     */
    protected function getNumColumnsRepeatedForCurrentNode($xmlReader)
    {
        $numColumnsRepeated = $xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_COLUMNS_REPEATED);

        return ($numColumnsRepeated !== null) ? (int) $numColumnsRepeated : 1;
    }

    /**
     * Returns the cell holding the formatted value extracted from the given XML node.
     * If the value is reported as invalid by the formatter, the returned cell holds
     * the invalid value and is flagged with the error type.
     *
     * @param \DOMNode $node
     * @return Cell The cell set with the value associated to the given node
     */
    protected function getCell($node)
    {
        try {
            $cellValue = $this->cellValueFormatter->extractAndFormatNodeValue($node);
            $cell = $this->entityFactory->createCell($cellValue);
        } catch (InvalidValueException $exception) {
            $cell = $this->entityFactory->createCell($exception->getInvalidValue());
            $cell->setType(Cell::TYPE_ERROR);
        }

        return $cell;
    }

    /**
     * After finishing processing each cell, a row is considered empty if the row manager
     * reports the row collected so far as empty and the last read cell is missing or empty.
     * After finishing processing each cell, the last read cell is not part of the
     * row data yet (as we still need to apply the "num-columns-repeated" attribute).
     *
     * @param Row $currentRow
     * @param Cell|null $lastReadCell The last read cell, or null if the row had no cells
     * @return bool Whether the row is empty
     */
    protected function isEmptyRow($currentRow, $lastReadCell)
    {
        return (
            $this->rowManager->isEmpty($currentRow) &&
            (!isset($lastReadCell) || $lastReadCell->isEmpty())
        );
    }

    /**
     * Return the current element, from the buffer.
     * @see http://php.net/manual/en/iterator.current.php
     *
     * @return Row
     */
    public function current()
    {
        return $this->rowBuffer;
    }

    /**
     * Return the key of the current element, i.e. the one-based index of the last processed row
     * @see http://php.net/manual/en/iterator.key.php
     *
     * @return int
     */
    public function key()
    {
        return $this->lastRowIndexProcessed;
    }

    /**
     * Cleans up what was created to iterate over the object.
     *
     * @return void
     */
    public function end()
    {
        $this->xmlReader->close();
    }
}