1 /*=========================================================================
2 
3   Program:   Visualization Toolkit
4   Module:    vtkXMLDataParser.cxx
5 
6   Copyright (c) Ken Martin, Will Schroeder, Bill Lorensen
7   All rights reserved.
8   See Copyright.txt or http://www.kitware.com/Copyright.htm for details.
9 
10      This software is distributed WITHOUT ANY WARRANTY; without even
11      the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
12      PURPOSE.  See the above copyright notice for more information.
13 
14 =========================================================================*/
15 #include "vtkXMLDataParser.h"
16 
17 #include "vtkBase64InputStream.h"
18 #include "vtkByteSwap.h"
19 #include "vtkCommand.h"
20 #include "vtkDataCompressor.h"
21 #include "vtkEndian.h"
22 #include "vtkInputStream.h"
23 #include "vtkObjectFactory.h"
24 #include "vtkXMLDataElement.h"
25 #define vtkXMLDataHeaderPrivate_DoNotInclude
26 #include "vtkXMLDataHeaderPrivate.h"
27 #undef vtkXMLDataHeaderPrivate_DoNotInclude
28 
29 #include <algorithm>
30 #include <cassert>
31 #include <cctype>
32 #include <memory>
33 #include <sstream>
34 #include <vector>
35 
36 #include "vtkXMLUtilities.h"
37 
38 vtkStandardNewMacro(vtkXMLDataParser);
39 vtkCxxSetObjectMacro(vtkXMLDataParser, Compressor, vtkDataCompressor);
40 
41 //------------------------------------------------------------------------------
vtkXMLDataParser()42 vtkXMLDataParser::vtkXMLDataParser()
43 {
44   this->NumberOfOpenElements = 0;
45   this->OpenElementsSize = 10;
46   this->OpenElements = new vtkXMLDataElement*[this->OpenElementsSize];
47   this->RootElement = nullptr;
48   this->AppendedDataPosition = 0;
49   this->AppendedDataMatched = 0;
50   this->DataStream = nullptr;
51   this->InlineDataStream = vtkBase64InputStream::New();
52   this->AppendedDataStream = vtkBase64InputStream::New();
53 
54   this->BlockCompressedSizes = nullptr;
55   this->BlockStartOffsets = nullptr;
56   this->Compressor = nullptr;
57 
58   this->AsciiDataBuffer = nullptr;
59   this->AsciiDataBufferLength = 0;
60   this->AsciiDataPosition = 0;
61 
62   this->Abort = 0;
63   this->Progress = 0;
64 
65   // Default byte order to that of this machine.
66 #ifdef VTK_WORDS_BIGENDIAN
67   this->ByteOrder = vtkXMLDataParser::BigEndian;
68 #else
69   this->ByteOrder = vtkXMLDataParser::LittleEndian;
70 #endif
71   this->HeaderType = 32;
72 
73   this->AttributesEncoding = VTK_ENCODING_NONE;
74 
75   // Have specialized methods for reading array data both inline or
76   // appended, however typical tags may use the more general CharacterData
77   // methods.
78   this->IgnoreCharacterData = 0;
79 }
80 
81 //------------------------------------------------------------------------------
~vtkXMLDataParser()82 vtkXMLDataParser::~vtkXMLDataParser()
83 {
84   this->FreeAllElements();
85   delete[] this->OpenElements;
86   this->InlineDataStream->Delete();
87   this->AppendedDataStream->Delete();
88   delete[] this->BlockCompressedSizes;
89   delete[] this->BlockStartOffsets;
90   this->SetCompressor(nullptr);
91   if (this->AsciiDataBuffer)
92   {
93     this->FreeAsciiBuffer();
94   }
95 }
96 
97 //------------------------------------------------------------------------------
PrintSelf(ostream & os,vtkIndent indent)98 void vtkXMLDataParser::PrintSelf(ostream& os, vtkIndent indent)
99 {
100   this->Superclass::PrintSelf(os, indent);
101   os << indent << "AppendedDataPosition: " << this->AppendedDataPosition << "\n";
102   if (this->RootElement)
103   {
104     this->RootElement->PrintXML(os, indent);
105   }
106   if (this->Compressor)
107   {
108     os << indent << "Compressor: " << this->Compressor << "\n";
109   }
110   else
111   {
112     os << indent << "Compressor: (none)\n";
113   }
114   os << indent << "Progress: " << this->Progress << "\n";
115   os << indent << "Abort: " << this->Abort << "\n";
116   os << indent << "AttributesEncoding: " << this->AttributesEncoding << "\n";
117 }
118 
119 //------------------------------------------------------------------------------
Parse()120 int vtkXMLDataParser::Parse()
121 {
122   // Delete any elements left from previous parsing.
123   this->FreeAllElements();
124 
125   // Parse the input from the stream.
126   int result = this->Superclass::Parse();
127 
128   // Check that the input is okay.
129   if (result && !this->CheckPrimaryAttributes())
130   {
131     result = 0;
132   }
133 
134   return result;
135 }
136 
137 //------------------------------------------------------------------------------
Parse(const char *)138 int vtkXMLDataParser::Parse(const char*)
139 {
140   vtkErrorMacro("Parsing from a string is not supported.");
141   return 0;
142 }
143 
144 //------------------------------------------------------------------------------
Parse(const char *,unsigned int)145 int vtkXMLDataParser::Parse(const char*, unsigned int)
146 {
147   vtkErrorMacro("Parsing from a string is not supported.");
148   return 0;
149 }
150 
151 //------------------------------------------------------------------------------
StartElement(const char * name,const char ** atts)152 void vtkXMLDataParser::StartElement(const char* name, const char** atts)
153 {
154   vtkXMLDataElement* element = vtkXMLDataElement::New();
155   element->SetName(name);
156   element->SetXMLByteIndex(this->GetXMLByteIndex());
157   vtkXMLUtilities::ReadElementFromAttributeArray(element, atts, this->AttributesEncoding);
158 
159   const char* id = element->GetAttribute("id");
160   if (id)
161   {
162     element->SetId(id);
163   }
164   this->PushOpenElement(element);
165 
166   if (strcmp(name, "AppendedData") == 0)
167   {
168     // This is the AppendedData element.
169     this->FindAppendedDataPosition();
170 
171     // Switch to raw decoder if necessary.
172     const char* encoding = element->GetAttribute("encoding");
173     if (encoding && (strcmp(encoding, "raw") == 0))
174     {
175       this->AppendedDataStream->Delete();
176       this->AppendedDataStream = vtkInputStream::New();
177     }
178   }
179 }
180 
181 //------------------------------------------------------------------------------
SeekInlineDataPosition(vtkXMLDataElement * element)182 void vtkXMLDataParser::SeekInlineDataPosition(vtkXMLDataElement* element)
183 {
184   istream* stream = this->GetStream();
185   if (!element->GetInlineDataPosition())
186   {
187     // Scan for the start of the actual inline data.
188     char c = 0;
189     stream->clear(stream->rdstate() & ~ios::eofbit);
190     stream->clear(stream->rdstate() & ~ios::failbit);
191     this->SeekG(element->GetXMLByteIndex());
192     while (stream->get(c) && (c != '>'))
193     {
194       ;
195     }
196     while (stream->get(c) && vtkXMLDataElement::IsSpace(c))
197     {
198       ;
199     }
200     vtkTypeInt64 pos = this->TellG();
201     element->SetInlineDataPosition(pos - 1);
202   }
203 
204   // Seek to the data position.
205   this->SeekG(element->GetInlineDataPosition());
206 }
207 
208 //------------------------------------------------------------------------------
EndElement(const char *)209 void vtkXMLDataParser::EndElement(const char*)
210 {
211   vtkXMLDataElement* finished = this->PopOpenElement();
212   unsigned int numOpen = this->NumberOfOpenElements;
213   if (numOpen > 0)
214   {
215     this->OpenElements[numOpen - 1]->AddNestedElement(finished);
216     finished->Delete();
217   }
218   else
219   {
220     this->RootElement = finished;
221   }
222 }
223 
224 //------------------------------------------------------------------------------
ParsingComplete()225 int vtkXMLDataParser::ParsingComplete()
226 {
227   // If we have reached the appended data section, we stop parsing.
228   // This prevents the XML parser from having to walk over the entire
229   // appended data section.
230   if (this->AppendedDataPosition)
231   {
232     return 1;
233   }
234   return this->Superclass::ParsingComplete();
235 }
236 
237 //------------------------------------------------------------------------------
CheckPrimaryAttributes()238 int vtkXMLDataParser::CheckPrimaryAttributes()
239 {
240   const char* byte_order = this->RootElement->GetAttribute("byte_order");
241   if (byte_order)
242   {
243     if (strcmp(byte_order, "BigEndian") == 0)
244     {
245       this->ByteOrder = vtkXMLDataParser::BigEndian;
246     }
247     else if (strcmp(byte_order, "LittleEndian") == 0)
248     {
249       this->ByteOrder = vtkXMLDataParser::LittleEndian;
250     }
251     else
252     {
253       vtkErrorMacro("Unsupported byte_order=\"" << byte_order << "\"");
254       return 0;
255     }
256   }
257   if (const char* header_type = this->RootElement->GetAttribute("header_type"))
258   {
259     if (strcmp(header_type, "UInt32") == 0)
260     {
261       this->HeaderType = 32;
262     }
263     else if (strcmp(header_type, "UInt64") == 0)
264     {
265       this->HeaderType = 64;
266     }
267     else
268     {
269       vtkErrorMacro("Unsupported header_type=\"" << header_type << "\"");
270       return 0;
271     }
272   }
273   return 1;
274 }
275 
276 //------------------------------------------------------------------------------
FindAppendedDataPosition()277 void vtkXMLDataParser::FindAppendedDataPosition()
278 {
279   // Clear stream fail and eof bits.  We may have already read past
280   // the end of the stream when processing the AppendedData element.
281   this->Stream->clear(this->Stream->rdstate() & ~ios::failbit);
282   this->Stream->clear(this->Stream->rdstate() & ~ios::eofbit);
283 
284   // Scan for the start of the actual appended data.
285   char c = 0;
286   vtkTypeInt64 returnPosition = this->TellG();
287   this->SeekG(this->GetXMLByteIndex());
288   while (this->Stream->get(c) && (c != '>'))
289   {
290     ;
291   }
292   while (this->Stream->get(c) && vtkXMLDataParser::IsSpace(c))
293   {
294     ;
295   }
296 
297   // Store the start of the appended data.  We skip the first
298   // character because it is always a "_".
299   this->AppendedDataPosition = this->TellG();
300 
301   // If first character was not an underscore, assume it is part of
302   // the data.
303   if (c != '_')
304   {
305     vtkWarningMacro("First character in AppendedData is ASCII value "
306       << int(c) << ", not '_'.  Scan for first character "
307       << "started from file position " << this->GetXMLByteIndex() << ".  The return position is "
308       << returnPosition << ".");
309     --this->AppendedDataPosition;
310   }
311 
312   // Restore the stream position.
313   this->SeekG(returnPosition);
314 }
315 
316 //------------------------------------------------------------------------------
PushOpenElement(vtkXMLDataElement * element)317 void vtkXMLDataParser::PushOpenElement(vtkXMLDataElement* element)
318 {
319   if (this->NumberOfOpenElements == this->OpenElementsSize)
320   {
321     unsigned int newSize = this->OpenElementsSize * 2;
322     vtkXMLDataElement** newOpenElements = new vtkXMLDataElement*[newSize];
323     unsigned int i;
324     for (i = 0; i < this->NumberOfOpenElements; ++i)
325     {
326       newOpenElements[i] = this->OpenElements[i];
327     }
328     delete[] this->OpenElements;
329     this->OpenElements = newOpenElements;
330     this->OpenElementsSize = newSize;
331   }
332 
333   unsigned int pos = this->NumberOfOpenElements++;
334   this->OpenElements[pos] = element;
335 }
336 
337 //------------------------------------------------------------------------------
PopOpenElement()338 vtkXMLDataElement* vtkXMLDataParser::PopOpenElement()
339 {
340   if (this->NumberOfOpenElements > 0)
341   {
342     --this->NumberOfOpenElements;
343     return this->OpenElements[this->NumberOfOpenElements];
344   }
345   return nullptr;
346 }
347 
348 //------------------------------------------------------------------------------
FreeAllElements()349 void vtkXMLDataParser::FreeAllElements()
350 {
351   while (this->NumberOfOpenElements > 0)
352   {
353     --this->NumberOfOpenElements;
354     this->OpenElements[this->NumberOfOpenElements]->Delete();
355     this->OpenElements[this->NumberOfOpenElements] = nullptr;
356   }
357   if (this->RootElement)
358   {
359     this->RootElement->Delete();
360     this->RootElement = nullptr;
361   }
362 }
363 
364 //------------------------------------------------------------------------------
ParseBuffer(const char * buffer,unsigned int count)365 int vtkXMLDataParser::ParseBuffer(const char* buffer, unsigned int count)
366 {
367   // Parsing must stop when "<AppendedData" is reached.  Use a search
368   // similar to the KMP string search algorithm.
369   const char pattern[] = "<AppendedData";
370   const int length = sizeof(pattern) - 1;
371 
372   const char* s = buffer;
373   const char* end = buffer + count;
374   int matched = this->AppendedDataMatched;
375   while (s != end)
376   {
377     char c = *s++;
378     if (c == pattern[matched])
379     {
380       if (++matched == length)
381       {
382         break;
383       }
384     }
385     else
386     {
387       matched = (c == pattern[0]) ? 1 : 0;
388     }
389   }
390   this->AppendedDataMatched = matched;
391 
392   // Parse as much of the buffer as is safe.
393   if (!this->Superclass::ParseBuffer(buffer, s - buffer))
394   {
395     return 0;
396   }
397 
398   // If we have reached the appended data, artificially finish the
399   // document.
400   if (matched == length)
401   {
402     // Parse the rest of the element's opening tag.
403     const char* t = s;
404     char prev = 0;
405     while ((t != end) && (*t != '>'))
406     {
407       ++t;
408     }
409     if (!this->Superclass::ParseBuffer(s, t - s))
410     {
411       return 0;
412     }
413     if (t > s)
414     {
415       prev = *(t - 1);
416     }
417 
418     if (t == end)
419     {
420       // Scan for the real end of the element's opening tag.
421       char c = 0;
422       while (this->Stream->get(c) && (c != '>'))
423       {
424         prev = c;
425         if (!this->Superclass::ParseBuffer(&c, 1))
426         {
427           return 0;
428         }
429       }
430     }
431 
432     // Artificially end the AppendedData element.
433     if (prev != '/')
434     {
435       if (!this->Superclass::ParseBuffer("/", 1))
436       {
437         return 0;
438       }
439     }
440     if (!this->Superclass::ParseBuffer(">", 1))
441     {
442       return 0;
443     }
444 
445     // Artificially end the VTKFile element.
446     const char finish[] = "\n</VTKFile>\n";
447     if (!this->Superclass::ParseBuffer(finish, sizeof(finish) - 1))
448     {
449       return 0;
450     }
451   }
452 
453   return 1;
454 }
455 
456 //------------------------------------------------------------------------------
// Return the size in bytes of the type the (unused) pointer argument
// points to.  The pointer only serves to select T.
template <typename T>
size_t vtkXMLDataParserGetWordTypeSize(T* /*typeTag*/)
{
  const size_t bytes = sizeof(T);
  return bytes;
}
462 
463 //------------------------------------------------------------------------------
GetWordTypeSize(int wordType)464 size_t vtkXMLDataParser::GetWordTypeSize(int wordType)
465 {
466   size_t size = 1;
467   switch (wordType)
468   {
469     vtkTemplateMacro(size = vtkXMLDataParserGetWordTypeSize(static_cast<VTK_TT*>(nullptr)));
470 
471     case VTK_BIT:
472       size = 1;
473       break;
474 
475     default:
476     {
477       vtkWarningMacro("Unsupported data type: " << wordType);
478     }
479     break;
480   }
481   return size;
482 }
483 
484 //------------------------------------------------------------------------------
PerformByteSwap(void * data,size_t numWords,size_t wordSize)485 void vtkXMLDataParser::PerformByteSwap(void* data, size_t numWords, size_t wordSize)
486 {
487   char* ptr = static_cast<char*>(data);
488   if (this->ByteOrder == vtkXMLDataParser::BigEndian)
489   {
490     switch (wordSize)
491     {
492       case 1:
493         break;
494       case 2:
495         vtkByteSwap::Swap2BERange(ptr, numWords);
496         break;
497       case 4:
498         vtkByteSwap::Swap4BERange(ptr, numWords);
499         break;
500       case 8:
501         vtkByteSwap::Swap8BERange(ptr, numWords);
502         break;
503       default:
504         vtkErrorMacro("Unsupported data type size " << wordSize);
505     }
506   }
507   else
508   {
509     switch (wordSize)
510     {
511       case 1:
512         break;
513       case 2:
514         vtkByteSwap::Swap2LERange(ptr, numWords);
515         break;
516       case 4:
517         vtkByteSwap::Swap4LERange(ptr, numWords);
518         break;
519       case 8:
520         vtkByteSwap::Swap8LERange(ptr, numWords);
521         break;
522       default:
523         vtkErrorMacro("Unsupported data type size " << wordSize);
524     }
525   }
526 }
527 
528 //------------------------------------------------------------------------------
ReadCompressionHeader()529 int vtkXMLDataParser::ReadCompressionHeader()
530 {
531   std::unique_ptr<vtkXMLDataHeader> ch(vtkXMLDataHeader::New(this->HeaderType, 3));
532 
533   this->DataStream->StartReading();
534 
535   // Read the standard part of the header.
536   size_t const headerSize = ch->DataSize();
537   size_t r = this->DataStream->Read(ch->Data(), headerSize);
538   if (r < headerSize)
539   {
540     vtkErrorMacro("Error reading beginning of compression header.  Read "
541       << r << " of " << headerSize << " bytes.");
542     return 0;
543   }
544 
545   // Byte swap the header to make sure the values are correct.
546   this->PerformByteSwap(ch->Data(), ch->WordCount(), ch->WordSize());
547 
548   // Get the standard values.
549   this->NumberOfBlocks = size_t(ch->Get(0));
550   this->BlockUncompressedSize = size_t(ch->Get(1));
551   this->PartialLastBlockUncompressedSize = size_t(ch->Get(2));
552 
553   // Allocate the size and offset parts of the header.
554   ch->Resize(this->NumberOfBlocks);
555   delete[] this->BlockCompressedSizes;
556   this->BlockCompressedSizes = nullptr;
557   delete[] this->BlockStartOffsets;
558   this->BlockStartOffsets = nullptr;
559   if (this->NumberOfBlocks > 0)
560   {
561     this->BlockCompressedSizes = new size_t[this->NumberOfBlocks];
562     this->BlockStartOffsets = new vtkTypeInt64[this->NumberOfBlocks];
563 
564     // Read the compressed block sizes.
565     size_t len = ch->DataSize();
566     if (this->DataStream->Read(ch->Data(), len) < len)
567     {
568       vtkErrorMacro("Error reading compression header.");
569       return 0;
570     }
571 
572     // Byte swap the sizes to make sure the values are correct.
573     this->PerformByteSwap(ch->Data(), ch->WordCount(), ch->WordSize());
574   }
575 
576   this->DataStream->EndReading();
577 
578   // Use the compressed block sizes to calculate the starting offset
579   // of each block.
580   vtkTypeInt64 offset = 0;
581   for (size_t i = 0; i < this->NumberOfBlocks; ++i)
582   {
583     size_t const sz = size_t(ch->Get(i));
584     this->BlockCompressedSizes[i] = sz;
585     this->BlockStartOffsets[i] = offset;
586     offset += sz;
587   }
588   return 1;
589 }
590 
591 //------------------------------------------------------------------------------
FindBlockSize(vtkTypeUInt64 block)592 size_t vtkXMLDataParser::FindBlockSize(vtkTypeUInt64 block)
593 {
594   if (block < this->NumberOfBlocks - (this->PartialLastBlockUncompressedSize ? 1 : 0))
595   {
596     return this->BlockUncompressedSize;
597   }
598   else
599   {
600     return this->PartialLastBlockUncompressedSize;
601   }
602 }
603 
604 //------------------------------------------------------------------------------
ReadBlock(vtkTypeUInt64 block,unsigned char * buffer)605 int vtkXMLDataParser::ReadBlock(vtkTypeUInt64 block, unsigned char* buffer)
606 {
607   size_t uncompressedSize = this->FindBlockSize(block);
608   size_t compressedSize = this->BlockCompressedSizes[block];
609 
610   if (!this->DataStream->Seek(this->BlockStartOffsets[block]))
611   {
612     return 0;
613   }
614 
615   unsigned char* readBuffer = new unsigned char[compressedSize];
616 
617   if (this->DataStream->Read(readBuffer, compressedSize) < compressedSize)
618   {
619     delete[] readBuffer;
620     return 0;
621   }
622 
623   size_t result =
624     this->Compressor->Uncompress(readBuffer, compressedSize, buffer, uncompressedSize);
625 
626   delete[] readBuffer;
627   return result > 0;
628 }
629 
630 //------------------------------------------------------------------------------
ReadBlock(vtkTypeUInt64 block)631 unsigned char* vtkXMLDataParser::ReadBlock(vtkTypeUInt64 block)
632 {
633   unsigned char* decompressBuffer = new unsigned char[this->FindBlockSize(block)];
634   if (!this->ReadBlock(block, decompressBuffer))
635   {
636     delete[] decompressBuffer;
637     return nullptr;
638   }
639   return decompressBuffer;
640 }
641 
642 //------------------------------------------------------------------------------
ReadUncompressedData(unsigned char * data,vtkTypeUInt64 startWord,size_t numWords,size_t wordSize)643 size_t vtkXMLDataParser::ReadUncompressedData(
644   unsigned char* data, vtkTypeUInt64 startWord, size_t numWords, size_t wordSize)
645 {
646   // First read the length of the data.
647   std::unique_ptr<vtkXMLDataHeader> uh(vtkXMLDataHeader::New(this->HeaderType, 1));
648 
649   size_t const headerSize = uh->DataSize();
650   size_t r = this->DataStream->Read(uh->Data(), headerSize);
651   if (r < headerSize)
652   {
653     vtkErrorMacro("Error reading uncompressed binary data header.  "
654                   "Read "
655       << r << " of " << headerSize << " bytes.");
656     return 0;
657   }
658   this->PerformByteSwap(uh->Data(), uh->WordCount(), uh->WordSize());
659   vtkTypeUInt64 rsize = uh->Get(0);
660 
661   // Adjust the size to be a multiple of the wordSize by taking
662   // advantage of integer division.  This will only change the value
663   // when the input file is invalid.
664   vtkTypeUInt64 size = (rsize / wordSize) * wordSize;
665 
666   // Convert the start/length into bytes.
667   vtkTypeUInt64 offset = startWord * wordSize;
668   size_t length = numWords * wordSize;
669 
670   // Make sure the begin/end offsets fall within total size.
671   if (offset > size)
672   {
673     return 0;
674   }
675   vtkTypeUInt64 end = offset + length;
676   if (end > size)
677   {
678     end = size;
679   }
680   length = end - offset;
681 
682   // Read the data.
683   if (!this->DataStream->Seek(headerSize + offset))
684   {
685     return 0;
686   }
687 
688   // Read data in 2MB blocks and report progress.
689   size_t const blockSize = 2097152;
690   size_t left = length;
691   unsigned char* p = data;
692   this->UpdateProgress(0);
693   while (left > 0 && !this->Abort)
694   {
695     // Read this block.
696     size_t n = (blockSize < left) ? blockSize : left;
697     if (!this->DataStream->Read(p, n))
698     {
699       return 0;
700     }
701 
702     // Byte swap this block.  Note that n will always be an integer
703     // multiple of the word size.
704     this->PerformByteSwap(p, n / wordSize, wordSize);
705 
706     // Update pointer and counter.
707     p += n;
708     left -= n;
709 
710     // Report progress.
711     this->UpdateProgress(float(p - data) / length);
712   }
713   this->UpdateProgress(1);
714   return length / wordSize;
715 }
716 
717 //------------------------------------------------------------------------------
ReadCompressedData(unsigned char * data,vtkTypeUInt64 startWord,size_t numWords,size_t wordSize)718 size_t vtkXMLDataParser::ReadCompressedData(
719   unsigned char* data, vtkTypeUInt64 startWord, size_t numWords, size_t wordSize)
720 {
721   // Make sure there are data.
722   if (numWords == 0)
723   {
724     return 0;
725   }
726 
727   // Find the begin and end offsets into the data.
728   vtkTypeUInt64 beginOffset = startWord * wordSize;
729   vtkTypeUInt64 endOffset = beginOffset + numWords * wordSize;
730 
731   // Find the total size of the data.
732   vtkTypeUInt64 totalSize = this->NumberOfBlocks * this->BlockUncompressedSize;
733   if (this->PartialLastBlockUncompressedSize)
734   {
735     totalSize -= this->BlockUncompressedSize;
736     totalSize += this->PartialLastBlockUncompressedSize;
737   }
738 
739   // Make sure there's even data to be read
740   if (totalSize == 0)
741   {
742     return 0;
743   }
744 
745   // Adjust the size to be a multiple of the wordSize by taking
746   // advantage of integer division.  This will only change the value
747   // when the input file is invalid.
748   totalSize = (totalSize / wordSize) * wordSize;
749 
750   // Make sure the begin/end offsets fall within the total size.
751   if (beginOffset > totalSize)
752   {
753     return 0;
754   }
755   if (endOffset > totalSize)
756   {
757     endOffset = totalSize;
758   }
759 
760   // Find the range of compression blocks to read.
761   vtkTypeUInt64 firstBlock = beginOffset / this->BlockUncompressedSize;
762   vtkTypeUInt64 lastBlock = endOffset / this->BlockUncompressedSize;
763 
764   // Find the offset into the first block where the data begin.
765   size_t beginBlockOffset = beginOffset - firstBlock * this->BlockUncompressedSize;
766 
767   // Find the offset into the last block where the data end.
768   size_t endBlockOffset = endOffset - lastBlock * this->BlockUncompressedSize;
769 
770   this->UpdateProgress(0);
771   if (firstBlock == lastBlock)
772   {
773     // Everything fits in one block.
774     unsigned char* blockBuffer = this->ReadBlock(firstBlock);
775     if (!blockBuffer)
776     {
777       return 0;
778     }
779     size_t n = endBlockOffset - beginBlockOffset;
780     memcpy(data, blockBuffer + beginBlockOffset, n);
781     delete[] blockBuffer;
782 
783     // Byte swap this block.  Note that n will always be an integer
784     // multiple of the word size.
785     this->PerformByteSwap(data, n / wordSize, wordSize);
786   }
787   else
788   {
789     // Read all the complete blocks first.
790     size_t length = endOffset - beginOffset;
791     unsigned char* outputPointer = data;
792     size_t blockSize = this->FindBlockSize(firstBlock);
793 
794     // Read the first block.
795     unsigned char* blockBuffer = this->ReadBlock(firstBlock);
796     if (!blockBuffer)
797     {
798       return 0;
799     }
800     size_t n = blockSize - beginBlockOffset;
801     memcpy(outputPointer, blockBuffer + beginBlockOffset, n);
802     delete[] blockBuffer;
803 
804     // Byte swap the first block.  Note that n will always be an
805     // integer multiple of the word size.
806     this->PerformByteSwap(outputPointer, n / wordSize, wordSize);
807 
808     // Advance the pointer to the beginning of the second block.
809     outputPointer += blockSize - beginBlockOffset;
810 
811     // Report progress.
812     this->UpdateProgress(float(outputPointer - data) / length);
813 
814     unsigned int currentBlock = firstBlock + 1;
815     for (; currentBlock != lastBlock && !this->Abort; ++currentBlock)
816     {
817       // Read this block.
818       if (!this->ReadBlock(currentBlock, outputPointer))
819       {
820         return 0;
821       }
822 
823       // Byte swap this block.  Note that blockSize will always be an
824       // integer multiple of the word size.
825       this->PerformByteSwap(outputPointer, blockSize / wordSize, wordSize);
826 
827       // Advance the pointer to the beginning of the next block.
828       outputPointer += this->FindBlockSize(currentBlock);
829 
830       // Report progress.
831       this->UpdateProgress(float(outputPointer - data) / length);
832     }
833 
834     // Now read the final block, which is incomplete if it exists.
835     if (endBlockOffset > 0 && !this->Abort)
836     {
837       blockBuffer = this->ReadBlock(lastBlock);
838       if (!blockBuffer)
839       {
840         return 0;
841       }
842       memcpy(outputPointer, blockBuffer, endBlockOffset);
843       delete[] blockBuffer;
844 
845       // Byte swap the partial block.  Note that endBlockOffset will
846       // always be an integer multiple of the word size.
847       this->PerformByteSwap(outputPointer, endBlockOffset / wordSize, wordSize);
848     }
849   }
850   this->UpdateProgress(1);
851 
852   // Return the total words actually read.
853   return (endOffset - beginOffset) / wordSize;
854 }
855 
856 //------------------------------------------------------------------------------
GetRootElement()857 vtkXMLDataElement* vtkXMLDataParser::GetRootElement()
858 {
859   return this->RootElement;
860 }
861 
862 //------------------------------------------------------------------------------
ReadBinaryData(void * in_buffer,vtkTypeUInt64 startWord,size_t numWords,int wordType)863 size_t vtkXMLDataParser::ReadBinaryData(
864   void* in_buffer, vtkTypeUInt64 startWord, size_t numWords, int wordType)
865 {
866   // Skip real read if aborting.
867   if (this->Abort)
868   {
869     return 0;
870   }
871 
872   size_t wordSize = this->GetWordTypeSize(wordType);
873   void* buffer = in_buffer;
874 
875   // Make sure our streams are setup correctly.
876   this->DataStream->SetStream(this->Stream);
877 
878   // Read the data.
879   unsigned char* d = reinterpret_cast<unsigned char*>(buffer);
880   size_t actualWords;
881   if (this->Compressor)
882   {
883     if (!this->ReadCompressionHeader())
884     {
885       vtkErrorMacro("ReadCompressionHeader failed. Aborting read.");
886       return 0;
887     }
888     this->DataStream->StartReading();
889     actualWords = this->ReadCompressedData(d, startWord, numWords, wordSize);
890     this->DataStream->EndReading();
891   }
892   else
893   {
894     this->DataStream->StartReading();
895     actualWords = this->ReadUncompressedData(d, startWord, numWords, wordSize);
896     this->DataStream->EndReading();
897   }
898 
899   // Return the actual amount read.
900   return this->Abort ? 0 : actualWords;
901 }
902 
903 //------------------------------------------------------------------------------
ReadAsciiData(void * buffer,vtkTypeUInt64 startWord,size_t numWords,int wordType)904 size_t vtkXMLDataParser::ReadAsciiData(
905   void* buffer, vtkTypeUInt64 startWord, size_t numWords, int wordType)
906 {
907   // Skip real read if aborting.
908   if (this->Abort)
909   {
910     return 0;
911   }
912 
913   // We assume that ascii data are not very large and parse the entire
914   // block into memory.
915   this->UpdateProgress(0);
916 
917   // Parse the ascii data from the file.
918   if (!this->ParseAsciiData(wordType))
919   {
920     return 0;
921   }
922 
923   // Make sure we don't read outside the range of data available.
924   vtkTypeUInt64 endWord = startWord + numWords;
925   if (this->AsciiDataBufferLength < startWord)
926   {
927     return 0;
928   }
929   if (endWord > this->AsciiDataBufferLength)
930   {
931     endWord = this->AsciiDataBufferLength;
932   }
933   size_t wordSize = this->GetWordTypeSize(wordType);
934   size_t actualWords = endWord - startWord;
935   size_t actualBytes = wordSize * actualWords;
936   size_t startByte = wordSize * startWord;
937 
938   this->UpdateProgress(0.5);
939 
940   // Copy the data from the pre-parsed ascii data buffer.
941   if (buffer && actualBytes)
942   {
943     memcpy(buffer, this->AsciiDataBuffer + startByte, actualBytes);
944   }
945 
946   this->UpdateProgress(1);
947 
948   return this->Abort ? 0 : actualWords;
949 }
950 
951 //------------------------------------------------------------------------------
ReadInlineData(vtkXMLDataElement * element,int isAscii,void * buffer,vtkTypeUInt64 startWord,size_t numWords,int wordType)952 size_t vtkXMLDataParser::ReadInlineData(vtkXMLDataElement* element, int isAscii, void* buffer,
953   vtkTypeUInt64 startWord, size_t numWords, int wordType)
954 {
955   this->DataStream = this->InlineDataStream;
956   this->SeekInlineDataPosition(element);
957   if (isAscii)
958   {
959     return this->ReadAsciiData(buffer, startWord, numWords, wordType);
960   }
961   else
962   {
963     return this->ReadBinaryData(buffer, startWord, numWords, wordType);
964   }
965 }
966 
967 //------------------------------------------------------------------------------
ReadAppendedData(vtkTypeInt64 offset,void * buffer,vtkTypeUInt64 startWord,size_t numWords,int wordType)968 size_t vtkXMLDataParser::ReadAppendedData(
969   vtkTypeInt64 offset, void* buffer, vtkTypeUInt64 startWord, size_t numWords, int wordType)
970 {
971   this->DataStream = this->AppendedDataStream;
972   this->SeekG(this->AppendedDataPosition + offset);
973   return this->ReadBinaryData(buffer, startWord, numWords, wordType);
974 }
975 
976 //------------------------------------------------------------------------------
977 //------------------------------------------------------------------------------
// Generic ascii parsing function template.  Values of type T are extracted
// from the stream until an extraction fails; they are returned in an array
// allocated with new[] and their count is stored in *length when length is
// non-null.  The extra "long" argument is used to help broken compilers
// select the non-templates below for char and unsigned char, and
// float/double by making them a better conversion than the template.
template <class T>
T* vtkXMLParseAsciiData(istream& is, int* length, T*, long)
{
  int capacity = 64;
  int count = 0;
  T* values = new T[capacity];

  for (T value; is >> value;)
  {
    if (count == capacity)
    {
      // Out of room: move the parsed values into a buffer twice as large.
      const int doubled = capacity * 2;
      T* larger = new T[doubled];
      memcpy(larger, values, count * sizeof(T));
      delete[] values;
      values = larger;
      capacity = doubled;
    }
    values[count] = value;
    ++count;
  }

  if (length != nullptr)
  {
    *length = count;
  }

  return values;
}
1012 
1013 //------------------------------------------------------------------------------
// Parse whitespace-separated float values from the stream.  A token that is
// not a plain number is accepted when it spells "inf", "nan" or "-inf"
// (compared case-insensitively) and is converted with strtof; any other
// token is consumed and ends parsing, as does exhausting the stream.  The
// values are returned in an array allocated with new[] and their count is
// stored in *length when length is non-null.
// NOTE(review): a failed numeric extraction may already have consumed a
// leading '-' on some standard libraries, in which case "-inf" would be seen
// as "inf" here -- confirm on the supported platforms.
static float* vtkXMLParseAsciiData(istream& is, int* length, float*, int)
{
  int dataLength = 0;
  int dataBufferSize = 64;

  float* dataBuffer = new float[dataBufferSize];
  std::string stringBuffer;
  float element;

  while (true)
  {
    is >> element;
    // Test fail() rather than good(): a value that ends exactly at the end
    // of the stream sets eofbit although it was extracted successfully, and
    // must not be discarded.
    if (is.fail())
    {
      // Not a plain number.  Drop the fail bit and look at the token as text.
      is.clear(is.rdstate() & ~ios::failbit);
      is >> stringBuffer;
      if (is.fail())
      {
        // Nothing left to read.
        break;
      }

      // Lower-case the token.  The cast keeps the argument of tolower within
      // the unsigned char range, as the function requires.
      std::for_each(stringBuffer.begin(), stringBuffer.end(),
        [](char& c) { c = static_cast<char>(std::tolower(static_cast<unsigned char>(c))); });
      if (stringBuffer == "inf" || stringBuffer == "nan" || stringBuffer == "-inf")
      {
        element = strtof(stringBuffer.c_str(), nullptr);
      }
      else
      {
        // Any other token terminates the data.
        break;
      }
    }

    // Double the buffer when it is full.
    if (dataLength == dataBufferSize)
    {
      int newSize = dataBufferSize * 2;
      float* newBuffer = new float[newSize];
      memcpy(newBuffer, dataBuffer, dataLength * sizeof(float));
      delete[] dataBuffer;
      dataBuffer = newBuffer;
      dataBufferSize = newSize;
    }
    dataBuffer[dataLength++] = element;
  }

  if (length)
  {
    *length = dataLength;
  }

  return dataBuffer;
}
1067 
1068 //------------------------------------------------------------------------------
// Parse whitespace-separated double values from the stream.  A token that is
// not a plain number is accepted when it spells "inf", "nan" or "-inf"
// (compared case-insensitively) and is converted with strtod; any other
// token is consumed and ends parsing, as does exhausting the stream.  The
// values are returned in an array allocated with new[] and their count is
// stored in *length when length is non-null.
// NOTE(review): a failed numeric extraction may already have consumed a
// leading '-' on some standard libraries, in which case "-inf" would be seen
// as "inf" here -- confirm on the supported platforms.
static double* vtkXMLParseAsciiData(istream& is, int* length, double*, int)
{
  int dataLength = 0;
  int dataBufferSize = 64;

  double* dataBuffer = new double[dataBufferSize];
  std::string stringBuffer;
  double element;

  while (true)
  {
    is >> element;
    // Test fail() rather than good(): a value that ends exactly at the end
    // of the stream sets eofbit although it was extracted successfully, and
    // must not be discarded.
    if (is.fail())
    {
      // Not a plain number.  Drop the fail bit and look at the token as text.
      is.clear(is.rdstate() & ~ios::failbit);
      is >> stringBuffer;
      if (is.fail())
      {
        // Nothing left to read.
        break;
      }

      // Lower-case the token.  The cast keeps the argument of tolower within
      // the unsigned char range, as the function requires.
      std::for_each(stringBuffer.begin(), stringBuffer.end(),
        [](char& c) { c = static_cast<char>(std::tolower(static_cast<unsigned char>(c))); });
      if (stringBuffer == "inf" || stringBuffer == "nan" || stringBuffer == "-inf")
      {
        element = strtod(stringBuffer.c_str(), nullptr);
      }
      else
      {
        // Any other token terminates the data.
        break;
      }
    }

    // Double the buffer when it is full.
    if (dataLength == dataBufferSize)
    {
      int newSize = dataBufferSize * 2;
      double* newBuffer = new double[newSize];
      memcpy(newBuffer, dataBuffer, dataLength * sizeof(double));
      delete[] dataBuffer;
      dataBuffer = newBuffer;
      dataBufferSize = newSize;
    }
    dataBuffer[dataLength++] = element;
  }

  if (length)
  {
    *length = dataLength;
  }

  return dataBuffer;
}
1122 
1123 //------------------------------------------------------------------------------
// Parse whitespace-separated integers from the stream and store each one as
// a char.  Parsing stops at the first failed extraction.  The values are
// returned in an array allocated with new[] and their count is stored in
// *length when length is non-null.
static char* vtkXMLParseAsciiData(istream& is, int* length, char*, int)
{
  int capacity = 64;
  int count = 0;
  char* values = new char[capacity];

  // Extract through a short so the text is read as a number rather than as
  // a single character.
  for (short number; is >> number;)
  {
    if (count == capacity)
    {
      // Out of room: move the parsed values into a buffer twice as large.
      const int doubled = capacity * 2;
      char* larger = new char[doubled];
      memcpy(larger, values, count * sizeof(char));
      delete[] values;
      values = larger;
      capacity = doubled;
    }
    values[count] = static_cast<char>(number);
    ++count;
  }

  if (length != nullptr)
  {
    *length = count;
  }

  return values;
}
1155 
1156 //------------------------------------------------------------------------------
// Parse whitespace-separated integers from the stream and store each one as
// an unsigned char.  Parsing stops at the first failed extraction.  The
// values are returned in an array allocated with new[] and their count is
// stored in *length when length is non-null.
static unsigned char* vtkXMLParseAsciiData(istream& is, int* length, unsigned char*, int)
{
  int capacity = 64;
  int count = 0;
  unsigned char* values = new unsigned char[capacity];

  // Extract through a short so the text is read as a number rather than as
  // a single character.
  for (short number; is >> number;)
  {
    if (count == capacity)
    {
      // Out of room: move the parsed values into a buffer twice as large.
      const int doubled = capacity * 2;
      unsigned char* larger = new unsigned char[doubled];
      memcpy(larger, values, count * sizeof(unsigned char));
      delete[] values;
      values = larger;
      capacity = doubled;
    }
    values[count] = static_cast<unsigned char>(number);
    ++count;
  }

  if (length != nullptr)
  {
    *length = count;
  }

  return values;
}
1188 
1189 //------------------------------------------------------------------------------
// Parse whitespace-separated integers from the stream and store each one as
// a signed char.  Parsing stops at the first failed extraction.  The values
// are returned in an array allocated with new[] and their count is stored in
// *length when length is non-null.
static signed char* vtkXMLParseAsciiData(istream& is, int* length, signed char*, int)
{
  int capacity = 64;
  int count = 0;
  signed char* values = new signed char[capacity];

  // Extract through a short so the text is read as a number rather than as
  // a single character.
  for (short number; is >> number;)
  {
    if (count == capacity)
    {
      // Out of room: move the parsed values into a buffer twice as large.
      const int doubled = capacity * 2;
      signed char* larger = new signed char[doubled];
      memcpy(larger, values, count * sizeof(signed char));
      delete[] values;
      values = larger;
      capacity = doubled;
    }
    values[count] = static_cast<signed char>(number);
    ++count;
  }

  if (length != nullptr)
  {
    *length = count;
  }

  return values;
}
1221 
1222 //------------------------------------------------------------------------------
// Parse whitespace-separated integers from the stream as bits and pack them
// eight to a byte, first bit in the most significant position.  Any non-zero
// integer sets its bit.  The packed bytes are returned in an array allocated
// with new[]; the number of bytes holding at least one parsed bit is stored
// in *length when length is non-null.
static unsigned char* vtkXMLParseAsciiBitData(istream& is, int* length)
{
  size_t capacityInBytes = 64;
  unsigned char* packed = new unsigned char[capacityInBytes];
  std::fill_n(packed, capacityInBytes, static_cast<unsigned char>(0));

  // Total number of bits stored so far; byte and bit positions derive from it.
  size_t bitsStored = 0;

  for (int bit; is >> bit;)
  {
    const size_t byteIndex = bitsStored / 8;
    const unsigned int bitInByte = static_cast<unsigned int>(bitsStored % 8);

    // Grow the zero-initialized storage when every byte has been filled.
    if (byteIndex == capacityInBytes)
    {
      assert("sanity check" && bitInByte == 0);
      const size_t doubled = capacityInBytes * 2;
      unsigned char* grown = new unsigned char[doubled];
      std::copy(packed, packed + capacityInBytes, grown);
      std::fill(grown + capacityInBytes, grown + doubled, static_cast<unsigned char>(0));

      delete[] packed;
      packed = grown;
      capacityInBytes = doubled;
    }

    assert("sanity check" && bitInByte < 8);
    if (bit != 0)
    { // Mimic the storage mechanism used by vtkBitArray
      packed[byteIndex] = packed[byteIndex] | (0x80 >> bitInByte);
    }

    ++bitsStored;
  }

  if (length)
  {
    // The 'word size' is fudged to 1 byte for bit arrays (since it's
    // integral), so the length is reported in bytes, counting a partially
    // filled trailing byte.
    *length = static_cast<int>((bitsStored + 7) / 8);
  }

  return packed;
}
1276 
1277 //------------------------------------------------------------------------------
ParseAsciiData(int wordType)1278 int vtkXMLDataParser::ParseAsciiData(int wordType)
1279 {
1280   istream& is = *(this->Stream);
1281 
1282   // Don't re-parse the same ascii data.
1283   if (this->AsciiDataPosition == this->TellG())
1284   {
1285     return (this->AsciiDataBuffer ? 1 : 0);
1286   }
1287 
1288   // Prepare for new data.
1289   this->AsciiDataPosition = this->TellG();
1290   if (this->AsciiDataBuffer)
1291   {
1292     this->FreeAsciiBuffer();
1293   }
1294 
1295   int length = 0;
1296   void* buffer = nullptr;
1297   switch (wordType)
1298   {
1299     vtkTemplateMacro(buffer = vtkXMLParseAsciiData(is, &length, static_cast<VTK_TT*>(nullptr), 1));
1300 
1301     case VTK_BIT:
1302       buffer = vtkXMLParseAsciiBitData(is, &length);
1303       break;
1304   }
1305 
1306   // Read terminated from failure.  Clear the fail bit so another read
1307   // can take place later.
1308   is.clear(is.rdstate() & ~ios::failbit);
1309 
1310   // Save the buffer.
1311   this->AsciiDataBuffer = reinterpret_cast<unsigned char*>(buffer);
1312   this->AsciiDataBufferLength = length;
1313   this->AsciiDataWordType = wordType;
1314   return (this->AsciiDataBuffer ? 1 : 0);
1315 }
1316 
1317 //------------------------------------------------------------------------------
// Release an array with delete[] through a pointer of its element type T.
// FreeAsciiBuffer instantiates this helper with the word type recorded for
// the buffer, so the pointer handed to delete[] is typed accordingly.
template <class T>
void vtkXMLDataParserFreeAsciiBuffer(T* buffer)
{
  delete[] buffer;
}
1323 
1324 //------------------------------------------------------------------------------
FreeAsciiBuffer()1325 void vtkXMLDataParser::FreeAsciiBuffer()
1326 {
1327   void* buffer = this->AsciiDataBuffer;
1328   switch (this->AsciiDataWordType)
1329   {
1330     vtkTemplateMacro(vtkXMLDataParserFreeAsciiBuffer(static_cast<VTK_TT*>(buffer)));
1331 
1332     case VTK_BIT:
1333       vtkXMLDataParserFreeAsciiBuffer(static_cast<unsigned char*>(buffer));
1334       break;
1335   }
1336   this->AsciiDataBuffer = nullptr;
1337 }
1338 
1339 //------------------------------------------------------------------------------
UpdateProgress(float progress)1340 void vtkXMLDataParser::UpdateProgress(float progress)
1341 {
1342   this->Progress = progress;
1343   double dProgress = progress;
1344   this->InvokeEvent(vtkCommand::ProgressEvent, &dProgress);
1345 }
1346