1 /*=========================================================================
2
3 Program: Visualization Toolkit
4 Module: vtkXMLDataParser.cxx
5
6 Copyright (c) Ken Martin, Will Schroeder, Bill Lorensen
7 All rights reserved.
8 See Copyright.txt or http://www.kitware.com/Copyright.htm for details.
9
10 This software is distributed WITHOUT ANY WARRANTY; without even
11 the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
12 PURPOSE. See the above copyright notice for more information.
13
14 =========================================================================*/
15 #include "vtkXMLDataParser.h"
16
17 #include "vtkBase64InputStream.h"
18 #include "vtkByteSwap.h"
19 #include "vtkCommand.h"
20 #include "vtkDataCompressor.h"
21 #include "vtkEndian.h"
22 #include "vtkInputStream.h"
23 #include "vtkObjectFactory.h"
24 #include "vtkXMLDataElement.h"
25 #define vtkXMLDataHeaderPrivate_DoNotInclude
26 #include "vtkXMLDataHeaderPrivate.h"
27 #undef vtkXMLDataHeaderPrivate_DoNotInclude
28
29 #include <algorithm>
30 #include <cassert>
31 #include <cctype>
32 #include <memory>
33 #include <sstream>
34 #include <vector>
35
36 #include "vtkXMLUtilities.h"
37
// Standard VTK object-factory boilerplate for this class (presumably supplies
// the static New() implementation -- confirm against vtkObjectFactory.h).
vtkStandardNewMacro(vtkXMLDataParser);
// Presumably generates SetCompressor(vtkDataCompressor*), which the destructor
// in this file calls with nullptr -- confirm against the macro definition.
vtkCxxSetObjectMacro(vtkXMLDataParser, Compressor, vtkDataCompressor);
40
41 //------------------------------------------------------------------------------
vtkXMLDataParser()42 vtkXMLDataParser::vtkXMLDataParser()
43 {
44 this->NumberOfOpenElements = 0;
45 this->OpenElementsSize = 10;
46 this->OpenElements = new vtkXMLDataElement*[this->OpenElementsSize];
47 this->RootElement = nullptr;
48 this->AppendedDataPosition = 0;
49 this->AppendedDataMatched = 0;
50 this->DataStream = nullptr;
51 this->InlineDataStream = vtkBase64InputStream::New();
52 this->AppendedDataStream = vtkBase64InputStream::New();
53
54 this->BlockCompressedSizes = nullptr;
55 this->BlockStartOffsets = nullptr;
56 this->Compressor = nullptr;
57
58 this->AsciiDataBuffer = nullptr;
59 this->AsciiDataBufferLength = 0;
60 this->AsciiDataPosition = 0;
61
62 this->Abort = 0;
63 this->Progress = 0;
64
65 // Default byte order to that of this machine.
66 #ifdef VTK_WORDS_BIGENDIAN
67 this->ByteOrder = vtkXMLDataParser::BigEndian;
68 #else
69 this->ByteOrder = vtkXMLDataParser::LittleEndian;
70 #endif
71 this->HeaderType = 32;
72
73 this->AttributesEncoding = VTK_ENCODING_NONE;
74
75 // Have specialized methods for reading array data both inline or
76 // appended, however typical tags may use the more general CharacterData
77 // methods.
78 this->IgnoreCharacterData = 0;
79 }
80
81 //------------------------------------------------------------------------------
~vtkXMLDataParser()82 vtkXMLDataParser::~vtkXMLDataParser()
83 {
84 this->FreeAllElements();
85 delete[] this->OpenElements;
86 this->InlineDataStream->Delete();
87 this->AppendedDataStream->Delete();
88 delete[] this->BlockCompressedSizes;
89 delete[] this->BlockStartOffsets;
90 this->SetCompressor(nullptr);
91 if (this->AsciiDataBuffer)
92 {
93 this->FreeAsciiBuffer();
94 }
95 }
96
97 //------------------------------------------------------------------------------
PrintSelf(ostream & os,vtkIndent indent)98 void vtkXMLDataParser::PrintSelf(ostream& os, vtkIndent indent)
99 {
100 this->Superclass::PrintSelf(os, indent);
101 os << indent << "AppendedDataPosition: " << this->AppendedDataPosition << "\n";
102 if (this->RootElement)
103 {
104 this->RootElement->PrintXML(os, indent);
105 }
106 if (this->Compressor)
107 {
108 os << indent << "Compressor: " << this->Compressor << "\n";
109 }
110 else
111 {
112 os << indent << "Compressor: (none)\n";
113 }
114 os << indent << "Progress: " << this->Progress << "\n";
115 os << indent << "Abort: " << this->Abort << "\n";
116 os << indent << "AttributesEncoding: " << this->AttributesEncoding << "\n";
117 }
118
119 //------------------------------------------------------------------------------
Parse()120 int vtkXMLDataParser::Parse()
121 {
122 // Delete any elements left from previous parsing.
123 this->FreeAllElements();
124
125 // Parse the input from the stream.
126 int result = this->Superclass::Parse();
127
128 // Check that the input is okay.
129 if (result && !this->CheckPrimaryAttributes())
130 {
131 result = 0;
132 }
133
134 return result;
135 }
136
137 //------------------------------------------------------------------------------
Parse(const char *)138 int vtkXMLDataParser::Parse(const char*)
139 {
140 vtkErrorMacro("Parsing from a string is not supported.");
141 return 0;
142 }
143
144 //------------------------------------------------------------------------------
Parse(const char *,unsigned int)145 int vtkXMLDataParser::Parse(const char*, unsigned int)
146 {
147 vtkErrorMacro("Parsing from a string is not supported.");
148 return 0;
149 }
150
151 //------------------------------------------------------------------------------
StartElement(const char * name,const char ** atts)152 void vtkXMLDataParser::StartElement(const char* name, const char** atts)
153 {
154 vtkXMLDataElement* element = vtkXMLDataElement::New();
155 element->SetName(name);
156 element->SetXMLByteIndex(this->GetXMLByteIndex());
157 vtkXMLUtilities::ReadElementFromAttributeArray(element, atts, this->AttributesEncoding);
158
159 const char* id = element->GetAttribute("id");
160 if (id)
161 {
162 element->SetId(id);
163 }
164 this->PushOpenElement(element);
165
166 if (strcmp(name, "AppendedData") == 0)
167 {
168 // This is the AppendedData element.
169 this->FindAppendedDataPosition();
170
171 // Switch to raw decoder if necessary.
172 const char* encoding = element->GetAttribute("encoding");
173 if (encoding && (strcmp(encoding, "raw") == 0))
174 {
175 this->AppendedDataStream->Delete();
176 this->AppendedDataStream = vtkInputStream::New();
177 }
178 }
179 }
180
181 //------------------------------------------------------------------------------
SeekInlineDataPosition(vtkXMLDataElement * element)182 void vtkXMLDataParser::SeekInlineDataPosition(vtkXMLDataElement* element)
183 {
184 istream* stream = this->GetStream();
185 if (!element->GetInlineDataPosition())
186 {
187 // Scan for the start of the actual inline data.
188 char c = 0;
189 stream->clear(stream->rdstate() & ~ios::eofbit);
190 stream->clear(stream->rdstate() & ~ios::failbit);
191 this->SeekG(element->GetXMLByteIndex());
192 while (stream->get(c) && (c != '>'))
193 {
194 ;
195 }
196 while (stream->get(c) && vtkXMLDataElement::IsSpace(c))
197 {
198 ;
199 }
200 vtkTypeInt64 pos = this->TellG();
201 element->SetInlineDataPosition(pos - 1);
202 }
203
204 // Seek to the data position.
205 this->SeekG(element->GetInlineDataPosition());
206 }
207
208 //------------------------------------------------------------------------------
EndElement(const char *)209 void vtkXMLDataParser::EndElement(const char*)
210 {
211 vtkXMLDataElement* finished = this->PopOpenElement();
212 unsigned int numOpen = this->NumberOfOpenElements;
213 if (numOpen > 0)
214 {
215 this->OpenElements[numOpen - 1]->AddNestedElement(finished);
216 finished->Delete();
217 }
218 else
219 {
220 this->RootElement = finished;
221 }
222 }
223
224 //------------------------------------------------------------------------------
ParsingComplete()225 int vtkXMLDataParser::ParsingComplete()
226 {
227 // If we have reached the appended data section, we stop parsing.
228 // This prevents the XML parser from having to walk over the entire
229 // appended data section.
230 if (this->AppendedDataPosition)
231 {
232 return 1;
233 }
234 return this->Superclass::ParsingComplete();
235 }
236
237 //------------------------------------------------------------------------------
CheckPrimaryAttributes()238 int vtkXMLDataParser::CheckPrimaryAttributes()
239 {
240 const char* byte_order = this->RootElement->GetAttribute("byte_order");
241 if (byte_order)
242 {
243 if (strcmp(byte_order, "BigEndian") == 0)
244 {
245 this->ByteOrder = vtkXMLDataParser::BigEndian;
246 }
247 else if (strcmp(byte_order, "LittleEndian") == 0)
248 {
249 this->ByteOrder = vtkXMLDataParser::LittleEndian;
250 }
251 else
252 {
253 vtkErrorMacro("Unsupported byte_order=\"" << byte_order << "\"");
254 return 0;
255 }
256 }
257 if (const char* header_type = this->RootElement->GetAttribute("header_type"))
258 {
259 if (strcmp(header_type, "UInt32") == 0)
260 {
261 this->HeaderType = 32;
262 }
263 else if (strcmp(header_type, "UInt64") == 0)
264 {
265 this->HeaderType = 64;
266 }
267 else
268 {
269 vtkErrorMacro("Unsupported header_type=\"" << header_type << "\"");
270 return 0;
271 }
272 }
273 return 1;
274 }
275
276 //------------------------------------------------------------------------------
FindAppendedDataPosition()277 void vtkXMLDataParser::FindAppendedDataPosition()
278 {
279 // Clear stream fail and eof bits. We may have already read past
280 // the end of the stream when processing the AppendedData element.
281 this->Stream->clear(this->Stream->rdstate() & ~ios::failbit);
282 this->Stream->clear(this->Stream->rdstate() & ~ios::eofbit);
283
284 // Scan for the start of the actual appended data.
285 char c = 0;
286 vtkTypeInt64 returnPosition = this->TellG();
287 this->SeekG(this->GetXMLByteIndex());
288 while (this->Stream->get(c) && (c != '>'))
289 {
290 ;
291 }
292 while (this->Stream->get(c) && vtkXMLDataParser::IsSpace(c))
293 {
294 ;
295 }
296
297 // Store the start of the appended data. We skip the first
298 // character because it is always a "_".
299 this->AppendedDataPosition = this->TellG();
300
301 // If first character was not an underscore, assume it is part of
302 // the data.
303 if (c != '_')
304 {
305 vtkWarningMacro("First character in AppendedData is ASCII value "
306 << int(c) << ", not '_'. Scan for first character "
307 << "started from file position " << this->GetXMLByteIndex() << ". The return position is "
308 << returnPosition << ".");
309 --this->AppendedDataPosition;
310 }
311
312 // Restore the stream position.
313 this->SeekG(returnPosition);
314 }
315
316 //------------------------------------------------------------------------------
PushOpenElement(vtkXMLDataElement * element)317 void vtkXMLDataParser::PushOpenElement(vtkXMLDataElement* element)
318 {
319 if (this->NumberOfOpenElements == this->OpenElementsSize)
320 {
321 unsigned int newSize = this->OpenElementsSize * 2;
322 vtkXMLDataElement** newOpenElements = new vtkXMLDataElement*[newSize];
323 unsigned int i;
324 for (i = 0; i < this->NumberOfOpenElements; ++i)
325 {
326 newOpenElements[i] = this->OpenElements[i];
327 }
328 delete[] this->OpenElements;
329 this->OpenElements = newOpenElements;
330 this->OpenElementsSize = newSize;
331 }
332
333 unsigned int pos = this->NumberOfOpenElements++;
334 this->OpenElements[pos] = element;
335 }
336
337 //------------------------------------------------------------------------------
PopOpenElement()338 vtkXMLDataElement* vtkXMLDataParser::PopOpenElement()
339 {
340 if (this->NumberOfOpenElements > 0)
341 {
342 --this->NumberOfOpenElements;
343 return this->OpenElements[this->NumberOfOpenElements];
344 }
345 return nullptr;
346 }
347
348 //------------------------------------------------------------------------------
FreeAllElements()349 void vtkXMLDataParser::FreeAllElements()
350 {
351 while (this->NumberOfOpenElements > 0)
352 {
353 --this->NumberOfOpenElements;
354 this->OpenElements[this->NumberOfOpenElements]->Delete();
355 this->OpenElements[this->NumberOfOpenElements] = nullptr;
356 }
357 if (this->RootElement)
358 {
359 this->RootElement->Delete();
360 this->RootElement = nullptr;
361 }
362 }
363
364 //------------------------------------------------------------------------------
ParseBuffer(const char * buffer,unsigned int count)365 int vtkXMLDataParser::ParseBuffer(const char* buffer, unsigned int count)
366 {
367 // Parsing must stop when "<AppendedData" is reached. Use a search
368 // similar to the KMP string search algorithm.
369 const char pattern[] = "<AppendedData";
370 const int length = sizeof(pattern) - 1;
371
372 const char* s = buffer;
373 const char* end = buffer + count;
374 int matched = this->AppendedDataMatched;
375 while (s != end)
376 {
377 char c = *s++;
378 if (c == pattern[matched])
379 {
380 if (++matched == length)
381 {
382 break;
383 }
384 }
385 else
386 {
387 matched = (c == pattern[0]) ? 1 : 0;
388 }
389 }
390 this->AppendedDataMatched = matched;
391
392 // Parse as much of the buffer as is safe.
393 if (!this->Superclass::ParseBuffer(buffer, s - buffer))
394 {
395 return 0;
396 }
397
398 // If we have reached the appended data, artificially finish the
399 // document.
400 if (matched == length)
401 {
402 // Parse the rest of the element's opening tag.
403 const char* t = s;
404 char prev = 0;
405 while ((t != end) && (*t != '>'))
406 {
407 ++t;
408 }
409 if (!this->Superclass::ParseBuffer(s, t - s))
410 {
411 return 0;
412 }
413 if (t > s)
414 {
415 prev = *(t - 1);
416 }
417
418 if (t == end)
419 {
420 // Scan for the real end of the element's opening tag.
421 char c = 0;
422 while (this->Stream->get(c) && (c != '>'))
423 {
424 prev = c;
425 if (!this->Superclass::ParseBuffer(&c, 1))
426 {
427 return 0;
428 }
429 }
430 }
431
432 // Artificially end the AppendedData element.
433 if (prev != '/')
434 {
435 if (!this->Superclass::ParseBuffer("/", 1))
436 {
437 return 0;
438 }
439 }
440 if (!this->Superclass::ParseBuffer(">", 1))
441 {
442 return 0;
443 }
444
445 // Artificially end the VTKFile element.
446 const char finish[] = "\n</VTKFile>\n";
447 if (!this->Superclass::ParseBuffer(finish, sizeof(finish) - 1))
448 {
449 return 0;
450 }
451 }
452
453 return 1;
454 }
455
456 //------------------------------------------------------------------------------
// Return the size in bytes of the type T.  The pointer argument is only a
// type tag used for template deduction; its value is never read.
template <class T>
size_t vtkXMLDataParserGetWordTypeSize(T* /*typeTag*/)
{
  const size_t wordBytes = sizeof(T);
  return wordBytes;
}
462
463 //------------------------------------------------------------------------------
GetWordTypeSize(int wordType)464 size_t vtkXMLDataParser::GetWordTypeSize(int wordType)
465 {
466 size_t size = 1;
467 switch (wordType)
468 {
469 vtkTemplateMacro(size = vtkXMLDataParserGetWordTypeSize(static_cast<VTK_TT*>(nullptr)));
470
471 case VTK_BIT:
472 size = 1;
473 break;
474
475 default:
476 {
477 vtkWarningMacro("Unsupported data type: " << wordType);
478 }
479 break;
480 }
481 return size;
482 }
483
484 //------------------------------------------------------------------------------
PerformByteSwap(void * data,size_t numWords,size_t wordSize)485 void vtkXMLDataParser::PerformByteSwap(void* data, size_t numWords, size_t wordSize)
486 {
487 char* ptr = static_cast<char*>(data);
488 if (this->ByteOrder == vtkXMLDataParser::BigEndian)
489 {
490 switch (wordSize)
491 {
492 case 1:
493 break;
494 case 2:
495 vtkByteSwap::Swap2BERange(ptr, numWords);
496 break;
497 case 4:
498 vtkByteSwap::Swap4BERange(ptr, numWords);
499 break;
500 case 8:
501 vtkByteSwap::Swap8BERange(ptr, numWords);
502 break;
503 default:
504 vtkErrorMacro("Unsupported data type size " << wordSize);
505 }
506 }
507 else
508 {
509 switch (wordSize)
510 {
511 case 1:
512 break;
513 case 2:
514 vtkByteSwap::Swap2LERange(ptr, numWords);
515 break;
516 case 4:
517 vtkByteSwap::Swap4LERange(ptr, numWords);
518 break;
519 case 8:
520 vtkByteSwap::Swap8LERange(ptr, numWords);
521 break;
522 default:
523 vtkErrorMacro("Unsupported data type size " << wordSize);
524 }
525 }
526 }
527
528 //------------------------------------------------------------------------------
ReadCompressionHeader()529 int vtkXMLDataParser::ReadCompressionHeader()
530 {
531 std::unique_ptr<vtkXMLDataHeader> ch(vtkXMLDataHeader::New(this->HeaderType, 3));
532
533 this->DataStream->StartReading();
534
535 // Read the standard part of the header.
536 size_t const headerSize = ch->DataSize();
537 size_t r = this->DataStream->Read(ch->Data(), headerSize);
538 if (r < headerSize)
539 {
540 vtkErrorMacro("Error reading beginning of compression header. Read "
541 << r << " of " << headerSize << " bytes.");
542 return 0;
543 }
544
545 // Byte swap the header to make sure the values are correct.
546 this->PerformByteSwap(ch->Data(), ch->WordCount(), ch->WordSize());
547
548 // Get the standard values.
549 this->NumberOfBlocks = size_t(ch->Get(0));
550 this->BlockUncompressedSize = size_t(ch->Get(1));
551 this->PartialLastBlockUncompressedSize = size_t(ch->Get(2));
552
553 // Allocate the size and offset parts of the header.
554 ch->Resize(this->NumberOfBlocks);
555 delete[] this->BlockCompressedSizes;
556 this->BlockCompressedSizes = nullptr;
557 delete[] this->BlockStartOffsets;
558 this->BlockStartOffsets = nullptr;
559 if (this->NumberOfBlocks > 0)
560 {
561 this->BlockCompressedSizes = new size_t[this->NumberOfBlocks];
562 this->BlockStartOffsets = new vtkTypeInt64[this->NumberOfBlocks];
563
564 // Read the compressed block sizes.
565 size_t len = ch->DataSize();
566 if (this->DataStream->Read(ch->Data(), len) < len)
567 {
568 vtkErrorMacro("Error reading compression header.");
569 return 0;
570 }
571
572 // Byte swap the sizes to make sure the values are correct.
573 this->PerformByteSwap(ch->Data(), ch->WordCount(), ch->WordSize());
574 }
575
576 this->DataStream->EndReading();
577
578 // Use the compressed block sizes to calculate the starting offset
579 // of each block.
580 vtkTypeInt64 offset = 0;
581 for (size_t i = 0; i < this->NumberOfBlocks; ++i)
582 {
583 size_t const sz = size_t(ch->Get(i));
584 this->BlockCompressedSizes[i] = sz;
585 this->BlockStartOffsets[i] = offset;
586 offset += sz;
587 }
588 return 1;
589 }
590
591 //------------------------------------------------------------------------------
FindBlockSize(vtkTypeUInt64 block)592 size_t vtkXMLDataParser::FindBlockSize(vtkTypeUInt64 block)
593 {
594 if (block < this->NumberOfBlocks - (this->PartialLastBlockUncompressedSize ? 1 : 0))
595 {
596 return this->BlockUncompressedSize;
597 }
598 else
599 {
600 return this->PartialLastBlockUncompressedSize;
601 }
602 }
603
604 //------------------------------------------------------------------------------
ReadBlock(vtkTypeUInt64 block,unsigned char * buffer)605 int vtkXMLDataParser::ReadBlock(vtkTypeUInt64 block, unsigned char* buffer)
606 {
607 size_t uncompressedSize = this->FindBlockSize(block);
608 size_t compressedSize = this->BlockCompressedSizes[block];
609
610 if (!this->DataStream->Seek(this->BlockStartOffsets[block]))
611 {
612 return 0;
613 }
614
615 unsigned char* readBuffer = new unsigned char[compressedSize];
616
617 if (this->DataStream->Read(readBuffer, compressedSize) < compressedSize)
618 {
619 delete[] readBuffer;
620 return 0;
621 }
622
623 size_t result =
624 this->Compressor->Uncompress(readBuffer, compressedSize, buffer, uncompressedSize);
625
626 delete[] readBuffer;
627 return result > 0;
628 }
629
630 //------------------------------------------------------------------------------
ReadBlock(vtkTypeUInt64 block)631 unsigned char* vtkXMLDataParser::ReadBlock(vtkTypeUInt64 block)
632 {
633 unsigned char* decompressBuffer = new unsigned char[this->FindBlockSize(block)];
634 if (!this->ReadBlock(block, decompressBuffer))
635 {
636 delete[] decompressBuffer;
637 return nullptr;
638 }
639 return decompressBuffer;
640 }
641
642 //------------------------------------------------------------------------------
ReadUncompressedData(unsigned char * data,vtkTypeUInt64 startWord,size_t numWords,size_t wordSize)643 size_t vtkXMLDataParser::ReadUncompressedData(
644 unsigned char* data, vtkTypeUInt64 startWord, size_t numWords, size_t wordSize)
645 {
646 // First read the length of the data.
647 std::unique_ptr<vtkXMLDataHeader> uh(vtkXMLDataHeader::New(this->HeaderType, 1));
648
649 size_t const headerSize = uh->DataSize();
650 size_t r = this->DataStream->Read(uh->Data(), headerSize);
651 if (r < headerSize)
652 {
653 vtkErrorMacro("Error reading uncompressed binary data header. "
654 "Read "
655 << r << " of " << headerSize << " bytes.");
656 return 0;
657 }
658 this->PerformByteSwap(uh->Data(), uh->WordCount(), uh->WordSize());
659 vtkTypeUInt64 rsize = uh->Get(0);
660
661 // Adjust the size to be a multiple of the wordSize by taking
662 // advantage of integer division. This will only change the value
663 // when the input file is invalid.
664 vtkTypeUInt64 size = (rsize / wordSize) * wordSize;
665
666 // Convert the start/length into bytes.
667 vtkTypeUInt64 offset = startWord * wordSize;
668 size_t length = numWords * wordSize;
669
670 // Make sure the begin/end offsets fall within total size.
671 if (offset > size)
672 {
673 return 0;
674 }
675 vtkTypeUInt64 end = offset + length;
676 if (end > size)
677 {
678 end = size;
679 }
680 length = end - offset;
681
682 // Read the data.
683 if (!this->DataStream->Seek(headerSize + offset))
684 {
685 return 0;
686 }
687
688 // Read data in 2MB blocks and report progress.
689 size_t const blockSize = 2097152;
690 size_t left = length;
691 unsigned char* p = data;
692 this->UpdateProgress(0);
693 while (left > 0 && !this->Abort)
694 {
695 // Read this block.
696 size_t n = (blockSize < left) ? blockSize : left;
697 if (!this->DataStream->Read(p, n))
698 {
699 return 0;
700 }
701
702 // Byte swap this block. Note that n will always be an integer
703 // multiple of the word size.
704 this->PerformByteSwap(p, n / wordSize, wordSize);
705
706 // Update pointer and counter.
707 p += n;
708 left -= n;
709
710 // Report progress.
711 this->UpdateProgress(float(p - data) / length);
712 }
713 this->UpdateProgress(1);
714 return length / wordSize;
715 }
716
717 //------------------------------------------------------------------------------
ReadCompressedData(unsigned char * data,vtkTypeUInt64 startWord,size_t numWords,size_t wordSize)718 size_t vtkXMLDataParser::ReadCompressedData(
719 unsigned char* data, vtkTypeUInt64 startWord, size_t numWords, size_t wordSize)
720 {
721 // Make sure there are data.
722 if (numWords == 0)
723 {
724 return 0;
725 }
726
727 // Find the begin and end offsets into the data.
728 vtkTypeUInt64 beginOffset = startWord * wordSize;
729 vtkTypeUInt64 endOffset = beginOffset + numWords * wordSize;
730
731 // Find the total size of the data.
732 vtkTypeUInt64 totalSize = this->NumberOfBlocks * this->BlockUncompressedSize;
733 if (this->PartialLastBlockUncompressedSize)
734 {
735 totalSize -= this->BlockUncompressedSize;
736 totalSize += this->PartialLastBlockUncompressedSize;
737 }
738
739 // Make sure there's even data to be read
740 if (totalSize == 0)
741 {
742 return 0;
743 }
744
745 // Adjust the size to be a multiple of the wordSize by taking
746 // advantage of integer division. This will only change the value
747 // when the input file is invalid.
748 totalSize = (totalSize / wordSize) * wordSize;
749
750 // Make sure the begin/end offsets fall within the total size.
751 if (beginOffset > totalSize)
752 {
753 return 0;
754 }
755 if (endOffset > totalSize)
756 {
757 endOffset = totalSize;
758 }
759
760 // Find the range of compression blocks to read.
761 vtkTypeUInt64 firstBlock = beginOffset / this->BlockUncompressedSize;
762 vtkTypeUInt64 lastBlock = endOffset / this->BlockUncompressedSize;
763
764 // Find the offset into the first block where the data begin.
765 size_t beginBlockOffset = beginOffset - firstBlock * this->BlockUncompressedSize;
766
767 // Find the offset into the last block where the data end.
768 size_t endBlockOffset = endOffset - lastBlock * this->BlockUncompressedSize;
769
770 this->UpdateProgress(0);
771 if (firstBlock == lastBlock)
772 {
773 // Everything fits in one block.
774 unsigned char* blockBuffer = this->ReadBlock(firstBlock);
775 if (!blockBuffer)
776 {
777 return 0;
778 }
779 size_t n = endBlockOffset - beginBlockOffset;
780 memcpy(data, blockBuffer + beginBlockOffset, n);
781 delete[] blockBuffer;
782
783 // Byte swap this block. Note that n will always be an integer
784 // multiple of the word size.
785 this->PerformByteSwap(data, n / wordSize, wordSize);
786 }
787 else
788 {
789 // Read all the complete blocks first.
790 size_t length = endOffset - beginOffset;
791 unsigned char* outputPointer = data;
792 size_t blockSize = this->FindBlockSize(firstBlock);
793
794 // Read the first block.
795 unsigned char* blockBuffer = this->ReadBlock(firstBlock);
796 if (!blockBuffer)
797 {
798 return 0;
799 }
800 size_t n = blockSize - beginBlockOffset;
801 memcpy(outputPointer, blockBuffer + beginBlockOffset, n);
802 delete[] blockBuffer;
803
804 // Byte swap the first block. Note that n will always be an
805 // integer multiple of the word size.
806 this->PerformByteSwap(outputPointer, n / wordSize, wordSize);
807
808 // Advance the pointer to the beginning of the second block.
809 outputPointer += blockSize - beginBlockOffset;
810
811 // Report progress.
812 this->UpdateProgress(float(outputPointer - data) / length);
813
814 unsigned int currentBlock = firstBlock + 1;
815 for (; currentBlock != lastBlock && !this->Abort; ++currentBlock)
816 {
817 // Read this block.
818 if (!this->ReadBlock(currentBlock, outputPointer))
819 {
820 return 0;
821 }
822
823 // Byte swap this block. Note that blockSize will always be an
824 // integer multiple of the word size.
825 this->PerformByteSwap(outputPointer, blockSize / wordSize, wordSize);
826
827 // Advance the pointer to the beginning of the next block.
828 outputPointer += this->FindBlockSize(currentBlock);
829
830 // Report progress.
831 this->UpdateProgress(float(outputPointer - data) / length);
832 }
833
834 // Now read the final block, which is incomplete if it exists.
835 if (endBlockOffset > 0 && !this->Abort)
836 {
837 blockBuffer = this->ReadBlock(lastBlock);
838 if (!blockBuffer)
839 {
840 return 0;
841 }
842 memcpy(outputPointer, blockBuffer, endBlockOffset);
843 delete[] blockBuffer;
844
845 // Byte swap the partial block. Note that endBlockOffset will
846 // always be an integer multiple of the word size.
847 this->PerformByteSwap(outputPointer, endBlockOffset / wordSize, wordSize);
848 }
849 }
850 this->UpdateProgress(1);
851
852 // Return the total words actually read.
853 return (endOffset - beginOffset) / wordSize;
854 }
855
856 //------------------------------------------------------------------------------
GetRootElement()857 vtkXMLDataElement* vtkXMLDataParser::GetRootElement()
858 {
859 return this->RootElement;
860 }
861
862 //------------------------------------------------------------------------------
ReadBinaryData(void * in_buffer,vtkTypeUInt64 startWord,size_t numWords,int wordType)863 size_t vtkXMLDataParser::ReadBinaryData(
864 void* in_buffer, vtkTypeUInt64 startWord, size_t numWords, int wordType)
865 {
866 // Skip real read if aborting.
867 if (this->Abort)
868 {
869 return 0;
870 }
871
872 size_t wordSize = this->GetWordTypeSize(wordType);
873 void* buffer = in_buffer;
874
875 // Make sure our streams are setup correctly.
876 this->DataStream->SetStream(this->Stream);
877
878 // Read the data.
879 unsigned char* d = reinterpret_cast<unsigned char*>(buffer);
880 size_t actualWords;
881 if (this->Compressor)
882 {
883 if (!this->ReadCompressionHeader())
884 {
885 vtkErrorMacro("ReadCompressionHeader failed. Aborting read.");
886 return 0;
887 }
888 this->DataStream->StartReading();
889 actualWords = this->ReadCompressedData(d, startWord, numWords, wordSize);
890 this->DataStream->EndReading();
891 }
892 else
893 {
894 this->DataStream->StartReading();
895 actualWords = this->ReadUncompressedData(d, startWord, numWords, wordSize);
896 this->DataStream->EndReading();
897 }
898
899 // Return the actual amount read.
900 return this->Abort ? 0 : actualWords;
901 }
902
903 //------------------------------------------------------------------------------
ReadAsciiData(void * buffer,vtkTypeUInt64 startWord,size_t numWords,int wordType)904 size_t vtkXMLDataParser::ReadAsciiData(
905 void* buffer, vtkTypeUInt64 startWord, size_t numWords, int wordType)
906 {
907 // Skip real read if aborting.
908 if (this->Abort)
909 {
910 return 0;
911 }
912
913 // We assume that ascii data are not very large and parse the entire
914 // block into memory.
915 this->UpdateProgress(0);
916
917 // Parse the ascii data from the file.
918 if (!this->ParseAsciiData(wordType))
919 {
920 return 0;
921 }
922
923 // Make sure we don't read outside the range of data available.
924 vtkTypeUInt64 endWord = startWord + numWords;
925 if (this->AsciiDataBufferLength < startWord)
926 {
927 return 0;
928 }
929 if (endWord > this->AsciiDataBufferLength)
930 {
931 endWord = this->AsciiDataBufferLength;
932 }
933 size_t wordSize = this->GetWordTypeSize(wordType);
934 size_t actualWords = endWord - startWord;
935 size_t actualBytes = wordSize * actualWords;
936 size_t startByte = wordSize * startWord;
937
938 this->UpdateProgress(0.5);
939
940 // Copy the data from the pre-parsed ascii data buffer.
941 if (buffer && actualBytes)
942 {
943 memcpy(buffer, this->AsciiDataBuffer + startByte, actualBytes);
944 }
945
946 this->UpdateProgress(1);
947
948 return this->Abort ? 0 : actualWords;
949 }
950
951 //------------------------------------------------------------------------------
ReadInlineData(vtkXMLDataElement * element,int isAscii,void * buffer,vtkTypeUInt64 startWord,size_t numWords,int wordType)952 size_t vtkXMLDataParser::ReadInlineData(vtkXMLDataElement* element, int isAscii, void* buffer,
953 vtkTypeUInt64 startWord, size_t numWords, int wordType)
954 {
955 this->DataStream = this->InlineDataStream;
956 this->SeekInlineDataPosition(element);
957 if (isAscii)
958 {
959 return this->ReadAsciiData(buffer, startWord, numWords, wordType);
960 }
961 else
962 {
963 return this->ReadBinaryData(buffer, startWord, numWords, wordType);
964 }
965 }
966
967 //------------------------------------------------------------------------------
ReadAppendedData(vtkTypeInt64 offset,void * buffer,vtkTypeUInt64 startWord,size_t numWords,int wordType)968 size_t vtkXMLDataParser::ReadAppendedData(
969 vtkTypeInt64 offset, void* buffer, vtkTypeUInt64 startWord, size_t numWords, int wordType)
970 {
971 this->DataStream = this->AppendedDataStream;
972 this->SeekG(this->AppendedDataPosition + offset);
973 return this->ReadBinaryData(buffer, startWord, numWords, wordType);
974 }
975
976 //------------------------------------------------------------------------------
977 //------------------------------------------------------------------------------
978 // Define a parsing function template. The extra "long" argument is used
979 // to help broken compilers select the non-templates below for char and
980 // unsigned char, and float/double by making them a better conversion than
981 // the template.
// Extract values of type T from `is` with operator>> until an extraction
// fails, collecting them in a heap array that doubles in capacity whenever
// it fills up (initial capacity 64).
//
// is:     stream to read from.
// length: when non-null, receives the number of values read.
// The T* and long arguments are only used for overload selection.
//
// Returns the array, which the caller must release with delete[].
template <class T>
T* vtkXMLParseAsciiData(std::istream& is, int* length, T*, long)
{
  int capacity = 64;
  int count = 0;
  T* values = new T[capacity];

  for (T value; is >> value;)
  {
    if (count == capacity)
    {
      // Full: move the values into an array twice the size.
      const int grownCapacity = capacity * 2;
      T* grown = new T[grownCapacity];
      std::copy(values, values + count, grown);
      delete[] values;
      values = grown;
      capacity = grownCapacity;
    }
    values[count] = value;
    ++count;
  }

  if (length != nullptr)
  {
    *length = count;
  }

  return values;
}
1012
1013 //------------------------------------------------------------------------------
// Extract float values from `is`, collecting them in a heap array that
// doubles in capacity whenever it fills up (initial capacity 64).
//
// When numeric extraction fails, the next whitespace-delimited token is
// read as text; "inf", "nan" and "-inf" (compared case-insensitively) are
// converted with strtof, and any other token ends the parse.
//
// Stream state is tested with fail() rather than good(): a value or token
// that ends exactly at end-of-stream sets eofbit although it was extracted
// successfully, and must not be dropped.
//
// is:     stream to read from.
// length: when non-null, receives the number of values read.
// The float* and int arguments are only used for overload selection.
//
// Returns the array, which the caller must release with delete[].
static float* vtkXMLParseAsciiData(std::istream& is, int* length, float*, int)
{
  int dataLength = 0;
  int dataBufferSize = 64;

  float* dataBuffer = new float[dataBufferSize];
  std::string stringBuffer;
  float element;

  while (true)
  {
    is >> element;
    if (is.fail())
    {
      // Not a plain number: see whether the next token is a special value.
      is.clear(is.rdstate() & ~std::ios::failbit);
      is >> stringBuffer;
      if (is.fail())
      {
        break;
      }

      // tolower() requires a value representable as unsigned char.
      std::for_each(stringBuffer.begin(), stringBuffer.end(),
        [](char& c) { c = static_cast<char>(std::tolower(static_cast<unsigned char>(c))); });
      if (stringBuffer == "inf" || stringBuffer == "nan" || stringBuffer == "-inf")
      {
        element = strtof(stringBuffer.c_str(), nullptr);
      }
      else
      {
        break;
      }
    }

    if (dataLength == dataBufferSize)
    {
      // Full: move the values into an array twice the size.
      const int newSize = dataBufferSize * 2;
      float* newBuffer = new float[newSize];
      std::copy(dataBuffer, dataBuffer + dataLength, newBuffer);
      delete[] dataBuffer;
      dataBuffer = newBuffer;
      dataBufferSize = newSize;
    }
    dataBuffer[dataLength++] = element;
  }

  if (length)
  {
    *length = dataLength;
  }

  return dataBuffer;
}
1067
1068 //------------------------------------------------------------------------------
// Parse whitespace-separated ASCII doubles from the stream.  Besides ordinary
// numbers, the tokens "inf", "-inf" and "nan" (compared case-insensitively)
// are accepted and converted with strtod.  Parsing stops at the first token
// that is neither, or when nothing more can be read.
//
// is     - stream to read from; its failbit may be left set on return.
// length - if non-null, receives the number of values stored.
// The two unnamed parameters only select this overload.
//
// Returns a buffer allocated with new[]; the caller releases it with
// delete[].
//
// NOTE(review): whether a failed numeric extraction leaves a leading '-' in
// the stream is implementation-dependent; confirm that "-inf" keeps its sign
// on every supported standard library.
static double* vtkXMLParseAsciiData(istream& is, int* length, double*, int)
{
  int dataLength = 0;
  int dataBufferSize = 64;

  double* dataBuffer = new double[dataBufferSize];
  std::string stringBuffer;
  double element;

  while (true)
  {
    is >> element;
    // Test fail() rather than !good(): a number that ends exactly at the end
    // of the stream only sets eofbit and is still a valid value that must be
    // kept, as the other overloads do.
    if (is.fail())
    {
      // The numeric extraction failed.  Retry the token as text so that the
      // special values can be recognized.
      is.clear(is.rdstate() & ~ios::failbit);
      is >> stringBuffer;
      if (is.fail())
      {
        break;
      }

      for (char& c : stringBuffer)
      {
        // std::tolower needs a value representable as unsigned char.
        c = static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
      }
      if (stringBuffer != "inf" && stringBuffer != "nan" && stringBuffer != "-inf")
      {
        break;
      }
      element = strtod(stringBuffer.c_str(), nullptr);
    }

    if (dataLength == dataBufferSize)
    {
      // Buffer is full: double its size and carry the values over.
      int newSize = dataBufferSize * 2;
      double* newBuffer = new double[newSize];
      std::copy(dataBuffer, dataBuffer + dataLength, newBuffer);
      delete[] dataBuffer;
      dataBuffer = newBuffer;
      dataBufferSize = newSize;
    }
    dataBuffer[dataLength++] = element;
  }

  if (length)
  {
    *length = dataLength;
  }

  return dataBuffer;
}
1122
1123 //------------------------------------------------------------------------------
// ASCII parser for char data.  Each value is extracted as a short, so the
// text is read as a number rather than as a single character, and is then
// converted to char.  Extraction stops at the first failure.
//
// Returns a buffer allocated with new[] that the caller releases with
// delete[]; when "length" is non-null it receives the number of values read.
// The two unnamed parameters only select this overload.
static char* vtkXMLParseAsciiData(istream& is, int* length, char*, int)
{
  int capacity = 64;
  int count = 0;
  char* values = new char[capacity];

  for (short wide = 0; is >> wide; ++count)
  {
    const char narrow = static_cast<char>(wide);
    if (count == capacity)
    {
      // Out of room: move everything into a buffer twice as large.
      const int grownCapacity = 2 * capacity;
      char* grown = new char[grownCapacity];
      std::copy(values, values + count, grown);
      delete[] values;
      values = grown;
      capacity = grownCapacity;
    }
    values[count] = narrow;
  }

  if (length != nullptr)
  {
    *length = count;
  }

  return values;
}
1155
1156 //------------------------------------------------------------------------------
// ASCII parser for unsigned char data.  Each value is extracted as a short,
// so the text is read as a number rather than as a single character, and is
// then converted to unsigned char.  Extraction stops at the first failure.
//
// Returns a buffer allocated with new[] that the caller releases with
// delete[]; when "length" is non-null it receives the number of values read.
// The two unnamed parameters only select this overload.
static unsigned char* vtkXMLParseAsciiData(istream& is, int* length, unsigned char*, int)
{
  int capacity = 64;
  int count = 0;
  unsigned char* values = new unsigned char[capacity];

  for (short wide = 0; is >> wide; ++count)
  {
    const unsigned char narrow = static_cast<unsigned char>(wide);
    if (count == capacity)
    {
      // Out of room: move everything into a buffer twice as large.
      const int grownCapacity = 2 * capacity;
      unsigned char* grown = new unsigned char[grownCapacity];
      std::copy(values, values + count, grown);
      delete[] values;
      values = grown;
      capacity = grownCapacity;
    }
    values[count] = narrow;
  }

  if (length != nullptr)
  {
    *length = count;
  }

  return values;
}
1188
1189 //------------------------------------------------------------------------------
// ASCII parser for signed char data.  Each value is extracted as a short, so
// the text is read as a number rather than as a single character, and is
// then converted to signed char.  Extraction stops at the first failure.
//
// Returns a buffer allocated with new[] that the caller releases with
// delete[]; when "length" is non-null it receives the number of values read.
// The two unnamed parameters only select this overload.
static signed char* vtkXMLParseAsciiData(istream& is, int* length, signed char*, int)
{
  int capacity = 64;
  int count = 0;
  signed char* values = new signed char[capacity];

  for (short wide = 0; is >> wide; ++count)
  {
    const signed char narrow = static_cast<signed char>(wide);
    if (count == capacity)
    {
      // Out of room: move everything into a buffer twice as large.
      const int grownCapacity = 2 * capacity;
      signed char* grown = new signed char[grownCapacity];
      std::copy(values, values + count, grown);
      delete[] values;
      values = grown;
      capacity = grownCapacity;
    }
    values[count] = narrow;
  }

  if (length != nullptr)
  {
    *length = count;
  }

  return values;
}
1221
1222 //------------------------------------------------------------------------------
// Parse ASCII integers and pack them as bits, eight per byte, filling each
// byte from its most significant bit downwards (the storage mechanism used
// by vtkBitArray).  Any non-zero value sets the bit; zero leaves it cleared.
// Extraction stops at the first failure.
//
// Returns a zero-initialized buffer allocated with new[] that the caller
// releases with delete[].  When "length" is non-null it receives the number
// of bytes used, counting a trailing partially filled byte as one byte.
static unsigned char* vtkXMLParseAsciiBitData(istream& is, int* length)
{
  size_t capacityInBytes = 64;
  // Value-initialization zeroes the storage, so untouched bits read as 0.
  unsigned char* bytes = new unsigned char[capacityInBytes]();

  size_t completedBytes = 0;
  unsigned char currentBitInByte = 0;

  for (int bit = 0; is >> bit;)
  {
    // Grow the storage once every byte of it has been filled.
    if (completedBytes == capacityInBytes)
    {
      assert("sanity check" && currentBitInByte == 0);
      const size_t doubledCapacity = capacityInBytes * 2;
      unsigned char* grown = new unsigned char[doubledCapacity]();
      std::copy(bytes, bytes + capacityInBytes, grown);

      delete[] bytes;
      bytes = grown;
      capacityInBytes = doubledCapacity;
    }

    // Record the bit at its position within the byte being filled.
    assert("sanity check" && currentBitInByte < 8);
    if (bit != 0)
    {
      bytes[completedBytes] =
        static_cast<unsigned char>(bytes[completedBytes] | (0x80 >> currentBitInByte));
    }

    // Advance to the next bit, rolling over into the next byte after eight.
    ++currentBitInByte;
    if (currentBitInByte == 8)
    {
      currentBitInByte = 0;
      ++completedBytes;
    }
  }

  if (length != nullptr)
  {
    // The 'word size' of a bit array is treated as one byte, so the length
    // reported here is a byte count.
    const size_t partialByte = (currentBitInByte != 0) ? 1 : 0;
    *length = static_cast<int>(completedBytes + partialByte);
  }

  return bytes;
}
1276
1277 //------------------------------------------------------------------------------
ParseAsciiData(int wordType)1278 int vtkXMLDataParser::ParseAsciiData(int wordType)
1279 {
1280 istream& is = *(this->Stream);
1281
1282 // Don't re-parse the same ascii data.
1283 if (this->AsciiDataPosition == this->TellG())
1284 {
1285 return (this->AsciiDataBuffer ? 1 : 0);
1286 }
1287
1288 // Prepare for new data.
1289 this->AsciiDataPosition = this->TellG();
1290 if (this->AsciiDataBuffer)
1291 {
1292 this->FreeAsciiBuffer();
1293 }
1294
1295 int length = 0;
1296 void* buffer = nullptr;
1297 switch (wordType)
1298 {
1299 vtkTemplateMacro(buffer = vtkXMLParseAsciiData(is, &length, static_cast<VTK_TT*>(nullptr), 1));
1300
1301 case VTK_BIT:
1302 buffer = vtkXMLParseAsciiBitData(is, &length);
1303 break;
1304 }
1305
1306 // Read terminated from failure. Clear the fail bit so another read
1307 // can take place later.
1308 is.clear(is.rdstate() & ~ios::failbit);
1309
1310 // Save the buffer.
1311 this->AsciiDataBuffer = reinterpret_cast<unsigned char*>(buffer);
1312 this->AsciiDataBufferLength = length;
1313 this->AsciiDataWordType = wordType;
1314 return (this->AsciiDataBuffer ? 1 : 0);
1315 }
1316
1317 //------------------------------------------------------------------------------
// Release an array allocated with new[] through a pointer of its element
// type T.  Passing a null pointer is harmless.
template <class T>
void vtkXMLDataParserFreeAsciiBuffer(T* typedBuffer)
{
  delete[] typedBuffer;
}
1323
1324 //------------------------------------------------------------------------------
FreeAsciiBuffer()1325 void vtkXMLDataParser::FreeAsciiBuffer()
1326 {
1327 void* buffer = this->AsciiDataBuffer;
1328 switch (this->AsciiDataWordType)
1329 {
1330 vtkTemplateMacro(vtkXMLDataParserFreeAsciiBuffer(static_cast<VTK_TT*>(buffer)));
1331
1332 case VTK_BIT:
1333 vtkXMLDataParserFreeAsciiBuffer(static_cast<unsigned char*>(buffer));
1334 break;
1335 }
1336 this->AsciiDataBuffer = nullptr;
1337 }
1338
1339 //------------------------------------------------------------------------------
UpdateProgress(float progress)1340 void vtkXMLDataParser::UpdateProgress(float progress)
1341 {
1342 this->Progress = progress;
1343 double dProgress = progress;
1344 this->InvokeEvent(vtkCommand::ProgressEvent, &dProgress);
1345 }
1346