1 /*=========================================================================
2
3 Program: Visualization Toolkit
4 Module: vtkDelimitedTextReader.cxx
5
6 Copyright (c) Ken Martin, Will Schroeder, Bill Lorensen
7 All rights reserved.
8 See Copyright.txt or http://www.kitware.com/Copyright.htm for details.
9
10 This software is distributed WITHOUT ANY WARRANTY; without even
11 the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
12 PURPOSE. See the above copyright notice for more information.
13
14 =========================================================================*/
15 /*-------------------------------------------------------------------------
16 Copyright 2008 Sandia Corporation.
17 Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
18 the U.S. Government retains certain rights in this software.
19 -------------------------------------------------------------------------*/
20
21 #include "vtkDelimitedTextReader.h"
22 #include "vtkCommand.h"
23 #include "vtkDataSetAttributes.h"
24 #include "vtkIdTypeArray.h"
25 #include "vtkInformation.h"
26 #include "vtkInformationVector.h"
27 #include "vtkObjectFactory.h"
28 #include "vtkSmartPointer.h"
29 #include "vtkStreamingDemandDrivenPipeline.h"
30 #include "vtkTable.h"
31 #include "vtkUnicodeStringArray.h"
32 #include "vtkStringArray.h"
33 #include "vtkStringToNumeric.h"
34
35 #include "vtkTextCodec.h"
36 #include "vtkTextCodecFactory.h"
37
38 #include <sstream>
39 #include <iostream>
40 #include <algorithm>
41 #include <iterator>
42 #include <stdexcept>
43 #include <string>
44 #include <set>
45 #include <vector>
46
47 #include <cctype>
48
49 ////////////////////////////////////////////////////////////////////////////////
50 // DelimitedTextIterator
51
52 /// Output iterator object that parses a stream of Unicode characters into records and
53 /// fields, inserting them into a vtkTable.
54
55 namespace {
56
57 class DelimitedTextIterator : public vtkTextCodec::OutputIterator
58 {
59 public:
60 typedef std::forward_iterator_tag iterator_category;
61 typedef vtkUnicodeStringValueType value_type;
62 typedef std::string::difference_type difference_type;
63 typedef value_type* pointer;
64 typedef value_type& reference;
65
DelimitedTextIterator(const vtkIdType max_records,const vtkUnicodeString & record_delimiters,const vtkUnicodeString & field_delimiters,const vtkUnicodeString & string_delimiters,const vtkUnicodeString & whitespace,const vtkUnicodeString & escape,bool have_headers,bool unicode_array_output,bool merg_cons_delimiters,bool use_string_delimeter,vtkTable * const output_table)66 DelimitedTextIterator(
67 const vtkIdType max_records,
68 const vtkUnicodeString& record_delimiters,
69 const vtkUnicodeString& field_delimiters,
70 const vtkUnicodeString& string_delimiters,
71 const vtkUnicodeString& whitespace,
72 const vtkUnicodeString& escape,
73 bool have_headers,
74 bool unicode_array_output,
75 bool merg_cons_delimiters,
76 bool use_string_delimeter,
77 vtkTable* const output_table
78 ) :
79 MaxRecords(max_records),
80 MaxRecordIndex(have_headers ? max_records + 1 : max_records),
81 RecordDelimiters(record_delimiters.begin(), record_delimiters.end()),
82 FieldDelimiters(field_delimiters.begin(), field_delimiters.end()),
83 StringDelimiters(string_delimiters.begin(), string_delimiters.end()),
84 Whitespace(whitespace.begin(), whitespace.end()),
85 EscapeDelimiter(escape.begin(), escape.end()),
86 HaveHeaders(have_headers),
87 UnicodeArrayOutput(unicode_array_output),
88 WhiteSpaceOnlyString(true),
89 OutputTable(output_table),
90 CurrentRecordIndex(0),
91 CurrentFieldIndex(0),
92 RecordAdjacent(true),
93 MergeConsDelims(merg_cons_delimiters),
94 ProcessEscapeSequence(false),
95 UseStringDelimiter(use_string_delimeter),
96 WithinString(0)
97 {
98 }
99
~DelimitedTextIterator()100 ~DelimitedTextIterator() override
101 {
102 // Ensure that all table columns have the same length ...
103 for(vtkIdType i = 0; i != this->OutputTable->GetNumberOfColumns(); ++i)
104 {
105 if(this->OutputTable->GetColumn(i)->GetNumberOfTuples() !=
106 this->OutputTable->GetColumn(0)->GetNumberOfTuples())
107 {
108 this->OutputTable->GetColumn(i)
109 ->Resize(this->OutputTable->GetColumn(0)->GetNumberOfTuples());
110 }
111 }
112 }
113
operator ++(int)114 DelimitedTextIterator& operator++(int) override
115 {
116 return *this;
117 }
118
operator *()119 DelimitedTextIterator& operator*() override
120 {
121 return *this;
122 }
123
124 // Handle windows files that do not have a carriage return line feed on the last line of the file ...
ReachedEndOfInput()125 void ReachedEndOfInput()
126 {
127 if(this->CurrentField.empty())
128 {
129 return;
130 }
131 vtkUnicodeString::value_type value =
132 this->CurrentField[this->CurrentField.character_count()-1];
133 if(!this->RecordDelimiters.count(value) && !this->Whitespace.count(value))
134 {
135 this->InsertField();
136 }
137 }
138
operator =(const vtkUnicodeString::value_type value)139 DelimitedTextIterator& operator=(const vtkUnicodeString::value_type value) override
140 {
141 // If we've already read our maximum number of records, we're done ...
142 if(this->MaxRecords && this->CurrentRecordIndex == this->MaxRecordIndex)
143 {
144 return *this;
145 }
146
147 // Strip adjacent record delimiters and whitespace...
148 if(this->RecordAdjacent && (this->RecordDelimiters.count(value) ||
149 this->Whitespace.count(value)))
150 {
151 return *this;
152 }
153 else
154 {
155 this->RecordAdjacent = false;
156 }
157
158 // Look for record delimiters ...
159 if(this->RecordDelimiters.count(value))
160 {
161 this->InsertField();
162 this->CurrentRecordIndex += 1;
163 this->CurrentFieldIndex = 0;
164 this->CurrentField.clear();
165 this->RecordAdjacent = true;
166 this->WithinString = 0;
167 this->WhiteSpaceOnlyString = true;
168 return *this;
169 }
170
171 // Look for field delimiters unless we're in a string ...
172 if(!this->WithinString && this->FieldDelimiters.count(value))
173 {
174 // Handle special case of merging consective delimiters ...
175 if( !(this->CurrentField.empty() && this->MergeConsDelims) )
176 {
177 this->InsertField();
178 this->CurrentFieldIndex += 1;
179 this->CurrentField.clear();
180 }
181 return *this;
182 }
183
184 // Check for start of escape sequence ...
185 if(!this->ProcessEscapeSequence && this->EscapeDelimiter.count(value))
186 {
187 this->ProcessEscapeSequence = true;
188 return *this;
189 }
190
191 // Process escape sequence ...
192 if(this->ProcessEscapeSequence)
193 {
194 vtkUnicodeString curr_char;
195 curr_char += value;
196 if(curr_char == vtkUnicodeString::from_utf8("0"))
197 {
198 this->CurrentField += vtkUnicodeString::from_utf8("\0");
199 }
200 else if(curr_char == vtkUnicodeString::from_utf8("a"))
201 {
202 this->CurrentField += vtkUnicodeString::from_utf8("\a");
203 }
204 else if(curr_char == vtkUnicodeString::from_utf8("b"))
205 {
206 this->CurrentField += vtkUnicodeString::from_utf8("\b");
207 }
208 else if(curr_char == vtkUnicodeString::from_utf8("t"))
209 {
210 this->CurrentField += vtkUnicodeString::from_utf8("\t");
211 }
212 else if(curr_char == vtkUnicodeString::from_utf8("n"))
213 {
214 this->CurrentField += vtkUnicodeString::from_utf8("\n");
215 }
216 else if(curr_char == vtkUnicodeString::from_utf8("v"))
217 {
218 this->CurrentField += vtkUnicodeString::from_utf8("\v");
219 }
220 else if(curr_char == vtkUnicodeString::from_utf8("f"))
221 {
222 this->CurrentField += vtkUnicodeString::from_utf8("\f");
223 }
224 else if(curr_char == vtkUnicodeString::from_utf8("r"))
225 {
226 this->CurrentField += vtkUnicodeString::from_utf8("\r");
227 }
228 else if(curr_char == vtkUnicodeString::from_utf8("\\"))
229 {
230 this->CurrentField += vtkUnicodeString::from_utf8("\\");
231 }
232 else
233 {
234 this->CurrentField += value;
235 }
236 this->ProcessEscapeSequence = false;
237 return *this;
238 }
239
240 // Start a string ...
241 if(!this->WithinString && this->StringDelimiters.count(value) &&
242 this->UseStringDelimiter)
243 {
244 this->WithinString = value;
245 this->CurrentField.clear();
246 return *this;
247 }
248
249 // End a string ...
250 if(this->WithinString && (this->WithinString == value) &&
251 this->UseStringDelimiter)
252 {
253 this->WithinString = 0;
254 return *this;
255 }
256
257 if(!this->Whitespace.count(value))
258 {
259 this->WhiteSpaceOnlyString = false;
260 }
261 // Keep growing the current field ...
262 this->CurrentField += value;
263 return *this;
264 }
265
266 private:
InsertField()267 void InsertField()
268 {
269 if(this->CurrentFieldIndex >= this->OutputTable->GetNumberOfColumns() &&
270 0 == this->CurrentRecordIndex)
271 {
272 vtkAbstractArray* array;
273 if(this->UnicodeArrayOutput)
274 {
275 array = vtkUnicodeStringArray::New();
276 }
277 else
278 {
279 array = vtkStringArray::New();
280 }
281
282 if(this->HaveHeaders)
283 {
284 array->SetName(this->CurrentField.utf8_str());
285 }
286 else
287 {
288 std::stringstream buffer;
289 buffer << "Field " << this->CurrentFieldIndex;
290 array->SetName(buffer.str().c_str());
291 if(this->UnicodeArrayOutput)
292 {
293 array->SetNumberOfTuples(this->CurrentRecordIndex + 1);
294 vtkArrayDownCast<vtkUnicodeStringArray>(array)->SetValue(this->CurrentRecordIndex, this->CurrentField);
295 }
296 else
297 {
298 std::string s;
299 this->CurrentField.utf8_str(s);
300 vtkArrayDownCast<vtkStringArray>(array)->InsertValue(this->CurrentRecordIndex, s);
301 }
302 }
303 this->OutputTable->AddColumn(array);
304 array->Delete();
305 }
306 else if(this->CurrentFieldIndex < this->OutputTable->GetNumberOfColumns())
307 {
308 // Handle case where input file has header information ...
309 vtkIdType rec_index;
310 if(this->HaveHeaders)
311 {
312 rec_index = this->CurrentRecordIndex - 1;
313 }
314 else
315 {
316 rec_index = this->CurrentRecordIndex;
317 }
318
319 if(this->UnicodeArrayOutput)
320 {
321 vtkUnicodeStringArray* uarray = vtkArrayDownCast<vtkUnicodeStringArray>(this->OutputTable->GetColumn(this->CurrentFieldIndex));
322 uarray->SetNumberOfTuples(rec_index + 1);
323 uarray->SetValue(rec_index, this->CurrentField);
324 }
325 else
326 {
327 vtkStringArray* sarray = vtkArrayDownCast<vtkStringArray>(this->OutputTable->GetColumn(this->CurrentFieldIndex));
328 std::string s;
329 this->CurrentField.utf8_str(s);
330 sarray->InsertValue(rec_index,s);
331 }
332 }
333 }
334
335 vtkIdType MaxRecords;
336 vtkIdType MaxRecordIndex;
337 std::set<vtkUnicodeString::value_type> RecordDelimiters;
338 std::set<vtkUnicodeString::value_type> FieldDelimiters;
339 std::set<vtkUnicodeString::value_type> StringDelimiters;
340 std::set<vtkUnicodeString::value_type> Whitespace;
341 std::set<vtkUnicodeString::value_type> EscapeDelimiter;
342 bool HaveHeaders;
343 bool UnicodeArrayOutput;
344 bool WhiteSpaceOnlyString;
345 vtkTable* OutputTable;
346 vtkIdType CurrentRecordIndex;
347 vtkIdType CurrentFieldIndex;
348 vtkUnicodeString CurrentField;
349 bool RecordAdjacent;
350 bool MergeConsDelims;
351 bool ProcessEscapeSequence;
352 bool UseStringDelimiter;
353 vtkUnicodeString::value_type WithinString;
354 };
355
356 } // End anonymous namespace
357
358 /////////////////////////////////////////////////////////////////////////////////////////
359 // vtkDelimitedTextReader
360
361 vtkStandardNewMacro(vtkDelimitedTextReader);
362
vtkDelimitedTextReader()363 vtkDelimitedTextReader::vtkDelimitedTextReader() :
364 FileName(nullptr),
365 UnicodeCharacterSet(nullptr),
366 MaxRecords(0),
367 UnicodeRecordDelimiters(vtkUnicodeString::from_utf8("\r\n")),
368 UnicodeFieldDelimiters(vtkUnicodeString::from_utf8(",")),
369 UnicodeStringDelimiters(vtkUnicodeString::from_utf8("\"")),
370 UnicodeWhitespace(vtkUnicodeString::from_utf8(" \t\r\n\v\f")),
371 UnicodeEscapeCharacter(vtkUnicodeString::from_utf8("\\")),
372 HaveHeaders(false),
373 ReplacementCharacter('x')
374 {
375 this->SetNumberOfInputPorts(0);
376 this->SetNumberOfOutputPorts(1);
377
378 this->ReadFromInputString = 0;
379 this->InputString = nullptr;
380 this->InputStringLength = 0;
381 this->MergeConsecutiveDelimiters = false;
382 this->PedigreeIdArrayName = nullptr;
383 this->SetPedigreeIdArrayName("id");
384 this->GeneratePedigreeIds = true;
385 this->OutputPedigreeIds = false;
386 this->AddTabFieldDelimiter = false;
387 this->UnicodeOutputArrays = false;
388 this->FieldDelimiterCharacters = nullptr;
389 this->SetFieldDelimiterCharacters(",");
390 this->StringDelimiter='"';
391 this->UseStringDelimiter = true;
392 this->DetectNumericColumns = false;
393 this->ForceDouble = false;
394 this->DefaultIntegerValue = 0;
395 this->DefaultDoubleValue = 0.0;
396 this->TrimWhitespacePriorToNumericConversion = false;
397 }
398
~vtkDelimitedTextReader()399 vtkDelimitedTextReader::~vtkDelimitedTextReader()
400 {
401 this->SetPedigreeIdArrayName(nullptr);
402 this->SetUnicodeCharacterSet(nullptr);
403 this->SetFileName(nullptr);
404 this->SetInputString(nullptr);
405 this->SetFieldDelimiterCharacters(nullptr);
406 }
407
PrintSelf(ostream & os,vtkIndent indent)408 void vtkDelimitedTextReader::PrintSelf(ostream& os, vtkIndent indent)
409 {
410 this->Superclass::PrintSelf(os, indent);
411 os << indent << "FileName: "
412 << (this->FileName ? this->FileName : "(none)") << endl;
413 os << indent << "ReadFromInputString: "
414 << (this->ReadFromInputString ? "On\n" : "Off\n");
415 if ( this->InputString )
416 {
417 os << indent << "Input String: " << this->InputString << "\n";
418 }
419 else
420 {
421 os << indent << "Input String: (None)\n";
422 }
423 os << indent << "UnicodeCharacterSet: "
424 << (this->UnicodeCharacterSet ? this->UnicodeCharacterSet : "(none)") << endl;
425 os << indent << "MaxRecords: " << this->MaxRecords
426 << endl;
427 os << indent << "UnicodeRecordDelimiters: '" << this->UnicodeRecordDelimiters.utf8_str()
428 << "'" << endl;
429 os << indent << "UnicodeFieldDelimiters: '" << this->UnicodeFieldDelimiters.utf8_str()
430 << "'" << endl;
431 os << indent << "UnicodeStringDelimiters: '" << this->UnicodeStringDelimiters.utf8_str()
432 << "'" << endl;
433 os << indent << "StringDelimiter: "
434 << this->StringDelimiter << endl;
435 os << indent << "ReplacementCharacter: " << this->ReplacementCharacter << endl;
436 os << indent << "FieldDelimiterCharacters: "
437 << (this->FieldDelimiterCharacters ? this->FieldDelimiterCharacters : "(none)") << endl;
438 os << indent << "HaveHeaders: "
439 << (this->HaveHeaders ? "true" : "false") << endl;
440 os << indent << "MergeConsecutiveDelimiters: "
441 << (this->MergeConsecutiveDelimiters ? "true" : "false") << endl;
442 os << indent << "UseStringDelimiter: "
443 << (this->UseStringDelimiter ? "true" : "false") << endl;
444 os << indent << "DetectNumericColumns: "
445 << (this->DetectNumericColumns? "true" : "false") << endl;
446 os << indent << "ForceDouble: "
447 << (this->ForceDouble ? "true" : "false") << endl;
448 os << indent << "DefaultIntegerValue: "
449 << this->DefaultIntegerValue << endl;
450 os << indent << "DefaultDoubleValue: "
451 << this->DefaultDoubleValue << endl;
452 os << indent << "TrimWhitespacePriorToNumericConversion: "
453 << (this->TrimWhitespacePriorToNumericConversion ? "true" : "false") << endl;
454 os << indent << "GeneratePedigreeIds: "
455 << this->GeneratePedigreeIds << endl;
456 os << indent << "PedigreeIdArrayName: "
457 << this->PedigreeIdArrayName << endl;
458 os << indent << "OutputPedigreeIds: "
459 << (this->OutputPedigreeIds ? "true" : "false") << endl;
460 os << indent << "AddTabFieldDelimiter: "
461 << (this->AddTabFieldDelimiter ? "true" : "false") << endl;
462 }
463
SetInputString(const char * in)464 void vtkDelimitedTextReader::SetInputString(const char *in)
465 {
466 int len = 0;
467 if (in != nullptr)
468 {
469 len = static_cast<int>(strlen(in));
470 }
471 this->SetInputString(in, len);
472 }
473
SetInputString(const char * in,int len)474 void vtkDelimitedTextReader::SetInputString(const char *in, int len)
475 {
476 if (this->InputString && in && strncmp(in, this->InputString, len) == 0)
477 {
478 return;
479 }
480
481 delete [] this->InputString;
482
483 if (in && len>0)
484 {
485 // Add a nullptr terminator so that GetInputString
486 // callers (from wrapped languages) get a valid
487 // C string in *ALL* cases...
488 //
489 this->InputString = new char[len+1];
490 memcpy(this->InputString,in,len);
491 this->InputString[len] = 0;
492 this->InputStringLength = len;
493 }
494 else
495 {
496 this->InputString = nullptr;
497 this->InputStringLength = 0;
498 }
499
500 this->Modified();
501 }
502
SetUnicodeRecordDelimiters(const vtkUnicodeString & delimiters)503 void vtkDelimitedTextReader::SetUnicodeRecordDelimiters(const vtkUnicodeString& delimiters)
504 {
505 this->UnicodeRecordDelimiters = delimiters;
506 this->Modified();
507 }
508
GetUnicodeRecordDelimiters()509 vtkUnicodeString vtkDelimitedTextReader::GetUnicodeRecordDelimiters()
510 {
511 return this->UnicodeRecordDelimiters;
512 }
513
SetUTF8RecordDelimiters(const char * delimiters)514 void vtkDelimitedTextReader::SetUTF8RecordDelimiters(const char* delimiters)
515 {
516 this->UnicodeRecordDelimiters = vtkUnicodeString::from_utf8(delimiters);
517 this->Modified();
518 }
519
GetUTF8RecordDelimiters()520 const char* vtkDelimitedTextReader::GetUTF8RecordDelimiters()
521 {
522 return this->UnicodeRecordDelimiters.utf8_str();
523 }
524
SetUnicodeFieldDelimiters(const vtkUnicodeString & delimiters)525 void vtkDelimitedTextReader::SetUnicodeFieldDelimiters(const vtkUnicodeString& delimiters)
526 {
527 this->UnicodeFieldDelimiters = delimiters;
528 this->Modified();
529 }
530
GetUnicodeFieldDelimiters()531 vtkUnicodeString vtkDelimitedTextReader::GetUnicodeFieldDelimiters()
532 {
533 return this->UnicodeFieldDelimiters;
534 }
535
SetUTF8FieldDelimiters(const char * delimiters)536 void vtkDelimitedTextReader::SetUTF8FieldDelimiters(const char* delimiters)
537 {
538 this->UnicodeFieldDelimiters = vtkUnicodeString::from_utf8(delimiters);
539 this->Modified();
540 }
541
GetUTF8FieldDelimiters()542 const char* vtkDelimitedTextReader::GetUTF8FieldDelimiters()
543 {
544 return this->UnicodeFieldDelimiters.utf8_str();
545 }
546
SetUnicodeStringDelimiters(const vtkUnicodeString & delimiters)547 void vtkDelimitedTextReader::SetUnicodeStringDelimiters(const vtkUnicodeString& delimiters)
548 {
549 this->UnicodeStringDelimiters = delimiters;
550 this->Modified();
551 }
552
GetUnicodeStringDelimiters()553 vtkUnicodeString vtkDelimitedTextReader::GetUnicodeStringDelimiters()
554 {
555 return this->UnicodeStringDelimiters;
556 }
557
SetUTF8StringDelimiters(const char * delimiters)558 void vtkDelimitedTextReader::SetUTF8StringDelimiters(const char* delimiters)
559 {
560 this->UnicodeStringDelimiters = vtkUnicodeString::from_utf8(delimiters);
561 this->Modified();
562 }
563
GetUTF8StringDelimiters()564 const char* vtkDelimitedTextReader::GetUTF8StringDelimiters()
565 {
566 return this->UnicodeStringDelimiters.utf8_str();
567 }
568
GetLastError()569 vtkStdString vtkDelimitedTextReader::GetLastError()
570 {
571 return this->LastError;
572 }
573
RequestData(vtkInformation *,vtkInformationVector **,vtkInformationVector * outputVector)574 int vtkDelimitedTextReader::RequestData(
575 vtkInformation*,
576 vtkInformationVector**,
577 vtkInformationVector* outputVector)
578 {
579 vtkTable* const output_table = vtkTable::GetData(outputVector);
580
581 this->LastError = "";
582
583 try
584 {
585 // We only retrieve one piece ...
586 vtkInformation* const outInfo = outputVector->GetInformationObject(0);
587 if(outInfo->Has(vtkStreamingDemandDrivenPipeline::UPDATE_PIECE_NUMBER()) &&
588 outInfo->Get(vtkStreamingDemandDrivenPipeline::UPDATE_PIECE_NUMBER()) > 0)
589 {
590 return 1;
591 }
592
593 if (!this->PedigreeIdArrayName)
594 {
595 throw std::runtime_error("You must specify a pedigree id array name");
596 }
597
598 istream* input_stream_pt = nullptr;
599 ifstream file_stream;
600 std::istringstream string_stream;
601
602 if(!this->ReadFromInputString)
603 {
604 // If the filename hasn't been specified, we're done ...
605 if(!this->FileName)
606 {
607 return 1;
608 }
609 // Get the total size of the input file in bytes
610 file_stream.open(this->FileName, ios::binary);
611 if(!file_stream.good())
612 {
613 throw std::runtime_error(
614 "Unable to open input file " + std::string(this->FileName));
615 }
616
617 file_stream.seekg(0, ios::end);
618 //const vtkIdType total_bytes = file_stream.tellg();
619 file_stream.seekg(0, ios::beg);
620
621 input_stream_pt = dynamic_cast<istream*>(&file_stream);
622 }
623 else
624 {
625 string_stream.str(this->InputString);
626 input_stream_pt = dynamic_cast<istream*>(&string_stream);
627 }
628
629 vtkStdString character_set;
630 vtkTextCodec* transCodec = nullptr;
631
632 if(this->UnicodeCharacterSet)
633 {
634 this->UnicodeOutputArrays = true;
635 character_set = this->UnicodeCharacterSet;
636 transCodec = vtkTextCodecFactory::CodecForName(this->UnicodeCharacterSet);
637 }
638 else
639 {
640 char tstring[2];
641 tstring[1] = '\0';
642 tstring[0] = this->StringDelimiter;
643 // don't use Set* methods since they change the MTime in
644 // RequestData() !!!!!
645 std::string fieldDelimiterCharacters = this->FieldDelimiterCharacters;
646 if (this->AddTabFieldDelimiter)
647 {
648 fieldDelimiterCharacters.push_back('\t');
649 }
650 this->UnicodeFieldDelimiters =
651 vtkUnicodeString::from_utf8(fieldDelimiterCharacters);
652 this->UnicodeStringDelimiters =
653 vtkUnicodeString::from_utf8(tstring);
654 this->UnicodeOutputArrays = false;
655 transCodec = vtkTextCodecFactory::CodecToHandle(*input_stream_pt);
656 }
657
658 if (nullptr == transCodec)
659 {
660 // should this use the locale instead??
661 return 1;
662 }
663
664 DelimitedTextIterator iterator(
665 this->MaxRecords,
666 this->UnicodeRecordDelimiters,
667 this->UnicodeFieldDelimiters,
668 this->UnicodeStringDelimiters,
669 this->UnicodeWhitespace,
670 this->UnicodeEscapeCharacter,
671 this->HaveHeaders,
672 this->UnicodeOutputArrays,
673 this->MergeConsecutiveDelimiters,
674 this->UseStringDelimiter,
675 output_table);
676
677 vtkTextCodec::OutputIterator& outIter = iterator;
678
679 transCodec->ToUnicode(*input_stream_pt, outIter);
680 iterator.ReachedEndOfInput();
681 transCodec->Delete();
682
683 if(this->OutputPedigreeIds)
684 {
685 if (this->GeneratePedigreeIds)
686 {
687 vtkSmartPointer<vtkIdTypeArray> pedigreeIds =
688 vtkSmartPointer<vtkIdTypeArray>::New();
689 vtkIdType numRows = output_table->GetNumberOfRows();
690 pedigreeIds->SetNumberOfTuples(numRows);
691 pedigreeIds->SetName(this->PedigreeIdArrayName);
692 for (vtkIdType i = 0; i < numRows; ++i)
693 {
694 pedigreeIds->InsertValue(i, i);
695 }
696 output_table->GetRowData()->SetPedigreeIds(pedigreeIds);
697 }
698 else
699 {
700 vtkAbstractArray* arr =
701 output_table->GetColumnByName(this->PedigreeIdArrayName);
702 if (arr)
703 {
704 output_table->GetRowData()->SetPedigreeIds(arr);
705 }
706 else
707 {
708 throw std::runtime_error(
709 "Could not find pedigree id array: " +
710 vtkStdString(this->PedigreeIdArrayName));
711 }
712 }
713 }
714
715 if (this->DetectNumericColumns && !this->UnicodeOutputArrays)
716 {
717 vtkStringToNumeric* converter = vtkStringToNumeric::New();
718 converter->SetForceDouble(this->ForceDouble);
719 converter->SetDefaultIntegerValue(this->DefaultIntegerValue);
720 converter->SetDefaultDoubleValue(this->DefaultDoubleValue);
721 converter->SetTrimWhitespacePriorToNumericConversion(this->TrimWhitespacePriorToNumericConversion);
722 vtkTable* clone = output_table->NewInstance();
723 clone->ShallowCopy(output_table);
724 converter->SetInputData(clone);
725 converter->Update();
726 clone->Delete();
727 output_table->ShallowCopy(converter->GetOutputDataObject(0));
728 converter->Delete();
729 }
730
731 }
732 catch(std::exception& e)
733 {
734 vtkErrorMacro(<< "caught exception: " << e.what() << endl);
735 this->LastError = e.what();
736 output_table->Initialize();
737 }
738 catch(...)
739 {
740 vtkErrorMacro(<< "caught unknown exception." << endl);
741 this->LastError = "Unknown exception.";
742 output_table->Initialize();
743 }
744
745 return 1;
746 }
747