1 /*========================================================================= 2 3 Program: Visualization Toolkit 4 Module: vtkDelimitedTextReader.h 5 6 Copyright (c) Ken Martin, Will Schroeder, Bill Lorensen 7 All rights reserved. 8 See Copyright.txt or http://www.kitware.com/Copyright.htm for details. 9 10 This software is distributed WITHOUT ANY WARRANTY; without even 11 the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 12 PURPOSE. See the above copyright notice for more information. 13 14 =========================================================================*/ 15 /*------------------------------------------------------------------------- 16 Copyright 2008 Sandia Corporation. 17 Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, 18 the U.S. Government retains certain rights in this software. 19 -------------------------------------------------------------------------*/ 20 21 22 /** 23 * @class vtkDelimitedTextReader 24 * @brief reads in delimited ascii or unicode text files 25 * and outputs a vtkTable data structure. 26 * 27 * 28 * vtkDelimitedTextReader is an interface for pulling in data from a 29 * flat, delimited ascii or unicode text file (delimiter can be any character). 30 * 31 * The behavior of the reader with respect to ascii or unicode input 32 * is controlled by the SetUnicodeCharacterSet() method. By default 33 * (without calling SetUnicodeCharacterSet()), the reader will expect 34 * to read ascii text and will output vtkStdString columns. Use the 35 * Set and Get methods to set delimiters that do not contain UTF8 in 36 * the name when operating the reader in default ascii mode. If the 37 * SetUnicodeCharacterSet() method is called, the reader will output 38 * vtkUnicodeString columns in the output table. In addition, it is 39 * necessary to use the Set and Get methods that contain UTF8 in the 40 * name to specify delimiters when operating in unicode mode. 41 * 42 * There is also a special character set US-ASCII-WITH-FALLBACK that 43 * will treat the input text as ASCII no matter what. If and when it 44 * encounters a character with its 8th bit set it will replace that 45 * character with the code point ReplacementCharacter. You may use 46 * this if you have text that belongs to a code page like LATIN9 or 47 * ISO-8859-1 or friends: mostly ASCII but not entirely. Eventually 48 * this class will acquire the ability to read gracefully text from 49 * any code page, making this option obsolete. 50 * 51 * This class emits ProgressEvent for every 100 lines it reads. 52 * 53 * @par Thanks: 54 * Thanks to Andy Wilson, Brian Wylie, Tim Shead, and Thomas Otahal 55 * from Sandia National Laboratories for implementing this class. 56 * 57 * 58 * @warning 59 * This reader assumes that the first line in the file (whether that's 60 * headers or the first document) contains at least as many fields as 61 * any other line in the file. 62 */ 63 64 #ifndef vtkDelimitedTextReader_h 65 #define vtkDelimitedTextReader_h 66 67 #include "vtkIOInfovisModule.h" // For export macro 68 #include "vtkTableAlgorithm.h" 69 #include "vtkUnicodeString.h" // Needed for vtkUnicodeString 70 #include "vtkStdString.h" // Needed for vtkStdString 71 72 class VTKIOINFOVIS_EXPORT vtkDelimitedTextReader : public vtkTableAlgorithm 73 { 74 public: 75 static vtkDelimitedTextReader* New(); 76 vtkTypeMacro(vtkDelimitedTextReader, vtkTableAlgorithm); 77 void PrintSelf(ostream& os, vtkIndent indent) override; 78 79 //@{ 80 /** 81 * Specifies the delimited text file to be loaded. 82 */ 83 vtkGetStringMacro(FileName); 84 vtkSetStringMacro(FileName); 85 //@} 86 87 //@{ 88 /** 89 * Specify the InputString for use when reading from a character array. 90 * Optionally include the length for binary strings. Note that a copy 91 * of the string is made and stored. If this causes exceedingly large 92 * memory consumption, consider using InputArray instead. 93 */ 94 void SetInputString(const char *in); 95 vtkGetStringMacro(InputString); 96 void SetInputString(const char *in, int len); 97 vtkGetMacro(InputStringLength, int); SetInputString(const vtkStdString & input)98 void SetInputString(const vtkStdString& input) 99 { this->SetInputString(input.c_str(), static_cast<int>(input.length())); } 100 //@} 101 102 //@{ 103 /** 104 * Enable reading from an InputString or InputArray instead of the default, 105 * a file. 106 */ 107 vtkSetMacro(ReadFromInputString,vtkTypeBool); 108 vtkGetMacro(ReadFromInputString,vtkTypeBool); 109 vtkBooleanMacro(ReadFromInputString,vtkTypeBool); 110 //@} 111 112 //@{ 113 /** 114 * Specifies the character set used in the input file. Valid character set 115 * names will be drawn from the list maintained by the Internet Assigned Name 116 * Authority at 117 118 * http://www.iana.org/assignments/character-sets 119 120 * Where multiple aliases are provided for a character set, the preferred MIME name 121 * will be used. vtkUnicodeDelimitedTextReader currently supports "US-ASCII", "UTF-8", 122 * "UTF-16", "UTF-16BE", and "UTF-16LE" character sets. 123 */ 124 vtkGetStringMacro(UnicodeCharacterSet); 125 vtkSetStringMacro(UnicodeCharacterSet); 126 //@} 127 128 //@{ 129 /** 130 * Specify the character(s) that will be used to separate records. 131 * The order of characters in the string does not matter. Defaults 132 * to "\r\n". 133 */ 134 void SetUTF8RecordDelimiters(const char* delimiters); 135 const char* GetUTF8RecordDelimiters(); 136 void SetUnicodeRecordDelimiters(const vtkUnicodeString& delimiters); 137 vtkUnicodeString GetUnicodeRecordDelimiters(); 138 //@} 139 140 //@{ 141 /** 142 * Specify the character(s) that will be used to separate fields. For 143 * example, set this to "," for a comma-separated value file. Set 144 * it to ".:;" for a file where columns can be separated by a 145 * period, colon or semicolon. The order of the characters in the 146 * string does not matter. Defaults to a comma. 147 */ 148 vtkSetStringMacro(FieldDelimiterCharacters); 149 vtkGetStringMacro(FieldDelimiterCharacters); 150 //@} 151 152 void SetUTF8FieldDelimiters(const char* delimiters); 153 const char* GetUTF8FieldDelimiters(); 154 void SetUnicodeFieldDelimiters(const vtkUnicodeString& delimiters); 155 vtkUnicodeString GetUnicodeFieldDelimiters(); 156 157 //@{ 158 /** 159 * Get/set the character that will begin and end strings. Microsoft 160 * Excel, for example, will export the following format: 161 162 * "First Field","Second Field","Field, With, Commas","Fourth Field" 163 164 * The third field has a comma in it. By using a string delimiter, 165 * this will be correctly read. The delimiter defaults to '"'. 166 */ 167 vtkGetMacro(StringDelimiter, char); 168 vtkSetMacro(StringDelimiter, char); 169 //@} 170 171 void SetUTF8StringDelimiters(const char* delimiters); 172 const char* GetUTF8StringDelimiters(); 173 void SetUnicodeStringDelimiters(const vtkUnicodeString& delimiters); 174 vtkUnicodeString GetUnicodeStringDelimiters(); 175 176 //@{ 177 /** 178 * Set/get whether to use the string delimiter. Defaults to on. 179 */ 180 vtkSetMacro(UseStringDelimiter, bool); 181 vtkGetMacro(UseStringDelimiter, bool); 182 vtkBooleanMacro(UseStringDelimiter, bool); 183 //@} 184 185 //@{ 186 /** 187 * Set/get whether to treat the first line of the file as headers. 188 * The default is false (no headers). 189 */ 190 vtkGetMacro(HaveHeaders, bool); 191 vtkSetMacro(HaveHeaders, bool); 192 //@} 193 194 //@{ 195 /** 196 * Set/get whether to merge successive delimiters. Use this if (for 197 * example) your fields are separated by spaces but you don't know 198 * exactly how many. 199 */ 200 vtkSetMacro(MergeConsecutiveDelimiters, bool); 201 vtkGetMacro(MergeConsecutiveDelimiters, bool); 202 vtkBooleanMacro(MergeConsecutiveDelimiters, bool); 203 //@} 204 205 //@{ 206 /** 207 * Specifies the maximum number of records to read from the file. Limiting the 208 * number of records to read is useful for previewing the contents of a file. 209 */ 210 vtkGetMacro(MaxRecords, vtkIdType); 211 vtkSetMacro(MaxRecords, vtkIdType); 212 //@} 213 214 //@{ 215 /** 216 * When set to true, the reader will detect numeric columns and create 217 * vtkDoubleArray or vtkIntArray for those instead of vtkStringArray. Default 218 * is off. 219 */ 220 vtkSetMacro(DetectNumericColumns, bool); 221 vtkGetMacro(DetectNumericColumns, bool); 222 vtkBooleanMacro(DetectNumericColumns, bool); 223 //@} 224 225 //@{ 226 /** 227 * When set to true and DetectNumericColumns is also true, forces all 228 * numeric columns to vtkDoubleArray even if they contain only 229 * integer values. Default is off. 230 */ 231 vtkSetMacro(ForceDouble, bool); 232 vtkGetMacro(ForceDouble, bool); 233 vtkBooleanMacro(ForceDouble, bool); 234 //@} 235 236 //@{ 237 /** 238 * When DetectNumericColumns is set to true, whether to trim whitespace from 239 * strings prior to conversion to a numeric. 240 * Default is false to preserve backward compatibility. 241 242 * vtkVariant handles whitespace inconsistently, so trim it before we try to 243 * convert it. For example: 244 245 * vtkVariant(" 2.0").ToDouble() == 2.0 <-- leading whitespace is not a problem 246 * vtkVariant(" 2.0 ").ToDouble() == NaN <-- trailing whitespace is a problem 247 * vtkVariant(" infinity ").ToDouble() == NaN <-- any whitespace is a problem 248 249 * In these cases, trimming the whitespace gives us the result we expect: 250 * 2.0 and INF respectively. 251 */ 252 vtkSetMacro(TrimWhitespacePriorToNumericConversion, bool); 253 vtkGetMacro(TrimWhitespacePriorToNumericConversion, bool); 254 vtkBooleanMacro(TrimWhitespacePriorToNumericConversion, bool); 255 //@} 256 257 //@{ 258 /** 259 * When DetectNumericColumns is set to true, the reader use this value to populate 260 * the vtkIntArray where empty strings are found. Default is 0. 261 */ 262 vtkSetMacro(DefaultIntegerValue, int); 263 vtkGetMacro(DefaultIntegerValue, int); 264 //@} 265 266 //@{ 267 /** 268 * When DetectNumericColumns is set to true, the reader use this value to populate 269 * the vtkDoubleArray where empty strings are found. Default is 0.0 270 */ 271 vtkSetMacro(DefaultDoubleValue, double); 272 vtkGetMacro(DefaultDoubleValue, double); 273 //@} 274 275 //@{ 276 /** 277 * The name of the array for generating or assigning pedigree ids 278 * (default "id"). 279 */ 280 vtkSetStringMacro(PedigreeIdArrayName); 281 vtkGetStringMacro(PedigreeIdArrayName); 282 //@} 283 284 //@{ 285 /** 286 * If on (default), generates pedigree ids automatically. 287 * If off, assign one of the arrays to be the pedigree id. 288 */ 289 vtkSetMacro(GeneratePedigreeIds, bool); 290 vtkGetMacro(GeneratePedigreeIds, bool); 291 vtkBooleanMacro(GeneratePedigreeIds, bool); 292 //@} 293 294 //@{ 295 /** 296 * If on, assigns pedigree ids to output. Defaults to off. 297 */ 298 vtkSetMacro(OutputPedigreeIds, bool); 299 vtkGetMacro(OutputPedigreeIds, bool); 300 vtkBooleanMacro(OutputPedigreeIds, bool); 301 //@} 302 303 //@{ 304 /** 305 * If on, also add in the tab (i.e. '\t') character as a field delimiter. 306 * We add this specially since applications may have a more 307 * difficult time doing this. Defaults to off. 308 */ 309 vtkSetMacro(AddTabFieldDelimiter, bool); 310 vtkGetMacro(AddTabFieldDelimiter, bool); 311 vtkBooleanMacro(AddTabFieldDelimiter, bool); 312 //@} 313 314 /** 315 * Returns a human-readable description of the most recent error, if any. 316 * Otherwise, returns an empty string. Note that the result is only valid 317 * after calling Update(). 318 */ 319 vtkStdString GetLastError(); 320 321 //@{ 322 /** 323 * Fallback character for use in the US-ASCII-WITH-FALLBACK 324 * character set. Any characters that have their 8th bit set will 325 * be replaced with this code point. Defaults to 'x'. 326 */ 327 vtkSetMacro(ReplacementCharacter, vtkTypeUInt32); 328 vtkGetMacro(ReplacementCharacter, vtkTypeUInt32); 329 //@} 330 331 protected: 332 vtkDelimitedTextReader(); 333 ~vtkDelimitedTextReader() override; 334 335 int RequestData( 336 vtkInformation*, 337 vtkInformationVector**, 338 vtkInformationVector*) override; 339 340 char* FileName; 341 vtkTypeBool ReadFromInputString; 342 char *InputString; 343 int InputStringLength; 344 char* UnicodeCharacterSet; 345 vtkIdType MaxRecords; 346 vtkUnicodeString UnicodeRecordDelimiters; 347 vtkUnicodeString UnicodeFieldDelimiters; 348 vtkUnicodeString UnicodeStringDelimiters; 349 vtkUnicodeString UnicodeWhitespace; 350 vtkUnicodeString UnicodeEscapeCharacter; 351 bool DetectNumericColumns; 352 bool ForceDouble; 353 bool TrimWhitespacePriorToNumericConversion; 354 int DefaultIntegerValue; 355 double DefaultDoubleValue; 356 char* FieldDelimiterCharacters; 357 char StringDelimiter; 358 bool UseStringDelimiter; 359 bool HaveHeaders; 360 bool UnicodeOutputArrays; 361 bool MergeConsecutiveDelimiters; 362 char* PedigreeIdArrayName; 363 bool GeneratePedigreeIds; 364 bool OutputPedigreeIds; 365 bool AddTabFieldDelimiter; 366 vtkStdString LastError; 367 vtkTypeUInt32 ReplacementCharacter; 368 369 private: 370 vtkDelimitedTextReader(const vtkDelimitedTextReader&) = delete; 371 void operator=(const vtkDelimitedTextReader&) = delete; 372 373 }; 374 375 #endif 376