/*
This file forked from https://github.com/srajotte/libplyxx

Copyright (c) 2016 Simon Rajotte

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

Updated (c) 2021 Runette Software Ltd to make multiplatform, to complete the typemaps and add to voxel types.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

*/

#pragma once

#include <string>
#include <vector>
#include <functional>
#include <fstream>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <iostream>
#include <stdexcept>
#include <type_traits>

namespace textio
{
  // Non-owning view over a run of characters inside a std::string, expressed as
  // a [begin, end) pair of const iterators. The viewed string must outlive the
  // SubString and must not be modified while the SubString is in use.
  class SubString
  {
    public:
      typedef std::string::const_iterator const_iterator;

    public:
      // A default-constructed SubString holds singular iterators; assign a real
      // range before reading from it.
      SubString() = default;
      SubString( const std::string::const_iterator &begin, const std::string::const_iterator &end )
        : m_begin( begin ), m_end( end ) {}

      // Copies the viewed characters into an owning std::string.
      operator std::string() const { return std::string( m_begin, m_end ); }

      const_iterator begin() const { return m_begin; }
      const_iterator end() const { return m_end; }
      // Number of characters in the view.
      int size() const { return static_cast<int>( m_end - m_begin ); }

    private:
      const_iterator m_begin;
      const_iterator m_end;
  };

  // Splits a character range into tokens separated by a single delimiter
  // character. Runs of consecutive delimiters are treated as one separator.
  class Tokenizer
  {
    public:
      typedef std::vector<SubString> TokenList;
    public:
      inline Tokenizer( char delimiter );

      // The returned tokens reference `buffer`; it must outlive them.
      inline TokenList tokenize( const std::string &buffer ) const;
      inline TokenList tokenize( const SubString &buffer ) const;
      // Same as above but reuses the storage of `tokens` (cleared first).
      inline void tokenize( const SubString &buffer, TokenList &tokens ) const;

      // Concatenates all tokens, without any separator, into one string.
      inline static std::string toString( const TokenList &tokens );

    private:
      char m_delimiter;
  };

  // Reads a file line by line through a fixed-size working buffer.
  class LineReader
  {
    public:
      // Opens `filename` (binary mode unless `textMode` is true) and loads the
      // first chunk. `workBufSize` is the working buffer size in bytes; a single
      // line must fit in it.
      // Throws std::runtime_error if the file cannot be opened and
      // std::invalid_argument if `workBufSize` is not positive.
      template<typename PathString>
      inline LineReader( const PathString &filename, bool textMode = false,
                         std::streamsize workBufSize = 1 * 1024 * 1024 );

      // Read next line from input file.
      // Returned SubString is valid until the next call to getline()
      inline SubString getline();
      // True once the input has been exhausted by a call to getline().
      inline bool eof() const { return m_eof; }
      inline std::ifstream &filestream() { return m_file; }
      // Number of bytes read from the file up to the buffer position
      // `workbuf_iter`, which must point into the current working buffer.
      inline std::streamsize position( const std::string::const_iterator &workbuf_iter );

    private:
      inline std::streamsize readFileChunk( std::size_t overlap );
      inline SubString findLine();

    private:
      typedef std::string WorkBuffer;

    private:
      std::ifstream m_file;

      std::streamsize m_workBufSize;
      std::streamsize m_workBufFileEndPosition;
      WorkBuffer m_workBuf;
      bool m_eof;

      WorkBuffer::const_iterator m_begin;
      WorkBuffer::const_iterator m_end;
  };

  // Convert string to floating point (real) type.
  // Accepted form: [+|-] digits [. digits] [(e|E) [+|-] digits].
  // Parsing stops at the first character that does not fit; no error is reported.
  template<typename T>
  T stor( const SubString &substr )
  {
    auto p = substr.begin();
    const auto end = substr.end();

    bool negative = false;
    if ( p != end && ( *p == '-' || *p == '+' ) )
    {
      negative = ( *p == '-' );
      ++p;
    }

    T real = 0;
    while ( p != end && *p >= '0' && *p <= '9' )
    {
      real = ( real * static_cast<T>( 10.0 ) ) + static_cast<T>( *p - '0' );
      ++p;
    }

    if ( p != end && *p == '.' )
    {
      ++p;
      T frac = 0;
      int n = 0;
      while ( p != end && *p >= '0' && *p <= '9' )
      {
        frac = ( frac * static_cast<T>( 10.0 ) ) + static_cast<T>( *p - '0' );
        ++p;
        ++n;
      }
      real += static_cast<T>( frac / std::pow( 10.0, n ) );
    }

    if ( p != end && ( *p == 'e' || *p == 'E' ) )
    {
      ++p;
      T sign = 1;
      // An explicit '+' must be consumed too, otherwise "1e+2" would be read
      // with an exponent of zero.
      if ( p != end && ( *p == '-' || *p == '+' ) )
      {
        if ( *p == '-' )
        {
          sign = -1;
        }
        ++p;
      }
      T exponent = 0;
      while ( p != end && *p >= '0' && *p <= '9' )
      {
        exponent = ( exponent * static_cast<T>( 10.0 ) ) + static_cast<T>( *p - '0' );
        ++p;
      }
      real = real * std::pow( static_cast<T>( 10.0 ), sign * exponent );
    }

    return negative ? -real : real;
  }

  template<typename T>
  T stor( const std::string &str )
  {
    return stor<T>( SubString( str.cbegin(), str.cend() ) );
  }

  // Convert string to unsigned type.
  // Reads leading decimal digits only; an empty input yields 0.
  template<typename T>
  T stou( const SubString &substr )
  {
    static_assert( std::is_unsigned<T>::value, "Cannot use stou() with signed type." );
    auto p = substr.begin();
    const auto end = substr.end();
    // Guard the dereference: an empty range has nothing to inspect.
    assert( p == end || *p != '-' );
    T integer = 0;
    while ( p != end && *p >= '0' && *p <= '9' )
    {
      integer = static_cast<T>( ( integer * 10 ) + ( *p - '0' ) );
      ++p;
    }
    return integer;
  }

  // Convert string to signed integer type.
  // Accepts an optional leading '-' followed by decimal digits.
  template<typename T>
  T stoi( const SubString &substr )
  {
    auto p = substr.begin();
    const auto end = substr.end();
    bool negative = false;
    if ( p != end && *p == '-' )
    {
      negative = true;
      ++p;
    }
    T integer = 0;
    while ( p != end && *p >= '0' && *p <= '9' )
    {
      integer = static_cast<T>( ( integer * 10 ) + ( *p - '0' ) );
      ++p;
    }
    if ( negative )
    {
      integer = -integer;
    }
    return integer;
  }

  template<typename T>
  T stou( const std::string &str )
  {
    return stou<T>( SubString( str.cbegin(), str.cend() ) );
  }

  inline Tokenizer::Tokenizer( char delimiter )
    : m_delimiter( delimiter )
  {
  }

  inline Tokenizer::TokenList Tokenizer::tokenize( const SubString &buffer ) const
  {
    TokenList tokens;
    tokenize( buffer, tokens );
    return tokens;
  }

  inline Tokenizer::TokenList Tokenizer::tokenize( const std::string &buffer ) const
  {
    return tokenize( SubString( buffer.cbegin(), buffer.cend() ) );
  }

  // Linear search for `delimiter` in [begin, end).
  // Returns the position of the first match, or `end` when there is none.
  inline textio::SubString::const_iterator find( textio::SubString::const_iterator begin, textio::SubString::const_iterator end, char delimiter )
  {
    textio::SubString::const_iterator cursor = begin;
    while ( cursor != end )
    {
      if ( *cursor == delimiter ) return cursor;
      ++cursor;
    }
    return end;
  }

  // Same contract as find(), but examines eight bytes per step.
  // Only '\n', ' ' and '\r' are supported; any other delimiter throws
  // std::runtime_error.
  inline textio::SubString::const_iterator findSIMD( textio::SubString::const_iterator begin, textio::SubString::const_iterator end, char delimiter )
  {
    std::uint64_t pattern;
    switch ( delimiter )
    {
      case '\n': pattern = 0x0a0a0a0a0a0a0a0aULL; break;
      case ' ': pattern = 0x2020202020202020ULL; break;
      case '\r': pattern = 0x0d0d0d0d0d0d0d0dULL; break;
      default: throw std::runtime_error( "Unsupported delimiter." ); //TODO
    }

    constexpr std::ptrdiff_t WORD_WIDTH = sizeof( std::uint64_t );
    textio::SubString::const_iterator cursor = begin;
    while ( end - cursor >= WORD_WIDTH )
    {
      // XOR with the pattern turns every byte equal to the delimiter into 0x00.
      // Subtracting 0x01 from each byte makes exactly those zero bytes wrap and
      // set their high bit; masking with ~word discards bytes whose high bit was
      // already set before the subtraction, so the result is non-zero only when
      // at least one byte of the word matched.
      std::uint64_t word;
      // memcpy avoids an unaligned, aliasing-violating load from the buffer.
      std::memcpy( &word, &( *cursor ), sizeof( word ) );
      word ^= pattern;
      if ( ( word - 0x0101010101010101ULL ) & ~word & 0x8080808080808080ULL )
      {
        // Delimiter found in this word; locate the exact byte.
        return textio::find( cursor, end, delimiter );
      }
      cursor += WORD_WIDTH;
    }
    // Fewer than eight bytes left: finish with the scalar search, continuing
    // from the current position rather than rescanning from `begin`.
    return textio::find( cursor, end, delimiter );
  }

  inline void Tokenizer::tokenize( const SubString &buffer, TokenList &tokens ) const
  {
    tokens.clear();
    textio::SubString::const_iterator cursor = buffer.begin();
    const textio::SubString::const_iterator end = buffer.end();
    while ( cursor != end )
    {
      if ( *cursor == m_delimiter )
      {
        // Skip delimiters; this also prevents an empty token being emitted
        // for trailing delimiters.
        ++cursor;
        continue;
      }
      const textio::SubString::const_iterator tokenEnd = textio::find( cursor, end, m_delimiter );
      tokens.emplace_back( cursor, tokenEnd );
      cursor = tokenEnd;
    }
  }

  inline std::string Tokenizer::toString( const TokenList &tokens )
  {
    std::string ret;
    for ( const textio::SubString &token : tokens )
    {
      ret.append( token.begin(), token.end() );
    }
    return ret;
  }

  template<typename PathString>
  inline LineReader::LineReader( const PathString &filename, bool textMode, std::streamsize workBufSize )
    : m_workBufSize( workBufSize ), m_workBufFileEndPosition( 0 ), m_eof( false )
  {
    if ( workBufSize < 1 )
    {
      throw std::invalid_argument( "Working buffer size must be positive." );
    }
    std::ios_base::openmode mode = std::fstream::in;
    if ( !textMode ) { mode |= std::fstream::binary; }
    m_file.open( filename, mode );
    if ( !m_file.is_open() )
    {
      throw std::runtime_error( "Could not open file." );
    }
    m_workBuf.resize( static_cast<std::size_t>( m_workBufSize ) );
    readFileChunk( 0 );
  }

  inline SubString LineReader::getline()
  {
    return findLine();
  }

  // Moves the last `overlap` buffered bytes to the front of the working buffer
  // and fills the rest of the buffer from the file.
  // Returns the number of bytes read from the file by this call.
  inline std::streamsize LineReader::readFileChunk( std::size_t overlap )
  {
    char *bufferFront = &m_workBuf.front();
    if ( overlap != 0 )
    {
      // The bytes to keep are the tail of the valid data, which ends at m_end
      // (not necessarily at the end of the buffer after a short read).
      // memmove is required because source and destination may overlap.
      const std::size_t offset = static_cast<std::size_t>( m_end - m_workBuf.cbegin() ) - overlap;
      std::memmove( bufferFront, bufferFront + offset, overlap );
    }
    m_file.read( bufferFront + overlap, m_workBufSize - static_cast<std::streamsize>( overlap ) );
    const std::streamsize count = m_file.gcount();
    m_begin = m_workBuf.cbegin();
    m_end = m_workBuf.cbegin() + overlap + count;
    m_workBufFileEndPosition += count;
    return count;
  }

  // Returns the next line, without its "\n" or "\r\n" terminator, refilling the
  // working buffer as needed. The line is cut at the first '\r' it contains.
  // Sets the eof flag when the input is exhausted; the line returned by that
  // call holds whatever unterminated data was left (possibly nothing).
  // Throws std::runtime_error when a line does not fit in the working buffer.
  inline SubString LineReader::findLine()
  {
    for ( ;; )
    {
      const SubString::const_iterator nl = findSIMD( m_begin, m_end, '\n' );
      if ( nl != m_end )
      {
        // Line complete: resume after the newline on the next call.
        const SubString line( m_begin, findSIMD( m_begin, nl, '\r' ) );
        m_begin = nl + 1;
        return line;
      }

      // No newline in the buffered data.
      bool exhausted = false;
      if ( m_begin == m_workBuf.cbegin() && ( m_end - m_begin ) == m_workBufSize )
      {
        // The buffer is completely filled by one unterminated line, so there
        // is no room to read more. That is only acceptable if the file ends here.
        if ( m_file.peek() != std::ifstream::traits_type::eof() )
        {
          throw std::runtime_error( "Working buffer too small to fit single line." );
        }
        exhausted = true;
      }
      else
      {
        // Room is available, so reading nothing means end of file or error.
        exhausted = ( readFileChunk( static_cast<std::size_t>( m_end - m_begin ) ) == 0 );
      }

      if ( exhausted )
      {
        m_eof = true;
        // Build the final line from the current iterators: a refill attempt
        // has moved the remaining data to the front of the buffer.
        const SubString line( m_begin, findSIMD( m_begin, m_end, '\r' ) );
        m_begin = m_end;
        return line;
      }
    }
  }

  inline std::streamsize LineReader::position( const std::string::const_iterator &workbuf_iter )
  {
    return m_workBufFileEndPosition - ( m_end - workbuf_iter );
  }
}