1 /*
2 This file forked from https://github.com/srajotte/libplyxx
3 
4 Copyright (c) 2016 Simon Rajotte
5 
6 Permission is hereby granted, free of charge, to any person obtaining a copy
7 of this software and associated documentation files (the "Software"), to deal
8 in the Software without restriction, including without limitation the rights
9 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 copies of the Software, and to permit persons to whom the Software is
11 furnished to do so, subject to the following conditions:
12 
13 The above copyright notice and this permission notice shall be included in all
14 copies or substantial portions of the Software.
15 
16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 SOFTWARE.
23 
24 Updated (c) 2021 Runette Software Ltd to make multiplatform, to complete the typemaps and add to voxel types.
25 
26 Permission is hereby granted, free of charge, to any person obtaining a copy
27 of this software and associated documentation files (the "Software"), to deal
28 in the Software without restriction, including without limitation the rights
29 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
30 copies of the Software, and to permit persons to whom the Software is
31 furnished to do so, subject to the following conditions:
32 
33 The above copyright notice and this permission notice shall be included in all
34 copies or substantial portions of the Software.
35 
36 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
37 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
38 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
39 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
40 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
41 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
42 SOFTWARE.
43 
44 */
45 
46 #pragma once
47 
#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>
#include <fstream>
#include <functional>
#include <iostream>
#include <stdexcept>
#include <string>
#include <type_traits>
#include <vector>
56 
57 namespace textio
58 {
59   class SubString
60   {
61     public:
62       typedef std::string::const_iterator const_iterator;
63 
64     public:
65       SubString() = default;
SubString(const std::string::const_iterator & begin,const std::string::const_iterator & end)66       SubString( const std::string::const_iterator &begin, const std::string::const_iterator &end )
67         : m_begin( begin ), m_end( end ) {};
68 
string()69       operator std::string() const { return std::string( m_begin, m_end ); };
70 
begin()71       const_iterator begin() const { return m_begin; };
end()72       const_iterator end() const { return m_end; };
size()73       int size() const { return m_end - m_begin; };
74 
75     private:
76       const_iterator m_begin;
77       const_iterator m_end;
78   };
79 
80   class Tokenizer
81   {
82     public:
83       typedef std::vector<SubString> TokenList;
84     public:
85       inline Tokenizer( char delimiter );
86 
87       inline TokenList tokenize( const std::string &buffer ) const;
88       inline TokenList tokenize( const SubString &buffer ) const;
89       inline void tokenize( const SubString &buffer, TokenList &tokens ) const;
90 
91       inline static std::string toString( const TokenList &tokens );
92 
93     private:
94       char m_delimiter;
95   };
96 
97   class LineReader
98   {
99     public:
100       template<typename PathString>
101       inline LineReader( const PathString &filename, bool textMode = false );
102 
103       // Read next line from input file.
104       // Returned SubString is valid until the next call to getline()
105       inline SubString getline();
eof()106       inline bool eof() const { return m_eof; };
filestream()107       inline std::ifstream &filestream() { return m_file; };
108       inline std::streamsize position( const std::string::const_iterator &workbuf_iter );
109 
110     private:
111       //inline void readFileChunk(std::streamoff offset = 0);
112       inline std::streamsize readFileChunk( std::size_t overlap );
113       inline SubString findLine();
114 
115     private:
116       typedef std::string WorkBuffer;
117 
118     private:
119       std::ifstream m_file;
120 
121       std::streamsize m_workBufSize;
122       std::streamsize m_workBufFileEndPosition;
123       WorkBuffer m_workBuf;
124       bool m_eof;
125 
126       WorkBuffer::const_iterator m_begin;
127       WorkBuffer::const_iterator m_end;
128   };
129 
130   // Convert string to floating point (real) type.
131   template<typename T>
stor(const SubString & substr)132   T stor( const SubString &substr )
133   {
134     auto p = substr.begin();
135     auto end = substr.end();
136     T real = 0.0;
137     bool negative = false;
138     if ( p != end && *p == '-' )
139     {
140       negative = true;
141       ++p;
142     }
143     while ( p != end && *p >= '0' && *p <= '9' )
144     {
145       real = ( real * static_cast<T>( 10.0 ) ) + ( *p - '0' );
146       ++p;
147     }
148     if ( p != end && *p == '.' )
149     {
150       T frac = 0.0;
151       int n = 0;
152       ++p;
153       while ( p != end && *p >= '0' && *p <= '9' )
154       {
155         frac = ( frac * static_cast<T>( 10.0 ) ) + ( *p - '0' );
156         ++p;
157         ++n;
158       }
159       real += static_cast<T>( frac / std::pow( 10.0, n ) );
160     }
161     if ( p != end && ( *p == 'e' || *p == 'E' ) )
162     {
163       ++p;
164       T sign = 1.0;
165       if ( p != end && *p == '-' )
166       {
167         sign = -1.0;
168         ++p;
169       }
170       T exponent = 0.0;
171       while ( p != end && *p >= '0' && *p <= '9' )
172       {
173         exponent = ( exponent * static_cast<T>( 10.0 ) ) + ( *p - '0' );
174         ++p;
175       }
176       real = real * std::pow( static_cast<T>( 10.0 ), sign * exponent );
177     }
178     if ( negative )
179     {
180       real = -real;
181     }
182     return real;
183   }
184 
185   template<typename T>
stor(const std::string & str)186   T stor( const std::string &str )
187   {
188     return stor<T>( SubString( str.cbegin(), str.cend() ) );
189   }
190 
191   // Convert string to unsigned type.
192   template<typename T>
stou(const SubString & substr)193   T stou( const SubString &substr )
194   {
195     static_assert( std::is_unsigned<T>::value, "Cannot use stou() with signed type." );
196     auto p = substr.begin();
197     auto end = substr.end();
198     T integer = 0;
199     assert( *p != '-' );
200     while ( p != end && *p >= '0' && *p <= '9' )
201     {
202       integer = ( integer * 10 ) + ( *p - '0' );
203       ++p;
204     }
205     return integer;
206   }
207 
208   // Convert string to signed integer type.
209   template<typename T>
stoi(const SubString & substr)210   T stoi( const SubString &substr )
211   {
212     auto p = substr.begin();
213     auto end = substr.end();
214     T integer = 0;
215     bool negative = false;
216     if ( p != end && *p == '-' )
217     {
218       negative = true;
219       ++p;
220     }
221     while ( p != end && *p >= '0' && *p <= '9' )
222     {
223       integer = ( integer * 10 ) + ( *p - '0' );
224       ++p;
225     }
226     if ( negative )
227     {
228       integer = -integer;
229     }
230     return integer;
231   }
232 
233   template<typename T>
stou(const std::string & str)234   T stou( const std::string &str )
235   {
236     return stou<T>( SubString( str.cbegin(), str.cend() ) );
237   }
238 
Tokenizer(char delimiter)239   Tokenizer::Tokenizer( char delimiter )
240     : m_delimiter( delimiter )
241   {
242 
243   }
244 
tokenize(const SubString & buffer)245   Tokenizer::TokenList Tokenizer::tokenize( const SubString &buffer ) const
246   {
247     TokenList tokens;
248     tokenize( buffer, tokens );
249     return tokens;
250   }
251 
tokenize(const std::string & buffer)252   Tokenizer::TokenList Tokenizer::tokenize( const std::string &buffer ) const
253   {
254     return tokenize( SubString( buffer.cbegin(), buffer.cend() ) );
255   }
256 
find(textio::SubString::const_iterator begin,textio::SubString::const_iterator end,char delimiter)257   inline textio::SubString::const_iterator find( textio::SubString::const_iterator begin, textio::SubString::const_iterator end, char delimiter )
258   {
259     textio::SubString::const_iterator start = begin;
260     while ( start != end )
261     {
262       if ( *start == delimiter ) return start;
263       ++start;
264     }
265     return end;
266   }
267 
findSIMD(textio::SubString::const_iterator begin,textio::SubString::const_iterator end,char delimiter)268   inline textio::SubString::const_iterator findSIMD( textio::SubString::const_iterator begin, textio::SubString::const_iterator end, char delimiter )
269   {
270     uint64_t pattern;
271     switch ( delimiter )
272     {
273       case '\n': pattern = 0x0a0a0a0a0a0a0a0aULL; break;
274       case ' ': pattern = 0x2020202020202020ULL; break;
275       case '\r': pattern = 0x0d0d0d0d0d0d0d0dULL; break;
276       default: throw std::runtime_error( "Unsupported delimiter." ); //TODO
277     }
278 
279     textio::SubString::const_iterator start = begin;
280     const int WORD_WIDTH = 8;
281     while ( end - start > WORD_WIDTH )
282     {
283       // Xor data with pattern to find the bit distance. Matching bytes will be 0x00, otherwise >0x00.
284       // When subtracting 0x01 to all bytes, only bytes at 0x00 will underflow to 0xff, i.e. bytes that match the pattern.
285       // Apply bitwise-and between that last result and 0x80, i.e. b10000000, to keep bytes >0x80. Since the last ASCII character is 0x79, only the matching bytes that underflowed are kept.
286       // Must also test with ~data, because subtraction at 0x00 cause borrowing from the adjacent byte, which might have cause an underflow at that byte.
287       uint64_t data = *( uint64_t * ) & ( *start );
288       data = data ^ pattern;
289       if ( ( data - 0x0101010101010101ULL ) & ~data & 0x8080808080808080 )
290       {
291         // Delimiter found in sequence.
292         return textio::find( start, end, delimiter );
293       }
294       else
295       {
296         start += WORD_WIDTH;
297       }
298     }
299     // Remaining data in sequence too small to run SIMD search.
300     return textio::find( begin, end, delimiter );
301   }
302 
tokenize(const SubString & buffer,TokenList & tokens)303   inline void Tokenizer::tokenize( const SubString &buffer, TokenList &tokens ) const
304   {
305     tokens.clear();
306     textio::SubString::const_iterator begin = buffer.begin();
307     const textio::SubString::const_iterator end = buffer.end();
308     textio::SubString::const_iterator eot = begin;
309     while ( eot != end )
310     {
311       // Skip all delimiters.
312       while ( begin != end && *begin == m_delimiter )
313       {
314         ++begin;
315       }
316       eot = textio::find( begin, end, m_delimiter );
317 
318       tokens.emplace_back( begin, eot );
319       if ( eot != end )
320       {
321         // Move begin after delimiter.
322         begin = eot + 1;
323       }
324     }
325   }
326 
toString(const TokenList & tokens)327   inline std::string Tokenizer::toString( const TokenList &tokens )
328   {
329     std::string ret = "";
330     for ( textio::SubString token : tokens )
331     {
332       ret.append( token );
333     }
334     return ret;
335   }
336 
337   template<typename PathString>
LineReader(const PathString & filename,bool textMode)338   LineReader::LineReader( const PathString &filename, bool textMode )
339     : m_workBufSize( 1 * 1024 * 1024 ), m_workBufFileEndPosition( 0 ), m_eof( false )
340   {
341     std::ios_base::openmode mode = std::fstream::in;
342     if ( !textMode ) { mode |= std::fstream::binary; }
343     m_file.open( filename, mode );
344     if ( !m_file.is_open() )
345     {
346       throw std::runtime_error( "Could not open file." );
347     }
348     m_workBuf.resize( m_workBufSize );
349     readFileChunk( 0 );
350   }
351 
getline()352   SubString LineReader::getline()
353   {
354     return findLine();
355   }
356 
readFileChunk(std::size_t overlap)357   std::streamsize LineReader::readFileChunk( std::size_t overlap )
358   {
359     char *bufferFront = &m_workBuf.front();
360     if ( overlap != 0 )
361     {
362       size_t offset = m_workBufSize - overlap;
363       std::memcpy( bufferFront, bufferFront + offset, overlap );
364     }
365     m_file.read( bufferFront + overlap, m_workBufSize - overlap );
366     m_begin = m_workBuf.cbegin();
367     m_end = m_workBuf.cbegin() + overlap + m_file.gcount();
368     m_workBufFileEndPosition += m_file.gcount();
369     return m_file.gcount();
370   }
371 
findLine()372   SubString LineReader::findLine()
373   {
374     SubString::const_iterator nl = findSIMD( m_begin, m_end, '\n' );
375     SubString::const_iterator eol = findSIMD( m_begin, nl, '\r' );
376     if ( m_begin == m_workBuf.cbegin() && eol == m_end )
377     {
378       std::runtime_error( "Working buffer too small to fit single line." );
379     }
380     SubString lineSubstring( m_begin, eol );
381 
382     // Reached the end of the work buffer (last character not a newline delimiter).
383     if ( eol == m_end )
384     {
385       auto count = readFileChunk( m_end - m_begin );
386       if ( count == 0 && m_file.eof() )
387       {
388         m_eof = true;
389         return lineSubstring;
390       }
391       else
392       {
393         lineSubstring = findLine();
394       }
395     }
396     // Line complete.
397     else
398     {
399       // Set begin pointer to the first character after the newline delimiter.
400       m_begin = nl + 1;
401     }
402     return lineSubstring;
403   }
404 
position(const std::string::const_iterator & workbuf_iter)405   std::streamsize LineReader::position( const std::string::const_iterator &workbuf_iter )
406   {
407     return m_workBufFileEndPosition - ( m_end - workbuf_iter );
408   }
409 }
410