1 #ifndef UTIL___LINE_READER__HPP 2 #define UTIL___LINE_READER__HPP 3 4 /* $Id: line_reader.hpp 637993 2021-09-21 18:48:26Z ivanov $ 5 * =========================================================================== 6 * 7 * PUBLIC DOMAIN NOTICE 8 * National Center for Biotechnology Information 9 * 10 * This software/database is a "United States Government Work" under the 11 * terms of the United States Copyright Act. It was written as part of 12 * the author's official duties as a United States Government employee and 13 * thus cannot be copyrighted. This software/database is freely available 14 * to the public for use. The National Library of Medicine and the U.S. 15 * Government have not placed any restriction on its use or reproduction. 16 * 17 * Although all reasonable efforts have been taken to ensure the accuracy 18 * and reliability of the software and data, the NLM and the U.S. 19 * Government do not and cannot warrant the performance or results that 20 * may be obtained by using this software or data. The NLM and the U.S. 21 * Government disclaim all warranties, express or implied, including 22 * warranties of performance, merchantability or fitness for any particular 23 * purpose. 24 * 25 * Please cite the author in any work or product based on this material. 26 * 27 * =========================================================================== 28 * 29 * Author: Aaron Ucko, Anatoliy Kuznetsov 30 * 31 */ 32 33 /// @file line_reader.hpp 34 /// Lightweight interface for getting lines of data with minimal 35 /// memory copying. 36 /// 37 /// Any implementation must always keep its current line in memory so 38 /// that callers may harvest data from it in place. 39 40 #include <corelib/ncbifile.hpp> 41 42 #include <memory> 43 44 /** @addtogroup Miscellaneous 45 * 46 * @{ 47 */ 48 49 50 BEGIN_NCBI_SCOPE 51 52 /// Abstract base class for lightweight line-by-line reading. 53 class NCBI_XUTIL_EXPORT ILineReader : public CObject 54 { 55 public: 56 /// Return a new ILineReader object corresponding to the given 57 /// filename, taking "-" (but not "./-") to mean standard input. 58 /// 59 /// As always with ILineReader, an explicit call to operator++ or 60 /// ReadLine() will be necessary to fetch the first line. 61 static CRef<ILineReader> New(const string& filename); 62 63 /// Return a new ILineReader object corresponding to the given 64 /// input stream, optionally taking ownership thereof. 65 /// 66 /// As always with ILineReader, an explicit call to operator++ or 67 /// ReadLine() will be necessary to fetch the first line. 68 static CRef<ILineReader> New(CNcbiIstream& is, 69 EOwnership ownership = eNoOwnership); 70 71 /// Indicates (negatively) whether there is any more input. 72 virtual bool AtEOF(void) const = 0; 73 74 /// Returns the first character of the next string without consuming it. 75 /// If the next string is empty, returns zero. 76 /// @note 77 /// Before the first call of operator++() this function returns the 78 /// first char of the first string. 79 /// @attention 80 /// It is not guaranteed that any implementation of this function 81 /// will stop reading at EOF, so you should check for EOF yourself 82 virtual char PeekChar(void) const = 0; 83 84 /// Make a line available. MUST be called even for the first line; 85 /// MAY trigger EOF conditions even when also retrieving data. 86 /// When beyond the last string, becomes a no-op. 87 virtual ILineReader& operator++(void) = 0; ReadLine(void)88 void ReadLine(void) { ++*this; } 89 90 /// Unget current line, which must be valid. 91 /// After calling this method: 92 /// 1. AtEOF() returns false, 93 /// 2. PeekChar() returns first char of the current line, 94 /// 3. Calling operator*() or UngetLine() is illegal, 95 /// 4. Calling operator++() will make the line current again 96 virtual void UngetLine(void) = 0; 97 98 /// Return the current line, minus its terminator. Before the first run of 99 /// operator++() returns an empty CTempString. 100 /// At EOF returns an empty CTempString. 101 /// @attention 102 /// Right after UngetLine() calling this 103 /// method is illegal. Call operator++() first 104 virtual CTempString operator*(void) const = 0; GetCurrentLine(void) const105 CTempString GetCurrentLine(void) const { return **this; } 106 107 /// Return the current (absolute) position. 108 virtual CT_POS_TYPE GetPosition(void) const = 0; 109 110 /// Returns the current line number (counting from 1, not 0). 111 /// @attention 112 /// Right after constructor or after UngetLine() calling this 113 /// method is illegal. Call operator++() first 114 virtual Uint8 GetLineNumber(void) const = 0; 115 }; 116 117 118 /// Simple implementation of ILineReader for i(o)streams. 119 class NCBI_XUTIL_EXPORT CStreamLineReader : public ILineReader 120 { 121 public: 122 enum EEOLStyle { 123 eEOL_unknown = 0, ///< to be detected 124 eEOL_cr = 1, ///< bare CR (classic Mac) 125 eEOL_lf = 2, ///< bare LF (Unix et al.) 126 eEOL_crlf = 3, ///< DOS/Windows 127 #ifdef NCBI_OS_UNIX 128 eEOL_native = eEOL_lf, 129 #elif defined(NCBI_OS_MSWIN) 130 eEOL_native = eEOL_crlf, 131 #else 132 eEOL_native = eEOL_unknown, 133 #endif 134 eEOL_mixed = 4 ///< contains both bare CRs and bare LFs 135 }; 136 137 /// Open a line reader over a given stream, with the given 138 /// EOL-style and ownership settings (if specified). 139 /// 140 /// As always with ILineReader, an explicit call to operator++ or 141 /// ReadLine() will be necessary to fetch the first line. 142 explicit CStreamLineReader(CNcbiIstream& is, 143 EEOLStyle eol_style = eEOL_unknown, 144 EOwnership ownership = eNoOwnership); 145 146 /// Open a line reader over a given stream, with the given 147 /// ownership setting. 148 /// 149 /// As always with ILineReader, an explicit call to operator++ or 150 /// ReadLine() will be necessary to fetch the first line. 151 CStreamLineReader(CNcbiIstream& is, EOwnership ownership); 152 153 ~CStreamLineReader(); 154 155 bool AtEOF(void) const; 156 char PeekChar(void) const; 157 CStreamLineReader& operator++(void); 158 void UngetLine(void); 159 CTempString operator*(void) const; 160 CT_POS_TYPE GetPosition(void) const; 161 Uint8 GetLineNumber(void) const; 162 163 private: 164 EEOLStyle x_AdvanceEOLUnknown(void); 165 EEOLStyle x_AdvanceEOLSimple(char eol, char alt_eol); 166 EEOLStyle x_AdvanceEOLCRLF(void); 167 168 AutoPtr<CNcbiIstream> m_Stream; 169 string m_Line; 170 Uint8 m_LineNumber; 171 SIZE_TYPE m_LastReadSize; 172 bool m_UngetLine; 173 bool m_AutoEOL; 174 EEOLStyle m_EOLStyle; 175 }; 176 177 178 /// Simple implementation of ILineReader for regions of memory 179 /// (such as memory-mapped files). 180 class NCBI_XUTIL_EXPORT CMemoryLineReader : public ILineReader 181 { 182 public: 183 /// Open a line reader over the half-open memory range [start, end). 184 /// 185 /// As always with ILineReader, an explicit call to operator++ or 186 /// ReadLine() will be necessary to fetch the first line. CMemoryLineReader(const char * start,const char * end)187 CMemoryLineReader(const char* start, const char* end) 188 : m_Start(start), m_End(end), m_Pos(start), m_LineNumber(0) { } 189 190 /// Open a line reader over the half-open memory range 191 /// [start, start+length). 192 /// 193 /// As always with ILineReader, an explicit call to operator++ or 194 /// ReadLine() will be necessary to fetch the first line. CMemoryLineReader(const char * start,SIZE_TYPE length)195 CMemoryLineReader(const char* start, SIZE_TYPE length) 196 : m_Start(start), m_End(start + length), m_Pos(start), 197 m_LineNumber(0) { } 198 199 /// Open a line reader over a given memory-mapped file, with the 200 /// given ownership setting (if specified). 201 /// 202 /// As always with ILineReader, an explicit call to operator++ or 203 /// ReadLine() will be necessary to fetch the first line. 204 CMemoryLineReader(CMemoryFile* mem_file, 205 EOwnership ownership = eNoOwnership); 206 207 bool AtEOF(void) const; 208 char PeekChar(void) const; 209 CMemoryLineReader& operator++(void); 210 void UngetLine(void); 211 CTempString operator*(void) const; 212 CT_POS_TYPE GetPosition(void) const; 213 Uint8 GetLineNumber(void) const; 214 215 private: 216 const char* m_Start; 217 const char* m_End; 218 const char* m_Pos; 219 CTempString m_Line; 220 AutoPtr<CMemoryFile> m_MemFile; 221 Uint8 m_LineNumber; 222 }; 223 224 /// Implementation of ILineReader for IReader 225 /// 226 class NCBI_XUTIL_EXPORT CBufferedLineReader : public ILineReader 227 { 228 public: 229 /// read from the IReader 230 /// 231 /// As always with ILineReader, an explicit call to operator++ or 232 /// ReadLine() will be necessary to fetch the first line. 233 CBufferedLineReader(IReader* reader, 234 EOwnership ownership = eNoOwnership); 235 236 /// read from the istream 237 /// 238 /// As always with ILineReader, an explicit call to operator++ or 239 /// ReadLine() will be necessary to fetch the first line. 240 CBufferedLineReader(CNcbiIstream& is, 241 EOwnership ownership = eNoOwnership); 242 243 /// read from the file, "-" (but not "./-") means standard input 244 /// 245 /// As always with ILineReader, an explicit call to operator++ or 246 /// ReadLine() will be necessary to fetch the first line. 247 CBufferedLineReader(const string& filename); 248 249 virtual ~CBufferedLineReader(); 250 251 bool AtEOF(void) const; 252 char PeekChar(void) const; 253 CBufferedLineReader& operator++(void); 254 void UngetLine(void); 255 CTempString operator*(void) const; 256 CT_POS_TYPE GetPosition(void) const; 257 Uint8 GetLineNumber(void) const; 258 259 private: 260 CBufferedLineReader(const CBufferedLineReader&); 261 CBufferedLineReader& operator=(const CBufferedLineReader&); 262 private: 263 void x_LoadLong(); 264 bool x_ReadBuffer(); 265 private: 266 AutoPtr<IReader> m_Reader; 267 bool m_Eof; 268 bool m_UngetLine; 269 SIZE_TYPE m_LastReadSize; 270 size_t m_BufferSize; 271 AutoArray<char> m_Buffer; 272 const char* m_Pos; 273 const char* m_End; 274 CTempString m_Line; 275 string m_String; 276 CT_POS_TYPE m_InputPos; 277 Uint8 m_LineNumber; 278 }; 279 280 281 282 END_NCBI_SCOPE 283 284 285 /* @} */ 286 287 #endif /* UTIL___LINE_READER__HPP */ 288