
/// string tokenizer header
/**
 * \file strtok.h
 *
 * string tokenizer
 *
 * Copyright (C) 2006, 2007, 2008 Lukas Jelinek, <lukas@aiken.cz>
 * Copyright (C) 2014, 2015 Andreas Altair Redmer, <altair.ibn.la.ahad.sy@gmail.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of one of the following licenses:
 *
 * \li 1. X11-style license (see LICENSE-X11)
 * \li 2. GNU Lesser General Public License, version 2.1 (see LICENSE-LGPL)
 * \li 3. GNU General Public License, version 2 (see LICENSE-GPL)
 *
 * If you want to help with choosing the best license for you,
 * please visit http://www.gnu.org/licenses/license-list.html.
 *
 */


#ifndef _STRTOK_H_
#define _STRTOK_H_


#include <string>

/// Shorthand for the size/position type used by std::string.
typedef std::string::size_type SIZE;

/// Simple string tokenizer class.
/**
 * This class implements a string tokenizer. It splits a string
 * by a delimiter into a number of elements (tokens) which are
 * provided sequentially.
 *
 * All operations are made on a copy of the original string
 * (which may be in fact a copy-on-write instance).
 *
 * The original string is left unchanged. All tokens are returned
 * as newly created strings.
 *
 * It is possible to specify a prefix character which causes
 * the character following it not to be treated as a delimiter.
 * If you don't specify this character (or specify the NUL
 * character, 0x00) this feature is disabled. The most commonly
 * used prefix is a backslash ('\').
 *
 * This class is not thread-safe.
 *
 * Performance note: This class is currently not intended
 * to be very fast. Speed optimizations will be done later.
 */
class StringTokenizer
{
public:
  /// Constructor.
  /**
   * Creates a ready-to-use tokenizer.
   *
   * \param[in] rStr string for tokenizing
   * \param[in] cDelim delimiter (separator) string
   * \param[in] cPrefix character which is prepended if a
   * character must not separate tokens
   *
   * NOTE(review): the delimiter is a std::string; whether a
   * multi-character delimiter is matched as a whole sequence or as
   * a set of alternative characters is decided by the implementation
   * file, which is not part of this header - confirm there.
   */
  StringTokenizer(const std::string& rStr, const std::string& cDelim = ",", char cPrefix = '\0');

  // No destructor is declared on purpose: every member cleans up after
  // itself, and leaving the destructor implicit keeps the
  // compiler-generated move operations available.

  /// Checks whether the tokenizer can provide more tokens.
  /**
   * \return true = more tokens available (the current position is
   *         before the end of the string), false = otherwise
   */
  bool HasMoreTokens() const
  {
    return m_pos < m_len;
  }

  /// Returns the next token.
  /**
   * If a prefix is defined it is stripped from the returned
   * string (e.g. 'abc\ def' is transformed to 'abc def'
   * while the prefix is '\').
   *
   * \param[in] fSkipEmpty skip empty strings (more consecutive delimiters)
   * \return next token or "" if no more tokens available
   *
   * \sa GetNextTokenRaw()
   */
  std::string GetNextToken(bool fSkipEmpty = false);

  /// Returns the next token without stripping prefix characters.
  /**
   * This method always returns an unmodified string even
   * if it contains prefix characters.
   *
   * \param[in] fSkipEmpty skip empty strings (more consecutive delimiters)
   * \return next token or "" if no more tokens available
   *
   * \sa GetNextToken()
   */
  std::string GetNextTokenRaw(bool fSkipEmpty = false);

  /// Returns the remainder of the source string.
  /**
   * This method returns everything that has not been
   * processed (tokenized) yet and moves the current
   * position to the end of the string.
   *
   * If a prefix is defined it is stripped from
   * the returned string.
   *
   * \return remainder string
   */
  std::string GetRemainder();

  /// Sets the delimiter (separator).
  /**
   * The new delimiter has effect only to tokens returned later;
   * the position in the string is not affected.
   *
   * \param[in] cDelim delimiter string
   */
  void SetDelimiter(const std::string& cDelim)
  {
    // Taken by const reference so the value is copied exactly once.
    m_cDelim = cDelim;
  }

  /// Returns the delimiter (separator).
  /**
   * \return copy of the current delimiter string
   */
  std::string GetDelimiter() const
  {
    return m_cDelim;
  }

  /// Sets a prefix character.
  /**
   * The new prefix has effect only to tokens returned later;
   * the position in the string is not affected.
   *
   * If you specify a NUL character (0x00) here the prefix
   * will not be used.
   *
   * \param[in] cPrefix prefix character
   *
   * \sa SetNoPrefix()
   */
  void SetPrefix(char cPrefix)
  {
    m_cPrefix = cPrefix;
  }

  /// Returns the prefix character.
  /**
   * \return prefix character
   */
  char GetPrefix() const
  {
    return m_cPrefix;
  }

  /// Sets the prefix to 'no prefix'.
  /**
   * Calling this method is equivalent to SetPrefix((char) 0).
   *
   * \sa SetPrefix()
   */
  void SetNoPrefix()
  {
    SetPrefix('\0');
  }

  /// Resets the tokenizer.
  /**
   * Re-initializes tokenizing to the start of the string.
   * Only the current position is rewound; the delimiter and
   * the prefix keep their current values.
   */
  void Reset()
  {
    m_pos = 0;
  }

private:
  std::string m_str;             ///< tokenized string
  std::string m_cDelim;          ///< delimiter string
  char m_cPrefix;                ///< prefix character
  std::string::size_type m_pos;  ///< current position
  std::string::size_type m_len;  ///< string length

  /// Strips all prefix characters.
  /**
   * \param[in] s source string
   * \param[in] cnt string length
   * \return modified string
   */
  std::string StripPrefix(const char* s, SIZE cnt);

  /// Extracts the next token (internal method).
  /**
   * The extracted token may be empty.
   *
   * \param[out] rToken extracted token
   * \param[in] fStripPrefix strip prefix characters yes/no
   */
  void _GetNextToken(std::string& rToken, bool fStripPrefix);

  /// Extracts the next token (internal method).
  /**
   * This method does no checking about the prefix character.
   *
   * The extracted token may be empty.
   *
   * \param[out] rToken extracted token
   */
  void _GetNextTokenNoPrefix(std::string& rToken);

  /// Extracts the next token (internal method).
  /**
   * This method does checking about the prefix character.
   *
   * The extracted token may be empty.
   *
   * \param[out] rToken extracted token
   */
  void _GetNextTokenWithPrefix(std::string& rToken);
};


#endif //_STRTOK_H_