1 
/// string tokenizer header
/**
 * \file strtok.h
 *
 * string tokenizer
 *
 * Copyright (C) 2006, 2007, 2008 Lukas Jelinek, <lukas@aiken.cz>
 * Copyright (C) 2014, 2015 Andreas Altair Redmer, <altair.ibn.la.ahad.sy@gmail.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of one of the following licenses:
 *
 * \li 1. X11-style license (see LICENSE-X11)
 * \li 2. GNU Lesser General Public License, version 2.1 (see LICENSE-LGPL)
 * \li 3. GNU General Public License, version 2  (see LICENSE-GPL)
 *
 * If you want to help with choosing the best license for you,
 * please visit http://www.gnu.org/licenses/license-list.html.
 *
 */
22 
23 
24 #ifndef _STRTOK_H_
25 #define _STRTOK_H_
26 
27 
28 #include <string>
29 
/// Shorthand for the size/index type of std::string.
typedef std::string::size_type SIZE;
31 
/// Simple string tokenizer class.
/**
 * This class implements a string tokenizer. It splits a string
 * at a delimiter into a number of elements (tokens) which are
 * provided sequentially.
 *
 * All operations are performed on a copy of the original string
 * (which may in fact be a copy-on-write instance, depending on
 * the std::string implementation).
 *
 * The original string is left unchanged. All tokens are returned
 * as newly created strings.
 *
 * It is possible to specify a prefix character which causes the
 * character following it not to be treated as a delimiter. If you
 * don't specify this character (or specify the NUL character, 0x00)
 * this feature is disabled. The most commonly used prefix is
 * a backslash ('\').
 *
 * The delimiter is held as a std::string. How a delimiter longer
 * than one character is interpreted is decided by the out-of-line
 * implementation and cannot be told from this header.
 *
 * This class is not thread-safe.
 *
 * Performance note: This class is currently not intended
 * to be very fast. Speed optimizations will be done later.
 */
class StringTokenizer
{
public:
  /// Constructor.
  /**
   * Creates a ready-to-use tokenizer.
   *
   * \param[in] rStr string for tokenizing
   * \param[in] cDelim delimiter (separator), "," by default
   * \param[in] cPrefix character which is prepended if a
   *            character must not separate tokens; the default
   *            (NUL, 0x00) means no prefix
   */
  StringTokenizer(const std::string& rStr, const std::string& cDelim = ",", char cPrefix = '\0');

  /// Destructor.
  /**
   * Nothing to release explicitly; all members clean up after
   * themselves.
   */
  ~StringTokenizer() {}

  /// Checks whether the tokenizer can provide more tokens.
  /**
   * The check compares the current position with the stored
   * string length.
   *
   * \return true = more tokens available, false = otherwise
   */
  bool HasMoreTokens() const
  {
    return m_pos < m_len;
  }

  /// Returns the next token.
  /**
   * If a prefix is defined it is stripped from the returned
   * string (e.g. 'abc\ def' is transformed to 'abc def'
   * while the prefix is '\').
   *
   * \param[in] fSkipEmpty skip empty strings (more consecutive delimiters)
   * \return next token or "" if no more tokens available
   *
   * \sa GetNextTokenRaw()
   */
  std::string GetNextToken(bool fSkipEmpty = false);

  /// Returns the next token without touching prefix characters.
  /**
   * This method always returns an unmodified string even
   * if it contains prefix characters.
   *
   * \param[in] fSkipEmpty skip empty strings (more consecutive delimiters)
   * \return next token or "" if no more tokens available
   *
   * \sa GetNextToken()
   */
  std::string GetNextTokenRaw(bool fSkipEmpty = false);

  /// Returns the remainder of the source string.
  /**
   * This method returns everything that has not been
   * processed (tokenized) yet and moves the current
   * position to the end of the string.
   *
   * If a prefix is defined it is stripped from
   * the returned string.
   *
   * \return remainder string
   */
  std::string GetRemainder();

  /// Sets the delimiter (separator).
  /**
   * The new delimiter affects only tokens returned later;
   * the position in the string is not affected.
   *
   * \param[in] cDelim new delimiter
   */
  void SetDelimiter(const std::string& cDelim)
  {
    // Taken by const reference so that storing it costs a single copy.
    m_cDelim = cDelim;
  }

  /// Returns the delimiter (separator).
  /**
   * \return copy of the current delimiter
   */
  std::string GetDelimiter() const
  {
    return m_cDelim;
  }

  /// Sets a prefix character.
  /**
   * The new prefix affects only tokens returned later;
   * the position in the string is not affected.
   *
   * If you specify a NUL character (0x00) here the prefix
   * will not be used.
   *
   * \param[in] cPrefix prefix character
   *
   * \sa SetNoPrefix()
   */
  void SetPrefix(char cPrefix)
  {
    m_cPrefix = cPrefix;
  }

  /// Returns the prefix character.
  /**
   * \return prefix character
   */
  char GetPrefix() const
  {
    return m_cPrefix;
  }

  /// Sets the prefix to 'no prefix'.
  /**
   * Calling this method is equivalent to SetPrefix((char) 0).
   *
   * \sa SetPrefix()
   */
  void SetNoPrefix()
  {
    SetPrefix('\0');
  }

  /// Resets the tokenizer.
  /**
   * Re-initializes tokenizing to the start of the string.
   * Only the position is rewound; the delimiter and the prefix
   * keep their current values.
   */
  void Reset()
  {
    m_pos = 0;
  }

private:
  std::string m_str;            ///< tokenized string
  std::string m_cDelim;         ///< delimiter (separator)
  char m_cPrefix;               ///< prefix character
  std::string::size_type m_pos; ///< current position
  std::string::size_type m_len; ///< string length

  /// Strips all prefix characters.
  /**
   * \param[in] s source string
   * \param[in] cnt string length
   * \return modified string
   */
  std::string StripPrefix(const char* s, std::string::size_type cnt);

  /// Extracts the next token (internal method).
  /**
   * The extracted token may be empty.
   *
   * \param[out] rToken extracted token
   * \param[in] fStripPrefix strip prefix characters yes/no
   */
  void _GetNextToken(std::string& rToken, bool fStripPrefix);

  /// Extracts the next token (internal method).
  /**
   * This method does no checking about the prefix character.
   *
   * The extracted token may be empty.
   *
   * \param[out] rToken extracted token
   */
  void _GetNextTokenNoPrefix(std::string& rToken);

  /// Extracts the next token (internal method).
  /**
   * This method does checking about the prefix character.
   *
   * The extracted token may be empty.
   *
   * \param[out] rToken extracted token
   */
  void _GetNextTokenWithPrefix(std::string& rToken);
};
230 
231 
232 #endif //_STRTOK_H_
233