1 /****************************************************************************/
2 // Eclipse SUMO, Simulation of Urban MObility; see https://eclipse.org/sumo
3 // Copyright (C) 2001-2019 German Aerospace Center (DLR) and others.
4 // This program and the accompanying materials
5 // are made available under the terms of the Eclipse Public License v2.0
6 // which accompanies this distribution, and is available at
7 // http://www.eclipse.org/legal/epl-v20.html
8 // SPDX-License-Identifier: EPL-2.0
9 /****************************************************************************/
10 /// @file    StringTokenizer.h
11 /// @author  Daniel Krajzewicz
12 /// @author  Jakob Erdmann
13 /// @author  Michael Behrisch
14 /// @date    ?
15 /// @version $Id$
16 ///
17 // A java-style StringTokenizer for c++ (stl)
18 /****************************************************************************/
19 #ifndef StringTokenizer_h
20 #define StringTokenizer_h
21 
22 
23 // ===========================================================================
24 // included modules
25 // ===========================================================================
26 
27 #include <string>
28 #include <vector>
29 
30 /**
31  * StringTokenizer
 * A class similar to the StringTokenizer from Java. It splits a string at
 * the given string or character (or one of the special cases NEWLINE or
 * WHITECHARS) and allows iterating over the resulting substrings.
35  *
36  * The normal usage is like this:
37  * <pre>
 * StringTokenizer st(std::string("This is a line"), ' ');
39  * while(st.hasNext())
40  *    cout << st.next() << endl;
41  * </pre>
42  * This would generate the output:
43  * <pre>
44  * This
45  * is
46  * a
47  * line
48  * </pre>
49  *
 * A note on the splitting behaviour:
 * When using WHITECHARS, a run of consecutive whitespace characters in the
 * string to split is regarded as a single divider. With any other divider,
 * each occurrence counts as a separate divider, so consecutive occurrences
 * yield substrings of length zero between them.
55  */
56 // ===========================================================================
57 // class definitions
58 // ===========================================================================
/**
 * @class StringTokenizer
 * @brief Java-style tokenizer that splits a string into substrings and lets
 *        the caller iterate over them.
 *
 * Only the default constructor is defined inline; all other member functions
 * and the static constants are merely declared here and defined elsewhere.
 */
class StringTokenizer {
public:
    /** identifier for splitting the given string at all newline characters */
    static const int NEWLINE;

    /** identifier for splitting the given string at all whitespace
        characters */
    static const int WHITECHARS;

    /** the ascii index of the highest whitespace character */
    static const int SPACE;

    /** the ascii index of the tab character */
    static const int TAB;

public:
    /** @brief default constructor
        Creates a tokenizer that has not been given any string to split.
        The iterator position is explicitly set to 0 so that it never holds
        an indeterminate value. */
    StringTokenizer() : myPos(0) { }

    /** @brief constructor
        same as StringTokenizer(tosplit, StringTokenizer.WHITECHARS)
        @param tosplit the string to split into substrings. If the string
               between two split positions is empty, it will not be returned. */
    StringTokenizer(std::string tosplit);

    /** @brief constructor
        The first string will be split at the second string's occurrences.
        If the string between two split positions is empty, it will
        nevertheless be returned.
        @param tosplit the string to split
        @param token the divider string
        @param splitAtAllChars if true, the string is split whenever any
               single char from token occurs */
    StringTokenizer(std::string tosplit, std::string token, bool splitAtAllChars = false);

    /** @brief constructor
        When StringTokenizer.NEWLINE is used as second parameter, the string
        will be split at all occurrences of a newline character (0x0d / 0x0a).
        When StringTokenizer.WHITECHARS is used as second parameter, the
        string will be split at all characters below 0x20 (SPACE).
        All other ints specified as second parameter are cast into a char
        at which the string will be split.
        @param tosplit the string to split
        @param special NEWLINE, WHITECHARS or the divider character */
    StringTokenizer(std::string tosplit, int special);

    /** destructor */
    ~StringTokenizer();

    /** reinitialises the internal iterator */
    void reinit();

    /** returns the information whether further substrings exist */
    bool hasNext();

    /** returns the next substring when it exists. Otherwise the behaviour is
        undefined */
    std::string next();

    /** returns the number of existing substrings */
    int size() const;

    /** returns the first substring without moving the iterator */
    std::string front();

    /** returns the item at the given position */
    std::string get(int pos) const;

    /** returns the substrings as a vector
        (NOTE(review): presumably all of them in order of occurrence; the
        definition is not part of this header - confirm) */
    std::vector<std::string> getVector();

private:
    /** splits the first string at all occurrences of the second. If the third
        parameter is true, split at all chars given in the second */
    void prepare(const std::string& tosplit, const std::string& token,
                 bool splitAtAllChars);

    /** splits the first string at all occurrences of whitechars */
    void prepareWhitechar(const std::string& tosplit);

private:
    /** a list of positions/lengths */
    typedef std::vector<int> SizeVector;

    /** the string to split */
    std::string   myTosplit;

    /** the current position in the list of substrings */
    int        myPos;

    /** the list of substring starts */
    SizeVector    myStarts;

    /** the list of substring lengths */
    SizeVector   myLengths;

};
153 
154 
155 #endif
156 
157 /****************************************************************************/
158 
159