1
2 /*!
3 \file lib/gis/token.c
4
5 \brief GIS Library - Tokenize strings
6
7 (C) 2001-2008, 2011-2013 by the GRASS Development Team
8
9 This program is free software under the GNU General Public License
10 (>=v2). Read the file COPYING that comes with GRASS for details.
11
12 \author USA CERL and others
13 */
14
15 #include <stdlib.h>
16 #include <string.h>
17 #include <grass/gis.h>
18 #include <grass/glocale.h>
19
20 static char **tokenize(const char *, const char *, const char *);
21
22 /*!
23 \brief Tokenize string
24
25 Given a string, <em>buf</em>, split a copy of it at every character
26 listed in <em>delim</em> and return a NULL-terminated array of pointers
27 to the tokens (<em>buf</em> itself is not modified). <em>buf</em> must
28 not contain a new line (\n). <em>delim</em> may consist of more than
29 one character. Call G_free_tokens() when finished to release memory.
30
31 Example:
32 \code
33 char **tokens;
34 int ntok, i;
35 tokens = G_tokenize(buf, " |:,");
36 ntok = G_number_of_tokens(tokens);
37 for (i=0; i < ntok; i++) {
38 G_debug(1, "%d=[%s]", i, tokens[i]);
39 }
40 G_free_tokens(tokens);
41 \endcode
42
43 \param buf input string
44 \param delim string delimiter
45
46 \return NULL-terminated array of pointers to the tokens
47 */
char **G_tokenize(const char *buf, const char *delim)
{
    /* Plain splitting: no quote character is handed to the tokenizer. */
    const char *no_quote = NULL;

    return tokenize(buf, delim, no_quote);
}
52
53 /*!
54 \brief Tokenize string
55
56 This function behaves similarly to G_tokenize().
57
58 It introduces <em>valchar</em> which defines borders of token. Within
59 token <em>delim</em> is ignored.
60
61 Example:
62 \code
63 char *str = "a,'b,c',d";
64
65 char **tokens1, **tokens2;
66 int ntok1, ntok2;
67
68 tokens1 = G_tokenize(str, ",");
69 ntok1 = G_number_of_tokens(tokens1);
70
71 tokens2 = G_tokenize2(str, ",", "'");
72 ntok2 = G_number_of_tokens(tokens2);
73 \endcode
74
75 In this example <em>ntok1</em> will be 4, <em>ntok2</em> only 3,
76 i.e. { "a", "b,c", "d" } (the quote characters are not kept in the token)
77
78 \param buf input string
79 \param delim string delimiter
80 \param valchar character defining border of token
81
82 \return NULL-terminated array of pointers to the tokens
83 */
char **G_tokenize2(const char *buf, const char *delim, const char *valchar)
{
    /* valchar is passed through unchanged as the tokenizer's quote argument. */
    char **tokens = tokenize(buf, delim, valchar);

    return tokens;
}
88
/*
 * Split a private copy of buf into tokens.
 *
 * buf    - string to split; it is copied with G_store() and never modified.
 * delim  - set of delimiter characters; NULL is treated as an empty set.
 * inchar - optional quote character (only its first character is used);
 *          NULL or "" disables quoting.
 *
 * Outside quotes, every character found in delim ends the current token.
 * Inside quotes, delimiters are ordinary characters, the quote characters
 * themselves are dropped, and a doubled quote yields one literal quote.
 * An unterminated quote, or a non-delimiter directly after a closing
 * quote, raises a warning and ends parsing with the tokens found so far.
 *
 * Returns a NULL-terminated array of pointers; all entries point into the
 * single copied buffer, whose start is entry 0.
 */
static char **tokenize(const char *buf, const char *delim, const char *inchar)
{
    enum { S_START, S_IN_QUOTE, S_AFTER_QUOTE };
    enum { A_NO_OP, A_ADD_CHAR, A_NEW_FIELD, A_END_RECORD, A_ERROR };
    int i;
    int state;
    int quo;
    char **tokens;
    const char *p; /* read position in the copy */
    char *q;       /* write position in the copy (never ahead of p) */

    /*
     * -1 means "no quote character".  Characters are compared as
     * unsigned char values so that a 0xFF byte can never equal -1 where
     * plain char is signed.  An empty inchar also disables quoting;
     * otherwise the terminating '\0' would be taken for an opening quote
     * and the scan would run past the end of the buffer.
     */
    quo = (inchar && *inchar) ? (unsigned char)*inchar : -1;

    /* strchr() must not be given a NULL string. */
    if (!delim)
        delim = "";

    /* do not modify buf, make a copy */
    p = q = G_store(buf);

    i = 0;
    tokens = (char **)G_malloc(2 * sizeof(char *));

    /* always one token */
    tokens[i++] = q;

    for (state = S_START;; p++) {
        int c = (unsigned char)*p;
        int action = A_NO_OP;

        switch (state) {
        case S_START:
            if (c == quo)
                state = S_IN_QUOTE;
            else if (c == '\0')
                action = A_END_RECORD;
            else if (strchr(delim, c))
                action = A_NEW_FIELD;
            else
                action = A_ADD_CHAR;
            break;
        case S_IN_QUOTE:
            if (c == quo)
                state = S_AFTER_QUOTE;
            else if (c == '\0')
                action = A_ERROR; /* quote never closed */
            else
                action = A_ADD_CHAR;
            break;
        case S_AFTER_QUOTE:
            if (c == quo) {
                /* doubled quote: emit one literal quote, stay quoted */
                state = S_IN_QUOTE;
                action = A_ADD_CHAR;
            }
            else if (c == '\0')
                action = A_END_RECORD;
            else if (strchr(delim, c)) {
                state = S_START;
                action = A_NEW_FIELD;
            }
            else
                action = A_ERROR; /* text glued to a closing quote */
            break;
        }

        switch (action) {
        case A_NO_OP:
            break;
        case A_ADD_CHAR:
            *q++ = *p;
            break;
        case A_NEW_FIELD:
            *q++ = '\0';
            tokens[i++] = q;
            /* keep room for the next token and the NULL sentinel */
            tokens = G_realloc(tokens, (i + 2) * sizeof(char *));
            break;
        case A_ERROR:
            G_warning(_("parse error"));
            /* fall through: return what has been parsed so far */
        case A_END_RECORD:
            *q = '\0';
            tokens[i] = NULL;
            return tokens;
        }
    }
}
176
177 /*!
178 \brief Return number of tokens
179
180 \param tokens NULL-terminated array of tokens
181
182 \return number of tokens
183 */
184
int G_number_of_tokens(char **tokens)
{
    int n = 0;

    /* A missing array holds no tokens; do not dereference NULL. */
    if (tokens == NULL)
        return 0;

    /* Count entries up to, but not including, the NULL sentinel. */
    while (tokens[n] != NULL)
        n++;

    return n;
}
195
196 /*!
197 \brief Free memory allocated to tokens.
198
199 <b>Note:</b> <i>G_free_tokens()</i> must be called when finished with
200 tokens to release memory.
201
202 \param[out] tokens
203 */
void G_free_tokens(char **tokens)
{
    /* Nothing to release; mirrors free(NULL) being a no-op. */
    if (tokens == NULL)
        return;

    /*
     * Only entry 0 is freed: the tokenizer stores the start of its single
     * string buffer there, and the other entries point into that buffer.
     */
    if (tokens[0] != NULL)
        G_free(tokens[0]);
    G_free(tokens);
}
210