1 
2 /*!
3   \file lib/gis/token.c
4 
5   \brief GIS Library - Tokenize strings
6 
7   (C) 2001-2008, 2011-2013 by the GRASS Development Team
8 
9   This program is free software under the GNU General Public License
10   (>=v2). Read the file COPYING that comes with GRASS for details.
11 
12   \author USA CERL and others
13 */
14 
15 #include <stdlib.h>
16 #include <string.h>
17 #include <grass/gis.h>
18 #include <grass/glocale.h>
19 
20 static char **tokenize(const char *, const char *, const char *);
21 
/*!
  \brief Split a string into tokens at delimiter characters

  The input <em>buf</em> is not modified: the tokenizer works on a
  private copy. Every character of the copy that occurs in
  <em>delim</em> is replaced by a string terminator ('\0') and a
  pointer to the text following it is stored as the next token.
  <em>delim</em> may list several delimiter characters; each single
  occurrence of any of them ends the current token, so two adjacent
  delimiters produce an empty token and an empty <em>buf</em>
  produces exactly one empty token.

  Callers are expected to pass a single line (no '\n'); the tokenizer
  itself gives a newline no special meaning unless it is listed in
  <em>delim</em>.

  No quoting is recognised by this function; see G_tokenize2() for
  that.

  Example:
  \code
  char **tokens;
  int ntok, i;
  tokens = G_tokenize(buf, " |:,");
  ntok = G_number_of_tokens(tokens);
  for (i=0; i < ntok; i++) {
     G_debug(1, "%d=[%s]", i, tokens[i]);
  }
  G_free_tokens(tokens);
  \endcode

  \param buf input string
  \param delim string holding the delimiter characters

  \return NULL-terminated array of pointers to the tokens; release it
  with G_free_tokens() when finished
*/
char **G_tokenize(const char *buf, const char *delim)
{
    /* no quote character: every delimiter splits the input */
    const char *no_quote = NULL;

    return tokenize(buf, delim, no_quote);
}
52 
/*!
  \brief Split a string into tokens, honouring a quote character

  This function behaves like G_tokenize(), but the first character of
  <em>valchar</em> acts as a quote character. Between an opening and
  a closing quote character the characters of <em>delim</em> are
  ordinary text and do not split the token. The quote characters
  themselves are not copied into the token; a doubled quote character
  inside a quoted section stands for one literal quote character.

  If the input ends inside a quoted section, or a closing quote is
  followed by anything other than a delimiter, another quote
  character or the end of the input, a "parse error" warning is
  issued and the tokens collected up to that point are returned.

  Example:
  \code
  char *str = "a,'b,c',d";

  char **tokens1, **tokens2;
  int ntok1, ntok2;

  tokens1 = G_tokenize(str, ",");
  ntok1 = G_number_of_tokens(tokens1);

  tokens2 = G_tokenize2(str, ",", "'");
  ntok2 = G_number_of_tokens(tokens2);
  \endcode

  In this example <em>ntok1</em> will be 4 ("a", "'b", "c'", "d"),
  <em>ntok2</em> only 3, i.e. { "a", "b,c", "d" }.

  \param buf input string
  \param delim string holding the delimiter characters
  \param valchar string whose first character is the quote character

  \return NULL-terminated array of pointers to the tokens; release it
  with G_free_tokens() when finished
*/
char **G_tokenize2(const char *buf, const char *delim, const char *valchar)
{
    char **result = tokenize(buf, delim, valchar);

    return result;
}
88 
/*!
  \brief Shared implementation of G_tokenize() and G_tokenize2()

  Works on a copy of <em>buf</em> obtained from G_store(); the copy is
  rewritten in place (the write position never overtakes the read
  position) and the returned pointers point into it. The first array
  element always points to the start of that copy, which is what
  G_free_tokens() releases.

  The input is scanned with a three-state machine:
   - S_START: outside quotes; a delimiter starts a new token, a quote
     character opens a quoted section;
   - S_IN_QUOTE: delimiters are plain text; a quote character leaves
     the section;
   - S_AFTER_QUOTE: just after a closing quote; a second quote
     character is a literal quote and re-enters the quoted section, a
     delimiter starts a new token, anything else is a parse error.

  On a parse error a warning is issued and the tokens collected so far
  (including the partially built one) are returned.

  \param buf input string
  \param delim string holding the delimiter characters
  \param inchar string whose first character is the quote character;
  NULL or an empty string disables quoting

  \return NULL-terminated array of pointers to the tokens
*/
static char **tokenize(const char *buf, const char *delim, const char *inchar)
{
    enum { S_START, S_IN_QUOTE, S_AFTER_QUOTE };
    enum { A_NO_OP, A_ADD_CHAR, A_NEW_FIELD, A_END_RECORD, A_ERROR };
    int i;
    int state;
    char **tokens;
    const char *p;
    char *q;

    /* Characters are compared as unsigned char values (0..255), so the
       sentinel -1 can never match input data, whatever the signedness
       of plain char.  An empty quote string must also disable quoting:
       otherwise the quote character would be '\0' and the terminator
       of the input would be taken for an opening quote, sending the
       scan past the end of the buffer. */
    int quo = (inchar != NULL && *inchar != '\0')
        ? (int)(unsigned char)*inchar : -1;

    /* do not modify buf, make a copy */
    p = q = G_store(buf);

    i = 0;
    tokens = G_malloc(2 * sizeof(*tokens));

    /* always one token */
    tokens[i++] = q;

    for (state = S_START;; p++) {
        int c = (unsigned char)*p;
        int action = A_NO_OP;

        switch (state) {
        case S_START:
            if (c == quo)
                state = S_IN_QUOTE;
            else if (c == '\0')
                action = A_END_RECORD;
            else if (strchr(delim, c))
                action = A_NEW_FIELD;
            else
                action = A_ADD_CHAR;
            break;
        case S_IN_QUOTE:
            if (c == quo)
                state = S_AFTER_QUOTE;
            else if (c == '\0')
                action = A_ERROR;       /* unterminated quoted section */
            else
                action = A_ADD_CHAR;
            break;
        case S_AFTER_QUOTE:
            if (c == quo) {
                /* doubled quote: literal quote character */
                state = S_IN_QUOTE;
                action = A_ADD_CHAR;
            }
            else if (c == '\0')
                action = A_END_RECORD;
            else if (strchr(delim, c)) {
                state = S_START;
                action = A_NEW_FIELD;
            }
            else
                action = A_ERROR;       /* text directly after closing quote */
            break;
        default:
            break;
        }

        switch (action) {
        case A_ADD_CHAR:
            *q++ = *p;
            break;
        case A_NEW_FIELD:
            *q++ = '\0';
            tokens[i++] = q;
            /* keep room for one more token pointer plus the final NULL */
            tokens = G_realloc(tokens, (i + 2) * sizeof(*tokens));
            break;
        case A_ERROR:
            G_warning(_("parse error"));
            /* fall through */
        case A_END_RECORD:
            *q = '\0';
            tokens[i] = NULL;
            return tokens;
        case A_NO_OP:
        default:
            break;
        }
    }
}
176 
/*!
  \brief Return number of tokens

  Counts the entries of a token array up to, but not including, the
  terminating NULL pointer.

  \param tokens NULL-terminated array of token pointers; may be NULL

  \return number of tokens, 0 if <em>tokens</em> is NULL
*/
int G_number_of_tokens(char **tokens)
{
    int n = 0;

    /* a missing array simply holds no tokens */
    if (tokens == NULL)
        return 0;

    while (tokens[n] != NULL)
        n++;

    return n;
}
195 
/*!
  \brief Free memory allocated to tokens.

  <b>Note:</b> <i>G_free_tokens()</i> must be called when finished with
  tokens to release memory.

  All token strings of one array share a single buffer whose start is
  the first token, so only <em>tokens[0]</em> and the pointer array
  itself are released. The individual tokens must not be freed
  separately, and no token pointer may be used after this call.

  \param tokens token array returned by G_tokenize() or G_tokenize2();
  a NULL pointer is ignored
*/
void G_free_tokens(char **tokens)
{
    /* nothing to release, mirrors free(NULL) */
    if (tokens == NULL)
        return;

    /* the first token marks the start of the shared string buffer */
    if (tokens[0] != NULL)
        G_free(tokens[0]);
    G_free(tokens);
}
210