1 /*
2 * psql - the PostgreSQL interactive terminal
3 *
4 * Copyright (c) 2000-2020, PostgreSQL Global Development Group
5 *
6 * src/bin/psql/stringutils.c
7 */
8 #include "postgres_fe.h"
9
10 #include <ctype.h>
11
12 #include "common.h"
13 #include "stringutils.h"
14
/*
 * PQmblenBounded
 *
 * Number of bytes to advance over the character that starts at s:
 * the length reported by PQmblen() for encoding e, clamped by strnlen() so
 * that it never reaches past the terminating NUL of s.  This keeps the
 * scanning loops below inside the string even when its last character is
 * truncated.
 *
 * NB: s is evaluated twice, so do not pass an expression with side effects.
 */
#define PQmblenBounded(s, e) strnlen((s), PQmblen((s), (e)))
16
17
/*
 * terminate_token
 *
 * Helper for strtokx(): NUL-terminate the token that ends just before 'end'
 * and return the position at which the next call should resume scanning.
 *
 * If 'end' is already the end of the string there is nothing to do.
 * Otherwise we can just overwrite the character at 'end' if it happens to be
 * in the whitespace set; if it is not, it belongs to the next token, so the
 * rest of the string (including its terminator) is moved over by one byte to
 * make room for the inserted NUL.  This is why strtokx() allocates extra
 * space for its working copy.
 */
static char *
terminate_token(char *end, const char *whitespace)
{
	/* at end of string, so no extra work */
	if (*end == '\0')
		return end;

	if (!strchr(whitespace, *end))
		memmove(end + 1, end, strlen(end) + 1);

	*end = '\0';
	return end + 1;
}

/*
 * Replacement for strtok() (a.k.a. poor man's flex)
 *
 * Splits a string into tokens, returning one token per call, then NULL
 * when no more tokens exist in the given string.
 *
 * The calling convention is similar to that of strtok, but with more
 * frammishes.
 *
 * s -			string to parse, if NULL continue parsing the last string
 * whitespace - set of whitespace characters that separate tokens
 * delim -		set of non-whitespace separator characters (or NULL)
 * quote -		set of characters that can quote a token (NULL if none)
 * escape -		character that can quote quotes (0 if none)
 * e_strings -	if true, treat E'...' syntax as a valid token
 * del_quotes - if true, strip quotes from the returned token, else return
 *				it exactly as found in the string
 * encoding -	the active character-set encoding
 *
 * Characters in 'delim', if any, will be returned as single-character
 * tokens unless part of a quoted token.
 *
 * Double occurrences of the quoting character are always taken to represent
 * a single quote character in the data.  If escape isn't 0, then escape
 * followed by anything (except \0) is a data character too.
 *
 * The combination of e_strings and del_quotes both true is not currently
 * handled.  This could be fixed but it's not needed anywhere at the moment.
 *
 * Note that the string s is _not_ overwritten in this implementation: all
 * work is done on a private copy kept in static storage.  The returned
 * token points into that copy, so the caller must not free it, and it
 * stays valid only until a later call either starts a new string (s not
 * NULL) or returns NULL, both of which release the copy.  The static state
 * also means only one string can be tokenized at a time.
 *
 * NB: it's okay to vary delim, quote, and escape from one call to the
 * next on a single source string, but changing whitespace is a bad idea
 * since you might lose data.
 */
char *
strtokx(const char *s,
		const char *whitespace,
		const char *delim,
		const char *quote,
		char escape,
		bool e_strings,
		bool del_quotes,
		int encoding)
{
	static char *storage = NULL;	/* store the local copy of the users
									 * string here */
	static char *string = NULL; /* pointer into storage where to continue on
								 * next call */

	char	   *start;			/* first byte of the token being returned */
	char	   *p;				/* scan position */
	size_t		toklen;			/* length of an unquoted token */

	if (s)
	{
		free(storage);

		/*
		 * We may need extra space to insert delimiter nulls for adjacent
		 * tokens.  2X the space is a gross overestimate, but it's unlikely
		 * that this code will be used on huge strings anyway.
		 */
		storage = pg_malloc(2 * strlen(s) + 1);
		strcpy(storage, s);
		string = storage;
	}

	/* nothing to continue from: never started, or already exhausted */
	if (!storage)
		return NULL;

	/* skip leading whitespace */
	start = string + strspn(string, whitespace);

	/* end of string reached? */
	if (*start == '\0')
	{
		/* technically we don't need to free here, but we're nice */
		free(storage);
		storage = NULL;
		string = NULL;
		return NULL;
	}

	/* a delimiter character is returned as a one-character token */
	if (delim && strchr(delim, *start))
	{
		string = terminate_token(start + 1, whitespace);
		return start;
	}

	/* check for E string */
	p = start;
	if (e_strings &&
		(*p == 'E' || *p == 'e') &&
		p[1] == '\'')
	{
		quote = "'";
		escape = '\\';			/* if std strings before, not any more */
		p++;					/* step over the E; *p is now the quote */
	}

	/* test if quoting character */
	if (quote && strchr(quote, *p))
	{
		/* okay, we have a quoted token, now scan for the closer */
		char		thisquote = *p++;

		for (; *p; p += PQmblenBounded(p, encoding))
		{
			if (*p == escape && p[1] != '\0')
				p++;			/* process escaped anything */
			else if (*p == thisquote && p[1] == thisquote)
				p++;			/* process doubled quote */
			else if (*p == thisquote)
			{
				p++;			/* skip trailing quote */
				break;
			}
		}

		/* must terminate the token before stripping quotes in place */
		string = terminate_token(p, whitespace);

		/* Clean up the token if caller wants that */
		if (del_quotes)
			strip_quotes(start, thisquote, escape, encoding);

		return start;
	}

	/*
	 * Otherwise no quoting character.  Scan till next whitespace, delimiter
	 * or quote.  NB: at this point, *start is known not to be '\0',
	 * whitespace, delim, or quote, so we will consume at least one character.
	 */
	toklen = strcspn(start, whitespace);

	if (delim)
	{
		size_t		upto_delim = strcspn(start, delim);

		if (upto_delim < toklen)
			toklen = upto_delim;
	}

	if (quote)
	{
		size_t		upto_quote = strcspn(start, quote);

		if (upto_quote < toklen)
			toklen = upto_quote;
	}

	string = terminate_token(start + toklen, whitespace);

	return start;
}
229
230
/*
 * strip_quotes
 *
 * Undo the quoting of the string at *source, rewriting it in place.
 *
 * A leading 'quote' and a 'quote' that is the very last character are
 * dropped.  Inside the string, two adjacent 'quote' characters collapse to
 * one, and (when 'escape' is not 0) an 'escape' followed by another
 * character is dropped so that the following character is taken literally.
 *
 * Characters are copied a whole (possibly multibyte) character at a time
 * according to 'encoding'.  The result is never longer than the input, so
 * no extra space is needed.
 */
void
strip_quotes(char *source, char quote, char escape, int encoding)
{
	char	   *in;				/* next byte to read */
	char	   *out;			/* next byte to write; never ahead of 'in' */

	Assert(source != NULL);
	Assert(quote != '\0');

	in = source;
	out = source;

	/* skip leading quote */
	if (*in && *in == quote)
		in++;

	while (*in)
	{
		char		cur = *in;
		char		next = in[1];
		int			nbytes;

		/* a quote in last position is the closing quote: stop before it */
		if (cur == quote && next == '\0')
			break;

		/*
		 * For a doubled quote or an escaped character, step over the first
		 * byte so that only the second one gets copied below.
		 */
		if ((cur == quote && next == quote) ||
			(cur == escape && next != '\0'))
			in++;

		/* copy one complete character */
		for (nbytes = PQmblenBounded(in, encoding); nbytes != 0; nbytes--)
			*out++ = *in++;
	}

	*out = '\0';
}
274
275
/*
 * quote_if_needed
 *
 * Inverse of strip_quotes().  Returns NULL when "source" can stand for
 * itself without any quoting or escaping; otherwise returns a malloc'd,
 * quoted-and-escaped copy that the caller is responsible for freeing.
 *
 * source -		   string to parse
 * entails_quote - if any of these characters is present, outer quotes are
 *				   needed
 * quote -		   doubled within string, affixed to both ends
 * escape -		   doubled within string
 * force_quote -   if true, quote the output even if it doesn't "need" it
 * encoding -	   the active character-set encoding
 *
 * Do not use this as a substitute for PQescapeStringConn().  Use it for
 * strings to be parsed by strtokx() or psql_scan_slash_option().
 */
char *
quote_if_needed(const char *source, const char *entails_quote,
				char quote, char escape, bool force_quote,
				int encoding)
{
	const char *in;
	char	   *result;
	char	   *out;
	bool		quoting_required = force_quote;

	Assert(source != NULL);
	Assert(quote != '\0');

	in = source;

	/*
	 * Build the quoted form unconditionally; we only learn whether it was
	 * needed while scanning.  Worst case every byte gets doubled, plus two
	 * outer quotes and the terminator.
	 */
	result = pg_malloc(2 * strlen(in) + 3); /* excess */
	out = result;

	*out++ = quote;

	while (*in)
	{
		char		c = *in;
		int			nbytes;

		if (c == quote || c == escape)
		{
			/* emit an extra copy in front, i.e. double the character */
			quoting_required = true;
			*out++ = c;
		}
		else if (strchr(entails_quote, c))
			quoting_required = true;

		/* copy one complete character */
		for (nbytes = PQmblenBounded(in, encoding); nbytes != 0; nbytes--)
			*out++ = *in++;
	}

	*out++ = quote;
	*out = '\0';

	if (quoting_required)
		return result;

	/* the source needs no quoting after all; discard the copy */
	free(result);
	return NULL;
}
345