1 /*-------------------------------------------------------------------------
2  *
3  * String-processing utility routines for frontend code
4  *
5  * Assorted utility functions that are useful in constructing SQL queries
6  * and interpreting backend output.
7  *
8  *
9  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
10  * Portions Copyright (c) 1994, Regents of the University of California
11  *
12  * src/fe_utils/string_utils.c
13  *
14  *-------------------------------------------------------------------------
15  */
16 #include "postgres_fe.h"
17 
18 #include <ctype.h>
19 
20 #include "common/keywords.h"
21 #include "fe_utils/string_utils.h"
22 
23 static PQExpBuffer defaultGetLocalPQExpBuffer(void);
24 
25 /* Globals exported by this file */
26 int			quote_all_identifiers = 0;
27 PQExpBuffer (*getLocalPQExpBuffer) (void) = defaultGetLocalPQExpBuffer;
28 
29 
30 /*
31  * Returns a temporary PQExpBuffer, valid until the next call to the function.
32  * This is used by fmtId and fmtQualifiedId.
33  *
34  * Non-reentrant and non-thread-safe but reduces memory leakage. You can
35  * replace this with a custom version by setting the getLocalPQExpBuffer
36  * function pointer.
37  */
38 static PQExpBuffer
defaultGetLocalPQExpBuffer(void)39 defaultGetLocalPQExpBuffer(void)
40 {
41 	static PQExpBuffer id_return = NULL;
42 
43 	if (id_return)				/* first time through? */
44 	{
45 		/* same buffer, just wipe contents */
46 		resetPQExpBuffer(id_return);
47 	}
48 	else
49 	{
50 		/* new buffer */
51 		id_return = createPQExpBuffer();
52 	}
53 
54 	return id_return;
55 }
56 
57 /*
58  *	Quotes input string if it's not a legitimate SQL identifier as-is.
59  *
60  *	Note that the returned string must be used before calling fmtId again,
61  *	since we re-use the same return buffer each time.
62  */
63 const char *
fmtId(const char * rawid)64 fmtId(const char *rawid)
65 {
66 	PQExpBuffer id_return = getLocalPQExpBuffer();
67 
68 	const char *cp;
69 	bool		need_quotes = false;
70 
71 	/*
72 	 * These checks need to match the identifier production in scan.l. Don't
73 	 * use islower() etc.
74 	 */
75 	if (quote_all_identifiers)
76 		need_quotes = true;
77 	/* slightly different rules for first character */
78 	else if (!((rawid[0] >= 'a' && rawid[0] <= 'z') || rawid[0] == '_'))
79 		need_quotes = true;
80 	else
81 	{
82 		/* otherwise check the entire string */
83 		for (cp = rawid; *cp; cp++)
84 		{
85 			if (!((*cp >= 'a' && *cp <= 'z')
86 				  || (*cp >= '0' && *cp <= '9')
87 				  || (*cp == '_')))
88 			{
89 				need_quotes = true;
90 				break;
91 			}
92 		}
93 	}
94 
95 	if (!need_quotes)
96 	{
97 		/*
98 		 * Check for keyword.  We quote keywords except for unreserved ones.
99 		 * (In some cases we could avoid quoting a col_name or type_func_name
100 		 * keyword, but it seems much harder than it's worth to tell that.)
101 		 *
102 		 * Note: ScanKeywordLookup() does case-insensitive comparison, but
103 		 * that's fine, since we already know we have all-lower-case.
104 		 */
105 		int			kwnum = ScanKeywordLookup(rawid, &ScanKeywords);
106 
107 		if (kwnum >= 0 && ScanKeywordCategories[kwnum] != UNRESERVED_KEYWORD)
108 			need_quotes = true;
109 	}
110 
111 	if (!need_quotes)
112 	{
113 		/* no quoting needed */
114 		appendPQExpBufferStr(id_return, rawid);
115 	}
116 	else
117 	{
118 		appendPQExpBufferChar(id_return, '"');
119 		for (cp = rawid; *cp; cp++)
120 		{
121 			/*
122 			 * Did we find a double-quote in the string? Then make this a
123 			 * double double-quote per SQL99. Before, we put in a
124 			 * backslash/double-quote pair. - thomas 2000-08-05
125 			 */
126 			if (*cp == '"')
127 				appendPQExpBufferChar(id_return, '"');
128 			appendPQExpBufferChar(id_return, *cp);
129 		}
130 		appendPQExpBufferChar(id_return, '"');
131 	}
132 
133 	return id_return->data;
134 }
135 
136 /*
137  * fmtQualifiedId - construct a schema-qualified name, with quoting as needed.
138  *
139  * Like fmtId, use the result before calling again.
140  *
141  * Since we call fmtId and it also uses getLocalPQExpBuffer() we cannot
142  * use that buffer until we're finished with calling fmtId().
143  */
144 const char *
fmtQualifiedId(const char * schema,const char * id)145 fmtQualifiedId(const char *schema, const char *id)
146 {
147 	PQExpBuffer id_return;
148 	PQExpBuffer lcl_pqexp = createPQExpBuffer();
149 
150 	/* Some callers might fail to provide a schema name */
151 	if (schema && *schema)
152 	{
153 		appendPQExpBuffer(lcl_pqexp, "%s.", fmtId(schema));
154 	}
155 	appendPQExpBufferStr(lcl_pqexp, fmtId(id));
156 
157 	id_return = getLocalPQExpBuffer();
158 
159 	appendPQExpBufferStr(id_return, lcl_pqexp->data);
160 	destroyPQExpBuffer(lcl_pqexp);
161 
162 	return id_return->data;
163 }
164 
165 
/*
 * Render a Postgres version number, given in the PG_VERSION_NUM integer
 * format that PQserverVersion() returns, as text.  The point of this routine
 * is to keep the two-part vs. three-part numbering knowledge in one place.
 *
 * Numbers of 100000 and above use the two-part scheme and print as "M.m", or
 * just "M" when include_minor is false.  Smaller numbers use the old
 * three-part scheme and print as "M.m.p", or "M.m" when include_minor is
 * false.
 *
 * To stay reentrant the caller provides the output buffer (buf, of buflen
 * bytes); 32 bytes is the recommended size.
 *
 * Returns buf, purely as a notational convenience.
 */
char *
formatPGVersionNumber(int version_number, bool include_minor,
					  char *buf, size_t buflen)
{
	const int	major = version_number / 10000;
	const bool	two_part = (version_number >= 100000);

	if (two_part && include_minor)
		snprintf(buf, buflen, "%d.%d", major, version_number % 10000);
	else if (two_part)
		snprintf(buf, buflen, "%d", major);
	else if (include_minor)
		snprintf(buf, buflen, "%d.%d.%d", major,
				 (version_number / 100) % 100,
				 version_number % 100);
	else
		snprintf(buf, buflen, "%d.%d", major,
				 (version_number / 100) % 100);

	return buf;
}
202 
203 
204 /*
205  * Convert a string value to an SQL string literal and append it to
206  * the given buffer.  We assume the specified client_encoding and
207  * standard_conforming_strings settings.
208  *
209  * This is essentially equivalent to libpq's PQescapeStringInternal,
210  * except for the output buffer structure.  We need it in situations
211  * where we do not have a PGconn available.  Where we do,
212  * appendStringLiteralConn is a better choice.
213  */
214 void
appendStringLiteral(PQExpBuffer buf,const char * str,int encoding,bool std_strings)215 appendStringLiteral(PQExpBuffer buf, const char *str,
216 					int encoding, bool std_strings)
217 {
218 	size_t		length = strlen(str);
219 	const char *source = str;
220 	char	   *target;
221 
222 	if (!enlargePQExpBuffer(buf, 2 * length + 2))
223 		return;
224 
225 	target = buf->data + buf->len;
226 	*target++ = '\'';
227 
228 	while (*source != '\0')
229 	{
230 		char		c = *source;
231 		int			len;
232 		int			i;
233 
234 		/* Fast path for plain ASCII */
235 		if (!IS_HIGHBIT_SET(c))
236 		{
237 			/* Apply quoting if needed */
238 			if (SQL_STR_DOUBLE(c, !std_strings))
239 				*target++ = c;
240 			/* Copy the character */
241 			*target++ = c;
242 			source++;
243 			continue;
244 		}
245 
246 		/* Slow path for possible multibyte characters */
247 		len = PQmblen(source, encoding);
248 
249 		/* Copy the character */
250 		for (i = 0; i < len; i++)
251 		{
252 			if (*source == '\0')
253 				break;
254 			*target++ = *source++;
255 		}
256 
257 		/*
258 		 * If we hit premature end of string (ie, incomplete multibyte
259 		 * character), try to pad out to the correct length with spaces. We
260 		 * may not be able to pad completely, but we will always be able to
261 		 * insert at least one pad space (since we'd not have quoted a
262 		 * multibyte character).  This should be enough to make a string that
263 		 * the server will error out on.
264 		 */
265 		if (i < len)
266 		{
267 			char	   *stop = buf->data + buf->maxlen - 2;
268 
269 			for (; i < len; i++)
270 			{
271 				if (target >= stop)
272 					break;
273 				*target++ = ' ';
274 			}
275 			break;
276 		}
277 	}
278 
279 	/* Write the terminating quote and NUL character. */
280 	*target++ = '\'';
281 	*target = '\0';
282 
283 	buf->len = target - buf->data;
284 }
285 
286 
287 /*
288  * Convert a string value to an SQL string literal and append it to
289  * the given buffer.  Encoding and string syntax rules are as indicated
290  * by current settings of the PGconn.
291  */
292 void
appendStringLiteralConn(PQExpBuffer buf,const char * str,PGconn * conn)293 appendStringLiteralConn(PQExpBuffer buf, const char *str, PGconn *conn)
294 {
295 	size_t		length = strlen(str);
296 
297 	/*
298 	 * XXX This is a kluge to silence escape_string_warning in our utility
299 	 * programs.  It should go away someday.
300 	 */
301 	if (strchr(str, '\\') != NULL && PQserverVersion(conn) >= 80100)
302 	{
303 		/* ensure we are not adjacent to an identifier */
304 		if (buf->len > 0 && buf->data[buf->len - 1] != ' ')
305 			appendPQExpBufferChar(buf, ' ');
306 		appendPQExpBufferChar(buf, ESCAPE_STRING_SYNTAX);
307 		appendStringLiteral(buf, str, PQclientEncoding(conn), false);
308 		return;
309 	}
310 	/* XXX end kluge */
311 
312 	if (!enlargePQExpBuffer(buf, 2 * length + 2))
313 		return;
314 	appendPQExpBufferChar(buf, '\'');
315 	buf->len += PQescapeStringConn(conn, buf->data + buf->len,
316 								   str, length, NULL);
317 	appendPQExpBufferChar(buf, '\'');
318 }
319 
320 
321 /*
322  * Convert a string value to a dollar quoted literal and append it to
323  * the given buffer. If the dqprefix parameter is not NULL then the
324  * dollar quote delimiter will begin with that (after the opening $).
325  *
326  * No escaping is done at all on str, in compliance with the rules
327  * for parsing dollar quoted strings.  Also, we need not worry about
328  * encoding issues.
329  */
330 void
appendStringLiteralDQ(PQExpBuffer buf,const char * str,const char * dqprefix)331 appendStringLiteralDQ(PQExpBuffer buf, const char *str, const char *dqprefix)
332 {
333 	static const char suffixes[] = "_XXXXXXX";
334 	int			nextchar = 0;
335 	PQExpBuffer delimBuf = createPQExpBuffer();
336 
337 	/* start with $ + dqprefix if not NULL */
338 	appendPQExpBufferChar(delimBuf, '$');
339 	if (dqprefix)
340 		appendPQExpBufferStr(delimBuf, dqprefix);
341 
342 	/*
343 	 * Make sure we choose a delimiter which (without the trailing $) is not
344 	 * present in the string being quoted. We don't check with the trailing $
345 	 * because a string ending in $foo must not be quoted with $foo$.
346 	 */
347 	while (strstr(str, delimBuf->data) != NULL)
348 	{
349 		appendPQExpBufferChar(delimBuf, suffixes[nextchar++]);
350 		nextchar %= sizeof(suffixes) - 1;
351 	}
352 
353 	/* add trailing $ */
354 	appendPQExpBufferChar(delimBuf, '$');
355 
356 	/* quote it and we are all done */
357 	appendPQExpBufferStr(buf, delimBuf->data);
358 	appendPQExpBufferStr(buf, str);
359 	appendPQExpBufferStr(buf, delimBuf->data);
360 
361 	destroyPQExpBuffer(delimBuf);
362 }
363 
364 
365 /*
366  * Convert a bytea value (presented as raw bytes) to an SQL string literal
367  * and append it to the given buffer.  We assume the specified
368  * standard_conforming_strings setting.
369  *
370  * This is needed in situations where we do not have a PGconn available.
371  * Where we do, PQescapeByteaConn is a better choice.
372  */
373 void
appendByteaLiteral(PQExpBuffer buf,const unsigned char * str,size_t length,bool std_strings)374 appendByteaLiteral(PQExpBuffer buf, const unsigned char *str, size_t length,
375 				   bool std_strings)
376 {
377 	const unsigned char *source = str;
378 	char	   *target;
379 
380 	static const char hextbl[] = "0123456789abcdef";
381 
382 	/*
383 	 * This implementation is hard-wired to produce hex-format output. We do
384 	 * not know the server version the output will be loaded into, so making
385 	 * an intelligent format choice is impossible.  It might be better to
386 	 * always use the old escaped format.
387 	 */
388 	if (!enlargePQExpBuffer(buf, 2 * length + 5))
389 		return;
390 
391 	target = buf->data + buf->len;
392 	*target++ = '\'';
393 	if (!std_strings)
394 		*target++ = '\\';
395 	*target++ = '\\';
396 	*target++ = 'x';
397 
398 	while (length-- > 0)
399 	{
400 		unsigned char c = *source++;
401 
402 		*target++ = hextbl[(c >> 4) & 0xF];
403 		*target++ = hextbl[c & 0xF];
404 	}
405 
406 	/* Write the terminating quote and NUL character. */
407 	*target++ = '\'';
408 	*target = '\0';
409 
410 	buf->len = target - buf->data;
411 }
412 
413 
414 /*
415  * Append the given string to the shell command being built in the buffer,
416  * with shell-style quoting as needed to create exactly one argument.
417  *
418  * Forbid LF or CR characters, which have scant practical use beyond designing
419  * security breaches.  The Windows command shell is unusable as a conduit for
420  * arguments containing LF or CR characters.  A future major release should
421  * reject those characters in CREATE ROLE and CREATE DATABASE, because use
422  * there eventually leads to errors here.
423  *
424  * appendShellString() simply prints an error and dies if LF or CR appears.
425  * appendShellStringNoError() omits those characters from the result, and
426  * returns false if there were any.
427  */
428 void
appendShellString(PQExpBuffer buf,const char * str)429 appendShellString(PQExpBuffer buf, const char *str)
430 {
431 	if (!appendShellStringNoError(buf, str))
432 	{
433 		fprintf(stderr,
434 				_("shell command argument contains a newline or carriage return: \"%s\"\n"),
435 				str);
436 		exit(EXIT_FAILURE);
437 	}
438 }
439 
440 bool
appendShellStringNoError(PQExpBuffer buf,const char * str)441 appendShellStringNoError(PQExpBuffer buf, const char *str)
442 {
443 #ifdef WIN32
444 	int			backslash_run_length = 0;
445 #endif
446 	bool		ok = true;
447 	const char *p;
448 
449 	/*
450 	 * Don't bother with adding quotes if the string is nonempty and clearly
451 	 * contains only safe characters.
452 	 */
453 	if (*str != '\0' &&
454 		strspn(str, "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_./:") == strlen(str))
455 	{
456 		appendPQExpBufferStr(buf, str);
457 		return ok;
458 	}
459 
460 #ifndef WIN32
461 	appendPQExpBufferChar(buf, '\'');
462 	for (p = str; *p; p++)
463 	{
464 		if (*p == '\n' || *p == '\r')
465 		{
466 			ok = false;
467 			continue;
468 		}
469 
470 		if (*p == '\'')
471 			appendPQExpBufferStr(buf, "'\"'\"'");
472 		else
473 			appendPQExpBufferChar(buf, *p);
474 	}
475 	appendPQExpBufferChar(buf, '\'');
476 #else							/* WIN32 */
477 
478 	/*
479 	 * A Windows system() argument experiences two layers of interpretation.
480 	 * First, cmd.exe interprets the string.  Its behavior is undocumented,
481 	 * but a caret escapes any byte except LF or CR that would otherwise have
482 	 * special meaning.  Handling of a caret before LF or CR differs between
483 	 * "cmd.exe /c" and other modes, and it is unusable here.
484 	 *
485 	 * Second, the new process parses its command line to construct argv (see
486 	 * https://msdn.microsoft.com/en-us/library/17w5ykft.aspx).  This treats
487 	 * backslash-double quote sequences specially.
488 	 */
489 	appendPQExpBufferStr(buf, "^\"");
490 	for (p = str; *p; p++)
491 	{
492 		if (*p == '\n' || *p == '\r')
493 		{
494 			ok = false;
495 			continue;
496 		}
497 
498 		/* Change N backslashes before a double quote to 2N+1 backslashes. */
499 		if (*p == '"')
500 		{
501 			while (backslash_run_length)
502 			{
503 				appendPQExpBufferStr(buf, "^\\");
504 				backslash_run_length--;
505 			}
506 			appendPQExpBufferStr(buf, "^\\");
507 		}
508 		else if (*p == '\\')
509 			backslash_run_length++;
510 		else
511 			backslash_run_length = 0;
512 
513 		/*
514 		 * Decline to caret-escape the most mundane characters, to ease
515 		 * debugging and lest we approach the command length limit.
516 		 */
517 		if (!((*p >= 'a' && *p <= 'z') ||
518 			  (*p >= 'A' && *p <= 'Z') ||
519 			  (*p >= '0' && *p <= '9')))
520 			appendPQExpBufferChar(buf, '^');
521 		appendPQExpBufferChar(buf, *p);
522 	}
523 
524 	/*
525 	 * Change N backslashes at end of argument to 2N backslashes, because they
526 	 * precede the double quote that terminates the argument.
527 	 */
528 	while (backslash_run_length)
529 	{
530 		appendPQExpBufferStr(buf, "^\\");
531 		backslash_run_length--;
532 	}
533 	appendPQExpBufferStr(buf, "^\"");
534 #endif							/* WIN32 */
535 
536 	return ok;
537 }
538 
539 
540 /*
541  * Append the given string to the buffer, with suitable quoting for passing
542  * the string as a value in a keyword/value pair in a libpq connection string.
543  */
544 void
appendConnStrVal(PQExpBuffer buf,const char * str)545 appendConnStrVal(PQExpBuffer buf, const char *str)
546 {
547 	const char *s;
548 	bool		needquotes;
549 
550 	/*
551 	 * If the string is one or more plain ASCII characters, no need to quote
552 	 * it. This is quite conservative, but better safe than sorry.
553 	 */
554 	needquotes = true;
555 	for (s = str; *s; s++)
556 	{
557 		if (!((*s >= 'a' && *s <= 'z') || (*s >= 'A' && *s <= 'Z') ||
558 			  (*s >= '0' && *s <= '9') || *s == '_' || *s == '.'))
559 		{
560 			needquotes = true;
561 			break;
562 		}
563 		needquotes = false;
564 	}
565 
566 	if (needquotes)
567 	{
568 		appendPQExpBufferChar(buf, '\'');
569 		while (*str)
570 		{
571 			/* ' and \ must be escaped by to \' and \\ */
572 			if (*str == '\'' || *str == '\\')
573 				appendPQExpBufferChar(buf, '\\');
574 
575 			appendPQExpBufferChar(buf, *str);
576 			str++;
577 		}
578 		appendPQExpBufferChar(buf, '\'');
579 	}
580 	else
581 		appendPQExpBufferStr(buf, str);
582 }
583 
584 
585 /*
586  * Append a psql meta-command that connects to the given database with the
587  * then-current connection's user, host and port.
588  */
589 void
appendPsqlMetaConnect(PQExpBuffer buf,const char * dbname)590 appendPsqlMetaConnect(PQExpBuffer buf, const char *dbname)
591 {
592 	const char *s;
593 	bool complex;
594 
595 	/*
596 	 * If the name is plain ASCII characters, emit a trivial "\connect "foo"".
597 	 * For other names, even many not technically requiring it, skip to the
598 	 * general case.  No database has a zero-length name.
599 	 */
600 	complex = false;
601 
602 	for (s = dbname; *s; s++)
603 	{
604 		if (*s == '\n' || *s == '\r')
605 		{
606 			fprintf(stderr,
607 					_("database name contains a newline or carriage return: \"%s\"\n"),
608 					dbname);
609 			exit(EXIT_FAILURE);
610 		}
611 
612 		if (!((*s >= 'a' && *s <= 'z') || (*s >= 'A' && *s <= 'Z') ||
613 			  (*s >= '0' && *s <= '9') || *s == '_' || *s == '.'))
614 		{
615 			complex = true;
616 		}
617 	}
618 
619 	appendPQExpBufferStr(buf, "\\connect ");
620 	if (complex)
621 	{
622 		PQExpBufferData connstr;
623 
624 		initPQExpBuffer(&connstr);
625 		appendPQExpBufferStr(&connstr, "dbname=");
626 		appendConnStrVal(&connstr, dbname);
627 
628 		appendPQExpBufferStr(buf, "-reuse-previous=on ");
629 
630 		/*
631 		 * As long as the name does not contain a newline, SQL identifier
632 		 * quoting satisfies the psql meta-command parser.  Prefer not to
633 		 * involve psql-interpreted single quotes, which behaved differently
634 		 * before PostgreSQL 9.2.
635 		 */
636 		appendPQExpBufferStr(buf, fmtId(connstr.data));
637 
638 		termPQExpBuffer(&connstr);
639 	}
640 	else
641 		appendPQExpBufferStr(buf, fmtId(dbname));
642 	appendPQExpBufferChar(buf, '\n');
643 }
644 
645 
/*
 * Split the text form of a 1-dimensional Postgres array into its items.
 *
 * On success, returns true with *itemarray pointing to an array of *nitems
 * strings.  On a parse failure, returns false; *itemarray may then be NULL
 * or may point to allocated storage.
 *
 * NOTE: calling free() on *itemarray releases all of the working storage.
 */
bool
parsePGArray(const char *atext, char ***itemarray, int *nitems)
{
	int			textlen;
	int			count = 0;
	char	  **ptrs;
	char	   *dst;
	const char *src;

	/*
	 * The input should look like "{item,item,item}", each item being either
	 * raw data or enclosed in double quotes (inside which characters,
	 * including backslashes and quotes, may be backslash-escaped).
	 *
	 * The result is a single malloc block, so that one free() suffices: the
	 * item pointers come first and the string data follows.  One pointer
	 * plus one character per input character is always enough; consider
	 * "{,,,,,,,,,,}".
	 */
	*itemarray = NULL;
	*nitems = 0;

	textlen = strlen(atext);
	if (textlen < 2 || atext[0] != '{' || atext[textlen - 1] != '}')
		return false;			/* not brace-delimited */

	ptrs = (char **) malloc(textlen * (sizeof(char *) + sizeof(char)));
	if (ptrs == NULL)
		return false;			/* out of memory */
	*itemarray = ptrs;
	dst = (char *) (ptrs + textlen);

	src = atext + 1;			/* step past the opening '{' */
	while (*src != '}')
	{
		if (*src == '\0')
			return false;		/* ran off the end of the input */

		/* a new item starts at the current write position */
		ptrs[count++] = dst;

		for (;;)
		{
			char		c = *src;

			if (c == '}' || c == ',')
				break;			/* end of this item */
			if (c == '\0')
				return false;	/* ran off the end of the input */

			if (c != '"')
			{
				/* unquoted data is copied as-is */
				*dst++ = c;
				src++;
				continue;
			}

			/* quoted section: copy up to the closing quote */
			for (src++; *src != '"';)
			{
				/* a backslash makes the following character literal */
				if (*src == '\\')
					src++;
				if (*src == '\0')
					return false;	/* ran off the end of the input */
				*dst++ = *src++;
			}
			src++;				/* step past the closing quote */
		}

		*dst++ = '\0';
		if (*src == ',')
			src++;
	}

	/* the '}' that ended the loop must be the input's final character */
	if (src[1] != '\0')
		return false;			/* bogus syntax (embedded '}') */

	*nitems = count;
	return true;
}
727 
728 
729 /*
730  * Format a reloptions array and append it to the given buffer.
731  *
732  * "prefix" is prepended to the option names; typically it's "" or "toast.".
733  *
734  * Returns false if the reloptions array could not be parsed (in which case
735  * nothing will have been appended to the buffer), or true on success.
736  *
737  * Note: this logic should generally match the backend's flatten_reloptions()
738  * (in adt/ruleutils.c).
739  */
740 bool
appendReloptionsArray(PQExpBuffer buffer,const char * reloptions,const char * prefix,int encoding,bool std_strings)741 appendReloptionsArray(PQExpBuffer buffer, const char *reloptions,
742 					  const char *prefix, int encoding, bool std_strings)
743 {
744 	char	  **options;
745 	int			noptions;
746 	int			i;
747 
748 	if (!parsePGArray(reloptions, &options, &noptions))
749 	{
750 		if (options)
751 			free(options);
752 		return false;
753 	}
754 
755 	for (i = 0; i < noptions; i++)
756 	{
757 		char	   *option = options[i];
758 		char	   *name;
759 		char	   *separator;
760 		char	   *value;
761 
762 		/*
763 		 * Each array element should have the form name=value.  If the "=" is
764 		 * missing for some reason, treat it like an empty value.
765 		 */
766 		name = option;
767 		separator = strchr(option, '=');
768 		if (separator)
769 		{
770 			*separator = '\0';
771 			value = separator + 1;
772 		}
773 		else
774 			value = "";
775 
776 		if (i > 0)
777 			appendPQExpBufferStr(buffer, ", ");
778 		appendPQExpBuffer(buffer, "%s%s=", prefix, fmtId(name));
779 
780 		/*
781 		 * In general we need to quote the value; but to avoid unnecessary
782 		 * clutter, do not quote if it is an identifier that would not need
783 		 * quoting.  (We could also allow numbers, but that is a bit trickier
784 		 * than it looks --- for example, are leading zeroes significant?  We
785 		 * don't want to assume very much here about what custom reloptions
786 		 * might mean.)
787 		 */
788 		if (strcmp(fmtId(value), value) == 0)
789 			appendPQExpBufferStr(buffer, value);
790 		else
791 			appendStringLiteral(buffer, value, encoding, std_strings);
792 	}
793 
794 	if (options)
795 		free(options);
796 
797 	return true;
798 }
799 
800 
801 /*
802  * processSQLNamePattern
803  *
804  * Scan a wildcard-pattern string and generate appropriate WHERE clauses
805  * to limit the set of objects returned.  The WHERE clauses are appended
806  * to the already-partially-constructed query in buf.  Returns whether
807  * any clause was added.
808  *
809  * conn: connection query will be sent to (consulted for escaping rules).
810  * buf: output parameter.
811  * pattern: user-specified pattern option, or NULL if none ("*" is implied).
812  * have_where: true if caller already emitted "WHERE" (clauses will be ANDed
813  * onto the existing WHERE clause).
814  * force_escape: always quote regexp special characters, even outside
815  * double quotes (else they are quoted only between double quotes).
816  * schemavar: name of query variable to match against a schema-name pattern.
817  * Can be NULL if no schema.
818  * namevar: name of query variable to match against an object-name pattern.
819  * altnamevar: NULL, or name of an alternative variable to match against name.
820  * visibilityrule: clause to use if we want to restrict to visible objects
821  * (for example, "pg_catalog.pg_table_is_visible(p.oid)").  Can be NULL.
822  *
823  * Formatting note: the text already present in buf should end with a newline.
824  * The appended text, if any, will end with one too.
825  */
826 bool
processSQLNamePattern(PGconn * conn,PQExpBuffer buf,const char * pattern,bool have_where,bool force_escape,const char * schemavar,const char * namevar,const char * altnamevar,const char * visibilityrule)827 processSQLNamePattern(PGconn *conn, PQExpBuffer buf, const char *pattern,
828 					  bool have_where, bool force_escape,
829 					  const char *schemavar, const char *namevar,
830 					  const char *altnamevar, const char *visibilityrule)
831 {
832 	PQExpBufferData schemabuf;
833 	PQExpBufferData namebuf;
834 	bool		added_clause = false;
835 
836 #define WHEREAND() \
837 	(appendPQExpBufferStr(buf, have_where ? "  AND " : "WHERE "), \
838 	 have_where = true, added_clause = true)
839 
840 	if (pattern == NULL)
841 	{
842 		/* Default: select all visible objects */
843 		if (visibilityrule)
844 		{
845 			WHEREAND();
846 			appendPQExpBuffer(buf, "%s\n", visibilityrule);
847 		}
848 		return added_clause;
849 	}
850 
851 	initPQExpBuffer(&schemabuf);
852 	initPQExpBuffer(&namebuf);
853 
854 	/*
855 	 * Convert shell-style 'pattern' into the regular expression(s) we want to
856 	 * execute.  Quoting/escaping into SQL literal format will be done below
857 	 * using appendStringLiteralConn().
858 	 */
859 	patternToSQLRegex(PQclientEncoding(conn), NULL, &schemabuf, &namebuf,
860 					  pattern, force_escape);
861 
862 	/*
863 	 * Now decide what we need to emit.  We may run under a hostile
864 	 * search_path, so qualify EVERY name.  Note there will be a leading "^("
865 	 * in the patterns in any case.
866 	 *
867 	 * We want the regex matches to use the database's default collation where
868 	 * collation-sensitive behavior is required (for example, which characters
869 	 * match '\w').  That happened by default before PG v12, but if the server
870 	 * is >= v12 then we need to force it through explicit COLLATE clauses,
871 	 * otherwise the "C" collation attached to "name" catalog columns wins.
872 	 */
873 	if (namebuf.len > 2)
874 	{
875 		/* We have a name pattern, so constrain the namevar(s) */
876 
877 		/* Optimize away a "*" pattern */
878 		if (strcmp(namebuf.data, "^(.*)$") != 0)
879 		{
880 			WHEREAND();
881 			if (altnamevar)
882 			{
883 				appendPQExpBuffer(buf,
884 								  "(%s OPERATOR(pg_catalog.~) ", namevar);
885 				appendStringLiteralConn(buf, namebuf.data, conn);
886 				if (PQserverVersion(conn) >= 120000)
887 					appendPQExpBufferStr(buf, " COLLATE pg_catalog.default");
888 				appendPQExpBuffer(buf,
889 								  "\n        OR %s OPERATOR(pg_catalog.~) ",
890 								  altnamevar);
891 				appendStringLiteralConn(buf, namebuf.data, conn);
892 				if (PQserverVersion(conn) >= 120000)
893 					appendPQExpBufferStr(buf, " COLLATE pg_catalog.default");
894 				appendPQExpBufferStr(buf, ")\n");
895 			}
896 			else
897 			{
898 				appendPQExpBuffer(buf, "%s OPERATOR(pg_catalog.~) ", namevar);
899 				appendStringLiteralConn(buf, namebuf.data, conn);
900 				if (PQserverVersion(conn) >= 120000)
901 					appendPQExpBufferStr(buf, " COLLATE pg_catalog.default");
902 				appendPQExpBufferChar(buf, '\n');
903 			}
904 		}
905 	}
906 
907 	if (schemabuf.len > 2)
908 	{
909 		/* We have a schema pattern, so constrain the schemavar */
910 
911 		/* Optimize away a "*" pattern */
912 		if (strcmp(schemabuf.data, "^(.*)$") != 0 && schemavar)
913 		{
914 			WHEREAND();
915 			appendPQExpBuffer(buf, "%s OPERATOR(pg_catalog.~) ", schemavar);
916 			appendStringLiteralConn(buf, schemabuf.data, conn);
917 			if (PQserverVersion(conn) >= 120000)
918 				appendPQExpBufferStr(buf, " COLLATE pg_catalog.default");
919 			appendPQExpBufferChar(buf, '\n');
920 		}
921 	}
922 	else
923 	{
924 		/* No schema pattern given, so select only visible objects */
925 		if (visibilityrule)
926 		{
927 			WHEREAND();
928 			appendPQExpBuffer(buf, "%s\n", visibilityrule);
929 		}
930 	}
931 
932 	termPQExpBuffer(&schemabuf);
933 	termPQExpBuffer(&namebuf);
934 
935 	return added_clause;
936 #undef WHEREAND
937 }
938 
939 /*
940  * Transform a possibly qualified shell-style object name pattern into up to
941  * three SQL-style regular expressions, converting quotes, lower-casing
942  * unquoted letters, and adjusting shell-style wildcard characters into regexp
943  * notation.
944  *
945  * If the dbnamebuf and schemabuf arguments are non-NULL, and the pattern
946  * contains two or more dbname/schema/name separators, we parse the portions of
947  * the pattern prior to the first and second separators into dbnamebuf and
948  * schemabuf, and the rest into namebuf.  (Additional dots in the name portion
949  * are not treated as special.)
950  *
951  * If dbnamebuf is NULL and schemabuf is non-NULL, and the pattern contains at
952  * least one separator, we parse the first portion into schemabuf and the rest
953  * into namebuf.
954  *
955  * Otherwise, we parse all the pattern into namebuf.
956  *
957  * We surround the regexps with "^(...)$" to force them to match whole strings,
958  * as per SQL practice.  We have to have parens in case strings contain "|",
959  * else the "^" and "$" will be bound into the first and last alternatives
960  * which is not what we want.
961  *
962  * The regexps we parse into the buffers are appended to the data (if any)
963  * already present.  If we parse fewer fields than the number of buffers we
964  * were given, the extra buffers are unaltered.
965  */
966 void
patternToSQLRegex(int encoding,PQExpBuffer dbnamebuf,PQExpBuffer schemabuf,PQExpBuffer namebuf,const char * pattern,bool force_escape)967 patternToSQLRegex(int encoding, PQExpBuffer dbnamebuf, PQExpBuffer schemabuf,
968 				  PQExpBuffer namebuf, const char *pattern, bool force_escape)
969 {
970 	PQExpBufferData buf[3];
971 	PQExpBuffer curbuf;
972 	PQExpBuffer maxbuf;
973 	int			i;
974 	bool		inquotes;
975 	const char *cp;
976 
977 	Assert(pattern != NULL);
978 	Assert(namebuf != NULL);
979 
980 	/* callers should never expect "dbname.relname" format */
981 	Assert(dbnamebuf == NULL || schemabuf != NULL);
982 
983 	inquotes = false;
984 	cp = pattern;
985 
986 	if (dbnamebuf != NULL)
987 		maxbuf = &buf[2];
988 	else if (schemabuf != NULL)
989 		maxbuf = &buf[1];
990 	else
991 		maxbuf = &buf[0];
992 
993 	curbuf = &buf[0];
994 	initPQExpBuffer(curbuf);
995 	appendPQExpBufferStr(curbuf, "^(");
996 	while (*cp)
997 	{
998 		char		ch = *cp;
999 
1000 		if (ch == '"')
1001 		{
1002 			if (inquotes && cp[1] == '"')
1003 			{
1004 				/* emit one quote, stay in inquotes mode */
1005 				appendPQExpBufferChar(curbuf, '"');
1006 				cp++;
1007 			}
1008 			else
1009 				inquotes = !inquotes;
1010 			cp++;
1011 		}
1012 		else if (!inquotes && isupper((unsigned char) ch))
1013 		{
1014 			appendPQExpBufferChar(curbuf,
1015 								  pg_tolower((unsigned char) ch));
1016 			cp++;
1017 		}
1018 		else if (!inquotes && ch == '*')
1019 		{
1020 			appendPQExpBufferStr(curbuf, ".*");
1021 			cp++;
1022 		}
1023 		else if (!inquotes && ch == '?')
1024 		{
1025 			appendPQExpBufferChar(curbuf, '.');
1026 			cp++;
1027 		}
1028 
1029 		/*
1030 		 * When we find a dbname/schema/name separator, we treat it specially
1031 		 * only if the caller requested more patterns to be parsed than we
1032 		 * have already parsed from the pattern.  Otherwise, dot characters
1033 		 * are not special.
1034 		 */
1035 		else if (!inquotes && ch == '.' && curbuf < maxbuf)
1036 		{
1037 			appendPQExpBufferStr(curbuf, ")$");
1038 			curbuf++;
1039 			initPQExpBuffer(curbuf);
1040 			appendPQExpBufferStr(curbuf, "^(");
1041 			cp++;
1042 		}
1043 		else if (ch == '$')
1044 		{
1045 			/*
1046 			 * Dollar is always quoted, whether inside quotes or not. The
1047 			 * reason is that it's allowed in SQL identifiers, so there's a
1048 			 * significant use-case for treating it literally, while because
1049 			 * we anchor the pattern automatically there is no use-case for
1050 			 * having it possess its regexp meaning.
1051 			 */
1052 			appendPQExpBufferStr(curbuf, "\\$");
1053 			cp++;
1054 		}
1055 		else
1056 		{
1057 			/*
1058 			 * Ordinary data character, transfer to pattern
1059 			 *
1060 			 * Inside double quotes, or at all times if force_escape is true,
1061 			 * quote regexp special characters with a backslash to avoid
1062 			 * regexp errors.  Outside quotes, however, let them pass through
1063 			 * as-is; this lets knowledgeable users build regexp expressions
1064 			 * that are more powerful than shell-style patterns.
1065 			 *
1066 			 * As an exception to that, though, always quote "[]", as that's
1067 			 * much more likely to be an attempt to write an array type name
1068 			 * than it is to be the start of a regexp bracket expression.
1069 			 */
1070 			if ((inquotes || force_escape) &&
1071 				strchr("|*+?()[]{}.^$\\", ch))
1072 				appendPQExpBufferChar(curbuf, '\\');
1073 			else if (ch == '[' && cp[1] == ']')
1074 				appendPQExpBufferChar(curbuf, '\\');
1075 			i = PQmblenBounded(cp, encoding);
1076 			while (i--)
1077 				appendPQExpBufferChar(curbuf, *cp++);
1078 		}
1079 	}
1080 	appendPQExpBufferStr(curbuf, ")$");
1081 
1082 	appendPQExpBufferStr(namebuf, curbuf->data);
1083 	termPQExpBuffer(curbuf);
1084 
1085 	if (curbuf > buf)
1086 	{
1087 		curbuf--;
1088 		appendPQExpBufferStr(schemabuf, curbuf->data);
1089 		termPQExpBuffer(curbuf);
1090 
1091 		if (curbuf > buf)
1092 		{
1093 			curbuf--;
1094 			appendPQExpBufferStr(dbnamebuf, curbuf->data);
1095 			termPQExpBuffer(curbuf);
1096 		}
1097 	}
1098 }
1099