1 /*-------------------------------------------------------------------------
2 *
3 * String-processing utility routines for frontend code
4 *
5 * Assorted utility functions that are useful in constructing SQL queries
6 * and interpreting backend output.
7 *
8 *
9 * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
10 * Portions Copyright (c) 1994, Regents of the University of California
11 *
12 * src/fe_utils/string_utils.c
13 *
14 *-------------------------------------------------------------------------
15 */
16 #include "postgres_fe.h"
17
18 #include <ctype.h>
19
20 #include "common/keywords.h"
21 #include "fe_utils/string_utils.h"
22
/*
 * Forward declaration of the built-in scratch-buffer provider; it is the
 * initial value of the getLocalPQExpBuffer hook defined below.
 */
static PQExpBuffer defaultGetLocalPQExpBuffer(void);

/* Globals exported by this file */

/* When nonzero, fmtId() double-quotes every identifier unconditionally. */
int quote_all_identifiers = 0;

/*
 * Hook through which fmtId() and fmtQualifiedId() obtain their result
 * buffer.  It may be pointed at a custom provider to replace the default.
 */
PQExpBuffer (*getLocalPQExpBuffer) (void) = defaultGetLocalPQExpBuffer;
28
29
30 /*
31 * Returns a temporary PQExpBuffer, valid until the next call to the function.
32 * This is used by fmtId and fmtQualifiedId.
33 *
34 * Non-reentrant and non-thread-safe but reduces memory leakage. You can
35 * replace this with a custom version by setting the getLocalPQExpBuffer
36 * function pointer.
37 */
38 static PQExpBuffer
defaultGetLocalPQExpBuffer(void)39 defaultGetLocalPQExpBuffer(void)
40 {
41 static PQExpBuffer id_return = NULL;
42
43 if (id_return) /* first time through? */
44 {
45 /* same buffer, just wipe contents */
46 resetPQExpBuffer(id_return);
47 }
48 else
49 {
50 /* new buffer */
51 id_return = createPQExpBuffer();
52 }
53
54 return id_return;
55 }
56
57 /*
58 * Quotes input string if it's not a legitimate SQL identifier as-is.
59 *
60 * Note that the returned string must be used before calling fmtId again,
61 * since we re-use the same return buffer each time.
62 */
63 const char *
fmtId(const char * rawid)64 fmtId(const char *rawid)
65 {
66 PQExpBuffer id_return = getLocalPQExpBuffer();
67
68 const char *cp;
69 bool need_quotes = false;
70
71 /*
72 * These checks need to match the identifier production in scan.l. Don't
73 * use islower() etc.
74 */
75 if (quote_all_identifiers)
76 need_quotes = true;
77 /* slightly different rules for first character */
78 else if (!((rawid[0] >= 'a' && rawid[0] <= 'z') || rawid[0] == '_'))
79 need_quotes = true;
80 else
81 {
82 /* otherwise check the entire string */
83 for (cp = rawid; *cp; cp++)
84 {
85 if (!((*cp >= 'a' && *cp <= 'z')
86 || (*cp >= '0' && *cp <= '9')
87 || (*cp == '_')))
88 {
89 need_quotes = true;
90 break;
91 }
92 }
93 }
94
95 if (!need_quotes)
96 {
97 /*
98 * Check for keyword. We quote keywords except for unreserved ones.
99 * (In some cases we could avoid quoting a col_name or type_func_name
100 * keyword, but it seems much harder than it's worth to tell that.)
101 *
102 * Note: ScanKeywordLookup() does case-insensitive comparison, but
103 * that's fine, since we already know we have all-lower-case.
104 */
105 int kwnum = ScanKeywordLookup(rawid, &ScanKeywords);
106
107 if (kwnum >= 0 && ScanKeywordCategories[kwnum] != UNRESERVED_KEYWORD)
108 need_quotes = true;
109 }
110
111 if (!need_quotes)
112 {
113 /* no quoting needed */
114 appendPQExpBufferStr(id_return, rawid);
115 }
116 else
117 {
118 appendPQExpBufferChar(id_return, '"');
119 for (cp = rawid; *cp; cp++)
120 {
121 /*
122 * Did we find a double-quote in the string? Then make this a
123 * double double-quote per SQL99. Before, we put in a
124 * backslash/double-quote pair. - thomas 2000-08-05
125 */
126 if (*cp == '"')
127 appendPQExpBufferChar(id_return, '"');
128 appendPQExpBufferChar(id_return, *cp);
129 }
130 appendPQExpBufferChar(id_return, '"');
131 }
132
133 return id_return->data;
134 }
135
136 /*
137 * fmtQualifiedId - construct a schema-qualified name, with quoting as needed.
138 *
139 * Like fmtId, use the result before calling again.
140 *
141 * Since we call fmtId and it also uses getLocalPQExpBuffer() we cannot
142 * use that buffer until we're finished with calling fmtId().
143 */
144 const char *
fmtQualifiedId(const char * schema,const char * id)145 fmtQualifiedId(const char *schema, const char *id)
146 {
147 PQExpBuffer id_return;
148 PQExpBuffer lcl_pqexp = createPQExpBuffer();
149
150 /* Some callers might fail to provide a schema name */
151 if (schema && *schema)
152 {
153 appendPQExpBuffer(lcl_pqexp, "%s.", fmtId(schema));
154 }
155 appendPQExpBufferStr(lcl_pqexp, fmtId(id));
156
157 id_return = getLocalPQExpBuffer();
158
159 appendPQExpBufferStr(id_return, lcl_pqexp->data);
160 destroyPQExpBuffer(lcl_pqexp);
161
162 return id_return->data;
163 }
164
165
/*
 * formatPGVersionNumber - render a PG_VERSION_NUM-style integer (as returned
 * by PQserverVersion()) as text.
 *
 * Values of 100000 and up use the two-part scheme "major.minor"; smaller
 * values use the older three-part scheme "major.middle.minor".  When
 * include_minor is false the last component is left off.
 *
 * The caller provides the output buffer (32 bytes is the recommended size),
 * which keeps the function reentrant.  Output is written with snprintf and
 * is therefore truncated to buflen.
 *
 * Returns buf, purely as a notational convenience.
 */
char *
formatPGVersionNumber(int version_number, bool include_minor,
					  char *buf, size_t buflen)
{
	const int	major = version_number / 10000;

	if (version_number < 100000)
	{
		/* old three-part numbering */
		const int	middle = (version_number / 100) % 100;

		if (include_minor)
			snprintf(buf, buflen, "%d.%d.%d", major, middle,
					 version_number % 100);
		else
			snprintf(buf, buflen, "%d.%d", major, middle);

		return buf;
	}

	/* new two-part numbering */
	if (include_minor)
		snprintf(buf, buflen, "%d.%d", major, version_number % 10000);
	else
		snprintf(buf, buflen, "%d", major);

	return buf;
}
202
203
204 /*
205 * Convert a string value to an SQL string literal and append it to
206 * the given buffer. We assume the specified client_encoding and
207 * standard_conforming_strings settings.
208 *
209 * This is essentially equivalent to libpq's PQescapeStringInternal,
210 * except for the output buffer structure. We need it in situations
211 * where we do not have a PGconn available. Where we do,
212 * appendStringLiteralConn is a better choice.
213 */
214 void
appendStringLiteral(PQExpBuffer buf,const char * str,int encoding,bool std_strings)215 appendStringLiteral(PQExpBuffer buf, const char *str,
216 int encoding, bool std_strings)
217 {
218 size_t length = strlen(str);
219 const char *source = str;
220 char *target;
221
222 if (!enlargePQExpBuffer(buf, 2 * length + 2))
223 return;
224
225 target = buf->data + buf->len;
226 *target++ = '\'';
227
228 while (*source != '\0')
229 {
230 char c = *source;
231 int len;
232 int i;
233
234 /* Fast path for plain ASCII */
235 if (!IS_HIGHBIT_SET(c))
236 {
237 /* Apply quoting if needed */
238 if (SQL_STR_DOUBLE(c, !std_strings))
239 *target++ = c;
240 /* Copy the character */
241 *target++ = c;
242 source++;
243 continue;
244 }
245
246 /* Slow path for possible multibyte characters */
247 len = PQmblen(source, encoding);
248
249 /* Copy the character */
250 for (i = 0; i < len; i++)
251 {
252 if (*source == '\0')
253 break;
254 *target++ = *source++;
255 }
256
257 /*
258 * If we hit premature end of string (ie, incomplete multibyte
259 * character), try to pad out to the correct length with spaces. We
260 * may not be able to pad completely, but we will always be able to
261 * insert at least one pad space (since we'd not have quoted a
262 * multibyte character). This should be enough to make a string that
263 * the server will error out on.
264 */
265 if (i < len)
266 {
267 char *stop = buf->data + buf->maxlen - 2;
268
269 for (; i < len; i++)
270 {
271 if (target >= stop)
272 break;
273 *target++ = ' ';
274 }
275 break;
276 }
277 }
278
279 /* Write the terminating quote and NUL character. */
280 *target++ = '\'';
281 *target = '\0';
282
283 buf->len = target - buf->data;
284 }
285
286
287 /*
288 * Convert a string value to an SQL string literal and append it to
289 * the given buffer. Encoding and string syntax rules are as indicated
290 * by current settings of the PGconn.
291 */
292 void
appendStringLiteralConn(PQExpBuffer buf,const char * str,PGconn * conn)293 appendStringLiteralConn(PQExpBuffer buf, const char *str, PGconn *conn)
294 {
295 size_t length = strlen(str);
296
297 /*
298 * XXX This is a kluge to silence escape_string_warning in our utility
299 * programs. It should go away someday.
300 */
301 if (strchr(str, '\\') != NULL && PQserverVersion(conn) >= 80100)
302 {
303 /* ensure we are not adjacent to an identifier */
304 if (buf->len > 0 && buf->data[buf->len - 1] != ' ')
305 appendPQExpBufferChar(buf, ' ');
306 appendPQExpBufferChar(buf, ESCAPE_STRING_SYNTAX);
307 appendStringLiteral(buf, str, PQclientEncoding(conn), false);
308 return;
309 }
310 /* XXX end kluge */
311
312 if (!enlargePQExpBuffer(buf, 2 * length + 2))
313 return;
314 appendPQExpBufferChar(buf, '\'');
315 buf->len += PQescapeStringConn(conn, buf->data + buf->len,
316 str, length, NULL);
317 appendPQExpBufferChar(buf, '\'');
318 }
319
320
321 /*
322 * Convert a string value to a dollar quoted literal and append it to
323 * the given buffer. If the dqprefix parameter is not NULL then the
324 * dollar quote delimiter will begin with that (after the opening $).
325 *
326 * No escaping is done at all on str, in compliance with the rules
327 * for parsing dollar quoted strings. Also, we need not worry about
328 * encoding issues.
329 */
330 void
appendStringLiteralDQ(PQExpBuffer buf,const char * str,const char * dqprefix)331 appendStringLiteralDQ(PQExpBuffer buf, const char *str, const char *dqprefix)
332 {
333 static const char suffixes[] = "_XXXXXXX";
334 int nextchar = 0;
335 PQExpBuffer delimBuf = createPQExpBuffer();
336
337 /* start with $ + dqprefix if not NULL */
338 appendPQExpBufferChar(delimBuf, '$');
339 if (dqprefix)
340 appendPQExpBufferStr(delimBuf, dqprefix);
341
342 /*
343 * Make sure we choose a delimiter which (without the trailing $) is not
344 * present in the string being quoted. We don't check with the trailing $
345 * because a string ending in $foo must not be quoted with $foo$.
346 */
347 while (strstr(str, delimBuf->data) != NULL)
348 {
349 appendPQExpBufferChar(delimBuf, suffixes[nextchar++]);
350 nextchar %= sizeof(suffixes) - 1;
351 }
352
353 /* add trailing $ */
354 appendPQExpBufferChar(delimBuf, '$');
355
356 /* quote it and we are all done */
357 appendPQExpBufferStr(buf, delimBuf->data);
358 appendPQExpBufferStr(buf, str);
359 appendPQExpBufferStr(buf, delimBuf->data);
360
361 destroyPQExpBuffer(delimBuf);
362 }
363
364
365 /*
366 * Convert a bytea value (presented as raw bytes) to an SQL string literal
367 * and append it to the given buffer. We assume the specified
368 * standard_conforming_strings setting.
369 *
370 * This is needed in situations where we do not have a PGconn available.
371 * Where we do, PQescapeByteaConn is a better choice.
372 */
373 void
appendByteaLiteral(PQExpBuffer buf,const unsigned char * str,size_t length,bool std_strings)374 appendByteaLiteral(PQExpBuffer buf, const unsigned char *str, size_t length,
375 bool std_strings)
376 {
377 const unsigned char *source = str;
378 char *target;
379
380 static const char hextbl[] = "0123456789abcdef";
381
382 /*
383 * This implementation is hard-wired to produce hex-format output. We do
384 * not know the server version the output will be loaded into, so making
385 * an intelligent format choice is impossible. It might be better to
386 * always use the old escaped format.
387 */
388 if (!enlargePQExpBuffer(buf, 2 * length + 5))
389 return;
390
391 target = buf->data + buf->len;
392 *target++ = '\'';
393 if (!std_strings)
394 *target++ = '\\';
395 *target++ = '\\';
396 *target++ = 'x';
397
398 while (length-- > 0)
399 {
400 unsigned char c = *source++;
401
402 *target++ = hextbl[(c >> 4) & 0xF];
403 *target++ = hextbl[c & 0xF];
404 }
405
406 /* Write the terminating quote and NUL character. */
407 *target++ = '\'';
408 *target = '\0';
409
410 buf->len = target - buf->data;
411 }
412
413
414 /*
415 * Append the given string to the shell command being built in the buffer,
416 * with shell-style quoting as needed to create exactly one argument.
417 *
418 * Forbid LF or CR characters, which have scant practical use beyond designing
419 * security breaches. The Windows command shell is unusable as a conduit for
420 * arguments containing LF or CR characters. A future major release should
421 * reject those characters in CREATE ROLE and CREATE DATABASE, because use
422 * there eventually leads to errors here.
423 *
424 * appendShellString() simply prints an error and dies if LF or CR appears.
425 * appendShellStringNoError() omits those characters from the result, and
426 * returns false if there were any.
427 */
428 void
appendShellString(PQExpBuffer buf,const char * str)429 appendShellString(PQExpBuffer buf, const char *str)
430 {
431 if (!appendShellStringNoError(buf, str))
432 {
433 fprintf(stderr,
434 _("shell command argument contains a newline or carriage return: \"%s\"\n"),
435 str);
436 exit(EXIT_FAILURE);
437 }
438 }
439
440 bool
appendShellStringNoError(PQExpBuffer buf,const char * str)441 appendShellStringNoError(PQExpBuffer buf, const char *str)
442 {
443 #ifdef WIN32
444 int backslash_run_length = 0;
445 #endif
446 bool ok = true;
447 const char *p;
448
449 /*
450 * Don't bother with adding quotes if the string is nonempty and clearly
451 * contains only safe characters.
452 */
453 if (*str != '\0' &&
454 strspn(str, "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_./:") == strlen(str))
455 {
456 appendPQExpBufferStr(buf, str);
457 return ok;
458 }
459
460 #ifndef WIN32
461 appendPQExpBufferChar(buf, '\'');
462 for (p = str; *p; p++)
463 {
464 if (*p == '\n' || *p == '\r')
465 {
466 ok = false;
467 continue;
468 }
469
470 if (*p == '\'')
471 appendPQExpBufferStr(buf, "'\"'\"'");
472 else
473 appendPQExpBufferChar(buf, *p);
474 }
475 appendPQExpBufferChar(buf, '\'');
476 #else /* WIN32 */
477
478 /*
479 * A Windows system() argument experiences two layers of interpretation.
480 * First, cmd.exe interprets the string. Its behavior is undocumented,
481 * but a caret escapes any byte except LF or CR that would otherwise have
482 * special meaning. Handling of a caret before LF or CR differs between
483 * "cmd.exe /c" and other modes, and it is unusable here.
484 *
485 * Second, the new process parses its command line to construct argv (see
486 * https://msdn.microsoft.com/en-us/library/17w5ykft.aspx). This treats
487 * backslash-double quote sequences specially.
488 */
489 appendPQExpBufferStr(buf, "^\"");
490 for (p = str; *p; p++)
491 {
492 if (*p == '\n' || *p == '\r')
493 {
494 ok = false;
495 continue;
496 }
497
498 /* Change N backslashes before a double quote to 2N+1 backslashes. */
499 if (*p == '"')
500 {
501 while (backslash_run_length)
502 {
503 appendPQExpBufferStr(buf, "^\\");
504 backslash_run_length--;
505 }
506 appendPQExpBufferStr(buf, "^\\");
507 }
508 else if (*p == '\\')
509 backslash_run_length++;
510 else
511 backslash_run_length = 0;
512
513 /*
514 * Decline to caret-escape the most mundane characters, to ease
515 * debugging and lest we approach the command length limit.
516 */
517 if (!((*p >= 'a' && *p <= 'z') ||
518 (*p >= 'A' && *p <= 'Z') ||
519 (*p >= '0' && *p <= '9')))
520 appendPQExpBufferChar(buf, '^');
521 appendPQExpBufferChar(buf, *p);
522 }
523
524 /*
525 * Change N backslashes at end of argument to 2N backslashes, because they
526 * precede the double quote that terminates the argument.
527 */
528 while (backslash_run_length)
529 {
530 appendPQExpBufferStr(buf, "^\\");
531 backslash_run_length--;
532 }
533 appendPQExpBufferStr(buf, "^\"");
534 #endif /* WIN32 */
535
536 return ok;
537 }
538
539
540 /*
541 * Append the given string to the buffer, with suitable quoting for passing
542 * the string as a value in a keyword/value pair in a libpq connection string.
543 */
544 void
appendConnStrVal(PQExpBuffer buf,const char * str)545 appendConnStrVal(PQExpBuffer buf, const char *str)
546 {
547 const char *s;
548 bool needquotes;
549
550 /*
551 * If the string is one or more plain ASCII characters, no need to quote
552 * it. This is quite conservative, but better safe than sorry.
553 */
554 needquotes = true;
555 for (s = str; *s; s++)
556 {
557 if (!((*s >= 'a' && *s <= 'z') || (*s >= 'A' && *s <= 'Z') ||
558 (*s >= '0' && *s <= '9') || *s == '_' || *s == '.'))
559 {
560 needquotes = true;
561 break;
562 }
563 needquotes = false;
564 }
565
566 if (needquotes)
567 {
568 appendPQExpBufferChar(buf, '\'');
569 while (*str)
570 {
571 /* ' and \ must be escaped by to \' and \\ */
572 if (*str == '\'' || *str == '\\')
573 appendPQExpBufferChar(buf, '\\');
574
575 appendPQExpBufferChar(buf, *str);
576 str++;
577 }
578 appendPQExpBufferChar(buf, '\'');
579 }
580 else
581 appendPQExpBufferStr(buf, str);
582 }
583
584
585 /*
586 * Append a psql meta-command that connects to the given database with the
587 * then-current connection's user, host and port.
588 */
589 void
appendPsqlMetaConnect(PQExpBuffer buf,const char * dbname)590 appendPsqlMetaConnect(PQExpBuffer buf, const char *dbname)
591 {
592 const char *s;
593 bool complex;
594
595 /*
596 * If the name is plain ASCII characters, emit a trivial "\connect "foo"".
597 * For other names, even many not technically requiring it, skip to the
598 * general case. No database has a zero-length name.
599 */
600 complex = false;
601
602 for (s = dbname; *s; s++)
603 {
604 if (*s == '\n' || *s == '\r')
605 {
606 fprintf(stderr,
607 _("database name contains a newline or carriage return: \"%s\"\n"),
608 dbname);
609 exit(EXIT_FAILURE);
610 }
611
612 if (!((*s >= 'a' && *s <= 'z') || (*s >= 'A' && *s <= 'Z') ||
613 (*s >= '0' && *s <= '9') || *s == '_' || *s == '.'))
614 {
615 complex = true;
616 }
617 }
618
619 appendPQExpBufferStr(buf, "\\connect ");
620 if (complex)
621 {
622 PQExpBufferData connstr;
623
624 initPQExpBuffer(&connstr);
625 appendPQExpBufferStr(&connstr, "dbname=");
626 appendConnStrVal(&connstr, dbname);
627
628 appendPQExpBufferStr(buf, "-reuse-previous=on ");
629
630 /*
631 * As long as the name does not contain a newline, SQL identifier
632 * quoting satisfies the psql meta-command parser. Prefer not to
633 * involve psql-interpreted single quotes, which behaved differently
634 * before PostgreSQL 9.2.
635 */
636 appendPQExpBufferStr(buf, fmtId(connstr.data));
637
638 termPQExpBuffer(&connstr);
639 }
640 else
641 appendPQExpBufferStr(buf, fmtId(dbname));
642 appendPQExpBufferChar(buf, '\n');
643 }
644
645
/*
 * parsePGArray - split the text form of a 1-dimensional Postgres array into
 * its individual items.
 *
 * On success, returns true and stores in *itemarray / *nitems an array of
 * NUL-terminated item strings and its length.  On a parse failure, returns
 * false; *nitems is then 0 and *itemarray is either NULL or an allocated
 * block.
 *
 * NOTE: free'ing *itemarray is all that is needed to release the working
 * storage.
 */
bool
parsePGArray(const char *atext, char ***itemarray, int *nitems)
{
	int			textlen;
	int			count = 0;
	char	  **ptrs;
	char	   *out;
	const char *in;

	/*
	 * Input is expected to look like "{item,item,item}", each item being
	 * either raw data or wrapped in double quotes (where embedded
	 * characters, backslashes and quotes included, are backslash-escaped).
	 *
	 * Result layout: the pointer array first, the string data right behind
	 * it, all in a single malloc block so one free() releases everything.
	 * One pointer plus one character per input character is always enough
	 * (think of "{,,,,,,,,,,}").
	 */
	*itemarray = NULL;
	*nitems = 0;

	textlen = strlen(atext);
	if (textlen < 2)
		return false;			/* too short to hold both braces */
	if (atext[0] != '{' || atext[textlen - 1] != '}')
		return false;			/* not enclosed in braces */

	ptrs = (char **) malloc(textlen * (sizeof(char *) + sizeof(char)));
	if (ptrs == NULL)
		return false;			/* out of memory */
	*itemarray = ptrs;
	out = (char *) (ptrs + textlen);

	in = atext + 1;				/* step past the opening '{' */

	for (;;)
	{
		if (*in == '}')
			break;				/* reached a closing brace */
		if (*in == '\0')
			return false;		/* ran off the end of the input */

		ptrs[count] = out;

		/* gather one item, up to the next ',' or '}' */
		for (;;)
		{
			char		c = *in;

			if (c == '}' || c == ',')
				break;
			if (c == '\0')
				return false;	/* ran off the end of the input */

			if (c == '"')
			{
				/* quoted stretch: copy through to the matching quote */
				in++;
				while (*in != '"')
				{
					if (*in == '\0')
						return false;	/* unterminated quote */
					if (*in == '\\')
					{
						/* a backslash makes the next byte literal */
						in++;
						if (*in == '\0')
							return false;	/* dangling backslash */
					}
					*out++ = *in++;
				}
				in++;			/* step past the closing quote */
			}
			else
			{
				/* unquoted data is copied as-is */
				*out++ = c;
				in++;
			}
		}

		*out++ = '\0';
		if (*in == ',')
			in++;
		count++;
	}

	/* the brace that ended the scan must be the final character */
	if (in[1] != '\0')
		return false;			/* bogus syntax (embedded '}') */

	*nitems = count;
	return true;
}
727
728
729 /*
730 * Format a reloptions array and append it to the given buffer.
731 *
732 * "prefix" is prepended to the option names; typically it's "" or "toast.".
733 *
734 * Returns false if the reloptions array could not be parsed (in which case
735 * nothing will have been appended to the buffer), or true on success.
736 *
737 * Note: this logic should generally match the backend's flatten_reloptions()
738 * (in adt/ruleutils.c).
739 */
740 bool
appendReloptionsArray(PQExpBuffer buffer,const char * reloptions,const char * prefix,int encoding,bool std_strings)741 appendReloptionsArray(PQExpBuffer buffer, const char *reloptions,
742 const char *prefix, int encoding, bool std_strings)
743 {
744 char **options;
745 int noptions;
746 int i;
747
748 if (!parsePGArray(reloptions, &options, &noptions))
749 {
750 if (options)
751 free(options);
752 return false;
753 }
754
755 for (i = 0; i < noptions; i++)
756 {
757 char *option = options[i];
758 char *name;
759 char *separator;
760 char *value;
761
762 /*
763 * Each array element should have the form name=value. If the "=" is
764 * missing for some reason, treat it like an empty value.
765 */
766 name = option;
767 separator = strchr(option, '=');
768 if (separator)
769 {
770 *separator = '\0';
771 value = separator + 1;
772 }
773 else
774 value = "";
775
776 if (i > 0)
777 appendPQExpBufferStr(buffer, ", ");
778 appendPQExpBuffer(buffer, "%s%s=", prefix, fmtId(name));
779
780 /*
781 * In general we need to quote the value; but to avoid unnecessary
782 * clutter, do not quote if it is an identifier that would not need
783 * quoting. (We could also allow numbers, but that is a bit trickier
784 * than it looks --- for example, are leading zeroes significant? We
785 * don't want to assume very much here about what custom reloptions
786 * might mean.)
787 */
788 if (strcmp(fmtId(value), value) == 0)
789 appendPQExpBufferStr(buffer, value);
790 else
791 appendStringLiteral(buffer, value, encoding, std_strings);
792 }
793
794 if (options)
795 free(options);
796
797 return true;
798 }
799
800
801 /*
802 * processSQLNamePattern
803 *
804 * Scan a wildcard-pattern string and generate appropriate WHERE clauses
805 * to limit the set of objects returned. The WHERE clauses are appended
806 * to the already-partially-constructed query in buf. Returns whether
807 * any clause was added.
808 *
809 * conn: connection query will be sent to (consulted for escaping rules).
810 * buf: output parameter.
811 * pattern: user-specified pattern option, or NULL if none ("*" is implied).
812 * have_where: true if caller already emitted "WHERE" (clauses will be ANDed
813 * onto the existing WHERE clause).
814 * force_escape: always quote regexp special characters, even outside
815 * double quotes (else they are quoted only between double quotes).
816 * schemavar: name of query variable to match against a schema-name pattern.
817 * Can be NULL if no schema.
818 * namevar: name of query variable to match against an object-name pattern.
819 * altnamevar: NULL, or name of an alternative variable to match against name.
820 * visibilityrule: clause to use if we want to restrict to visible objects
821 * (for example, "pg_catalog.pg_table_is_visible(p.oid)"). Can be NULL.
822 *
823 * Formatting note: the text already present in buf should end with a newline.
824 * The appended text, if any, will end with one too.
825 */
826 bool
processSQLNamePattern(PGconn * conn,PQExpBuffer buf,const char * pattern,bool have_where,bool force_escape,const char * schemavar,const char * namevar,const char * altnamevar,const char * visibilityrule)827 processSQLNamePattern(PGconn *conn, PQExpBuffer buf, const char *pattern,
828 bool have_where, bool force_escape,
829 const char *schemavar, const char *namevar,
830 const char *altnamevar, const char *visibilityrule)
831 {
832 PQExpBufferData schemabuf;
833 PQExpBufferData namebuf;
834 bool added_clause = false;
835
836 #define WHEREAND() \
837 (appendPQExpBufferStr(buf, have_where ? " AND " : "WHERE "), \
838 have_where = true, added_clause = true)
839
840 if (pattern == NULL)
841 {
842 /* Default: select all visible objects */
843 if (visibilityrule)
844 {
845 WHEREAND();
846 appendPQExpBuffer(buf, "%s\n", visibilityrule);
847 }
848 return added_clause;
849 }
850
851 initPQExpBuffer(&schemabuf);
852 initPQExpBuffer(&namebuf);
853
854 /*
855 * Convert shell-style 'pattern' into the regular expression(s) we want to
856 * execute. Quoting/escaping into SQL literal format will be done below
857 * using appendStringLiteralConn().
858 */
859 patternToSQLRegex(PQclientEncoding(conn), NULL, &schemabuf, &namebuf,
860 pattern, force_escape);
861
862 /*
863 * Now decide what we need to emit. We may run under a hostile
864 * search_path, so qualify EVERY name. Note there will be a leading "^("
865 * in the patterns in any case.
866 *
867 * We want the regex matches to use the database's default collation where
868 * collation-sensitive behavior is required (for example, which characters
869 * match '\w'). That happened by default before PG v12, but if the server
870 * is >= v12 then we need to force it through explicit COLLATE clauses,
871 * otherwise the "C" collation attached to "name" catalog columns wins.
872 */
873 if (namebuf.len > 2)
874 {
875 /* We have a name pattern, so constrain the namevar(s) */
876
877 /* Optimize away a "*" pattern */
878 if (strcmp(namebuf.data, "^(.*)$") != 0)
879 {
880 WHEREAND();
881 if (altnamevar)
882 {
883 appendPQExpBuffer(buf,
884 "(%s OPERATOR(pg_catalog.~) ", namevar);
885 appendStringLiteralConn(buf, namebuf.data, conn);
886 if (PQserverVersion(conn) >= 120000)
887 appendPQExpBufferStr(buf, " COLLATE pg_catalog.default");
888 appendPQExpBuffer(buf,
889 "\n OR %s OPERATOR(pg_catalog.~) ",
890 altnamevar);
891 appendStringLiteralConn(buf, namebuf.data, conn);
892 if (PQserverVersion(conn) >= 120000)
893 appendPQExpBufferStr(buf, " COLLATE pg_catalog.default");
894 appendPQExpBufferStr(buf, ")\n");
895 }
896 else
897 {
898 appendPQExpBuffer(buf, "%s OPERATOR(pg_catalog.~) ", namevar);
899 appendStringLiteralConn(buf, namebuf.data, conn);
900 if (PQserverVersion(conn) >= 120000)
901 appendPQExpBufferStr(buf, " COLLATE pg_catalog.default");
902 appendPQExpBufferChar(buf, '\n');
903 }
904 }
905 }
906
907 if (schemabuf.len > 2)
908 {
909 /* We have a schema pattern, so constrain the schemavar */
910
911 /* Optimize away a "*" pattern */
912 if (strcmp(schemabuf.data, "^(.*)$") != 0 && schemavar)
913 {
914 WHEREAND();
915 appendPQExpBuffer(buf, "%s OPERATOR(pg_catalog.~) ", schemavar);
916 appendStringLiteralConn(buf, schemabuf.data, conn);
917 if (PQserverVersion(conn) >= 120000)
918 appendPQExpBufferStr(buf, " COLLATE pg_catalog.default");
919 appendPQExpBufferChar(buf, '\n');
920 }
921 }
922 else
923 {
924 /* No schema pattern given, so select only visible objects */
925 if (visibilityrule)
926 {
927 WHEREAND();
928 appendPQExpBuffer(buf, "%s\n", visibilityrule);
929 }
930 }
931
932 termPQExpBuffer(&schemabuf);
933 termPQExpBuffer(&namebuf);
934
935 return added_clause;
936 #undef WHEREAND
937 }
938
939 /*
940 * Transform a possibly qualified shell-style object name pattern into up to
941 * three SQL-style regular expressions, converting quotes, lower-casing
942 * unquoted letters, and adjusting shell-style wildcard characters into regexp
943 * notation.
944 *
945 * If the dbnamebuf and schemabuf arguments are non-NULL, and the pattern
946 * contains two or more dbname/schema/name separators, we parse the portions of
947 * the pattern prior to the first and second separators into dbnamebuf and
948 * schemabuf, and the rest into namebuf. (Additional dots in the name portion
949 * are not treated as special.)
950 *
951 * If dbnamebuf is NULL and schemabuf is non-NULL, and the pattern contains at
952 * least one separator, we parse the first portion into schemabuf and the rest
953 * into namebuf.
954 *
955 * Otherwise, we parse all the pattern into namebuf.
956 *
957 * We surround the regexps with "^(...)$" to force them to match whole strings,
958 * as per SQL practice. We have to have parens in case strings contain "|",
959 * else the "^" and "$" will be bound into the first and last alternatives
960 * which is not what we want.
961 *
962 * The regexps we parse into the buffers are appended to the data (if any)
963 * already present. If we parse fewer fields than the number of buffers we
964 * were given, the extra buffers are unaltered.
965 */
966 void
patternToSQLRegex(int encoding,PQExpBuffer dbnamebuf,PQExpBuffer schemabuf,PQExpBuffer namebuf,const char * pattern,bool force_escape)967 patternToSQLRegex(int encoding, PQExpBuffer dbnamebuf, PQExpBuffer schemabuf,
968 PQExpBuffer namebuf, const char *pattern, bool force_escape)
969 {
970 PQExpBufferData buf[3];
971 PQExpBuffer curbuf;
972 PQExpBuffer maxbuf;
973 int i;
974 bool inquotes;
975 const char *cp;
976
977 Assert(pattern != NULL);
978 Assert(namebuf != NULL);
979
980 /* callers should never expect "dbname.relname" format */
981 Assert(dbnamebuf == NULL || schemabuf != NULL);
982
983 inquotes = false;
984 cp = pattern;
985
986 if (dbnamebuf != NULL)
987 maxbuf = &buf[2];
988 else if (schemabuf != NULL)
989 maxbuf = &buf[1];
990 else
991 maxbuf = &buf[0];
992
993 curbuf = &buf[0];
994 initPQExpBuffer(curbuf);
995 appendPQExpBufferStr(curbuf, "^(");
996 while (*cp)
997 {
998 char ch = *cp;
999
1000 if (ch == '"')
1001 {
1002 if (inquotes && cp[1] == '"')
1003 {
1004 /* emit one quote, stay in inquotes mode */
1005 appendPQExpBufferChar(curbuf, '"');
1006 cp++;
1007 }
1008 else
1009 inquotes = !inquotes;
1010 cp++;
1011 }
1012 else if (!inquotes && isupper((unsigned char) ch))
1013 {
1014 appendPQExpBufferChar(curbuf,
1015 pg_tolower((unsigned char) ch));
1016 cp++;
1017 }
1018 else if (!inquotes && ch == '*')
1019 {
1020 appendPQExpBufferStr(curbuf, ".*");
1021 cp++;
1022 }
1023 else if (!inquotes && ch == '?')
1024 {
1025 appendPQExpBufferChar(curbuf, '.');
1026 cp++;
1027 }
1028
1029 /*
1030 * When we find a dbname/schema/name separator, we treat it specially
1031 * only if the caller requested more patterns to be parsed than we
1032 * have already parsed from the pattern. Otherwise, dot characters
1033 * are not special.
1034 */
1035 else if (!inquotes && ch == '.' && curbuf < maxbuf)
1036 {
1037 appendPQExpBufferStr(curbuf, ")$");
1038 curbuf++;
1039 initPQExpBuffer(curbuf);
1040 appendPQExpBufferStr(curbuf, "^(");
1041 cp++;
1042 }
1043 else if (ch == '$')
1044 {
1045 /*
1046 * Dollar is always quoted, whether inside quotes or not. The
1047 * reason is that it's allowed in SQL identifiers, so there's a
1048 * significant use-case for treating it literally, while because
1049 * we anchor the pattern automatically there is no use-case for
1050 * having it possess its regexp meaning.
1051 */
1052 appendPQExpBufferStr(curbuf, "\\$");
1053 cp++;
1054 }
1055 else
1056 {
1057 /*
1058 * Ordinary data character, transfer to pattern
1059 *
1060 * Inside double quotes, or at all times if force_escape is true,
1061 * quote regexp special characters with a backslash to avoid
1062 * regexp errors. Outside quotes, however, let them pass through
1063 * as-is; this lets knowledgeable users build regexp expressions
1064 * that are more powerful than shell-style patterns.
1065 *
1066 * As an exception to that, though, always quote "[]", as that's
1067 * much more likely to be an attempt to write an array type name
1068 * than it is to be the start of a regexp bracket expression.
1069 */
1070 if ((inquotes || force_escape) &&
1071 strchr("|*+?()[]{}.^$\\", ch))
1072 appendPQExpBufferChar(curbuf, '\\');
1073 else if (ch == '[' && cp[1] == ']')
1074 appendPQExpBufferChar(curbuf, '\\');
1075 i = PQmblenBounded(cp, encoding);
1076 while (i--)
1077 appendPQExpBufferChar(curbuf, *cp++);
1078 }
1079 }
1080 appendPQExpBufferStr(curbuf, ")$");
1081
1082 appendPQExpBufferStr(namebuf, curbuf->data);
1083 termPQExpBuffer(curbuf);
1084
1085 if (curbuf > buf)
1086 {
1087 curbuf--;
1088 appendPQExpBufferStr(schemabuf, curbuf->data);
1089 termPQExpBuffer(curbuf);
1090
1091 if (curbuf > buf)
1092 {
1093 curbuf--;
1094 appendPQExpBufferStr(dbnamebuf, curbuf->data);
1095 termPQExpBuffer(curbuf);
1096 }
1097 }
1098 }
1099