1 %top{
2 /*-------------------------------------------------------------------------
3  *
4  * scan.l
5  *	  lexical scanner for PostgreSQL
6  *
7  * NOTE NOTE NOTE:
8  *
9  * The rules in this file must be kept in sync with src/fe_utils/psqlscan.l!
10  *
11  * The rules are designed so that the scanner never has to backtrack,
12  * in the sense that there is always a rule that can match the input
13  * consumed so far (the rule action may internally throw back some input
14  * with yyless(), however).  As explained in the flex manual, this makes
15  * for a useful speed increase --- about a third faster than a plain -CF
16  * lexer, in simple testing.  The extra complexity is mostly in the rules
17  * for handling float numbers and continued string literals.  If you change
18  * the lexical rules, verify that you haven't broken the no-backtrack
19  * property by running flex with the "-b" option and checking that the
20  * resulting "lex.backup" file says that no backing up is needed.  (As of
21  * Postgres 9.2, this check is made automatically by the Makefile.)
22  *
23  *
24  * Portions Copyright (c) 2003-2016, PgPool Global Development Group
25  * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
26  * Portions Copyright (c) 1994, Regents of the University of California
27  *
28  * IDENTIFICATION
29  *	  src/backend/parser/scan.l
30  *
31  *-------------------------------------------------------------------------
32  */
33 #include "pool_parser.h"
34 
35 #include <ctype.h>
36 #include <unistd.h>
37 
38 #include "parser.h"				/* only needed for GUC variables */
39 #include "scanner.h"
40 #include "gramparse.h"
41 #include "scansup.h"
42 #include "pg_wchar.h"
43 
44 #include "gram.h"
45 #include "utils/palloc.h"
46 #include "utils/elog.h"
47 }
48 
49 %{
50 /* Avoid exit() on fatal scanner errors (a bit ugly -- see yy_fatal_error) */
51 #undef fprintf
52 #define fprintf(file, fmt, msg)  fprintf_to_ereport(fmt, msg)
53 
54 static void
fprintf_to_ereport(const char * fmt,const char * msg)55 fprintf_to_ereport(const char *fmt, const char *msg)
56 {
57 	ereport(ERROR, (errmsg_internal("%s", msg)));
58 }
59 
60 /*
61  * GUC variables.  This is a DIRECT violation of the warning given at the
62  * head of gram.y, i.e., flex/bison code must not depend on any GUC variables;
63  * as such, changing their values can induce very unintuitive behavior.
64  * But we shall have to live with it until we can remove these variables.
65  */
66 int			backslash_quote = BACKSLASH_QUOTE_SAFE_ENCODING;
67 bool		escape_string_warning = true;
68 bool		standard_conforming_strings = true;
69 
70 /*
71  * Set the type of YYSTYPE.
72  */
73 #define YYSTYPE core_YYSTYPE
74 
75 /*
76  * Set the type of yyextra.  All state variables used by the scanner should
77  * be in yyextra, *not* statically allocated.
78  */
79 #define YY_EXTRA_TYPE core_yy_extra_type *
80 
81 /*
82  * Each call to yylex must set yylloc to the location of the found token
83  * (expressed as a byte offset from the start of the input text).
84  * When we parse a token that requires multiple lexer rules to process,
85  * this should be done in the first such rule, else yylloc will point
86  * into the middle of the token.
87  */
88 #define SET_YYLLOC()  (*(yylloc) = yytext - yyextra->scanbuf)
89 
90 /*
91  * Advance yylloc by the given number of bytes.
92  */
93 #define ADVANCE_YYLLOC(delta)  ( *(yylloc) += (delta) )
94 
95 #define startlit()	( yyextra->literallen = 0 )
96 static void addlit(char *ytext, int yleng, core_yyscan_t yyscanner);
97 static void addlitchar(unsigned char ychar, core_yyscan_t yyscanner);
98 static char *litbufdup(core_yyscan_t yyscanner);
99 static char *litbuf_udeescape(unsigned char escape, core_yyscan_t yyscanner);
100 static unsigned char unescape_single_char(unsigned char c, core_yyscan_t yyscanner);
101 static int	process_integer_literal(const char *token, YYSTYPE *lval);
102 static bool is_utf16_surrogate_first(pg_wchar c);
103 static bool is_utf16_surrogate_second(pg_wchar c);
104 static pg_wchar surrogate_pair_to_codepoint(pg_wchar first, pg_wchar second);
105 static void addunicode(pg_wchar c, yyscan_t yyscanner);
106 static bool check_uescapechar(unsigned char escape);
107 
108 #define yyerror(msg)  scanner_yyerror(msg, yyscanner)
109 
110 #define lexer_errposition()  scanner_errposition(*(yylloc), yyscanner)
111 
112 static void check_string_escape_warning(unsigned char ychar, core_yyscan_t yyscanner);
113 static void check_escape_warning(core_yyscan_t yyscanner);
114 
115 /*
116  * Work around a bug in flex 2.5.35: it emits a couple of functions that
117  * it forgets to emit declarations for.  Since we use -Wmissing-prototypes,
118  * this would cause warnings.  Providing our own declarations should be
119  * harmless even when the bug gets fixed.
120  */
121 extern int	core_yyget_column(yyscan_t yyscanner);
122 extern void core_yyset_column(int column_no, yyscan_t yyscanner);
123 
124 %}
125 
126 %option reentrant
127 %option bison-bridge
128 %option bison-locations
129 %option 8bit
130 %option never-interactive
131 %option nodefault
132 %option noinput
133 %option nounput
134 %option noyywrap
135 %option noyyalloc
136 %option noyyrealloc
137 %option noyyfree
138 %option warn
139 %option prefix="core_yy"
140 
141 /*
142  * OK, here is a short description of lex/flex rules behavior.
143  * The longest pattern which matches an input string is always chosen.
144  * For equal-length patterns, the first occurring in the rules list is chosen.
145  * INITIAL is the starting state, to which all non-conditional rules apply.
146  * Exclusive states change parsing rules while the state is active.  When in
147  * an exclusive state, only those rules defined for that state apply.
148  *
149  * We use exclusive states for quoted strings, extended comments,
150  * and to eliminate parsing troubles for numeric strings.
151  * Exclusive states:
152  *  <xb> bit string literal
153  *  <xc> extended C-style comments
154  *  <xd> delimited identifiers (double-quoted identifiers)
155  *  <xh> hexadecimal numeric string
156  *  <xq> standard quoted strings
157  *  <xe> extended quoted strings (support backslash escape sequences)
158  *  <xdolq> $foo$ quoted strings
159  *  <xui> quoted identifier with Unicode escapes
160  *  <xuiend> end of a quoted identifier with Unicode escapes, UESCAPE can follow
161  *  <xus> quoted string with Unicode escapes
162  *  <xusend> end of a quoted string with Unicode escapes, UESCAPE can follow
163  *  <xeu> Unicode surrogate pair in extended quoted string
164  *
165  * Remember to add an <<EOF>> case whenever you add a new exclusive state!
166  * The default one is probably not the right thing.
167  */
168 
169 %x xb
170 %x xc
171 %x xd
172 %x xh
173 %x xe
174 %x xq
175 %x xdolq
176 %x xui
177 %x xuiend
178 %x xus
179 %x xusend
180 %x xeu
181 
182 /*
183  * In order to make the world safe for Windows and Mac clients as well as
184  * Unix ones, we accept either \n or \r as a newline.  A DOS-style \r\n
185  * sequence will be seen as two successive newlines, but that doesn't cause
186  * any problems.  Comments that start with -- and extend to the next
187  * newline are treated as equivalent to a single whitespace character.
188  *
189  * NOTE a fine point: if there is no newline following --, we will absorb
190  * everything to the end of the input as a comment.  This is correct.  Older
191  * versions of Postgres failed to recognize -- as a comment if the input
192  * did not end with a newline.
193  *
194  * XXX perhaps \f (formfeed) should be treated as a newline as well?
195  *
196  * XXX if you change the set of whitespace characters, fix scanner_isspace()
197  * to agree, and see also the plpgsql lexer.
198  */
199 
200 space			[ \t\n\r\f]
201 horiz_space		[ \t\f]
202 newline			[\n\r]
203 non_newline		[^\n\r]
204 
205 comment			("--"{non_newline}*)
206 
207 whitespace		({space}+|{comment})
208 
209 /*
210  * SQL requires at least one newline in the whitespace separating
211  * string literals that are to be concatenated.  Silly, but who are we
212  * to argue?  Note that {whitespace_with_newline} should not have * after
213  * it, whereas {whitespace} should generally have a * after it...
214  */
215 
216 special_whitespace		({space}+|{comment}{newline})
217 horiz_whitespace		({horiz_space}|{comment})
218 whitespace_with_newline	({horiz_whitespace}*{newline}{special_whitespace}*)
219 
220 /*
221  * To ensure that {quotecontinue} can be scanned without having to back up
222  * if the full pattern isn't matched, we include trailing whitespace in
223  * {quotestop}.  This matches all cases where {quotecontinue} fails to match,
224  * except for {quote} followed by whitespace and just one "-" (not two,
225  * which would start a {comment}).  To cover that we have {quotefail}.
226  * The actions for {quotestop} and {quotefail} must throw back characters
227  * beyond the quote proper.
228  */
229 quote			'
230 quotestop		{quote}{whitespace}*
231 quotecontinue	{quote}{whitespace_with_newline}{quote}
232 quotefail		{quote}{whitespace}*"-"
233 
234 /* Bit string
235  * It is tempting to scan the string for only those characters
236  * which are allowed. However, this leads to silently swallowed
237  * characters if illegal characters are included in the string.
238  * For example, if xbinside is [01] then B'ABCD' is interpreted
239  * as a zero-length string, and the ABCD' is lost!
240  * Better to pass the string forward and let the input routines
241  * validate the contents.
242  */
243 xbstart			[bB]{quote}
244 xbinside		[^']*
245 
246 /* Hexadecimal number */
247 xhstart			[xX]{quote}
248 xhinside		[^']*
249 
250 /* National character */
251 xnstart			[nN]{quote}
252 
253 /* Quoted string that allows backslash escapes */
254 xestart			[eE]{quote}
255 xeinside		[^\\']+
256 xeescape		[\\][^0-7]
257 xeoctesc		[\\][0-7]{1,3}
258 xehexesc		[\\]x[0-9A-Fa-f]{1,2}
259 xeunicode		[\\](u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})
260 xeunicodefail	[\\](u[0-9A-Fa-f]{0,3}|U[0-9A-Fa-f]{0,7})
261 
262 /* Extended quote
263  * xqdouble implements embedded quote, ''''
264  */
265 xqstart			{quote}
266 xqdouble		{quote}{quote}
267 xqinside		[^']+
268 
269 /* $foo$ style quotes ("dollar quoting")
270  * The quoted string starts with $foo$ where "foo" is an optional string
271  * in the form of an identifier, except that it may not contain "$",
272  * and extends to the first occurrence of an identical string.
273  * There is *no* processing of the quoted text.
274  *
275  * {dolqfailed} is an error rule to avoid scanner backup when {dolqdelim}
276  * fails to match its trailing "$".
277  */
278 dolq_start		[A-Za-z\200-\377_]
279 dolq_cont		[A-Za-z\200-\377_0-9]
280 dolqdelim		\$({dolq_start}{dolq_cont}*)?\$
281 dolqfailed		\${dolq_start}{dolq_cont}*
282 dolqinside		[^$]+
283 
284 /* Double quote
285  * Allows embedded spaces and other special characters into identifiers.
286  */
287 dquote			\"
288 xdstart			{dquote}
289 xdstop			{dquote}
290 xddouble		{dquote}{dquote}
291 xdinside		[^"]+
292 
293 /* Unicode escapes */
294 uescape			[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']{quote}
295 /* error rule to avoid backup */
296 uescapefail		[uU][eE][sS][cC][aA][pP][eE]{whitespace}*"-"|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*|[uU][eE][sS][cC][aA][pP]|[uU][eE][sS][cC][aA]|[uU][eE][sS][cC]|[uU][eE][sS]|[uU][eE]|[uU]
297 
298 /* Quoted identifier with Unicode escapes */
299 xuistart		[uU]&{dquote}
300 
301 /* Quoted string with Unicode escapes */
302 xusstart		[uU]&{quote}
303 
304 /* Optional UESCAPE after a quoted string or identifier with Unicode escapes. */
305 xustop1		{uescapefail}?
306 xustop2		{uescape}
307 
308 /* error rule to avoid backup */
309 xufailed		[uU]&
310 
311 
312 /* C-style comments
313  *
314  * The "extended comment" syntax closely resembles allowable operator syntax.
315  * The tricky part here is to get lex to recognize a string starting with
316  * slash-star as a comment, when interpreting it as an operator would produce
317  * a longer match --- remember lex will prefer a longer match!  Also, if we
318  * have something like plus-slash-star, lex will think this is a 3-character
319  * operator whereas we want to see it as a + operator and a comment start.
320  * The solution is two-fold:
321  * 1. append {op_chars}* to xcstart so that it matches as much text as
322  *    {operator} would. Then the tie-breaker (first matching rule of same
323  *    length) ensures xcstart wins.  We put back the extra stuff with yyless()
324  *    in case it contains a star-slash that should terminate the comment.
325  * 2. In the operator rule, check for slash-star within the operator, and
326  *    if found throw it back with yyless().  This handles the plus-slash-star
327  *    problem.
328  * Dash-dash comments have similar interactions with the operator rule.
329  */
330 xcstart			\/\*{op_chars}*
331 xcstop			\*+\/
332 xcinside		[^*/]+
333 
334 digit			[0-9]
335 ident_start		[A-Za-z\200-\377_]
336 ident_cont		[A-Za-z\200-\377_0-9\$]
337 
338 identifier		{ident_start}{ident_cont}*
339 
340 /* Assorted special-case operators and operator-like tokens */
341 typecast		"::"
342 dot_dot			\.\.
343 colon_equals	":="
344 equals_greater	"=>"
345 less_equals		"<="
346 greater_equals	">="
347 less_greater	"<>"
348 not_equals		"!="
349 
350 /*
351  * "self" is the set of chars that should be returned as single-character
352  * tokens.  "op_chars" is the set of chars that can make up "Op" tokens,
353  * which can be one or more characters long (but if a single-char token
354  * appears in the "self" set, it is not to be returned as an Op).  Note
355  * that the sets overlap, but each has some chars that are not in the other.
356  *
357  * If you change either set, adjust the character lists appearing in the
358  * rule for "operator"!
359  */
360 self			[,()\[\].;\:\+\-\*\/\%\^\<\>\=]
361 op_chars		[\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
362 operator		{op_chars}+
363 
364 /* We no longer allow unary minus in numbers.
365  * Instead we pass it separately to the parser, where it gets
366  * coerced via doNegate() -- Leon aug 20 1999
367  *
368  * {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
369  *
370  * {realfail1} and {realfail2} are added to prevent the need for scanner
371  * backup when the {real} rule fails to match completely.
372  */
373 
374 integer			{digit}+
375 decimal			(({digit}*\.{digit}+)|({digit}+\.{digit}*))
376 decimalfail		{digit}+\.\.
377 real			({integer}|{decimal})[Ee][-+]?{digit}+
378 realfail1		({integer}|{decimal})[Ee]
379 realfail2		({integer}|{decimal})[Ee][-+]
380 
381 param			\${integer}
382 
383 other			.
384 
385 /*
386  * Dollar quoted strings are totally opaque, and no escaping is done on them.
387  * Other quoted strings must allow some special characters such as single-quote
388  *  and newline.
389  * Embedded single-quotes are implemented both in the SQL standard
390  *  style of two adjacent single quotes "''" and in the Postgres/Java style
391  *  of escaped-quote "\'".
392  * Other embedded escaped characters are matched explicitly and the leading
393  *  backslash is dropped from the string.
394  * Note that xcstart must appear before operator, as explained above!
395  *  Also whitespace (comment) must appear before operator.
396  */
397 
398 %%
399 
400 {whitespace}	{
401 					/* ignore */
402 				}
403 
404 {xcstart}		{
405 					/* Set location in case of syntax error in comment */
406 					SET_YYLLOC();
407 					yyextra->xcdepth = 0;	/* outermost comment: no nesting yet */
408 					BEGIN(xc);
409 					/* Put back any characters past slash-star; see above */
410 					yyless(2);
411 				}
412 
413 <xc>{xcstart}	{
414 					(yyextra->xcdepth)++;	/* a nested comment was opened */
415 					/* Put back any characters past slash-star; see above */
416 					yyless(2);
417 				}
418 
419 <xc>{xcstop}	{
420 					if (yyextra->xcdepth <= 0)
421 						BEGIN(INITIAL);	/* closed the outermost comment */
422 					else
423 						(yyextra->xcdepth)--;	/* closed a nested comment */
424 				}
425 
426 <xc>{xcinside}	{
427 					/* ignore: comment text with no star or slash characters */
428 				}
429 
430 <xc>{op_chars}	{
431 					/* ignore: a single operator character inside the comment */
432 				}
433 
434 <xc>\*+			{
435 					/* ignore: a run of stars that is not part of a comment terminator */
436 				}
437 
438 <xc><<EOF>>		{ yyerror("unterminated /* comment"); }
439 
440 {xbstart}		{
441 					/* Binary bit type.
442 					 * At some point we should simply pass the string
443 					 * forward to the parser and label it there.
444 					 * In the meantime, place a leading "b" on the string
445 					 * to mark it for the input routine as a binary string.
446 					 */
447 					SET_YYLLOC();
448 					BEGIN(xb);
449 					startlit();
450 					addlitchar('b', yyscanner);
451 				}
452 <xb>{quotestop}	|
453 <xb>{quotefail} {
454 					yyless(1);
455 					BEGIN(INITIAL);
456 					yylval->str = litbufdup(yyscanner);
457 					return BCONST;
458 				}
459 <xh>{xhinside}	|
460 <xb>{xbinside}	{
461 					addlit(yytext, yyleng, yyscanner);
462 				}
463 <xh>{quotecontinue}	|
464 <xb>{quotecontinue}	{
465 					/* ignore */
466 				}
467 <xb><<EOF>>		{ yyerror("unterminated bit string literal"); }
468 
469 {xhstart}		{
470 					/* Hexadecimal bit type.
471 					 * At some point we should simply pass the string
472 					 * forward to the parser and label it there.
473 					 * In the meantime, place a leading "x" on the string
474 					 * to mark it for the input routine as a hex string.
475 					 */
476 					SET_YYLLOC();
477 					BEGIN(xh);
478 					startlit();
479 					addlitchar('x', yyscanner);
480 				}
481 <xh>{quotestop}	|
482 <xh>{quotefail} {
483 					yyless(1);
484 					BEGIN(INITIAL);
485 					yylval->str = litbufdup(yyscanner);
486 					return XCONST;
487 				}
488 <xh><<EOF>>		{ yyerror("unterminated hexadecimal string literal"); }
489 
490 {xnstart}		{
491 					/* National character.
492 					 * We will pass this along as a normal character string,
493 					 * but preceded with an internally-generated "NCHAR".
494 					 */
495 					const ScanKeyword *keyword;
496 
497 					SET_YYLLOC();
498 					yyless(1);	/* eat only 'n' this time */
499 
500 					keyword = ScanKeywordLookup("nchar",
501 												yyextra->keywords,
502 												yyextra->num_keywords);
503 					if (keyword != NULL)
504 					{
505 						yylval->keyword = keyword->name;
506 						return keyword->value;
507 					}
508 					else
509 					{
510 						/* If NCHAR isn't a keyword, just return "n" */
511 						yylval->str = pstrdup("n");
512 						return IDENT;
513 					}
514 				}
515 
516 {xqstart}		{
517 					yyextra->warn_on_first_escape = true;
518 					yyextra->saw_non_ascii = false;
519 					SET_YYLLOC();
520 					if (yyextra->standard_conforming_strings)
521 						BEGIN(xq);
522 					else
523 						BEGIN(xe);
524 					startlit();
525 				}
526 {xestart}		{
527 					yyextra->warn_on_first_escape = false;
528 					yyextra->saw_non_ascii = false;
529 					SET_YYLLOC();
530 					BEGIN(xe);
531 					startlit();
532 				}
533 {xusstart}		{
534 					SET_YYLLOC();
535 					if (!yyextra->standard_conforming_strings)
536 						ereport(ERROR,
537 								(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
538 								 errmsg("unsafe use of string constant with Unicode escapes"),
539 								 errdetail("String constants with Unicode escapes cannot be used when standard_conforming_strings is off."),
540 								 lexer_errposition()));
541 					BEGIN(xus);
542 					startlit();
543 				}
544 <xq,xe>{quotestop}	|
545 <xq,xe>{quotefail} {
546 					yyless(1);
547 					BEGIN(INITIAL);
548 					/*
549 					 * check that the data remains valid if it might have been
550 					 * made invalid by unescaping any chars.
551 					 */
552 					if (yyextra->saw_non_ascii)
553 						pg_verifymbstr(yyextra->literalbuf,
554 									   yyextra->literallen,
555 									   false);
556 					yylval->str = litbufdup(yyscanner);
557 					return SCONST;
558 				}
559 <xus>{quotestop} |
560 <xus>{quotefail} {
561 					/* throw back all but the quote */
562 					yyless(1);
563 					/* xusend state looks for possible UESCAPE */
564 					BEGIN(xusend);
565 				}
566 <xusend>{whitespace} {
567 					/* stay in xusend state over whitespace */
568 				}
569 <xusend><<EOF>> |
570 <xusend>{other} |
571 <xusend>{xustop1} {
572 					/* no UESCAPE after the quote, throw back everything and decode using backslash as the escape character */
573 					yyless(0);
574 					BEGIN(INITIAL);
575 					yylval->str = litbuf_udeescape('\\', yyscanner);
576 					return SCONST;
577 				}
578 <xusend>{xustop2} {
579 					/* found UESCAPE after the end quote; the escape character is the one just before the final quote of the match */
580 					BEGIN(INITIAL);
581 					if (!check_uescapechar(yytext[yyleng - 2]))
582 					{
583 						SET_YYLLOC();
584 						ADVANCE_YYLLOC(yyleng - 2);	/* point the error location at the escape character */
585 						yyerror("invalid Unicode escape character");
586 					}
587 					yylval->str = litbuf_udeescape(yytext[yyleng - 2],
588 												   yyscanner);
589 					return SCONST;
590 				}
591 <xq,xe,xus>{xqdouble} {
592 					addlitchar('\'', yyscanner);
593 				}
594 <xq,xus>{xqinside}  {
595 					addlit(yytext, yyleng, yyscanner);
596 				}
597 <xe>{xeinside}  {
598 					addlit(yytext, yyleng, yyscanner);
599 				}
600 <xe>{xeunicode} {
601 					pg_wchar	c = strtoul(yytext + 2, NULL, 16);
602 
603 					check_escape_warning(yyscanner);
604 
605 					if (is_utf16_surrogate_first(c))
606 					{
607 						yyextra->utf16_first_part = c;
608 						BEGIN(xeu);
609 					}
610 					else if (is_utf16_surrogate_second(c))
611 						yyerror("invalid Unicode surrogate pair");
612 					else
613 						addunicode(c, yyscanner);
614 				}
615 <xeu>{xeunicode} {
616 					pg_wchar	c = strtoul(yytext + 2, NULL, 16);
617 
618 					if (!is_utf16_surrogate_second(c))
619 						yyerror("invalid Unicode surrogate pair");
620 
621 					c = surrogate_pair_to_codepoint(yyextra->utf16_first_part, c);
622 
623 					addunicode(c, yyscanner);
624 
625 					BEGIN(xe);
626 				}
627 <xeu>.			{ yyerror("invalid Unicode surrogate pair"); }
628 <xeu>\n			{ yyerror("invalid Unicode surrogate pair"); }
629 <xeu><<EOF>>	{ yyerror("invalid Unicode surrogate pair"); }
630 <xe,xeu>{xeunicodefail}	{
631 					ereport(ERROR,
632 							(errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
633 							 errmsg("invalid Unicode escape"),
634 							 errhint("Unicode escapes must be \\uXXXX or \\UXXXXXXXX."),
635 							 lexer_errposition()));
636 				}
637 <xe>{xeescape}  {
638 #ifdef PGPOOL_NOT_USED
639 					if (yytext[1] == '\'')
640 					{
641 						if (yyextra->backslash_quote == BACKSLASH_QUOTE_OFF ||
642 							(yyextra->backslash_quote == BACKSLASH_QUOTE_SAFE_ENCODING &&
643 							 PG_ENCODING_IS_CLIENT_ONLY(pg_get_client_encoding())))
644 							ereport(ERROR,
645 									(errcode(ERRCODE_NONSTANDARD_USE_OF_ESCAPE_CHARACTER),
646 									 errmsg("unsafe use of \\' in a string literal"),
647 									 errhint("Use '' to write quotes in strings. \\' is insecure in client-only encodings."),
648 									 lexer_errposition()));
649 					}
650 #endif
651 					check_string_escape_warning(yytext[1], yyscanner);
652 					addlitchar(unescape_single_char(yytext[1], yyscanner),
653 							   yyscanner);
654 				}
655 <xe>{xeoctesc}  {
656 					unsigned char c = strtoul(yytext + 1, NULL, 8);	/* 1-3 octal digits after the backslash; values above 0377 wrap in the conversion to unsigned char */
657 
658 					check_escape_warning(yyscanner);
659 					addlitchar(c, yyscanner);
660 					if (c == '\0' || IS_HIGHBIT_SET(c))
661 						yyextra->saw_non_ascii = true;	/* makes the end-of-string rule run pg_verifymbstr() on the literal */
662 				}
663 <xe>{xehexesc}  {
664 					unsigned char c = strtoul(yytext + 2, NULL, 16);	/* 1-2 hex digits after the backslash and the x */
665 
666 					check_escape_warning(yyscanner);
667 					addlitchar(c, yyscanner);
668 					if (c == '\0' || IS_HIGHBIT_SET(c))
669 						yyextra->saw_non_ascii = true;	/* makes the end-of-string rule run pg_verifymbstr() on the literal */
670 				}
671 <xq,xe,xus>{quotecontinue} {
672 					/* ignore */
673 				}
674 <xe>.			{
675 					/* This is only needed for \ just before EOF */
676 					addlitchar(yytext[0], yyscanner);
677 				}
678 <xq,xe,xus><<EOF>>		{ yyerror("unterminated quoted string"); }
679 
680 {dolqdelim}		{
681 					SET_YYLLOC();
682 					yyextra->dolqstart = pstrdup(yytext);	/* keep a copy of the opening delimiter to compare with candidate closers */
683 					BEGIN(xdolq);
684 					startlit();
685 				}
686 {dolqfailed}	{
687 					SET_YYLLOC();
688 					/* throw back all but the initial "$" */
689 					yyless(1);
690 					/* and treat it as {other} */
691 					return yytext[0];
692 				}
693 <xdolq>{dolqdelim} {
694 					if (strcmp(yytext, yyextra->dolqstart) == 0)
695 					{
696 						pfree(yyextra->dolqstart);	/* matching closer found: release the saved opening delimiter */
697 						yyextra->dolqstart = NULL;
698 						BEGIN(INITIAL);
699 						yylval->str = litbufdup(yyscanner);
700 						return SCONST;
701 					}
702 					else
703 					{
704 						/*
705 						 * When we fail to match $...$ to dolqstart, transfer
706 						 * the $... part to the output, but put back the final
707 						 * $ for rescanning.  Consider $delim$...$junk$delim$
708 						 */
709 						addlit(yytext, yyleng - 1, yyscanner);
710 						yyless(yyleng - 1);
711 					}
712 				}
713 <xdolq>{dolqinside} {
714 					addlit(yytext, yyleng, yyscanner);	/* text containing no dollar sign */
715 				}
716 <xdolq>{dolqfailed} {
717 					addlit(yytext, yyleng, yyscanner);	/* a dollar sign plus tag with no closing dollar sign is ordinary text */
718 				}
719 <xdolq>.		{
720 					/* This is only needed for $ inside the quoted text */
721 					addlitchar(yytext[0], yyscanner);
722 				}
723 <xdolq><<EOF>>	{ yyerror("unterminated dollar-quoted string"); }
724 
725 {xdstart}		{
726 					SET_YYLLOC();
727 					BEGIN(xd);
728 					startlit();
729 				}
730 {xuistart}		{
731 					SET_YYLLOC();
732 					BEGIN(xui);
733 					startlit();
734 				}
735 <xd>{xdstop}	{
736 					char	   *ident;
737 
738 					BEGIN(INITIAL);
739 					if (yyextra->literallen == 0)
740 						yyerror("zero-length delimited identifier");
741 					ident = litbufdup(yyscanner);
742 					if (yyextra->literallen >= NAMEDATALEN)
743 						truncate_identifier(ident, yyextra->literallen, true);
744 					yylval->str = ident;
745 					return IDENT;
746 				}
747 <xui>{dquote} {
748 					yyless(1);
749 					/* xuiend state looks for possible UESCAPE */
750 					BEGIN(xuiend);
751 				}
752 <xuiend>{whitespace} {
753 					/* stay in xuiend state over whitespace */
754 				}
755 <xuiend><<EOF>> |
756 <xuiend>{other} |
757 <xuiend>{xustop1} {
758 					/* no UESCAPE after the quote, throw back everything */
759 					char	   *ident;
760 					int			identlen;
761 
762 					yyless(0);
763 
764 					BEGIN(INITIAL);
765 					if (yyextra->literallen == 0)
766 						yyerror("zero-length delimited identifier");
767 					ident = litbuf_udeescape('\\', yyscanner);
768 					identlen = strlen(ident);
769 					if (identlen >= NAMEDATALEN)
770 						truncate_identifier(ident, identlen, true);
771 					yylval->str = ident;
772 					return IDENT;
773 				}
774 <xuiend>{xustop2}	{
775 					/* found UESCAPE after the end quote */
776 					char	   *ident;
777 					int			identlen;
778 
779 					BEGIN(INITIAL);
780 					if (yyextra->literallen == 0)
781 						yyerror("zero-length delimited identifier");
782 					if (!check_uescapechar(yytext[yyleng - 2]))
783 					{
784 						SET_YYLLOC();
785 						ADVANCE_YYLLOC(yyleng - 2);
786 						yyerror("invalid Unicode escape character");
787 					}
788 					ident = litbuf_udeescape(yytext[yyleng - 2], yyscanner);
789 					identlen = strlen(ident);
790 					if (identlen >= NAMEDATALEN)
791 						truncate_identifier(ident, identlen, true);
792 					yylval->str = ident;
793 					return IDENT;
794 				}
795 <xd,xui>{xddouble}	{
796 					addlitchar('"', yyscanner);
797 				}
798 <xd,xui>{xdinside}	{
799 					addlit(yytext, yyleng, yyscanner);
800 				}
801 <xd,xui><<EOF>>		{ yyerror("unterminated quoted identifier"); }
802 
803 {xufailed}	{
804 					char	   *ident;
805 
806 					SET_YYLLOC();
807 					/* throw back all but the initial u/U */
808 					yyless(1);
809 					/* and treat it as {identifier} */
810 					ident = downcase_truncate_identifier(yytext, yyleng, true);
811 					yylval->str = ident;
812 					return IDENT;
813 				}
814 
815 {typecast}		{
816 					SET_YYLLOC();
817 					return TYPECAST;
818 				}
819 
820 {dot_dot}		{
821 					SET_YYLLOC();
822 					return DOT_DOT;
823 				}
824 
825 {colon_equals}	{
826 					SET_YYLLOC();
827 					return COLON_EQUALS;
828 				}
829 
830 {equals_greater} {
831 					SET_YYLLOC();
832 					return EQUALS_GREATER;
833 				}
834 
835 {less_equals}	{
836 					SET_YYLLOC();
837 					return LESS_EQUALS;
838 				}
839 
840 {greater_equals} {
841 					SET_YYLLOC();
842 					return GREATER_EQUALS;
843 				}
844 
845 {less_greater}	{
846 					/* We accept both "<>" and "!=" as meaning NOT_EQUALS */
847 					SET_YYLLOC();
848 					return NOT_EQUALS;
849 				}
850 
851 {not_equals}	{
852 					/* We accept both "<>" and "!=" as meaning NOT_EQUALS */
853 					SET_YYLLOC();
854 					return NOT_EQUALS;
855 				}
856 
857 {self}			{
858 					SET_YYLLOC();
859 					return yytext[0];
860 				}
861 
862 {operator}		{
863 					/*
864 					 * Check for embedded slash-star or dash-dash; those
865 					 * are comment starts, so operator must stop there.
866 					 * Note that slash-star or dash-dash at the first
867 					 * character will match a prior rule, not this one.
868 					 */
869 					int			nchars = yyleng;
870 					char	   *slashstar = strstr(yytext, "/*");
871 					char	   *dashdash = strstr(yytext, "--");
872 
873 					if (slashstar && dashdash)
874 					{
875 						/* if both appear, take the first one */
876 						if (slashstar > dashdash)
877 							slashstar = dashdash;
878 					}
879 					else if (!slashstar)
880 						slashstar = dashdash;
881 					if (slashstar)
882 						nchars = slashstar - yytext;
883 
884 					/*
885 					 * For SQL compatibility, '+' and '-' cannot be the
886 					 * last char of a multi-char operator unless the operator
887 					 * contains chars that are not in SQL operators.
888 					 * The idea is to lex '=-' as two operators, but not
889 					 * to forbid operator names like '?-' that could not be
890 					 * sequences of SQL operators.
891 					 */
892 					while (nchars > 1 &&
893 						   (yytext[nchars - 1] == '+' ||
894 							yytext[nchars - 1] == '-'))
895 					{
896 						int			ic;
897 
898 						for (ic = nchars - 2; ic >= 0; ic--)
899 						{
900 							if (strchr("~!@#^&|`?%", yytext[ic]))
901 								break;
902 						}
903 						if (ic >= 0)
904 							break; /* found a char that makes it OK */
905 						nchars--; /* else remove the +/-, and check again */
906 					}
907 
908 					SET_YYLLOC();
909 
910 					if (nchars < yyleng)
911 					{
912 						/* Strip the unwanted chars from the token */
913 						yyless(nchars);
914 						/*
915 						 * If what we have left is only one char, and it's
916 						 * one of the characters matching "self", then
917 						 * return it as a character token the same way
918 						 * that the "self" rule would have.
919 						 */
920 						if (nchars == 1 &&
921 							strchr(",()[].;:+-*/%^<>=", yytext[0]))
922 							return yytext[0];
923 					}
924 
925 					/*
926 					 * Complain if operator is too long.  Unlike the case
927 					 * for identifiers, we make this an error not a notice-
928 					 * and-truncate, because the odds are we are looking at
929 					 * a syntactic mistake anyway.
930 					 */
931 					if (nchars >= NAMEDATALEN)
932 						yyerror("operator too long");
933 
934 					yylval->str = pstrdup(yytext);
935 					return Op;
936 				}
937 
938 {param}			{
939 					SET_YYLLOC();
940 					yylval->ival = atol(yytext + 1);	/* skip the leading dollar sign; NOTE(review): atol() reports no overflow for very long digit strings -- consider strtol() with a range check */
941 					return PARAM;
942 				}
943 
{integer}		{
					/*
					 * Digit string: process_integer_literal() decides whether
					 * the token is returned as ICONST or as FCONST.
					 */
					SET_YYLLOC();
					return process_integer_literal(yytext, yylval);
				}
{decimal}		{
					/* FCONST carries a copy of the token text, not a parsed value */
					SET_YYLLOC();
					yylval->str = pstrdup(yytext);
					return FCONST;
				}
{decimalfail}	{
					/* throw back the .., and treat as integer */
					yyless(yyleng - 2);
					SET_YYLLOC();
					return process_integer_literal(yytext, yylval);
				}
{real}			{
					/* as for {decimal}: hand the literal's text to the grammar */
					SET_YYLLOC();
					yylval->str = pstrdup(yytext);
					return FCONST;
				}
{realfail1}		{
					/*
					 * throw back the [Ee], and treat as {decimal}.  Note
					 * that it is possible the input is actually {integer},
					 * but since this case will almost certainly lead to a
					 * syntax error anyway, we don't bother to distinguish.
					 */
					yyless(yyleng - 1);
					SET_YYLLOC();
					yylval->str = pstrdup(yytext);
					return FCONST;
				}
{realfail2}		{
					/* throw back the [Ee][+-], and proceed as above */
					yyless(yyleng - 2);
					SET_YYLLOC();
					yylval->str = pstrdup(yytext);
					return FCONST;
				}
983 
984 
{identifier}	{
					const ScanKeyword *kw;

					SET_YYLLOC();

					/* Look the token up in the caller-supplied keyword table */
					kw = ScanKeywordLookup(yytext,
										   yyextra->keywords,
										   yyextra->num_keywords);
					if (kw == NULL)
					{
						/*
						 * Not a keyword: return it as an ordinary identifier,
						 * converted to lower case and truncated if necessary.
						 */
						yylval->str = downcase_truncate_identifier(yytext,
																   yyleng,
																   true);
						return IDENT;
					}

					/* Keyword: return its token code along with its name */
					yylval->keyword = kw->name;
					return kw->value;
				}
1009 
{other}			{
					/* the character's own code serves as the token code */
					SET_YYLLOC();
					return yytext[0];
				}
1014 
<<EOF>>			{
					/* record the location first, so it refers to end of input */
					SET_YYLLOC();
					yyterminate();
				}
1019 
1020 %%
1021 
/*
 * Arrange access to yyextra for subroutines of the main yylex() function.
 * We expect each subroutine to have a yyscanner parameter.  Rather than
 * use the yyget_xxx functions, which might or might not get inlined by the
 * compiler, we cheat just a bit and cast yyscanner to the right type.
 *
 * Consequently, any function below that mentions yyextra, yylloc or yyleng
 * must have a variable named "yyscanner" in scope.
 */
#undef yyextra
#define yyextra  (((struct yyguts_t *) yyscanner)->yyextra_r)

/* Likewise for a couple of other things we need. */
#undef yylloc
#define yylloc	(((struct yyguts_t *) yyscanner)->yylloc_r)
#undef yyleng
#define yyleng	(((struct yyguts_t *) yyscanner)->yyleng_r)
1036 
1037 
/*
 * scanner_errposition
 *		Report a lexer or grammar error cursor position, if possible.
 *
 * This is expected to be used within an ereport() call.  The return value
 * is a dummy (always 0, in fact).
 *
 * Note that this can only be used for messages emitted during raw parsing
 * (essentially, scan.l and gram.y), since it requires the yyscanner struct
 * to still be available.
 *
 * The position computation is only compiled when PGPOOL_NOT_USED is
 * defined; otherwise both arguments are ignored and the function simply
 * returns 0.
 */
int
scanner_errposition(int location, core_yyscan_t yyscanner)
{
#ifdef PGPOOL_NOT_USED
	int		pos;

	if (location < 0)
		return 0;				/* no-op if location is unknown */

	/* Convert byte offset to character number */
	pos = pg_mbstrlen_with_len(yyextra->scanbuf, location) + 1;
	/* And pass it to the ereport mechanism */
	return errposition(pos);
#endif
	/* reached only when the block above is compiled out */
	return 0;
}
1065 
1066 /*
1067  * scanner_yyerror
1068  *		Report a lexer or grammar error.
1069  *
1070  * The message's cursor position is whatever YYLLOC was last set to,
1071  * ie, the start of the current token if called within yylex(), or the
1072  * most recently lexed token if called from the grammar.
1073  * This is OK for syntax error messages from the Bison parser, because Bison
1074  * parsers report error as soon as the first unparsable token is reached.
1075  * Beware of using yyerror for other purposes, as the cursor position might
1076  * be misleading!
1077  */
1078 void
1079 scanner_yyerror(const char *message, core_yyscan_t yyscanner)
1080 {
1081 	const char *loc = yyextra->scanbuf + *yylloc;
1082 
1083 	if (*loc == YY_END_OF_BUFFER_CHAR)
1084 	{
1085 		ereport(ERROR,
1086 				(errcode(ERRCODE_SYNTAX_ERROR),
1087 		/* translator: %s is typically the translation of "syntax error" */
1088 				 errmsg("%s at end of input", _(message)),
1089 				 lexer_errposition()));
1090 	}
1091 	else
1092 	{
1093 		ereport(ERROR,
1094 				(errcode(ERRCODE_SYNTAX_ERROR),
1095 		/* translator: first %s is typically the translation of "syntax error" */
1096 				 errmsg("%s at or near \"%s\"", _(message), loc),
1097 				 lexer_errposition()));
1098 	}
1099 }
1100 
1101 
1102 /*
1103  * Called before any actual parsing is done
1104  */
1105 core_yyscan_t
1106 scanner_init(const char *str,
1107 			 core_yy_extra_type *yyext,
1108 			 const ScanKeyword *keywords,
1109 			 int num_keywords)
1110 {
1111 	Size		slen = strlen(str);
1112 	yyscan_t	scanner;
1113 
1114 	if (yylex_init(&scanner) != 0)
1115 		elog(ERROR, "yylex_init() failed: %m");
1116 
1117 	core_yyset_extra(yyext, scanner);
1118 
1119 	yyext->keywords = keywords;
1120 	yyext->num_keywords = num_keywords;
1121 
1122 	yyext->backslash_quote = backslash_quote;
1123 	yyext->escape_string_warning = escape_string_warning;
1124 	yyext->standard_conforming_strings = standard_conforming_strings;
1125 
1126 	/*
1127 	 * Make a scan buffer with special termination needed by flex.
1128 	 */
1129 	yyext->scanbuf = (char *) palloc(slen + 2);
1130 	yyext->scanbuflen = slen;
1131 	memcpy(yyext->scanbuf, str, slen);
1132 	yyext->scanbuf[slen] = yyext->scanbuf[slen + 1] = YY_END_OF_BUFFER_CHAR;
1133 	yy_scan_buffer(yyext->scanbuf, slen + 2, scanner);
1134 
1135 	/* initialize literal buffer to a reasonable but expansible size */
1136 	yyext->literalalloc = 1024;
1137 	yyext->literalbuf = (char *) palloc(yyext->literalalloc);
1138 	yyext->literallen = 0;
1139 
1140 	return scanner;
1141 }
1142 
1143 
1144 /*
1145  * Called after parsing is done to clean up after scanner_init()
1146  */
1147 void
1148 scanner_finish(core_yyscan_t yyscanner)
1149 {
1150 	/*
1151 	 * We don't bother to call yylex_destroy(), because all it would do is
1152 	 * pfree a small amount of control storage.  It's cheaper to leak the
1153 	 * storage until the parsing context is destroyed.  The amount of space
1154 	 * involved is usually negligible compared to the output parse tree
1155 	 * anyway.
1156 	 *
1157 	 * We do bother to pfree the scanbuf and literal buffer, but only if they
1158 	 * represent a nontrivial amount of space.  The 8K cutoff is arbitrary.
1159 	 */
1160 	if (yyextra->scanbuflen >= 8192)
1161 		pfree(yyextra->scanbuf);
1162 	if (yyextra->literalalloc >= 8192)
1163 		pfree(yyextra->literalbuf);
1164 }
1165 
1166 
1167 static void
1168 addlit(char *ytext, int yleng, core_yyscan_t yyscanner)
1169 {
1170 	/* enlarge buffer if needed */
1171 	if ((yyextra->literallen + yleng) >= yyextra->literalalloc)
1172 	{
1173 		do
1174 		{
1175 			yyextra->literalalloc *= 2;
1176 		} while ((yyextra->literallen + yleng) >= yyextra->literalalloc);
1177 		yyextra->literalbuf = (char *) repalloc(yyextra->literalbuf,
1178 												yyextra->literalalloc);
1179 	}
1180 	/* append new data */
1181 	memcpy(yyextra->literalbuf + yyextra->literallen, ytext, yleng);
1182 	yyextra->literallen += yleng;
1183 }
1184 
1185 
1186 static void
1187 addlitchar(unsigned char ychar, core_yyscan_t yyscanner)
1188 {
1189 	/* enlarge buffer if needed */
1190 	if ((yyextra->literallen + 1) >= yyextra->literalalloc)
1191 	{
1192 		yyextra->literalalloc *= 2;
1193 		yyextra->literalbuf = (char *) repalloc(yyextra->literalbuf,
1194 												yyextra->literalalloc);
1195 	}
1196 	/* append new data */
1197 	yyextra->literalbuf[yyextra->literallen] = ychar;
1198 	yyextra->literallen += 1;
1199 }
1200 
1201 
1202 /*
1203  * Create a palloc'd copy of literalbuf, adding a trailing null.
1204  */
1205 static char *
1206 litbufdup(core_yyscan_t yyscanner)
1207 {
1208 	int			llen = yyextra->literallen;
1209 	char	   *new;
1210 
1211 	new = palloc(llen + 1);
1212 	memcpy(new, yyextra->literalbuf, llen);
1213 	new[llen] = '\0';
1214 	return new;
1215 }
1216 
1217 static int
1218 process_integer_literal(const char *token, YYSTYPE *lval)
1219 {
1220 	long		val;
1221 	char	   *endptr;
1222 
1223 	errno = 0;
1224 	val = strtol(token, &endptr, 10);
1225 	if (*endptr != '\0' || errno == ERANGE
1226 #ifdef HAVE_LONG_INT_64
1227 	/* if long > 32 bits, check for overflow of int4 */
1228 		|| val != (long) ((int32) val)
1229 #endif
1230 		)
1231 	{
1232 		/* integer too large, treat it as a float */
1233 		lval->str = pstrdup(token);
1234 		return FCONST;
1235 	}
1236 	lval->ival = val;
1237 	return ICONST;
1238 }
1239 
1240 static unsigned int
1241 hexval(unsigned char c)
1242 {
1243 	if (c >= '0' && c <= '9')
1244 		return c - '0';
1245 	if (c >= 'a' && c <= 'f')
1246 		return c - 'a' + 0xA;
1247 	if (c >= 'A' && c <= 'F')
1248 		return c - 'A' + 0xA;
1249 	elog(ERROR, "invalid hexadecimal digit");
1250 	return 0;					/* not reached */
1251 }
1252 
1253 static void
1254 check_unicode_value(pg_wchar c, char *loc, core_yyscan_t yyscanner)
1255 {
1256 	if (GetDatabaseEncoding() == PG_UTF8)
1257 		return;
1258 
1259 	if (c > 0x7F)
1260 	{
1261 		ADVANCE_YYLLOC(loc - yyextra->literalbuf + 3);	/* 3 for U&" */
1262 		yyerror("Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8");
1263 	}
1264 }
1265 
1266 static bool
1267 is_utf16_surrogate_first(pg_wchar c)
1268 {
1269 	return (c >= 0xD800 && c <= 0xDBFF);
1270 }
1271 
1272 static bool
1273 is_utf16_surrogate_second(pg_wchar c)
1274 {
1275 	return (c >= 0xDC00 && c <= 0xDFFF);
1276 }
1277 
1278 static pg_wchar
1279 surrogate_pair_to_codepoint(pg_wchar first, pg_wchar second)
1280 {
1281 	return ((first & 0x3FF) << 10) + 0x10000 + (second & 0x3FF);
1282 }
1283 
1284 static void
1285 addunicode(pg_wchar c, core_yyscan_t yyscanner)
1286 {
1287 	char		buf[8];
1288 
1289 	if (c == 0 || c > 0x10FFFF)
1290 		yyerror("invalid Unicode escape value");
1291 	if (c > 0x7F)
1292 	{
1293 		if (GetDatabaseEncoding() != PG_UTF8)
1294 			yyerror("Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8");
1295 		yyextra->saw_non_ascii = true;
1296 	}
1297 	unicode_to_utf8(c, (unsigned char *) buf);
1298 	addlit(buf, pg_mblen(buf), yyscanner);
1299 }
1300 
1301 /* is 'escape' acceptable as Unicode escape character (UESCAPE syntax) ? */
1302 static bool
1303 check_uescapechar(unsigned char escape)
1304 {
1305 	if (isxdigit(escape)
1306 		|| escape == '+'
1307 		|| escape == '\''
1308 		|| escape == '"'
1309 		|| scanner_isspace(escape))
1310 	{
1311 		return false;
1312 	}
1313 	else
1314 		return true;
1315 }
1316 
1317 /* like litbufdup, but handle unicode escapes */
1318 static char *
1319 litbuf_udeescape(unsigned char escape, core_yyscan_t yyscanner)
1320 {
1321 	char	   *new;
1322 	char	   *litbuf,
1323 			   *in,
1324 			   *out;
1325 	pg_wchar	pair_first = 0;
1326 
1327 	/* Make literalbuf null-terminated to simplify the scanning loop */
1328 	litbuf = yyextra->literalbuf;
1329 	litbuf[yyextra->literallen] = '\0';
1330 
1331 	/*
1332 	 * This relies on the subtle assumption that a UTF-8 expansion cannot be
1333 	 * longer than its escaped representation.
1334 	 */
1335 	new = palloc(yyextra->literallen + 1);
1336 
1337 	in = litbuf;
1338 	out = new;
1339 	while (*in)
1340 	{
1341 		if (in[0] == escape)
1342 		{
1343 			if (in[1] == escape)
1344 			{
1345 				if (pair_first)
1346 				{
1347 					ADVANCE_YYLLOC(in - litbuf + 3);	/* 3 for U&" */
1348 					yyerror("invalid Unicode surrogate pair");
1349 				}
1350 				*out++ = escape;
1351 				in += 2;
1352 			}
1353 			else if (isxdigit((unsigned char) in[1]) &&
1354 					 isxdigit((unsigned char) in[2]) &&
1355 					 isxdigit((unsigned char) in[3]) &&
1356 					 isxdigit((unsigned char) in[4]))
1357 			{
1358 				pg_wchar	unicode;
1359 
1360 				unicode = (hexval(in[1]) << 12) +
1361 					(hexval(in[2]) << 8) +
1362 					(hexval(in[3]) << 4) +
1363 					hexval(in[4]);
1364 				check_unicode_value(unicode, in, yyscanner);
1365 				if (pair_first)
1366 				{
1367 					if (is_utf16_surrogate_second(unicode))
1368 					{
1369 						unicode = surrogate_pair_to_codepoint(pair_first, unicode);
1370 						pair_first = 0;
1371 					}
1372 					else
1373 					{
1374 						ADVANCE_YYLLOC(in - litbuf + 3);		/* 3 for U&" */
1375 						yyerror("invalid Unicode surrogate pair");
1376 					}
1377 				}
1378 				else if (is_utf16_surrogate_second(unicode))
1379 					yyerror("invalid Unicode surrogate pair");
1380 
1381 				if (is_utf16_surrogate_first(unicode))
1382 					pair_first = unicode;
1383 				else
1384 				{
1385 					unicode_to_utf8(unicode, (unsigned char *) out);
1386 					out += pg_mblen(out);
1387 				}
1388 				in += 5;
1389 			}
1390 			else if (in[1] == '+' &&
1391 					 isxdigit((unsigned char) in[2]) &&
1392 					 isxdigit((unsigned char) in[3]) &&
1393 					 isxdigit((unsigned char) in[4]) &&
1394 					 isxdigit((unsigned char) in[5]) &&
1395 					 isxdigit((unsigned char) in[6]) &&
1396 					 isxdigit((unsigned char) in[7]))
1397 			{
1398 				pg_wchar	unicode;
1399 
1400 				unicode = (hexval(in[2]) << 20) +
1401 					(hexval(in[3]) << 16) +
1402 					(hexval(in[4]) << 12) +
1403 					(hexval(in[5]) << 8) +
1404 					(hexval(in[6]) << 4) +
1405 					hexval(in[7]);
1406 				check_unicode_value(unicode, in, yyscanner);
1407 				if (pair_first)
1408 				{
1409 					if (is_utf16_surrogate_second(unicode))
1410 					{
1411 						unicode = surrogate_pair_to_codepoint(pair_first, unicode);
1412 						pair_first = 0;
1413 					}
1414 					else
1415 					{
1416 						ADVANCE_YYLLOC(in - litbuf + 3);		/* 3 for U&" */
1417 						yyerror("invalid Unicode surrogate pair");
1418 					}
1419 				}
1420 				else if (is_utf16_surrogate_second(unicode))
1421 					yyerror("invalid Unicode surrogate pair");
1422 
1423 				if (is_utf16_surrogate_first(unicode))
1424 					pair_first = unicode;
1425 				else
1426 				{
1427 					unicode_to_utf8(unicode, (unsigned char *) out);
1428 					out += pg_mblen(out);
1429 				}
1430 				in += 8;
1431 			}
1432 			else
1433 			{
1434 				ADVANCE_YYLLOC(in - litbuf + 3);		/* 3 for U&" */
1435 				yyerror("invalid Unicode escape value");
1436 			}
1437 		}
1438 		else
1439 		{
1440 			if (pair_first)
1441 			{
1442 				ADVANCE_YYLLOC(in - litbuf + 3);		/* 3 for U&" */
1443 				yyerror("invalid Unicode surrogate pair");
1444 			}
1445 			*out++ = *in++;
1446 		}
1447 	}
1448 
1449 	*out = '\0';
1450 
1451 	/*
1452 	 * We could skip pg_verifymbstr if we didn't process any non-7-bit-ASCII
1453 	 * codes; but it's probably not worth the trouble, since this isn't likely
1454 	 * to be a performance-critical path.
1455 	 */
1456 	pg_verifymbstr(new, out - new, false);
1457 	return new;
1458 }
1459 
1460 static unsigned char
1461 unescape_single_char(unsigned char c, core_yyscan_t yyscanner)
1462 {
1463 	switch (c)
1464 	{
1465 		case 'b':
1466 			return '\b';
1467 		case 'f':
1468 			return '\f';
1469 		case 'n':
1470 			return '\n';
1471 		case 'r':
1472 			return '\r';
1473 		case 't':
1474 			return '\t';
1475 		default:
1476 			/* check for backslash followed by non-7-bit-ASCII */
1477 			if (c == '\0' || IS_HIGHBIT_SET(c))
1478 				yyextra->saw_non_ascii = true;
1479 
1480 			return c;
1481 	}
1482 }
1483 
1484 static void
1485 check_string_escape_warning(unsigned char ychar, core_yyscan_t yyscanner)
1486 {
1487 	if (ychar == '\'')
1488 	{
1489 		if (yyextra->warn_on_first_escape && yyextra->escape_string_warning)
1490 			ereport(WARNING,
1491 					(errcode(ERRCODE_NONSTANDARD_USE_OF_ESCAPE_CHARACTER),
1492 					 errmsg("nonstandard use of \\' in a string literal"),
1493 					 errhint("Use '' to write quotes in strings, or use the escape string syntax (E'...')."),
1494 					 lexer_errposition()));
1495 		yyextra->warn_on_first_escape = false;	/* warn only once per string */
1496 	}
1497 	else if (ychar == '\\')
1498 	{
1499 		if (yyextra->warn_on_first_escape && yyextra->escape_string_warning)
1500 			ereport(WARNING,
1501 					(errcode(ERRCODE_NONSTANDARD_USE_OF_ESCAPE_CHARACTER),
1502 					 errmsg("nonstandard use of \\\\ in a string literal"),
1503 					 errhint("Use the escape string syntax for backslashes, e.g., E'\\\\'."),
1504 					 lexer_errposition()));
1505 		yyextra->warn_on_first_escape = false;	/* warn only once per string */
1506 	}
1507 	else
1508 		check_escape_warning(yyscanner);
1509 }
1510 
1511 static void
1512 check_escape_warning(core_yyscan_t yyscanner)
1513 {
1514 	if (yyextra->warn_on_first_escape && yyextra->escape_string_warning)
1515 		ereport(WARNING,
1516 				(errcode(ERRCODE_NONSTANDARD_USE_OF_ESCAPE_CHARACTER),
1517 				 errmsg("nonstandard use of escape in a string literal"),
1518 		errhint("Use the escape string syntax for escapes, e.g., E'\\r\\n'."),
1519 				 lexer_errposition()));
1520 	yyextra->warn_on_first_escape = false;		/* warn only once per string */
1521 }
1522 
1523 /*
1524  * Interface functions to make flex use palloc() instead of malloc().
1525  * It'd be better to make these static, but flex insists otherwise.
1526  */
1527 
1528 void *
1529 core_yyalloc(yy_size_t bytes, core_yyscan_t yyscanner)
1530 {
1531 	return palloc(bytes);
1532 }
1533 
1534 void *
1535 core_yyrealloc(void *ptr, yy_size_t bytes, core_yyscan_t yyscanner)
1536 {
1537 	if (ptr)
1538 		return repalloc(ptr, bytes);
1539 	else
1540 		return palloc(bytes);
1541 }
1542 
1543 void
1544 core_yyfree(void *ptr, core_yyscan_t yyscanner)
1545 {
1546 	if (ptr)
1547 		pfree(ptr);
1548 }
1549