1 %top{
2 /*-------------------------------------------------------------------------
3  *
4  * scan.l
5  *	  lexical scanner for PostgreSQL
6  *
7  * NOTE NOTE NOTE:
8  *
9  * The rules in this file must be kept in sync with src/fe_utils/psqlscan.l
10  * and src/interfaces/ecpg/preproc/pgc.l!
11  *
12  * The rules are designed so that the scanner never has to backtrack,
13  * in the sense that there is always a rule that can match the input
14  * consumed so far (the rule action may internally throw back some input
15  * with yyless(), however).  As explained in the flex manual, this makes
16  * for a useful speed increase --- several percent faster when measuring
17  * raw parsing (Flex + Bison).  The extra complexity is mostly in the rules
18  * for handling float numbers and continued string literals.  If you change
19  * the lexical rules, verify that you haven't broken the no-backtrack
20  * property by running flex with the "-b" option and checking that the
21  * resulting "lex.backup" file says that no backing up is needed.  (As of
22  * Postgres 9.2, this check is made automatically by the Makefile.)
23  *
24  *
25  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
26  * Portions Copyright (c) 1994, Regents of the University of California
27  *
28  * IDENTIFICATION
29  *	  src/backend/parser/scan.l
30  *
31  *-------------------------------------------------------------------------
32  */
33 #include "postgres.h"
34 
35 #include <ctype.h>
36 #include <unistd.h>
37 
38 #include "common/string.h"
39 #include "parser/gramparse.h"
40 #include "parser/parser.h"		/* only needed for GUC variables */
41 #include "parser/scansup.h"
42 #include "mb/pg_wchar.h"
43 }
44 
45 %{
46 
47 /* LCOV_EXCL_START */
48 
49 /* Avoid exit() on fatal scanner errors (a bit ugly -- see yy_fatal_error) */
50 #undef fprintf
51 #define fprintf(file, fmt, msg)  fprintf_to_ereport(fmt, msg)
52 
/*
 * Target of the fprintf() macro above.  Rather than printing anything, raise
 * msg as an ERROR via ereport(); fmt is accepted only so that the macro's
 * argument list lines up, and is otherwise ignored.
 */
53 static void
54 fprintf_to_ereport(const char *fmt, const char *msg)
55 {
56 	ereport(ERROR, (errmsg_internal("%s", msg)));
57 }
58 
59 /*
60  * GUC variables.  This is a DIRECT violation of the warning given at the
61  * head of gram.y, ie flex/bison code must not depend on any GUC variables;
62  * as such, changing their values can induce very unintuitive behavior.
63  * But we shall have to live with it until we can remove these variables.
64  */
65 int			backslash_quote = BACKSLASH_QUOTE_SAFE_ENCODING;
66 bool		escape_string_warning = true;
67 bool		standard_conforming_strings = true;
68 
69 /*
70  * Constant data exported from this file.  This array maps from the
71  * zero-based keyword numbers returned by ScanKeywordLookup to the
72  * Bison token numbers needed by gram.y.  This is exported because
73  * callers need to pass it to scanner_init, if they are using the
74  * standard keyword list ScanKeywords.
75  */
/* Reduce each PG_KEYWORD() entry of kwlist.h to just its "value" argument. */
76 #define PG_KEYWORD(kwname, value, category, collabel) value,
77 
78 const uint16 ScanKeywordTokens[] = {
79 #include "parser/kwlist.h"
80 };
81 
/* The macro is only needed while the array initializer is being built. */
82 #undef PG_KEYWORD
83 
84 /*
85  * Set the type of YYSTYPE.
86  */
87 #define YYSTYPE core_YYSTYPE
88 
89 /*
90  * Set the type of yyextra.  All state variables used by the scanner should
91  * be in yyextra, *not* statically allocated.
92  */
93 #define YY_EXTRA_TYPE core_yy_extra_type *
94 
95 /*
96  * Each call to yylex must set yylloc to the location of the found token
97  * (expressed as a byte offset from the start of the input text).
98  * When we parse a token that requires multiple lexer rules to process,
99  * this should be done in the first such rule, else yylloc will point
100  * into the middle of the token.
101  */
102 #define SET_YYLLOC()  (*(yylloc) = yytext - yyextra->scanbuf)
103 
104 /*
105  * Advance yylloc by the given number of bytes.
106  */
107 #define ADVANCE_YYLLOC(delta)  ( *(yylloc) += (delta) )
108 
109 /*
110  * Sometimes, we do want yylloc to point into the middle of a token; this is
111  * useful for instance to throw an error about an escape sequence within a
112  * string literal.  But if we find no error there, we want to revert yylloc
113  * to the token start, so that that's the location reported to the parser.
114  * Use PUSH_YYLLOC/POP_YYLLOC to save/restore yylloc around such code.
115  * (Currently the implied "stack" is just one location, but someday we might
116  * need to nest these.)
117  */
118 #define PUSH_YYLLOC()	(yyextra->save_yylloc = *(yylloc))
119 #define POP_YYLLOC()	(*(yylloc) = yyextra->save_yylloc)
120 
121 #define startlit()	( yyextra->literallen = 0 )
122 static void addlit(char *ytext, int yleng, core_yyscan_t yyscanner);
123 static void addlitchar(unsigned char ychar, core_yyscan_t yyscanner);
124 static char *litbufdup(core_yyscan_t yyscanner);
125 static unsigned char unescape_single_char(unsigned char c, core_yyscan_t yyscanner);
126 static int	process_integer_literal(const char *token, YYSTYPE *lval);
127 static void addunicode(pg_wchar c, yyscan_t yyscanner);
128 
129 #define yyerror(msg)  scanner_yyerror(msg, yyscanner)
130 
131 #define lexer_errposition()  scanner_errposition(*(yylloc), yyscanner)
132 
133 static void check_string_escape_warning(unsigned char ychar, core_yyscan_t yyscanner);
134 static void check_escape_warning(core_yyscan_t yyscanner);
135 
136 /*
137  * Work around a bug in flex 2.5.35: it emits a couple of functions that
138  * it forgets to emit declarations for.  Since we use -Wmissing-prototypes,
139  * this would cause warnings.  Providing our own declarations should be
140  * harmless even when the bug gets fixed.
141  */
142 extern int	core_yyget_column(yyscan_t yyscanner);
143 extern void core_yyset_column(int column_no, yyscan_t yyscanner);
144 
145 %}
146 
147 %option reentrant
148 %option bison-bridge
149 %option bison-locations
150 %option 8bit
151 %option never-interactive
152 %option nodefault
153 %option noinput
154 %option nounput
155 %option noyywrap
156 %option noyyalloc
157 %option noyyrealloc
158 %option noyyfree
159 %option warn
160 %option prefix="core_yy"
161 
162 /*
163  * OK, here is a short description of lex/flex rules behavior.
164  * The longest pattern which matches an input string is always chosen.
165  * For equal-length patterns, the first occurring in the rules list is chosen.
166  * INITIAL is the starting state, to which all non-conditional rules apply.
167  * Exclusive states change parsing rules while the state is active.  When in
168  * an exclusive state, only those rules defined for that state apply.
169  *
170  * We use exclusive states for quoted strings, extended comments,
171  * and to eliminate parsing troubles for numeric strings.
172  * Exclusive states:
173  *  <xb> bit string literal
174  *  <xc> extended C-style comments
175  *  <xd> delimited identifiers (double-quoted identifiers)
176  *  <xh> hexadecimal numeric string
177  *  <xq> standard quoted strings
178  *  <xqs> quote stop (detect continued strings)
179  *  <xe> extended quoted strings (support backslash escape sequences)
180  *  <xdolq> $foo$ quoted strings
181  *  <xui> quoted identifier with Unicode escapes
182  *  <xus> quoted string with Unicode escapes
183  *  <xeu> Unicode surrogate pair in extended quoted string
184  *
185  * Remember to add an <<EOF>> case whenever you add a new exclusive state!
186  * The default one is probably not the right thing.
187  */
188 
189 %x xb
190 %x xc
191 %x xd
192 %x xh
193 %x xq
194 %x xqs
195 %x xe
196 %x xdolq
197 %x xui
198 %x xus
199 %x xeu
200 
201 /*
202  * In order to make the world safe for Windows and Mac clients as well as
203  * Unix ones, we accept either \n or \r as a newline.  A DOS-style \r\n
204  * sequence will be seen as two successive newlines, but that doesn't cause
205  * any problems.  Comments that start with -- and extend to the next
206  * newline are treated as equivalent to a single whitespace character.
207  *
208  * NOTE a fine point: if there is no newline following --, we will absorb
209  * everything to the end of the input as a comment.  This is correct.  Older
210  * versions of Postgres failed to recognize -- as a comment if the input
211  * did not end with a newline.
212  *
213  * XXX perhaps \f (formfeed) should be treated as a newline as well?
214  *
215  * XXX if you change the set of whitespace characters, fix scanner_isspace()
216  * to agree.
217  */
218 
219 space			[ \t\n\r\f]
220 horiz_space		[ \t\f]
221 newline			[\n\r]
222 non_newline		[^\n\r]
223 
224 comment			("--"{non_newline}*)
225 
226 whitespace		({space}+|{comment})
227 
228 /*
229  * SQL requires at least one newline in the whitespace separating
230  * string literals that are to be concatenated.  Silly, but who are we
231  * to argue?  Note that {whitespace_with_newline} should not have * after
232  * it, whereas {whitespace} should generally have a * after it...
233  */
234 
235 special_whitespace		({space}+|{comment}{newline})
236 horiz_whitespace		({horiz_space}|{comment})
237 whitespace_with_newline	({horiz_whitespace}*{newline}{special_whitespace}*)
238 
239 quote			'
240 /* If we see {quote} then {quotecontinue}, the quoted string continues */
241 quotecontinue	{whitespace_with_newline}{quote}
242 
243 /*
244  * {quotecontinuefail} is needed to avoid lexer backup when we fail to match
245  * {quotecontinue}.  It might seem that this could just be {whitespace}*,
246  * but if there's a dash after {whitespace_with_newline}, it must be consumed
247  * to see if there's another dash --- which would start a {comment} and thus
248  * allow continuation of the {quotecontinue} token.
249  */
250 quotecontinuefail	{whitespace}*"-"?
251 
252 /* Bit string
253  * It is tempting to scan the string for only those characters
254  * which are allowed. However, this leads to silently swallowed
255  * characters if illegal characters are included in the string.
256  * For example, if xbinside is [01] then B'ABCD' is interpreted
257  * as a zero-length string, and the ABCD' is lost!
258  * Better to pass the string forward and let the input routines
259  * validate the contents.
260  */
261 xbstart			[bB]{quote}
262 xbinside		[^']*
263 
264 /* Hexadecimal number */
265 xhstart			[xX]{quote}
266 xhinside		[^']*
267 
268 /* National character */
269 xnstart			[nN]{quote}
270 
271 /* Quoted string that allows backslash escapes */
272 xestart			[eE]{quote}
273 xeinside		[^\\']+
274 xeescape		[\\][^0-7]
275 xeoctesc		[\\][0-7]{1,3}
276 xehexesc		[\\]x[0-9A-Fa-f]{1,2}
277 xeunicode		[\\](u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})
278 xeunicodefail	[\\](u[0-9A-Fa-f]{0,3}|U[0-9A-Fa-f]{0,7})
279 
280 /* Extended quote
281  * xqdouble implements embedded quote, ''''
282  */
283 xqstart			{quote}
284 xqdouble		{quote}{quote}
285 xqinside		[^']+
286 
287 /* $foo$ style quotes ("dollar quoting")
288  * The quoted string starts with $foo$ where "foo" is an optional string
289  * in the form of an identifier, except that it may not contain "$",
290  * and extends to the first occurrence of an identical string.
291  * There is *no* processing of the quoted text.
292  *
293  * {dolqfailed} is an error rule to avoid scanner backup when {dolqdelim}
294  * fails to match its trailing "$".
295  */
296 dolq_start		[A-Za-z\200-\377_]
297 dolq_cont		[A-Za-z\200-\377_0-9]
298 dolqdelim		\$({dolq_start}{dolq_cont}*)?\$
299 dolqfailed		\${dolq_start}{dolq_cont}*
300 dolqinside		[^$]+
301 
302 /* Double quote
303  * Allows embedded spaces and other special characters into identifiers.
304  */
305 dquote			\"
306 xdstart			{dquote}
307 xdstop			{dquote}
308 xddouble		{dquote}{dquote}
309 xdinside		[^"]+
310 
311 /* Quoted identifier with Unicode escapes */
312 xuistart		[uU]&{dquote}
313 
314 /* Quoted string with Unicode escapes */
315 xusstart		[uU]&{quote}
316 
317 /* error rule to avoid backup */
318 xufailed		[uU]&
319 
320 
321 /* C-style comments
322  *
323  * The "extended comment" syntax closely resembles allowable operator syntax.
324  * The tricky part here is to get lex to recognize a string starting with
325  * slash-star as a comment, when interpreting it as an operator would produce
326  * a longer match --- remember lex will prefer a longer match!  Also, if we
327  * have something like plus-slash-star, lex will think this is a 3-character
328  * operator whereas we want to see it as a + operator and a comment start.
329  * The solution is two-fold:
330  * 1. append {op_chars}* to xcstart so that it matches as much text as
331  *    {operator} would. Then the tie-breaker (first matching rule of same
332  *    length) ensures xcstart wins.  We put back the extra stuff with yyless()
333  *    in case it contains a star-slash that should terminate the comment.
334  * 2. In the operator rule, check for slash-star within the operator, and
335  *    if found throw it back with yyless().  This handles the plus-slash-star
336  *    problem.
337  * Dash-dash comments have similar interactions with the operator rule.
338  */
339 xcstart			\/\*{op_chars}*
340 xcstop			\*+\/
341 xcinside		[^*/]+
342 
343 digit			[0-9]
344 ident_start		[A-Za-z\200-\377_]
345 ident_cont		[A-Za-z\200-\377_0-9\$]
346 
347 identifier		{ident_start}{ident_cont}*
348 
349 /* Assorted special-case operators and operator-like tokens */
350 typecast		"::"
351 dot_dot			\.\.
352 colon_equals	":="
353 
354 /*
355  * These operator-like tokens (unlike the above ones) also match the {operator}
356  * rule, which means that they might be overridden by a longer match if they
357  * are followed by a comment start or a + or - character. Accordingly, if you
358  * add to this list, you must also add corresponding code to the {operator}
359  * block to return the correct token in such cases. (This is not needed in
360  * psqlscan.l since the token value is ignored there.)
361  */
362 equals_greater	"=>"
363 less_equals		"<="
364 greater_equals	">="
365 less_greater	"<>"
366 not_equals		"!="
367 
368 /*
369  * "self" is the set of chars that should be returned as single-character
370  * tokens.  "op_chars" is the set of chars that can make up "Op" tokens,
371  * which can be one or more characters long (but if a single-char token
372  * appears in the "self" set, it is not to be returned as an Op).  Note
373  * that the sets overlap, but each has some chars that are not in the other.
374  *
375  * If you change either set, adjust the character lists appearing in the
376  * rule for "operator"!
377  */
378 self			[,()\[\].;\:\+\-\*\/\%\^\<\>\=]
379 op_chars		[\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
380 operator		{op_chars}+
381 
382 /* We no longer allow unary minus in numbers.
383  * Instead we pass it separately to the parser, where it gets
384  * coerced via doNegate() -- Leon aug 20 1999
385  *
386  * {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
387  *
388  * {realfail1} and {realfail2} are added to prevent the need for scanner
389  * backup when the {real} rule fails to match completely.
390  */
391 
392 integer			{digit}+
393 decimal			(({digit}*\.{digit}+)|({digit}+\.{digit}*))
394 decimalfail		{digit}+\.\.
395 real			({integer}|{decimal})[Ee][-+]?{digit}+
396 realfail1		({integer}|{decimal})[Ee]
397 realfail2		({integer}|{decimal})[Ee][-+]
398 
399 param			\${integer}
400 
401 other			.
402 
403 /*
404  * Dollar quoted strings are totally opaque, and no escaping is done on them.
405  * Other quoted strings must allow some special characters such as single-quote
406  *  and newline.
407  * Embedded single-quotes are implemented both in the SQL standard
408  *  style of two adjacent single quotes "''" and in the Postgres/Java style
409  *  of escaped-quote "\'".
410  * Other embedded escaped characters are matched explicitly and the leading
411  *  backslash is dropped from the string.
412  * Note that xcstart must appear before operator, as explained above!
413  *  Also whitespace (comment) must appear before operator.
414  */
415 
416 %%
417 
418 {whitespace}	{
419 					/* ignore: {space} runs and "--" comments produce no token */
420 				}
421 
422 {xcstart}		{
423 					/* Set location in case of syntax error in comment */
424 					SET_YYLLOC();
425 					yyextra->xcdepth = 0;	/* outermost comment: no nesting yet */
426 					BEGIN(xc);
427 					/* Put back any characters past slash-star; see above */
428 					yyless(2);
429 				}
430 
431 <xc>{
432 {xcstart}		{
433 					(yyextra->xcdepth)++;	/* nested comment start */
434 					/* Put back any characters past slash-star; see above */
435 					yyless(2);
436 				}
437 
438 {xcstop}		{
					/*
					 * A star-slash closes one nesting level; only when no
					 * nested comment is open do we leave the xc state.
					 */
439 					if (yyextra->xcdepth <= 0)
440 						BEGIN(INITIAL);
441 					else
442 						(yyextra->xcdepth)--;
443 				}
444 
445 {xcinside}		{
446 					/* ignore comment text that contains no '*' or '/' */
447 				}
448 
449 {op_chars}		{
450 					/* ignore a single operator character inside the comment */
451 				}
452 
453 \*+				{
454 					/* ignore a run of stars that is not part of a star-slash */
455 				}
456 
457 <<EOF>>			{
458 					yyerror("unterminated /* comment");
459 				}
460 } /* <xc> */
461 
462 {xbstart}		{
463 					/* Binary bit type.
464 					 * At some point we should simply pass the string
465 					 * forward to the parser and label it there.
466 					 * In the meantime, place a leading "b" on the string
467 					 * to mark it for the input routine as a binary string.
468 					 */
469 					SET_YYLLOC();
470 					BEGIN(xb);
471 					startlit();
472 					addlitchar('b', yyscanner);
473 				}
474 <xh>{xhinside}	|
475 <xb>{xbinside}	{
					/*
					 * Shared by bit and hex strings: append everything up to
					 * the next quote as-is.  The characters are not checked
					 * here.
					 */
476 					addlit(yytext, yyleng, yyscanner);
477 				}
478 <xb><<EOF>>		{ yyerror("unterminated bit string literal"); }
479 
480 {xhstart}		{
481 					/* Hexadecimal bit type.
482 					 * At some point we should simply pass the string
483 					 * forward to the parser and label it there.
484 					 * In the meantime, place a leading "x" on the string
485 					 * to mark it for the input routine as a hex string.
486 					 */
487 					SET_YYLLOC();
488 					BEGIN(xh);
489 					startlit();
490 					addlitchar('x', yyscanner);
491 				}
492 <xh><<EOF>>		{ yyerror("unterminated hexadecimal string literal"); }
493 
494 {xnstart}		{
495 					/* National character.
496 					 * We will pass this along as a normal character string,
497 					 * but preceded with an internally-generated "NCHAR".
498 					 */
499 					int		kwnum;
500 
501 					SET_YYLLOC();
502 					yyless(1);	/* eat only 'n' this time */
503 
					/* Find "nchar" in whichever keyword list is in use */
504 					kwnum = ScanKeywordLookup("nchar",
505 											  yyextra->keywordlist);
506 					if (kwnum >= 0)
507 					{
						/* Return the same token a spelled-out NCHAR would */
508 						yylval->keyword = GetScanKeyword(kwnum,
509 														 yyextra->keywordlist);
510 						return yyextra->keyword_tokens[kwnum];
511 					}
512 					else
513 					{
514 						/* If NCHAR isn't a keyword, just return "n" */
515 						yylval->str = pstrdup("n");
516 						return IDENT;
517 					}
518 				}
519 
520 {xqstart}		{
					/*
					 * Plain '...' literal: reset the per-literal flags, then
					 * scan in xq, or in the backslash-escape state xe when
					 * standard_conforming_strings is off.
					 */
521 					yyextra->warn_on_first_escape = true;
522 					yyextra->saw_non_ascii = false;
523 					SET_YYLLOC();
524 					if (yyextra->standard_conforming_strings)
525 						BEGIN(xq);
526 					else
527 						BEGIN(xe);
528 					startlit();
529 				}
530 {xestart}		{
					/* E'...' literal: always scanned in the xe state */
531 					yyextra->warn_on_first_escape = false;
532 					yyextra->saw_non_ascii = false;
533 					SET_YYLLOC();
534 					BEGIN(xe);
535 					startlit();
536 				}
537 {xusstart}		{
					/*
					 * U&'...' literal: rejected with an error unless
					 * standard_conforming_strings is on.
					 */
538 					SET_YYLLOC();
539 					if (!yyextra->standard_conforming_strings)
540 						ereport(ERROR,
541 								(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
542 								 errmsg("unsafe use of string constant with Unicode escapes"),
543 								 errdetail("String constants with Unicode escapes cannot be used when standard_conforming_strings is off."),
544 								 lexer_errposition()));
545 					BEGIN(xus);
546 					startlit();
547 				}
548 
549 <xb,xh,xq,xe,xus>{quote} {
550 					/*
551 					 * When we are scanning a quoted string and see an end
552 					 * quote, we must look ahead for a possible continuation.
553 					 * If we don't see one, we know the end quote was in fact
554 					 * the end of the string.  To reduce the lexer table size,
555 					 * we use a single "xqs" state to do the lookahead for all
556 					 * types of strings.
557 					 */
558 					yyextra->state_before_str_stop = YYSTATE;	/* remember which kind of literal this is */
559 					BEGIN(xqs);
560 				}
561 <xqs>{quotecontinue} {
562 					/*
563 					 * Found a quote continuation, so return to the in-quote
564 					 * state and continue scanning the literal.  Nothing is
565 					 * added to the literal's contents.
566 					 */
567 					BEGIN(yyextra->state_before_str_stop);
568 				}
569 <xqs>{quotecontinuefail} |
570 <xqs>{other} |
571 <xqs><<EOF>>	{
572 					/*
573 					 * Failed to see a quote continuation.  Throw back
574 					 * everything after the end quote, and handle the string
575 					 * according to the state we were in previously.
576 					 */
577 					yyless(0);
578 					BEGIN(INITIAL);
579 
580 					switch (yyextra->state_before_str_stop)
581 					{
582 						case xb:	/* bit string literal */
583 							yylval->str = litbufdup(yyscanner);
584 							return BCONST;
585 						case xh:	/* hexadecimal string literal */
586 							yylval->str = litbufdup(yyscanner);
587 							return XCONST;
588 						case xq:	/* standard quoted string */
589 						case xe:	/* extended (backslash-escape) string */
590 							/*
591 							 * Check that the data remains valid, if it might
592 							 * have been made invalid by unescaping any chars.
593 							 */
594 							if (yyextra->saw_non_ascii)
595 								pg_verifymbstr(yyextra->literalbuf,
596 											   yyextra->literallen,
597 											   false);
598 							yylval->str = litbufdup(yyscanner);
599 							return SCONST;
600 						case xus:	/* string with Unicode escapes */
601 							yylval->str = litbufdup(yyscanner);
602 							return USCONST;
603 						default:
							/* only the five states above ever enter xqs */
604 							yyerror("unhandled previous state in xqs");
605 					}
606 				}
607 
608 <xq,xe,xus>{xqdouble} {
					/* two adjacent quotes stand for one literal quote */
609 					addlitchar('\'', yyscanner);
610 				}
611 <xq,xus>{xqinside}  {
					/* text without quotes is copied as-is */
612 					addlit(yytext, yyleng, yyscanner);
613 				}
614 <xe>{xeinside}  {
					/* text without quotes or backslashes is copied as-is */
615 					addlit(yytext, yyleng, yyscanner);
616 				}
617 <xe>{xeunicode} {
					/* parse the hex digits that follow the backslash and u/U */
618 					pg_wchar	c = strtoul(yytext + 2, NULL, 16);
619 
620 					/*
621 					 * For consistency with other productions, issue any
622 					 * escape warning with cursor pointing to start of string.
623 					 * We might want to change that, someday.
624 					 */
625 					check_escape_warning(yyscanner);
626 
627 					/* Remember start of overall string token ... */
628 					PUSH_YYLLOC();
629 					/* ... and set the error cursor to point at this esc seq */
630 					SET_YYLLOC();
631 
632 					if (is_utf16_surrogate_first(c))
633 					{
						/* first half of a pair: the second half must follow */
634 						yyextra->utf16_first_part = c;
635 						BEGIN(xeu);
636 					}
637 					else if (is_utf16_surrogate_second(c))
638 						yyerror("invalid Unicode surrogate pair");
639 					else
640 						addunicode(c, yyscanner);
641 
642 					/* Restore yylloc to be start of string token */
643 					POP_YYLLOC();
644 				}
645 <xeu>{xeunicode} {
					/* parse the hex digits that follow the backslash and u/U */
646 					pg_wchar	c = strtoul(yytext + 2, NULL, 16);
647 
648 					/* Remember start of overall string token ... */
649 					PUSH_YYLLOC();
650 					/* ... and set the error cursor to point at this esc seq */
651 					SET_YYLLOC();
652 
653 					if (!is_utf16_surrogate_second(c))
654 						yyerror("invalid Unicode surrogate pair");
655 
					/* combine with the first half saved by the xe rule */
656 					c = surrogate_pair_to_codepoint(yyextra->utf16_first_part, c);
657 
658 					addunicode(c, yyscanner);
659 
660 					/* Restore yylloc to be start of string token */
661 					POP_YYLLOC();
662 
663 					BEGIN(xe);
664 				}
665 <xeu>. |
666 <xeu>\n |
667 <xeu><<EOF>>	{
668 					/* Set the error cursor to point at missing esc seq */
669 					SET_YYLLOC();
670 					yyerror("invalid Unicode surrogate pair");
671 				}
672 <xe,xeu>{xeunicodefail}	{
673 					/* Set the error cursor to point at malformed esc seq */
674 					SET_YYLLOC();
675 					ereport(ERROR,
676 							(errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
677 							 errmsg("invalid Unicode escape"),
678 							 errhint("Unicode escapes must be \\uXXXX or \\UXXXXXXXX."),
679 							 lexer_errposition()));
680 				}
681 <xe>{xeescape}  {
					/* backslash-quote is refused under some backslash_quote settings */
682 					if (yytext[1] == '\'')
683 					{
684 						if (yyextra->backslash_quote == BACKSLASH_QUOTE_OFF ||
685 							(yyextra->backslash_quote == BACKSLASH_QUOTE_SAFE_ENCODING &&
686 							 PG_ENCODING_IS_CLIENT_ONLY(pg_get_client_encoding())))
687 							ereport(ERROR,
688 									(errcode(ERRCODE_NONSTANDARD_USE_OF_ESCAPE_CHARACTER),
689 									 errmsg("unsafe use of \\' in a string literal"),
690 									 errhint("Use '' to write quotes in strings. \\' is insecure in client-only encodings."),
691 									 lexer_errposition()));
692 					}
693 					check_string_escape_warning(yytext[1], yyscanner);
					/* store the character that the escape translates to */
694 					addlitchar(unescape_single_char(yytext[1], yyscanner),
695 							   yyscanner);
696 				}
697 <xe>{xeoctesc}  {
					/* one to three octal digits after the backslash give one byte */
698 					unsigned char c = strtoul(yytext + 1, NULL, 8);
699 
700 					check_escape_warning(yyscanner);
701 					addlitchar(c, yyscanner);
					/* flag bytes that call for re-verifying the finished literal */
702 					if (c == '\0' || IS_HIGHBIT_SET(c))
703 						yyextra->saw_non_ascii = true;
704 				}
705 <xe>{xehexesc}  {
					/* one or two hex digits after backslash-x give one byte */
706 					unsigned char c = strtoul(yytext + 2, NULL, 16);
707 
708 					check_escape_warning(yyscanner);
709 					addlitchar(c, yyscanner);
					/* flag bytes that call for re-verifying the finished literal */
710 					if (c == '\0' || IS_HIGHBIT_SET(c))
711 						yyextra->saw_non_ascii = true;
712 				}
713 <xe>.			{
714 					/* This is only needed for \ just before EOF */
715 					addlitchar(yytext[0], yyscanner);
716 				}
717 <xq,xe,xus><<EOF>>		{ yyerror("unterminated quoted string"); }
718 
719 {dolqdelim}		{
720 					SET_YYLLOC();
					/* keep the opening delimiter to compare with later ones */
721 					yyextra->dolqstart = pstrdup(yytext);
722 					BEGIN(xdolq);
723 					startlit();
724 				}
725 {dolqfailed}	{
726 					SET_YYLLOC();
727 					/* throw back all but the initial "$" */
728 					yyless(1);
729 					/* and treat it as {other} */
730 					return yytext[0];
731 				}
732 <xdolq>{dolqdelim} {
733 					if (strcmp(yytext, yyextra->dolqstart) == 0)
734 					{
						/* matching delimiter: the literal is complete */
735 						pfree(yyextra->dolqstart);
736 						yyextra->dolqstart = NULL;
737 						BEGIN(INITIAL);
738 						yylval->str = litbufdup(yyscanner);
739 						return SCONST;
740 					}
741 					else
742 					{
743 						/*
744 						 * When we fail to match $...$ to dolqstart, transfer
745 						 * the $... part to the output, but put back the final
746 						 * $ for rescanning.  Consider $delim$...$junk$delim$
747 						 */
748 						addlit(yytext, yyleng - 1, yyscanner);
749 						yyless(yyleng - 1);
750 					}
751 				}
752 <xdolq>{dolqinside} {
					/* text without any "$" is copied as-is */
753 					addlit(yytext, yyleng, yyscanner);
754 				}
755 <xdolq>{dolqfailed} {
					/* a "$tag" lacking its closing "$" is ordinary text */
756 					addlit(yytext, yyleng, yyscanner);
757 				}
758 <xdolq>.		{
759 					/* This is only needed for $ inside the quoted text */
760 					addlitchar(yytext[0], yyscanner);
761 				}
762 <xdolq><<EOF>>	{ yyerror("unterminated dollar-quoted string"); }
763 
764 {xdstart}		{
					/* opening double quote of a delimited identifier */
765 					SET_YYLLOC();
766 					BEGIN(xd);
767 					startlit();
768 				}
769 {xuistart}		{
					/* opening U&" of an identifier with Unicode escapes */
770 					SET_YYLLOC();
771 					BEGIN(xui);
772 					startlit();
773 				}
774 <xd>{xdstop}	{
775 					char	   *ident;
776 
777 					BEGIN(INITIAL);
778 					if (yyextra->literallen == 0)
779 						yyerror("zero-length delimited identifier");
780 					ident = litbufdup(yyscanner);
					/* names of NAMEDATALEN bytes or more get truncated */
781 					if (yyextra->literallen >= NAMEDATALEN)
782 						truncate_identifier(ident, yyextra->literallen, true);
783 					yylval->str = ident;
784 					return IDENT;
785 				}
786 <xui>{dquote}	{
787 					BEGIN(INITIAL);
788 					if (yyextra->literallen == 0)
789 						yyerror("zero-length delimited identifier");
790 					/* can't truncate till after we de-escape the ident */
791 					yylval->str = litbufdup(yyscanner);
792 					return UIDENT;
793 				}
794 <xd,xui>{xddouble}	{
					/* two adjacent double quotes stand for one literal one */
795 					addlitchar('"', yyscanner);
796 				}
797 <xd,xui>{xdinside}	{
					/* text without double quotes is copied as-is */
798 					addlit(yytext, yyleng, yyscanner);
799 				}
800 <xd,xui><<EOF>>		{ yyerror("unterminated quoted identifier"); }
801 
802 {xufailed}	{
803 					char	   *ident;
804 
805 					SET_YYLLOC();
806 					/* throw back all but the initial u/U */
807 					yyless(1);
808 					/* and treat it as {identifier} */
					/* (after yyless(1), yytext holds just that one letter) */
809 					ident = downcase_truncate_identifier(yytext, yyleng, true);
810 					yylval->str = ident;
811 					return IDENT;
812 				}
813 
814 {typecast}		{
					/* "::" */
815 					SET_YYLLOC();
816 					return TYPECAST;
817 				}
818 
819 {dot_dot}		{
					/* ".." */
820 					SET_YYLLOC();
821 					return DOT_DOT;
822 				}
823 
824 {colon_equals}	{
					/* ":=" */
825 					SET_YYLLOC();
826 					return COLON_EQUALS;
827 				}
828 
829 {equals_greater} {
					/* "=>" */
830 					SET_YYLLOC();
831 					return EQUALS_GREATER;
832 				}
833 
834 {less_equals}	{
					/* "<=" */
835 					SET_YYLLOC();
836 					return LESS_EQUALS;
837 				}
838 
839 {greater_equals} {
					/* ">=" */
840 					SET_YYLLOC();
841 					return GREATER_EQUALS;
842 				}
843 
844 {less_greater}	{
845 					/* We accept both "<>" and "!=" as meaning NOT_EQUALS */
846 					SET_YYLLOC();
847 					return NOT_EQUALS;
848 				}
849 
850 {not_equals}	{
851 					/* We accept both "<>" and "!=" as meaning NOT_EQUALS */
852 					SET_YYLLOC();
853 					return NOT_EQUALS;
854 				}
855 
856 {self}			{
					/* single-character token: the character is the token code */
857 					SET_YYLLOC();
858 					return yytext[0];
859 				}
860 
861 {operator}		{
862 					/*
863 					 * Check for embedded slash-star or dash-dash; those
864 					 * are comment starts, so operator must stop there.
865 					 * Note that slash-star or dash-dash at the first
866 					 * character will match a prior rule, not this one.
867 					 */
868 					int			nchars = yyleng;	/* chars we will keep */
869 					char	   *slashstar = strstr(yytext, "/*");
870 					char	   *dashdash = strstr(yytext, "--");
871 
					/* from here on, slashstar is the earliest comment start */
872 					if (slashstar && dashdash)
873 					{
874 						/* if both appear, take the first one */
875 						if (slashstar > dashdash)
876 							slashstar = dashdash;
877 					}
878 					else if (!slashstar)
879 						slashstar = dashdash;
880 					if (slashstar)
881 						nchars = slashstar - yytext;	/* stop before the comment */
882 
883 					/*
884 					 * For SQL compatibility, '+' and '-' cannot be the
885 					 * last char of a multi-char operator unless the operator
886 					 * contains chars that are not in SQL operators.
887 					 * The idea is to lex '=-' as two operators, but not
888 					 * to forbid operator names like '?-' that could not be
889 					 * sequences of SQL operators.
890 					 */
891 					if (nchars > 1 &&
892 						(yytext[nchars - 1] == '+' ||
893 						 yytext[nchars - 1] == '-'))
894 					{
895 						int			ic;
896 
						/* scan backwards for a char that permits a trailing +/- */
897 						for (ic = nchars - 2; ic >= 0; ic--)
898 						{
899 							char c = yytext[ic];
900 							if (c == '~' || c == '!' || c == '@' ||
901 								c == '#' || c == '^' || c == '&' ||
902 								c == '|' || c == '`' || c == '?' ||
903 								c == '%')
904 								break;
905 						}
906 						if (ic < 0)
907 						{
908 							/*
909 							 * didn't find a qualifying character, so remove
910 							 * all trailing [+-]
911 							 */
912 							do {
913 								nchars--;
914 							} while (nchars > 1 &&
915 								 (yytext[nchars - 1] == '+' ||
916 								  yytext[nchars - 1] == '-'));
917 						}
918 					}
919 
920 					SET_YYLLOC();
921 
922 					if (nchars < yyleng)
923 					{
924 						/* Strip the unwanted chars from the token */
925 						yyless(nchars);
926 						/*
927 						 * If what we have left is only one char, and it's
928 						 * one of the characters matching "self", then
929 						 * return it as a character token the same way
930 						 * that the "self" rule would have.
931 						 */
932 						if (nchars == 1 &&
933 							strchr(",()[].;:+-*/%^<>=", yytext[0]))
934 							return yytext[0];
935 						/*
936 						 * Likewise, if what we have left is two chars, and
937 						 * those match the tokens ">=", "<=", "=>", "<>" or
938 						 * "!=", then we must return the appropriate token
939 						 * rather than the generic Op.
940 						 */
941 						if (nchars == 2)
942 						{
943 							if (yytext[0] == '=' && yytext[1] == '>')
944 								return EQUALS_GREATER;
945 							if (yytext[0] == '>' && yytext[1] == '=')
946 								return GREATER_EQUALS;
947 							if (yytext[0] == '<' && yytext[1] == '=')
948 								return LESS_EQUALS;
949 							if (yytext[0] == '<' && yytext[1] == '>')
950 								return NOT_EQUALS;
951 							if (yytext[0] == '!' && yytext[1] == '=')
952 								return NOT_EQUALS;
953 						}
954 					}
955 
956 					/*
957 					 * Complain if operator is too long.  Unlike the case
958 					 * for identifiers, we make this an error not a notice-
959 					 * and-truncate, because the odds are we are looking at
960 					 * a syntactic mistake anyway.
961 					 */
962 					if (nchars >= NAMEDATALEN)
963 						yyerror("operator too long");
964 
					/* anything else is a generic operator; hand over its text */
965 					yylval->str = pstrdup(yytext);
966 					return Op;
967 				}
968 
{param}			{
					int			paramno;

					SET_YYLLOC();

					/*
					 * Convert the digits following the '$'.  atol() cannot
					 * report overflow, and its long result would be silently
					 * narrowed when stored into the int-sized ival, so an
					 * oversized number could masquerade as some other valid
					 * parameter.  Use strtoint(), as process_integer_literal()
					 * does, and reject anything that does not fit in an int.
					 */
					errno = 0;
					paramno = strtoint(yytext + 1, NULL, 10);
					if (errno == ERANGE)
						yyerror("parameter number too large");

					yylval->ival = paramno;
					return PARAM;
				}
974 
{integer}		{
					/*
					 * process_integer_literal() decides the token type: ICONST
					 * when the text converts to an int, FCONST otherwise.
					 */
					SET_YYLLOC();
					return process_integer_literal(yytext, yylval);
				}
{decimal}		{
					/* hand the grammar a copy of the token text as FCONST */
					SET_YYLLOC();
					yylval->str = pstrdup(yytext);
					return FCONST;
				}
{decimalfail}	{
					/*
					 * throw back the .., and treat as integer.  yyless() must
					 * run first so that yytext holds only the part we keep
					 * before it is handed to process_integer_literal().
					 */
					yyless(yyleng - 2);
					SET_YYLLOC();
					return process_integer_literal(yytext, yylval);
				}
{real}			{
					/* hand the grammar a copy of the token text as FCONST */
					SET_YYLLOC();
					yylval->str = pstrdup(yytext);
					return FCONST;
				}
{realfail1}		{
					/*
					 * throw back the [Ee], and figure out whether what
					 * remains is an {integer} or {decimal}.
					 * process_integer_literal() copes with both: text that
					 * does not convert completely to an int (for instance
					 * because of a decimal point) comes back as FCONST.
					 */
					yyless(yyleng - 1);
					SET_YYLLOC();
					return process_integer_literal(yytext, yylval);
				}
{realfail2}		{
					/* throw back the [Ee][+-], and proceed as above */
					yyless(yyleng - 2);
					SET_YYLLOC();
					return process_integer_literal(yytext, yylval);
				}
1010 
1011 
{identifier}	{
					int			kwidx;

					SET_YYLLOC();

					/* Look the token up in the scanner's keyword list first */
					kwidx = ScanKeywordLookup(yytext, yyextra->keywordlist);
					if (kwidx < 0)
					{
						/*
						 * Not a keyword, so it is an ordinary identifier:
						 * fold it to lower case, truncating if necessary.
						 */
						yylval->str = downcase_truncate_identifier(yytext,
																   yyleng,
																   true);
						return IDENT;
					}

					/*
					 * It is a keyword: pass along its spelling from the
					 * keyword list and return the token code registered
					 * for it.
					 */
					yylval->keyword = GetScanKeyword(kwidx,
													 yyextra->keywordlist);
					return yyextra->keyword_tokens[kwidx];
				}
1036 
{other}			{
					/*
					 * Return the character itself as the token code
					 * (presumably any single character that no earlier rule
					 * claimed -- see the definition of {other}).
					 */
					SET_YYLLOC();
					return yytext[0];
				}

<<EOF>>			{
					/*
					 * Set the location before ending the scan;
					 * scanner_yyerror() inspects the byte at that location
					 * to decide whether to say "at end of input".
					 */
					SET_YYLLOC();
					yyterminate();
				}
1046 
1047 %%
1048 
1049 /* LCOV_EXCL_STOP */
1050 
1051 /*
1052  * Arrange access to yyextra for subroutines of the main yylex() function.
1053  * We expect each subroutine to have a yyscanner parameter.  Rather than
1054  * use the yyget_xxx functions, which might or might not get inlined by the
1055  * compiler, we cheat just a bit and cast yyscanner to the right type.
1056  */
1057 #undef yyextra
1058 #define yyextra  (((struct yyguts_t *) yyscanner)->yyextra_r)
1059 
1060 /* Likewise for a couple of other things we need. */
1061 #undef yylloc
1062 #define yylloc	(((struct yyguts_t *) yyscanner)->yylloc_r)
1063 #undef yyleng
1064 #define yyleng	(((struct yyguts_t *) yyscanner)->yyleng_r)
1065 
1066 
1067 /*
1068  * scanner_errposition
1069  *		Report a lexer or grammar error cursor position, if possible.
1070  *
1071  * This is expected to be used within an ereport() call, or via an error
1072  * callback such as setup_scanner_errposition_callback().  The return value
1073  * is a dummy (always 0, in fact).
1074  *
1075  * Note that this can only be used for messages emitted during raw parsing
1076  * (essentially, scan.l, parser.c, and gram.y), since it requires the
1077  * yyscanner struct to still be available.
1078  */
1079 int
1080 scanner_errposition(int location, core_yyscan_t yyscanner)
1081 {
1082 	int			pos;
1083 
1084 	if (location < 0)
1085 		return 0;				/* no-op if location is unknown */
1086 
1087 	/* Convert byte offset to character number */
1088 	pos = pg_mbstrlen_with_len(yyextra->scanbuf, location) + 1;
1089 	/* And pass it to the ereport mechanism */
1090 	return errposition(pos);
1091 }
1092 
1093 /*
1094  * Error context callback for inserting scanner error location.
1095  *
1096  * Note that this will be called for *any* error occurring while the
1097  * callback is installed.  We avoid inserting an irrelevant error location
1098  * if the error is a query cancel --- are there any other important cases?
1099  */
1100 static void
1101 scb_error_callback(void *arg)
1102 {
1103 	ScannerCallbackState *scbstate = (ScannerCallbackState *) arg;
1104 
1105 	if (geterrcode() != ERRCODE_QUERY_CANCELED)
1106 		(void) scanner_errposition(scbstate->location, scbstate->yyscanner);
1107 }
1108 
1109 /*
1110  * setup_scanner_errposition_callback
1111  *		Arrange for non-scanner errors to report an error position
1112  *
1113  * Sometimes the scanner calls functions that aren't part of the scanner
1114  * subsystem and can't reasonably be passed the yyscanner pointer; yet
1115  * we would like any errors thrown in those functions to be tagged with an
1116  * error location.  Use this function to set up an error context stack
1117  * entry that will accomplish that.  Usage pattern:
1118  *
1119  *		declare a local variable "ScannerCallbackState scbstate"
1120  *		...
1121  *		setup_scanner_errposition_callback(&scbstate, yyscanner, location);
1122  *		call function that might throw error;
1123  *		cancel_scanner_errposition_callback(&scbstate);
1124  */
1125 void
1126 setup_scanner_errposition_callback(ScannerCallbackState *scbstate,
1127 								   core_yyscan_t yyscanner,
1128 								   int location)
1129 {
1130 	/* Setup error traceback support for ereport() */
1131 	scbstate->yyscanner = yyscanner;
1132 	scbstate->location = location;
1133 	scbstate->errcallback.callback = scb_error_callback;
1134 	scbstate->errcallback.arg = (void *) scbstate;
1135 	scbstate->errcallback.previous = error_context_stack;
1136 	error_context_stack = &scbstate->errcallback;
1137 }
1138 
1139 /*
1140  * Cancel a previously-set-up errposition callback.
1141  */
1142 void
1143 cancel_scanner_errposition_callback(ScannerCallbackState *scbstate)
1144 {
1145 	/* Pop the error context stack */
1146 	error_context_stack = scbstate->errcallback.previous;
1147 }
1148 
1149 /*
1150  * scanner_yyerror
1151  *		Report a lexer or grammar error.
1152  *
1153  * The message's cursor position is whatever YYLLOC was last set to,
1154  * ie, the start of the current token if called within yylex(), or the
1155  * most recently lexed token if called from the grammar.
1156  * This is OK for syntax error messages from the Bison parser, because Bison
1157  * parsers report error as soon as the first unparsable token is reached.
1158  * Beware of using yyerror for other purposes, as the cursor position might
1159  * be misleading!
1160  */
1161 void
1162 scanner_yyerror(const char *message, core_yyscan_t yyscanner)
1163 {
1164 	const char *loc = yyextra->scanbuf + *yylloc;
1165 
1166 	if (*loc == YY_END_OF_BUFFER_CHAR)
1167 	{
1168 		ereport(ERROR,
1169 				(errcode(ERRCODE_SYNTAX_ERROR),
1170 		/* translator: %s is typically the translation of "syntax error" */
1171 				 errmsg("%s at end of input", _(message)),
1172 				 lexer_errposition()));
1173 	}
1174 	else
1175 	{
1176 		ereport(ERROR,
1177 				(errcode(ERRCODE_SYNTAX_ERROR),
1178 		/* translator: first %s is typically the translation of "syntax error" */
1179 				 errmsg("%s at or near \"%s\"", _(message), loc),
1180 				 lexer_errposition()));
1181 	}
1182 }
1183 
1184 
1185 /*
1186  * Called before any actual parsing is done
1187  */
1188 core_yyscan_t
1189 scanner_init(const char *str,
1190 			 core_yy_extra_type *yyext,
1191 			 const ScanKeywordList *keywordlist,
1192 			 const uint16 *keyword_tokens)
1193 {
1194 	Size		slen = strlen(str);
1195 	yyscan_t	scanner;
1196 
1197 	if (yylex_init(&scanner) != 0)
1198 		elog(ERROR, "yylex_init() failed: %m");
1199 
1200 	core_yyset_extra(yyext, scanner);
1201 
1202 	yyext->keywordlist = keywordlist;
1203 	yyext->keyword_tokens = keyword_tokens;
1204 
1205 	yyext->backslash_quote = backslash_quote;
1206 	yyext->escape_string_warning = escape_string_warning;
1207 	yyext->standard_conforming_strings = standard_conforming_strings;
1208 
1209 	/*
1210 	 * Make a scan buffer with special termination needed by flex.
1211 	 */
1212 	yyext->scanbuf = (char *) palloc(slen + 2);
1213 	yyext->scanbuflen = slen;
1214 	memcpy(yyext->scanbuf, str, slen);
1215 	yyext->scanbuf[slen] = yyext->scanbuf[slen + 1] = YY_END_OF_BUFFER_CHAR;
1216 	yy_scan_buffer(yyext->scanbuf, slen + 2, scanner);
1217 
1218 	/* initialize literal buffer to a reasonable but expansible size */
1219 	yyext->literalalloc = 1024;
1220 	yyext->literalbuf = (char *) palloc(yyext->literalalloc);
1221 	yyext->literallen = 0;
1222 
1223 	return scanner;
1224 }
1225 
1226 
1227 /*
1228  * Called after parsing is done to clean up after scanner_init()
1229  */
1230 void
1231 scanner_finish(core_yyscan_t yyscanner)
1232 {
1233 	/*
1234 	 * We don't bother to call yylex_destroy(), because all it would do is
1235 	 * pfree a small amount of control storage.  It's cheaper to leak the
1236 	 * storage until the parsing context is destroyed.  The amount of space
1237 	 * involved is usually negligible compared to the output parse tree
1238 	 * anyway.
1239 	 *
1240 	 * We do bother to pfree the scanbuf and literal buffer, but only if they
1241 	 * represent a nontrivial amount of space.  The 8K cutoff is arbitrary.
1242 	 */
1243 	if (yyextra->scanbuflen >= 8192)
1244 		pfree(yyextra->scanbuf);
1245 	if (yyextra->literalalloc >= 8192)
1246 		pfree(yyextra->literalbuf);
1247 }
1248 
1249 
1250 static void
1251 addlit(char *ytext, int yleng, core_yyscan_t yyscanner)
1252 {
1253 	/* enlarge buffer if needed */
1254 	if ((yyextra->literallen + yleng) >= yyextra->literalalloc)
1255 	{
1256 		do
1257 		{
1258 			yyextra->literalalloc *= 2;
1259 		} while ((yyextra->literallen + yleng) >= yyextra->literalalloc);
1260 		yyextra->literalbuf = (char *) repalloc(yyextra->literalbuf,
1261 												yyextra->literalalloc);
1262 	}
1263 	/* append new data */
1264 	memcpy(yyextra->literalbuf + yyextra->literallen, ytext, yleng);
1265 	yyextra->literallen += yleng;
1266 }
1267 
1268 
1269 static void
1270 addlitchar(unsigned char ychar, core_yyscan_t yyscanner)
1271 {
1272 	/* enlarge buffer if needed */
1273 	if ((yyextra->literallen + 1) >= yyextra->literalalloc)
1274 	{
1275 		yyextra->literalalloc *= 2;
1276 		yyextra->literalbuf = (char *) repalloc(yyextra->literalbuf,
1277 												yyextra->literalalloc);
1278 	}
1279 	/* append new data */
1280 	yyextra->literalbuf[yyextra->literallen] = ychar;
1281 	yyextra->literallen += 1;
1282 }
1283 
1284 
1285 /*
1286  * Create a palloc'd copy of literalbuf, adding a trailing null.
1287  */
1288 static char *
1289 litbufdup(core_yyscan_t yyscanner)
1290 {
1291 	int			llen = yyextra->literallen;
1292 	char	   *new;
1293 
1294 	new = palloc(llen + 1);
1295 	memcpy(new, yyextra->literalbuf, llen);
1296 	new[llen] = '\0';
1297 	return new;
1298 }
1299 
1300 /*
1301  * Process {integer}.  Note this will also do the right thing with {decimal},
1302  * ie digits and a decimal point.
1303  */
1304 static int
1305 process_integer_literal(const char *token, YYSTYPE *lval)
1306 {
1307 	int			val;
1308 	char	   *endptr;
1309 
1310 	errno = 0;
1311 	val = strtoint(token, &endptr, 10);
1312 	if (*endptr != '\0' || errno == ERANGE)
1313 	{
1314 		/* integer too large (or contains decimal pt), treat it as a float */
1315 		lval->str = pstrdup(token);
1316 		return FCONST;
1317 	}
1318 	lval->ival = val;
1319 	return ICONST;
1320 }
1321 
1322 static void
1323 addunicode(pg_wchar c, core_yyscan_t yyscanner)
1324 {
1325 	ScannerCallbackState scbstate;
1326 	char		buf[MAX_UNICODE_EQUIVALENT_STRING + 1];
1327 
1328 	if (!is_valid_unicode_codepoint(c))
1329 		yyerror("invalid Unicode escape value");
1330 
1331 	/*
1332 	 * We expect that pg_unicode_to_server() will complain about any
1333 	 * unconvertible code point, so we don't have to set saw_non_ascii.
1334 	 */
1335 	setup_scanner_errposition_callback(&scbstate, yyscanner, *(yylloc));
1336 	pg_unicode_to_server(c, (unsigned char *) buf);
1337 	cancel_scanner_errposition_callback(&scbstate);
1338 	addlit(buf, strlen(buf), yyscanner);
1339 }
1340 
1341 static unsigned char
1342 unescape_single_char(unsigned char c, core_yyscan_t yyscanner)
1343 {
1344 	switch (c)
1345 	{
1346 		case 'b':
1347 			return '\b';
1348 		case 'f':
1349 			return '\f';
1350 		case 'n':
1351 			return '\n';
1352 		case 'r':
1353 			return '\r';
1354 		case 't':
1355 			return '\t';
1356 		default:
1357 			/* check for backslash followed by non-7-bit-ASCII */
1358 			if (c == '\0' || IS_HIGHBIT_SET(c))
1359 				yyextra->saw_non_ascii = true;
1360 
1361 			return c;
1362 	}
1363 }
1364 
1365 static void
1366 check_string_escape_warning(unsigned char ychar, core_yyscan_t yyscanner)
1367 {
1368 	if (ychar == '\'')
1369 	{
1370 		if (yyextra->warn_on_first_escape && yyextra->escape_string_warning)
1371 			ereport(WARNING,
1372 					(errcode(ERRCODE_NONSTANDARD_USE_OF_ESCAPE_CHARACTER),
1373 					 errmsg("nonstandard use of \\' in a string literal"),
1374 					 errhint("Use '' to write quotes in strings, or use the escape string syntax (E'...')."),
1375 					 lexer_errposition()));
1376 		yyextra->warn_on_first_escape = false;	/* warn only once per string */
1377 	}
1378 	else if (ychar == '\\')
1379 	{
1380 		if (yyextra->warn_on_first_escape && yyextra->escape_string_warning)
1381 			ereport(WARNING,
1382 					(errcode(ERRCODE_NONSTANDARD_USE_OF_ESCAPE_CHARACTER),
1383 					 errmsg("nonstandard use of \\\\ in a string literal"),
1384 					 errhint("Use the escape string syntax for backslashes, e.g., E'\\\\'."),
1385 					 lexer_errposition()));
1386 		yyextra->warn_on_first_escape = false;	/* warn only once per string */
1387 	}
1388 	else
1389 		check_escape_warning(yyscanner);
1390 }
1391 
1392 static void
1393 check_escape_warning(core_yyscan_t yyscanner)
1394 {
1395 	if (yyextra->warn_on_first_escape && yyextra->escape_string_warning)
1396 		ereport(WARNING,
1397 				(errcode(ERRCODE_NONSTANDARD_USE_OF_ESCAPE_CHARACTER),
1398 				 errmsg("nonstandard use of escape in a string literal"),
1399 		errhint("Use the escape string syntax for escapes, e.g., E'\\r\\n'."),
1400 				 lexer_errposition()));
1401 	yyextra->warn_on_first_escape = false;		/* warn only once per string */
1402 }
1403 
1404 /*
1405  * Interface functions to make flex use palloc() instead of malloc().
1406  * It'd be better to make these static, but flex insists otherwise.
1407  */
1408 
/*
 * Allocation hook for flex: obtain "bytes" bytes with palloc().
 * The yyscanner argument is not used.
 */
void *
core_yyalloc(yy_size_t bytes, core_yyscan_t yyscanner)
{
	return palloc(bytes);
}
1414 
1415 void *
1416 core_yyrealloc(void *ptr, yy_size_t bytes, core_yyscan_t yyscanner)
1417 {
1418 	if (ptr)
1419 		return repalloc(ptr, bytes);
1420 	else
1421 		return palloc(bytes);
1422 }
1423 
1424 void
1425 core_yyfree(void *ptr, core_yyscan_t yyscanner)
1426 {
1427 	if (ptr)
1428 		pfree(ptr);
1429 }
1430