1 %top{
2 /*-------------------------------------------------------------------------
3 *
4 * scan.l
5 * lexical scanner for PostgreSQL
6 *
7 * NOTE NOTE NOTE:
8 *
9 * The rules in this file must be kept in sync with src/fe_utils/psqlscan.l!
10 *
11 * The rules are designed so that the scanner never has to backtrack,
12 * in the sense that there is always a rule that can match the input
13 * consumed so far (the rule action may internally throw back some input
14 * with yyless(), however). As explained in the flex manual, this makes
15 * for a useful speed increase --- about a third faster than a plain -CF
16 * lexer, in simple testing. The extra complexity is mostly in the rules
17 * for handling float numbers and continued string literals. If you change
18 * the lexical rules, verify that you haven't broken the no-backtrack
19 * property by running flex with the "-b" option and checking that the
20 * resulting "lex.backup" file says that no backing up is needed. (As of
21 * Postgres 9.2, this check is made automatically by the Makefile.)
22 *
23 *
24 * Portions Copyright (c) 2003-2016, PgPool Global Development Group
25 * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
26 * Portions Copyright (c) 1994, Regents of the University of California
27 *
28 * IDENTIFICATION
29 * src/backend/parser/scan.l
30 *
31 *-------------------------------------------------------------------------
32 */
33 #include "pool_parser.h"
34
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <unistd.h>
37
38 #include "parser.h" /* only needed for GUC variables */
39 #include "scanner.h"
40 #include "gramparse.h"
41 #include "scansup.h"
42 #include "pg_wchar.h"
43
44 #include "gram.h"
45 #include "utils/palloc.h"
46 #include "utils/elog.h"
47 }
48
49 %{
50 /* Avoid exit() on fatal scanner errors (a bit ugly -- see yy_fatal_error) */
51 #undef fprintf
52 #define fprintf(file, fmt, msg) fprintf_to_ereport(fmt, msg)
53
54 static void
fprintf_to_ereport(const char * fmt,const char * msg)55 fprintf_to_ereport(const char *fmt, const char *msg)
56 {
57 ereport(ERROR, (errmsg_internal("%s", msg)));
58 }
59
60 /*
61 * GUC variables. This is a DIRECT violation of the warning given at the
62 * head of gram.y, ie flex/bison code must not depend on any GUC variables;
63 * as such, changing their values can induce very unintuitive behavior.
64 * But we shall have to live with it until we can remove these variables.
65 */
66 int backslash_quote = BACKSLASH_QUOTE_SAFE_ENCODING;
67 bool escape_string_warning = true;
68 bool standard_conforming_strings = true;
69
70 /*
71 * Set the type of YYSTYPE.
72 */
73 #define YYSTYPE core_YYSTYPE
74
75 /*
76 * Set the type of yyextra. All state variables used by the scanner should
77 * be in yyextra, *not* statically allocated.
78 */
79 #define YY_EXTRA_TYPE core_yy_extra_type *
80
81 /*
82 * Each call to yylex must set yylloc to the location of the found token
83 * (expressed as a byte offset from the start of the input text).
84 * When we parse a token that requires multiple lexer rules to process,
85 * this should be done in the first such rule, else yylloc will point
86 * into the middle of the token.
87 */
88 #define SET_YYLLOC() (*(yylloc) = yytext - yyextra->scanbuf)
89
90 /*
91 * Advance yylloc by the given number of bytes.
92 */
93 #define ADVANCE_YYLLOC(delta) ( *(yylloc) += (delta) )
94
95 #define startlit() ( yyextra->literallen = 0 )
96 static void addlit(char *ytext, int yleng, core_yyscan_t yyscanner);
97 static void addlitchar(unsigned char ychar, core_yyscan_t yyscanner);
98 static char *litbufdup(core_yyscan_t yyscanner);
99 static char *litbuf_udeescape(unsigned char escape, core_yyscan_t yyscanner);
100 static unsigned char unescape_single_char(unsigned char c, core_yyscan_t yyscanner);
101 static int process_integer_literal(const char *token, YYSTYPE *lval);
102 static bool is_utf16_surrogate_first(pg_wchar c);
103 static bool is_utf16_surrogate_second(pg_wchar c);
104 static pg_wchar surrogate_pair_to_codepoint(pg_wchar first, pg_wchar second);
105 static void addunicode(pg_wchar c, yyscan_t yyscanner);
106 static bool check_uescapechar(unsigned char escape);
107
108 #define yyerror(msg) scanner_yyerror(msg, yyscanner)
109
110 #define lexer_errposition() scanner_errposition(*(yylloc), yyscanner)
111
112 static void check_string_escape_warning(unsigned char ychar, core_yyscan_t yyscanner);
113 static void check_escape_warning(core_yyscan_t yyscanner);
114
115 /*
116 * Work around a bug in flex 2.5.35: it emits a couple of functions that
117 * it forgets to emit declarations for. Since we use -Wmissing-prototypes,
118 * this would cause warnings. Providing our own declarations should be
119 * harmless even when the bug gets fixed.
120 */
121 extern int core_yyget_column(yyscan_t yyscanner);
122 extern void core_yyset_column(int column_no, yyscan_t yyscanner);
123
124 %}
125
126 %option reentrant
127 %option bison-bridge
128 %option bison-locations
129 %option 8bit
130 %option never-interactive
131 %option nodefault
132 %option noinput
133 %option nounput
134 %option noyywrap
135 %option noyyalloc
136 %option noyyrealloc
137 %option noyyfree
138 %option warn
139 %option prefix="core_yy"
140
141 /*
142 * OK, here is a short description of lex/flex rules behavior.
143 * The longest pattern which matches an input string is always chosen.
144 * For equal-length patterns, the first occurring in the rules list is chosen.
145 * INITIAL is the starting state, to which all non-conditional rules apply.
146 * Exclusive states change parsing rules while the state is active. When in
147 * an exclusive state, only those rules defined for that state apply.
148 *
149 * We use exclusive states for quoted strings, extended comments,
150 * and to eliminate parsing troubles for numeric strings.
151 * Exclusive states:
152 * <xb> bit string literal
153 * <xc> extended C-style comments
154 * <xd> delimited identifiers (double-quoted identifiers)
155 * <xh> hexadecimal numeric string
156 * <xq> standard quoted strings
157 * <xe> extended quoted strings (support backslash escape sequences)
158 * <xdolq> $foo$ quoted strings
159 * <xui> quoted identifier with Unicode escapes
160 * <xuiend> end of a quoted identifier with Unicode escapes, UESCAPE can follow
161 * <xus> quoted string with Unicode escapes
162 * <xusend> end of a quoted string with Unicode escapes, UESCAPE can follow
163 * <xeu> Unicode surrogate pair in extended quoted string
164 *
165 * Remember to add an <<EOF>> case whenever you add a new exclusive state!
166 * The default one is probably not the right thing.
167 */
168
169 %x xb
170 %x xc
171 %x xd
172 %x xh
173 %x xe
174 %x xq
175 %x xdolq
176 %x xui
177 %x xuiend
178 %x xus
179 %x xusend
180 %x xeu
181
182 /*
183 * In order to make the world safe for Windows and Mac clients as well as
184 * Unix ones, we accept either \n or \r as a newline. A DOS-style \r\n
185 * sequence will be seen as two successive newlines, but that doesn't cause
186 * any problems. Comments that start with -- and extend to the next
187 * newline are treated as equivalent to a single whitespace character.
188 *
189 * NOTE a fine point: if there is no newline following --, we will absorb
190 * everything to the end of the input as a comment. This is correct. Older
191 * versions of Postgres failed to recognize -- as a comment if the input
192 * did not end with a newline.
193 *
194 * XXX perhaps \f (formfeed) should be treated as a newline as well?
195 *
196 * XXX if you change the set of whitespace characters, fix scanner_isspace()
197 * to agree, and see also the plpgsql lexer.
198 */
199
200 space [ \t\n\r\f]
201 horiz_space [ \t\f]
202 newline [\n\r]
203 non_newline [^\n\r]
204
205 comment ("--"{non_newline}*)
206
207 whitespace ({space}+|{comment})
208
209 /*
210 * SQL requires at least one newline in the whitespace separating
211 * string literals that are to be concatenated. Silly, but who are we
212 * to argue? Note that {whitespace_with_newline} should not have * after
213 * it, whereas {whitespace} should generally have a * after it...
214 */
215
216 special_whitespace ({space}+|{comment}{newline})
217 horiz_whitespace ({horiz_space}|{comment})
218 whitespace_with_newline ({horiz_whitespace}*{newline}{special_whitespace}*)
219
220 /*
221 * To ensure that {quotecontinue} can be scanned without having to back up
222 * if the full pattern isn't matched, we include trailing whitespace in
223 * {quotestop}. This matches all cases where {quotecontinue} fails to match,
224 * except for {quote} followed by whitespace and just one "-" (not two,
225 * which would start a {comment}). To cover that we have {quotefail}.
226 * The actions for {quotestop} and {quotefail} must throw back characters
227 * beyond the quote proper.
228 */
229 quote '
230 quotestop {quote}{whitespace}*
231 quotecontinue {quote}{whitespace_with_newline}{quote}
232 quotefail {quote}{whitespace}*"-"
233
234 /* Bit string
235 * It is tempting to scan the string for only those characters
236 * which are allowed. However, this leads to silently swallowed
237 * characters if illegal characters are included in the string.
238 * For example, if xbinside is [01] then B'ABCD' is interpreted
239 * as a zero-length string, and the ABCD' is lost!
240 * Better to pass the string forward and let the input routines
241 * validate the contents.
242 */
243 xbstart [bB]{quote}
244 xbinside [^']*
245
246 /* Hexadecimal number */
247 xhstart [xX]{quote}
248 xhinside [^']*
249
250 /* National character */
251 xnstart [nN]{quote}
252
253 /* Quoted string that allows backslash escapes */
254 xestart [eE]{quote}
255 xeinside [^\\']+
256 xeescape [\\][^0-7]
257 xeoctesc [\\][0-7]{1,3}
258 xehexesc [\\]x[0-9A-Fa-f]{1,2}
259 xeunicode [\\](u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})
260 xeunicodefail [\\](u[0-9A-Fa-f]{0,3}|U[0-9A-Fa-f]{0,7})
261
262 /* Extended quote
263 * xqdouble implements embedded quote, ''''
264 */
265 xqstart {quote}
266 xqdouble {quote}{quote}
267 xqinside [^']+
268
269 /* $foo$ style quotes ("dollar quoting")
270 * The quoted string starts with $foo$ where "foo" is an optional string
271 * in the form of an identifier, except that it may not contain "$",
272 * and extends to the first occurrence of an identical string.
273 * There is *no* processing of the quoted text.
274 *
275 * {dolqfailed} is an error rule to avoid scanner backup when {dolqdelim}
276 * fails to match its trailing "$".
277 */
278 dolq_start [A-Za-z\200-\377_]
279 dolq_cont [A-Za-z\200-\377_0-9]
280 dolqdelim \$({dolq_start}{dolq_cont}*)?\$
281 dolqfailed \${dolq_start}{dolq_cont}*
282 dolqinside [^$]+
283
284 /* Double quote
285 * Allows embedded spaces and other special characters into identifiers.
286 */
287 dquote \"
288 xdstart {dquote}
289 xdstop {dquote}
290 xddouble {dquote}{dquote}
291 xdinside [^"]+
292
293 /* Unicode escapes */
294 uescape [uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']{quote}
295 /* error rule to avoid backup */
296 uescapefail [uU][eE][sS][cC][aA][pP][eE]{whitespace}*"-"|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*|[uU][eE][sS][cC][aA][pP]|[uU][eE][sS][cC][aA]|[uU][eE][sS][cC]|[uU][eE][sS]|[uU][eE]|[uU]
297
298 /* Quoted identifier with Unicode escapes */
299 xuistart [uU]&{dquote}
300
301 /* Quoted string with Unicode escapes */
302 xusstart [uU]&{quote}
303
304 /* Optional UESCAPE after a quoted string or identifier with Unicode escapes. */
305 xustop1 {uescapefail}?
306 xustop2 {uescape}
307
308 /* error rule to avoid backup */
309 xufailed [uU]&
310
311
312 /* C-style comments
313 *
314 * The "extended comment" syntax closely resembles allowable operator syntax.
315 * The tricky part here is to get lex to recognize a string starting with
316 * slash-star as a comment, when interpreting it as an operator would produce
317 * a longer match --- remember lex will prefer a longer match! Also, if we
318 * have something like plus-slash-star, lex will think this is a 3-character
319 * operator whereas we want to see it as a + operator and a comment start.
320 * The solution is two-fold:
321 * 1. append {op_chars}* to xcstart so that it matches as much text as
322 * {operator} would. Then the tie-breaker (first matching rule of same
323 * length) ensures xcstart wins. We put back the extra stuff with yyless()
324 * in case it contains a star-slash that should terminate the comment.
325 * 2. In the operator rule, check for slash-star within the operator, and
326 * if found throw it back with yyless(). This handles the plus-slash-star
327 * problem.
328 * Dash-dash comments have similar interactions with the operator rule.
329 */
330 xcstart \/\*{op_chars}*
331 xcstop \*+\/
332 xcinside [^*/]+
333
334 digit [0-9]
335 ident_start [A-Za-z\200-\377_]
336 ident_cont [A-Za-z\200-\377_0-9\$]
337
338 identifier {ident_start}{ident_cont}*
339
340 /* Assorted special-case operators and operator-like tokens */
341 typecast "::"
342 dot_dot \.\.
343 colon_equals ":="
344 equals_greater "=>"
345 less_equals "<="
346 greater_equals ">="
347 less_greater "<>"
348 not_equals "!="
349
350 /*
351 * "self" is the set of chars that should be returned as single-character
352 * tokens. "op_chars" is the set of chars that can make up "Op" tokens,
353 * which can be one or more characters long (but if a single-char token
354 * appears in the "self" set, it is not to be returned as an Op). Note
355 * that the sets overlap, but each has some chars that are not in the other.
356 *
357 * If you change either set, adjust the character lists appearing in the
358 * rule for "operator"!
359 */
360 self [,()\[\].;\:\+\-\*\/\%\^\<\>\=]
361 op_chars [\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
362 operator {op_chars}+
363
/* We no longer allow unary minus in numbers.
 * Instead we pass it separately to the parser, where it gets
 * coerced via doNegate() -- Leon aug 20 1999
367 *
368 * {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
369 *
370 * {realfail1} and {realfail2} are added to prevent the need for scanner
371 * backup when the {real} rule fails to match completely.
372 */
373
374 integer {digit}+
375 decimal (({digit}*\.{digit}+)|({digit}+\.{digit}*))
376 decimalfail {digit}+\.\.
377 real ({integer}|{decimal})[Ee][-+]?{digit}+
378 realfail1 ({integer}|{decimal})[Ee]
379 realfail2 ({integer}|{decimal})[Ee][-+]
380
381 param \${integer}
382
383 other .
384
385 /*
386 * Dollar quoted strings are totally opaque, and no escaping is done on them.
387 * Other quoted strings must allow some special characters such as single-quote
388 * and newline.
389 * Embedded single-quotes are implemented both in the SQL standard
390 * style of two adjacent single quotes "''" and in the Postgres/Java style
391 * of escaped-quote "\'".
392 * Other embedded escaped characters are matched explicitly and the leading
393 * backslash is dropped from the string.
394 * Note that xcstart must appear before operator, as explained above!
395 * Also whitespace (comment) must appear before operator.
396 */
397
398 %%
399
400 {whitespace} {
401 /* ignore */
402 }
403
404 {xcstart} {
405 /* Set location in case of syntax error in comment */
406 SET_YYLLOC();
407 yyextra->xcdepth = 0;
408 BEGIN(xc);
409 /* Put back any characters past slash-star; see above */
410 yyless(2);
411 }
412
413 <xc>{xcstart} {
414 (yyextra->xcdepth)++;
415 /* Put back any characters past slash-star; see above */
416 yyless(2);
417 }
418
<xc>{xcstop}	{
					/*
					 * A star-slash closes one comment level.  While nested
					 * levels remain just pop one; otherwise the outermost
					 * comment has ended and normal scanning resumes.
					 */
					if (yyextra->xcdepth > 0)
						(yyextra->xcdepth)--;
					else
						BEGIN(INITIAL);
				}
425
426 <xc>{xcinside} {
427 /* ignore */
428 }
429
430 <xc>{op_chars} {
431 /* ignore */
432 }
433
434 <xc>\*+ {
435 /* ignore */
436 }
437
438 <xc><<EOF>> { yyerror("unterminated /* comment"); }
439
440 {xbstart} {
441 /* Binary bit type.
442 * At some point we should simply pass the string
443 * forward to the parser and label it there.
444 * In the meantime, place a leading "b" on the string
445 * to mark it for the input routine as a binary string.
446 */
447 SET_YYLLOC();
448 BEGIN(xb);
449 startlit();
450 addlitchar('b', yyscanner);
451 }
452 <xb>{quotestop} |
453 <xb>{quotefail} {
454 yyless(1);
455 BEGIN(INITIAL);
456 yylval->str = litbufdup(yyscanner);
457 return BCONST;
458 }
459 <xh>{xhinside} |
460 <xb>{xbinside} {
461 addlit(yytext, yyleng, yyscanner);
462 }
463 <xh>{quotecontinue} |
464 <xb>{quotecontinue} {
465 /* ignore */
466 }
467 <xb><<EOF>> { yyerror("unterminated bit string literal"); }
468
469 {xhstart} {
470 /* Hexadecimal bit type.
471 * At some point we should simply pass the string
472 * forward to the parser and label it there.
473 * In the meantime, place a leading "x" on the string
474 * to mark it for the input routine as a hex string.
475 */
476 SET_YYLLOC();
477 BEGIN(xh);
478 startlit();
479 addlitchar('x', yyscanner);
480 }
481 <xh>{quotestop} |
482 <xh>{quotefail} {
483 yyless(1);
484 BEGIN(INITIAL);
485 yylval->str = litbufdup(yyscanner);
486 return XCONST;
487 }
488 <xh><<EOF>> { yyerror("unterminated hexadecimal string literal"); }
489
{xnstart}		{
					/*
					 * National character string.  The string itself is
					 * passed along as a normal character string; here we
					 * only emit an internally generated "NCHAR" in front
					 * of it.
					 */
					const ScanKeyword *nchar_kw;

					SET_YYLLOC();
					yyless(1);	/* consume just the 'n'; the quote is rescanned */

					nchar_kw = ScanKeywordLookup("nchar",
												 yyextra->keywords,
												 yyextra->num_keywords);
					if (nchar_kw == NULL)
					{
						/* NCHAR is not a keyword here, so simply return "n" */
						yylval->str = pstrdup("n");
						return IDENT;
					}

					yylval->keyword = nchar_kw->name;
					return nchar_kw->value;
				}
515
516 {xqstart} {
517 yyextra->warn_on_first_escape = true;
518 yyextra->saw_non_ascii = false;
519 SET_YYLLOC();
520 if (yyextra->standard_conforming_strings)
521 BEGIN(xq);
522 else
523 BEGIN(xe);
524 startlit();
525 }
526 {xestart} {
527 yyextra->warn_on_first_escape = false;
528 yyextra->saw_non_ascii = false;
529 SET_YYLLOC();
530 BEGIN(xe);
531 startlit();
532 }
533 {xusstart} {
534 SET_YYLLOC();
535 if (!yyextra->standard_conforming_strings)
536 ereport(ERROR,
537 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
538 errmsg("unsafe use of string constant with Unicode escapes"),
539 errdetail("String constants with Unicode escapes cannot be used when standard_conforming_strings is off."),
540 lexer_errposition()));
541 BEGIN(xus);
542 startlit();
543 }
544 <xq,xe>{quotestop} |
545 <xq,xe>{quotefail} {
546 yyless(1);
547 BEGIN(INITIAL);
548 /*
549 * check that the data remains valid if it might have been
550 * made invalid by unescaping any chars.
551 */
552 if (yyextra->saw_non_ascii)
553 pg_verifymbstr(yyextra->literalbuf,
554 yyextra->literallen,
555 false);
556 yylval->str = litbufdup(yyscanner);
557 return SCONST;
558 }
559 <xus>{quotestop} |
560 <xus>{quotefail} {
561 /* throw back all but the quote */
562 yyless(1);
563 /* xusend state looks for possible UESCAPE */
564 BEGIN(xusend);
565 }
566 <xusend>{whitespace} {
567 /* stay in xusend state over whitespace */
568 }
569 <xusend><<EOF>> |
570 <xusend>{other} |
571 <xusend>{xustop1} {
572 /* no UESCAPE after the quote, throw back everything */
573 yyless(0);
574 BEGIN(INITIAL);
575 yylval->str = litbuf_udeescape('\\', yyscanner);
576 return SCONST;
577 }
578 <xusend>{xustop2} {
579 /* found UESCAPE after the end quote */
580 BEGIN(INITIAL);
581 if (!check_uescapechar(yytext[yyleng - 2]))
582 {
583 SET_YYLLOC();
584 ADVANCE_YYLLOC(yyleng - 2);
585 yyerror("invalid Unicode escape character");
586 }
587 yylval->str = litbuf_udeescape(yytext[yyleng - 2],
588 yyscanner);
589 return SCONST;
590 }
591 <xq,xe,xus>{xqdouble} {
592 addlitchar('\'', yyscanner);
593 }
594 <xq,xus>{xqinside} {
595 addlit(yytext, yyleng, yyscanner);
596 }
597 <xe>{xeinside} {
598 addlit(yytext, yyleng, yyscanner);
599 }
600 <xe>{xeunicode} {
601 pg_wchar c = strtoul(yytext + 2, NULL, 16);
602
603 check_escape_warning(yyscanner);
604
605 if (is_utf16_surrogate_first(c))
606 {
607 yyextra->utf16_first_part = c;
608 BEGIN(xeu);
609 }
610 else if (is_utf16_surrogate_second(c))
611 yyerror("invalid Unicode surrogate pair");
612 else
613 addunicode(c, yyscanner);
614 }
615 <xeu>{xeunicode} {
616 pg_wchar c = strtoul(yytext + 2, NULL, 16);
617
618 if (!is_utf16_surrogate_second(c))
619 yyerror("invalid Unicode surrogate pair");
620
621 c = surrogate_pair_to_codepoint(yyextra->utf16_first_part, c);
622
623 addunicode(c, yyscanner);
624
625 BEGIN(xe);
626 }
627 <xeu>. { yyerror("invalid Unicode surrogate pair"); }
628 <xeu>\n { yyerror("invalid Unicode surrogate pair"); }
629 <xeu><<EOF>> { yyerror("invalid Unicode surrogate pair"); }
630 <xe,xeu>{xeunicodefail} {
631 ereport(ERROR,
632 (errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
633 errmsg("invalid Unicode escape"),
634 errhint("Unicode escapes must be \\uXXXX or \\UXXXXXXXX."),
635 lexer_errposition()));
636 }
637 <xe>{xeescape} {
638 #ifdef PGPOOL_NOT_USED
639 if (yytext[1] == '\'')
640 {
641 if (yyextra->backslash_quote == BACKSLASH_QUOTE_OFF ||
642 (yyextra->backslash_quote == BACKSLASH_QUOTE_SAFE_ENCODING &&
643 PG_ENCODING_IS_CLIENT_ONLY(pg_get_client_encoding())))
644 ereport(ERROR,
645 (errcode(ERRCODE_NONSTANDARD_USE_OF_ESCAPE_CHARACTER),
646 errmsg("unsafe use of \\' in a string literal"),
647 errhint("Use '' to write quotes in strings. \\' is insecure in client-only encodings."),
648 lexer_errposition()));
649 }
650 #endif
651 check_string_escape_warning(yytext[1], yyscanner);
652 addlitchar(unescape_single_char(yytext[1], yyscanner),
653 yyscanner);
654 }
655 <xe>{xeoctesc} {
656 unsigned char c = strtoul(yytext + 1, NULL, 8);
657
658 check_escape_warning(yyscanner);
659 addlitchar(c, yyscanner);
660 if (c == '\0' || IS_HIGHBIT_SET(c))
661 yyextra->saw_non_ascii = true;
662 }
663 <xe>{xehexesc} {
664 unsigned char c = strtoul(yytext + 2, NULL, 16);
665
666 check_escape_warning(yyscanner);
667 addlitchar(c, yyscanner);
668 if (c == '\0' || IS_HIGHBIT_SET(c))
669 yyextra->saw_non_ascii = true;
670 }
671 <xq,xe,xus>{quotecontinue} {
672 /* ignore */
673 }
674 <xe>. {
675 /* This is only needed for \ just before EOF */
676 addlitchar(yytext[0], yyscanner);
677 }
678 <xq,xe,xus><<EOF>> { yyerror("unterminated quoted string"); }
679
680 {dolqdelim} {
681 SET_YYLLOC();
682 yyextra->dolqstart = pstrdup(yytext);
683 BEGIN(xdolq);
684 startlit();
685 }
686 {dolqfailed} {
687 SET_YYLLOC();
688 /* throw back all but the initial "$" */
689 yyless(1);
690 /* and treat it as {other} */
691 return yytext[0];
692 }
<xdolq>{dolqdelim} {
					if (strcmp(yytext, yyextra->dolqstart) != 0)
					{
						/*
						 * Not our closing delimiter.  Copy the "$..." part
						 * into the literal but leave the trailing "$" in
						 * the input so it is scanned again; think of
						 * $delim$...$junk$delim$
						 */
						int			keep = yyleng - 1;

						addlit(yytext, keep, yyscanner);
						yyless(keep);
					}
					else
					{
						/* Matching delimiter: the dollar-quoted string is done */
						pfree(yyextra->dolqstart);
						yyextra->dolqstart = NULL;
						BEGIN(INITIAL);
						yylval->str = litbufdup(yyscanner);
						return SCONST;
					}
				}
713 <xdolq>{dolqinside} {
714 addlit(yytext, yyleng, yyscanner);
715 }
716 <xdolq>{dolqfailed} {
717 addlit(yytext, yyleng, yyscanner);
718 }
719 <xdolq>. {
720 /* This is only needed for $ inside the quoted text */
721 addlitchar(yytext[0], yyscanner);
722 }
723 <xdolq><<EOF>> { yyerror("unterminated dollar-quoted string"); }
724
725 {xdstart} {
726 SET_YYLLOC();
727 BEGIN(xd);
728 startlit();
729 }
730 {xuistart} {
731 SET_YYLLOC();
732 BEGIN(xui);
733 startlit();
734 }
735 <xd>{xdstop} {
736 char *ident;
737
738 BEGIN(INITIAL);
739 if (yyextra->literallen == 0)
740 yyerror("zero-length delimited identifier");
741 ident = litbufdup(yyscanner);
742 if (yyextra->literallen >= NAMEDATALEN)
743 truncate_identifier(ident, yyextra->literallen, true);
744 yylval->str = ident;
745 return IDENT;
746 }
747 <xui>{dquote} {
748 yyless(1);
749 /* xuiend state looks for possible UESCAPE */
750 BEGIN(xuiend);
751 }
752 <xuiend>{whitespace} {
753 /* stay in xuiend state over whitespace */
754 }
755 <xuiend><<EOF>> |
756 <xuiend>{other} |
757 <xuiend>{xustop1} {
758 /* no UESCAPE after the quote, throw back everything */
759 char *ident;
760 int identlen;
761
762 yyless(0);
763
764 BEGIN(INITIAL);
765 if (yyextra->literallen == 0)
766 yyerror("zero-length delimited identifier");
767 ident = litbuf_udeescape('\\', yyscanner);
768 identlen = strlen(ident);
769 if (identlen >= NAMEDATALEN)
770 truncate_identifier(ident, identlen, true);
771 yylval->str = ident;
772 return IDENT;
773 }
774 <xuiend>{xustop2} {
775 /* found UESCAPE after the end quote */
776 char *ident;
777 int identlen;
778
779 BEGIN(INITIAL);
780 if (yyextra->literallen == 0)
781 yyerror("zero-length delimited identifier");
782 if (!check_uescapechar(yytext[yyleng - 2]))
783 {
784 SET_YYLLOC();
785 ADVANCE_YYLLOC(yyleng - 2);
786 yyerror("invalid Unicode escape character");
787 }
788 ident = litbuf_udeescape(yytext[yyleng - 2], yyscanner);
789 identlen = strlen(ident);
790 if (identlen >= NAMEDATALEN)
791 truncate_identifier(ident, identlen, true);
792 yylval->str = ident;
793 return IDENT;
794 }
795 <xd,xui>{xddouble} {
796 addlitchar('"', yyscanner);
797 }
798 <xd,xui>{xdinside} {
799 addlit(yytext, yyleng, yyscanner);
800 }
801 <xd,xui><<EOF>> { yyerror("unterminated quoted identifier"); }
802
{xufailed}	{
					SET_YYLLOC();
					/* keep only the leading u/U; everything after it is rescanned */
					yyless(1);
					/* the remaining single letter is handled like {identifier} */
					yylval->str = downcase_truncate_identifier(yytext, yyleng, true);
					return IDENT;
				}
814
815 {typecast} {
816 SET_YYLLOC();
817 return TYPECAST;
818 }
819
820 {dot_dot} {
821 SET_YYLLOC();
822 return DOT_DOT;
823 }
824
825 {colon_equals} {
826 SET_YYLLOC();
827 return COLON_EQUALS;
828 }
829
830 {equals_greater} {
831 SET_YYLLOC();
832 return EQUALS_GREATER;
833 }
834
835 {less_equals} {
836 SET_YYLLOC();
837 return LESS_EQUALS;
838 }
839
840 {greater_equals} {
841 SET_YYLLOC();
842 return GREATER_EQUALS;
843 }
844
845 {less_greater} {
846 /* We accept both "<>" and "!=" as meaning NOT_EQUALS */
847 SET_YYLLOC();
848 return NOT_EQUALS;
849 }
850
851 {not_equals} {
852 /* We accept both "<>" and "!=" as meaning NOT_EQUALS */
853 SET_YYLLOC();
854 return NOT_EQUALS;
855 }
856
857 {self} {
858 SET_YYLLOC();
859 return yytext[0];
860 }
861
862 {operator} {
863 /*
864 * Check for embedded slash-star or dash-dash; those
865 * are comment starts, so operator must stop there.
866 * Note that slash-star or dash-dash at the first
867 * character will match a prior rule, not this one.
868 */
869 int nchars = yyleng;
870 char *slashstar = strstr(yytext, "/*");
871 char *dashdash = strstr(yytext, "--");
872
873 if (slashstar && dashdash)
874 {
875 /* if both appear, take the first one */
876 if (slashstar > dashdash)
877 slashstar = dashdash;
878 }
879 else if (!slashstar)
880 slashstar = dashdash;
881 if (slashstar)
882 nchars = slashstar - yytext;
883
884 /*
885 * For SQL compatibility, '+' and '-' cannot be the
886 * last char of a multi-char operator unless the operator
887 * contains chars that are not in SQL operators.
888 * The idea is to lex '=-' as two operators, but not
889 * to forbid operator names like '?-' that could not be
890 * sequences of SQL operators.
891 */
892 while (nchars > 1 &&
893 (yytext[nchars - 1] == '+' ||
894 yytext[nchars - 1] == '-'))
895 {
896 int ic;
897
898 for (ic = nchars - 2; ic >= 0; ic--)
899 {
900 if (strchr("~!@#^&|`?%", yytext[ic]))
901 break;
902 }
903 if (ic >= 0)
904 break; /* found a char that makes it OK */
905 nchars--; /* else remove the +/-, and check again */
906 }
907
908 SET_YYLLOC();
909
910 if (nchars < yyleng)
911 {
912 /* Strip the unwanted chars from the token */
913 yyless(nchars);
914 /*
915 * If what we have left is only one char, and it's
916 * one of the characters matching "self", then
917 * return it as a character token the same way
918 * that the "self" rule would have.
919 */
920 if (nchars == 1 &&
921 strchr(",()[].;:+-*/%^<>=", yytext[0]))
922 return yytext[0];
923 }
924
925 /*
926 * Complain if operator is too long. Unlike the case
927 * for identifiers, we make this an error not a notice-
928 * and-truncate, because the odds are we are looking at
929 * a syntactic mistake anyway.
930 */
931 if (nchars >= NAMEDATALEN)
932 yyerror("operator too long");
933
934 yylval->str = pstrdup(yytext);
935 return Op;
936 }
937
{param}			{
					/*
					 * Positional parameter: "$" followed by digits.
					 *
					 * The digits are converted with strtol() instead of
					 * atol(), because atol() gives no indication when the
					 * number does not fit and the value would then be
					 * silently mangled on its way into ival.  The pattern
					 * guarantees that only digits follow the "$", so no
					 * end-pointer check is needed.
					 *
					 * NOTE(review): the INT_MAX bound assumes yylval->ival
					 * is a plain int -- confirm against core_YYSTYPE.
					 */
					long		val;

					SET_YYLLOC();
					errno = 0;
					val = strtol(yytext + 1, NULL, 10);
					if (errno == ERANGE || val > INT_MAX)
						yyerror("parameter number too large");
					yylval->ival = val;
					return PARAM;
				}
943
944 {integer} {
945 SET_YYLLOC();
946 return process_integer_literal(yytext, yylval);
947 }
948 {decimal} {
949 SET_YYLLOC();
950 yylval->str = pstrdup(yytext);
951 return FCONST;
952 }
953 {decimalfail} {
954 /* throw back the .., and treat as integer */
955 yyless(yyleng - 2);
956 SET_YYLLOC();
957 return process_integer_literal(yytext, yylval);
958 }
959 {real} {
960 SET_YYLLOC();
961 yylval->str = pstrdup(yytext);
962 return FCONST;
963 }
964 {realfail1} {
965 /*
966 * throw back the [Ee], and treat as {decimal}. Note
967 * that it is possible the input is actually {integer},
968 * but since this case will almost certainly lead to a
969 * syntax error anyway, we don't bother to distinguish.
970 */
971 yyless(yyleng - 1);
972 SET_YYLLOC();
973 yylval->str = pstrdup(yytext);
974 return FCONST;
975 }
976 {realfail2} {
977 /* throw back the [Ee][+-], and proceed as above */
978 yyless(yyleng - 2);
979 SET_YYLLOC();
980 yylval->str = pstrdup(yytext);
981 return FCONST;
982 }
983
984
{identifier}	{
					const ScanKeyword *kw;

					SET_YYLLOC();

					/* Look the token up in the keyword list first */
					kw = ScanKeywordLookup(yytext,
										   yyextra->keywords,
										   yyextra->num_keywords);
					if (kw == NULL)
					{
						/*
						 * Not a keyword: return it as an identifier,
						 * converted to lower case and truncated if
						 * necessary.
						 */
						yylval->str = downcase_truncate_identifier(yytext, yyleng, true);
						return IDENT;
					}

					yylval->keyword = kw->name;
					return kw->value;
				}
1009
1010 {other} {
1011 SET_YYLLOC();
1012 return yytext[0];
1013 }
1014
1015 <<EOF>> {
1016 SET_YYLLOC();
1017 yyterminate();
1018 }
1019
1020 %%
1021
1022 /*
1023 * Arrange access to yyextra for subroutines of the main yylex() function.
1024 * We expect each subroutine to have a yyscanner parameter. Rather than
1025 * use the yyget_xxx functions, which might or might not get inlined by the
1026 * compiler, we cheat just a bit and cast yyscanner to the right type.
1027 */
1028 #undef yyextra
1029 #define yyextra (((struct yyguts_t *) yyscanner)->yyextra_r)
1030
1031 /* Likewise for a couple of other things we need. */
1032 #undef yylloc
1033 #define yylloc (((struct yyguts_t *) yyscanner)->yylloc_r)
1034 #undef yyleng
1035 #define yyleng (((struct yyguts_t *) yyscanner)->yyleng_r)
1036
1037
1038 /*
1039 * scanner_errposition
1040 * Report a lexer or grammar error cursor position, if possible.
1041 *
1042 * This is expected to be used within an ereport() call. The return value
1043 * is a dummy (always 0, in fact).
1044 *
1045 * Note that this can only be used for messages emitted during raw parsing
1046 * (essentially, scan.l and gram.y), since it requires the yyscanner struct
1047 * to still be available.
1048 */
1049 int
1050 scanner_errposition(int location, core_yyscan_t yyscanner)
1051 {
1052 #ifdef PGPOOL_NOT_USED
1053 int pos;
1054
1055 if (location < 0)
1056 return 0; /* no-op if location is unknown */
1057
1058 /* Convert byte offset to character number */
1059 pos = pg_mbstrlen_with_len(yyextra->scanbuf, location) + 1;
1060 /* And pass it to the ereport mechanism */
1061 return errposition(pos);
1062 #endif
1063 return 0;
1064 }
1065
1066 /*
1067 * scanner_yyerror
1068 * Report a lexer or grammar error.
1069 *
1070 * The message's cursor position is whatever YYLLOC was last set to,
1071 * ie, the start of the current token if called within yylex(), or the
1072 * most recently lexed token if called from the grammar.
1073 * This is OK for syntax error messages from the Bison parser, because Bison
1074 * parsers report error as soon as the first unparsable token is reached.
1075 * Beware of using yyerror for other purposes, as the cursor position might
1076 * be misleading!
1077 */
1078 void
1079 scanner_yyerror(const char *message, core_yyscan_t yyscanner)
1080 {
1081 const char *loc = yyextra->scanbuf + *yylloc;
1082
1083 if (*loc == YY_END_OF_BUFFER_CHAR)
1084 {
1085 ereport(ERROR,
1086 (errcode(ERRCODE_SYNTAX_ERROR),
1087 /* translator: %s is typically the translation of "syntax error" */
1088 errmsg("%s at end of input", _(message)),
1089 lexer_errposition()));
1090 }
1091 else
1092 {
1093 ereport(ERROR,
1094 (errcode(ERRCODE_SYNTAX_ERROR),
1095 /* translator: first %s is typically the translation of "syntax error" */
1096 errmsg("%s at or near \"%s\"", _(message), loc),
1097 lexer_errposition()));
1098 }
1099 }
1100
1101
1102 /*
1103 * Called before any actual parsing is done
1104 */
1105 core_yyscan_t
1106 scanner_init(const char *str,
1107 core_yy_extra_type *yyext,
1108 const ScanKeyword *keywords,
1109 int num_keywords)
1110 {
1111 Size slen = strlen(str);
1112 yyscan_t scanner;
1113
1114 if (yylex_init(&scanner) != 0)
1115 elog(ERROR, "yylex_init() failed: %m");
1116
1117 core_yyset_extra(yyext, scanner);
1118
1119 yyext->keywords = keywords;
1120 yyext->num_keywords = num_keywords;
1121
1122 yyext->backslash_quote = backslash_quote;
1123 yyext->escape_string_warning = escape_string_warning;
1124 yyext->standard_conforming_strings = standard_conforming_strings;
1125
1126 /*
1127 * Make a scan buffer with special termination needed by flex.
1128 */
1129 yyext->scanbuf = (char *) palloc(slen + 2);
1130 yyext->scanbuflen = slen;
1131 memcpy(yyext->scanbuf, str, slen);
1132 yyext->scanbuf[slen] = yyext->scanbuf[slen + 1] = YY_END_OF_BUFFER_CHAR;
1133 yy_scan_buffer(yyext->scanbuf, slen + 2, scanner);
1134
1135 /* initialize literal buffer to a reasonable but expansible size */
1136 yyext->literalalloc = 1024;
1137 yyext->literalbuf = (char *) palloc(yyext->literalalloc);
1138 yyext->literallen = 0;
1139
1140 return scanner;
1141 }
1142
1143
1144 /*
1145 * Called after parsing is done to clean up after scanner_init()
1146 */
1147 void
1148 scanner_finish(core_yyscan_t yyscanner)
1149 {
1150 /*
1151 * We don't bother to call yylex_destroy(), because all it would do is
1152 * pfree a small amount of control storage. It's cheaper to leak the
1153 * storage until the parsing context is destroyed. The amount of space
1154 * involved is usually negligible compared to the output parse tree
1155 * anyway.
1156 *
1157 * We do bother to pfree the scanbuf and literal buffer, but only if they
1158 * represent a nontrivial amount of space. The 8K cutoff is arbitrary.
1159 */
1160 if (yyextra->scanbuflen >= 8192)
1161 pfree(yyextra->scanbuf);
1162 if (yyextra->literalalloc >= 8192)
1163 pfree(yyextra->literalbuf);
1164 }
1165
1166
1167 static void
1168 addlit(char *ytext, int yleng, core_yyscan_t yyscanner)
1169 {
1170 /* enlarge buffer if needed */
1171 if ((yyextra->literallen + yleng) >= yyextra->literalalloc)
1172 {
1173 do
1174 {
1175 yyextra->literalalloc *= 2;
1176 } while ((yyextra->literallen + yleng) >= yyextra->literalalloc);
1177 yyextra->literalbuf = (char *) repalloc(yyextra->literalbuf,
1178 yyextra->literalalloc);
1179 }
1180 /* append new data */
1181 memcpy(yyextra->literalbuf + yyextra->literallen, ytext, yleng);
1182 yyextra->literallen += yleng;
1183 }
1184
1185
1186 static void
1187 addlitchar(unsigned char ychar, core_yyscan_t yyscanner)
1188 {
1189 /* enlarge buffer if needed */
1190 if ((yyextra->literallen + 1) >= yyextra->literalalloc)
1191 {
1192 yyextra->literalalloc *= 2;
1193 yyextra->literalbuf = (char *) repalloc(yyextra->literalbuf,
1194 yyextra->literalalloc);
1195 }
1196 /* append new data */
1197 yyextra->literalbuf[yyextra->literallen] = ychar;
1198 yyextra->literallen += 1;
1199 }
1200
1201
1202 /*
1203 * Create a palloc'd copy of literalbuf, adding a trailing null.
1204 */
1205 static char *
1206 litbufdup(core_yyscan_t yyscanner)
1207 {
1208 int llen = yyextra->literallen;
1209 char *new;
1210
1211 new = palloc(llen + 1);
1212 memcpy(new, yyextra->literalbuf, llen);
1213 new[llen] = '\0';
1214 return new;
1215 }
1216
1217 static int
1218 process_integer_literal(const char *token, YYSTYPE *lval)
1219 {
1220 long val;
1221 char *endptr;
1222
1223 errno = 0;
1224 val = strtol(token, &endptr, 10);
1225 if (*endptr != '\0' || errno == ERANGE
1226 #ifdef HAVE_LONG_INT_64
1227 /* if long > 32 bits, check for overflow of int4 */
1228 || val != (long) ((int32) val)
1229 #endif
1230 )
1231 {
1232 /* integer too large, treat it as a float */
1233 lval->str = pstrdup(token);
1234 return FCONST;
1235 }
1236 lval->ival = val;
1237 return ICONST;
1238 }
1239
1240 static unsigned int
1241 hexval(unsigned char c)
1242 {
1243 if (c >= '0' && c <= '9')
1244 return c - '0';
1245 if (c >= 'a' && c <= 'f')
1246 return c - 'a' + 0xA;
1247 if (c >= 'A' && c <= 'F')
1248 return c - 'A' + 0xA;
1249 elog(ERROR, "invalid hexadecimal digit");
1250 return 0; /* not reached */
1251 }
1252
1253 static void
1254 check_unicode_value(pg_wchar c, char *loc, core_yyscan_t yyscanner)
1255 {
1256 if (GetDatabaseEncoding() == PG_UTF8)
1257 return;
1258
1259 if (c > 0x7F)
1260 {
1261 ADVANCE_YYLLOC(loc - yyextra->literalbuf + 3); /* 3 for U&" */
1262 yyerror("Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8");
1263 }
1264 }
1265
1266 static bool
1267 is_utf16_surrogate_first(pg_wchar c)
1268 {
1269 return (c >= 0xD800 && c <= 0xDBFF);
1270 }
1271
1272 static bool
1273 is_utf16_surrogate_second(pg_wchar c)
1274 {
1275 return (c >= 0xDC00 && c <= 0xDFFF);
1276 }
1277
1278 static pg_wchar
1279 surrogate_pair_to_codepoint(pg_wchar first, pg_wchar second)
1280 {
1281 return ((first & 0x3FF) << 10) + 0x10000 + (second & 0x3FF);
1282 }
1283
1284 static void
1285 addunicode(pg_wchar c, core_yyscan_t yyscanner)
1286 {
1287 char buf[8];
1288
1289 if (c == 0 || c > 0x10FFFF)
1290 yyerror("invalid Unicode escape value");
1291 if (c > 0x7F)
1292 {
1293 if (GetDatabaseEncoding() != PG_UTF8)
1294 yyerror("Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8");
1295 yyextra->saw_non_ascii = true;
1296 }
1297 unicode_to_utf8(c, (unsigned char *) buf);
1298 addlit(buf, pg_mblen(buf), yyscanner);
1299 }
1300
/*
 * check_uescapechar
 *		Is 'escape' acceptable as a Unicode escape character (UESCAPE syntax)?
 *
 * Hexadecimal digits, '+', single and double quotes, and whitespace are
 * not acceptable; any other character is.
 */
static bool
check_uescapechar(unsigned char escape)
{
	if (isxdigit(escape))
		return false;

	if (escape == '+' || escape == '\'' || escape == '"')
		return false;

	return !scanner_isspace(escape);
}
1316
1317 /* like litbufdup, but handle unicode escapes */
1318 static char *
1319 litbuf_udeescape(unsigned char escape, core_yyscan_t yyscanner)
1320 {
1321 char *new;
1322 char *litbuf,
1323 *in,
1324 *out;
1325 pg_wchar pair_first = 0;
1326
1327 /* Make literalbuf null-terminated to simplify the scanning loop */
1328 litbuf = yyextra->literalbuf;
1329 litbuf[yyextra->literallen] = '\0';
1330
1331 /*
1332 * This relies on the subtle assumption that a UTF-8 expansion cannot be
1333 * longer than its escaped representation.
1334 */
1335 new = palloc(yyextra->literallen + 1);
1336
1337 in = litbuf;
1338 out = new;
1339 while (*in)
1340 {
1341 if (in[0] == escape)
1342 {
1343 if (in[1] == escape)
1344 {
1345 if (pair_first)
1346 {
1347 ADVANCE_YYLLOC(in - litbuf + 3); /* 3 for U&" */
1348 yyerror("invalid Unicode surrogate pair");
1349 }
1350 *out++ = escape;
1351 in += 2;
1352 }
1353 else if (isxdigit((unsigned char) in[1]) &&
1354 isxdigit((unsigned char) in[2]) &&
1355 isxdigit((unsigned char) in[3]) &&
1356 isxdigit((unsigned char) in[4]))
1357 {
1358 pg_wchar unicode;
1359
1360 unicode = (hexval(in[1]) << 12) +
1361 (hexval(in[2]) << 8) +
1362 (hexval(in[3]) << 4) +
1363 hexval(in[4]);
1364 check_unicode_value(unicode, in, yyscanner);
1365 if (pair_first)
1366 {
1367 if (is_utf16_surrogate_second(unicode))
1368 {
1369 unicode = surrogate_pair_to_codepoint(pair_first, unicode);
1370 pair_first = 0;
1371 }
1372 else
1373 {
1374 ADVANCE_YYLLOC(in - litbuf + 3); /* 3 for U&" */
1375 yyerror("invalid Unicode surrogate pair");
1376 }
1377 }
1378 else if (is_utf16_surrogate_second(unicode))
1379 yyerror("invalid Unicode surrogate pair");
1380
1381 if (is_utf16_surrogate_first(unicode))
1382 pair_first = unicode;
1383 else
1384 {
1385 unicode_to_utf8(unicode, (unsigned char *) out);
1386 out += pg_mblen(out);
1387 }
1388 in += 5;
1389 }
1390 else if (in[1] == '+' &&
1391 isxdigit((unsigned char) in[2]) &&
1392 isxdigit((unsigned char) in[3]) &&
1393 isxdigit((unsigned char) in[4]) &&
1394 isxdigit((unsigned char) in[5]) &&
1395 isxdigit((unsigned char) in[6]) &&
1396 isxdigit((unsigned char) in[7]))
1397 {
1398 pg_wchar unicode;
1399
1400 unicode = (hexval(in[2]) << 20) +
1401 (hexval(in[3]) << 16) +
1402 (hexval(in[4]) << 12) +
1403 (hexval(in[5]) << 8) +
1404 (hexval(in[6]) << 4) +
1405 hexval(in[7]);
1406 check_unicode_value(unicode, in, yyscanner);
1407 if (pair_first)
1408 {
1409 if (is_utf16_surrogate_second(unicode))
1410 {
1411 unicode = surrogate_pair_to_codepoint(pair_first, unicode);
1412 pair_first = 0;
1413 }
1414 else
1415 {
1416 ADVANCE_YYLLOC(in - litbuf + 3); /* 3 for U&" */
1417 yyerror("invalid Unicode surrogate pair");
1418 }
1419 }
1420 else if (is_utf16_surrogate_second(unicode))
1421 yyerror("invalid Unicode surrogate pair");
1422
1423 if (is_utf16_surrogate_first(unicode))
1424 pair_first = unicode;
1425 else
1426 {
1427 unicode_to_utf8(unicode, (unsigned char *) out);
1428 out += pg_mblen(out);
1429 }
1430 in += 8;
1431 }
1432 else
1433 {
1434 ADVANCE_YYLLOC(in - litbuf + 3); /* 3 for U&" */
1435 yyerror("invalid Unicode escape value");
1436 }
1437 }
1438 else
1439 {
1440 if (pair_first)
1441 {
1442 ADVANCE_YYLLOC(in - litbuf + 3); /* 3 for U&" */
1443 yyerror("invalid Unicode surrogate pair");
1444 }
1445 *out++ = *in++;
1446 }
1447 }
1448
1449 *out = '\0';
1450
1451 /*
1452 * We could skip pg_verifymbstr if we didn't process any non-7-bit-ASCII
1453 * codes; but it's probably not worth the trouble, since this isn't likely
1454 * to be a performance-critical path.
1455 */
1456 pg_verifymbstr(new, out - new, false);
1457 return new;
1458 }
1459
1460 static unsigned char
1461 unescape_single_char(unsigned char c, core_yyscan_t yyscanner)
1462 {
1463 switch (c)
1464 {
1465 case 'b':
1466 return '\b';
1467 case 'f':
1468 return '\f';
1469 case 'n':
1470 return '\n';
1471 case 'r':
1472 return '\r';
1473 case 't':
1474 return '\t';
1475 default:
1476 /* check for backslash followed by non-7-bit-ASCII */
1477 if (c == '\0' || IS_HIGHBIT_SET(c))
1478 yyextra->saw_non_ascii = true;
1479
1480 return c;
1481 }
1482 }
1483
1484 static void
1485 check_string_escape_warning(unsigned char ychar, core_yyscan_t yyscanner)
1486 {
1487 if (ychar == '\'')
1488 {
1489 if (yyextra->warn_on_first_escape && yyextra->escape_string_warning)
1490 ereport(WARNING,
1491 (errcode(ERRCODE_NONSTANDARD_USE_OF_ESCAPE_CHARACTER),
1492 errmsg("nonstandard use of \\' in a string literal"),
1493 errhint("Use '' to write quotes in strings, or use the escape string syntax (E'...')."),
1494 lexer_errposition()));
1495 yyextra->warn_on_first_escape = false; /* warn only once per string */
1496 }
1497 else if (ychar == '\\')
1498 {
1499 if (yyextra->warn_on_first_escape && yyextra->escape_string_warning)
1500 ereport(WARNING,
1501 (errcode(ERRCODE_NONSTANDARD_USE_OF_ESCAPE_CHARACTER),
1502 errmsg("nonstandard use of \\\\ in a string literal"),
1503 errhint("Use the escape string syntax for backslashes, e.g., E'\\\\'."),
1504 lexer_errposition()));
1505 yyextra->warn_on_first_escape = false; /* warn only once per string */
1506 }
1507 else
1508 check_escape_warning(yyscanner);
1509 }
1510
1511 static void
1512 check_escape_warning(core_yyscan_t yyscanner)
1513 {
1514 if (yyextra->warn_on_first_escape && yyextra->escape_string_warning)
1515 ereport(WARNING,
1516 (errcode(ERRCODE_NONSTANDARD_USE_OF_ESCAPE_CHARACTER),
1517 errmsg("nonstandard use of escape in a string literal"),
1518 errhint("Use the escape string syntax for escapes, e.g., E'\\r\\n'."),
1519 lexer_errposition()));
1520 yyextra->warn_on_first_escape = false; /* warn only once per string */
1521 }
1522
1523 /*
1524 * Interface functions to make flex use palloc() instead of malloc().
1525 * It'd be better to make these static, but flex insists otherwise.
1526 */
1527
1528 void *
1529 core_yyalloc(yy_size_t bytes, core_yyscan_t yyscanner)
1530 {
1531 return palloc(bytes);
1532 }
1533
1534 void *
1535 core_yyrealloc(void *ptr, yy_size_t bytes, core_yyscan_t yyscanner)
1536 {
1537 if (ptr)
1538 return repalloc(ptr, bytes);
1539 else
1540 return palloc(bytes);
1541 }
1542
1543 void
1544 core_yyfree(void *ptr, core_yyscan_t yyscanner)
1545 {
1546 if (ptr)
1547 pfree(ptr);
1548 }
1549