1 %{
2 /*
** A scanner that splits SQL text into tokens (flex prefix orafce_sql_yy)
4 */
5
6 #include "postgres.h"
7
8 #include "parser/gramparse.h"
9 /* Not needed now that this file is compiled as part of gram.y */
10 /* #include "parser/parse.h" */
11 #include "parser/scansup.h"
12 #include "mb/pg_wchar.h"
13
14 #include "parse_keyword.h"
15
16 /* Avoid exit() on fatal scanner errors (a bit ugly -- see yy_fatal_error) */
17 #undef fprintf
18 #define fprintf(file, fmt, msg) fprintf_to_ereport(fmt, msg)
19
20 static void
fprintf_to_ereport(const char * fmt,const char * msg)21 fprintf_to_ereport(const char *fmt, const char *msg)
22 {
23 ereport(ERROR, (errmsg_internal("%s", msg)));
24 }
25
26 static int xcdepth = 0; /* depth of nesting in slash-star comments */
27 static char *dolqstart; /* current $foo$ quote start string */
28 static bool extended_string = false;
29
30
31 /* No reason to constrain amount of data slurped */
32 #define YY_READ_BUF_SIZE 16777216
33
34 /* Handles to the buffer that the lexer uses internally */
35
36
37 static YY_BUFFER_STATE scanbufhandle;
38
39 #define SET_YYLLOC() (orafce_sql_yylval.val.lloc = yytext - scanbuf)
40
/* The text being scanned; SET_YYLLOC() measures token offsets from its start */
42 static char *scanbuf;
43
44 /* flex 2.5.4 doesn't bother with a decl for this */
45
46 int orafce_sql_yylex(void);
47
48 void orafce_sql_scanner_init(const char *str);
49 void orafce_sql_scanner_finish(void);
50
51 /*
52 * literalbuf is used to accumulate literal values when multiple rules
53 * are needed to parse a single literal. Call startlit to reset buffer
54 * to empty, addlit to add text. Note that the buffer is palloc'd and
55 * starts life afresh on every parse cycle.
56 */
57 static char *literalbuf; /* expandable buffer */
58 static int literallen; /* actual current length */
59 static int literalalloc; /* current allocated buffer size */
60
61 #define startlit() (literalbuf[0] = '\0', literallen = 0)
62 static void addlit(char *ytext, int yleng);
63 static void addlitchar(unsigned char ychar);
64 static char *litbufdup(void);
65
66 static int lexer_errposition(void);
67
68 /*
69 * Each call to yylex must set yylloc to the location of the found token
70 * (expressed as a byte offset from the start of the input text).
71 * When we parse a token that requires multiple lexer rules to process,
72 * this should be done in the first such rule, else yylloc will point
73 * into the middle of the token.
74 */
75
/* Repeated declaration of scanbuf, the text being scanned (also declared earlier in this prologue) */
77 static char *scanbuf;
78
79 static unsigned char unescape_single_char(unsigned char c);
80
81 #ifndef _pg_mbstrlen_with_len
82 #define _pg_mbstrlen_with_len(buf,loc) pg_mbstrlen_with_len(buf,loc)
83 #endif
84
85 %}
86
87 %option 8bit
88 %option never-interactive
89 %option nodefault
90 %option noinput
91 %option nounput
92 %option noyywrap
93 %option prefix="orafce_sql_yy"
94
95 /*
96 * OK, here is a short description of lex/flex rules behavior.
97 * The longest pattern which matches an input string is always chosen.
98 * For equal-length patterns, the first occurring in the rules list is chosen.
99 * INITIAL is the starting state, to which all non-conditional rules apply.
100 * Exclusive states change parsing rules while the state is active. When in
101 * an exclusive state, only those rules defined for that state apply.
102 *
103 * We use exclusive states for quoted strings, extended comments,
104 * and to eliminate parsing troubles for numeric strings.
105 * Exclusive states:
106 * <xb> bit string literal
107 * <xc> extended C-style comments
108 * <xd> delimited identifiers (double-quoted identifiers)
109 * <xh> hexadecimal numeric string
110 * <xq> standard quoted strings
111 * <xe> extended quoted strings (support backslash escape sequences)
112 * <xdolq> $foo$ quoted strings
113 */
114
115 %x xb
116 %x xc
117 %x xd
118 %x xh
119 %x xe
120 %x xq
121 %x xdolq
122
123
124 /*
125 * In order to make the world safe for Windows and Mac clients as well as
126 * Unix ones, we accept either \n or \r as a newline. A DOS-style \r\n
127 * sequence will be seen as two successive newlines, but that doesn't cause
128 * any problems. Comments that start with -- and extend to the next
129 * newline are treated as equivalent to a single whitespace character.
130 *
131 * NOTE a fine point: if there is no newline following --, we will absorb
132 * everything to the end of the input as a comment. This is correct. Older
133 * versions of Postgres failed to recognize -- as a comment if the input
134 * did not end with a newline.
135 *
136 * XXX perhaps \f (formfeed) should be treated as a newline as well?
137 *
138 * XXX if you change the set of whitespace characters, fix scanner_isspace()
139 * to agree, and see also the plpgsql lexer.
140 */
141
142 space [ \t\n\r\f]
143 horiz_space [ \t\f]
144 newline [\n\r]
145 non_newline [^\n\r]
146
147 comment ("--"{non_newline}*)
148
149 whitespace {space}+
150
151 /*
152 * SQL requires at least one newline in the whitespace separating
153 * string literals that are to be concatenated. Silly, but who are we
154 * to argue? Note that {whitespace_with_newline} should not have * after
155 * it, whereas {whitespace} should generally have a * after it...
156 */
157
158 special_whitespace ({space}+|{comment}{newline})
159 horiz_whitespace ({horiz_space}|{comment})
160 whitespace_with_newline ({horiz_whitespace}*{newline}{special_whitespace}*)
161
162 /*
163 * To ensure that {quotecontinue} can be scanned without having to back up
164 * if the full pattern isn't matched, we include trailing whitespace in
165 * {quotestop}. This matches all cases where {quotecontinue} fails to match,
166 * except for {quote} followed by whitespace and just one "-" (not two,
167 * which would start a {comment}). To cover that we have {quotefail}.
168 * The actions for {quotestop} and {quotefail} must throw back characters
169 * beyond the quote proper.
170 */
171 quote '
172 quotestop {quote}{whitespace}*
173 quotecontinue {quote}{whitespace_with_newline}{quote}
174 quotefail {quote}{whitespace}*"-"
175
176 /* Bit string
177 * It is tempting to scan the string for only those characters
178 * which are allowed. However, this leads to silently swallowed
179 * characters if illegal characters are included in the string.
180 * For example, if xbinside is [01] then B'ABCD' is interpreted
181 * as a zero-length string, and the ABCD' is lost!
182 * Better to pass the string forward and let the input routines
183 * validate the contents.
184 */
185 xbstart [bB]{quote}
186 xbinside [^']*
187
188 /* Hexadecimal number */
189 xhstart [xX]{quote}
190 xhinside [^']*
191
192 /* National character */
193 xnstart [nN]{quote}
194
195 /* Quoted string that allows backslash escapes */
196 xestart [eE]{quote}
197 xeinside [^\\']+
198 xeescape [\\][^0-7]
199 xeoctesc [\\][0-7]{1,3}
200 xehexesc [\\]x[0-9A-Fa-f]{1,2}
201
202 /* Extended quote
203 * xqdouble implements embedded quote, ''''
204 */
205 xqstart {quote}
206 xqdouble {quote}{quote}
207 xqinside [^']+
208
209 /* $foo$ style quotes ("dollar quoting")
210 * The quoted string starts with $foo$ where "foo" is an optional string
211 * in the form of an identifier, except that it may not contain "$",
212 * and extends to the first occurrence of an identical string.
213 * There is *no* processing of the quoted text.
214 *
215 * {dolqfailed} is an error rule to avoid scanner backup when {dolqdelim}
216 * fails to match its trailing "$".
217 */
218 dolq_start [A-Za-z\200-\377_]
219 dolq_cont [A-Za-z\200-\377_0-9]
220 dolqdelim \$({dolq_start}{dolq_cont}*)?\$
221 dolqfailed \${dolq_start}{dolq_cont}*
222 dolqinside [^$]+
223
224 /* Double quote
225 * Allows embedded spaces and other special characters into identifiers.
226 */
227 dquote \"
228 xdstart {dquote}
229 xdstop {dquote}
230 xddouble {dquote}{dquote}
231 xdinside [^"]+
232
233 /* C-style comments
234 *
235 * The "extended comment" syntax closely resembles allowable operator syntax.
236 * The tricky part here is to get lex to recognize a string starting with
237 * slash-star as a comment, when interpreting it as an operator would produce
238 * a longer match --- remember lex will prefer a longer match! Also, if we
239 * have something like plus-slash-star, lex will think this is a 3-character
240 * operator whereas we want to see it as a + operator and a comment start.
241 * The solution is two-fold:
242 * 1. append {op_chars}* to xcstart so that it matches as much text as
243 * {operator} would. Then the tie-breaker (first matching rule of same
244 * length) ensures xcstart wins. We put back the extra stuff with yyless()
245 * in case it contains a star-slash that should terminate the comment.
246 * 2. In the operator rule, check for slash-star within the operator, and
247 * if found throw it back with yyless(). This handles the plus-slash-star
248 * problem.
249 * Dash-dash comments have similar interactions with the operator rule.
250 */
251 xcstart \/\*{op_chars}*
252 xcstop \*+\/
253 xcinside [^*/]+
254
255 digit [0-9]
256 ident_start [A-Za-z\200-\377_]
257 ident_cont [A-Za-z\200-\377_0-9\$]
258
259 identifier {ident_start}{ident_cont}*
260
261 typecast "::"
262
263 /*
264 * "self" is the set of chars that should be returned as single-character
265 * tokens. "op_chars" is the set of chars that can make up "Op" tokens,
266 * which can be one or more characters long (but if a single-char token
267 * appears in the "self" set, it is not to be returned as an Op). Note
268 * that the sets overlap, but each has some chars that are not in the other.
269 *
270 * If you change either set, adjust the character lists appearing in the
271 * rule for "operator"!
272 */
273 self [,()\[\].;\:\+\-\*\/\%\^\<\>\=]
274 op_chars [\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
275 operator {op_chars}+
276
277 /* we no longer allow unary minus in numbers.
278 * instead we pass it separately to parser. there it gets
279 * coerced via doNegate() -- Leon aug 20 1999
280 *
281 * {realfail1} and {realfail2} are added to prevent the need for scanner
282 * backup when the {real} rule fails to match completely.
283 */
284
285 integer {digit}+
286 decimal (({digit}*\.{digit}+)|({digit}+\.{digit}*))
287 real ({integer}|{decimal})[Ee][-+]?{digit}+
288 realfail1 ({integer}|{decimal})[Ee]
289 realfail2 ({integer}|{decimal})[Ee][-+]
290
291 param \${integer}
292
293 other .
294
295 /*
296 * Dollar quoted strings are totally opaque, and no escaping is done on them.
297 * Other quoted strings must allow some special characters such as single-quote
298 * and newline.
299 * Embedded single-quotes are implemented both in the SQL standard
300 * style of two adjacent single quotes "''" and in the Postgres/Java style
301 * of escaped-quote "\'".
302 * Other embedded escaped characters are matched explicitly and the leading
303 * backslash is dropped from the string.
304 * Note that xcstart must appear before operator, as explained above!
305 * Also whitespace (comment) must appear before operator.
306 */
307
308 %%
309
310 {whitespace} {
311 SET_YYLLOC();
312 yylval.val.str = yytext;
313 yylval.val.modificator = NULL;
314 yylval.val.keycode = -1;
315 yylval.val.sep = NULL;
316 return X_WHITESPACE;
317 }
318
319 {comment} {
320 SET_YYLLOC();
321 yylval.val.str = yytext;
322 yylval.val.modificator = "sc";
323 yylval.val.keycode = -1;
324 yylval.val.sep = NULL;
325 return X_COMMENT;
326 }
327
328
329 {xcstart} {
330 /* Set location in case of syntax error in comment */
331 SET_YYLLOC();
332 xcdepth = 0;
333 BEGIN(xc);
334 /* Put back any characters past slash-star; see above */
335 startlit();
336 addlitchar('/');
337 addlitchar('*');
338
339 yyless(2);
340 }
341
342 <xc>{xcstart} {
343 xcdepth++;
344 /* Put back any characters past slash-star; see above */
345 addlitchar('/');
346 addlitchar('*');
347
348 yyless(2);
349 }
350
351 <xc>{xcstop} {
352 if (xcdepth <= 0)
353 {
354 BEGIN(INITIAL);
355 addlitchar('*');
356 addlitchar('/');
357
358 yylval.val.str = litbufdup();
359 yylval.val.modificator = "ec";
360 yylval.val.keycode = -1;
361 yylval.val.sep = NULL;
362 return X_COMMENT;
363 }
364 else
365 {
366 xcdepth--;
367 addlitchar('*');
368 addlitchar('/');
369 }
370
371 }
372
373 <xc>{xcinside} {
374 addlit(yytext, yyleng);
375 }
376
377 <xc>{op_chars} {
378 addlit(yytext, yyleng);
379 }
380
381 <xc>\*+ {
382 addlit(yytext, yyleng);
383 }
384
385 <xc><<EOF>> {
386 yylval.val.str = litbufdup();
387 yylval.val.modificator = "ecu";
388 yylval.val.keycode = -1;
389 yylval.val.sep = NULL;
390 return X_COMMENT;
391
392 }
393
394 {xbstart} {
395 /* Binary bit type.
396 * At some point we should simply pass the string
397 * forward to the parser and label it there.
398 * In the meantime, place a leading "b" on the string
399 * to mark it for the input routine as a binary string.
400 */
401 SET_YYLLOC();
402 BEGIN(xb);
403 startlit();
404 addlitchar('b');
405 }
406 <xb>{quotestop} |
407 <xb>{quotefail} {
408 yyless(1);
409 BEGIN(INITIAL);
410 yylval.val.str = litbufdup();
411 yylval.val.modificator = "b";
412 yylval.val.keycode = -1;
413 yylval.val.sep = NULL;
414 return X_NCONST;
415 }
416 <xh>{xhinside} |
417 <xb>{xbinside} {
418 addlit(yytext, yyleng);
419 }
420 <xh>{quotecontinue} |
421 <xb>{quotecontinue} {
422 /* ignore */
423 }
424 <xb><<EOF>> {
425 yylval.val.str = litbufdup();
426 yylval.val.modificator = "bu";
427 yylval.val.keycode = -1;
428 yylval.val.sep = NULL;
429 return X_NCONST;
430 }
431
432 {xhstart} {
433 /* Hexadecimal bit type.
434 * At some point we should simply pass the string
435 * forward to the parser and label it there.
436 * In the meantime, place a leading "x" on the string
437 * to mark it for the input routine as a hex string.
438 */
439 SET_YYLLOC();
440 BEGIN(xh);
441 startlit();
442 addlitchar('x');
443 }
444 <xh>{quotestop} |
445 <xh>{quotefail} {
446 yyless(1);
447 BEGIN(INITIAL);
448 yylval.val.str = litbufdup();
449 yylval.val.modificator = "x";
450 yylval.val.keycode = -1;
451 yylval.val.sep = NULL;
452 return X_NCONST;
453 }
454 <xh><<EOF>> {
455 yylval.val.str = litbufdup();
456 yylval.val.modificator = "xu";
457 yylval.val.keycode = -1;
458 yylval.val.sep = NULL;
459 return X_NCONST;
460 }
461
462 {xnstart} {
463 /* National character.
464 * We will pass this along as a normal character string,
465 * but preceded with an internally-generated "NCHAR".
466 */
467 const char *keyword;
468 int keycode;
469
470 SET_YYLLOC();
471 yyless(1); /* eat only 'n' this time */
472 /* nchar had better be a keyword! */
473 keyword = orafce_scan_keyword("nchar", &keycode);
474 Assert(keyword != NULL);
475 yylval.val.str = (char*) keyword;
476 yylval.val.keycode = keycode;
477 yylval.val.modificator = NULL;
478 yylval.val.sep = NULL;
479 return X_KEYWORD;
480 }
481
482 {xqstart} {
483 SET_YYLLOC();
484 BEGIN(xq);
485 extended_string = false;
486 startlit();
487 }
488 {xestart} {
489 SET_YYLLOC();
490 BEGIN(xe);
491 extended_string = true;
492 startlit();
493 }
494 <xq,xe>{quotestop} |
495 <xq,xe>{quotefail} {
496 yyless(1);
497 BEGIN(INITIAL);
498 yylval.val.str = litbufdup();
499 yylval.val.modificator = extended_string ? "es" : "qs";
500 yylval.val.keycode = -1;
501 yylval.val.sep = NULL;
502 return X_SCONST;
503 }
504 <xq,xe>{xqdouble} {
505 addlitchar('\'');
506 }
507 <xq>{xqinside} {
508 addlit(yytext, yyleng);
509 }
510 <xe>{xeinside} {
511 addlit(yytext, yyleng);
512 }
513 <xe>{xeescape} {
514 addlitchar(unescape_single_char(yytext[1]));
515 }
516 <xe>{xeoctesc} {
517 unsigned char c = strtoul(yytext+1, NULL, 8);
518
519 addlitchar(c);
520 }
521 <xe>{xehexesc} {
522 unsigned char c = strtoul(yytext+2, NULL, 16);
523
524 addlitchar(c);
525 }
526 <xq,xe>{quotecontinue} {
527 /* ignore */
528 }
529 <xe>. {
530 /* This is only needed for \ just before EOF */
531 addlitchar(yytext[0]);
532 }
533 <xq,xe><<EOF>> {
534 yylval.val.str = litbufdup();
535 yylval.val.modificator = extended_string ? "esu" : "qsu";
536 yylval.val.keycode = -1;
537 yylval.val.sep = NULL;
538 return X_SCONST;
539 }
540
541 {dolqdelim} {
542 SET_YYLLOC();
543 dolqstart = pstrdup(yytext);
544 BEGIN(xdolq);
545 startlit();
546 }
{dolqfailed}	{
					/*
					 * A "$" followed by identifier characters but no closing
					 * "$": not a dollar-quote delimiter after all.  Record the
					 * token location first, as every token-returning rule must.
					 */
					SET_YYLLOC();
					/* throw back all but the initial "$" */
					yyless(1);
					/* and return the lone "$" as X_OTHERS, tagged "dolqf" */
					yylval.val.str = yytext;
					yylval.val.modificator = "dolqf";
					yylval.val.keycode = -1;
					yylval.val.sep = NULL;
					return X_OTHERS;
				}
557 <xdolq>{dolqdelim} {
558 if (strcmp(yytext, dolqstart) == 0)
559 {
560 yylval.val.sep = dolqstart;
561 yylval.val.modificator = "dolq";
562 BEGIN(INITIAL);
563 yylval.val.str = litbufdup();
564 yylval.val.keycode = -1;
565 return X_SCONST;
566 }
567 else
568 {
569 /*
570 * When we fail to match $...$ to dolqstart, transfer
571 * the $... part to the output, but put back the final
572 * $ for rescanning. Consider $delim$...$junk$delim$
573 */
574 addlit(yytext, yyleng-1);
575 yyless(yyleng-1);
576 }
577 }
578 <xdolq>{dolqinside} {
579 addlit(yytext, yyleng);
580 }
581 <xdolq>{dolqfailed} {
582 addlit(yytext, yyleng);
583 }
584 <xdolq>. {
585 /* This is only needed for inside the quoted text */
586 addlitchar(yytext[0]);
587 }
588 <xdolq><<EOF>> {
589 yylval.val.sep = dolqstart;
590 yylval.val.modificator = "dolqu";
591 yylval.val.str = litbufdup();
592 yylval.val.keycode = -1;
593 yylval.val.sep = NULL;
594 return X_SCONST;
595 }
596
597 {xdstart} {
598 SET_YYLLOC();
599 BEGIN(xd);
600 startlit();
601 }
602 <xd>{xdstop} {
603 char *ident;
604
605 BEGIN(INITIAL);
606 if (literallen == 0)
607 yyerror(NULL, "zero-length delimited identifier");
608 ident = litbufdup();
609 if (literallen >= NAMEDATALEN)
610 truncate_identifier(ident, literallen, true);
611 yylval.val.modificator = "dq";
612 yylval.val.str = ident;
613 yylval.val.keycode = -1;
614 yylval.val.sep = NULL;
615 return X_IDENT;
616 }
617 <xd>{xddouble} {
618 addlitchar('"');
619 }
620 <xd>{xdinside} {
621 addlit(yytext, yyleng);
622 }
623 <xd><<EOF>> {
624 yylval.val.modificator = "dqu";
625 yylval.val.str = litbufdup();
626 yylval.val.keycode = -1;
627 yylval.val.sep = NULL;
628 return X_IDENT;
629 }
{typecast}		{
					/*
					 * "::" is returned as X_OTHERS with keycode X_TYPECAST.
					 * str must be set here as in every other rule; otherwise
					 * the parser sees the text left over from the previous
					 * token.
					 */
					SET_YYLLOC();
					yylval.val.str = yytext;
					yylval.val.modificator = "typecast";
					yylval.val.keycode = X_TYPECAST;
					yylval.val.sep = NULL;
					return X_OTHERS;
				}
637
638 {self} {
639 SET_YYLLOC();
640 yylval.val.str = yytext;
641 yylval.val.modificator = "self";
642 yylval.val.keycode = yytext[0];
643 yylval.val.sep = NULL;
644 return X_OTHERS;
645 }
646
647 {operator} {
648 /*
649 * Check for embedded slash-star or dash-dash; those
650 * are comment starts, so operator must stop there.
651 * Note that slash-star or dash-dash at the first
652 * character will match a prior rule, not this one.
653 */
654 int nchars = yyleng;
655 char *slashstar = strstr(yytext, "/*");
656 char *dashdash = strstr(yytext, "--");
657
658 if (slashstar && dashdash)
659 {
660 /* if both appear, take the first one */
661 if (slashstar > dashdash)
662 slashstar = dashdash;
663 }
664 else if (!slashstar)
665 slashstar = dashdash;
666 if (slashstar)
667 nchars = slashstar - yytext;
668
669 /*
670 * For SQL compatibility, '+' and '-' cannot be the
671 * last char of a multi-char operator unless the operator
672 * contains chars that are not in SQL operators.
673 * The idea is to lex '=-' as two operators, but not
674 * to forbid operator names like '?-' that could not be
675 * sequences of SQL operators.
676 */
677 while (nchars > 1 &&
678 (yytext[nchars-1] == '+' ||
679 yytext[nchars-1] == '-'))
680 {
681 int ic;
682
683 for (ic = nchars-2; ic >= 0; ic--)
684 {
685 if (strchr("~!@#^&|`?%", yytext[ic]))
686 break;
687 }
688 if (ic >= 0)
689 break; /* found a char that makes it OK */
690 nchars--; /* else remove the +/-, and check again */
691 }
692
693 SET_YYLLOC();
694
695 if (nchars < yyleng)
696 {
697 /* Strip the unwanted chars from the token */
698 yyless(nchars);
699 /*
700 * If what we have left is only one char, and it's
701 * one of the characters matching "self", then
702 * return it as a character token the same way
703 * that the "self" rule would have.
704 */
705 if (nchars == 1 &&
706 strchr(",()[].;:+-*/%^<>=", yytext[0]))
707 {
708 yylval.val.str = yytext;
709 yylval.val.modificator = NULL;
710 yylval.val.keycode = yytext[0];
711 yylval.val.sep = NULL;
712 return X_OTHERS;
713 }
714 }
715
716 /*
717 * Complain if operator is too long. Unlike the case
718 * for identifiers, we make this an error not a notice-
719 * and-truncate, because the odds are we are looking at
720 * a syntactic mistake anyway.
721 */
722 if (nchars >= NAMEDATALEN)
723 yyerror(NULL, "operator too long");
724
725 /* Convert "!=" operator to "<>" for compatibility */
726 yylval.val.modificator = NULL;
727 if (strcmp(yytext, "!=") == 0)
728 yylval.val.str = pstrdup("<>");
729 else
730 yylval.val.str = pstrdup(yytext);
731 yylval.val.keycode = -1;
732 yylval.val.sep = NULL;
733 return X_OP;
734 }
735
736 {param} {
737 SET_YYLLOC();
738 yylval.val.modificator = NULL;
739 yylval.val.str = yytext;
740 yylval.val.keycode = -1;
741 yylval.val.sep = NULL;
742 return X_PARAM;
743 }
744
{integer}		{
					/*
					 * A run of digits.  Tagged "i" when strtol() accepts the
					 * whole token without a range error (and, where long is
					 * wider than 32 bits, the value survives a round trip
					 * through int32); otherwise tagged "f" like a float.
					 */
					char	   *stop;
					long		parsed;
					bool		in_range;

					SET_YYLLOC();
					errno = 0;
					parsed = strtol(yytext, &stop, 10);
					in_range = (*stop == '\0' && errno != ERANGE);
#ifdef HAVE_LONG_INT_64
					/* if long > 32 bits, check for overflow of int4 */
					if (parsed != (long) ((int32) parsed))
						in_range = false;
#endif

					if (in_range)
					{
						yylval.val.str = yytext;
						yylval.val.modificator = "i";
					}
					else
					{
						/* integer too large, treat it as a float */
						yylval.val.str = pstrdup(yytext);
						yylval.val.modificator = "f";
					}
					yylval.val.keycode = -1;
					yylval.val.sep = NULL;
					return X_NCONST;
				}
772 {decimal} {
773 SET_YYLLOC();
774 yylval.val.str = pstrdup(yytext);
775 yylval.val.modificator = "f";
776 yylval.val.keycode = -1;
777 yylval.val.sep = NULL;
778 return X_NCONST;
779 }
780 {real} {
781 SET_YYLLOC();
782 yylval.val.str = pstrdup(yytext);
783 yylval.val.modificator = "f";
784 yylval.val.keycode = -1;
785 yylval.val.sep = NULL;
786 return X_NCONST;
787 }
788 {realfail1} {
789 /*
790 * throw back the [Ee], and treat as {decimal}. Note
791 * that it is possible the input is actually {integer},
792 * but since this case will almost certainly lead to a
793 * syntax error anyway, we don't bother to distinguish.
794 */
795 yyless(yyleng-1);
796 SET_YYLLOC();
797 yylval.val.str = pstrdup(yytext);
798 yylval.val.modificator = "f";
799 yylval.val.keycode = -1;
800 yylval.val.sep = NULL;
801 return X_NCONST;
802 }
803 {realfail2} {
804 /* throw back the [Ee][+-], and proceed as above */
805 yyless(yyleng-2);
806 SET_YYLLOC();
807 yylval.val.str = pstrdup(yytext);
808 yylval.val.modificator = "f";
809 yylval.val.keycode = -1;
810 yylval.val.sep = NULL;
811 return X_NCONST;
812 }
813
814
{identifier}	{
					/*
					 * Unquoted word: returned as X_KEYWORD when
					 * orafce_scan_keyword() recognizes it, otherwise as a
					 * down-cased (and possibly truncated) X_IDENT.
					 */
					char	   *ident;
					const char *keyword;
					int			keycode;

					SET_YYLLOC();

					/* Is it a keyword? */
					keyword = orafce_scan_keyword(yytext, &keycode);
					if (keyword != NULL)
					{
						yylval.val.str = (char *) keyword;
						yylval.val.keycode = keycode;
						yylval.val.modificator = NULL;
						yylval.val.sep = NULL;
						return X_KEYWORD;
					}

					/*
					 * No.  Convert the identifier to lower case, and truncate
					 * if necessary.
					 */
					ident = downcase_truncate_identifier(yytext, yyleng, true);
					yylval.val.str = ident;
					yylval.val.modificator = NULL;
					yylval.val.keycode = -1;
					yylval.val.sep = NULL;
					return X_IDENT;
				}
847
848 {other} {
849 SET_YYLLOC();
850 yylval.val.str = yytext;
851 yylval.val.modificator = NULL;
852 yylval.val.keycode = yytext[0];
853 yylval.val.sep = NULL;
854 return X_OTHERS;
855 }
856
857 <<EOF>> {
858 SET_YYLLOC();
859 yyterminate();
860 }
861
862 %%
863
864 /*
865 * lexer_errposition
866 * Report a lexical-analysis-time cursor position, if possible.
867 *
868 * This is expected to be used within an ereport() call. The return value
869 * is a dummy (always 0, in fact).
870 *
871 * Note that this can only be used for messages from the lexer itself,
872 * since it depends on scanbuf to still be valid.
873 */
874 static int
875 lexer_errposition(void)
876 {
877 int pos;
878
879 /* Convert byte offset to character number */
880 pos = _pg_mbstrlen_with_len(scanbuf, orafce_sql_yylval.val.lloc) + 1;
881 /* And pass it to the ereport mechanism */
882
883 #if PG_VERSION_NUM >= 130000
884
885 errposition(pos);
886
887 return pos;
888
889 #else
890
891 return errposition(pos);
892
893 #endif
894
895 }
896
897 /*
898 * yyerror
899 * Report a lexer or grammar error.
900 *
901 * The message's cursor position identifies the most recently lexed token.
902 * This is OK for syntax error messages from the Bison parser, because Bison
903 * parsers report error as soon as the first unparsable token is reached.
904 * Beware of using yyerror for other purposes, as the cursor position might
905 * be misleading!
906 */
907 void
908 orafce_sql_yyerror(List **result, const char *message)
909 {
910 const char *loc = scanbuf + orafce_sql_yylval.val.lloc;
911
912 if (*loc == YY_END_OF_BUFFER_CHAR)
913 {
914 ereport(ERROR,
915 (errcode(ERRCODE_SYNTAX_ERROR),
916 errmsg("%s at end of input", message),
917 lexer_errposition()));
918 }
919 else
920 {
921 ereport(ERROR,
922 (errcode(ERRCODE_SYNTAX_ERROR),
923 errmsg("%s at or near \"%s\"", message, loc),
924 lexer_errposition()));
925 }
926 }
927
928
929 /*
930 * Called before any actual parsing is done
931 */
932 void
933 orafce_sql_scanner_init(const char *str)
934 {
935 Size slen = strlen(str);
936
937 /*
938 * Might be left over after ereport()
939 */
940 if (YY_CURRENT_BUFFER)
941 yy_delete_buffer(YY_CURRENT_BUFFER);
942
943 /*
944 * Make a scan buffer with special termination needed by flex.
945 */
946 scanbuflen = slen;
947 scanbuf = palloc(slen + 2);
948 memcpy(scanbuf, str, slen);
949 scanbuf[slen] = scanbuf[slen + 1] = YY_END_OF_BUFFER_CHAR;
950 scanbufhandle = yy_scan_buffer(scanbuf, slen + 2);
951
952 /* initialize literal buffer to a reasonable but expansible size */
953 literalalloc = 128;
954 literalbuf = (char *) palloc(literalalloc);
955 startlit();
956
957 BEGIN(INITIAL);
958 }
959
960
961 /*
962 * Called after parsing is done to clean up after fdate_scanner_init()
963 */
964 void
965 orafce_sql_scanner_finish(void)
966 {
967 yy_delete_buffer(scanbufhandle);
968 pfree(scanbuf);
969 }
970
971 static void
972 addlit(char *ytext, int yleng)
973 {
974 /* enlarge buffer if needed */
975 if ((literallen+yleng) >= literalalloc)
976 {
977 do {
978 literalalloc *= 2;
979 } while ((literallen+yleng) >= literalalloc);
980 literalbuf = (char *) repalloc(literalbuf, literalalloc);
981 }
982 /* append new data, add trailing null */
983 memcpy(literalbuf+literallen, ytext, yleng);
984 literallen += yleng;
985 literalbuf[literallen] = '\0';
986 }
987
988
989 static void
990 addlitchar(unsigned char ychar)
991 {
992 /* enlarge buffer if needed */
993 if ((literallen+1) >= literalalloc)
994 {
995 literalalloc *= 2;
996 literalbuf = (char *) repalloc(literalbuf, literalalloc);
997 }
998 /* append new data, add trailing null */
999 literalbuf[literallen] = ychar;
1000 literallen += 1;
1001 literalbuf[literallen] = '\0';
1002 }
1003
1004
1005 /*
1006 * One might be tempted to write pstrdup(literalbuf) instead of this,
1007 * but for long literals this is much faster because the length is
1008 * already known.
1009 */
1010 static char *
1011 litbufdup(void)
1012 {
1013 char *new;
1014
1015 new = palloc(literallen + 1);
1016 memcpy(new, literalbuf, literallen+1);
1017 return new;
1018 }
1019
1020
/*
 * unescape_single_char
 *		Map the character following a backslash to the byte it stands for.
 *
 * 'b', 'f', 'n', 'r' and 't' yield backspace, form feed, newline, carriage
 * return and tab; every other character is returned unchanged.
 */
static unsigned char
unescape_single_char(unsigned char c)
{
	/* pairs of { escape letter, resulting byte } */
	static const unsigned char escape_map[][2] = {
		{'b', '\b'},
		{'f', '\f'},
		{'n', '\n'},
		{'r', '\r'},
		{'t', '\t'}
	};
	unsigned int i;

	for (i = 0; i < sizeof(escape_map) / sizeof(escape_map[0]); i++)
	{
		if (escape_map[i][0] == c)
			return escape_map[i][1];
	}

	return c;
}
1040
1041
1042