1 %{
2 /*
** A flex scanner that splits SQL text into tokens (prefix orafce_sql_yy)
4 */
5 
6 #include "postgres.h"
7 
8 #include "parser/gramparse.h"
9 /* Not needed now that this file is compiled as part of gram.y */
10 /* #include "parser/parse.h" */
11 #include "parser/scansup.h"
12 #include "mb/pg_wchar.h"
13 
14 #include "parse_keyword.h"
15 
16 /* Avoid exit() on fatal scanner errors (a bit ugly -- see yy_fatal_error) */
17 #undef fprintf
18 #define fprintf(file, fmt, msg)  fprintf_to_ereport(fmt, msg)
19 
20 static void
fprintf_to_ereport(const char * fmt,const char * msg)21 fprintf_to_ereport(const char *fmt, const char *msg)
22 {
23 	ereport(ERROR, (errmsg_internal("%s", msg)));
24 }
25 
static int		xcdepth = 0;	/* depth of nesting in slash-star comments */
static char    *dolqstart;      /* current $foo$ quote start string */
/*
 * Set to true when an E'...' string is opened ({xestart}) and to false when
 * a plain '...' string is opened ({xqstart}); read when the string token is
 * returned to choose between the "es"/"esu" and "qs"/"qsu" modificators.
 */
static bool extended_string = false;
29 
30 
31 /* No reason to constrain amount of data slurped */
32 #define YY_READ_BUF_SIZE 16777216
33 
/* flex buffer handle, created by yy_scan_buffer() in orafce_sql_scanner_init() */


static YY_BUFFER_STATE scanbufhandle;

/* Store the current token's byte offset from the start of scanbuf */
#define SET_YYLLOC()  (orafce_sql_yylval.val.lloc = yytext - scanbuf)

/* palloc'd copy of the input text, terminated the way flex requires */
static char *scanbuf;
43 
44 /* flex 2.5.4 doesn't bother with a decl for this */
45 
46 int orafce_sql_yylex(void);
47 
48 void orafce_sql_scanner_init(const char *str);
49 void orafce_sql_scanner_finish(void);
50 
51 /*
52  * literalbuf is used to accumulate literal values when multiple rules
53  * are needed to parse a single literal.  Call startlit to reset buffer
54  * to empty, addlit to add text.  Note that the buffer is palloc'd and
55  * starts life afresh on every parse cycle.
56  */
57 static char	   *literalbuf;		/* expandable buffer */
58 static int		literallen;		/* actual current length */
59 static int		literalalloc;	/* current allocated buffer size */
60 
61 #define startlit()  (literalbuf[0] = '\0', literallen = 0)
62 static void addlit(char *ytext, int yleng);
63 static void addlitchar(unsigned char ychar);
64 static char *litbufdup(void);
65 
66 static int	lexer_errposition(void);
67 
68 /*
69  * Each call to yylex must set yylloc to the location of the found token
70  * (expressed as a byte offset from the start of the input text).
71  * When we parse a token that requires multiple lexer rules to process,
72  * this should be done in the first such rule, else yylloc will point
73  * into the middle of the token.
74  */
75 
/*
 * Repeated declaration of scanbuf; it names the same file-scope variable
 * that is already declared earlier in this file.
 */
static char *scanbuf;
78 
79 static unsigned char unescape_single_char(unsigned char c);
80 
81 #ifndef _pg_mbstrlen_with_len
82 #define _pg_mbstrlen_with_len(buf,loc) 	pg_mbstrlen_with_len(buf,loc)
83 #endif
84 
85 %}
86 
87 %option 8bit
88 %option never-interactive
89 %option nodefault
90 %option noinput
91 %option nounput
92 %option noyywrap
93 %option prefix="orafce_sql_yy"
94 
95 /*
96  * OK, here is a short description of lex/flex rules behavior.
97  * The longest pattern which matches an input string is always chosen.
98  * For equal-length patterns, the first occurring in the rules list is chosen.
99  * INITIAL is the starting state, to which all non-conditional rules apply.
100  * Exclusive states change parsing rules while the state is active.  When in
101  * an exclusive state, only those rules defined for that state apply.
102  *
103  * We use exclusive states for quoted strings, extended comments,
104  * and to eliminate parsing troubles for numeric strings.
105  * Exclusive states:
106  *  <xb> bit string literal
107  *  <xc> extended C-style comments
108  *  <xd> delimited identifiers (double-quoted identifiers)
109  *  <xh> hexadecimal numeric string
110  *  <xq> standard quoted strings
111  *  <xe> extended quoted strings (support backslash escape sequences)
112  *  <xdolq> $foo$ quoted strings
113  */
114 
115 %x xb
116 %x xc
117 %x xd
118 %x xh
119 %x xe
120 %x xq
121 %x xdolq
122 
123 
124 /*
125  * In order to make the world safe for Windows and Mac clients as well as
126  * Unix ones, we accept either \n or \r as a newline.  A DOS-style \r\n
127  * sequence will be seen as two successive newlines, but that doesn't cause
128  * any problems.  Comments that start with -- and extend to the next
129  * newline are treated as equivalent to a single whitespace character.
130  *
131  * NOTE a fine point: if there is no newline following --, we will absorb
132  * everything to the end of the input as a comment.  This is correct.  Older
133  * versions of Postgres failed to recognize -- as a comment if the input
134  * did not end with a newline.
135  *
136  * XXX perhaps \f (formfeed) should be treated as a newline as well?
137  *
138  * XXX if you change the set of whitespace characters, fix scanner_isspace()
139  * to agree, and see also the plpgsql lexer.
140  */
141 
142 space			[ \t\n\r\f]
143 horiz_space		[ \t\f]
144 newline			[\n\r]
145 non_newline		[^\n\r]
146 
147 comment			("--"{non_newline}*)
148 
149 whitespace		{space}+
150 
151 /*
152  * SQL requires at least one newline in the whitespace separating
153  * string literals that are to be concatenated.  Silly, but who are we
154  * to argue?  Note that {whitespace_with_newline} should not have * after
155  * it, whereas {whitespace} should generally have a * after it...
156  */
157 
158 special_whitespace		({space}+|{comment}{newline})
159 horiz_whitespace		({horiz_space}|{comment})
160 whitespace_with_newline	({horiz_whitespace}*{newline}{special_whitespace}*)
161 
162 /*
163  * To ensure that {quotecontinue} can be scanned without having to back up
164  * if the full pattern isn't matched, we include trailing whitespace in
165  * {quotestop}.  This matches all cases where {quotecontinue} fails to match,
166  * except for {quote} followed by whitespace and just one "-" (not two,
167  * which would start a {comment}).  To cover that we have {quotefail}.
168  * The actions for {quotestop} and {quotefail} must throw back characters
169  * beyond the quote proper.
170  */
171 quote			'
172 quotestop		{quote}{whitespace}*
173 quotecontinue	{quote}{whitespace_with_newline}{quote}
174 quotefail		{quote}{whitespace}*"-"
175 
176 /* Bit string
177  * It is tempting to scan the string for only those characters
178  * which are allowed. However, this leads to silently swallowed
179  * characters if illegal characters are included in the string.
180  * For example, if xbinside is [01] then B'ABCD' is interpreted
181  * as a zero-length string, and the ABCD' is lost!
182  * Better to pass the string forward and let the input routines
183  * validate the contents.
184  */
185 xbstart			[bB]{quote}
186 xbinside		[^']*
187 
188 /* Hexadecimal number */
189 xhstart			[xX]{quote}
190 xhinside		[^']*
191 
192 /* National character */
193 xnstart			[nN]{quote}
194 
195 /* Quoted string that allows backslash escapes */
196 xestart			[eE]{quote}
197 xeinside		[^\\']+
198 xeescape		[\\][^0-7]
199 xeoctesc		[\\][0-7]{1,3}
200 xehexesc		[\\]x[0-9A-Fa-f]{1,2}
201 
202 /* Extended quote
203  * xqdouble implements embedded quote, ''''
204  */
205 xqstart			{quote}
206 xqdouble		{quote}{quote}
207 xqinside		[^']+
208 
209 /* $foo$ style quotes ("dollar quoting")
210  * The quoted string starts with $foo$ where "foo" is an optional string
211  * in the form of an identifier, except that it may not contain "$",
212  * and extends to the first occurrence of an identical string.
213  * There is *no* processing of the quoted text.
214  *
215  * {dolqfailed} is an error rule to avoid scanner backup when {dolqdelim}
216  * fails to match its trailing "$".
217  */
218 dolq_start		[A-Za-z\200-\377_]
219 dolq_cont		[A-Za-z\200-\377_0-9]
220 dolqdelim		\$({dolq_start}{dolq_cont}*)?\$
221 dolqfailed		\${dolq_start}{dolq_cont}*
222 dolqinside		[^$]+
223 
224 /* Double quote
225  * Allows embedded spaces and other special characters into identifiers.
226  */
227 dquote			\"
228 xdstart			{dquote}
229 xdstop			{dquote}
230 xddouble		{dquote}{dquote}
231 xdinside		[^"]+
232 
233 /* C-style comments
234  *
235  * The "extended comment" syntax closely resembles allowable operator syntax.
236  * The tricky part here is to get lex to recognize a string starting with
237  * slash-star as a comment, when interpreting it as an operator would produce
238  * a longer match --- remember lex will prefer a longer match!  Also, if we
239  * have something like plus-slash-star, lex will think this is a 3-character
240  * operator whereas we want to see it as a + operator and a comment start.
241  * The solution is two-fold:
242  * 1. append {op_chars}* to xcstart so that it matches as much text as
243  *    {operator} would. Then the tie-breaker (first matching rule of same
244  *    length) ensures xcstart wins.  We put back the extra stuff with yyless()
245  *    in case it contains a star-slash that should terminate the comment.
246  * 2. In the operator rule, check for slash-star within the operator, and
247  *    if found throw it back with yyless().  This handles the plus-slash-star
248  *    problem.
249  * Dash-dash comments have similar interactions with the operator rule.
250  */
251 xcstart			\/\*{op_chars}*
252 xcstop			\*+\/
253 xcinside		[^*/]+
254 
255 digit			[0-9]
256 ident_start		[A-Za-z\200-\377_]
257 ident_cont		[A-Za-z\200-\377_0-9\$]
258 
259 identifier		{ident_start}{ident_cont}*
260 
261 typecast		"::"
262 
263 /*
264  * "self" is the set of chars that should be returned as single-character
265  * tokens.  "op_chars" is the set of chars that can make up "Op" tokens,
266  * which can be one or more characters long (but if a single-char token
267  * appears in the "self" set, it is not to be returned as an Op).  Note
268  * that the sets overlap, but each has some chars that are not in the other.
269  *
270  * If you change either set, adjust the character lists appearing in the
271  * rule for "operator"!
272  */
273 self			[,()\[\].;\:\+\-\*\/\%\^\<\>\=]
274 op_chars		[\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
275 operator		{op_chars}+
276 
277 /* we no longer allow unary minus in numbers.
278  * instead we pass it separately to parser. there it gets
279  * coerced via doNegate() -- Leon aug 20 1999
280  *
281  * {realfail1} and {realfail2} are added to prevent the need for scanner
282  * backup when the {real} rule fails to match completely.
283  */
284 
285 integer			{digit}+
286 decimal			(({digit}*\.{digit}+)|({digit}+\.{digit}*))
287 real			({integer}|{decimal})[Ee][-+]?{digit}+
288 realfail1		({integer}|{decimal})[Ee]
289 realfail2		({integer}|{decimal})[Ee][-+]
290 
291 param			\${integer}
292 
293 other			.
294 
295 /*
296  * Dollar quoted strings are totally opaque, and no escaping is done on them.
297  * Other quoted strings must allow some special characters such as single-quote
298  *  and newline.
299  * Embedded single-quotes are implemented both in the SQL standard
300  *  style of two adjacent single quotes "''" and in the Postgres/Java style
301  *  of escaped-quote "\'".
302  * Other embedded escaped characters are matched explicitly and the leading
303  *  backslash is dropped from the string.
304  * Note that xcstart must appear before operator, as explained above!
305  *  Also whitespace (comment) must appear before operator.
306  */
307 
308 %%
309 
{whitespace}	{
					/* A run of blanks/newlines is passed on verbatim */
					yylval.val.keycode = -1;
					yylval.val.modificator = NULL;
					yylval.val.sep = NULL;
					yylval.val.str = yytext;
					SET_YYLLOC();
					return X_WHITESPACE;
				}

{comment}	{
					/* Dash-dash comment up to end of line, tagged "sc" */
					yylval.val.keycode = -1;
					yylval.val.modificator = "sc";
					yylval.val.sep = NULL;
					yylval.val.str = yytext;
					SET_YYLLOC();
					return X_COMMENT;
				}
327 
328 
{xcstart}		{
					/* Set location in case of syntax error in comment */
					SET_YYLLOC();
					xcdepth = 0;
					BEGIN(xc);
					/* Begin rebuilding the comment text, opener included */
					startlit();
					addlitchar('/');
					addlitchar('*');

					/* Put back any characters past slash-star; see above */
					yyless(2);
				}

<xc>{xcstart}	{
					/* Nested comment opener: one level deeper */
					xcdepth++;
					addlitchar('/');
					addlitchar('*');

					/* Put back any characters past slash-star; see above */
					yyless(2);
				}

<xc>{xcstop}	{
					/*
					 * {xcstop} is \*+\/ and so may match several stars
					 * before the slash.  Append the matched text itself
					 * rather than a fixed star-slash, so that no stars are
					 * lost from the rebuilt comment.
					 */
					addlit(yytext, yyleng);

					if (xcdepth <= 0)
					{
						/* Outermost comment closed: return it, tagged "ec" */
						BEGIN(INITIAL);

						yylval.val.str = litbufdup();
						yylval.val.modificator = "ec";
						yylval.val.keycode = -1;
						yylval.val.sep = NULL;
						return X_COMMENT;
					}

					/* Closed a nested comment; keep scanning the outer one */
					xcdepth--;
				}

<xc>{xcinside}	{
					addlit(yytext, yyleng);
				}

<xc>{op_chars}	{
					addlit(yytext, yyleng);
				}

<xc>\*+			{
					addlit(yytext, yyleng);
				}

<xc><<EOF>>		{
					/* Input ended inside the comment: return it tagged "ecu" */
					yylval.val.str = litbufdup();
					yylval.val.modificator = "ecu";
					yylval.val.keycode = -1;
					yylval.val.sep = NULL;
					return X_COMMENT;

				}
393 
{xbstart}		{
					/*
					 * Bit-string literal written in binary.  The collected
					 * text is given a leading "b" so that the input routine
					 * can tell it is a binary string; eventually the string
					 * should just be handed to the parser and labelled there.
					 */
					SET_YYLLOC();
					BEGIN(xb);
					startlit();
					addlitchar('b');
				}
<xb>{quotestop}	|
<xb>{quotefail} {
					/* Keep only the closing quote; rescan what followed it */
					yyless(1);
					BEGIN(INITIAL);
					yylval.val.keycode = -1;
					yylval.val.modificator = "b";
					yylval.val.sep = NULL;
					yylval.val.str = litbufdup();
					return X_NCONST;
				}
<xh>{xhinside}	|
<xb>{xbinside}	{
					addlit(yytext, yyleng);
				}
<xh>{quotecontinue}	|
<xb>{quotecontinue}	{
					/* ignore */
				}
<xb><<EOF>>		{
					/* Input ended inside the literal: tagged "bu" */
					yylval.val.keycode = -1;
					yylval.val.modificator = "bu";
					yylval.val.sep = NULL;
					yylval.val.str = litbufdup();
					return X_NCONST;
				}

{xhstart}		{
					/*
					 * Bit-string literal written in hexadecimal.  The
					 * collected text is given a leading "x" so that the
					 * input routine can tell it is a hex string; eventually
					 * the string should just be handed to the parser and
					 * labelled there.
					 */
					SET_YYLLOC();
					BEGIN(xh);
					startlit();
					addlitchar('x');
				}
<xh>{quotestop}	|
<xh>{quotefail} {
					/* Keep only the closing quote; rescan what followed it */
					yyless(1);
					BEGIN(INITIAL);
					yylval.val.keycode = -1;
					yylval.val.modificator = "x";
					yylval.val.sep = NULL;
					yylval.val.str = litbufdup();
					return X_NCONST;
				}
<xh><<EOF>>		{
					/* Input ended inside the literal: tagged "xu" */
					yylval.val.keycode = -1;
					yylval.val.modificator = "xu";
					yylval.val.sep = NULL;
					yylval.val.str = litbufdup();
					return X_NCONST;
				}
461 
{xnstart}		{
					/*
					 * National character literal.  Only the leading 'n' is
					 * consumed here and reported as the "nchar" keyword; the
					 * quoted part is rescanned as an ordinary string.
					 */
					int			code;
					const char *kw;

					SET_YYLLOC();
					yyless(1);				/* eat only 'n' this time */

					/* nchar had better be a keyword! */
					kw = orafce_scan_keyword("nchar", &code);
					Assert(kw != NULL);

					yylval.val.keycode = code;
					yylval.val.modificator = NULL;
					yylval.val.sep = NULL;
					yylval.val.str = (char*) kw;
					return X_KEYWORD;
				}
481 
{xqstart}		{
					/* Opening quote of a plain '...' string */
					SET_YYLLOC();
					extended_string = false;
					BEGIN(xq);
					startlit();
				}
{xestart}		{
					/* Opening E' of a string with backslash escapes */
					SET_YYLLOC();
					extended_string = true;
					BEGIN(xe);
					startlit();
				}
<xq,xe>{quotestop}	|
<xq,xe>{quotefail} {
					/* Keep only the closing quote; rescan what followed it */
					yyless(1);
					BEGIN(INITIAL);
					yylval.val.keycode = -1;
					yylval.val.sep = NULL;
					if (extended_string)
						yylval.val.modificator = "es";
					else
						yylval.val.modificator = "qs";
					yylval.val.str = litbufdup();
					return X_SCONST;
				}
<xq,xe>{xqdouble} {
					/* Doubled quote stands for one literal quote */
					addlitchar('\'');
				}
<xq>{xqinside}  {
					addlit(yytext, yyleng);
				}
<xe>{xeinside}  {
					addlit(yytext, yyleng);
				}
<xe>{xeescape}  {
					/* Backslash plus one character: translate the character */
					addlitchar(unescape_single_char(yytext[1]));
				}
<xe>{xeoctesc}  {
					/* Octal escape: digits start right after the backslash */
					addlitchar((unsigned char) strtoul(yytext + 1, NULL, 8));
				}
<xe>{xehexesc}  {
					/* Hex escape: digits start after the backslash and 'x' */
					addlitchar((unsigned char) strtoul(yytext + 2, NULL, 16));
				}
<xq,xe>{quotecontinue} {
					/* ignore */
				}
<xe>.			{
					/* This is only needed for \ just before EOF */
					addlitchar(yytext[0]);
				}
<xq,xe><<EOF>>		{
					/* Input ended inside the string: tagged "esu" or "qsu" */
					yylval.val.keycode = -1;
					yylval.val.sep = NULL;
					if (extended_string)
						yylval.val.modificator = "esu";
					else
						yylval.val.modificator = "qsu";
					yylval.val.str = litbufdup();
					return X_SCONST;
				}
540 
{dolqdelim}		{
					/* Opening $foo$: remember the delimiter for matching */
					SET_YYLLOC();
					dolqstart = pstrdup(yytext);
					BEGIN(xdolq);
					startlit();
				}
{dolqfailed}	{
					/*
					 * This rule returns a token, so it must record the
					 * token's location like every other returning rule.
					 */
					SET_YYLLOC();
					/* throw back all but the initial "$" */
					yyless(1);
					/* and treat it as {other} */
					yylval.val.str = yytext;
					yylval.val.modificator = "dolqf";
					yylval.val.keycode = -1;
					yylval.val.sep = NULL;
					return X_OTHERS;
				}
<xdolq>{dolqdelim} {
					if (strcmp(yytext, dolqstart) == 0)
					{
						/* Matching closer: return the string, tagged "dolq" */
						yylval.val.sep = dolqstart;
						yylval.val.modificator = "dolq";
						BEGIN(INITIAL);
						yylval.val.str = litbufdup();
						yylval.val.keycode = -1;
						return X_SCONST;
					}
					else
					{
						/*
						 * When we fail to match $...$ to dolqstart, transfer
						 * the $... part to the output, but put back the final
						 * $ for rescanning.  Consider $delim$...$junk$delim$
						 */
						addlit(yytext, yyleng-1);
						yyless(yyleng-1);
					}
				}
<xdolq>{dolqinside} {
					addlit(yytext, yyleng);
				}
<xdolq>{dolqfailed} {
					addlit(yytext, yyleng);
				}
<xdolq>.		{
					/* This is only needed for inside the quoted text */
					addlitchar(yytext[0]);
				}
<xdolq><<EOF>>	{
					/*
					 * Input ended inside the dollar-quoted string: tagged
					 * "dolqu".  sep ends up NULL here, unlike the terminated
					 * case above.
					 * NOTE(review): this action used to store dolqstart in
					 * sep and then overwrite it with NULL; only the NULL was
					 * ever visible, so that value is kept.  Confirm whether
					 * the parser wants the delimiter for unterminated strings.
					 */
					yylval.val.modificator = "dolqu";
					yylval.val.str = litbufdup();
					yylval.val.keycode = -1;
					yylval.val.sep = NULL;
					return X_SCONST;
				}
596 
{xdstart}		{
					/* Opening double quote of a delimited identifier */
					SET_YYLLOC();
					BEGIN(xd);
					startlit();
				}
<xd>{xdstop}	{
					char	   *name;

					BEGIN(INITIAL);
					/* An empty "" identifier is reported as an error */
					if (literallen == 0)
						yyerror(NULL, "zero-length delimited identifier");
					name = litbufdup();
					/* Over-long names go through truncate_identifier() */
					if (literallen >= NAMEDATALEN)
						truncate_identifier(name, literallen, true);
					yylval.val.keycode = -1;
					yylval.val.modificator = "dq";
					yylval.val.sep = NULL;
					yylval.val.str = name;
					return X_IDENT;
				}
<xd>{xddouble}	{
					/* Doubled double-quote stands for one literal quote */
					addlitchar('"');
				}
<xd>{xdinside}	{
					addlit(yytext, yyleng);
				}
<xd><<EOF>>		{
					/* Input ended inside the identifier: tagged "dqu" */
					yylval.val.keycode = -1;
					yylval.val.modificator = "dqu";
					yylval.val.sep = NULL;
					yylval.val.str = litbufdup();
					return X_IDENT;
				}
{typecast}		{
					/*
					 * "::" is returned as X_OTHERS with keycode X_TYPECAST.
					 * str is set to the matched text so that it does not
					 * keep pointing at the previous token's string.
					 */
					SET_YYLLOC();
					yylval.val.str = yytext;
					yylval.val.modificator = "typecast";
					yylval.val.keycode = X_TYPECAST;
					yylval.val.sep = NULL;
					return X_OTHERS;
				}

{self}			{
					/* Single-character token; keycode is the character itself */
					SET_YYLLOC();
					yylval.val.str = yytext;
					yylval.val.modificator = "self";
					yylval.val.keycode = yytext[0];
					yylval.val.sep = NULL;
					return X_OTHERS;
				}
646 
{operator}		{
					/*
					 * An embedded slash-star or dash-dash starts a comment,
					 * so the operator has to end just before it.  (When the
					 * comment start is the very first character, an earlier
					 * rule matches instead of this one.)
					 */
					int		keep = yyleng;
					char   *block_cmt = strstr(yytext, "/*");
					char   *line_cmt = strstr(yytext, "--");
					char   *cut;

					/* whichever comment start comes first wins */
					if (block_cmt != NULL && line_cmt != NULL)
						cut = (line_cmt < block_cmt) ? line_cmt : block_cmt;
					else if (block_cmt != NULL)
						cut = block_cmt;
					else
						cut = line_cmt;
					if (cut != NULL)
						keep = cut - yytext;

					/*
					 * SQL compatibility: a multi-character operator may not
					 * end in '+' or '-' unless it also contains a character
					 * that cannot occur in an SQL operator.  Thus '=-' is
					 * lexed as two operators, while a name such as '?-' is
					 * still allowed.
					 */
					while (keep > 1 &&
						   (yytext[keep - 1] == '+' || yytext[keep - 1] == '-'))
					{
						bool	has_nonsql = false;
						int		i;

						for (i = 0; i <= keep - 2; i++)
						{
							if (strchr("~!@#^&|`?%", yytext[i]) != NULL)
							{
								has_nonsql = true;
								break;
							}
						}
						if (has_nonsql)
							break;		/* trailing +/- is acceptable */
						keep--;			/* drop the +/- and look again */
					}

					SET_YYLLOC();

					if (keep < yyleng)
					{
						/* Give the unwanted tail back to the scanner */
						yyless(keep);

						/*
						 * A single remaining character that belongs to the
						 * "self" set is returned as a character token.
						 */
						if (keep == 1 &&
							strchr(",()[].;:+-*/%^<>=", yytext[0]) != NULL)
						{
							yylval.val.keycode = yytext[0];
							yylval.val.modificator = NULL;
							yylval.val.sep = NULL;
							yylval.val.str = yytext;
							return X_OTHERS;
						}
					}

					/*
					 * An over-long operator is an error rather than a
					 * notice-and-truncate as for identifiers, since it is
					 * most likely a syntactic mistake anyway.
					 */
					if (keep >= NAMEDATALEN)
						yyerror(NULL, "operator too long");

					/* "!=" is handed on as "<>" for compatibility */
					yylval.val.keycode = -1;
					yylval.val.modificator = NULL;
					yylval.val.sep = NULL;
					yylval.val.str = pstrdup(strcmp(yytext, "!=") == 0 ? "<>" : yytext);
					return X_OP;
				}
735 
{param}			{
					/* $n parameter reference */
					SET_YYLLOC();
					yylval.val.keycode = -1;
					yylval.val.modificator = NULL;
					yylval.val.sep = NULL;
					yylval.val.str = yytext;
					return X_PARAM;
				}

{integer}		{
					long	val;
					char   *endptr;
					bool	as_float;

					SET_YYLLOC();
					errno = 0;
					val = strtol(yytext, &endptr, 10);
					as_float = (*endptr != '\0' || errno == ERANGE);
#ifdef HAVE_LONG_INT_64
					/* if long > 32 bits, check for overflow of int4 */
					if (!as_float && val != (long) ((int32) val))
						as_float = true;
#endif

					yylval.val.keycode = -1;
					yylval.val.sep = NULL;
					if (as_float)
					{
						/* integer too large, treat it as a float */
						yylval.val.str = pstrdup(yytext);
						yylval.val.modificator = "f";
					}
					else
					{
						yylval.val.str = yytext;
						yylval.val.modificator = "i";
					}
					return X_NCONST;
				}
{decimal}		{
					SET_YYLLOC();
					yylval.val.keycode = -1;
					yylval.val.modificator = "f";
					yylval.val.sep = NULL;
					yylval.val.str = pstrdup(yytext);
					return X_NCONST;
				}
{real}			{
					SET_YYLLOC();
					yylval.val.keycode = -1;
					yylval.val.modificator = "f";
					yylval.val.sep = NULL;
					yylval.val.str = pstrdup(yytext);
					return X_NCONST;
				}
{realfail1}		{
					/*
					 * Give back the [Ee] and handle the rest like {decimal}.
					 * The input could really be an {integer}, but as this
					 * case will almost certainly end in a syntax error
					 * anyway, no distinction is made.
					 */
					yyless(yyleng-1);
					SET_YYLLOC();
					yylval.val.keycode = -1;
					yylval.val.modificator = "f";
					yylval.val.sep = NULL;
					yylval.val.str = pstrdup(yytext);
					return X_NCONST;
				}
{realfail2}		{
					/* Give back the [Ee][+-], then proceed as above */
					yyless(yyleng-2);
					SET_YYLLOC();
					yylval.val.keycode = -1;
					yylval.val.modificator = "f";
					yylval.val.sep = NULL;
					yylval.val.str = pstrdup(yytext);
					return X_NCONST;
				}
813 
814 
{identifier}	{
					const char *keyword;
					int			keycode;

					SET_YYLLOC();

					/* Is it a keyword? */
					keyword = orafce_scan_keyword(yytext, &keycode);
					if (keyword != NULL)
					{
						yylval.val.str = (char*) keyword;
						yylval.val.keycode = keycode;
						yylval.val.modificator = NULL;
						yylval.val.sep = NULL;
						return X_KEYWORD;
					}

					/*
					 * No.  Convert the identifier to lower case, and truncate
					 * if necessary.
					 */
					yylval.val.str = downcase_truncate_identifier(yytext, yyleng, true);
					yylval.val.modificator = NULL;
					yylval.val.keycode = -1;
					yylval.val.sep = NULL;
					return X_IDENT;
				}
847 
{other}			{
					/* Any other single character; keycode is the character */
					yylval.val.keycode = yytext[0];
					yylval.val.modificator = NULL;
					yylval.val.sep = NULL;
					yylval.val.str = yytext;
					SET_YYLLOC();
					return X_OTHERS;
				}

<<EOF>>			{
					/* End of input outside any quoted or comment state */
					SET_YYLLOC();
					yyterminate();
				}
861 
862 %%
863 
864 /*
865  * lexer_errposition
866  *		Report a lexical-analysis-time cursor position, if possible.
867  *
868  * This is expected to be used within an ereport() call.  The return value
869  * is a dummy (always 0, in fact).
870  *
871  * Note that this can only be used for messages from the lexer itself,
872  * since it depends on scanbuf to still be valid.
873  */
874 static int
875 lexer_errposition(void)
876 {
877 	int		pos;
878 
879 	/* Convert byte offset to character number */
880 	pos = _pg_mbstrlen_with_len(scanbuf, orafce_sql_yylval.val.lloc) + 1;
881 	/* And pass it to the ereport mechanism */
882 
883 #if PG_VERSION_NUM >= 130000
884 
885 	errposition(pos);
886 
887 	return pos;
888 
889 #else
890 
891 	return errposition(pos);
892 
893 #endif
894 
895 }
896 
897 /*
898  * yyerror
899  *		Report a lexer or grammar error.
900  *
901  * The message's cursor position identifies the most recently lexed token.
902  * This is OK for syntax error messages from the Bison parser, because Bison
903  * parsers report error as soon as the first unparsable token is reached.
904  * Beware of using yyerror for other purposes, as the cursor position might
905  * be misleading!
906  */
907 void
908 orafce_sql_yyerror(List **result, const char *message)
909 {
910 	const char *loc = scanbuf + orafce_sql_yylval.val.lloc;
911 
912 	if (*loc == YY_END_OF_BUFFER_CHAR)
913 	{
914 		ereport(ERROR,
915 				(errcode(ERRCODE_SYNTAX_ERROR),
916 				 errmsg("%s at end of input", message),
917 				 lexer_errposition()));
918 	}
919 	else
920 	{
921 		ereport(ERROR,
922 				(errcode(ERRCODE_SYNTAX_ERROR),
923 				 errmsg("%s at or near \"%s\"", message, loc),
924 				 lexer_errposition()));
925 	}
926 }
927 
928 
929 /*
930  * Called before any actual parsing is done
931  */
932 void
933 orafce_sql_scanner_init(const char *str)
934 {
935 	Size	slen = strlen(str);
936 
937 	/*
938 	 * Might be left over after ereport()
939 	 */
940 	if (YY_CURRENT_BUFFER)
941 		yy_delete_buffer(YY_CURRENT_BUFFER);
942 
943 	/*
944 	 * Make a scan buffer with special termination needed by flex.
945 	 */
946 	scanbuflen = slen;
947 	scanbuf = palloc(slen + 2);
948 	memcpy(scanbuf, str, slen);
949 	scanbuf[slen] = scanbuf[slen + 1] = YY_END_OF_BUFFER_CHAR;
950 	scanbufhandle = yy_scan_buffer(scanbuf, slen + 2);
951 
952 	/* initialize literal buffer to a reasonable but expansible size */
953 	literalalloc = 128;
954 	literalbuf = (char *) palloc(literalalloc);
955 	startlit();
956 
957 	BEGIN(INITIAL);
958 }
959 
960 
/*
 * Called after parsing is done to clean up after orafce_sql_scanner_init():
 * deletes the flex buffer handle and frees the palloc'd copy of the input.
 */
void
orafce_sql_scanner_finish(void)
{
	yy_delete_buffer(scanbufhandle);
	pfree(scanbuf);
}
970 
971 static void
972 addlit(char *ytext, int yleng)
973 {
974 	/* enlarge buffer if needed */
975 	if ((literallen+yleng) >= literalalloc)
976 	{
977 		do {
978 			literalalloc *= 2;
979 		} while ((literallen+yleng) >= literalalloc);
980 		literalbuf = (char *) repalloc(literalbuf, literalalloc);
981 	}
982 	/* append new data, add trailing null */
983 	memcpy(literalbuf+literallen, ytext, yleng);
984 	literallen += yleng;
985 	literalbuf[literallen] = '\0';
986 }
987 
988 
989 static void
990 addlitchar(unsigned char ychar)
991 {
992 	/* enlarge buffer if needed */
993 	if ((literallen+1) >= literalalloc)
994 	{
995 		literalalloc *= 2;
996 		literalbuf = (char *) repalloc(literalbuf, literalalloc);
997 	}
998 	/* append new data, add trailing null */
999 	literalbuf[literallen] = ychar;
1000 	literallen += 1;
1001 	literalbuf[literallen] = '\0';
1002 }
1003 
1004 
1005 /*
1006  * One might be tempted to write pstrdup(literalbuf) instead of this,
1007  * but for long literals this is much faster because the length is
1008  * already known.
1009  */
1010 static char *
1011 litbufdup(void)
1012 {
1013 	char *new;
1014 
1015 	new = palloc(literallen + 1);
1016 	memcpy(new, literalbuf, literallen+1);
1017 	return new;
1018 }
1019 
1020 
/*
 * unescape_single_char
 *		Translate the character that follows a backslash in an escape string.
 *
 * 'b', 'f', 'n', 'r' and 't' map to backspace, form feed, newline, carriage
 * return and tab respectively; every other character is returned unchanged.
 */
static unsigned char
unescape_single_char(unsigned char c)
{
	if (c == 'b')
		return '\b';
	if (c == 'f')
		return '\f';
	if (c == 'n')
		return '\n';
	if (c == 'r')
		return '\r';
	if (c == 't')
		return '\t';

	/* not a recognized escape letter: the character stands for itself */
	return c;
}
1040 
1041 
1042