%top{
/*-------------------------------------------------------------------------
 *
 * pgc.l
 *	  lexical scanner for ecpg
 *
 * This is a modified version of src/backend/parser/scan.l
 *
 * The ecpg scanner is not backup-free, so the fail rules are
 * only here to simplify syncing this file with scan.l.
 *
 *
 * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/interfaces/ecpg/preproc/pgc.l
 *
 *-------------------------------------------------------------------------
 */
#include "postgres_fe.h"

#include <ctype.h>
#include <limits.h>

#include "common/string.h"

#include "preproc_extern.h"
#include "preproc.h"
}

%{

/* LCOV_EXCL_START */

extern YYSTYPE base_yylval;

static int		xcdepth = 0;	/* depth of nesting in slash-star comments */
static char	   *dolqstart = NULL;	/* current $foo$ quote start string */

/*
 * literalbuf is used to accumulate literal values when multiple rules
 * are needed to parse a single literal. Call startlit to reset buffer
 * to empty, addlit to add text. Note that the buffer is permanently
 * malloc'd to the largest size needed so far in the current run.
 */
static char	   *literalbuf = NULL;	/* expandable buffer */
static int		literallen;			/* actual current length */
static int		literalalloc;		/* current allocated buffer size */

/* Used for detecting global state together with braces_open */
static int		parenths_open;

/* Used to tell parse_include() whether the command was #include or #include_next */
static bool		include_next;

/*
 * startlit() writes through literalbuf, so the buffer must already have
 * been allocated by the time the first literal is scanned.
 */
#define startlit()	(literalbuf[0] = '\0', literallen = 0)
static void addlit(char *ytext, int yleng);
static void addlitchar(unsigned char);
static int	process_integer_literal(const char *token, YYSTYPE *lval);
static void parse_include(void);
static bool ecpg_isspace(char ch);
static bool isdefine(void);
static bool isinformixdefine(void);

/*
 * Start of the multi-rule token (comment or literal) currently being
 * scanned. It is reset to NULL at the start of every yylex() call and set
 * to yytext by the rules that open such a token.
 */
char	   *token_start;

/* vars to keep track of start conditions when scanning literals */
static int		state_before_str_start;
static int		state_before_str_stop;

struct _yy_buffer
{
	YY_BUFFER_STATE		buffer;
	long				lineno;
	char			   *filename;
	struct _yy_buffer  *next;
} *yy_buffer = NULL;

static char *old;

/*
 * Vars for handling ifdef/elif/endif constructs. preproc_tos is the current
 * nesting depth of such constructs, and stacked_if_value[preproc_tos] is the
 * state for the innermost level. (For convenience, stacked_if_value[0] is
 * initialized as though we are in the active branch of some outermost IF.)
 * The active field is true if the current branch is active (being expanded).
 * The saw_active field is true if we have found any successful branch,
 * so that all subsequent branches of this level should be skipped.
 * The else_branch field is true if we've found an 'else' (so that another
 * 'else' or 'elif' at this level is an error.)
 * For IFs nested within an inactive branch, all branches always have active
 * set to false, but saw_active and else_branch are maintained normally.
 * ifcond is valid only while evaluating an if-condition; it's true if we
 * are doing ifdef, false if ifndef.
 */
#define MAX_NESTED_IF 128
static short	preproc_tos;
static bool		ifcond;
static struct _if_value
{
	bool		active;
	bool		saw_active;
	bool		else_branch;
} stacked_if_value[MAX_NESTED_IF];

%}

%option 8bit
%option never-interactive
%option nodefault
%option noinput
%option noyywrap
%option warn
%option yylineno
%option prefix="base_yy"

/*
 * OK, here is a short description of lex/flex rules behavior.
 * The longest pattern which matches an input string is always chosen.
 * For equal-length patterns, the first occurring in the rules list is chosen.
 * INITIAL is the starting state, to which all non-conditional rules apply.
 * Exclusive states change parsing rules while the state is active. When in
 * an exclusive state, only those rules defined for that state apply.
 *
 * We use exclusive states for quoted strings, extended comments,
 * and to eliminate parsing troubles for numeric strings.
 * Exclusive states:
 *  <xb> bit string literal
 *  <xc> extended C-style comments
 *  <xd> delimited identifiers (double-quoted identifiers)
 *  <xdc> double-quoted strings in C
 *  <xh> hexadecimal numeric string
 *  <xn> national character quoted strings
 *  <xq> standard quoted strings
 *  <xqs> quote stop (detect continued strings)
 *  <xe> extended quoted strings (support backslash escape sequences)
 *  <xqc> single-quoted strings in C
 *  <xdolq> $foo$ quoted strings
 *  <xui> quoted identifier with Unicode escapes
 *  <xus> quoted string with Unicode escapes
 *  <xcond> condition of an EXEC SQL IFDEF construct
 *  <xskip> skipping the inactive part of an EXEC SQL IFDEF construct
 *
 * Note: we intentionally don't mimic the backend's <xeu> state; we have
 * no need to distinguish it from <xe> state.
147 * 148 * Remember to add an <<EOF>> case whenever you add a new exclusive state! 149 * The default one is probably not the right thing. 150 */ 151 152 %x xb 153 %x xc 154 %x xd 155 %x xdc 156 %x xh 157 %x xn 158 %x xq 159 %x xqs 160 %x xe 161 %x xqc 162 %x xdolq 163 %x xui 164 %x xus 165 %x xcond 166 %x xskip 167 168 /* Additional exclusive states that are specific to ECPG */ 169 %x C SQL incl def def_ident undef 170 171 /* 172 * In order to make the world safe for Windows and Mac clients as well as 173 * Unix ones, we accept either \n or \r as a newline. A DOS-style \r\n 174 * sequence will be seen as two successive newlines, but that doesn't cause 175 * any problems. SQL-style comments, which start with -- and extend to the 176 * next newline, are treated as equivalent to a single whitespace character. 177 * 178 * NOTE a fine point: if there is no newline following --, we will absorb 179 * everything to the end of the input as a comment. This is correct. Older 180 * versions of Postgres failed to recognize -- as a comment if the input 181 * did not end with a newline. 182 * 183 * XXX perhaps \f (formfeed) should be treated as a newline as well? 184 * 185 * XXX if you change the set of whitespace characters, fix ecpg_isspace() 186 * to agree. 187 */ 188 189 space [ \t\n\r\f] 190 horiz_space [ \t\f] 191 newline [\n\r] 192 non_newline [^\n\r] 193 194 comment ("--"{non_newline}*) 195 196 whitespace ({space}+|{comment}) 197 198 /* 199 * SQL requires at least one newline in the whitespace separating 200 * string literals that are to be concatenated. Silly, but who are we 201 * to argue? Note that {whitespace_with_newline} should not have * after 202 * it, whereas {whitespace} should generally have a * after it... 
203 */ 204 205 horiz_whitespace ({horiz_space}|{comment}) 206 whitespace_with_newline ({horiz_whitespace}*{newline}{whitespace}*) 207 208 quote ' 209 /* If we see {quote} then {quotecontinue}, the quoted string continues */ 210 quotecontinue {whitespace_with_newline}{quote} 211 212 /* 213 * {quotecontinuefail} is needed to avoid lexer backup when we fail to match 214 * {quotecontinue}. It might seem that this could just be {whitespace}*, 215 * but if there's a dash after {whitespace_with_newline}, it must be consumed 216 * to see if there's another dash --- which would start a {comment} and thus 217 * allow continuation of the {quotecontinue} token. 218 */ 219 quotecontinuefail {whitespace}*"-"? 220 221 /* Bit string 222 */ 223 xbstart [bB]{quote} 224 xbinside [^']* 225 226 /* Hexadecimal number */ 227 xhstart [xX]{quote} 228 xhinside [^']* 229 230 /* National character */ 231 xnstart [nN]{quote} 232 233 /* Quoted string that allows backslash escapes */ 234 xestart [eE]{quote} 235 xeinside [^\\']+ 236 xeescape [\\][^0-7] 237 xeoctesc [\\][0-7]{1,3} 238 xehexesc [\\]x[0-9A-Fa-f]{1,2} 239 xeunicode [\\](u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8}) 240 241 /* Extended quote 242 * xqdouble implements embedded quote, '''' 243 */ 244 xqstart {quote} 245 xqdouble {quote}{quote} 246 xqcquote [\\]{quote} 247 xqinside [^']+ 248 249 /* $foo$ style quotes ("dollar quoting") 250 * The quoted string starts with $foo$ where "foo" is an optional string 251 * in the form of an identifier, except that it may not contain "$", 252 * and extends to the first occurrence of an identical string. 253 * There is *no* processing of the quoted text. 254 * 255 * {dolqfailed} is an error rule to avoid scanner backup when {dolqdelim} 256 * fails to match its trailing "$". 
257 */ 258 dolq_start [A-Za-z\200-\377_] 259 dolq_cont [A-Za-z\200-\377_0-9] 260 dolqdelim \$({dolq_start}{dolq_cont}*)?\$ 261 dolqfailed \${dolq_start}{dolq_cont}* 262 dolqinside [^$]+ 263 264 /* Double quote 265 * Allows embedded spaces and other special characters into identifiers. 266 */ 267 dquote \" 268 xdstart {dquote} 269 xdstop {dquote} 270 xddouble {dquote}{dquote} 271 xdinside [^"]+ 272 273 /* Quoted identifier with Unicode escapes */ 274 xuistart [uU]&{dquote} 275 276 /* Quoted string with Unicode escapes */ 277 xusstart [uU]&{quote} 278 279 /* special stuff for C strings */ 280 xdcqq \\\\ 281 xdcqdq \\\" 282 xdcother [^"] 283 xdcinside ({xdcqq}|{xdcqdq}|{xdcother}) 284 285 286 /* C-style comments 287 * 288 * The "extended comment" syntax closely resembles allowable operator syntax. 289 * The tricky part here is to get lex to recognize a string starting with 290 * slash-star as a comment, when interpreting it as an operator would produce 291 * a longer match --- remember lex will prefer a longer match! Also, if we 292 * have something like plus-slash-star, lex will think this is a 3-character 293 * operator whereas we want to see it as a + operator and a comment start. 294 * The solution is two-fold: 295 * 1. append {op_chars}* to xcstart so that it matches as much text as 296 * {operator} would. Then the tie-breaker (first matching rule of same 297 * length) ensures xcstart wins. We put back the extra stuff with yyless() 298 * in case it contains a star-slash that should terminate the comment. 299 * 2. In the operator rule, check for slash-star within the operator, and 300 * if found throw it back with yyless(). This handles the plus-slash-star 301 * problem. 302 * Dash-dash comments have similar interactions with the operator rule. 
303 */ 304 xcstart \/\*{op_chars}* 305 xcstop \*+\/ 306 xcinside [^*/]+ 307 308 digit [0-9] 309 ident_start [A-Za-z\200-\377_] 310 ident_cont [A-Za-z\200-\377_0-9\$] 311 312 identifier {ident_start}{ident_cont}* 313 314 array ({ident_cont}|{whitespace}|[\[\]\+\-\*\%\/\(\)\>\.])* 315 316 /* Assorted special-case operators and operator-like tokens */ 317 typecast "::" 318 dot_dot \.\. 319 colon_equals ":=" 320 321 /* 322 * These operator-like tokens (unlike the above ones) also match the {operator} 323 * rule, which means that they might be overridden by a longer match if they 324 * are followed by a comment start or a + or - character. Accordingly, if you 325 * add to this list, you must also add corresponding code to the {operator} 326 * block to return the correct token in such cases. (This is not needed in 327 * psqlscan.l since the token value is ignored there.) 328 */ 329 equals_greater "=>" 330 less_equals "<=" 331 greater_equals ">=" 332 less_greater "<>" 333 not_equals "!=" 334 335 /* 336 * "self" is the set of chars that should be returned as single-character 337 * tokens. "op_chars" is the set of chars that can make up "Op" tokens, 338 * which can be one or more characters long (but if a single-char token 339 * appears in the "self" set, it is not to be returned as an Op). Note 340 * that the sets overlap, but each has some chars that are not in the other. 341 * 342 * If you change either set, adjust the character lists appearing in the 343 * rule for "operator"! 344 */ 345 self [,()\[\].;\:\+\-\*\/\%\^\<\>\=] 346 op_chars [\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=] 347 operator {op_chars}+ 348 349 /* we no longer allow unary minus in numbers. 350 * instead we pass it separately to parser. there it gets 351 * coerced via doNegate() -- Leon aug 20 1999 352 * 353 * {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 10. 
354 * 355 * {realfail1} and {realfail2} are added to prevent the need for scanner 356 * backup when the {real} rule fails to match completely. 357 */ 358 359 integer {digit}+ 360 decimal (({digit}*\.{digit}+)|({digit}+\.{digit}*)) 361 decimalfail {digit}+\.\. 362 real ({integer}|{decimal})[Ee][-+]?{digit}+ 363 realfail1 ({integer}|{decimal})[Ee] 364 realfail2 ({integer}|{decimal})[Ee][-+] 365 366 param \${integer} 367 368 /* special characters for other dbms */ 369 /* we have to react differently in compat mode */ 370 informix_special [\$] 371 372 other . 373 374 /* 375 * Dollar quoted strings are totally opaque, and no escaping is done on them. 376 * Other quoted strings must allow some special characters such as single-quote 377 * and newline. 378 * Embedded single-quotes are implemented both in the SQL standard 379 * style of two adjacent single quotes "''" and in the Postgres/Java style 380 * of escaped-quote "\'". 381 * Other embedded escaped characters are matched explicitly and the leading 382 * backslash is dropped from the string. 383 * Note that xcstart must appear before operator, as explained above! 384 * Also whitespace (comment) must appear before operator. 
385 */ 386 387 /* some stuff needed for ecpg */ 388 exec [eE][xX][eE][cC] 389 sql [sS][qQ][lL] 390 define [dD][eE][fF][iI][nN][eE] 391 include [iI][nN][cC][lL][uU][dD][eE] 392 include_next [iI][nN][cC][lL][uU][dD][eE]_[nN][eE][xX][tT] 393 import [iI][mM][pP][oO][rR][tT] 394 undef [uU][nN][dD][eE][fF] 395 396 /* C version of hex number */ 397 xch 0[xX][0-9A-Fa-f]* 398 399 ccomment "//".*\n 400 401 if [iI][fF] 402 ifdef [iI][fF][dD][eE][fF] 403 ifndef [iI][fF][nN][dD][eE][fF] 404 else [eE][lL][sS][eE] 405 elif [eE][lL][iI][fF] 406 endif [eE][nN][dD][iI][fF] 407 408 struct [sS][tT][rR][uU][cC][tT] 409 410 exec_sql {exec}{space}*{sql}{space}* 411 ipdigit ({digit}|{digit}{digit}|{digit}{digit}{digit}) 412 ip {ipdigit}\.{ipdigit}\.{ipdigit}\.{ipdigit} 413 414 /* we might want to parse all cpp include files */ 415 cppinclude {space}*#{include}{space}* 416 cppinclude_next {space}*#{include_next}{space}* 417 418 /* take care of cpp lines, they may also be continued */ 419 /* first a general line for all commands not starting with "i" */ 420 /* and then the other commands starting with "i", we have to add these 421 * separately because the cppline production would match on "include" too 422 */ 423 cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+\/)|.|\\{space}*{newline})*{newline} 424 425 %% 426 427 %{ 428 /* code to execute during start of each call of yylex() */ 429 token_start = NULL; 430 %} 431 432 <SQL>{ 433 {whitespace} { 434 /* ignore */ 435 } 436 } /* <SQL> */ 437 438 <C,SQL>{ 439 {xcstart} { 440 token_start = yytext; 441 state_before_str_start = YYSTATE; 442 xcdepth = 0; 443 BEGIN(xc); 444 /* Put back any characters past slash-star; see above */ 445 yyless(2); 446 fputs("/*", yyout); 447 } 448 } /* <C,SQL> */ 449 450 <xc>{ 451 {xcstart} { 452 if (state_before_str_start == SQL) 453 { 454 xcdepth++; 455 /* Put back any characters past slash-star; see above */ 456 yyless(2); 457 fputs("/_*", yyout); 458 } 459 else if 
(state_before_str_start == C) 460 { 461 ECHO; 462 } 463 } 464 465 {xcstop} { 466 if (state_before_str_start == SQL) 467 { 468 if (xcdepth <= 0) 469 { 470 ECHO; 471 BEGIN(SQL); 472 token_start = NULL; 473 } 474 else 475 { 476 xcdepth--; 477 fputs("*_/", yyout); 478 } 479 } 480 else if (state_before_str_start == C) 481 { 482 ECHO; 483 BEGIN(C); 484 token_start = NULL; 485 } 486 } 487 488 {xcinside} { 489 ECHO; 490 } 491 492 {op_chars} { 493 ECHO; 494 } 495 496 \*+ { 497 ECHO; 498 } 499 500 <<EOF>> { 501 mmfatal(PARSE_ERROR, "unterminated /* comment"); 502 } 503 } /* <xc> */ 504 505 <SQL>{ 506 {xbstart} { 507 token_start = yytext; 508 state_before_str_start = YYSTATE; 509 BEGIN(xb); 510 startlit(); 511 } 512 } /* <SQL> */ 513 514 <xh>{xhinside} | 515 <xb>{xbinside} { 516 addlit(yytext, yyleng); 517 } 518 <xb><<EOF>> { mmfatal(PARSE_ERROR, "unterminated bit string literal"); } 519 520 <SQL>{xhstart} { 521 token_start = yytext; 522 state_before_str_start = YYSTATE; 523 BEGIN(xh); 524 startlit(); 525 } 526 <xh><<EOF>> { mmfatal(PARSE_ERROR, "unterminated hexadecimal string literal"); } 527 528 <C>{xqstart} { 529 token_start = yytext; 530 state_before_str_start = YYSTATE; 531 BEGIN(xqc); 532 startlit(); 533 } 534 535 <SQL>{ 536 {xnstart} { 537 /* National character. 538 * Transfer it as-is to the backend. 539 */ 540 token_start = yytext; 541 state_before_str_start = YYSTATE; 542 BEGIN(xn); 543 startlit(); 544 } 545 546 {xqstart} { 547 token_start = yytext; 548 state_before_str_start = YYSTATE; 549 BEGIN(xq); 550 startlit(); 551 } 552 {xestart} { 553 token_start = yytext; 554 state_before_str_start = YYSTATE; 555 BEGIN(xe); 556 startlit(); 557 } 558 {xusstart} { 559 token_start = yytext; 560 state_before_str_start = YYSTATE; 561 BEGIN(xus); 562 startlit(); 563 } 564 } /* <SQL> */ 565 566 <xb,xh,xq,xqc,xe,xn,xus>{quote} { 567 /* 568 * When we are scanning a quoted string and see an end 569 * quote, we must look ahead for a possible continuation. 
570 * If we don't see one, we know the end quote was in fact 571 * the end of the string. To reduce the lexer table size, 572 * we use a single "xqs" state to do the lookahead for all 573 * types of strings. 574 */ 575 state_before_str_stop = YYSTATE; 576 BEGIN(xqs); 577 } 578 <xqs>{quotecontinue} { 579 /* 580 * Found a quote continuation, so return to the in-quote 581 * state and continue scanning the literal. Nothing is 582 * added to the literal's contents. 583 */ 584 BEGIN(state_before_str_stop); 585 } 586 <xqs>{quotecontinuefail} | 587 <xqs>{other} | 588 <xqs><<EOF>> { 589 /* 590 * Failed to see a quote continuation. Throw back 591 * everything after the end quote, and handle the string 592 * according to the state we were in previously. 593 */ 594 yyless(0); 595 BEGIN(state_before_str_start); 596 597 switch (state_before_str_stop) 598 { 599 case xb: 600 if (literalbuf[strspn(literalbuf, "01")] != '\0') 601 mmerror(PARSE_ERROR, ET_ERROR, "invalid bit string literal"); 602 base_yylval.str = psprintf("b'%s'", literalbuf); 603 return BCONST; 604 case xh: 605 if (literalbuf[strspn(literalbuf, "0123456789abcdefABCDEF")] != '\0') 606 mmerror(PARSE_ERROR, ET_ERROR, "invalid hex string literal"); 607 base_yylval.str = psprintf("x'%s'", literalbuf); 608 return XCONST; 609 case xq: 610 /* fallthrough */ 611 case xqc: 612 base_yylval.str = psprintf("'%s'", literalbuf); 613 return SCONST; 614 case xe: 615 base_yylval.str = psprintf("E'%s'", literalbuf); 616 return SCONST; 617 case xn: 618 base_yylval.str = psprintf("N'%s'", literalbuf); 619 return SCONST; 620 case xus: 621 base_yylval.str = psprintf("U&'%s'", literalbuf); 622 return USCONST; 623 default: 624 mmfatal(PARSE_ERROR, "unhandled previous state in xqs\n"); 625 } 626 } 627 628 <xq,xe,xn,xus>{xqdouble} { addlitchar('\''); } 629 <xqc>{xqcquote} { 630 addlitchar('\\'); 631 addlitchar('\''); 632 } 633 <xq,xqc,xn,xus>{xqinside} { addlit(yytext, yyleng); } 634 <xe>{xeinside} { 635 addlit(yytext, yyleng); 636 } 637 
<xe>{xeunicode} {
					addlit(yytext, yyleng);
				}
<xe>{xeescape}	{
					addlit(yytext, yyleng);
				}
<xe>{xeoctesc}	{
					addlit(yytext, yyleng);
				}
<xe>{xehexesc}	{
					addlit(yytext, yyleng);
				}
<xe>.			{
					/* This is only needed for \ just before EOF */
					addlitchar(yytext[0]);
				}
<xq,xqc,xe,xn,xus><<EOF>>	{ mmfatal(PARSE_ERROR, "unterminated quoted string"); }

<SQL>{
{dolqdelim}		{
					token_start = yytext;
					/* free(NULL) is a no-op, so no guard is needed */
					free(dolqstart);
					dolqstart = mm_strdup(yytext);
					BEGIN(xdolq);
					startlit();
					/* the delimiter itself is part of the emitted literal */
					addlit(yytext, yyleng);
				}
{dolqfailed}	{
					/* throw back all but the initial "$" */
					yyless(1);
					/* and treat it as {other} */
					return yytext[0];
				}
} /* <SQL> */

<xdolq>{dolqdelim} {
					if (strcmp(yytext, dolqstart) == 0)
					{
						addlit(yytext, yyleng);
						free(dolqstart);
						dolqstart = NULL;
						BEGIN(SQL);
						base_yylval.str = mm_strdup(literalbuf);
						return SCONST;
					}
					else
					{
						/*
						 * When we fail to match $...$ to dolqstart, transfer
						 * the $... part to the output, but put back the final
						 * $ for rescanning. Consider $delim$...$junk$delim$
						 *
						 * addlit() must come first: yyless() changes yyleng.
						 */
						addlit(yytext, yyleng - 1);
						yyless(yyleng - 1);
					}
				}
<xdolq>{dolqinside} {
					addlit(yytext, yyleng);
				}
<xdolq>{dolqfailed} {
					addlit(yytext, yyleng);
				}
<xdolq>.		{
					/* single quote or dollar sign */
					addlitchar(yytext[0]);
				}
<xdolq><<EOF>>	{ mmfatal(PARSE_ERROR, "unterminated dollar-quoted string"); }

<SQL>{
{xdstart}		{
					state_before_str_start = YYSTATE;
					BEGIN(xd);
					startlit();
				}
{xuistart}		{
					state_before_str_start = YYSTATE;
					BEGIN(xui);
					startlit();
				}
} /* <SQL> */

<xd>{xdstop}	{
					BEGIN(state_before_str_start);
					if (literallen == 0)
						mmerror(PARSE_ERROR, ET_ERROR, "zero-length delimited identifier");
					/* The backend will truncate the identifier here.
We do not as it does not change the result. */
					base_yylval.str = mm_strdup(literalbuf);
					return CSTRING;
				}
<xdc>{xdstop}	{
					BEGIN(state_before_str_start);
					base_yylval.str = mm_strdup(literalbuf);
					return CSTRING;
				}
<xui>{dquote}	{
					BEGIN(state_before_str_start);
					/*
					 * The {xuistart} rule only calls startlit(), so the
					 * buffer holds just the identifier text (the U&"..."
					 * wrapper is added below). An empty identifier therefore
					 * has length 0; testing for 2 would reject every
					 * two-byte identifier and accept U&"".
					 */
					if (literallen == 0)
						mmerror(PARSE_ERROR, ET_ERROR, "zero-length delimited identifier");
					/* The backend will truncate the identifier here. We do not as it does not change the result. */
					base_yylval.str = psprintf("U&\"%s\"", literalbuf);
					return UIDENT;
				}
<xd,xui>{xddouble}	{
					/* a doubled quote stands for one literal double quote */
					addlitchar('"');
				}
<xd,xui>{xdinside}	{
					addlit(yytext, yyleng);
				}
<xd,xui><<EOF>>	{ mmfatal(PARSE_ERROR, "unterminated quoted identifier"); }
<C>{xdstart}	{
					state_before_str_start = YYSTATE;
					BEGIN(xdc);
					startlit();
				}
<xdc>{xdcinside}	{
					addlit(yytext, yyleng);
				}
<xdc><<EOF>>	{ mmfatal(PARSE_ERROR, "unterminated quoted string"); }

<SQL>{
{typecast}		{
					return TYPECAST;
				}

{dot_dot}		{
					return DOT_DOT;
				}

{colon_equals}	{
					return COLON_EQUALS;
				}

{equals_greater} {
					return EQUALS_GREATER;
				}

{less_equals}	{
					return LESS_EQUALS;
				}

{greater_equals} {
					return GREATER_EQUALS;
				}

{less_greater}	{
					/* We accept both "<>" and "!=" as meaning NOT_EQUALS */
					return NOT_EQUALS;
				}

{not_equals}	{
					/* We accept both "<>" and "!=" as meaning NOT_EQUALS */
					return NOT_EQUALS;
				}

{informix_special}	{
					/* are we simulating Informix? */
					if (INFORMIX_MODE)
					{
						/* push a ':' back so the input is rescanned with ':' in place of '$' */
						unput(':');
					}
					else
						return yytext[0];
				}

{self}			{
					/*
					 * We may find a ';' inside a structure
					 * definition in a TYPE or VAR statement.
					 * This is not an EOL marker.
807 */ 808 if (yytext[0] == ';' && struct_level == 0) 809 BEGIN(C); 810 return yytext[0]; 811 } 812 813 {operator} { 814 /* 815 * Check for embedded slash-star or dash-dash; those 816 * are comment starts, so operator must stop there. 817 * Note that slash-star or dash-dash at the first 818 * character will match a prior rule, not this one. 819 */ 820 int nchars = yyleng; 821 char *slashstar = strstr(yytext, "/*"); 822 char *dashdash = strstr(yytext, "--"); 823 824 if (slashstar && dashdash) 825 { 826 /* if both appear, take the first one */ 827 if (slashstar > dashdash) 828 slashstar = dashdash; 829 } 830 else if (!slashstar) 831 slashstar = dashdash; 832 if (slashstar) 833 nchars = slashstar - yytext; 834 835 /* 836 * For SQL compatibility, '+' and '-' cannot be the 837 * last char of a multi-char operator unless the operator 838 * contains chars that are not in SQL operators. 839 * The idea is to lex '=-' as two operators, but not 840 * to forbid operator names like '?-' that could not be 841 * sequences of SQL operators. 842 */ 843 if (nchars > 1 && 844 (yytext[nchars - 1] == '+' || 845 yytext[nchars - 1] == '-')) 846 { 847 int ic; 848 849 for (ic = nchars - 2; ic >= 0; ic--) 850 { 851 char c = yytext[ic]; 852 if (c == '~' || c == '!' || c == '@' || 853 c == '#' || c == '^' || c == '&' || 854 c == '|' || c == '`' || c == '?' || 855 c == '%') 856 break; 857 } 858 if (ic < 0) 859 { 860 /* 861 * didn't find a qualifying character, so remove 862 * all trailing [+-] 863 */ 864 do { 865 nchars--; 866 } while (nchars > 1 && 867 (yytext[nchars - 1] == '+' || 868 yytext[nchars - 1] == '-')); 869 } 870 } 871 872 if (nchars < yyleng) 873 { 874 /* Strip the unwanted chars from the token */ 875 yyless(nchars); 876 /* 877 * If what we have left is only one char, and it's 878 * one of the characters matching "self", then 879 * return it as a character token the same way 880 * that the "self" rule would have. 
881 */ 882 if (nchars == 1 && 883 strchr(",()[].;:+-*/%^<>=", yytext[0])) 884 return yytext[0]; 885 /* 886 * Likewise, if what we have left is two chars, and 887 * those match the tokens ">=", "<=", "=>", "<>" or 888 * "!=", then we must return the appropriate token 889 * rather than the generic Op. 890 */ 891 if (nchars == 2) 892 { 893 if (yytext[0] == '=' && yytext[1] == '>') 894 return EQUALS_GREATER; 895 if (yytext[0] == '>' && yytext[1] == '=') 896 return GREATER_EQUALS; 897 if (yytext[0] == '<' && yytext[1] == '=') 898 return LESS_EQUALS; 899 if (yytext[0] == '<' && yytext[1] == '>') 900 return NOT_EQUALS; 901 if (yytext[0] == '!' && yytext[1] == '=') 902 return NOT_EQUALS; 903 } 904 } 905 906 base_yylval.str = mm_strdup(yytext); 907 return Op; 908 } 909 910 {param} { 911 base_yylval.ival = atol(yytext+1); 912 return PARAM; 913 } 914 915 {ip} { 916 base_yylval.str = mm_strdup(yytext); 917 return IP; 918 } 919 } /* <SQL> */ 920 921 <C,SQL>{ 922 {integer} { 923 return process_integer_literal(yytext, &base_yylval); 924 } 925 {decimal} { 926 base_yylval.str = mm_strdup(yytext); 927 return FCONST; 928 } 929 {decimalfail} { 930 /* throw back the .., and treat as integer */ 931 yyless(yyleng - 2); 932 return process_integer_literal(yytext, &base_yylval); 933 } 934 {real} { 935 base_yylval.str = mm_strdup(yytext); 936 return FCONST; 937 } 938 {realfail1} { 939 /* 940 * throw back the [Ee], and figure out whether what 941 * remains is an {integer} or {decimal}. 
942 */ 943 yyless(yyleng - 1); 944 return process_integer_literal(yytext, &base_yylval); 945 } 946 {realfail2} { 947 /* throw back the [Ee][+-], and proceed as above */ 948 yyless(yyleng - 2); 949 return process_integer_literal(yytext, &base_yylval); 950 } 951 } /* <C,SQL> */ 952 953 <SQL>{ 954 :{identifier}((("->"|\.){identifier})|(\[{array}\]))* { 955 base_yylval.str = mm_strdup(yytext+1); 956 return CVARIABLE; 957 } 958 959 {identifier} { 960 if (!isdefine()) 961 { 962 int kwvalue; 963 964 /* Is it an SQL/ECPG keyword? */ 965 kwvalue = ScanECPGKeywordLookup(yytext); 966 if (kwvalue >= 0) 967 return kwvalue; 968 969 /* Is it a C keyword? */ 970 kwvalue = ScanCKeywordLookup(yytext); 971 if (kwvalue >= 0) 972 return kwvalue; 973 974 /* 975 * None of the above. Return it as an identifier. 976 * 977 * The backend will attempt to truncate and case-fold 978 * the identifier, but I see no good reason for ecpg 979 * to do so; that's just another way that ecpg could get 980 * out of step with the backend. 981 */ 982 base_yylval.str = mm_strdup(yytext); 983 return IDENT; 984 } 985 } 986 987 {other} { 988 return yytext[0]; 989 } 990 } /* <SQL> */ 991 992 /* 993 * Begin ECPG-specific rules 994 */ 995 996 <C>{exec_sql} { BEGIN(SQL); return SQL_START; } 997 <C>{informix_special} { 998 /* are we simulating Informix? 
*/
						if (INFORMIX_MODE)
						{
							BEGIN(SQL);
							return SQL_START;
						}
						else
							return S_ANYTHING;
					}
<C>{ccomment}		{ ECHO; }
<C>{xch}			{
						char	   *endptr;

						errno = 0;
						base_yylval.ival = strtoul(yytext, &endptr, 16);
						/*
						 * A bare "0x" (no hex digits) or an out-of-range
						 * value is handed on as a string instead.
						 */
						if (*endptr != '\0' || errno == ERANGE)
						{
							errno = 0;
							base_yylval.str = mm_strdup(yytext);
							return SCONST;
						}
						return ICONST;
					}
<C>{cppinclude}		{
						if (system_includes)
						{
							include_next = false;
							BEGIN(incl);
						}
						else
						{
							base_yylval.str = mm_strdup(yytext);
							return CPP_LINE;
						}
					}
<C>{cppinclude_next}	{
						if (system_includes)
						{
							include_next = true;
							BEGIN(incl);
						}
						else
						{
							base_yylval.str = mm_strdup(yytext);
							return CPP_LINE;
						}
					}
<C,SQL>{cppline}	{
						base_yylval.str = mm_strdup(yytext);
						return CPP_LINE;
					}
<C>{identifier}		{
						/*
						 * Try to detect a function name:
						 * look for identifiers at the global scope
						 * keep the last identifier before the first '(' and '{'
						 */
						if (braces_open == 0 && parenths_open == 0)
						{
							/* free(NULL) is a no-op, so no guard is needed */
							free(current_function);
							current_function = mm_strdup(yytext);
						}
						/* Informix uses SQL defines only in SQL space */
						/* however, some defines have to be taken care of for compatibility */
						if ((!INFORMIX_MODE || !isinformixdefine()) && !isdefine())
						{
							int		kwvalue;

							kwvalue = ScanCKeywordLookup(yytext);
							if (kwvalue >= 0)
								return kwvalue;
							else
							{
								base_yylval.str = mm_strdup(yytext);
								return IDENT;
							}
						}
					}
<C>{xcstop}			{ mmerror(PARSE_ERROR, ET_ERROR, "nested /* ... */ comments"); }
<C>":"				{ return ':'; }
<C>";"				{ return ';'; }
<C>","				{ return ','; }
<C>"*"				{ return '*'; }
<C>"%"				{ return '%'; }
<C>"/"				{ return '/'; }
<C>"+"				{ return '+'; }
<C>"-"				{ return '-'; }
<C>"("				{ parenths_open++; return '('; }
<C>")"				{ parenths_open--; return ')'; }
<C,xskip>{space}	{ ECHO; }
<C>\{				{ return '{'; }
<C>\}				{ return '}'; }
<C>\[				{ return '['; }
<C>\]				{ return ']'; }
<C>\=				{ return '='; }
<C>"->"				{ return S_MEMBER; }
<C>">>"				{ return S_RSHIFT; }
<C>"<<"				{ return S_LSHIFT; }
<C>"||"				{ return S_OR; }
<C>"&&"				{ return S_AND; }
<C>"++"				{ return S_INC; }
<C>"--"				{ return S_DEC; }
<C>"=="				{ return S_EQUAL; }
<C>"!="				{ return S_NEQUAL; }
<C>"+="				{ return S_ADD; }
<C>"-="				{ return S_SUB; }
<C>"*="				{ return S_MUL; }
<C>"/="				{ return S_DIV; }
<C>"%="				{ return S_MOD; }
<C>"->*"			{ return S_MEMPOINT; }
<C>".*"				{ return S_DOTPOINT; }
<C>{other}			{ return S_ANYTHING; }
<C>{exec_sql}{define}{space}*	{ BEGIN(def_ident); }
<C>{informix_special}{define}{space}*	{
						/* are we simulating Informix? */
						if (INFORMIX_MODE)
						{
							BEGIN(def_ident);
						}
						else
						{
							/* not Informix: keep only the '$' and rescan the rest */
							yyless(1);
							return S_ANYTHING;
						}
					}
<C>{exec_sql}{undef}{space}*	{ BEGIN(undef); }
<C>{informix_special}{undef}{space}* {
						/* are we simulating Informix? */
						if (INFORMIX_MODE)
						{
							BEGIN(undef);
						}
						else
						{
							yyless(1);
							return S_ANYTHING;
						}
					}
<undef>{identifier}{space}*";" {
					struct _defines *ptr, *ptr2 = NULL;
					int i;

					/*
					 * Skip the ";" and trailing whitespace.
Note that yytext
					 * contains at least one non-space character plus the ";"
					 */
					for (i = strlen(yytext)-2;
						 i > 0 && ecpg_isspace(yytext[i]);
						 i-- )
						;
					yytext[i+1] = '\0';


					/* unlink and free the matching entry; ptr2 trails ptr by one node */
					for (ptr = defines; ptr != NULL; ptr2 = ptr, ptr = ptr->next)
					{
						if (strcmp(yytext, ptr->olddef) == 0)
						{
							if (ptr2 == NULL)
								defines = ptr->next;
							else
								ptr2->next = ptr->next;
							free(ptr->newdef);
							free(ptr->olddef);
							free(ptr);
							break;
						}
					}

					BEGIN(C);
				}
<undef>{other}|\n {
						mmfatal(PARSE_ERROR, "missing identifier in EXEC SQL UNDEF command");
						yyterminate();
				}
<C>{exec_sql}{include}{space}*	{ BEGIN(incl); }
<C>{informix_special}{include}{space}* {
					  /* are we simulating Informix? */
					  if (INFORMIX_MODE)
					  {
						  BEGIN(incl);
					  }
					  else
					  {
						  yyless(1);
						  return S_ANYTHING;
					  }
					}
<C,xskip>{exec_sql}{ifdef}{space}* {
					  /* slot 0 is the implicit outermost level, so usable depth is MAX_NESTED_IF-1 */
					  if (preproc_tos >= MAX_NESTED_IF-1)
						  mmfatal(PARSE_ERROR, "too many nested EXEC SQL IFDEF conditions");
					  preproc_tos++;
					  stacked_if_value[preproc_tos].active = false;
					  stacked_if_value[preproc_tos].saw_active = false;
					  stacked_if_value[preproc_tos].else_branch = false;
					  ifcond = true;
					  BEGIN(xcond);
					}
<C,xskip>{informix_special}{ifdef}{space}* {
					  /* are we simulating Informix? */
					  if (INFORMIX_MODE)
					  {
						  if (preproc_tos >= MAX_NESTED_IF-1)
							  mmfatal(PARSE_ERROR, "too many nested EXEC SQL IFDEF conditions");
						  preproc_tos++;
						  stacked_if_value[preproc_tos].active = false;
						  stacked_if_value[preproc_tos].saw_active = false;
						  stacked_if_value[preproc_tos].else_branch = false;
						  ifcond = true;
						  BEGIN(xcond);
					  }
					  else
					  {
						  yyless(1);
						  return S_ANYTHING;
					  }
					}
<C,xskip>{exec_sql}{ifndef}{space}* {
					  if (preproc_tos >= MAX_NESTED_IF-1)
						  mmfatal(PARSE_ERROR, "too many nested EXEC SQL IFDEF conditions");
					  preproc_tos++;
					  stacked_if_value[preproc_tos].active = false;
					  stacked_if_value[preproc_tos].saw_active = false;
					  stacked_if_value[preproc_tos].else_branch = false;
					  ifcond = false;
					  BEGIN(xcond);
					}
<C,xskip>{informix_special}{ifndef}{space}* {
					  /* are we simulating Informix? */
					  if (INFORMIX_MODE)
					  {
						  if (preproc_tos >= MAX_NESTED_IF-1)
							  mmfatal(PARSE_ERROR, "too many nested EXEC SQL IFDEF conditions");
						  preproc_tos++;
						  stacked_if_value[preproc_tos].active = false;
						  stacked_if_value[preproc_tos].saw_active = false;
						  stacked_if_value[preproc_tos].else_branch = false;
						  ifcond = false;
						  BEGIN(xcond);
					  }
					  else
					  {
						  yyless(1);
						  return S_ANYTHING;
					  }
					}
<C,xskip>{exec_sql}{elif}{space}*	{
						if (preproc_tos == 0)
							mmfatal(PARSE_ERROR, "missing matching \"EXEC SQL IFDEF\" / \"EXEC SQL IFNDEF\"");
						if (stacked_if_value[preproc_tos].else_branch)
							mmfatal(PARSE_ERROR, "missing \"EXEC SQL ENDIF;\"");
						ifcond = true;
						BEGIN(xcond);
					}
<C,xskip>{informix_special}{elif}{space}* {
					/* are we simulating Informix? */
					if (INFORMIX_MODE)
					{
						if (preproc_tos == 0)
							mmfatal(PARSE_ERROR, "missing matching \"EXEC SQL IFDEF\" / \"EXEC SQL IFNDEF\"");
						if (stacked_if_value[preproc_tos].else_branch)
							mmfatal(PARSE_ERROR, "missing \"EXEC SQL ENDIF;\"");
						ifcond = true;
						BEGIN(xcond);
					}
					else
					{
						yyless(1);
						return S_ANYTHING;
					}
				}

<C,xskip>{exec_sql}{else}{space}*";" {	/* only exec sql endif pops the stack, so take care of duplicated 'else' */
					if ( preproc_tos == 0 )
						mmfatal(PARSE_ERROR, "missing matching \"EXEC SQL IFDEF\" / \"EXEC SQL IFNDEF\"");
					else if (stacked_if_value[preproc_tos].else_branch)
						mmfatal(PARSE_ERROR, "more than one EXEC SQL ELSE");
					else
					{
						stacked_if_value[preproc_tos].else_branch = true;
						/*
						 * The else branch is active only if the enclosing
						 * level is active and no earlier branch was taken.
						 */
						stacked_if_value[preproc_tos].active =
							(stacked_if_value[preproc_tos-1].active &&
							 !stacked_if_value[preproc_tos].saw_active);
						stacked_if_value[preproc_tos].saw_active = true;

						if (stacked_if_value[preproc_tos].active)
							BEGIN(C);
						else
							BEGIN(xskip);
					}
				}
<C,xskip>{informix_special}{else}{space}*";"	{
					/* are we simulating Informix?
*/ 1291 if (INFORMIX_MODE) 1292 { 1293 if ( preproc_tos == 0 ) 1294 mmfatal(PARSE_ERROR, "missing matching \"EXEC SQL IFDEF\" / \"EXEC SQL IFNDEF\""); 1295 else if (stacked_if_value[preproc_tos].else_branch) 1296 mmfatal(PARSE_ERROR, "more than one EXEC SQL ELSE"); 1297 else 1298 { 1299 stacked_if_value[preproc_tos].else_branch = true; 1300 stacked_if_value[preproc_tos].active = 1301 (stacked_if_value[preproc_tos-1].active && 1302 !stacked_if_value[preproc_tos].saw_active); 1303 stacked_if_value[preproc_tos].saw_active = true; 1304 1305 if (stacked_if_value[preproc_tos].active) 1306 BEGIN(C); 1307 else 1308 BEGIN(xskip); 1309 } 1310 } 1311 else 1312 { 1313 yyless(1); 1314 return S_ANYTHING; 1315 } 1316 } 1317 <C,xskip>{exec_sql}{endif}{space}*";" { 1318 if (preproc_tos == 0) 1319 mmfatal(PARSE_ERROR, "unmatched EXEC SQL ENDIF"); 1320 else 1321 preproc_tos--; 1322 1323 if (stacked_if_value[preproc_tos].active) 1324 BEGIN(C); 1325 else 1326 BEGIN(xskip); 1327 } 1328 <C,xskip>{informix_special}{endif}{space}*";" { 1329 /* are we simulating Informix? */ 1330 if (INFORMIX_MODE) 1331 { 1332 if (preproc_tos == 0) 1333 mmfatal(PARSE_ERROR, "unmatched EXEC SQL ENDIF"); 1334 else 1335 preproc_tos--; 1336 1337 if (stacked_if_value[preproc_tos].active) 1338 BEGIN(C); 1339 else 1340 BEGIN(xskip); 1341 } 1342 else 1343 { 1344 yyless(1); 1345 return S_ANYTHING; 1346 } 1347 } 1348 1349 <xskip>{other} { /* ignore */ } 1350 1351 <xcond>{identifier}{space}*";" { 1352 { 1353 struct _defines *defptr; 1354 unsigned int i; 1355 bool this_active; 1356 1357 /* 1358 * Skip the ";" and trailing whitespace. Note that yytext 1359 * contains at least one non-space character plus the ";" 1360 */ 1361 for (i = strlen(yytext)-2; 1362 i > 0 && ecpg_isspace(yytext[i]); 1363 i-- ) 1364 ; 1365 yytext[i+1] = '\0'; 1366 1367 for (defptr = defines; 1368 defptr != NULL && 1369 strcmp(yytext, defptr->olddef) != 0; 1370 defptr = defptr->next) 1371 /* skip */ ; 1372 1373 this_active = (defptr ? 
ifcond : !ifcond); 1374 stacked_if_value[preproc_tos].active = 1375 (stacked_if_value[preproc_tos-1].active && 1376 !stacked_if_value[preproc_tos].saw_active && 1377 this_active); 1378 stacked_if_value[preproc_tos].saw_active |= this_active; 1379 } 1380 1381 if (stacked_if_value[preproc_tos].active) 1382 BEGIN(C); 1383 else 1384 BEGIN(xskip); 1385 } 1386 1387 <xcond>{other}|\n { 1388 mmfatal(PARSE_ERROR, "missing identifier in EXEC SQL IFDEF command"); 1389 yyterminate(); 1390 } 1391 <def_ident>{identifier} { 1392 old = mm_strdup(yytext); 1393 BEGIN(def); 1394 startlit(); 1395 } 1396 <def_ident>{other}|\n { 1397 mmfatal(PARSE_ERROR, "missing identifier in EXEC SQL DEFINE command"); 1398 yyterminate(); 1399 } 1400 <def>{space}*";" { 1401 struct _defines *ptr, *this; 1402 1403 for (ptr = defines; ptr != NULL; ptr = ptr->next) 1404 { 1405 if (strcmp(old, ptr->olddef) == 0) 1406 { 1407 free(ptr->newdef); 1408 ptr->newdef = mm_strdup(literalbuf); 1409 } 1410 } 1411 if (ptr == NULL) 1412 { 1413 this = (struct _defines *) mm_alloc(sizeof(struct _defines)); 1414 1415 /* initial definition */ 1416 this->olddef = old; 1417 this->newdef = mm_strdup(literalbuf); 1418 this->next = defines; 1419 this->used = NULL; 1420 defines = this; 1421 } 1422 1423 BEGIN(C); 1424 } 1425 <def>[^;] { addlit(yytext, yyleng); } 1426 <incl>\<[^\>]+\>{space}*";"? { parse_include(); } 1427 <incl>{dquote}{xdinside}{dquote}{space}*";"? 
{ parse_include(); } 1428 <incl>[^;\<\>\"]+";" { parse_include(); } 1429 <incl>{other}|\n { 1430 mmfatal(PARSE_ERROR, "syntax error in EXEC SQL INCLUDE command"); 1431 yyterminate(); 1432 } 1433 1434 <<EOF>> { 1435 if (yy_buffer == NULL) 1436 { 1437 if ( preproc_tos > 0 ) 1438 { 1439 preproc_tos = 0; 1440 mmfatal(PARSE_ERROR, "missing \"EXEC SQL ENDIF;\""); 1441 } 1442 yyterminate(); 1443 } 1444 else 1445 { 1446 struct _yy_buffer *yb = yy_buffer; 1447 int i; 1448 struct _defines *ptr; 1449 1450 for (ptr = defines; ptr; ptr = ptr->next) 1451 if (ptr->used == yy_buffer) 1452 { 1453 ptr->used = NULL; 1454 break; 1455 } 1456 1457 if (yyin != NULL) 1458 fclose(yyin); 1459 1460 yy_delete_buffer( YY_CURRENT_BUFFER ); 1461 yy_switch_to_buffer(yy_buffer->buffer); 1462 1463 yylineno = yy_buffer->lineno; 1464 1465 /* We have to output the filename only if we change files here */ 1466 i = strcmp(input_filename, yy_buffer->filename); 1467 1468 free(input_filename); 1469 input_filename = yy_buffer->filename; 1470 1471 yy_buffer = yy_buffer->next; 1472 free(yb); 1473 1474 if (i != 0) 1475 output_line_number(); 1476 1477 } 1478 } 1479 1480 <INITIAL>{other}|\n { mmfatal(PARSE_ERROR, "internal error: unreachable state; please report this to <%s>", PACKAGE_BUGREPORT); } 1481 1482 %% 1483 1484 /* LCOV_EXCL_STOP */ 1485 1486 void 1487 lex_init(void) 1488 { 1489 braces_open = 0; 1490 parenths_open = 0; 1491 current_function = NULL; 1492 1493 yylineno = 1; 1494 1495 /* initialize state for if/else/endif */ 1496 preproc_tos = 0; 1497 stacked_if_value[preproc_tos].active = true; 1498 stacked_if_value[preproc_tos].saw_active = true; 1499 stacked_if_value[preproc_tos].else_branch = false; 1500 1501 /* initialize literal buffer to a reasonable but expansible size */ 1502 if (literalbuf == NULL) 1503 { 1504 literalalloc = 1024; 1505 literalbuf = (char *) mm_alloc(literalalloc); 1506 } 1507 startlit(); 1508 1509 BEGIN(C); 1510 } 1511 1512 static void 1513 addlit(char *ytext, int yleng) 1514 { 
1515 /* enlarge buffer if needed */ 1516 if ((literallen+yleng) >= literalalloc) 1517 { 1518 do 1519 literalalloc *= 2; 1520 while ((literallen+yleng) >= literalalloc); 1521 literalbuf = (char *) realloc(literalbuf, literalalloc); 1522 } 1523 /* append new data, add trailing null */ 1524 memcpy(literalbuf+literallen, ytext, yleng); 1525 literallen += yleng; 1526 literalbuf[literallen] = '\0'; 1527 } 1528 1529 static void 1530 addlitchar(unsigned char ychar) 1531 { 1532 /* enlarge buffer if needed */ 1533 if ((literallen+1) >= literalalloc) 1534 { 1535 literalalloc *= 2; 1536 literalbuf = (char *) realloc(literalbuf, literalalloc); 1537 } 1538 /* append new data, add trailing null */ 1539 literalbuf[literallen] = ychar; 1540 literallen += 1; 1541 literalbuf[literallen] = '\0'; 1542 } 1543 1544 /* 1545 * Process {integer}. Note this will also do the right thing with {decimal}, 1546 * ie digits and a decimal point. 1547 */ 1548 static int 1549 process_integer_literal(const char *token, YYSTYPE *lval) 1550 { 1551 int val; 1552 char *endptr; 1553 1554 errno = 0; 1555 val = strtoint(token, &endptr, 10); 1556 if (*endptr != '\0' || errno == ERANGE) 1557 { 1558 /* integer too large (or contains decimal pt), treat it as a float */ 1559 lval->str = mm_strdup(token); 1560 return FCONST; 1561 } 1562 lval->ival = val; 1563 return ICONST; 1564 } 1565 1566 static void 1567 parse_include(void) 1568 { 1569 /* got the include file name */ 1570 struct _yy_buffer *yb; 1571 struct _include_path *ip; 1572 char inc_file[MAXPGPATH]; 1573 unsigned int i; 1574 1575 yb = mm_alloc(sizeof(struct _yy_buffer)); 1576 1577 yb->buffer = YY_CURRENT_BUFFER; 1578 yb->lineno = yylineno; 1579 yb->filename = input_filename; 1580 yb->next = yy_buffer; 1581 1582 yy_buffer = yb; 1583 1584 /* 1585 * skip the ";" if there is one and trailing whitespace. 
Note that 1586 * yytext contains at least one non-space character plus the ";" 1587 */ 1588 for (i = strlen(yytext)-2; 1589 i > 0 && ecpg_isspace(yytext[i]); 1590 i--) 1591 ; 1592 1593 if (yytext[i] == ';') 1594 i--; 1595 1596 yytext[i+1] = '\0'; 1597 1598 yyin = NULL; 1599 1600 /* If file name is enclosed in '"' remove these and look only in '.' */ 1601 /* Informix does look into all include paths though, except filename starts with '/' */ 1602 if (yytext[0] == '"' && yytext[i] == '"' && 1603 ((compat != ECPG_COMPAT_INFORMIX && compat != ECPG_COMPAT_INFORMIX_SE) || yytext[1] == '/')) 1604 { 1605 yytext[i] = '\0'; 1606 memmove(yytext, yytext+1, strlen(yytext)); 1607 1608 strlcpy(inc_file, yytext, sizeof(inc_file)); 1609 yyin = fopen(inc_file, "r"); 1610 if (!yyin) 1611 { 1612 if (strlen(inc_file) <= 2 || strcmp(inc_file + strlen(inc_file) - 2, ".h") != 0) 1613 { 1614 strcat(inc_file, ".h"); 1615 yyin = fopen(inc_file, "r"); 1616 } 1617 } 1618 1619 } 1620 else 1621 { 1622 if ((yytext[0] == '"' && yytext[i] == '"') || (yytext[0] == '<' && yytext[i] == '>')) 1623 { 1624 yytext[i] = '\0'; 1625 memmove(yytext, yytext+1, strlen(yytext)); 1626 } 1627 1628 for (ip = include_paths; yyin == NULL && ip != NULL; ip = ip->next) 1629 { 1630 if (strlen(ip->path) + strlen(yytext) + 4 > MAXPGPATH) 1631 { 1632 fprintf(stderr, _("Error: include path \"%s/%s\" is too long on line %d, skipping\n"), ip->path, yytext, yylineno); 1633 continue; 1634 } 1635 snprintf (inc_file, sizeof(inc_file), "%s/%s", ip->path, yytext); 1636 yyin = fopen(inc_file, "r"); 1637 if (!yyin) 1638 { 1639 if (strcmp(inc_file + strlen(inc_file) - 2, ".h") != 0) 1640 { 1641 strcat(inc_file, ".h"); 1642 yyin = fopen( inc_file, "r" ); 1643 } 1644 } 1645 /* if the command was "include_next" we have to disregard the first hit */ 1646 if (yyin && include_next) 1647 { 1648 fclose (yyin); 1649 yyin = NULL; 1650 include_next = false; 1651 } 1652 } 1653 } 1654 if (!yyin) 1655 mmfatal(NO_INCLUDE_FILE, "could not open 
include file \"%s\" on line %d", yytext, yylineno); 1656 1657 input_filename = mm_strdup(inc_file); 1658 yy_switch_to_buffer(yy_create_buffer(yyin,YY_BUF_SIZE )); 1659 yylineno = 1; 1660 output_line_number(); 1661 1662 BEGIN(C); 1663 } 1664 1665 /* 1666 * ecpg_isspace() --- return true if flex scanner considers char whitespace 1667 */ 1668 static bool 1669 ecpg_isspace(char ch) 1670 { 1671 if (ch == ' ' || 1672 ch == '\t' || 1673 ch == '\n' || 1674 ch == '\r' || 1675 ch == '\f') 1676 return true; 1677 return false; 1678 } 1679 1680 static bool isdefine(void) 1681 { 1682 struct _defines *ptr; 1683 1684 /* is it a define? */ 1685 for (ptr = defines; ptr; ptr = ptr->next) 1686 { 1687 if (strcmp(yytext, ptr->olddef) == 0 && ptr->used == NULL) 1688 { 1689 struct _yy_buffer *yb; 1690 1691 yb = mm_alloc(sizeof(struct _yy_buffer)); 1692 1693 yb->buffer = YY_CURRENT_BUFFER; 1694 yb->lineno = yylineno; 1695 yb->filename = mm_strdup(input_filename); 1696 yb->next = yy_buffer; 1697 1698 ptr->used = yy_buffer = yb; 1699 1700 yy_scan_string(ptr->newdef); 1701 return true; 1702 } 1703 } 1704 1705 return false; 1706 } 1707 1708 static bool isinformixdefine(void) 1709 { 1710 const char *new = NULL; 1711 1712 if (strcmp(yytext, "dec_t") == 0) 1713 new = "decimal"; 1714 else if (strcmp(yytext, "intrvl_t") == 0) 1715 new = "interval"; 1716 else if (strcmp(yytext, "dtime_t") == 0) 1717 new = "timestamp"; 1718 1719 if (new) 1720 { 1721 struct _yy_buffer *yb; 1722 1723 yb = mm_alloc(sizeof(struct _yy_buffer)); 1724 1725 yb->buffer = YY_CURRENT_BUFFER; 1726 yb->lineno = yylineno; 1727 yb->filename = mm_strdup(input_filename); 1728 yb->next = yy_buffer; 1729 yy_buffer = yb; 1730 1731 yy_scan_string(new); 1732 return true; 1733 } 1734 1735 return false; 1736 } 1737