1 /*------------------------------------------------------------------------- 2 * 3 * scanner.h 4 * API for the core scanner (flex machine) 5 * 6 * The core scanner is also used by PL/pgSQL, so we provide a public API 7 * for it. However, the rest of the backend is only expected to use the 8 * higher-level API provided by parser.h. 9 * 10 * 11 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group 12 * Portions Copyright (c) 1994, Regents of the University of California 13 * 14 * src/include/parser/scanner.h 15 * 16 *------------------------------------------------------------------------- 17 */ 18 19 #ifndef SCANNER_H 20 #define SCANNER_H 21 22 #include "common/keywords.h" 23 24 /* 25 * The scanner returns extra data about scanned tokens in this union type. 26 * Note that this is a subset of the fields used in YYSTYPE of the bison 27 * parsers built atop the scanner. 28 */ 29 typedef union core_YYSTYPE 30 { 31 int ival; /* for integer literals */ 32 char *str; /* for identifiers and non-integer literals */ 33 const char *keyword; /* canonical spelling of keywords */ 34 } core_YYSTYPE; 35 36 /* 37 * We track token locations in terms of byte offsets from the start of the 38 * source string, not the column number/line number representation that 39 * bison uses by default. Also, to minimize overhead we track only one 40 * location (usually the first token location) for each construct, not 41 * the beginning and ending locations as bison does by default. It's 42 * therefore sufficient to make YYLTYPE an int. 43 */ 44 #define YYLTYPE int 45 46 /* 47 * Another important component of the scanner's API is the token code numbers. 48 * However, those are not defined in this file, because bison insists on 49 * defining them for itself. The token codes used by the core scanner are 50 * the ASCII characters plus these: 51 * %token <str> IDENT FCONST SCONST BCONST XCONST Op 52 * %token <ival> ICONST PARAM 53 * %token TYPECAST DOT_DOT COLON_EQUALS EQUALS_GREATER 54 * %token LESS_EQUALS GREATER_EQUALS NOT_EQUALS 55 * The above token definitions *must* be the first ones declared in any 56 * bison parser built atop this scanner, so that they will have consistent 57 * numbers assigned to them (specifically, IDENT = 258 and so on). 58 */ 59 60 /* 61 * The YY_EXTRA data that a flex scanner allows us to pass around. 62 * Private state needed by the core scanner goes here. Note that the actual 63 * yy_extra struct may be larger and have this as its first component, thus 64 * allowing the calling parser to keep some fields of its own in YY_EXTRA. 65 */ 66 typedef struct core_yy_extra_type 67 { 68 /* 69 * The string the scanner is physically scanning. We keep this mainly so 70 * that we can cheaply compute the offset of the current token (yytext). 71 */ 72 char *scanbuf; 73 Size scanbuflen; 74 75 /* 76 * The keyword list to use, and the associated grammar token codes. 77 */ 78 const ScanKeywordList *keywordlist; 79 const uint16 *keyword_tokens; 80 81 /* 82 * Scanner settings to use. These are initialized from the corresponding 83 * GUC variables by scanner_init(). Callers can modify them after 84 * scanner_init() if they don't want the scanner's behavior to follow the 85 * prevailing GUC settings. 86 */ 87 int backslash_quote; 88 bool escape_string_warning; 89 bool standard_conforming_strings; 90 91 /* 92 * literalbuf is used to accumulate literal values when multiple rules are 93 * needed to parse a single literal. Call startlit() to reset buffer to 94 * empty, addlit() to add text. NOTE: the string in literalbuf is NOT 95 * necessarily null-terminated, but there always IS room to add a trailing 96 * null at offset literallen. We store a null only when we need it. 97 */ 98 char *literalbuf; /* palloc'd expandable buffer */ 99 int literallen; /* actual current string length */ 100 int literalalloc; /* current allocated buffer size */ 101 102 int xcdepth; /* depth of nesting in slash-star comments */ 103 char *dolqstart; /* current $foo$ quote start string */ 104 105 /* first part of UTF16 surrogate pair for Unicode escapes */ 106 int32 utf16_first_part; 107 108 /* state variables for literal-lexing warnings */ 109 bool warn_on_first_escape; 110 bool saw_non_ascii; 111 } core_yy_extra_type; 112 113 /* 114 * The type of yyscanner is opaque outside scan.l. 115 */ 116 typedef void *core_yyscan_t; 117 118 119 /* Constant data exported from parser/scan.l */ 120 extern PGDLLIMPORT const uint16 ScanKeywordTokens[]; 121 122 /* Entry points in parser/scan.l */ 123 extern core_yyscan_t scanner_init(const char *str, 124 core_yy_extra_type *yyext, 125 const ScanKeywordList *keywordlist, 126 const uint16 *keyword_tokens); 127 extern void scanner_finish(core_yyscan_t yyscanner); 128 extern int core_yylex(core_YYSTYPE *lvalp, YYLTYPE *llocp, 129 core_yyscan_t yyscanner); 130 extern int scanner_errposition(int location, core_yyscan_t yyscanner); 131 extern void scanner_yyerror(const char *message, core_yyscan_t yyscanner) pg_attribute_noreturn(); 132 133 #endif /* SCANNER_H */ 134