1 /*------------------------------------------------------------------------- 2 * 3 * psqlscan_int.h 4 * lexical scanner internal declarations 5 * 6 * This file declares the PsqlScanStateData structure used by psqlscan.l 7 * and shared by other lexers compatible with it, such as psqlscanslash.l. 8 * 9 * One difficult aspect of this code is that we need to work in multibyte 10 * encodings that are not ASCII-safe. A "safe" encoding is one in which each 11 * byte of a multibyte character has the high bit set (it's >= 0x80). Since 12 * all our lexing rules treat all high-bit-set characters alike, we don't 13 * really need to care whether such a byte is part of a sequence or not. 14 * In an "unsafe" encoding, we still expect the first byte of a multibyte 15 * sequence to be >= 0x80, but later bytes might not be. If we scan such 16 * a sequence as-is, the lexing rules could easily be fooled into matching 17 * such bytes to ordinary ASCII characters. Our solution for this is to 18 * substitute 0xFF for each non-first byte within the data presented to flex. 19 * The flex rules will then pass the FF's through unmolested. The 20 * psqlscan_emit() subroutine is responsible for looking back to the original 21 * string and replacing FF's with the corresponding original bytes. 22 * 23 * Another interesting thing we do here is scan different parts of the same 24 * input with physically separate flex lexers (ie, lexers written in separate 25 * .l files). We can get away with this because the only part of the 26 * persistent state of a flex lexer that depends on its parsing rule tables 27 * is the start state number, which is easy enough to manage --- usually, 28 * in fact, we just need to set it to INITIAL when changing lexers. But to 29 * make that work at all, we must use re-entrant lexers, so that all the 30 * relevant state is in the yyscan_t attached to the PsqlScanState; 31 * if we were using lexers with separate static state we would soon end up 32 * with dangling buffer pointers in one or the other. Also note that this 33 * is unlikely to work very nicely if the lexers aren't all built with the 34 * same flex version, or if they don't use the same flex options. 35 * 36 * 37 * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group 38 * Portions Copyright (c) 1994, Regents of the University of California 39 * 40 * src/include/fe_utils/psqlscan_int.h 41 * 42 *------------------------------------------------------------------------- 43 */ 44 #ifndef PSQLSCAN_INT_H 45 #define PSQLSCAN_INT_H 46 47 #include "fe_utils/psqlscan.h" 48 49 /* 50 * These are just to allow this file to be compilable standalone for header 51 * validity checking; in actual use, this file should always be included 52 * from the body of a flex file, where these symbols are already defined. 53 */ 54 #ifndef YY_TYPEDEF_YY_BUFFER_STATE 55 #define YY_TYPEDEF_YY_BUFFER_STATE 56 typedef struct yy_buffer_state *YY_BUFFER_STATE; 57 #endif 58 #ifndef YY_TYPEDEF_YY_SCANNER_T 59 #define YY_TYPEDEF_YY_SCANNER_T 60 typedef void *yyscan_t; 61 #endif 62 63 /* 64 * We use a stack of flex buffers to handle substitution of psql variables. 65 * Each stacked buffer contains the as-yet-unread text from one psql variable. 66 * When we pop the stack all the way, we resume reading from the outer buffer 67 * identified by scanbufhandle. 68 */ 69 typedef struct StackElem 70 { 71 YY_BUFFER_STATE buf; /* flex input control structure */ 72 char *bufstring; /* data actually being scanned by flex */ 73 char *origstring; /* copy of original data, if needed */ 74 char *varname; /* name of variable providing data, or NULL */ 75 struct StackElem *next; 76 } StackElem; 77 78 /* 79 * All working state of the lexer must be stored in PsqlScanStateData 80 * between calls. This allows us to have multiple open lexer operations, 81 * which is needed for nested include files. The lexer itself is not 82 * recursive, but it must be re-entrant. 83 */ 84 typedef struct PsqlScanStateData 85 { 86 yyscan_t scanner; /* Flex's state for this PsqlScanState */ 87 88 PQExpBuffer output_buf; /* current output buffer */ 89 90 StackElem *buffer_stack; /* stack of variable expansion buffers */ 91 92 /* 93 * These variables always refer to the outer buffer, never to any stacked 94 * variable-expansion buffer. 95 */ 96 YY_BUFFER_STATE scanbufhandle; 97 char *scanbuf; /* start of outer-level input buffer */ 98 const char *scanline; /* current input line at outer level */ 99 100 /* safe_encoding, curline, refline are used by emit() to replace FFs */ 101 int encoding; /* encoding being used now */ 102 bool safe_encoding; /* is current encoding "safe"? */ 103 bool std_strings; /* are string literals standard? */ 104 const char *curline; /* actual flex input string for cur buf */ 105 const char *refline; /* original data for cur buffer */ 106 107 /* 108 * All this state lives across successive input lines, until explicitly 109 * reset by psql_scan_reset. start_state is adopted by yylex() on entry, 110 * and updated with its finishing state on exit. 111 */ 112 int start_state; /* yylex's starting/finishing state */ 113 int state_before_str_stop; /* start cond. before end quote */ 114 int paren_depth; /* depth of nesting in parentheses */ 115 int xcdepth; /* depth of nesting in slash-star comments */ 116 char *dolqstart; /* current $foo$ quote start string */ 117 118 /* 119 * State to track boundaries of BEGIN ... END blocks in function 120 * definitions, so that semicolons do not send query too early. 121 */ 122 int identifier_count; /* identifiers since start of statement */ 123 char identifiers[4]; /* records the first few identifiers */ 124 int begin_depth; /* depth of begin/end pairs */ 125 126 /* 127 * Callback functions provided by the program making use of the lexer, 128 * plus a void* callback passthrough argument. 129 */ 130 const PsqlScanCallbacks *callbacks; 131 void *cb_passthrough; 132 } PsqlScanStateData; 133 134 135 /* 136 * Functions exported by psqlscan.l, but only meant for use within 137 * compatible lexers. 138 */ 139 extern void psqlscan_push_new_buffer(PsqlScanState state, 140 const char *newstr, const char *varname); 141 extern void psqlscan_pop_buffer_stack(PsqlScanState state); 142 extern void psqlscan_select_top_buffer(PsqlScanState state); 143 extern bool psqlscan_var_is_current_source(PsqlScanState state, 144 const char *varname); 145 extern YY_BUFFER_STATE psqlscan_prepare_buffer(PsqlScanState state, 146 const char *txt, int len, 147 char **txtcopy); 148 extern void psqlscan_emit(PsqlScanState state, const char *txt, int len); 149 extern char *psqlscan_extract_substring(PsqlScanState state, 150 const char *txt, int len); 151 extern void psqlscan_escape_variable(PsqlScanState state, 152 const char *txt, int len, 153 PsqlScanQuoteType quote); 154 extern void psqlscan_test_variable(PsqlScanState state, 155 const char *txt, int len); 156 157 #endif /* PSQLSCAN_INT_H */ 158