/*-------------------------------------------------------------------------
 *
 * scanner.h
 *		API for the core scanner (flex machine)
 *
 * The core scanner is also used by PL/pgSQL, so we provide a public API
 * for it.  However, the rest of the backend is only expected to use the
 * higher-level API provided by parser.h.
 *
 *
 * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/include/parser/scanner.h
 *
 *-------------------------------------------------------------------------
 */
18 
19 #ifndef SCANNER_H
20 #define SCANNER_H
21 
22 #include "common/keywords.h"
23 
/*
 * The scanner returns extra data about scanned tokens in this union type.
 * Note that this is a subset of the fields used in YYSTYPE of the bison
 * parsers built atop the scanner.
 *
 * Being a union, only one member is valid at a time.  The token list later
 * in this file shows which tokens are declared with <str> and which with
 * <ival>.
 */
typedef union core_YYSTYPE
{
	int			ival;			/* for integer literals */
	char	   *str;			/* for identifiers and non-integer literals */
	const char *keyword;		/* canonical spelling of keywords */
} core_YYSTYPE;
35 
/*
 * We track token locations in terms of byte offsets from the start of the
 * source string, not the column number/line number representation that
 * bison uses by default.  Also, to minimize overhead we track only one
 * location (usually the first token location) for each construct, not
 * the beginning and ending locations as bison does by default.  It's
 * therefore sufficient to make YYLTYPE an int.
 *
 * This is deliberately a macro named YYLTYPE rather than a typedef.
 * NOTE(review): presumably so that bison-generated code sees YYLTYPE as
 * already defined and skips its own default location struct -- confirm
 * against the bison version in use.
 */
#define YYLTYPE  int
45 
/*
 * Another important component of the scanner's API is the token code numbers.
 * However, those are not defined in this file, because bison insists on
 * defining them for itself.  The token codes used by the core scanner are
 * the ASCII characters plus these:
 *	%token <str>	IDENT FCONST SCONST BCONST XCONST Op
 *	%token <ival>	ICONST PARAM
 *	%token			TYPECAST DOT_DOT COLON_EQUALS EQUALS_GREATER
 *	%token			LESS_EQUALS GREATER_EQUALS NOT_EQUALS
 * The above token definitions *must* be the first ones declared in any
 * bison parser built atop this scanner, so that they will have consistent
 * numbers assigned to them (specifically, IDENT = 258 and so on).
 */
59 
60 /*
61  * The YY_EXTRA data that a flex scanner allows us to pass around.
62  * Private state needed by the core scanner goes here.  Note that the actual
63  * yy_extra struct may be larger and have this as its first component, thus
64  * allowing the calling parser to keep some fields of its own in YY_EXTRA.
65  */
66 typedef struct core_yy_extra_type
67 {
68 	/*
69 	 * The string the scanner is physically scanning.  We keep this mainly so
70 	 * that we can cheaply compute the offset of the current token (yytext).
71 	 */
72 	char	   *scanbuf;
73 	Size		scanbuflen;
74 
75 	/*
76 	 * The keyword list to use.
77 	 */
78 	const ScanKeyword *keywords;
79 	int			num_keywords;
80 
81 	/*
82 	 * Scanner settings to use.  These are initialized from the corresponding
83 	 * GUC variables by scanner_init().  Callers can modify them after
84 	 * scanner_init() if they don't want the scanner's behavior to follow the
85 	 * prevailing GUC settings.
86 	 */
87 	int			backslash_quote;
88 	bool		escape_string_warning;
89 	bool		standard_conforming_strings;
90 
91 	/*
92 	 * literalbuf is used to accumulate literal values when multiple rules are
93 	 * needed to parse a single literal.  Call startlit() to reset buffer to
94 	 * empty, addlit() to add text.  NOTE: the string in literalbuf is NOT
95 	 * necessarily null-terminated, but there always IS room to add a trailing
96 	 * null at offset literallen.  We store a null only when we need it.
97 	 */
98 	char	   *literalbuf;		/* palloc'd expandable buffer */
99 	int			literallen;		/* actual current string length */
100 	int			literalalloc;	/* current allocated buffer size */
101 
102 	int			xcdepth;		/* depth of nesting in slash-star comments */
103 	char	   *dolqstart;		/* current $foo$ quote start string */
104 
105 	/* first part of UTF16 surrogate pair for Unicode escapes */
106 	int32		utf16_first_part;
107 
108 	/* state variables for literal-lexing warnings */
109 	bool		warn_on_first_escape;
110 	bool		saw_non_ascii;
111 } core_yy_extra_type;
112 
/*
 * The type of yyscanner is opaque outside scan.l.
 *
 * Callers receive a core_yyscan_t from scanner_init() and pass it back
 * unchanged to the other scanner entry points; they must not dereference it.
 */
typedef void *core_yyscan_t;
117 
118 
119 /* Entry points in parser/scan.l */
120 extern core_yyscan_t scanner_init(const char *str,
121 			 core_yy_extra_type *yyext,
122 			 const ScanKeyword *keywords,
123 			 int num_keywords);
124 extern void scanner_finish(core_yyscan_t yyscanner);
125 extern int core_yylex(core_YYSTYPE *lvalp, YYLTYPE *llocp,
126 		   core_yyscan_t yyscanner);
127 extern int	scanner_errposition(int location, core_yyscan_t yyscanner);
128 extern void scanner_yyerror(const char *message, core_yyscan_t yyscanner) pg_attribute_noreturn();
129 
130 #endif							/* SCANNER_H */
131