1 /*
2  *	Copyright (c) 2002-2003, Darren Hiebert
3  *
4  *	This source code is released for free distribution under the terms of the
5  *	GNU General Public License version 2 or (at your option) any later version.
6  *
7  *	This module contains functions for generating tags for PL/SQL language
8  *	files.
9  */
10 
11 /*
12  *	 INCLUDE FILES
13  */
14 #include "general.h"	/* must always come first */
15 
16 #include <ctype.h>	/* to define isalpha () */
17 #ifdef DEBUG
18 #include <stdio.h>
19 #endif
20 #include <string.h>
21 
22 #include "debug.h"
23 #include "entry.h"
24 #include "keyword.h"
25 #include "parse.h"
26 #include "read.h"
27 #include "routines.h"
28 #include "vstring.h"
29 #include "xtag.h"
30 #include "promise.h"
31 
32 /*
33  *	On-line "Oracle Database PL/SQL Language Reference":
34  *	http://download.oracle.com/docs/cd/B28359_01/appdev.111/b28370/toc.htm
35  *
36  *	Sample PL/SQL code is available from:
37  *	http://www.orafaq.com/faqscrpt.htm#GENPLSQL
38  *
39  *	On-line SQL Anywhere Documentation
40  *	http://www.ianywhere.com/developer/product_manuals/sqlanywhere/index.html
41  */
42 
43 /*
44  *	 MACROS
45  */
46 #define isType(token,t)		(bool) ((token)->type == (t))
47 #define isKeyword(token,k)	(bool) ((token)->keyword == (k))
48 #define isReservedWord(token) (SqlReservedWord[(token)->keyword].fn \
49 							   ?(bool)SqlReservedWord[(token)->keyword].fn(token) \
50 							   :SqlReservedWord[(token)->keyword].bit)
51 #define isIdentChar1(c) \
52 	/*
53 	 * Other databases are less restrictive on the first character of
54 	 * an identifier.
55 	 * isIdentChar1 is used to identify the first character of an
56 	 * identifier, so we are removing some restrictions.
57 	 */ \
58 	(isalpha (c) || (c) == '@' || (c) == '_' )
59 #define isIdentChar(c) \
60 	(isalpha (c) || isdigit (c) || (c) == '$' || \
61 		(c) == '@' || (c) == '_' || (c) == '#')
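/* Illustrative examples: "@login", "emp_id", and "col#2" are each read as a
 * single identifier.  A leading digit or '$' is rejected by isIdentChar1,
 * while '$' and '#' are still accepted in later positions by isIdentChar. */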
62 
63 /*
64  *	 DATA DECLARATIONS
65  */
66 
67 /*
68  * Used to specify type of keyword.
69  */
70 enum eKeywordId {
71 	KEYWORD_at,
72 	KEYWORD_begin,
73 	KEYWORD_body,
74 	KEYWORD_call,
75 	KEYWORD_case,
76 	KEYWORD_check,
77 	KEYWORD_commit,
78 	KEYWORD_comment,
79 	KEYWORD_constraint,
80 	KEYWORD_create,
81 	KEYWORD_cursor,
82 	KEYWORD_database,
83 	KEYWORD_datatype,
84 	KEYWORD_declare,
85 	KEYWORD_do,
86 	KEYWORD_domain,
87 	KEYWORD_drop,
88 	KEYWORD_else,
89 	KEYWORD_elseif,
90 	KEYWORD_end,
91 	KEYWORD_endif,
92 	KEYWORD_event,
93 	KEYWORD_exception,
94 	KEYWORD_extension,
95 	KEYWORD_external,
96 	KEYWORD_for,
97 	KEYWORD_foreign,
98 	KEYWORD_from,
99 	KEYWORD_function,
100 	KEYWORD_go,
101 	KEYWORD_handler,
102 	KEYWORD_if,
103 	KEYWORD_index,
104 	KEYWORD_internal,
105 	KEYWORD_is,
106 	KEYWORD_language,
107 	KEYWORD_local,
108 	KEYWORD_loop,
109 	KEYWORD_ml_conn,
110 	KEYWORD_ml_conn_chk,
111 	KEYWORD_ml_conn_dnet,
112 	KEYWORD_ml_conn_java,
113 	KEYWORD_ml_conn_lang,
114 	KEYWORD_ml_prop,
115 	KEYWORD_ml_table,
116 	KEYWORD_ml_table_chk,
117 	KEYWORD_ml_table_dnet,
118 	KEYWORD_ml_table_java,
119 	KEYWORD_ml_table_lang,
120 	KEYWORD_object,
121 	KEYWORD_on,
122 	KEYWORD_package,
123 	KEYWORD_pragma,
124 	KEYWORD_inquiry_directive,
125 	KEYWORD_primary,
126 	KEYWORD_procedure,
127 	KEYWORD_publication,
128 	KEYWORD_record,
129 	KEYWORD_ref,
130 	KEYWORD_references,
131 	KEYWORD_rem,
132 	KEYWORD_result,
133 	KEYWORD_return,
134 	KEYWORD_returns,
135 	KEYWORD_schema,
136 	KEYWORD_select,
137 	KEYWORD_service,
138 	KEYWORD_subtype,
139 	KEYWORD_synonym,
140 	KEYWORD_table,
141 	KEYWORD_temporary,
142 	KEYWORD_then,
143 	KEYWORD_trigger,
144 	KEYWORD_type,
145 	KEYWORD_unique,
146 	KEYWORD_url,
147 	KEYWORD_variable,
148 	KEYWORD_view,
149 	KEYWORD_when,
150 	KEYWORD_while,
151 	KEYWORD_with,
152 	KEYWORD_without,
153 	SQLKEYWORD_COUNT,
154 };
155 typedef int keywordId; /* to allow KEYWORD_NONE */
156 
157 typedef enum eTokenType {
158 	TOKEN_UNDEFINED,
159 	TOKEN_EOF,
160 	TOKEN_BLOCK_LABEL_BEGIN,
161 	TOKEN_BLOCK_LABEL_END,
162 	TOKEN_CHARACTER,
163 	TOKEN_CLOSE_PAREN,
164 	TOKEN_COLON,
165 	TOKEN_SEMICOLON,
166 	TOKEN_COMMA,
167 	TOKEN_IDENTIFIER,
168 	TOKEN_KEYWORD,
169 	TOKEN_OPEN_PAREN,
170 	TOKEN_OPERATOR,
171 	TOKEN_OTHER,
172 	TOKEN_STRING,
173 	TOKEN_PERIOD,
174 	TOKEN_OPEN_CURLY,
175 	TOKEN_CLOSE_CURLY,
176 	TOKEN_OPEN_SQUARE,
177 	TOKEN_CLOSE_SQUARE,
178 	TOKEN_TILDE,
179 	TOKEN_FORWARD_SLASH,
180 	TOKEN_EQUAL
181 } tokenType;
182 
183 typedef struct sTokenInfoSQL {
184 	tokenType	type;
185 	keywordId	keyword;
186 	vString *	string;
187 	vString *	scope;
188 	int         scopeKind;
189 	int         begin_end_nest_lvl;
190 	unsigned long lineNumber;
191 	MIOPos filePosition;
192 
193 	/* When the "guest" extra is enabled, a promise is
194 	 * always made when reading a string (literal or dollar quote).
195 	 * The lexer stores the id of the promise in this member.
196 	 * When making the promise, the language of the guest parser
197 	 * may not be determined yet:
198 	 *
199 	 *   CREATE FUNCTION ... AS ' sub code_written_in_perl {... ' LANGUAGE plperl;
200 	 *
201 	 * After reading the string, the parser may find the LANGUAGE keyword. In that
202 	 * case, the parser updates the language of the promise.
203 	 *
204 	 * This field is filled only when the `guest` extra is enabled.
205 	 *
206 	 */
207 	int promise;
208 } tokenInfo;
209 
210 /*
211  *	DATA DEFINITIONS
212  */
213 
214 static langType Lang_sql;
215 
216 typedef enum {
217 	SQLTAG_PLSQL_CCFLAGS,
218 	SQLTAG_DOMAIN,
219 	SQLTAG_FIELD,
220 	SQLTAG_BLOCK_LABEL,
221 	SQLTAG_PACKAGE,
222 	SQLTAG_SERVICE,
223 	SQLTAG_SCHEMA,
224 	SQLTAG_TRIGGER,
225 	SQLTAG_PUBLICATION,
226 	SQLTAG_VIEW,
227 	SQLTAG_DATABASE,
228 	SQLTAG_CURSOR,
229 	SQLTAG_PROTOTYPE,
230 	SQLTAG_EVENT,
231 	SQLTAG_FUNCTION,
232 	SQLTAG_INDEX,
233 	SQLTAG_LOCAL_VARIABLE,
234 	SQLTAG_SYNONYM,
235 	SQLTAG_PROCEDURE,
236 	SQLTAG_RECORD,
237 	SQLTAG_SUBTYPE,
238 	SQLTAG_TABLE,
239 	SQLTAG_VARIABLE,
240 	SQLTAG_MLTABLE,
241 	SQLTAG_MLCONN,
242 	SQLTAG_MLPROP,
243 	SQLTAG_COUNT
244 } sqlKind;
245 
246 static kindDefinition SqlKinds [] = {
247 	{ true,  'C', "ccflag",		  "PLSQL_CCFLAGS"          },
248 	{ true,  'D', "domain",		  "domains"				   },
249 	{ true,  'E', "field",		  "record fields"		   },
250 	{ true,  'L', "label",		  "block label"			   },
251 	{ true,  'P', "package",	  "packages"			   },
252 	{ true,  'R', "service",	  "services"			   },
253 	{ true,  'S', "schema",		  "schemas"			  	   },
254 	{ true,  'T', "trigger",	  "triggers"			   },
255 	{ true,  'U', "publication",  "publications"		   },
256 	{ true,  'V', "view",		  "views"				   },
257 	{ true,  'b', "database",	  "database"			   },
258 	{ true,  'c', "cursor",		  "cursors"				   },
259 	{ false, 'd', "prototype",	  "prototypes"			   },
260 	{ true,  'e', "event",		  "events"				   },
261 	{ true,  'f', "function",	  "functions"			   },
262 	{ true,  'i', "index",		  "indexes"				   },
263 	{ false, 'l', "local",		  "local variables"		   },
264 	{ true,  'n', "synonym",	  "synonyms"			   },
265 	{ true,  'p', "procedure",	  "procedures"			   },
266 	{ false, 'r', "record",		  "records"				   },
267 	{ true,  's', "subtype",	  "subtypes"			   },
268 	{ true,  't', "table",		  "tables"				   },
269 	{ true,  'v', "variable",	  "variables"			   },
270 	{ true,  'x', "mltable",	  "MobiLink Table Scripts" },
271 	{ true,  'y', "mlconn",		  "MobiLink Conn Scripts"  },
272 	{ true,  'z', "mlprop",		  "MobiLink Properties"    },
273 };
274 
275 static const keywordTable SqlKeywordTable [] = {
276 	/* keyword		keyword ID */
277 	{ "as",								KEYWORD_is				      },
278 	{ "at",								KEYWORD_at				      },
279 	{ "begin",							KEYWORD_begin			      },
280 	{ "body",							KEYWORD_body			      },
281 	{ "call",							KEYWORD_call			      },
282 	{ "case",							KEYWORD_case			      },
283 	{ "check",							KEYWORD_check			      },
284 	{ "commit",							KEYWORD_commit				  },
285 	{ "comment",						KEYWORD_comment			      },
286 	{ "constraint",						KEYWORD_constraint		      },
287 	{ "create",							KEYWORD_create				  },
288 	{ "cursor",							KEYWORD_cursor			      },
289 	{ "database",						KEYWORD_database		      },
290 	{ "datatype",						KEYWORD_datatype		      },
291 	{ "declare",						KEYWORD_declare			      },
292 	{ "do",								KEYWORD_do				      },
293 	{ "domain",							KEYWORD_domain				  },
294 	{ "drop",							KEYWORD_drop			      },
295 	{ "else",							KEYWORD_else			      },
296 	{ "elseif",							KEYWORD_elseif			      },
297 	{ "end",							KEYWORD_end				      },
298 	{ "endif",							KEYWORD_endif			      },
299 	{ "event",							KEYWORD_event			      },
300 	{ "exception",						KEYWORD_exception		      },
301 	{ "extension",						KEYWORD_extension		      },
302 	{ "external",						KEYWORD_external		      },
303 	{ "for",							KEYWORD_for				      },
304 	{ "foreign",						KEYWORD_foreign			      },
305 	{ "from",							KEYWORD_from			      },
306 	{ "function",						KEYWORD_function		      },
307 	{ "go",								KEYWORD_go				      },
308 	{ "handler",						KEYWORD_handler			      },
309 	{ "if",								KEYWORD_if				      },
310 	{ "index",							KEYWORD_index			      },
311 	{ "internal",						KEYWORD_internal		      },
312 	{ "is",								KEYWORD_is				      },
313 	{ "language",						KEYWORD_language              },
314 	{ "local",							KEYWORD_local			      },
315 	{ "loop",							KEYWORD_loop			      },
316 	{ "ml_add_connection_script",		KEYWORD_ml_conn			      },
317 	{ "ml_add_dnet_connection_script",	KEYWORD_ml_conn_dnet	      },
318 	{ "ml_add_dnet_table_script",		KEYWORD_ml_table_dnet	      },
319 	{ "ml_add_java_connection_script",	KEYWORD_ml_conn_java	      },
320 	{ "ml_add_java_table_script",		KEYWORD_ml_table_java	      },
321 	{ "ml_add_lang_conn_script_chk",	KEYWORD_ml_conn_chk 	      },
322 	{ "ml_add_lang_connection_script",	KEYWORD_ml_conn_lang	      },
323 	{ "ml_add_lang_table_script",		KEYWORD_ml_table_lang	      },
324 	{ "ml_add_lang_table_script_chk",	KEYWORD_ml_table_chk	      },
325 	{ "ml_add_property",				KEYWORD_ml_prop		 	      },
326 	{ "ml_add_table_script",			KEYWORD_ml_table		      },
327 	{ "object",							KEYWORD_object			      },
328 	{ "on",								KEYWORD_on				      },
329 	{ "package",						KEYWORD_package			      },
330 	{ "pragma",							KEYWORD_pragma			      },
331 	{ "primary",						KEYWORD_primary			      },
332 	{ "procedure",						KEYWORD_procedure		      },
333 	{ "publication",					KEYWORD_publication		      },
334 	{ "record",							KEYWORD_record			      },
335 	{ "ref",							KEYWORD_ref				      },
336 	{ "references",						KEYWORD_references		      },
337 	{ "rem",							KEYWORD_rem				      },
338 	{ "result",							KEYWORD_result			      },
339 	{ "return",							KEYWORD_return			      },
340 	{ "returns",						KEYWORD_returns			      },
341 	{ "schema",							KEYWORD_schema			      },
342 	{ "select",							KEYWORD_select			      },
343 	{ "service",						KEYWORD_service			      },
344 	{ "subtype",						KEYWORD_subtype			      },
345 	{ "synonym",						KEYWORD_synonym			      },
346 	{ "table",							KEYWORD_table			      },
347 	{ "temporary",						KEYWORD_temporary		      },
348 	{ "then",							KEYWORD_then			      },
349 	{ "trigger",						KEYWORD_trigger			      },
350 	{ "type",							KEYWORD_type			      },
351 	{ "unique",							KEYWORD_unique			      },
352 	{ "url",							KEYWORD_url				      },
353 	{ "variable",						KEYWORD_variable		      },
354 	{ "view",							KEYWORD_view			      },
355 	{ "when",							KEYWORD_when			      },
356 	{ "while",							KEYWORD_while			      },
357 	{ "with",							KEYWORD_with			      },
358 	{ "without",						KEYWORD_without			      },
359 };
360 
361 const static struct keywordGroup predefinedInquiryDirective = {
362 	.value = KEYWORD_inquiry_directive,
363 	.addingUnlessExisting = false,
364 	.keywords = {
365 		/* https://docs.oracle.com/en/database/oracle/oracle-database/18/lnpls/plsql-language-fundamentals.html#GUID-3DABF5E1-AC84-448B-810F-31196991EA10 */
366 		"PLSQL_LINE",
367 		"PLSQL_UNIT",
368 		"PLSQL_UNIT_OWNER",
369 		"PLSQL_UNIT_TYPE",
370 		/* https://docs.oracle.com/en/database/oracle/oracle-database/18/lnpls/overview.html#GUID-DF63BC59-22C2-4BA8-9240-F74D505D5102 */
371 		"PLSCOPE_SETTINGS",
372 		"PLSQL_CCFLAGS",
373 		"PLSQL_CODE_TYPE",
374 		"PLSQL_OPTIMIZE_LEVEL",
375 		"PLSQL_WARNINGS",
376 		"NLS_LENGTH_SEMANTICS",
377 		"PERMIT_92_WRAP_FORMAT",
378 		NULL
379 	},
380 };
381 
382 /* A table representing whether a keyword is a "reserved word" or not.
383  * A "reserved word" cannot be used as a name.
384  * See https://dev.mysql.com/doc/refman/8.0/en/keywords.html for the
385  * difference between keywords and reserved words.
386  *
387  * We will mark a keyword as a reserved word only if all the SQL dialects
388  * specify it as a reserved word.
389  */
390 struct SqlReservedWord {
391 	/* If fn is non-NULL, the value returned from fn(token) is used
392 	 * to represent whether a keyword is reserved (true) or not.
393 	 * If fn is NULL, bit is used. */
394 	unsigned int bit;
395 	bool (* fn) (tokenInfo *const token);
396 };
397 
398 /*
399  * MYSQL
400  * => https://dev.mysql.com/doc/refman/8.0/en/keywords.html
401  * POSTGRESQL,SQL2016,SQL2011,SQL92
402  * => https://www.postgresql.org/docs/12/sql-keywords-appendix.html
403  * ORACLE11g, PLSQL
404  * => https://docs.oracle.com/cd/B28359_01/appdev.111/b31231/appb.htm#CJHIIICD
405  * SQLANYWHERE
406  * => http://dcx.sap.com/1200/en/dbreference/alhakeywords.html <the page is gone>
407  */
408 static bool SqlReservedWordPredicatorForIsOrAs (tokenInfo *const token);
409 static struct SqlReservedWord SqlReservedWord [SQLKEYWORD_COUNT] = {
410 	/*
411 	 * RESERVED_BIT: MYSQL & POSTGRESQL&SQL2016&SQL2011&SQL92 & ORACLE11g&PLSQL & SQLANYWHERE
412 	 *
413 	 * {  0  } means we have not inspected whether the keyword is reserved or not.
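	 *
	 * For example, [KEYWORD_end] below evaluates to 0 (END is not reserved in
	 * every dialect), while [KEYWORD_select] evaluates to 1, so the parser
	 * never treats SELECT as a name.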
414 	 */
415 	[KEYWORD_at]            = {0 & 0&1&1&1 & 0&1 & 0},
416 	[KEYWORD_begin]         = {0 & 0&1&1&1 & 0&1 & 1},
417 	[KEYWORD_body]          = {0 & 0&0&0&0 & 0&1 & 0},
418 	[KEYWORD_call]          = {1 & 0&1&1&0 & 0&0 & 1},
419 	[KEYWORD_case]          = {1 & 1&1&1&1 & 0&1 & 1},
420 	[KEYWORD_check]         = {1 & 1&1&1&1 & 1&1 & 1},
421 	[KEYWORD_commit]        = {0 & 0&1&1&1 & 0&0 & 0}, /* SQLANYWHERE:??? */
422 	[KEYWORD_comment]       = {0 & 0&0&0&0 & 1&1 & 1},
423 	[KEYWORD_constraint]    = {1 & 1&1&1&1 & 0&1 & 1},
424 	[KEYWORD_create]        = {1 & 1&1&1&1 & 1&1 & 1},
425 	[KEYWORD_cursor]        = {1 & 0&1&1&1 & 0&1 & 1},
426 	[KEYWORD_database]      = {         0           },
427 	[KEYWORD_datatype]      = {0 & 0&0&0&0 & 0&0 & 0},
428 	[KEYWORD_declare]       = {1 & 0&1&1&1 & 0&1 & 1},
429 	[KEYWORD_do]            = {0 & 1&0&0&0 & 0&1 & 1},
430 	[KEYWORD_domain]        = {0 & 0&0&0&1 & 0&0 & 0},
431 	[KEYWORD_drop]          = {1 & 0&1&1&1 & 1&1 & 1},
432 	[KEYWORD_else]          = {1 & 1&1&1&1 & 1&1 & 1},
433 	[KEYWORD_elseif]        = {1 & 0&0&0&0 & 0&0 & 1},
434 	[KEYWORD_end]           = {0 & 1&1&1&1 & 0&1 & 1},
435 	[KEYWORD_endif]         = {0 & 0&0&0&0 & 0&0 & 1},
436 	[KEYWORD_event]         = {0 & 0&0&0&0 & 0&0 & 0},
437 	[KEYWORD_exception]     = {0 & 0&0&0&1 & 0&1 & 1},
438 	[KEYWORD_extension]     = {0 & 0&0&0&0 & 0&0 & 0},
439 	[KEYWORD_external]      = {0 & 0&1&1&1 & 0&0 & 0},
440 	[KEYWORD_for]           = {1 & 1&1&1&1 & 1&1 & 1},
441 	[KEYWORD_foreign]       = {1 & 1&1&1&1 & 0&0 & 1},
442 	[KEYWORD_from]          = {1 & 1&1&1&1 & 1&1 & 1},
443 	[KEYWORD_function]      = {1 & 0&1&1&0 & 0&1 & 0},
444 	[KEYWORD_go]            = {0 & 0&0&0&1 & 0&0 & 0},
445 	[KEYWORD_handler]       = {0 & 0&0&0&0 & 0&0 & 0},
446 	[KEYWORD_if]            = {1 & 0&0&0&0 & 0&1 & 1},
447 	[KEYWORD_index]         = {1 & 0&0&0&0 & 1&1 & 1},
448 	[KEYWORD_inquiry_directive] = {        0        },
449 	[KEYWORD_internal]      = {1 & 0&1&1&0 & 0&0 & 0},
450 	[KEYWORD_is]            = {0, SqlReservedWordPredicatorForIsOrAs},
451 	[KEYWORD_language]      = {            0        },
452 	[KEYWORD_local]         = {0 & 0&1&1&1 & 0&0 & 0},
453 	[KEYWORD_loop]          = {1 & 1&1&1&1 & 0&1 & 0},
454 	[KEYWORD_ml_conn]       = {0 & 0&0&0&0 & 0&0 & 0},
455 	[KEYWORD_ml_conn_dnet]  = {0 & 0&0&0&0 & 0&0 & 0},
456 	[KEYWORD_ml_table_dnet] = {0 & 0&0&0&0 & 0&0 & 0},
457 	[KEYWORD_ml_conn_java]  = {0 & 0&0&0&0 & 0&0 & 0},
458 	[KEYWORD_ml_table_java] = {0 & 0&0&0&0 & 0&0 & 0},
459 	[KEYWORD_ml_conn_chk]   = {0 & 0&0&0&0 & 0&0 & 0},
460 	[KEYWORD_ml_conn_lang]  = {0 & 0&0&0&0 & 0&0 & 0},
461 	[KEYWORD_ml_table_lang] = {0 & 0&0&0&0 & 0&0 & 0},
462 	[KEYWORD_ml_table_chk]  = {0 & 0&0&0&0 & 0&0 & 0},
463 	[KEYWORD_ml_prop]       = {0 & 0&0&0&0 & 0&0 & 0},
464 	[KEYWORD_ml_table]      = {0 & 0&0&0&0 & 0&0 & 0},
465 	[KEYWORD_object]        = {0 & 0&0&0&0 & 0&0 & 0},
466 	[KEYWORD_on]            = {1 & 1&1&1&1 & 1&1 & 1},
467 	[KEYWORD_package]       = {0 & 0&0&0&0 & 0&1 & 0},
468 	[KEYWORD_pragma]        = {0 & 0&0&0&0 & 0&1 & 0},
469 	[KEYWORD_primary]       = {1 & 1&1&1&1 & 0&0 & 1},
470 	[KEYWORD_procedure]     = {1 & 0&0&0&0 & 0&1 & 1},
471 	[KEYWORD_publication]   = {0 & 0&0&0&0 & 0&0 & 1},
472 	[KEYWORD_record]        = {0 & 0&0&0&0 & 0&1 & 0},
473 	[KEYWORD_ref]           = {0 & 0&1&1&0 & 0&0 & 0},
474 	[KEYWORD_references]    = {1 & 1&1&1&1 & 0&0 & 1},
475 	[KEYWORD_rem]           = {0 & 0&0&0&0 & 0&0 & 0},
476 	[KEYWORD_result]        = {0 & 0&1&1&0 & 0&0 & 0},
477 	[KEYWORD_return]        = {1 & 0&1&1&0 & 0&1 & 1},
478 	[KEYWORD_returns]       = {0 & 0&0&0&0 & 0&0 & 0},
479 	[KEYWORD_schema]        = {0 & 0&0&0&0 & 0&0 & 0},
480 	[KEYWORD_select]        = {1 & 1&1&1&1 & 1&1 & 1},
481 	[KEYWORD_service]       = {0 & 0&0&0&0 & 0&0 & 0},
482 	[KEYWORD_subtype]       = {0 & 0&0&0&0 & 0&1 & 0},
483 	[KEYWORD_synonym]       = {0 & 0&0&0&0 & 1&0 & 0},
484 	[KEYWORD_table]         = {1 & 1&1&1&1 & 1&1 & 1},
485 	[KEYWORD_temporary]     = {0 & 0&0&0&1 & 0&0 & 1},
486 	[KEYWORD_then]          = {1 & 1&1&1&1 & 1&1 & 1},
487 	[KEYWORD_trigger]       = {1 & 0&1&1&0 & 1&0 & 1},
488 	[KEYWORD_type]          = {0 & 0&0&0&0 & 0&1 & 0},
489 	[KEYWORD_unique]        = {1 & 1&1&1&1 & 1&1 & 1},
490 	[KEYWORD_url]           = {0 & 0&0&0&0 & 0&0 & 0},
491 	[KEYWORD_variable]      = {0 & 0&0&0&0 & 0&0 & 1},
492 	[KEYWORD_view]          = {0 & 0&0&0&1 & 1&1 & 1},
493 	[KEYWORD_when]          = {1 & 1&1&1&1 & 0&1 & 1},
494 	[KEYWORD_while]         = {1 & 0&0&0&0 & 0&1 & 1},
495 	[KEYWORD_with]          = {1 & 1&1&1&1 & 1&1 & 1},
496 	[KEYWORD_without]       = {0 & 0&1&1&0 & 0&0 & 0},
497 };
498 
499 /*
500  *	 FUNCTION DECLARATIONS
501  */
502 
503 /* Recursive calls */
504 static void parseBlock (tokenInfo *const token, const bool local);
505 static void parseBlockFull (tokenInfo *const token, const bool local, langType lang);
506 static void parseDeclare (tokenInfo *const token, const bool local);
507 static void parseKeywords (tokenInfo *const token);
508 static tokenType parseSqlFile (tokenInfo *const token);
509 
510 /*
511  *	 FUNCTION DEFINITIONS
512  */
513 
514 static bool SqlReservedWordPredicatorForIsOrAs (tokenInfo *const token)
515 {
516 	if (strcasecmp ("as", vStringValue (token->string)) == 0)
517 		return (bool) (1 & 1&1&1&1 & 1&1 & 1);
518 	else						/* for "is" */
519 		return (bool) (1 & 0&1&1&1 & 1&1 & 1);
520 	/* PostgreSQL allows "is" to be used as a function name. */
521 }
522 
523 static bool isCmdTerm (tokenInfo *const token)
524 {
525 	DebugStatement (
526 			debugPrintf (DEBUG_PARSE
527 				, "\n isCmdTerm: token same  tt:%d  tk:%d\n"
528 				, token->type
529 				, token->keyword
530 				);
531 			);
532 
533 	/*
534 	 * Based on the various customer sites I have been at,
535 	 * the most common command delimiters are:
536 	 *	   ;
537 	 *	   ~
538 	 *	   /
539 	 *	   go
540 	 * This routine will check for any of these; more
541 	 * can easily be added by modifying readToken and
542 	 * adding the character to:
543 	 *	   enum eTokenType
544 	 * or by adding the keyword to SqlKeywordTable.
545 	 */
546 	return (isType (token, TOKEN_SEMICOLON) ||
547 			isType (token, TOKEN_TILDE) ||
548 			isType (token, TOKEN_FORWARD_SLASH) ||
549 			isKeyword (token, KEYWORD_go));
550 }
551 
552 static bool isMatchedEnd(tokenInfo *const token, int nest_lvl)
553 {
554 	bool terminated = false;
555 	/*
556 	 * Since different forms of SQL allow the use of
557 	 * BEGIN
558 	 * ...
559 	 * END
560 	 * blocks, some statements may not be terminated using
561 	 * the standard delimiters:
562 	 *	   ;
563 	 *	   ~
564 	 *	   /
565 	 *	   go
566 	 * This routine will check to see if we encounter an END
567 	 * for the matching nest level of BEGIN ... END statements.
568 	 * If we find one, then we can assume the statement was terminated
569 	 * since we have fallen through to the END statement of the BEGIN
570 	 * block.
571 	 */
572 	if ( nest_lvl > 0 && isKeyword (token, KEYWORD_end) )
573 	{
574 		if ( token->begin_end_nest_lvl == nest_lvl )
575 			terminated = true;
576 	}
577 
578 	return terminated;
579 }
580 
581 static tokenInfo *newToken (void)
582 {
583 	tokenInfo *const token = xMalloc (1, tokenInfo);
584 
585 	token->type               = TOKEN_UNDEFINED;
586 	token->keyword            = KEYWORD_NONE;
587 	token->string             = vStringNew ();
588 	token->scope              = vStringNew ();
589 	token->scopeKind          = SQLTAG_COUNT;
590 	token->begin_end_nest_lvl = 0;
591 	token->lineNumber         = getInputLineNumber ();
592 	token->filePosition       = getInputFilePosition ();
593 	token->promise            = -1;
594 
595 	return token;
596 }
597 
598 static void deleteToken (tokenInfo *const token)
599 {
600 	vStringDelete (token->string);
601 	vStringDelete (token->scope);
602 	eFree (token);
603 }
604 
605 /*
606  *	 Tag generation functions
607  */
608 
609 static void makeSqlTag (tokenInfo *const token, const sqlKind kind)
610 {
611 	if (SqlKinds [kind].enabled)
612 	{
613 		const char *const name = vStringValue (token->string);
614 		tagEntryInfo e;
615 		initTagEntry (&e, name, kind);
616 
617 		e.lineNumber   = token->lineNumber;
618 		e.filePosition = token->filePosition;
619 
620 		if (vStringLength (token->scope) > 0)
621 		{
622 			Assert (token->scopeKind < SQLTAG_COUNT);
623 			e.extensionFields.scopeKindIndex = token->scopeKind;
624 			e.extensionFields.scopeName = vStringValue (token->scope);
625 
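			/* When qualified tags (--extras=+q) are enabled, also emit a tag
			 * whose name is "<scope>.<name>", e.g. "demo_pkg.test_func". */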
626 			if (isXtagEnabled (XTAG_QUALIFIED_TAGS))
627 			{
628 				vString *fulltag;
629 				tagEntryInfo xe = e;
630 
631 				fulltag =  vStringNewCopy (token->scope);
632 				vStringPut (fulltag, '.');
633 				vStringCat (fulltag, token->string);
634 				xe.name = vStringValue (fulltag);
635 				markTagExtraBit (&xe, XTAG_QUALIFIED_TAGS);
636 				makeTagEntry (&xe);
637 				vStringDelete (fulltag);
638 			}
639 		}
640 
641 		makeTagEntry (&e);
642 	}
643 }
644 
645 /*
646  *	 Parsing functions
647  */
648 
649 static void parseString (vString *const string, const int delimiter, int *promise)
650 {
651 	int offset[2];
652 	unsigned long linenum[3];
653 	enum { START, END, SOURCE };
654 
655 	int c0;
656 
657 	if (promise && !isXtagEnabled(XTAG_GUEST))
658 		promise = NULL;
659 
660 	if (promise)
661 	{
662 		c0 = getcFromInputFile ();
663 		linenum[START] = getInputLineNumber ();
664 		offset[START]  = getInputLineOffset ();
665 		linenum[SOURCE] = getSourceLineNumber ();
666 		ungetcToInputFile(c0);
667 	}
668 
669 	bool end = false;
670 	while (! end)
671 	{
672 		int c = getcFromInputFile ();
673 		if (c == EOF)
674 			end = true;
675 		/*
676 		else if (c == '\\')
677 		{
678 			c = getcFromInputFile(); // This may be a ' or ". //
679 			vStringPut(string, c);
680 		}
681 		*/
682 		else if (c == delimiter)
683 		{
684 			if (promise)
685 			{
686 				ungetcToInputFile(c);
687 				linenum[END] = getInputLineNumber ();
688 				offset[END]  = getInputLineOffset ();
689 				(void)getcFromInputFile ();
690 				*promise = makePromise (NULL,
691 										linenum [START], offset [START],
692 										linenum [END], offset [END],
693 										linenum [SOURCE]);
694 			}
695 			end = true;
696 		}
697 		else
698 			vStringPut (string, c);
699 	}
700 }
701 
702 /*	Read an identifier beginning with "firstChar" and place it into "string".
703 */
704 static void parseIdentifier (vString *const string, const int firstChar)
705 {
706 	int c = firstChar;
707 	Assert (isIdentChar1 (c));
708 	do
709 	{
710 		vStringPut (string, c);
711 		c = getcFromInputFile ();
712 	} while (isIdentChar (c));
713 	if (!isspace (c))
714 		ungetcToInputFile (c);		/* unget non-identifier character */
715 }
716 
717 static bool isCCFlag(const char *str)
718 {
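	/* True when "str" was already entered into the tag cork queue with kind
	 * SQLTAG_PLSQL_CCFLAGS, i.e. it is a known conditional-compilation flag. */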
719 	return (anyKindEntryInScope(CORK_NIL, str, SQLTAG_PLSQL_CCFLAGS) != 0);
720 }
721 
722 /* Parse a PostgreSQL dollar-quoted string
723  * https://www.postgresql.org/docs/current/static/sql-syntax-lexical.html#SQL-SYNTAX-DOLLAR-QUOTING
724  *
725  * The syntax for dollar-quoted strings can collide with the PL/SQL inquiry directive ($$name).
726  * https://docs.oracle.com/en/database/oracle/oracle-database/18/lnpls/plsql-language-fundamentals.html#GUID-E918087C-D5A8-4CEE-841B-5333DE6D4C15
727  * https://github.com/universal-ctags/ctags/issues/3006
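 *
 * Illustrative examples:
 *   $$ SELECT 1 $$            -- anonymous dollar quote
 *   $body$ SELECT 1 $body$    -- tagged dollar quote
 *   $$PLSQL_UNIT              -- PL/SQL inquiry directive, not a string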
728  */
729 static tokenType parseDollarQuote (vString *const string, const int delimiter, int *promise)
730 {
731 	int offset[2];
732 	unsigned long linenum[3];
733 	enum { START, END, SOURCE };
734 
735 	unsigned int len = 0;
736 	char tag[32 /* arbitrary limit */] = {0};
737 	int c = 0;
738 
739 	/* read the tag */
740 	tag[len++] = (char) delimiter;
741 	while ((len + 1) < sizeof tag && c != delimiter)
742 	{
743 		c = getcFromInputFile ();
744 		if (isIdentChar(c))
745 			tag[len++] = (char) c;
746 		else
747 			break;
748 	}
749 	tag[len] = 0;
750 
751 	bool empty_tag = (len == 2);
752 
753 	if (c != delimiter)
754 	{
755 		/* damn that's not valid, what can we do? */
756 		ungetcToInputFile (c);
757 		return TOKEN_UNDEFINED;
758 	}
759 
760 	if (promise && !isXtagEnabled(XTAG_GUEST))
761 		promise = NULL;
762 
763 	if (promise)
764 	{
765 		linenum[START] = getInputLineNumber ();
766 		offset[START]  = getInputLineOffset ();
767 		linenum[SOURCE] = getSourceLineNumber ();
768 	}
769 
770 	/* and read the content (until a matching end tag) */
771 	while ((c = getcFromInputFile ()) != EOF)
772 	{
773 		if (c != delimiter)
774 		{
775 			vStringPut (string, c);
776 			if (empty_tag
777 				&& (KEYWORD_inquiry_directive == lookupCaseKeyword (vStringValue (string),
778 																	Lang_sql)
779 					|| isCCFlag(vStringValue (string))))
780 			{
781 				/* PL/SQL inquiry directives */
782 				int c0 = getcFromInputFile ();
783 
784 				if (c0 != delimiter && (isalnum(c0) || c0 == '_'))
785 				{
786 					vStringPut (string, c0);
787 					continue;
788 				}
789 
790 				ungetcToInputFile (c0);
791 				/* Oracle PL/SQL's inquiry directive ($$name) */
792 				return TOKEN_UNDEFINED;
793 			}
794 		}
795 		else
796 		{
797 			char *end_p = tag;
798 
799 			while (c != EOF && *end_p && ((int) c) == *end_p)
800 			{
801 				c = getcFromInputFile ();
802 				end_p++;
803 			}
804 
805 			if (c != EOF)
806 				ungetcToInputFile (c);
807 
808 			if (! *end_p) /* full tag match */
809 			{
810 				if (promise)
811 				{
812 					linenum[END] = getInputLineNumber ();
813 					offset[END]  = getInputLineOffset ();
814 					if (offset[END] > len)
815 						offset[END] -= len;
816 					*promise = makePromise (NULL,
817 											linenum [START], offset [START],
818 											linenum [END], offset [END],
819 											linenum [SOURCE]);
820 				}
821 				break;
822 			}
823 			else
824 				vStringNCatS (string, tag, (size_t) (end_p - tag));
825 		}
826 	}
827 
828 	return TOKEN_STRING;
829 }
830 
831 static void readToken (tokenInfo *const token)
832 {
833 	int c;
834 
835 	token->type			= TOKEN_UNDEFINED;
836 	token->keyword		= KEYWORD_NONE;
837 	vStringClear (token->string);
838 	token->promise      = -1;
839 
840 getNextChar:
841 	do
842 	{
843 		c = getcFromInputFile ();
844 		token->lineNumber   = getInputLineNumber ();
845 		token->filePosition = getInputFilePosition ();
846 		/*
847 		 * Added " to the list of ignores, not sure what this
848 		 * might break but it gets by this issue:
849 		 *	  create table "t1" (...)
850 		 *
851 		 * Darren, the code passes all my tests for both
852 		 * Oracle and SQL Anywhere, but maybe you can tell me
853 		 * what this may affect.
854 		 */
855 	}
856 	while (c == '\t'  ||  c == ' ' ||  c == '\n');
857 
858 	switch (c)
859 	{
860 		case EOF: token->type = TOKEN_EOF;				break;
861 		case '(': token->type = TOKEN_OPEN_PAREN;		break;
862 		case ')': token->type = TOKEN_CLOSE_PAREN;		break;
863 		case ':': token->type = TOKEN_COLON;			break;
864 		case ';': token->type = TOKEN_SEMICOLON;		break;
865 		case '.': token->type = TOKEN_PERIOD;			break;
866 		case ',': token->type = TOKEN_COMMA;			break;
867 		case '{': token->type = TOKEN_OPEN_CURLY;		break;
868 		case '}': token->type = TOKEN_CLOSE_CURLY;		break;
869 		case '~': token->type = TOKEN_TILDE;			break;
870 		case '[': token->type = TOKEN_OPEN_SQUARE;		break;
871 		case ']': token->type = TOKEN_CLOSE_SQUARE;		break;
872 		case '=': token->type = TOKEN_EQUAL;			break;
873 
874 		case '\'':
875 		case '"':
876 				  token->type = TOKEN_STRING;
877 				  parseString (token->string, c, &token->promise);
878 				  token->lineNumber = getInputLineNumber ();
879 				  token->filePosition = getInputFilePosition ();
880 				  break;
881 
882 		case '#':
883 				skipToCharacterInInputFile ('\n');
884 				goto getNextChar;
885 		case '-':
886 				  c = getcFromInputFile ();
887 				  if (c == '-')		/* -- is this the start of a comment? */
888 				  {
889 					  skipToCharacterInInputFile ('\n');
890 					  goto getNextChar;
891 				  }
892 				  else
893 				  {
894 					  if (!isspace (c))
895 						  ungetcToInputFile (c);
896 					  token->type = TOKEN_OPERATOR;
897 				  }
898 				  break;
899 
900 		case '<':
901 		case '>':
902 				  {
903 					  const int initial = c;
904 					  int d = getcFromInputFile ();
905 					  if (d == initial)
906 					  {
907 						  if (initial == '<')
908 							  token->type = TOKEN_BLOCK_LABEL_BEGIN;
909 						  else
910 							  token->type = TOKEN_BLOCK_LABEL_END;
911 					  }
912 					  else
913 					  {
914 						  ungetcToInputFile (d);
915 						  token->type = TOKEN_UNDEFINED;
916 					  }
917 					  break;
918 				  }
919 
920 		case '\\':
921 				  c = getcFromInputFile ();
922 				  if (c != '\\'  && c != '"'  && c != '\''  &&  !isspace (c))
923 					  ungetcToInputFile (c);
924 				  token->type = TOKEN_CHARACTER;
925 				  token->lineNumber = getInputLineNumber ();
926 				  token->filePosition = getInputFilePosition ();
927 				  break;
928 
929 		case '/':
930 				  {
931 					  int d = getcFromInputFile ();
932 					  if ((d != '*') &&		/* is this the start of a comment? */
933 						  (d != '/'))		/* is a one line comment? */
934 					  {
935 						  token->type = TOKEN_FORWARD_SLASH;
936 						  ungetcToInputFile (d);
937 					  }
938 					  else
939 					  {
940 						  if (d == '*')
941 						  {
942 							  skipToCharacterInInputFile2('*', '/');
943 							  goto getNextChar;
944 						  }
945 						  else if (d == '/')	/* is this the start of a comment?  */
946 						  {
947 							  skipToCharacterInInputFile ('\n');
948 							  goto getNextChar;
949 						  }
950 					  }
951 					  break;
952 				  }
953 
954 		case '$':
955 				  token->type = parseDollarQuote (token->string, c, &token->promise);
956 				  token->lineNumber = getInputLineNumber ();
957 				  token->filePosition = getInputFilePosition ();
958 				  break;
959 
960 		default:
961 				  if (! isIdentChar1 (c))
962 					  token->type = TOKEN_UNDEFINED;
963 				  else
964 				  {
965 					  parseIdentifier (token->string, c);
966 					  token->lineNumber = getInputLineNumber ();
967 					  token->filePosition = getInputFilePosition ();
968 					  token->keyword = lookupCaseKeyword (vStringValue (token->string), Lang_sql);
969 					  if (isKeyword (token, KEYWORD_rem))
970 					  {
971 						  vStringClear (token->string);
972 						  skipToCharacterInInputFile ('\n');
973 						  goto getNextChar;
974 					  }
975 					  else if (isKeyword (token, KEYWORD_NONE))
976 						  token->type = TOKEN_IDENTIFIER;
977 					  else
978 						  token->type = TOKEN_KEYWORD;
979 				  }
980 				  break;
981 	}
982 }
983 
984 /*
985  * reads an identifier, possibly quoted:
986  * 		identifier
987  * 		"identifier"
988  * 		[identifier]
989  */
990 static void readIdentifier (tokenInfo *const token)
991 {
992 	readToken (token);
993 	if (isType (token, TOKEN_OPEN_SQUARE))
994 	{
995 		tokenInfo *const close_square = newToken ();
996 
997 		readToken (token);
998 		/* eat close square */
999 		readToken (close_square);
1000 		deleteToken (close_square);
1001 	}
1002 }
1003 
1004 /*
1005  *	 Token parsing functions
1006  */
1007 
1008 /*
1009  * static void addContext (tokenInfo* const parent, const tokenInfo* const child)
1010  * {
1011  *	   if (vStringLength (parent->string) > 0)
1012  *	   {
1013  *		   vStringPut (parent->string, '.');
1014  *	   }
1015  *	   vStringCat (parent->string, child->string);
1016  * }
1017  */
1018 
1019 static void addToScope (tokenInfo* const token, vString* const extra, sqlKind kind)
1020 {
1021 	if (vStringLength (token->scope) > 0)
1022 	{
1023 		vStringPut (token->scope, '.');
1024 	}
1025 	vStringCat (token->scope, extra);
1026 	token->scopeKind = kind;
1027 }
1028 
1029 /*
1030  *	 Scanning functions
1031  */
1032 
1033 static bool isOneOfKeyword (tokenInfo *const token, const keywordId *const keywords, unsigned int count)
1034 {
1035 	unsigned int i;
1036 	for (i = 0; i < count; i++)
1037 	{
1038 		if (isKeyword (token, keywords[i]))
1039 			return true;
1040 	}
1041 	return false;
1042 }
1043 
1044 static void findTokenOrKeywords (tokenInfo *const token, const tokenType type,
1045 				 const keywordId *const keywords,
1046 				 unsigned int kcount)
1047 {
1048 	while (! isType (token, type) &&
1049 	       ! (isType (token, TOKEN_KEYWORD) && isOneOfKeyword (token, keywords, kcount)) &&
1050 	       ! isType (token, TOKEN_EOF))
1051 	{
1052 		readToken (token);
1053 	}
1054 }
1055 
1056 static void findToken (tokenInfo *const token, const tokenType type)
1057 {
1058 	while (! isType (token, type) &&
1059 		   ! isType (token, TOKEN_EOF))
1060 	{
1061 		readToken (token);
1062 	}
1063 }
1064 
1065 static void findCmdTerm (tokenInfo *const token, const bool check_first)
1066 {
1067 	int begin_end_nest_lvl = token->begin_end_nest_lvl;
1068 
1069 	if (check_first)
1070 	{
1071 		if (isCmdTerm(token))
1072 			return;
1073 	}
1074 	do
1075 	{
1076 		readToken (token);
1077 	} while (! isCmdTerm(token) &&
1078 			 ! isMatchedEnd(token, begin_end_nest_lvl) &&
1079 			 ! isType (token, TOKEN_EOF));
1080 }
1081 
1082 static void skipToMatched(tokenInfo *const token)
1083 {
1084 	int nest_level = 0;
1085 	tokenType open_token;
1086 	tokenType close_token;
1087 
1088 	switch (token->type)
1089 	{
1090 		case TOKEN_OPEN_PAREN:
1091 			open_token  = TOKEN_OPEN_PAREN;
1092 			close_token = TOKEN_CLOSE_PAREN;
1093 			break;
1094 		case TOKEN_OPEN_CURLY:
1095 			open_token  = TOKEN_OPEN_CURLY;
1096 			close_token = TOKEN_CLOSE_CURLY;
1097 			break;
1098 		case TOKEN_OPEN_SQUARE:
1099 			open_token  = TOKEN_OPEN_SQUARE;
1100 			close_token = TOKEN_CLOSE_SQUARE;
1101 			break;
1102 		default:
1103 			return;
1104 	}
1105 
1106 	/*
1107 	 * This routine will skip to a matching closing token.
1108 	 * It will also handle nested tokens like the (, ) below.
1109 	 *	 (	name varchar(30), text binary(10)  )
1110 	 */
1111 
1112 	if (isType (token, open_token))
1113 	{
1114 		nest_level++;
1115 		while (nest_level > 0 && !isType (token, TOKEN_EOF))
1116 		{
1117 			readToken (token);
1118 			if (isType (token, open_token))
1119 			{
1120 				nest_level++;
1121 			}
1122 			if (isType (token, close_token))
1123 			{
1124 				if (nest_level > 0)
1125 				{
1126 					nest_level--;
1127 				}
1128 			}
1129 		}
1130 		readToken (token);
1131 	}
1132 }
1133 
1134 static void copyToken (tokenInfo *const dest, tokenInfo *const src)
1135 {
1136 	dest->lineNumber = src->lineNumber;
1137 	dest->filePosition = src->filePosition;
1138 	dest->type = src->type;
1139 	dest->keyword = src->keyword;
1140 	vStringCopy(dest->string, src->string);
1141 	vStringCopy(dest->scope, src->scope);
1142 	dest->scopeKind = src->scopeKind;
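	/* Note: begin_end_nest_lvl and promise are not copied here. */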
1143 }
1144 
1145 static void skipArgumentList (tokenInfo *const token)
1146 {
1147 	/*
1148 	 * Other databases can have arguments with fully declared
1149 	 * datatypes:
1150 	 *	 (	name varchar(30), text binary(10)  )
1151 	 * So we must check for nested open and closing parentheses
1152 	 */
1153 
1154 	if (isType (token, TOKEN_OPEN_PAREN))	/* arguments? */
1155 	{
1156 		skipToMatched (token);
1157 	}
1158 }
1159 
1160 static langType getNamedLanguageFromToken(tokenInfo *const token)
1161 {
1162 	langType lang = LANG_IGNORE;
1163 
1164 	if (isType (token, TOKEN_IDENTIFIER))
1165 	{
1166 		if (vStringLength (token->string) > 2
1167 			&& vStringValue (token->string) [0] == 'p'
1168 			&& vStringValue (token->string) [1] == 'l')
1169 		{
1170 			/* Remove the leading 'pl' and the trailing 'u' to extract
1171 			 * the name of the language. */
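			/* e.g. "plperl" yields "perl", and "plpythonu" yields "python"
			 * (illustrative). */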
1172 			bool unsafe = (vStringLast(token->string) == 'u');
1173 			lang = getNamedLanguageOrAlias (vStringValue (token->string) + 2,
1174 											vStringLength (token->string)
1175 											- 2
1176 											- (unsafe? 1: 0));
1177 		}
1178 	}
1179 	return lang;
1180 }
1181 
1182 static void parseSubProgram (tokenInfo *const token)
1183 {
1184 	tokenInfo *const name  = newToken ();
1185 	vString * saveScope = vStringNew ();
1186 	sqlKind saveScopeKind;
1187 
1188 	/*
1189 	 * This must handle both prototypes and the body of
1190 	 * the procedures.
1191 	 *
1192 	 * Prototype:
1193 	 *	   FUNCTION func_name RETURN integer;
1194 	 *	   PROCEDURE proc_name( parameters );
1195 	 * Procedure
1196 	 *	   FUNCTION GET_ML_USERNAME RETURN VARCHAR2
1197 	 *	   IS
1198 	 *	   BEGIN
1199 	 *		   RETURN v_sync_user_id;
1200 	 *	   END GET_ML_USERNAME;
1201 	 *
1202 	 *	   PROCEDURE proc_name( parameters )
1203 	 *		   IS
1204 	 *		   BEGIN
1205 	 *		   END;
1206 	 *	   CREATE PROCEDURE proc_name( parameters )
1207 	 *		   EXTERNAL NAME ... ;
1208 	 *	   CREATE PROCEDURE proc_name( parameters )
1209 	 *		   BEGIN
1210 	 *		   END;
1211 	 *
1212 	 *	   CREATE FUNCTION f_GetClassName(
1213 	 *		   IN @object VARCHAR(128)
1214 	 *		  ,IN @code   VARCHAR(128)
1215 	 *	   )
1216 	 *	   RETURNS VARCHAR(200)
1217 	 *	   DETERMINISTIC
1218 	 *	   BEGIN
1219 	 *
1220 	 *		   IF( @object = 'user_state' ) THEN
1221 	 *			   SET something = something;
1222 	 *		   END IF;
1223 	 *
1224 	 *		   RETURN @name;
1225 	 *	   END;
1226 	 *
1227 	 * Note, a Package adds scope to the items within.
1228      *     create or replace package demo_pkg is
1229      *         test_var number;
1230      *         function test_func return varchar2;
1231      *         function more.test_func2 return varchar2;
1232      *     end demo_pkg;
1233 	 * So the tags generated here, contain the package name:
1234      *         demo_pkg.test_var
1235      *         demo_pkg.test_func
1236      *         demo_pkg.more.test_func2
1237 	 */
1238 	const sqlKind kind = isKeyword (token, KEYWORD_function) ?
1239 		SQLTAG_FUNCTION : SQLTAG_PROCEDURE;
1240 	Assert (isKeyword (token, KEYWORD_function) ||
1241 			isKeyword (token, KEYWORD_procedure));
1242 
1243 	vStringCopy(saveScope, token->scope);
1244 	saveScopeKind = token->scopeKind;
1245 	readToken (token);
1246 	copyToken (name, token);
1247 	readToken (token);
1248 
1249 	if (isType (token, TOKEN_PERIOD))
1250 	{
1251 		/*
1252 		 * If this is an Oracle package, then the token->scope should
1253 		 * already be set.  If this is the case, also add this value to the
1254 		 * scope.
1255 		 * If this is not an Oracle package, chances are the scope should be
1256 		 * blank and the value just read is the OWNER or CREATOR of the
1257 		 * function and should not be considered part of the scope.
1258 		 */
1259 		if (vStringLength(saveScope) > 0)
1260 		{
1261 			addToScope(token, name->string, kind);
1262 		}
1263 		readToken (token);
1264 		copyToken (name, token);
1265 		readToken (token);
1266 	}
1267 	if (isType (token, TOKEN_OPEN_PAREN))
1268 	{
1269 		/* Reads to the next token after the TOKEN_CLOSE_PAREN */
1270 		skipArgumentList(token);
1271 	}
1272 
1273 	if (kind == SQLTAG_FUNCTION)
1274 	{
1275 		if (isKeyword (token, KEYWORD_return) ||
1276 			isKeyword (token, KEYWORD_returns))
1277 		{
1278 			/* Read datatype */
1279 			readToken (token);
1280 			/*
1281 			 * Read token after which could be the
1282 			 * command terminator if a prototype
1283 			 * or an open parenthesis
1284 			 */
1285 			readToken (token);
1286 			if (isType (token, TOKEN_OPEN_PAREN))
1287 			{
1288 				/* Reads to the next token after the TOKEN_CLOSE_PAREN */
1289 				skipArgumentList(token);
1290 			}
1291 		}
1292 	}
1293 	if (isCmdTerm (token))
1294 	{
1295 		makeSqlTag (name, SQLTAG_PROTOTYPE);
1296 	}
1297 	else
1298 	{
1299 		langType lang = LANG_IGNORE;
1300 
1301 		while (! isKeyword (token, KEYWORD_is) &&
1302 			   ! isKeyword (token, KEYWORD_begin) &&
1303 			   ! isKeyword (token, KEYWORD_at) &&
1304 			   ! isKeyword (token, KEYWORD_internal) &&
1305 			   ! isKeyword (token, KEYWORD_external) &&
1306 			   ! isKeyword (token, KEYWORD_url) &&
1307 			   ! isType (token, TOKEN_EQUAL) &&
1308 			   ! isType (token, TOKEN_EOF) &&
1309 			   ! isCmdTerm (token))
1310 		{
1311 			if (isKeyword (token, KEYWORD_result))
1312 			{
1313 				readToken (token);
1314 				if (isType (token, TOKEN_OPEN_PAREN))
1315 				{
1316 					/* Reads to the next token after the TOKEN_CLOSE_PAREN */
1317 					skipArgumentList(token);
1318 				}
1319 			} else if (lang == LANG_IGNORE
1320 					   && isKeyword (token, KEYWORD_language)) {
1321 				readToken (token);
1322 				lang = getNamedLanguageFromToken (token);
1323 				if (lang != LANG_IGNORE)
1324 					readToken (token);
1325 			} else {
1326 				readToken (token);
1327 			}
1328 		}
1329 		if (isKeyword (token, KEYWORD_at) ||
1330 			isKeyword (token, KEYWORD_url) ||
1331 			isKeyword (token, KEYWORD_internal) ||
1332 			isKeyword (token, KEYWORD_external))
1333 		{
1334 			addToScope(token, name->string, kind);
1335 			if (isType (name, TOKEN_IDENTIFIER) ||
1336 				isType (name, TOKEN_STRING) ||
1337 				isType (name, TOKEN_KEYWORD))
1338 			{
1339 				makeSqlTag (name, kind);
1340 			}
1341 
1342 			vStringClear (token->scope);
1343 			token->scopeKind = SQLTAG_COUNT;
1344 		}
1345 		if (isType (token, TOKEN_EQUAL))
1346 			readToken (token);
1347 
1348 		if (isKeyword (token, KEYWORD_declare))
1349 			parseDeclare (token, false);
1350 
1351 		if (isKeyword (token, KEYWORD_is) ||
1352 			isKeyword (token, KEYWORD_begin))
1353 		{
1354 			addToScope(token, name->string, kind);
1355 			if (isType (name, TOKEN_IDENTIFIER) ||
1356 				isType (name, TOKEN_STRING) ||
1357 				isType (name, TOKEN_KEYWORD))
1358 			{
1359 				makeSqlTag (name, kind);
1360 			}
1361 
1362 			parseBlockFull (token, true, lang);
1363 			vStringClear (token->scope);
1364 			token->scopeKind = SQLTAG_COUNT;
1365 		}
1366 	}
1367 	vStringCopy(token->scope, saveScope);
1368 	token->scopeKind = saveScopeKind;
1369 	deleteToken (name);
1370 	vStringDelete(saveScope);
1371 }
1372 
1373 static void parseRecord (tokenInfo *const token)
1374 {
1375 	/*
1376 	 * Make it a bit forgiving; this is called from
1377 	 * multiple functions: parseTable, parseType.
1378 	 */
1379 	if (!isType (token, TOKEN_OPEN_PAREN))
1380 		readToken (token);
1381 	if (!isType (token, TOKEN_OPEN_PAREN))
1382 		return;
1383 
1384 	do
1385 	{
1386 		if (isType (token, TOKEN_COMMA) ||
1387 			isType (token, TOKEN_OPEN_PAREN))
1388 		{
1389 			readToken (token);
1390 		}
1391 
1392 		/*
1393 		 * Create table statements can end with various constraints
1394 		 * which must be excluded from the SQLTAG_FIELD.
1395 		 *	  create table t1 (
1396 		 *		  c1 integer,
1397 		 *		  c2 char(30),
1398 		 *		  c3 numeric(10,5),
1399 		 *		  c4 integer,
1400 		 *		  constraint whatever,
1401 		 *		  primary key(c1),
1402 		 *		  foreign key (),
1403 		 *		  check ()
1404 		 *	  )
1405 		 */
1406 		if (! isKeyword(token, KEYWORD_primary) &&
1407 			! isKeyword(token, KEYWORD_references) &&
1408 			! isKeyword(token, KEYWORD_unique) &&
1409 			! isKeyword(token, KEYWORD_check) &&
1410 			! isKeyword(token, KEYWORD_constraint) &&
1411 			! isKeyword(token, KEYWORD_foreign))
1412 		{
1413 			/* keyword test above is redundant as only a TOKEN_KEYWORD could
1414 			 * match any isKeyword() anyway */
1415 			if (isType (token, TOKEN_IDENTIFIER) ||
1416 				isType (token, TOKEN_STRING)     ||
1417 				(isType (token, TOKEN_KEYWORD)
1418 				 && (!isReservedWord (token))))
1419 			{
1420 				makeSqlTag (token, SQLTAG_FIELD);
1421 			}
1422 		}
1423 
1424 		while (! isType (token, TOKEN_COMMA) &&
1425 			   ! isType (token, TOKEN_CLOSE_PAREN) &&
1426 			   ! isType (token, TOKEN_OPEN_PAREN) &&
1427 			   ! isType (token, TOKEN_EOF))
1428 		{
1429 			readToken (token);
1430 			/*
1431 			 * A table structure can look like this:
1432 			 *	  create table t1 (
1433 			 *		  c1 integer,
1434 			 *		  c2 char(30),
1435 			 *		  c3 numeric(10,5),
1436 			 *		  c4 integer
1437 			 *	  )
1438 			 * We can't just look for a COMMA or CLOSE_PAREN
1439 			 * since that will not deal with the numeric(10,5)
1440 			 * case.  So we need to skip the argument list
1441 			 * when we find an open paren.
1442 			 */
1443 			if (isType (token, TOKEN_OPEN_PAREN))
1444 			{
1445 				/* Reads to the next token after the TOKEN_CLOSE_PAREN */
1446 				skipArgumentList(token);
1447 			}
1448 		}
1449 	} while (! isType (token, TOKEN_CLOSE_PAREN) &&
1450 			 ! isType (token, TOKEN_EOF));
1451 }
1452 
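/* Handles declarations such as (illustrative):
 *     TYPE rec_t IS RECORD ( ... );   -- tagged with the record kind
 *     TYPE tbl_t IS TABLE OF ...;     -- tagged with the table kind
 *     TYPE cur_t IS REF CURSOR;       -- tagged with the cursor kind
 */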
1453 static void parseType (tokenInfo *const token)
1454 {
1455 	tokenInfo *const name = newToken ();
1456 	vString * saveScope = vStringNew ();
1457 	sqlKind saveScopeKind;
1458 
1459 	vStringCopy(saveScope, token->scope);
1460 	/* If a scope has been set, add it to the name */
1461 	addToScope (name, token->scope, token->scopeKind);
1462 	saveScopeKind = token->scopeKind;
1463 	readToken (name);
1464 	if (isType (name, TOKEN_IDENTIFIER))
1465 	{
1466 		readToken (token);
1467 		if (isKeyword (token, KEYWORD_is))
1468 		{
1469 			readToken (token);
1470 			switch (token->keyword)
1471 			{
1472 				case KEYWORD_record:
1473 				case KEYWORD_object:
1474 					makeSqlTag (name, SQLTAG_RECORD);
1475 					addToScope (token, name->string, SQLTAG_RECORD);
1476 					parseRecord (token);
1477 					break;
1478 
1479 				case KEYWORD_table:
1480 					makeSqlTag (name, SQLTAG_TABLE);
1481 					break;
1482 
1483 				case KEYWORD_ref:
1484 					readToken (token);
1485 					if (isKeyword (token, KEYWORD_cursor))
1486 						makeSqlTag (name, SQLTAG_CURSOR);
1487 					break;
1488 
1489 				default: break;
1490 			}
1491 			vStringClear (token->scope);
1492 			token->scopeKind = SQLTAG_COUNT;
1493 		}
1494 	}
1495 	vStringCopy(token->scope, saveScope);
1496 	token->scopeKind = saveScopeKind;
1497 	deleteToken (name);
1498 	vStringDelete(saveScope);
1499 }
1500 
1501 static void parseSimple (tokenInfo *const token, const sqlKind kind)
1502 {
1503 	/* This will simply make the tagname from the first word found */
1504 	readToken (token);
1505 	if (isType (token, TOKEN_IDENTIFIER) ||
1506 		isType (token, TOKEN_STRING))
1507 	{
1508 		makeSqlTag (token, kind);
1509 	}
1510 }
1511 
1512 static void parseDeclare (tokenInfo *const token, const bool local)
1513 {
1514 	/*
1515 	 * PL/SQL declares are of this format:
1516 	 *	  IS|AS
1517 	 *	  [declare]
1518 	 *		 CURSOR curname ...
1519 	 *		 varname1 datatype;
1520 	 *		 varname2 datatype;
1521 	 *		 varname3 datatype;
1522 	 *	  begin
1523 	 */
1524 
1525 	if (isKeyword (token, KEYWORD_declare))
1526 		readToken (token);
1527 	while (! isKeyword (token, KEYWORD_begin) &&
1528 		   ! isKeyword (token, KEYWORD_end) &&
1529 		   ! isType (token, TOKEN_EOF))
1530 	{
1531 		keywordId stoppers [] = {
1532 			KEYWORD_begin,
1533 			KEYWORD_end,
1534 		};
1535 
1536 		switch (token->keyword)
1537 		{
1538 			case KEYWORD_cursor:	parseSimple (token, SQLTAG_CURSOR); break;
1539 			case KEYWORD_function:	parseSubProgram (token); break;
1540 			case KEYWORD_procedure: parseSubProgram (token); break;
1541 			case KEYWORD_subtype:	parseSimple (token, SQLTAG_SUBTYPE); break;
1542 			case KEYWORD_trigger:	parseSimple (token, SQLTAG_TRIGGER); break;
1543 			case KEYWORD_type:		parseType (token); break;
1544 
1545 			default:
1546 				if (isType (token, TOKEN_IDENTIFIER))
1547 				{
1548 					makeSqlTag (token, local? SQLTAG_LOCAL_VARIABLE: SQLTAG_VARIABLE);
1549 				}
1550 				break;
1551 		}
1552 		findTokenOrKeywords (token, TOKEN_SEMICOLON, stoppers, ARRAY_SIZE (stoppers));
1553 		if (isType (token, TOKEN_SEMICOLON))
1554 			readToken (token);
1555 	}
1556 }
1557 
1558 static void parseDeclareANSI (tokenInfo *const token, const bool local)
1559 {
1560 	tokenInfo *const type = newToken ();
1561 	/*
1562 	 * ANSI declares are of this format:
1563 	 *	 BEGIN
1564 	 *		 DECLARE varname1 datatype;
1565 	 *		 DECLARE varname2 datatype;
1566 	 *		 ...
1567 	 *
1568 	 * This differs from PL/SQL, where DECLARE precedes the BEGIN block
1569 	 * and the DECLARE keyword is not repeated.
1570 	 */
1571 	while (isKeyword (token, KEYWORD_declare))
1572 	{
1573 		readToken (token);
1574 		readToken (type);
1575 
1576 		if (isKeyword (type, KEYWORD_cursor))
1577 			makeSqlTag (token, SQLTAG_CURSOR);
1578 		else if (isKeyword (token, KEYWORD_local) &&
1579 				 isKeyword (type, KEYWORD_temporary))
1580 		{
1581 			/*
1582 			 * DECLARE LOCAL TEMPORARY TABLE table_name (
1583 			 *	  c1 int,
1584 			 *	  c2 int
1585 			 * );
1586 			 */
1587 			readToken (token);
1588 			if (isKeyword (token, KEYWORD_table))
1589 			{
1590 				readToken (token);
1591 				if (isType(token, TOKEN_IDENTIFIER) ||
1592 					isType(token, TOKEN_STRING))
1593 				{
1594 					makeSqlTag (token, SQLTAG_TABLE);
1595 				}
1596 			}
1597 		}
1598 		else if (isType (token, TOKEN_IDENTIFIER) ||
1599 				 isType (token, TOKEN_STRING))
1600 		{
1601 			makeSqlTag (token, local? SQLTAG_LOCAL_VARIABLE: SQLTAG_VARIABLE);
1602 		}
1603 		findToken (token, TOKEN_SEMICOLON);
1604 		readToken (token);
1605 	}
1606 	deleteToken (type);
1607 }
1608 
1609 static void parseLabel (tokenInfo *const token)
1610 {
1611 	/*
1612 	 * A label has this format:
1613 	 *	   <<tobacco_dependency>>
1614 	 *	   DECLARE
1615 	 *		  v_senator VARCHAR2(100) := 'THURMOND, JESSE';
1616 	 *	   BEGIN
1617 	 *		  IF total_contributions (v_senator, 'TOBACCO') > 25000
1618 	 *		  THEN
1619 	 *			 <<alcohol_dependency>>
1620 	 *			 DECLARE
1621 	 *				v_senator VARCHAR2(100) := 'WHATEVERIT, TAKES';
1622 	 *			 BEGIN
1623 	 *				...
1624 	 */
1625 
1626 	Assert (isType (token, TOKEN_BLOCK_LABEL_BEGIN));
1627 	readToken (token);
1628 	if (isType (token, TOKEN_IDENTIFIER))
1629 	{
1630 		makeSqlTag (token, SQLTAG_BLOCK_LABEL);
1631 		readToken (token);		  /* read end of label */
1632 	}
1633 }
1634 
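/* Parses a run of statements inside a block, recursing into nested
 * IF/LOOP/CASE/FOR and BEGIN blocks.  When exit_on_endif is true, the
 * function returns as soon as an ENDIF is seen (non-ANSI IF blocks). */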
1635 static void parseStatements (tokenInfo *const token, const bool exit_on_endif )
1636 {
1637 	/* bool isAnsi   = true; */
1638 	bool stmtTerm = false;
1639 	do
1640 	{
1641 
1642 		if (isType (token, TOKEN_BLOCK_LABEL_BEGIN))
1643 			parseLabel (token);
1644 		else
1645 		{
1646 			switch (token->keyword)
1647 			{
1648 				case KEYWORD_exception:
1649 					/*
1650 					 * EXCEPTION
1651 					 *	 <exception handler>;
1652 					 *
1653 					 * Where an exception handler could be:
1654 					 *	 BEGIN
1655 					 *		WHEN OTHERS THEN
1656 					 *			x := x + 3;
1657 					 *	 END;
1658 					 * In this case we need to skip this keyword and
1659 					 * move on to the next token without reading until
1660 					 * TOKEN_SEMICOLON;
1661 					 */
1662 					readToken (token);
1663 					continue;
1664 
1665 				case KEYWORD_when:
1666 					/*
1667 					 * WHEN statements can be used in exception clauses
1668 					 * and CASE statements.  The CASE statement should skip
1669 					 * these given below we skip over to an END statement.
1670 					 * But for an exception clause, we can have:
1671 					 *	   EXCEPTION
1672 					 *		   WHEN OTHERS THEN
1673 					 *		   BEGIN
1674 					 *				  x := x + 3;
1675 					 *		   END;
1676 					 * If we skip to the TOKEN_SEMICOLON, we miss the begin
1677 					 * of a nested BEGIN END block.  So read the next token
1678 					 * after the THEN and restart the LOOP.
1679 					 */
1680 					while (! isKeyword (token, KEYWORD_then) &&
1681 						   ! isType (token, TOKEN_EOF))
1682 						readToken (token);
1683 
1684 					readToken (token);
1685 					continue;
1686 
1687 				case KEYWORD_if:
1688 					/*
1689 					 * We do not want to look for a ; since for an empty
1690 					 * IF block, it would skip over the END.
1691 					 *	IF...THEN
1692 					 *	END IF;
1693 					 *
1694 					 *	IF...THEN
1695 					 *	ELSE
1696 					 *	END IF;
1697 					 *
1698 					 *	IF...THEN
1699 					 *	ELSEIF...THEN
1700 					 *	ELSE
1701 					 *	END IF;
1702 					 *
1703 					 *	or non-ANSI
1704 					 *	IF ...
1705 					 *	BEGIN
1706 					 *	END
1707 					 */
1708 					while (! isKeyword (token, KEYWORD_then)  &&
1709 						   ! isKeyword (token, KEYWORD_begin) &&
1710 						   ! isType (token, TOKEN_EOF))
1711 					{
1712 						readToken (token);
1713 					}
1714 
1715 					if (isKeyword (token, KEYWORD_begin))
1716 					{
1717 						/* isAnsi = false; */
1718 						parseBlock(token, false);
1719 
1720 						/*
1721 						 * Handle the non-Ansi IF blocks.
1722 						 * parseBlock consumes the END, so if the next
1723 						 * token is a command terminator (like GO)
1724 						 * we know we are done with this statement.
1725 						 */
1726 						if (isCmdTerm (token))
1727 							stmtTerm = true;
1728 					}
1729 					else
1730 					{
1731 						readToken (token);
1732 
1733 						while (! isKeyword (token, KEYWORD_end) &&
1734 							   ! isKeyword (token, KEYWORD_endif) &&
1735 							   ! isType (token, TOKEN_EOF))
1736 						{
1737 							if (isKeyword (token, KEYWORD_else) ||
1738 								isKeyword (token, KEYWORD_elseif))
1739 							{
1740 								readToken (token);
1741 							}
1742 
1743 							parseStatements (token, true);
1744 
1745 							if (isCmdTerm(token))
1746 								readToken (token);
1747 
1748 						}
1749 
1750 						/*
1751 						 * parseStatements returns when it finds an END; an IF
1752 						 * should follow the END for ANSI anyway.
1753 						 *	IF...THEN
1754 						 *	END IF;
1755 						 */
1756 						if (isKeyword (token, KEYWORD_end))
1757 							readToken (token);
1758 
1759 						if (isKeyword (token, KEYWORD_if) ||
1760 							isKeyword (token, KEYWORD_endif))
1761 						{
1762 							readToken (token);
1763 							if (isCmdTerm(token))
1764 								stmtTerm = true;
1765 						}
1766 						else
1767 						{
1768 							/*
1769 							 * Well we need to do something here.
1770 							 * There are lots of different END statements
1771 							 * END;
1772 							 * END CASE;
1773 							 * ENDIF;
1774 							 * ENDCASE;
1775 							 */
1776 						}
1777 					}
1778 					break;
1779 
1780 				case KEYWORD_loop:
1781 				case KEYWORD_case:
1782 				case KEYWORD_for:
1783 					/*
1784 					 *	LOOP...
1785 					 *	END LOOP;
1786 					 *
1787 					 *	CASE
1788 					 *	WHEN '1' THEN
1789 					 *	END CASE;
1790 					 *
1791 					 *	FOR loop_name AS cursor_name CURSOR FOR ...
1792 					 *	DO
1793 					 *	END FOR;
1794 					 */
1795 					if (isKeyword (token, KEYWORD_for))
1796 					{
1797 						/* loop name */
1798 						readToken (token);
1799 						/* AS */
1800 						readToken (token);
1801 
1802 						while (! isKeyword (token, KEYWORD_is) &&
1803 							   ! isType (token, TOKEN_EOF))
1804 						{
1805 							/*
1806 							 * If this is not an AS keyword, this is
1807 							 * not a proper FOR statement and should
1808 							 * simply be ignored.
1809 							 */
1810 							return;
1811 						}
1812 
1813 						while (! isKeyword (token, KEYWORD_do) &&
1814 							   ! isType (token, TOKEN_EOF))
1815 							readToken (token);
1816 					}
1817 
1818 
1819 					readToken (token);
1820 					while (! isKeyword (token, KEYWORD_end) &&
1821 						   ! isType (token, TOKEN_EOF))
1822 					{
1823 						/*
1824 						if ( isKeyword (token, KEYWORD_else) ||
1825 								isKeyword (token, KEYWORD_elseif)    )
1826 							readToken (token);
1827 							*/
1828 
1829 						parseStatements (token, false);
1830 
1831 						if (isCmdTerm(token))
1832 							readToken (token);
1833 					}
1834 
1835 
1836 					if (isKeyword (token, KEYWORD_end ))
1837 						readToken (token);
1838 
1839 					/*
1840 					 * Typically ended with
1841 					 *    END LOOP [loop name];
1842 					 *    END CASE
1843 					 *    END FOR [loop name];
1844 					 */
1845 					if (isKeyword (token, KEYWORD_loop) ||
1846 						isKeyword (token, KEYWORD_case) ||
1847 						isKeyword (token, KEYWORD_for))
1848 					{
1849 						readToken (token);
1850 					}
1851 
1852 					if (isCmdTerm(token))
1853 						stmtTerm = true;
1854 
1855 					break;
1856 
1857 				case KEYWORD_create:
1858 					readToken (token);
1859 					parseKeywords(token);
1860 					break;
1861 
1862 				case KEYWORD_declare:
1863 				case KEYWORD_begin:
1864 					parseBlock (token, true);
1865 					break;
1866 
1867 				case KEYWORD_end:
1868 					break;
1869 
1870 				default:
1871 					readToken (token);
1872 					break;
1873 			}
1874 			/*
1875 			 * Not all statements must end in a semi-colon
1876 			 *	   begin
1877 			 *		   if current publisher <> 'publish' then
1878 			 *			 signal UE_FailStatement
1879 			 *		   end if
1880 			 *	   end;
1881 			 * The last statement prior to an end ("signal" above) does
1882 			 * not need a semi-colon, nor does the end if, since it is
1883 			 * also the last statement prior to the end of the block.
1884 			 *
1885 			 * So we must read to the first semi-colon or an END block
1886 			 */
1887 			while (! stmtTerm &&
1888 				   ! isKeyword (token, KEYWORD_end) &&
1889 				   ! isCmdTerm(token) &&
1890 				   ! isType(token, TOKEN_EOF))
1891 			{
1892 				if (exit_on_endif && isKeyword (token, KEYWORD_endif))
1893 					return;
1894 
1895 				if (isType (token, TOKEN_COLON) )
1896 				{
1897 					/*
1898 					 * A : can signal a loop name
1899 					 *    myloop:
1900 					 *    LOOP
1901 					 *        LEAVE myloop;
1902 					 *    END LOOP;
1903 					 * Unfortunately, labels do not have a
1904 					 * cmd terminator, therefore we have to check
1905 					 * if the next token is a keyword and process
1906 					 * it accordingly.
1907 					 */
1908 					readToken (token);
1909 					if (isKeyword (token, KEYWORD_loop) ||
1910 						isKeyword (token, KEYWORD_while) ||
1911 						isKeyword (token, KEYWORD_for))
1912 					{
1913 						/* parseStatements (token); */
1914 						return;
1915 					}
1916 				}
1917 
1918 				readToken (token);
1919 
1920 				if (isType (token, TOKEN_OPEN_PAREN) ||
1921 				    isType (token, TOKEN_OPEN_CURLY) ||
1922 				    isType (token, TOKEN_OPEN_SQUARE))
1923 				{
1924 					skipToMatched (token);
1925 				}
1926 
1927 				/*
1928 				 * Since we know how to parse various statements
1929 				 * if we detect them, parse them to completion
1930 				 */
1931 				if (isType (token, TOKEN_BLOCK_LABEL_BEGIN) ||
1932 					isKeyword (token, KEYWORD_exception) ||
1933 					isKeyword (token, KEYWORD_loop) ||
1934 					isKeyword (token, KEYWORD_case) ||
1935 					isKeyword (token, KEYWORD_for) ||
1936 					isKeyword (token, KEYWORD_begin))
1937 				{
1938 					parseStatements (token, false);
1939 				}
1940 				else if (isKeyword (token, KEYWORD_if))
1941 					parseStatements (token, true);
1942 
1943 			}
1944 		}
1945 		/*
1946 		 * We assumed earlier that all statements end with a command terminator.
1947 		 * See the comment above; here we only flag the statement as terminated
1948 		 * when the current token is a command terminator.
1949 		 */
1950 		if (isCmdTerm(token) && ! stmtTerm)
1951 			stmtTerm = true;
1952 
1953 	} while (! isKeyword (token, KEYWORD_end) &&
1954 			 ! (exit_on_endif && isKeyword (token, KEYWORD_endif) ) &&
1955 			 ! isType (token, TOKEN_EOF) &&
1956 			 ! stmtTerm );
1957 }
1958 
1959 static void parseBlock (tokenInfo *const token, const bool local)
1960 {
1961 	parseBlockFull (token, local, LANG_IGNORE);
1962 }
1963 
1964 static void parseBlockFull (tokenInfo *const token, const bool local, langType lang)
1965 {
1966 	int promise = -1;
1967 
1968 	if (isType (token, TOKEN_BLOCK_LABEL_BEGIN))
1969 	{
1970 		parseLabel (token);
1971 		readToken (token);
1972 	}
1973 	if (! isKeyword (token, KEYWORD_begin))
1974 	{
1975 		readToken (token);
1976 		if (isType (token, TOKEN_STRING))
1977 		{
1978 			/* Likely a PostgreSQL FUNCTION name AS '...'
1979 			 * https://www.postgresql.org/docs/current/static/sql-createfunction.html */
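			/* A hypothetical example of the construct handled here
			 * (PostgreSQL syntax, for illustration only):
			 *     CREATE FUNCTION add(a integer, b integer) RETURNS integer
			 *         AS 'select $1 + $2;'
			 *         LANGUAGE SQL;
			 * The string after AS is the function body; the loop below
			 * tries to resolve its language from the LANGUAGE clause.
			 */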
1980 			promise = token->promise;
1981 			token->promise = -1;
1982 
1983 			readToken (token);
1984 			while (! isCmdTerm (token)
1985 				   && !isType (token, TOKEN_EOF))
1986 			{
1987 				if (lang == LANG_IGNORE &&
1988 					isKeyword (token, KEYWORD_language))
1989 				{
1990 					readToken (token);
1991 					lang = getNamedLanguageFromToken (token);
1992 					if (lang != LANG_IGNORE)
1993 						readToken (token);
1994 				}
1995 				else
1996 					readToken (token);
1997 			}
1998 
1999 			if (promise != -1 && lang != LANG_IGNORE)
2000 				promiseUpdateLanguage(promise, lang);
2001 		}
2002 		else
2003 		{
2004 			/*
2005 			 * These are Oracle-style declarations, which generally
2006 			 * come between an IS/AS keyword and the BEGIN block.
2007 			 */
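			/* For illustration only, an Oracle PL/SQL body whose
			 * declarations land here might look like:
			 *     CREATE PROCEDURE p IS
			 *         x NUMBER;
			 *     BEGIN
			 *         NULL;
			 *     END;
			 */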
2008 			parseDeclare (token, local);
2009 		}
2010 	}
2011 	if (isKeyword (token, KEYWORD_begin))
2012 	{
2013 		bool is_transaction = false;
2014 
2015 		readToken (token);
2016 
2017 		/* BEGIN of Postgresql initiates a transaction.
2018 		 *
2019 		 *   BEGIN [ WORK | TRANSACTION ] [ transaction_mode [, ...] ]
2020 		 *
2021 		 * BEGIN of MySQL does the same.
2022 		 *
2023 		 *   BEGIN [WORK]
2024 		 *
2025 		 * BEGIN of SQLite does the same.
2026 		 *
2027 		 *   BEGIN [[DEFERRED | IMMEDIATE | EXCLUSIVE] TRANSACTION]
2028 		 *
2029 		 */
2030 		if (isCmdTerm(token))
2031 		{
2032 			is_transaction = true;
2033 			readToken (token);
2034 		}
2035 		else if (isType (token, TOKEN_IDENTIFIER)
2036 				 && (strcasecmp (vStringValue(token->string), "work") == 0
2037 					 || strcasecmp (vStringValue(token->string), "transaction") == 0
2038 					 || (
2039 						 strcasecmp (vStringValue(token->string), "deferred") == 0
2040 						 || strcasecmp (vStringValue(token->string), "immediate") == 0
2041 						 || strcasecmp (vStringValue(token->string), "exclusive") == 0
2042 						 )
2043 					 ))
2044 			is_transaction = true;
2045 		else
2046 		{
2047 			/*
2048 			 * Check for ANSI declarations which always follow
2049 			 * a BEGIN statement.  This routine will not advance
2050 			 * the token if none are found.
2051 			 */
2052 			parseDeclareANSI (token, local);
2053 		}
2054 
2055 		token->begin_end_nest_lvl++;
2056 		while (! isKeyword (token, KEYWORD_end) &&
2057 			   ! (is_transaction && isKeyword(token, KEYWORD_commit)) &&
2058 			   ! isType (token, TOKEN_EOF))
2059 		{
2060 			parseStatements (token, false);
2061 
2062 			if (isCmdTerm(token))
2063 				readToken (token);
2064 		}
2065 		token->begin_end_nest_lvl--;
2066 
2067 		/*
2068 		 * Read the next token (we will assume
2069 		 * it is the command delimiter)
2070 		 */
2071 		readToken (token);
2072 
2073 		/*
2074 		 * Check if the END block is terminated
2075 		 */
2076 		if (! isCmdTerm (token))
2077 		{
2078 			/*
2079 			 * Not sure what to do here at the moment.
2080 			 * I think the routine that calls parseBlock
2081 			 * must expect the next token has already
2082 			 * been read since it is possible this
2083 			 * token is not a command delimiter.
2084 			 */
2085 			/* findCmdTerm (token, false); */
2086 		}
2087 	}
2088 }
2089 
2090 static void parsePackage (tokenInfo *const token)
2091 {
2092 	/*
2093 	 * Packages can be specified in a number of ways:
2094 	 *		CREATE OR REPLACE PACKAGE pkg_name AS
2095 	 * or
2096 	 *		CREATE OR REPLACE PACKAGE owner.pkg_name AS
2097 	 * or by specifying a package body
2098 	 *	   CREATE OR REPLACE PACKAGE BODY pkg_name AS
2099 	 *	   CREATE OR REPLACE PACKAGE BODY owner.pkg_name AS
2100 	 */
2101 	tokenInfo *const name = newToken ();
2102 	readIdentifier (name);
2103 	if (isKeyword (name, KEYWORD_body))
2104 	{
2105 		/*
2106 		 * Ignore the BODY tag since we will process
2107 		 * the body or prototypes in the same manner
2108 		 */
2109 		readIdentifier (name);
2110 	}
2111 	/* Check for owner.pkg_name */
2112 	while (! isKeyword (token, KEYWORD_is) &&
2113 		   ! isType (token, TOKEN_EOF))
2114 	{
2115 		readToken (token);
2116 		if ( isType(token, TOKEN_PERIOD) )
2117 		{
2118 			readIdentifier (name);
2119 		}
2120 	}
2121 	if (isKeyword (token, KEYWORD_is))
2122 	{
2123 		if (isType (name, TOKEN_IDENTIFIER) ||
2124 			isType (name, TOKEN_STRING))
2125 		{
2126 			makeSqlTag (name, SQLTAG_PACKAGE);
2127 		}
2128 		addToScope (token, name->string, SQLTAG_PACKAGE);
2129 		parseBlock (token, false);
2130 		vStringClear (token->scope);
2131 		token->scopeKind = SQLTAG_COUNT;
2132 	}
2133 	findCmdTerm (token, false);
2134 	deleteToken (name);
2135 }
2136 
2137 static void parseColumnsAndAliases (tokenInfo *const token)
2138 {
2139 	bool columnAcceptable = true;
2140 	tokenInfo *const lastId = newToken ();
2141 
2142 	/*
2143 	 * -- A
2144 	 * create table foo as select A;
2145 	 *
2146 	 * -- B
2147 	 * create table foo as select B from ...;
2148 	 *
2149 	 * -- D
2150 	 * create table foo as select C as D from ...;
2151 	 *
2152 	 * -- E, F
2153 	 * create table foo as select E, a.F;
2154 	 *
2155 	 * -- G, H
2156 	 * create table foo as select G, a.H from ...;
2157 	 *
2158 	 * -- J, K
2159 	 * create table foo as select I as J, a.K from ...;
2160 	 *
2161 	 * lastId is used for capturing A, B, E, F, G, H, and K.
2162 	 */
2163 	readToken (token);
2164 	do
2165 	{
2166 		if (isType (token, TOKEN_KEYWORD)
2167 			&& isKeyword (token, KEYWORD_is))
2168 		{
2169 			readToken (token);
2170 			if (isType (token, TOKEN_IDENTIFIER))
2171 			{
2172 				/* Emit the alias */
2173 				makeSqlTag (token, SQLTAG_FIELD);
2174 				columnAcceptable = true;
2175 			}
2176 			lastId->type = TOKEN_UNDEFINED;
2177 		}
2178 		else if ((isType (token, TOKEN_KEYWORD)
2179 				  && isKeyword (token, KEYWORD_from))
2180 				 || isType (token, TOKEN_SEMICOLON)
2181 				 || isType(token, TOKEN_COMMA))
2182 		{
2183 			if (lastId->type == TOKEN_IDENTIFIER)
2184 			{
2185 				/* Emit the column */
2186 				makeSqlTag(lastId, SQLTAG_FIELD);
2187 				columnAcceptable = true;
2188 			}
2189 
2190 			if (isType(token, TOKEN_COMMA))
2191 				lastId->type = TOKEN_UNDEFINED;
2192 			else
2193 				break;
2194 		}
2195 		else if (isType (token, TOKEN_OPEN_PAREN))
2196 		{
2197 			columnAcceptable = false;
2198 			skipToMatched (token);
2199 			lastId->type = TOKEN_UNDEFINED;
2200 			continue;
2201 		}
2202 		else if (isType (token, TOKEN_PERIOD))
2203 		{
2204 			lastId->type = TOKEN_UNDEFINED;
2205 		}
2206 		else if (isType (token, TOKEN_IDENTIFIER))
2207 		{
2208 			if (columnAcceptable)
2209 				copyToken (lastId, token);
2210 		}
2211 		else
2212 		{
2213 			columnAcceptable = false;
2214 			lastId->type = TOKEN_UNDEFINED;
2215 		}
2216 
2217 		readToken (token);
2218 	} while (! isType (token, TOKEN_EOF));
2219 
2220 	deleteToken (lastId);
2221 }
2222 
2223 /* Skip "IF NOT EXISTS"
2224  * https://dev.mysql.com/doc/refman/8.0/en/create-table.html
2225  * https://www.postgresql.org/docs/current/sql-createtable.html
2226  * https://sqlite.org/lang_createtable.html
2227  */
2228 static bool parseIdAfterIfNotExists(tokenInfo *const name,
2229 									tokenInfo *const token,
2230 									bool authorization_following)
2231 {
2232 	if (isKeyword (name, KEYWORD_if)
2233 		&& (isType (token, TOKEN_IDENTIFIER)
2234 			&& vStringLength (token->string) == 3
2235 			&& strcasecmp ("not", vStringValue (token->string)) == 0))
2236 	{
2237 		readToken (token);
2238 		if (isType (token, TOKEN_IDENTIFIER)
2239 			&& vStringLength (token->string) == 6
2240 			&& strcasecmp ("exists", vStringValue (token->string)) == 0)
2241 		{
2242 			readIdentifier (name);
2243 			if (authorization_following
2244 				&& isType (name, TOKEN_IDENTIFIER)
2245 				&& vStringLength (name->string) == 13
2246 				&& strcasecmp("authorization", vStringValue(name->string)) == 0)
2247 			{
2248 				/*
2249 				 * PostgreSQL:
2250 				 * - CREATE SCHEMA IF NOT EXISTS AUTHORIZATION role_specification
2251 				 */
2252 				readIdentifier (name);
2253 			}
2254 			readToken (token);
2255 			return true;
2256 		}
2257 	}
2258 	return false;
2259 }
2260 
2261 static void parseTable (tokenInfo *const token)
2262 {
2263 	tokenInfo *const name = newToken ();
2264 	bool emitted = false;
2265 
2266 	/*
2267 	 * This deals with these formats:
2268 	 *	   create table t1 (c1 int);
2269 	 *	   create global temporary table t2 (c1 int);
2270 	 *	   create table "t3" (c1 int);
2271 	 *	   create table bob.t4 (c1 int);
2272 	 *	   create table bob."t5" (c1 int);
2273 	 *	   create table "bob"."t6" (c1 int);
2274 	 *	   create table bob."t7" (c1 int);
2275 	 * Proxy tables use this format:
2276 	 *	   create existing table bob."t7" AT '...';
2277 	 * SQL Server and Sybase formats
2278      *     create table OnlyTable (
2279      *     create table dbo.HasOwner (
2280      *     create table [dbo].[HasOwnerSquare] (
2281      *     create table master.dbo.HasDb (
2282      *     create table master..HasDbNoOwner (
2283      *     create table [master].dbo.[HasDbAndOwnerSquare] (
2284      *     create table [master]..[HasDbNoOwnerSquare] (
2285 	 * Oracle and PostgreSQL use this format:
2286 	 *     create table FOO as select...
2287 	 * MySQL allows omitting "as" like:
2288 	 *     create table FOO select...
2289 	 *     create table FOO (...) select...
2290 	 * (At least) MySQL, PostgreSQL, and SQLite take "IF NOT EXISTS"
2291 	 * between "table" and the table name:
2292 	 *     create table if not exists foo ...
2293 	 */
2294 
2295 	/* This could be a database, owner or table name */
2296 	readIdentifier (name);
2297 	readToken (token);
2298 
2299 	parseIdAfterIfNotExists(name, token, false);
2300 
2301 	if (isType (token, TOKEN_PERIOD))
2302 	{
2303 		/*
2304 		 * This could be an owner or table name.
2305 		 * But this is also a special case since the table can be
2306 		 * referenced with a blank owner:
2307 		 *     dbname..tablename
2308 		 */
2309 		readIdentifier (name);
2310 		/* Check if a blank name was provided */
2311 		if (isType (name, TOKEN_PERIOD))
2312 		{
2313 			readIdentifier (name);
2314 		}
2315 		readToken (token);
2316 		if (isType (token, TOKEN_PERIOD))
2317 		{
2318 			/* This can only be the table name */
2319 			readIdentifier (name);
2320 			readToken (token);
2321 		}
2322 	}
2323 	if (isType (token, TOKEN_OPEN_PAREN))
2324 	{
2325 		if (isType (name, TOKEN_IDENTIFIER) ||
2326 			isType (name, TOKEN_STRING) ||
2327 			(isType (name, TOKEN_KEYWORD)
2328 			 && (!isReservedWord (name))))
2329 		{
2330 			makeSqlTag (name, SQLTAG_TABLE);
2331 			emitted = true;
2332 
2333 			vStringCopy(token->scope, name->string);
2334 			token->scopeKind = SQLTAG_TABLE;
2335 			parseRecord (token);
2336 			vStringClear (token->scope);
2337 			token->scopeKind = SQLTAG_COUNT;
2338 			readToken (token);
2339 		}
2340 		else
2341 			skipToMatched(token);
2342 	}
2343 	else if (isKeyword (token, KEYWORD_at))
2344 	{
2345 		if (isType (name, TOKEN_IDENTIFIER))
2346 		{
2347 			makeSqlTag (name, SQLTAG_TABLE);
2348 		}
2349 	}
2350 
2351 	if (isKeyword (token, KEYWORD_select)
2352 			 /* KEYWORD_is is for recognizing "as" */
2353 			 || isKeyword (token, KEYWORD_is))
2354 	{
2355 		if (isType (name, TOKEN_IDENTIFIER))
2356 		{
2357 			if (!emitted)
2358 				makeSqlTag (name, SQLTAG_TABLE);
2359 
2360 			if (isKeyword (token, KEYWORD_is))
2361 				readToken (token);
2362 
2363 			if (isKeyword (token, KEYWORD_select))
2364 			{
2365 				addToScope (token, name->string, SQLTAG_TABLE);
2366 				parseColumnsAndAliases (token);
2367 				vStringClear (token->scope);
2368 			}
2369 		}
2370 	}
2371 	findCmdTerm (token, true);
2372 	deleteToken (name);
2373 }
2374 
2375 static void parseIndex (tokenInfo *const token)
2376 {
2377 	tokenInfo *const name  = newToken ();
2378 	tokenInfo *const owner = newToken ();
2379 
2380 	/*
2381 	 * This deals with these formats
2382 	 *	   create index i1 on t1(c1) create index "i2" on t1(c1)
2383 	 *	   create virtual unique clustered index "i3" on t1(c1)
2384 	 *	   create unique clustered index "i4" on t1(c1)
2385 	 *	   create clustered index "i5" on t1(c1)
2386 	 *	   create bitmap index "i6" on t1(c1)
2387 	 */
2388 
2389 	readIdentifier (name);
2390 	readToken (token);
2391 	if (isType (token, TOKEN_PERIOD))
2392 	{
2393 		readIdentifier (name);
2394 		readToken (token);
2395 	}
2396 	if (isKeyword (token, KEYWORD_on) &&
2397 		(isType (name, TOKEN_IDENTIFIER) ||
2398 		 isType (name, TOKEN_STRING)))
2399 	{
2400 		readIdentifier (owner);
2401 		readToken (token);
2402 		if (isType (token, TOKEN_PERIOD))
2403 		{
2404 			readIdentifier (owner);
2405 			readToken (token);
2406 		}
2407 		addToScope(name, owner->string, SQLTAG_TABLE /* FIXME? */);
2408 		makeSqlTag (name, SQLTAG_INDEX);
2409 	}
2410 	findCmdTerm (token, false);
2411 	deleteToken (name);
2412 	deleteToken (owner);
2413 }
2414 
2415 static void parseEvent (tokenInfo *const token)
2416 {
2417 	tokenInfo *const name = newToken ();
2418 
2419 	/*
2420 	 * This deals with these formats
2421 	 *	   create event e1 handler begin end;
2422 	 *	   create event "e2" handler begin end;
2423 	 *	   create event dba."e3" handler begin end;
2424 	 *	   create event "dba"."e4" handler begin end;
2425 	 */
2426 
2427 	readIdentifier (name);
2428 	readToken (token);
2429 	if (isType (token, TOKEN_PERIOD))
2430 	{
2431 		readIdentifier (name);
2432 	}
2433 	while (! isKeyword (token, KEYWORD_handler) &&
2434 		   ! isType (token, TOKEN_SEMICOLON) &&
2435 		   ! isType (token, TOKEN_EOF))
2436 	{
2437 		readToken (token);
2438 	}
2439 
2440 	if ((isKeyword (token, KEYWORD_handler) ||
2441 		 isType (token, TOKEN_SEMICOLON))
2442 		&& (isType (name, TOKEN_IDENTIFIER) ||
2443 			isType (name, TOKEN_STRING)     ||
2444 			(isType (name, TOKEN_KEYWORD)
2445 			 && (!isReservedWord (name)))))
2446 	{
2447 		makeSqlTag (name, SQLTAG_EVENT);
2448 	}
2449 
2450 	if (isKeyword (token, KEYWORD_handler))
2451 	{
2452 		readToken (token);
2453 		if (isKeyword (token, KEYWORD_begin))
2454 		{
2455 			parseBlock (token, true);
2456 		}
2457 		findCmdTerm (token, true);
2458 	}
2459 	deleteToken (name);
2460 }
2461 
2462 static void parseTrigger (tokenInfo *const token)
2463 {
2464 	tokenInfo *const name  = newToken ();
2465 	tokenInfo *const table = newToken ();
2466 
2467 	/*
2468 	 * This deals with these formats
2469 	 *	   create or replace trigger tr1 begin end;
2470 	 *	   create trigger "tr2" begin end;
2471 	 *	   drop trigger "droptr1";
2472 	 *	   create trigger "tr3" CALL sp_something();
2473 	 *	   create trigger "owner"."tr4" begin end;
2474 	 *	   create trigger "tr5" not valid;
2475 	 *	   create trigger "tr6" begin end;
2476 	 */
2477 
2478 	readIdentifier (name);
2479 	readToken (token);
2480 	if (isType (token, TOKEN_PERIOD))
2481 	{
2482 		readIdentifier (name);
2483 		readToken (token);
2484 	}
2485 
2486 	while (! isKeyword (token, KEYWORD_on) &&
2487 		   ! isType (token, TOKEN_EOF) &&
2488 		   ! isCmdTerm (token))
2489 	{
2490 		readToken (token);
2491 	}
2492 
2493 	/*if (! isType (token, TOKEN_SEMICOLON) ) */
2494 	if (! isCmdTerm (token))
2495 	{
2496 		readToken (table);
2497 		readToken (token);
2498 		if (isType (token, TOKEN_PERIOD))
2499 		{
2500 			readToken (table);
2501 			readToken (token);
2502 		}
2503 
2504 		while (! isKeyword (token, KEYWORD_begin) &&
2505 			   ! isKeyword (token, KEYWORD_call) &&
2506 			   ! isCmdTerm (token) &&
2507 			   ! isType (token, TOKEN_EOF))
2508 		{
2509 			if (isKeyword (token, KEYWORD_declare))
2510 			{
2511 				addToScope(token, name->string, SQLTAG_TRIGGER);
2512 				parseDeclare(token, true);
2513 				vStringClear(token->scope);
2514 				token->scopeKind = SQLTAG_COUNT;
2515 			}
2516 			else
2517 				readToken (token);
2518 		}
2519 
2520 		if (isKeyword (token, KEYWORD_begin) ||
2521 			isKeyword (token, KEYWORD_call))
2522 		{
2523 			addToScope(name, table->string, SQLTAG_TABLE);
2524 			makeSqlTag (name, SQLTAG_TRIGGER);
2525 			addToScope(token, table->string, SQLTAG_TABLE);
2526 			if (isKeyword (token, KEYWORD_begin))
2527 			{
2528 				parseBlock (token, true);
2529 			}
2530 			vStringClear(token->scope);
2531 			token->scopeKind = SQLTAG_COUNT;
2532 		}
2533 	}
2534 
2535 	findCmdTerm (token, true);
2536 	deleteToken (name);
2537 	deleteToken (table);
2538 }
2539 
2540 static void parsePublication (tokenInfo *const token)
2541 {
2542 	tokenInfo *const name = newToken ();
2543 
2544 	/*
2545 	 * This deals with these formats
2546 	 *	   create or replace publication pu1 ()
2547 	 *	   create publication "pu2" ()
2548 	 *	   create publication dba."pu3" ()
2549 	 *	   create publication "dba"."pu4" ()
2550 	 */
2551 
2552 	readIdentifier (name);
2553 	readToken (token);
2554 	if (isType (token, TOKEN_PERIOD))
2555 	{
2556 		readIdentifier (name);
2557 		readToken (token);
2558 	}
2559 	if (isType (token, TOKEN_OPEN_PAREN))
2560 	{
2561 		if (isType (name, TOKEN_IDENTIFIER) ||
2562 			isType (name, TOKEN_STRING))
2563 		{
2564 			makeSqlTag (name, SQLTAG_PUBLICATION);
2565 		}
2566 	}
2567 	findCmdTerm (token, false);
2568 	deleteToken (name);
2569 }
2570 static void parseService (tokenInfo *const token)
2571 {
2572 	tokenInfo *const name = newToken ();
2573 
2574 	/*
2575 	 * This deals with these formats
2576 	 *	   CREATE SERVICE s1 TYPE 'HTML'
2577 	 *		   AUTHORIZATION OFF USER DBA AS
2578 	 *		   SELECT *
2579 	 *			 FROM SYS.SYSTABLE;
2580 	 *	   CREATE SERVICE "s2" TYPE 'HTML'
2581 	 *		   AUTHORIZATION OFF USER DBA AS
2582 	 *		   CALL sp_Something();
2583 	 */
2584 
2585 	readIdentifier (name);
2586 	readToken (token);
2587 	if (isKeyword (token, KEYWORD_type))
2588 	{
2589 		if (isType (name, TOKEN_IDENTIFIER) ||
2590 			isType (name, TOKEN_STRING))
2591 		{
2592 			makeSqlTag (name, SQLTAG_SERVICE);
2593 		}
2594 	}
2595 	findCmdTerm (token, false);
2596 	deleteToken (name);
2597 }
2598 
2599 static void parseDomain (tokenInfo *const token)
2600 {
2601 	tokenInfo *const name = newToken ();
2602 
2603 	/*
2604 	 * This deals with these formats
2605 	 *	   CREATE DOMAIN|DATATYPE [AS] your_name ...;
2606 	 */
2607 
2608 	readIdentifier (name);
2609 	if (isKeyword (name, KEYWORD_is))
2610 	{
2611 		readIdentifier (name);
2612 	}
2613 	readToken (token);
2614 	if (isType (name, TOKEN_IDENTIFIER) ||
2615 		isType (name, TOKEN_STRING))
2616 	{
2617 		makeSqlTag (name, SQLTAG_DOMAIN);
2618 	}
2619 	findCmdTerm (token, false);
2620 	deleteToken (name);
2621 }
2622 
2623 static void parseDrop (tokenInfo *const token)
2624 {
2625 	/*
2626 	 * This deals with these formats
2627 	 *	   DROP TABLE|PROCEDURE|DOMAIN|DATATYPE name;
2628 	 *
2629 	 * Simply skip over these statements.
2630 	 * They are often confused with PROCEDURE prototypes
2631 	 * since the syntax is similar; skipping them effectively
2632 	 * deals with the issue for all types.
2633 	 */
2634 
2635 	findCmdTerm (token, false);
2636 }
2637 
2638 static void parseVariable (tokenInfo *const token)
2639 {
2640 	tokenInfo *const name = newToken ();
2641 
2642 	/*
2643 	 * This deals with these formats
2644 	 *	   create variable varname1 integer;
2645 	 *	   create variable @varname2 integer;
2646 	 *	   create variable "varname3" integer;
2647 	 *	   drop   variable @varname3;
2648 	 */
2649 
2650 	readIdentifier (name);
2651 	readToken (token);
2652 	if (! isType (token, TOKEN_SEMICOLON) &&
2653 		(isType (name, TOKEN_IDENTIFIER) ||
2654 		 isType (name, TOKEN_STRING)))
2655 	{
2656 		makeSqlTag (name, SQLTAG_VARIABLE);
2657 	}
2658 	findCmdTerm (token, true);
2659 
2660 	deleteToken (name);
2661 }
2662 
2663 static void parseSynonym (tokenInfo *const token)
2664 {
2665 	tokenInfo *const name = newToken ();
2666 
2667 	/*
2668 	 * This deals with these formats
2669 	 *	   create synonym syn1 for tablename;
2670 	 *	   create synonym "syn2" for tablename;
2671 	 *	   drop   synonym syn1;
2673 	 */
2674 
2675 	readIdentifier (name);
2676 	readToken (token);
2677 	if (isKeyword (token, KEYWORD_for) &&
2678 		(isType (name, TOKEN_IDENTIFIER) ||
2679 		 isType (name, TOKEN_STRING)))
2680 	{
2681 		makeSqlTag (name, SQLTAG_SYNONYM);
2682 	}
2683 	findCmdTerm (token, true);
2684 
2685 	deleteToken (name);
2686 }
2687 
2688 static void parseView (tokenInfo *const token)
2689 {
2690 	tokenInfo *const name = newToken ();
2691 
2692 	/*
2693 	 * This deals with these formats
2694 	 *     create view VIEW;
2695 	 *     create view VIEW as ...;
2696 	 *     create view VIEW (...) as ...;
2697 	 */
2698 
2699 	readIdentifier (name);
2700 	readToken (token);
2701 	if (isType (token, TOKEN_PERIOD))
2702 	{
2703 		readIdentifier (name);
2704 		readToken (token);
2705 	}
2706 	if (isType (token, TOKEN_OPEN_PAREN))
2707 	{
2708 		skipArgumentList(token);
2709 	}
2710 
2711 	while (! isKeyword (token, KEYWORD_is) &&
2712 		   ! isType (token, TOKEN_SEMICOLON) &&
2713 		   ! isType (token, TOKEN_EOF))
2714 	{
2715 		readToken (token);
2716 	}
2717 
2718 	if (isKeyword (token, KEYWORD_is) &&
2719 		(isType (name, TOKEN_IDENTIFIER) ||
2720 		 isType (name, TOKEN_STRING)))
2721 	{
2722 		makeSqlTag (name, SQLTAG_VIEW);
2723 	}
2724 
2725 	findCmdTerm (token, true);
2726 
2727 	deleteToken (name);
2728 }
2729 
2730 static void parseMLTable (tokenInfo *const token)
2731 {
2732 	tokenInfo *const version = newToken ();
2733 	tokenInfo *const table	 = newToken ();
2734 	tokenInfo *const event	 = newToken ();
2735 
2736 	/*
2737 	 * This deals with these formats
2738 	 *	  call dbo.ml_add_table_script( 'version', 'table_name', 'event',
2739 	 *		   'some SQL statement'
2740 	 *		   );
2741 	 */
2742 
2743 	readToken (token);
2744 	if (isType (token, TOKEN_OPEN_PAREN))
2745 	{
2746 		readToken (version);
2747 		readToken (token);
2748 		while (! isType (token, TOKEN_COMMA) &&
2749 			   ! isType (token, TOKEN_CLOSE_PAREN) &&
2750 			   ! isType (token, TOKEN_EOF))
2751 		{
2752 			readToken (token);
2753 		}
2754 
2755 		if (isType (token, TOKEN_COMMA))
2756 		{
2757 			readToken (table);
2758 			readToken (token);
2759 			while (! isType (token, TOKEN_COMMA) &&
2760 				   ! isType (token, TOKEN_CLOSE_PAREN) &&
2761 				   ! isType (token, TOKEN_EOF))
2762 			{
2763 				readToken (token);
2764 			}
2765 
2766 			if (isType (token, TOKEN_COMMA))
2767 			{
2768 				readToken (event);
2769 
2770 				if (isType (version, TOKEN_STRING) &&
2771 					isType (table, TOKEN_STRING) &&
2772 					isType (event, TOKEN_STRING))
2773 				{
2774 					addToScope(version, table->string, SQLTAG_TABLE);
2775 					addToScope(version, event->string, SQLTAG_EVENT);
2776 					makeSqlTag (version, SQLTAG_MLTABLE);
2777 				}
2778 			}
2779 			if (! isType (token, TOKEN_CLOSE_PAREN))
2780 				findToken (token, TOKEN_CLOSE_PAREN);
2781 		}
2782 	}
2783 
2784 	findCmdTerm (token, true);
2785 
2786 	deleteToken (version);
2787 	deleteToken (table);
2788 	deleteToken (event);
2789 }
2790 
2791 static void parseMLConn (tokenInfo *const token)
2792 {
2793 	tokenInfo *const version = newToken ();
2794 	tokenInfo *const event	 = newToken ();
2795 
2796 	/*
2797 	 * This deals with these formats
2798 	 *	  call ml_add_connection_script( 'version', 'event',
2799 	 *		   'some SQL statement'
2800 	 *		   );
2801 	 */
2802 
2803 	readToken (token);
2804 	if (isType (token, TOKEN_OPEN_PAREN))
2805 	{
2806 		readToken (version);
2807 		readToken (token);
2808 		while (! isType (token, TOKEN_COMMA) &&
2809 			   ! isType (token, TOKEN_CLOSE_PAREN) &&
2810 			   ! isType (token, TOKEN_EOF))
2811 		{
2812 			readToken (token);
2813 		}
2814 
2815 		if (isType (token, TOKEN_COMMA))
2816 		{
2817 			readToken (event);
2818 
2819 			if (isType (version, TOKEN_STRING) &&
2820 				isType (event, TOKEN_STRING))
2821 			{
2822 				addToScope(version, event->string, SQLTAG_EVENT);
2823 				makeSqlTag (version, SQLTAG_MLCONN);
2824 			}
2825 		}
2826 		if (! isType (token, TOKEN_CLOSE_PAREN))
2827 			findToken (token, TOKEN_CLOSE_PAREN);
2828 
2829 	}
2830 
2831 	findCmdTerm (token, true);
2832 
2833 	deleteToken (version);
2834 	deleteToken (event);
2835 }
2836 
2837 static void parseMLProp (tokenInfo *const token)
2838 {
2839 	tokenInfo *const component     = newToken ();
2840 	tokenInfo *const prop_set_name = newToken ();
2841 	tokenInfo *const prop_name     = newToken ();
2842 
2843 	/*
2844 	 * This deals with these formats
2845      *   ml_add_property (
2846      *       'comp_name',
2847      *       'prop_set_name',
2848      *       'prop_name',
2849      *       'prop_value'
2850      *   )
2851 	 */
2852 
2853 	readToken (token);
2854 	if (isType (token, TOKEN_OPEN_PAREN))
2855 	{
2856 		readToken (component);
2857 		readToken (token);
2858 		while (! isType (token, TOKEN_COMMA) &&
2859 			   ! isType (token, TOKEN_CLOSE_PAREN) &&
2860 			   ! isType (token, TOKEN_EOF))
2861 		{
2862 			readToken (token);
2863 		}
2864 
2865 		if (isType (token, TOKEN_COMMA))
2866 		{
2867 			readToken (prop_set_name);
2868 			readToken (token);
2869 			while (! isType (token, TOKEN_COMMA) &&
2870 				   ! isType (token, TOKEN_CLOSE_PAREN) &&
2871 				   ! isType (token, TOKEN_EOF))
2872 			{
2873 				readToken (token);
2874 			}
2875 
2876 			if (isType (token, TOKEN_COMMA))
2877 			{
2878 				readToken (prop_name);
2879 
2880 				if (isType (component, TOKEN_STRING) &&
2881 					isType (prop_set_name, TOKEN_STRING) &&
2882 					isType (prop_name, TOKEN_STRING))
2883 				{
2884 					addToScope(component, prop_set_name->string, SQLTAG_MLPROP /* FIXME */);
2885 					addToScope(component, prop_name->string, SQLTAG_MLPROP /* FIXME */);
2886 					makeSqlTag (component, SQLTAG_MLPROP);
2887 				}
2888 			}
2889 			if (! isType (token, TOKEN_CLOSE_PAREN))
2890 				findToken (token, TOKEN_CLOSE_PAREN);
2891 		}
2892 	}
2893 
2894 	findCmdTerm (token, true);
2895 
2896 	deleteToken (component);
2897 	deleteToken (prop_set_name);
2898 	deleteToken (prop_name);
2899 }
2900 
2901 static void parseComment (tokenInfo *const token)
2902 {
2903 	/*
2904 	 * This deals with this statement:
2905 	 *	   COMMENT TO PRESERVE FORMAT ON PROCEDURE "DBA"."test" IS
2906 	 *	   {create PROCEDURE DBA."test"()
2907 	 *	   BEGIN
2908 	 *		signal dave;
2909 	 *	   END
2910 	 *	   }
2911 	 *	   ;
2912 	 * The comment can contain anything between the CURLY
2913 	 * braces
2914 	 *	   COMMENT ON USER "admin" IS
2915 	 *			'Administration Group'
2916 	 *			;
2917 	 * Or it could be a simple string with no curly braces
2918 	 */
2919 	while (! isKeyword (token, KEYWORD_is) &&
2920 		   ! isType (token, TOKEN_EOF))
2921 	{
2922 		readToken (token);
2923 	}
2924 	readToken (token);
2925 	if (isType(token, TOKEN_OPEN_CURLY))
2926 	{
2927 		findToken (token, TOKEN_CLOSE_CURLY);
2928 	}
2929 
2930 	findCmdTerm (token, true);
2931 }
2932 
2933 static void parseCCFLAGS (tokenInfo *const token)
2934 {
2935 	readToken(token);
2936 	if (!isType (token, TOKEN_EQUAL))
2937 	{
2938 		findCmdTerm (token, true);
2939 		return;
2940 	}
2941 
2942 	readToken(token);
2943 	if (!isType (token, TOKEN_STRING))
2944 	{
2945 		findCmdTerm (token, true);
2946 		return;
2947 	}
2948 
2949 	bool in_var = true;
2950 	const char *s = vStringValue(token->string);
2951 	vString *ccflag = vStringNew();
2952 	/* http://web.deu.edu.tr/doc/oracle/B19306_01/server.102/b14237/initparams158.htm#REFRN10261 */
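	/* For illustration only, a typical setting looks like:
	 *     ALTER SESSION SET PLSQL_CCFLAGS = 'debug:TRUE, tracelevel:2';
	 * The loop below extracts each flag name ("debug", "tracelevel")
	 * preceding a ':' and emits it as a PLSQL_CCFLAGS tag, unless the
	 * name is a predefined inquiry directive.
	 */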
2953 	while (*s)
2954 	{
2955 		if (in_var && isIdentChar1((int)*s))
2956 			vStringPut(ccflag, *s);
2957 		else if (*s == ':' && !vStringIsEmpty(ccflag))
2958 		{
2959 			if (lookupCaseKeyword(vStringValue(ccflag), Lang_sql)
2960 				!= KEYWORD_inquiry_directive)
2961 			{
2962 				int index = makeSimpleTag(ccflag, SQLTAG_PLSQL_CCFLAGS);
2963 				registerEntry(index);
2964 				vStringClear(ccflag);
2965 				in_var = false;
2966 			}
2967 		}
2968 		else if (*s == ',')
2969 			in_var = true;
2970 		s++;
2971 	}
2972 	vStringDelete(ccflag);
2973 
2974 }
2975 
2976 static void parseDatabase (tokenInfo *const token, enum eKeywordId keyword)
2977 {
2978 	tokenInfo * name;
2979 
2980 	/*
2981 	 * In MySQL and HPL/SQL, "CREATE DATABASE" and "CREATE SCHEMA"
2982 	 * are the same. However, in PostgreSQL, they are different.
2983 	 * To support PostgreSQL, we provide different kinds for them.
2984 	 *
2985 	 * MySQL
2986 	 * A. CREATE {DATABASE | SCHEMA} [IF NOT EXISTS] db_name ...;
2987 	 *
2988 	 * PostgreSQL
2989 	 *
2990 	 * B. CREATE DATABASE name ...;
2991 	 *
2992 	 * C. CREATE SCHEMA schema_name [ AUTHORIZATION role_specification ] [ schema_element [ ... ] ]
2993 	 * D. CREATE SCHEMA AUTHORIZATION role_specification [ schema_element [ ... ] ]
2994 	 * E. CREATE SCHEMA IF NOT EXISTS schema_name [ AUTHORIZATION role_specification ]
2995 	 * F. CREATE SCHEMA IF NOT EXISTS AUTHORIZATION role_specification
2996 	 *
2997 	 * HPL/SQL
2998 	 * G. CREATE DATABASE | SCHEMA [IF NOT EXISTS] dbname_expr...;
2999 	 */
3000 	readIdentifier (token);
3001 	if (keyword == KEYWORD_schema
3002 		&& isType (token, TOKEN_IDENTIFIER)
3003 		&& vStringLength (token->string) == 13
3004 		&& strcasecmp("authorization", vStringValue(token->string)) == 0)
3005 	{
3006 		/* D. */
3007 		readIdentifier (token);
3008 		makeSqlTag (token, SQLTAG_SCHEMA);
3009 		findCmdTerm (token, false);
3010 		return;
3011 	}
3012 
3013 	name = newToken ();
3014 	copyToken (name, token);
3015 	readIdentifier (token);
3016 	parseIdAfterIfNotExists (name, token, true);
3017 
3018 	makeSqlTag (name,
3019 				keyword == KEYWORD_database
3020 				? SQLTAG_DATABASE: SQLTAG_SCHEMA);
3021 	deleteToken (name);
3022 
3023 	/* TODO:
3024 	 *
3025 	 * In PostgreSQL, CREATE FOO can follow to CREATE SCHEMA like:
3026 	 *
3027 	 * -- https://www.postgresql.org/docs/current/sql-createschema.html
3028 	 *
3029 	 *     CREATE SCHEMA hollywood
3030 	 *         CREATE TABLE films (title text, release date, awards text[])
3031 	 *         CREATE VIEW winners AS
3032 	 *             SELECT title, release FROM films WHERE awards IS NOT NULL;
3033 	 *
3034 	 * In the above example, "hollywood.films" and "hollywood.winners" should be
3035 	 * tagged.
3036 	 */
3037 	findCmdTerm (token, true);
3038 }
3039 
3040 static void parseKeywords (tokenInfo *const token)
3041 {
3042 		switch (token->keyword)
3043 		{
3044 			case KEYWORD_begin:			parseBlock (token, false); break;
3045 			case KEYWORD_inquiry_directive:
3046 				if (strcasecmp(vStringValue(token->string), "PLSQL_CCFLAGS") == 0)
3047 					parseCCFLAGS (token);
3048 				break;
3049 			case KEYWORD_comment:		parseComment (token); break;
3050 			case KEYWORD_cursor:		parseSimple (token, SQLTAG_CURSOR); break;
3051 			case KEYWORD_database:		parseDatabase (token, KEYWORD_database); break;
3052 			case KEYWORD_datatype:		parseDomain (token); break;
3053 			case KEYWORD_declare:		parseBlock (token, false); break;
3054 			case KEYWORD_domain:		parseDomain (token); break;
3055 			case KEYWORD_drop:			parseDrop (token); break;
3056 			case KEYWORD_event:			parseEvent (token); break;
3057 			case KEYWORD_extension:		findCmdTerm (token, false); break;
3058 			case KEYWORD_function:		parseSubProgram (token); break;
3059 			case KEYWORD_if:			parseStatements (token, false); break;
3060 			case KEYWORD_index:			parseIndex (token); break;
3061 			case KEYWORD_ml_table:		parseMLTable (token); break;
3062 			case KEYWORD_ml_table_lang: parseMLTable (token); break;
3063 			case KEYWORD_ml_table_dnet: parseMLTable (token); break;
3064 			case KEYWORD_ml_table_java: parseMLTable (token); break;
3065 			case KEYWORD_ml_table_chk:  parseMLTable (token); break;
3066 			case KEYWORD_ml_conn:		parseMLConn (token); break;
3067 			case KEYWORD_ml_conn_lang:	parseMLConn (token); break;
3068 			case KEYWORD_ml_conn_dnet:	parseMLConn (token); break;
3069 			case KEYWORD_ml_conn_java:	parseMLConn (token); break;
3070 			case KEYWORD_ml_conn_chk:	parseMLConn (token); break;
3071 			case KEYWORD_ml_prop:		parseMLProp (token); break;
3072 			case KEYWORD_package:		parsePackage (token); break;
3073 			case KEYWORD_procedure:		parseSubProgram (token); break;
3074 			case KEYWORD_publication:	parsePublication (token); break;
3075 			case KEYWORD_schema:		parseDatabase (token, KEYWORD_schema); break;
3076 			case KEYWORD_service:		parseService (token); break;
3077 			case KEYWORD_subtype:		parseSimple (token, SQLTAG_SUBTYPE); break;
3078 			case KEYWORD_synonym:		parseSynonym (token); break;
3079 			case KEYWORD_table:			parseTable (token); break;
3080 			case KEYWORD_trigger:		parseTrigger (token); break;
3081 			case KEYWORD_type:			parseType (token); break;
3082 			case KEYWORD_variable:		parseVariable (token); break;
3083 			case KEYWORD_view:			parseView (token); break;
3084 			case KEYWORD_with:			readToken (token); break; /* skip next token */
3085 			case KEYWORD_without:		readToken (token); break; /* skip next token */
3086 			default:				    break;
3087 		}
3088 }
3089 
3090 static tokenType parseSqlFile (tokenInfo *const token)
3091 {
3092 	do
3093 	{
3094 		readToken (token);
3095 
3096 		if (isType (token, TOKEN_BLOCK_LABEL_BEGIN))
3097 			parseLabel (token);
3098 		else
3099 			parseKeywords (token);
3100 	} while (! isKeyword (token, KEYWORD_end) &&
3101 			 ! isType (token, TOKEN_EOF));
3102 
3103 	return token->type;
3104 }
3105 
3106 static void initialize (const langType language)
3107 {
3108 	Assert (ARRAY_SIZE (SqlKinds) == SQLTAG_COUNT);
3109 	Lang_sql = language;
3110 	addKeywordGroup (&predefinedInquiryDirective, language);
3111 }
3112 
3113 static void findSqlTags (void)
3114 {
3115 	tokenInfo *const token = newToken ();
3116 
3117 	while (parseSqlFile (token) != TOKEN_EOF);
3118 
3119 	deleteToken (token);
3120 }
3121 
3122 extern parserDefinition* SqlParser (void)
3123 {
3124 	static const char *const extensions [] = { "sql", NULL };
3125 	static const char *const aliases [] = {"pgsql", NULL };
3126 	parserDefinition* def = parserNew ("SQL");
3127 	def->kindTable	= SqlKinds;
3128 	def->kindCount	= ARRAY_SIZE (SqlKinds);
3129 	def->extensions = extensions;
3130 	def->aliases    = aliases;
3131 	def->parser		= findSqlTags;
3132 	def->initialize = initialize;
3133 	def->keywordTable = SqlKeywordTable;
3134 	def->keywordCount = ARRAY_SIZE (SqlKeywordTable);
3135 	def->useCork = CORK_QUEUE | CORK_SYMTAB;
3136 	return def;
3137 }
3138