1 // Scintilla source code edit control
2 /** @file LexSQL.cxx
3  ** Lexer for SQL, including PL/SQL and SQL*Plus.
4  **/
5 // Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>
6 // The License.txt file describes the conditions under which this software may be distributed.
7 
8 #include <stdlib.h>
9 #include <string.h>
10 #include <ctype.h>
11 #include <stdio.h>
12 #include <stdarg.h>
13 
14 #include "Platform.h"
15 
16 #include "PropSet.h"
17 #include "Accessor.h"
18 #include "StyleContext.h"
19 #include "KeyWords.h"
20 #include "Scintilla.h"
21 #include "SciLexer.h"
22 
// Returns true when ch can appear inside an identifier: an ASCII letter or
// digit, '.' or '_'.
// Anything outside 0..0x7F is rejected before reaching isalnum(), whose
// behaviour is undefined for arguments that are neither EOF nor representable
// as unsigned char.
static inline bool IsAWordChar(int ch) {
	if (ch < 0 || ch >= 0x80) {
		return false;
	}
	return isalnum(ch) || ch == '.' || ch == '_';
}
26 
// Returns true when ch can begin an identifier: an ASCII letter or '_'.
// Anything outside 0..0x7F is rejected before reaching isalpha(), whose
// behaviour is undefined for arguments that are neither EOF nor representable
// as unsigned char.
static inline bool IsAWordStart(int ch) {
	if (ch < 0 || ch >= 0x80) {
		return false;
	}
	return isalpha(ch) || ch == '_';
}
30 
// Returns true when ch can be part of a doc-comment keyword: an ASCII
// lower-case letter or one of  $ @ \ & < > # { } [ ] .
// islower() is only consulted for values in 0..0x7F, matching the other
// character helpers in this file; passing it a value that is neither EOF nor
// representable as unsigned char would be undefined behaviour.
static inline bool IsADoxygenChar(int ch) {
	if (ch >= 0 && ch < 0x80 && islower(ch)) {
		return true;
	}
	switch (ch) {
	case '$':
	case '@':
	case '\\':
	case '&':
	case '<':
	case '>':
	case '#':
	case '{':
	case '}':
	case '[':
	case ']':
		return true;
	default:
		return false;
	}
}
37 
// Returns true when ch may continue a numeric literal: an ASCII digit,
// 'e'/'E', '.', '-' or '+'.
// Not exactly following number definition (several dots are seen as OK, etc.)
// but probably enough in most cases.
// Anything outside 0..0x7F is rejected before reaching isdigit()/toupper(),
// whose behaviour is undefined for arguments that are neither EOF nor
// representable as unsigned char.
static inline bool IsANumberChar(int ch) {
	if (ch < 0 || ch >= 0x80) {
		return false;
	}
	return isdigit(ch) || toupper(ch) == 'E' ||
	       ch == '.' || ch == '-' || ch == '+';
}
45 
ColouriseSQLDoc(unsigned int startPos,int length,int initStyle,WordList * keywordlists[],Accessor & styler)46 static void ColouriseSQLDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[],
47                             Accessor &styler) {
48 
49 	WordList &keywords1 = *keywordlists[0];
50 	WordList &keywords2 = *keywordlists[1];
51 	WordList &kw_pldoc = *keywordlists[2];
52 	WordList &kw_sqlplus = *keywordlists[3];
53 	WordList &kw_user1 = *keywordlists[4];
54 	WordList &kw_user2 = *keywordlists[5];
55 	WordList &kw_user3 = *keywordlists[6];
56 	WordList &kw_user4 = *keywordlists[7];
57 
58 	StyleContext sc(startPos, length, initStyle, styler);
59 
60 	bool sqlBackslashEscapes = styler.GetPropertyInt("sql.backslash.escapes", 0) != 0;
61 	bool sqlBackticksIdentifier = styler.GetPropertyInt("lexer.sql.backticks.identifier", 0) != 0;
62 	int styleBeforeDCKeyword = SCE_SQL_DEFAULT;
63 	for (; sc.More(); sc.Forward()) {
64 		// Determine if the current state should terminate.
65 		switch (sc.state) {
66 		case SCE_SQL_OPERATOR:
67 			sc.SetState(SCE_SQL_DEFAULT);
68 			break;
69 		case SCE_SQL_NUMBER:
70 			// We stop the number definition on non-numerical non-dot non-eE non-sign char
71 			if (!IsANumberChar(sc.ch)) {
72 				sc.SetState(SCE_SQL_DEFAULT);
73 			}
74 			break;
75 		case SCE_SQL_IDENTIFIER:
76 			if (!IsAWordChar(sc.ch)) {
77 				int nextState = SCE_SQL_DEFAULT;
78 				char s[1000];
79 				sc.GetCurrentLowered(s, sizeof(s));
80 				if (keywords1.InList(s)) {
81 					sc.ChangeState(SCE_SQL_WORD);
82 				} else if (keywords2.InList(s)) {
83 					sc.ChangeState(SCE_SQL_WORD2);
84 				} else if (kw_sqlplus.InListAbbreviated(s, '~')) {
85 					sc.ChangeState(SCE_SQL_SQLPLUS);
86 					if (strncmp(s, "rem", 3) == 0) {
87 						nextState = SCE_SQL_SQLPLUS_COMMENT;
88 					} else if (strncmp(s, "pro", 3) == 0) {
89 						nextState = SCE_SQL_SQLPLUS_PROMPT;
90 					}
91 				} else if (kw_user1.InList(s)) {
92 					sc.ChangeState(SCE_SQL_USER1);
93 				} else if (kw_user2.InList(s)) {
94 					sc.ChangeState(SCE_SQL_USER2);
95 				} else if (kw_user3.InList(s)) {
96 					sc.ChangeState(SCE_SQL_USER3);
97 				} else if (kw_user4.InList(s)) {
98 					sc.ChangeState(SCE_SQL_USER4);
99 				}
100 				sc.SetState(nextState);
101 			}
102 			break;
103 		case SCE_SQL_QUOTEDIDENTIFIER:
104 			if (sc.ch == 0x60) {
105 				if (sc.chNext == 0x60) {
106 					sc.Forward();	// Ignore it
107 				} else {
108 					sc.ForwardSetState(SCE_SQL_DEFAULT);
109 				}
110 			}
111 			break;
112 		case SCE_SQL_COMMENT:
113 			if (sc.Match('*', '/')) {
114 				sc.Forward();
115 				sc.ForwardSetState(SCE_SQL_DEFAULT);
116 			}
117 			break;
118 		case SCE_SQL_COMMENTDOC:
119 			if (sc.Match('*', '/')) {
120 				sc.Forward();
121 				sc.ForwardSetState(SCE_SQL_DEFAULT);
122 			} else if (sc.ch == '@' || sc.ch == '\\') { // Doxygen support
123 				// Verify that we have the conditions to mark a comment-doc-keyword
124 				if ((IsASpace(sc.chPrev) || sc.chPrev == '*') && (!IsASpace(sc.chNext))) {
125 					styleBeforeDCKeyword = SCE_SQL_COMMENTDOC;
126 					sc.SetState(SCE_SQL_COMMENTDOCKEYWORD);
127 				}
128 			}
129 			break;
130 		case SCE_SQL_COMMENTLINE:
131 		case SCE_SQL_COMMENTLINEDOC:
132 		case SCE_SQL_SQLPLUS_COMMENT:
133 		case SCE_SQL_SQLPLUS_PROMPT:
134 			if (sc.atLineStart) {
135 				sc.SetState(SCE_SQL_DEFAULT);
136 			}
137 			break;
138 		case SCE_SQL_COMMENTDOCKEYWORD:
139 			if ((styleBeforeDCKeyword == SCE_SQL_COMMENTDOC) && sc.Match('*', '/')) {
140 				sc.ChangeState(SCE_SQL_COMMENTDOCKEYWORDERROR);
141 				sc.Forward();
142 				sc.ForwardSetState(SCE_SQL_DEFAULT);
143 			} else if (!IsADoxygenChar(sc.ch)) {
144 				char s[100];
145 				sc.GetCurrentLowered(s, sizeof(s));
146 				if (!isspace(sc.ch) || !kw_pldoc.InList(s + 1)) {
147 					sc.ChangeState(SCE_SQL_COMMENTDOCKEYWORDERROR);
148 				}
149 				sc.SetState(styleBeforeDCKeyword);
150 			}
151 			break;
152 		case SCE_SQL_CHARACTER:
153 			if (sqlBackslashEscapes && sc.ch == '\\') {
154 				sc.Forward();
155 			} else if (sc.ch == '\'') {
156 				if (sc.chNext == '\"') {
157 					sc.Forward();
158 				} else {
159 					sc.ForwardSetState(SCE_SQL_DEFAULT);
160 				}
161 			}
162 			break;
163 		case SCE_SQL_STRING:
164 			if (sc.ch == '\\') {
165 				// Escape sequence
166 				sc.Forward();
167 			} else if (sc.ch == '\"') {
168 				if (sc.chNext == '\"') {
169 					sc.Forward();
170 				} else {
171 					sc.ForwardSetState(SCE_SQL_DEFAULT);
172 				}
173 			}
174 			break;
175 		}
176 
177 		// Determine if a new state should be entered.
178 		if (sc.state == SCE_SQL_DEFAULT) {
179 			if (IsADigit(sc.ch) || (sc.ch == '.' && IsADigit(sc.chNext))) {
180 				sc.SetState(SCE_SQL_NUMBER);
181 			} else if (IsAWordStart(sc.ch)) {
182 				sc.SetState(SCE_SQL_IDENTIFIER);
183 			} else if (sc.ch == 0x60 && sqlBackticksIdentifier) {
184 				sc.SetState(SCE_SQL_QUOTEDIDENTIFIER);
185 			} else if (sc.Match('/', '*')) {
186 				if (sc.Match("/**") || sc.Match("/*!")) {	// Support of Doxygen doc. style
187 					sc.SetState(SCE_SQL_COMMENTDOC);
188 				} else {
189 					sc.SetState(SCE_SQL_COMMENT);
190 				}
191 				sc.Forward();	// Eat the * so it isn't used for the end of the comment
192 			} else if (sc.Match('-', '-')) {
193 				// MySQL requires a space or control char after --
194 				// http://dev.mysql.com/doc/mysql/en/ansi-diff-comments.html
195 				// Perhaps we should enforce that with proper property:
196 //~ 			} else if (sc.Match("-- ")) {
197 				sc.SetState(SCE_SQL_COMMENTLINE);
198 			} else if (sc.ch == '#') {
199 				sc.SetState(SCE_SQL_COMMENTLINEDOC);
200 			} else if (sc.ch == '\'') {
201 				sc.SetState(SCE_SQL_CHARACTER);
202 			} else if (sc.ch == '\"') {
203 				sc.SetState(SCE_SQL_STRING);
204 			} else if (isoperator(static_cast<char>(sc.ch))) {
205 				sc.SetState(SCE_SQL_OPERATOR);
206 			}
207 		}
208 	}
209 	sc.Complete();
210 }
211 
IsStreamCommentStyle(int style)212 static bool IsStreamCommentStyle(int style) {
213 	return style == SCE_SQL_COMMENT ||
214 	       style == SCE_SQL_COMMENTDOC ||
215 	       style == SCE_SQL_COMMENTDOCKEYWORD ||
216 	       style == SCE_SQL_COMMENTDOCKEYWORDERROR;
217 }
218 
219 // Store both the current line's fold level and the next lines in the
220 // level store to make it easy to pick up with each increment.
FoldSQLDoc(unsigned int startPos,int length,int initStyle,WordList * [],Accessor & styler)221 static void FoldSQLDoc(unsigned int startPos, int length, int initStyle,
222                             WordList *[], Accessor &styler) {
223 	bool foldComment = styler.GetPropertyInt("fold.comment") != 0;
224 	bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
225 	bool foldOnlyBegin = styler.GetPropertyInt("fold.sql.only.begin", 0) != 0;
226 
227 	unsigned int endPos = startPos + length;
228 	int visibleChars = 0;
229 	int lineCurrent = styler.GetLine(startPos);
230 	int levelCurrent = SC_FOLDLEVELBASE;
231 	if (lineCurrent > 0) {
232 		levelCurrent = styler.LevelAt(lineCurrent - 1) >> 16;
233 	}
234 	int levelNext = levelCurrent;
235 	char chNext = styler[startPos];
236 	int styleNext = styler.StyleAt(startPos);
237 	int style = initStyle;
238 	bool endFound = false;
239 	for (unsigned int i = startPos; i < endPos; i++) {
240 		char ch = chNext;
241 		chNext = styler.SafeGetCharAt(i + 1);
242 		int stylePrev = style;
243 		style = styleNext;
244 		styleNext = styler.StyleAt(i + 1);
245 		bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
246 		if (foldComment && IsStreamCommentStyle(style)) {
247 			if (!IsStreamCommentStyle(stylePrev)) {
248 				levelNext++;
249 			} else if (!IsStreamCommentStyle(styleNext) && !atEOL) {
250 				// Comments don't end at end of line and the next character may be unstyled.
251 				levelNext--;
252 			}
253 		}
254 		if (foldComment && (style == SCE_SQL_COMMENTLINE)) {
255 			// MySQL needs -- comments to be followed by space or control char
256 			if ((ch == '-') && (chNext == '-')) {
257 				char chNext2 = styler.SafeGetCharAt(i + 2);
258 				char chNext3 = styler.SafeGetCharAt(i + 3);
259 				if (chNext2 == '{' || chNext3 == '{') {
260 					levelNext++;
261 				} else if (chNext2 == '}' || chNext3 == '}') {
262 					levelNext--;
263 				}
264 			}
265 		}
266 		if (style == SCE_SQL_OPERATOR) {
267 			if (ch == '(') {
268 				levelNext++;
269 			} else if (ch == ')') {
270 				levelNext--;
271 			}
272 		}
273 		// If new keyword (cannot trigger on elseif or nullif, does less tests)
274 		if (style == SCE_SQL_WORD && stylePrev != SCE_SQL_WORD) {
275 			const int MAX_KW_LEN = 6;	// Maximum length of folding keywords
276 			char s[MAX_KW_LEN + 2];
277 			unsigned int j = 0;
278 			for (; j < MAX_KW_LEN + 1; j++) {
279 				if (!iswordchar(styler[i + j])) {
280 					break;
281 				}
282 				s[j] = static_cast<char>(tolower(styler[i + j]));
283 			}
284 			if (j == MAX_KW_LEN + 1) {
285 				// Keyword too long, don't test it
286 				s[0] = '\0';
287 			} else {
288 				s[j] = '\0';
289 			}
290 			if ((!foldOnlyBegin) && (strcmp(s, "if") == 0 || strcmp(s, "loop") == 0)) {
291 				if (endFound) {
292 					// ignore
293 					endFound = false;
294 				} else {
295 					levelNext++;
296 				}
297 			} else if (strcmp(s, "begin") == 0) {
298 				levelNext++;
299 			} else if (strcmp(s, "end") == 0 ||
300 						// DROP TABLE IF EXISTS or CREATE TABLE IF NOT EXISTS
301 						strcmp(s, "exists") == 0) {
302 				endFound = true;
303 				levelNext--;
304 				if (levelNext < SC_FOLDLEVELBASE) {
305 					levelNext = SC_FOLDLEVELBASE;
306 				}
307 			}
308 		}
309 		if (atEOL) {
310 			int levelUse = levelCurrent;
311 			int lev = levelUse | levelNext << 16;
312 			if (visibleChars == 0 && foldCompact)
313 				lev |= SC_FOLDLEVELWHITEFLAG;
314 			if (levelUse < levelNext)
315 				lev |= SC_FOLDLEVELHEADERFLAG;
316 			if (lev != styler.LevelAt(lineCurrent)) {
317 				styler.SetLevel(lineCurrent, lev);
318 			}
319 			lineCurrent++;
320 			levelCurrent = levelNext;
321 			visibleChars = 0;
322 			endFound = false;
323 		}
324 		if (!isspacechar(ch)) {
325 			visibleChars++;
326 		}
327 	}
328 }
329 
// Human-readable descriptions of the keyword lists, terminated by a null
// entry. The order appears to correspond to the keywordlists[] indices read
// by ColouriseSQLDoc (index shown beside each entry).
static const char *const sqlWordListDesc[] = {
	"Keywords",          // 0
	"Database Objects",  // 1
	"PLDoc",             // 2
	"SQL*Plus",          // 3
	"User Keywords 1",   // 4
	"User Keywords 2",   // 5
	"User Keywords 3",   // 6
	"User Keywords 4",   // 7
	0                    // end of list
};
341 
342 LexerModule lmSQL(SCLEX_SQL, ColouriseSQLDoc, "sql", FoldSQLDoc, sqlWordListDesc);
343