// Scintilla source code edit control
/** @file LexLua.cxx
 ** Lexer for Lua language.
 **
 ** Written by Paul Winwood.
 ** Folder by Alexey Yutkin.
 ** Modified by Marcos E. Wurzius & Philippe Lhoste
 **/
9 
10 #include <stdlib.h>
11 #include <string.h>
12 #include <stdio.h>
13 #include <stdarg.h>
14 #include <assert.h>
15 #include <ctype.h>
16 
17 #include "ILexer.h"
18 #include "Scintilla.h"
19 #include "SciLexer.h"
20 
21 #include "WordList.h"
22 #include "LexAccessor.h"
23 #include "Accessor.h"
24 #include "StyleContext.h"
25 #include "CharacterSet.h"
26 #include "LexerModule.h"
27 
28 #ifdef SCI_NAMESPACE
29 using namespace Scintilla;
30 #endif
31 
32 // Test for [=[ ... ]=] delimiters, returns 0 if it's only a [ or ],
33 // return 1 for [[ or ]], returns >=2 for [=[ or ]=] and so on.
34 // The maximum number of '=' characters allowed is 254.
LongDelimCheck(StyleContext & sc)35 static int LongDelimCheck(StyleContext &sc) {
36 	int sep = 1;
37 	while (sc.GetRelative(sep) == '=' && sep < 0xFF)
38 		sep++;
39 	if (sc.GetRelative(sep) == sc.ch)
40 		return sep;
41 	return 0;
42 }
43 
ColouriseLuaDoc(unsigned int startPos,int length,int initStyle,WordList * keywordlists[],Accessor & styler)44 static void ColouriseLuaDoc(
45 	unsigned int startPos,
46 	int length,
47 	int initStyle,
48 	WordList *keywordlists[],
49 	Accessor &styler) {
50 
51 	WordList &keywords = *keywordlists[0];
52 	WordList &keywords2 = *keywordlists[1];
53 	WordList &keywords3 = *keywordlists[2];
54 	WordList &keywords4 = *keywordlists[3];
55 	WordList &keywords5 = *keywordlists[4];
56 	WordList &keywords6 = *keywordlists[5];
57 	WordList &keywords7 = *keywordlists[6];
58 	WordList &keywords8 = *keywordlists[7];
59 
60 	// Accepts accented characters
61 	CharacterSet setWordStart(CharacterSet::setAlpha, "_", 0x80, true);
62 	CharacterSet setWord(CharacterSet::setAlphaNum, "_", 0x80, true);
63 	// Not exactly following number definition (several dots are seen as OK, etc.)
64 	// but probably enough in most cases. [pP] is for hex floats.
65 	CharacterSet setNumber(CharacterSet::setDigits, ".-+abcdefpABCDEFP");
66 	CharacterSet setExponent(CharacterSet::setNone, "eEpP");
67 	CharacterSet setLuaOperator(CharacterSet::setNone, "*/-+()={}~[];<>,.^%:#");
68 	CharacterSet setEscapeSkip(CharacterSet::setNone, "\"'\\");
69 
70 	int currentLine = styler.GetLine(startPos);
71 	// Initialize long string [[ ... ]] or block comment --[[ ... ]] nesting level,
72 	// if we are inside such a string. Block comment was introduced in Lua 5.0,
73 	// blocks with separators [=[ ... ]=] in Lua 5.1.
74 	// Continuation of a string (\z whitespace escaping) is controlled by stringWs.
75 	int nestLevel = 0;
76 	int sepCount = 0;
77 	int stringWs = 0;
78 	if (initStyle == SCE_LUA_LITERALSTRING || initStyle == SCE_LUA_COMMENT ||
79 		initStyle == SCE_LUA_STRING || initStyle == SCE_LUA_CHARACTER) {
80 		int lineState = styler.GetLineState(currentLine - 1);
81 		nestLevel = lineState >> 9;
82 		sepCount = lineState & 0xFF;
83 		stringWs = lineState & 0x100;
84 	}
85 
86 	// Do not leak onto next line
87 	if (initStyle == SCE_LUA_STRINGEOL || initStyle == SCE_LUA_COMMENTLINE || initStyle == SCE_LUA_PREPROCESSOR) {
88 		initStyle = SCE_LUA_DEFAULT;
89 	}
90 
91 	StyleContext sc(startPos, length, initStyle, styler);
92 	if (startPos == 0 && sc.ch == '#') {
93 		// shbang line: # is a comment only if first char of the script
94 		sc.SetState(SCE_LUA_COMMENTLINE);
95 	}
96 	for (; sc.More(); sc.Forward()) {
97 		if (sc.atLineEnd) {
98 			// Update the line state, so it can be seen by next line
99 			currentLine = styler.GetLine(sc.currentPos);
100 			switch (sc.state) {
101 			case SCE_LUA_LITERALSTRING:
102 			case SCE_LUA_COMMENT:
103 			case SCE_LUA_STRING:
104 			case SCE_LUA_CHARACTER:
105 				// Inside a literal string, block comment or string, we set the line state
106 				styler.SetLineState(currentLine, (nestLevel << 9) | stringWs | sepCount);
107 				break;
108 			default:
109 				// Reset the line state
110 				styler.SetLineState(currentLine, 0);
111 				break;
112 			}
113 		}
114 		if (sc.atLineStart && (sc.state == SCE_LUA_STRING)) {
115 			// Prevent SCE_LUA_STRINGEOL from leaking back to previous line
116 			sc.SetState(SCE_LUA_STRING);
117 		}
118 
119 		// Handle string line continuation
120 		if ((sc.state == SCE_LUA_STRING || sc.state == SCE_LUA_CHARACTER) &&
121 				sc.ch == '\\') {
122 			if (sc.chNext == '\n' || sc.chNext == '\r') {
123 				sc.Forward();
124 				if (sc.ch == '\r' && sc.chNext == '\n') {
125 					sc.Forward();
126 				}
127 				continue;
128 			}
129 		}
130 
131 		// Determine if the current state should terminate.
132 		if (sc.state == SCE_LUA_OPERATOR) {
133 			if (sc.ch == ':' && sc.chPrev == ':') {	// :: <label> :: forward scan
134 				sc.Forward();
135 				int ln = 0, maxln = startPos + length - sc.currentPos;
136 				int c;
137 				while (ln < maxln) {		// determine line extent
138 					c = sc.GetRelative(ln);
139 					if (c == '\r' || c == '\n')
140 						break;
141 					ln++;
142 				}
143 				maxln = ln; ln = 0;
144 				while (ln < maxln) {		// skip over spaces/tabs
145 					if (!IsASpaceOrTab(sc.GetRelative(ln)))
146 						break;
147 					ln++;
148 				}
149 				int ws1 = ln;
150 				if (setWordStart.Contains(sc.GetRelative(ln))) {
151 					int i = 0;
152 					char s[100];
153 					while (ln < maxln) {	// get potential label
154 						c = sc.GetRelative(ln);
155 						if (!setWord.Contains(c))
156 							break;
157 						if (i < 90)
158 							s[i++] = c;
159 						ln++;
160 					}
161 					s[i] = '\0'; int lbl = ln;
162 					if (!keywords.InList(s)) {
163 						while (ln < maxln) {		// skip over spaces/tabs
164 							if (!IsASpaceOrTab(sc.GetRelative(ln)))
165 								break;
166 							ln++;
167 						}
168 						int ws2 = ln - lbl;
169 						if (sc.GetRelative(ln) == ':' && sc.GetRelative(ln + 1) == ':') {
170 							// final :: found, complete valid label construct
171 							sc.ChangeState(SCE_LUA_LABEL);
172 							if (ws1) {
173 								sc.SetState(SCE_LUA_DEFAULT);
174 								sc.Forward(ws1);
175 							}
176 							sc.SetState(SCE_LUA_LABEL);
177 							sc.Forward(lbl - ws1);
178 							if (ws2) {
179 								sc.SetState(SCE_LUA_DEFAULT);
180 								sc.Forward(ws2);
181 							}
182 							sc.SetState(SCE_LUA_LABEL);
183 							sc.Forward(2);
184 						}
185 					}
186 				}
187 			}
188 			sc.SetState(SCE_LUA_DEFAULT);
189 		} else if (sc.state == SCE_LUA_NUMBER) {
190 			// We stop the number definition on non-numerical non-dot non-eEpP non-sign non-hexdigit char
191 			if (!setNumber.Contains(sc.ch)) {
192 				sc.SetState(SCE_LUA_DEFAULT);
193 			} else if (sc.ch == '-' || sc.ch == '+') {
194 				if (!setExponent.Contains(sc.chPrev))
195 					sc.SetState(SCE_LUA_DEFAULT);
196 			}
197 		} else if (sc.state == SCE_LUA_IDENTIFIER) {
198 			if (!(setWord.Contains(sc.ch) || sc.ch == '.') || sc.Match('.', '.')) {
199 				char s[100];
200 				sc.GetCurrent(s, sizeof(s));
201 				if (keywords.InList(s)) {
202 					sc.ChangeState(SCE_LUA_WORD);
203 					if (strcmp(s, "goto") == 0) {	// goto <label> forward scan
204 						sc.SetState(SCE_LUA_DEFAULT);
205 						while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd)
206 							sc.Forward();
207 						if (setWordStart.Contains(sc.ch)) {
208 							sc.SetState(SCE_LUA_LABEL);
209 							sc.Forward();
210 							while (setWord.Contains(sc.ch))
211 								sc.Forward();
212 							sc.GetCurrent(s, sizeof(s));
213 							if (keywords.InList(s))
214 								sc.ChangeState(SCE_LUA_WORD);
215 						}
216 						sc.SetState(SCE_LUA_DEFAULT);
217 					}
218 				} else if (keywords2.InList(s)) {
219 					sc.ChangeState(SCE_LUA_WORD2);
220 				} else if (keywords3.InList(s)) {
221 					sc.ChangeState(SCE_LUA_WORD3);
222 				} else if (keywords4.InList(s)) {
223 					sc.ChangeState(SCE_LUA_WORD4);
224 				} else if (keywords5.InList(s)) {
225 					sc.ChangeState(SCE_LUA_WORD5);
226 				} else if (keywords6.InList(s)) {
227 					sc.ChangeState(SCE_LUA_WORD6);
228 				} else if (keywords7.InList(s)) {
229 					sc.ChangeState(SCE_LUA_WORD7);
230 				} else if (keywords8.InList(s)) {
231 					sc.ChangeState(SCE_LUA_WORD8);
232 				}
233 				sc.SetState(SCE_LUA_DEFAULT);
234 			}
235 		} else if (sc.state == SCE_LUA_COMMENTLINE || sc.state == SCE_LUA_PREPROCESSOR) {
236 			if (sc.atLineEnd) {
237 				sc.ForwardSetState(SCE_LUA_DEFAULT);
238 			}
239 		} else if (sc.state == SCE_LUA_STRING) {
240 			if (stringWs) {
241 				if (!IsASpace(sc.ch))
242 					stringWs = 0;
243 			}
244 			if (sc.ch == '\\') {
245 				if (setEscapeSkip.Contains(sc.chNext)) {
246 					sc.Forward();
247 				} else if (sc.chNext == 'z') {
248 					sc.Forward();
249 					stringWs = 0x100;
250 				}
251 			} else if (sc.ch == '\"') {
252 				sc.ForwardSetState(SCE_LUA_DEFAULT);
253 			} else if (stringWs == 0 && sc.atLineEnd) {
254 				sc.ChangeState(SCE_LUA_STRINGEOL);
255 				sc.ForwardSetState(SCE_LUA_DEFAULT);
256 			}
257 		} else if (sc.state == SCE_LUA_CHARACTER) {
258 			if (stringWs) {
259 				if (!IsASpace(sc.ch))
260 					stringWs = 0;
261 			}
262 			if (sc.ch == '\\') {
263 				if (setEscapeSkip.Contains(sc.chNext)) {
264 					sc.Forward();
265 				} else if (sc.chNext == 'z') {
266 					sc.Forward();
267 					stringWs = 0x100;
268 				}
269 			} else if (sc.ch == '\'') {
270 				sc.ForwardSetState(SCE_LUA_DEFAULT);
271 			} else if (stringWs == 0 && sc.atLineEnd) {
272 				sc.ChangeState(SCE_LUA_STRINGEOL);
273 				sc.ForwardSetState(SCE_LUA_DEFAULT);
274 			}
275 		} else if (sc.state == SCE_LUA_LITERALSTRING || sc.state == SCE_LUA_COMMENT) {
276 			if (sc.ch == '[') {
277 				int sep = LongDelimCheck(sc);
278 				if (sep == 1 && sepCount == 1) {    // [[-only allowed to nest
279 					nestLevel++;
280 					sc.Forward();
281 				}
282 			} else if (sc.ch == ']') {
283 				int sep = LongDelimCheck(sc);
284 				if (sep == 1 && sepCount == 1) {    // un-nest with ]]-only
285 					nestLevel--;
286 					sc.Forward();
287 					if (nestLevel == 0) {
288 						sc.ForwardSetState(SCE_LUA_DEFAULT);
289 					}
290 				} else if (sep > 1 && sep == sepCount) {   // ]=]-style delim
291 					sc.Forward(sep);
292 					sc.ForwardSetState(SCE_LUA_DEFAULT);
293 				}
294 			}
295 		}
296 
297 		// Determine if a new state should be entered.
298 		if (sc.state == SCE_LUA_DEFAULT) {
299 			if (IsADigit(sc.ch) || (sc.ch == '.' && IsADigit(sc.chNext))) {
300 				sc.SetState(SCE_LUA_NUMBER);
301 				if (sc.ch == '0' && toupper(sc.chNext) == 'X') {
302 					sc.Forward();
303 				}
304 			} else if (setWordStart.Contains(sc.ch)) {
305 				sc.SetState(SCE_LUA_IDENTIFIER);
306 			} else if (sc.ch == '\"') {
307 				sc.SetState(SCE_LUA_STRING);
308 				stringWs = 0;
309 			} else if (sc.ch == '\'') {
310 				sc.SetState(SCE_LUA_CHARACTER);
311 				stringWs = 0;
312 			} else if (sc.ch == '[') {
313 				sepCount = LongDelimCheck(sc);
314 				if (sepCount == 0) {
315 					sc.SetState(SCE_LUA_OPERATOR);
316 				} else {
317 					nestLevel = 1;
318 					sc.SetState(SCE_LUA_LITERALSTRING);
319 					sc.Forward(sepCount);
320 				}
321 			} else if (sc.Match('-', '-')) {
322 				sc.SetState(SCE_LUA_COMMENTLINE);
323 				if (sc.Match("--[")) {
324 					sc.Forward(2);
325 					sepCount = LongDelimCheck(sc);
326 					if (sepCount > 0) {
327 						nestLevel = 1;
328 						sc.ChangeState(SCE_LUA_COMMENT);
329 						sc.Forward(sepCount);
330 					}
331 				} else {
332 					sc.Forward();
333 				}
334 			} else if (sc.atLineStart && sc.Match('$')) {
335 				sc.SetState(SCE_LUA_PREPROCESSOR);	// Obsolete since Lua 4.0, but still in old code
336 			} else if (setLuaOperator.Contains(sc.ch)) {
337 				sc.SetState(SCE_LUA_OPERATOR);
338 			}
339 		}
340 	}
341 
342 	if (setWord.Contains(sc.chPrev) || sc.chPrev == '.') {
343 		char s[100];
344 		sc.GetCurrent(s, sizeof(s));
345 		if (keywords.InList(s)) {
346 			sc.ChangeState(SCE_LUA_WORD);
347 		} else if (keywords2.InList(s)) {
348 			sc.ChangeState(SCE_LUA_WORD2);
349 		} else if (keywords3.InList(s)) {
350 			sc.ChangeState(SCE_LUA_WORD3);
351 		} else if (keywords4.InList(s)) {
352 			sc.ChangeState(SCE_LUA_WORD4);
353 		} else if (keywords5.InList(s)) {
354 			sc.ChangeState(SCE_LUA_WORD5);
355 		} else if (keywords6.InList(s)) {
356 			sc.ChangeState(SCE_LUA_WORD6);
357 		} else if (keywords7.InList(s)) {
358 			sc.ChangeState(SCE_LUA_WORD7);
359 		} else if (keywords8.InList(s)) {
360 			sc.ChangeState(SCE_LUA_WORD8);
361 		}
362 	}
363 
364 	sc.Complete();
365 }
366 
FoldLuaDoc(unsigned int startPos,int length,int,WordList * [],Accessor & styler)367 static void FoldLuaDoc(unsigned int startPos, int length, int /* initStyle */, WordList *[],
368                        Accessor &styler) {
369 	unsigned int lengthDoc = startPos + length;
370 	int visibleChars = 0;
371 	int lineCurrent = styler.GetLine(startPos);
372 	int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
373 	int levelCurrent = levelPrev;
374 	char chNext = styler[startPos];
375 	bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
376 	int styleNext = styler.StyleAt(startPos);
377 	char s[10];
378 
379 	for (unsigned int i = startPos; i < lengthDoc; i++) {
380 		char ch = chNext;
381 		chNext = styler.SafeGetCharAt(i + 1);
382 		int style = styleNext;
383 		styleNext = styler.StyleAt(i + 1);
384 		bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
385 		if (style == SCE_LUA_WORD) {
386 			if (ch == 'i' || ch == 'd' || ch == 'f' || ch == 'e' || ch == 'r' || ch == 'u') {
387 				for (unsigned int j = 0; j < 8; j++) {
388 					if (!iswordchar(styler[i + j])) {
389 						break;
390 					}
391 					s[j] = styler[i + j];
392 					s[j + 1] = '\0';
393 				}
394 
395 				if ((strcmp(s, "if") == 0) || (strcmp(s, "do") == 0) || (strcmp(s, "function") == 0) || (strcmp(s, "repeat") == 0)) {
396 					levelCurrent++;
397 				}
398 				if ((strcmp(s, "end") == 0) || (strcmp(s, "elseif") == 0) || (strcmp(s, "until") == 0)) {
399 					levelCurrent--;
400 				}
401 			}
402 		} else if (style == SCE_LUA_OPERATOR) {
403 			if (ch == '{' || ch == '(') {
404 				levelCurrent++;
405 			} else if (ch == '}' || ch == ')') {
406 				levelCurrent--;
407 			}
408 		} else if (style == SCE_LUA_LITERALSTRING || style == SCE_LUA_COMMENT) {
409 			if (ch == '[') {
410 				levelCurrent++;
411 			} else if (ch == ']') {
412 				levelCurrent--;
413 			}
414 		}
415 
416 		if (atEOL) {
417 			int lev = levelPrev;
418 			if (visibleChars == 0 && foldCompact) {
419 				lev |= SC_FOLDLEVELWHITEFLAG;
420 			}
421 			if ((levelCurrent > levelPrev) && (visibleChars > 0)) {
422 				lev |= SC_FOLDLEVELHEADERFLAG;
423 			}
424 			if (lev != styler.LevelAt(lineCurrent)) {
425 				styler.SetLevel(lineCurrent, lev);
426 			}
427 			lineCurrent++;
428 			levelPrev = levelCurrent;
429 			visibleChars = 0;
430 		}
431 		if (!isspacechar(ch)) {
432 			visibleChars++;
433 		}
434 	}
435 	// Fill in the real level of the next line, keeping the current flags as they will be filled in later
436 
437 	int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
438 	styler.SetLevel(lineCurrent, levelPrev | flagsNext);
439 }
440 
// Descriptions of the eight keyword lists accepted by the Lua lexer, in the
// same order as the keywordlists[] array read by ColouriseLuaDoc.
// The array is terminated by a null pointer.
static const char * const luaWordListDesc[] = {
	"Keywords",
	"Basic functions",
	"String, (table) & math functions",
	"(coroutines), I/O & system facilities",
	"user1",
	"user2",
	"user3",
	"user4",
	0
};
452 
453 LexerModule lmLua(SCLEX_LUA, ColouriseLuaDoc, "lua", FoldLuaDoc, luaWordListDesc);
454