1 // Scintilla source code edit control
2 /** @file LexLua.cxx
3  ** Lexer for Lua language.
4  **
5  ** Written by Paul Winwood.
6  ** Folder by Alexey Yutkin.
7  ** Modified by Marcos E. Wurzius & Philippe Lhoste
8  **/
9 
10 #include <stdlib.h>
11 #include <string.h>
12 #include <stdio.h>
13 #include <stdarg.h>
14 #include <assert.h>
15 #include <ctype.h>
16 
17 #include <string>
18 
19 #include "ILexer.h"
20 #include "Scintilla.h"
21 #include "SciLexer.h"
22 
23 #include "StringCopy.h"
24 #include "WordList.h"
25 #include "LexAccessor.h"
26 #include "Accessor.h"
27 #include "StyleContext.h"
28 #include "CharacterSet.h"
29 #include "LexerModule.h"
30 
31 using namespace Scintilla;
32 
33 // Test for [=[ ... ]=] delimiters, returns 0 if it's only a [ or ],
34 // return 1 for [[ or ]], returns >=2 for [=[ or ]=] and so on.
35 // The maximum number of '=' characters allowed is 254.
LongDelimCheck(StyleContext & sc)36 static int LongDelimCheck(StyleContext &sc) {
37 	int sep = 1;
38 	while (sc.GetRelative(sep) == '=' && sep < 0xFF)
39 		sep++;
40 	if (sc.GetRelative(sep) == sc.ch)
41 		return sep;
42 	return 0;
43 }
44 
ColouriseLuaDoc(Sci_PositionU startPos,Sci_Position length,int initStyle,WordList * keywordlists[],Accessor & styler)45 static void ColouriseLuaDoc(
46 	Sci_PositionU startPos,
47 	Sci_Position length,
48 	int initStyle,
49 	WordList *keywordlists[],
50 	Accessor &styler) {
51 
52 	const WordList &keywords = *keywordlists[0];
53 	const WordList &keywords2 = *keywordlists[1];
54 	const WordList &keywords3 = *keywordlists[2];
55 	const WordList &keywords4 = *keywordlists[3];
56 	const WordList &keywords5 = *keywordlists[4];
57 	const WordList &keywords6 = *keywordlists[5];
58 	const WordList &keywords7 = *keywordlists[6];
59 	const WordList &keywords8 = *keywordlists[7];
60 
61 	// Accepts accented characters
62 	CharacterSet setWordStart(CharacterSet::setAlpha, "_", 0x80, true);
63 	CharacterSet setWord(CharacterSet::setAlphaNum, "_", 0x80, true);
64 	// Not exactly following number definition (several dots are seen as OK, etc.)
65 	// but probably enough in most cases. [pP] is for hex floats.
66 	CharacterSet setNumber(CharacterSet::setDigits, ".-+abcdefpABCDEFP");
67 	CharacterSet setExponent(CharacterSet::setNone, "eEpP");
68 	CharacterSet setLuaOperator(CharacterSet::setNone, "*/-+()={}~[];<>,.^%:#&|");
69 	CharacterSet setEscapeSkip(CharacterSet::setNone, "\"'\\");
70 
71 	Sci_Position currentLine = styler.GetLine(startPos);
72 	// Initialize long string [[ ... ]] or block comment --[[ ... ]] nesting level,
73 	// if we are inside such a string. Block comment was introduced in Lua 5.0,
74 	// blocks with separators [=[ ... ]=] in Lua 5.1.
75 	// Continuation of a string (\z whitespace escaping) is controlled by stringWs.
76 	int nestLevel = 0;
77 	int sepCount = 0;
78 	int stringWs = 0;
79 	if (initStyle == SCE_LUA_LITERALSTRING || initStyle == SCE_LUA_COMMENT ||
80 		initStyle == SCE_LUA_STRING || initStyle == SCE_LUA_CHARACTER) {
81 		const int lineState = styler.GetLineState(currentLine - 1);
82 		nestLevel = lineState >> 9;
83 		sepCount = lineState & 0xFF;
84 		stringWs = lineState & 0x100;
85 	}
86 
87 	// results of identifier/keyword matching
88 	Sci_Position idenPos = 0;
89 	Sci_Position idenWordPos = 0;
90 	int idenStyle = SCE_LUA_IDENTIFIER;
91 	bool foundGoto = false;
92 
93 	// Do not leak onto next line
94 	if (initStyle == SCE_LUA_STRINGEOL || initStyle == SCE_LUA_COMMENTLINE || initStyle == SCE_LUA_PREPROCESSOR) {
95 		initStyle = SCE_LUA_DEFAULT;
96 	}
97 
98 	StyleContext sc(startPos, length, initStyle, styler);
99 	if (startPos == 0 && sc.ch == '#' && sc.chNext == '!') {
100 		// shbang line: "#!" is a comment only if located at the start of the script
101 		sc.SetState(SCE_LUA_COMMENTLINE);
102 	}
103 	for (; sc.More(); sc.Forward()) {
104 		if (sc.atLineEnd) {
105 			// Update the line state, so it can be seen by next line
106 			currentLine = styler.GetLine(sc.currentPos);
107 			switch (sc.state) {
108 			case SCE_LUA_LITERALSTRING:
109 			case SCE_LUA_COMMENT:
110 			case SCE_LUA_STRING:
111 			case SCE_LUA_CHARACTER:
112 				// Inside a literal string, block comment or string, we set the line state
113 				styler.SetLineState(currentLine, (nestLevel << 9) | stringWs | sepCount);
114 				break;
115 			default:
116 				// Reset the line state
117 				styler.SetLineState(currentLine, 0);
118 				break;
119 			}
120 		}
121 		if (sc.atLineStart && (sc.state == SCE_LUA_STRING)) {
122 			// Prevent SCE_LUA_STRINGEOL from leaking back to previous line
123 			sc.SetState(SCE_LUA_STRING);
124 		}
125 
126 		// Handle string line continuation
127 		if ((sc.state == SCE_LUA_STRING || sc.state == SCE_LUA_CHARACTER) &&
128 				sc.ch == '\\') {
129 			if (sc.chNext == '\n' || sc.chNext == '\r') {
130 				sc.Forward();
131 				if (sc.ch == '\r' && sc.chNext == '\n') {
132 					sc.Forward();
133 				}
134 				continue;
135 			}
136 		}
137 
138 		// Determine if the current state should terminate.
139 		if (sc.state == SCE_LUA_OPERATOR) {
140 			if (sc.ch == ':' && sc.chPrev == ':') {	// :: <label> :: forward scan
141 				sc.Forward();
142 				Sci_Position ln = 0;
143 				while (IsASpaceOrTab(sc.GetRelative(ln)))	// skip over spaces/tabs
144 					ln++;
145 				Sci_Position ws1 = ln;
146 				if (setWordStart.Contains(sc.GetRelative(ln))) {
147 					int c, i = 0;
148 					char s[100];
149 					while (setWord.Contains(c = sc.GetRelative(ln))) {	// get potential label
150 						if (i < 90)
151 							s[i++] = static_cast<char>(c);
152 						ln++;
153 					}
154 					s[i] = '\0'; Sci_Position lbl = ln;
155 					if (!keywords.InList(s)) {
156 						while (IsASpaceOrTab(sc.GetRelative(ln)))	// skip over spaces/tabs
157 							ln++;
158 						Sci_Position ws2 = ln - lbl;
159 						if (sc.GetRelative(ln) == ':' && sc.GetRelative(ln + 1) == ':') {
160 							// final :: found, complete valid label construct
161 							sc.ChangeState(SCE_LUA_LABEL);
162 							if (ws1) {
163 								sc.SetState(SCE_LUA_DEFAULT);
164 								sc.ForwardBytes(ws1);
165 							}
166 							sc.SetState(SCE_LUA_LABEL);
167 							sc.ForwardBytes(lbl - ws1);
168 							if (ws2) {
169 								sc.SetState(SCE_LUA_DEFAULT);
170 								sc.ForwardBytes(ws2);
171 							}
172 							sc.SetState(SCE_LUA_LABEL);
173 							sc.ForwardBytes(2);
174 						}
175 					}
176 				}
177 			}
178 			sc.SetState(SCE_LUA_DEFAULT);
179 		} else if (sc.state == SCE_LUA_NUMBER) {
180 			// We stop the number definition on non-numerical non-dot non-eEpP non-sign non-hexdigit char
181 			if (!setNumber.Contains(sc.ch)) {
182 				sc.SetState(SCE_LUA_DEFAULT);
183 			} else if (sc.ch == '-' || sc.ch == '+') {
184 				if (!setExponent.Contains(sc.chPrev))
185 					sc.SetState(SCE_LUA_DEFAULT);
186 			}
187 		} else if (sc.state == SCE_LUA_IDENTIFIER) {
188 			idenPos--;			// commit already-scanned identitier/word parts
189 			if (idenWordPos > 0) {
190 				idenWordPos--;
191 				sc.ChangeState(idenStyle);
192 				sc.ForwardBytes(idenWordPos);
193 				idenPos -= idenWordPos;
194 				if (idenPos > 0) {
195 					sc.SetState(SCE_LUA_IDENTIFIER);
196 					sc.ForwardBytes(idenPos);
197 				}
198 			} else {
199 				sc.ForwardBytes(idenPos);
200 			}
201 			sc.SetState(SCE_LUA_DEFAULT);
202 			if (foundGoto) {					// goto <label> forward scan
203 				while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd)
204 					sc.Forward();
205 				if (setWordStart.Contains(sc.ch)) {
206 					sc.SetState(SCE_LUA_LABEL);
207 					sc.Forward();
208 					while (setWord.Contains(sc.ch))
209 						sc.Forward();
210 					char s[100];
211 					sc.GetCurrent(s, sizeof(s));
212 					if (keywords.InList(s))		// labels cannot be keywords
213 						sc.ChangeState(SCE_LUA_WORD);
214 				}
215 				sc.SetState(SCE_LUA_DEFAULT);
216 			}
217 		} else if (sc.state == SCE_LUA_COMMENTLINE || sc.state == SCE_LUA_PREPROCESSOR) {
218 			if (sc.atLineEnd) {
219 				sc.ForwardSetState(SCE_LUA_DEFAULT);
220 			}
221 		} else if (sc.state == SCE_LUA_STRING) {
222 			if (stringWs) {
223 				if (!IsASpace(sc.ch))
224 					stringWs = 0;
225 			}
226 			if (sc.ch == '\\') {
227 				if (setEscapeSkip.Contains(sc.chNext)) {
228 					sc.Forward();
229 				} else if (sc.chNext == 'z') {
230 					sc.Forward();
231 					stringWs = 0x100;
232 				}
233 			} else if (sc.ch == '\"') {
234 				sc.ForwardSetState(SCE_LUA_DEFAULT);
235 			} else if (stringWs == 0 && sc.atLineEnd) {
236 				sc.ChangeState(SCE_LUA_STRINGEOL);
237 				sc.ForwardSetState(SCE_LUA_DEFAULT);
238 			}
239 		} else if (sc.state == SCE_LUA_CHARACTER) {
240 			if (stringWs) {
241 				if (!IsASpace(sc.ch))
242 					stringWs = 0;
243 			}
244 			if (sc.ch == '\\') {
245 				if (setEscapeSkip.Contains(sc.chNext)) {
246 					sc.Forward();
247 				} else if (sc.chNext == 'z') {
248 					sc.Forward();
249 					stringWs = 0x100;
250 				}
251 			} else if (sc.ch == '\'') {
252 				sc.ForwardSetState(SCE_LUA_DEFAULT);
253 			} else if (stringWs == 0 && sc.atLineEnd) {
254 				sc.ChangeState(SCE_LUA_STRINGEOL);
255 				sc.ForwardSetState(SCE_LUA_DEFAULT);
256 			}
257 		} else if (sc.state == SCE_LUA_LITERALSTRING || sc.state == SCE_LUA_COMMENT) {
258 			if (sc.ch == '[') {
259 				const int sep = LongDelimCheck(sc);
260 				if (sep == 1 && sepCount == 1) {    // [[-only allowed to nest
261 					nestLevel++;
262 					sc.Forward();
263 				}
264 			} else if (sc.ch == ']') {
265 				int sep = LongDelimCheck(sc);
266 				if (sep == 1 && sepCount == 1) {    // un-nest with ]]-only
267 					nestLevel--;
268 					sc.Forward();
269 					if (nestLevel == 0) {
270 						sc.ForwardSetState(SCE_LUA_DEFAULT);
271 					}
272 				} else if (sep > 1 && sep == sepCount) {   // ]=]-style delim
273 					sc.Forward(sep);
274 					sc.ForwardSetState(SCE_LUA_DEFAULT);
275 				}
276 			}
277 		}
278 
279 		// Determine if a new state should be entered.
280 		if (sc.state == SCE_LUA_DEFAULT) {
281 			if (IsADigit(sc.ch) || (sc.ch == '.' && IsADigit(sc.chNext))) {
282 				sc.SetState(SCE_LUA_NUMBER);
283 				if (sc.ch == '0' && toupper(sc.chNext) == 'X') {
284 					sc.Forward();
285 				}
286 			} else if (setWordStart.Contains(sc.ch)) {
287 				// For matching various identifiers with dots and colons, multiple
288 				// matches are done as identifier segments are added. Longest match is
289 				// set to a word style. The non-matched part is in identifier style.
290 				std::string ident;
291 				idenPos = 0;
292 				idenWordPos = 0;
293 				idenStyle = SCE_LUA_IDENTIFIER;
294 				foundGoto = false;
295 				int cNext;
296 				do {
297 					int c;
298 					const Sci_Position idenPosOld = idenPos;
299 					std::string identSeg;
300 					identSeg += static_cast<char>(sc.GetRelative(idenPos++));
301 					while (setWord.Contains(c = sc.GetRelative(idenPos))) {
302 						identSeg += static_cast<char>(c);
303 						idenPos++;
304 					}
305 					if (keywords.InList(identSeg.c_str()) && (idenPosOld > 0)) {
306 						idenPos = idenPosOld - 1;	// keywords cannot mix
307 						ident.pop_back();
308 						break;
309 					}
310 					ident += identSeg;
311 					const char* s = ident.c_str();
312 					int newStyle = SCE_LUA_IDENTIFIER;
313 					if (keywords.InList(s)) {
314 						newStyle = SCE_LUA_WORD;
315 					} else if (keywords2.InList(s)) {
316 						newStyle = SCE_LUA_WORD2;
317 					} else if (keywords3.InList(s)) {
318 						newStyle = SCE_LUA_WORD3;
319 					} else if (keywords4.InList(s)) {
320 						newStyle = SCE_LUA_WORD4;
321 					} else if (keywords5.InList(s)) {
322 						newStyle = SCE_LUA_WORD5;
323 					} else if (keywords6.InList(s)) {
324 						newStyle = SCE_LUA_WORD6;
325 					} else if (keywords7.InList(s)) {
326 						newStyle = SCE_LUA_WORD7;
327 					} else if (keywords8.InList(s)) {
328 						newStyle = SCE_LUA_WORD8;
329 					}
330 					if (newStyle != SCE_LUA_IDENTIFIER) {
331 						idenStyle = newStyle;
332 						idenWordPos = idenPos;
333 					}
334 					if (idenStyle == SCE_LUA_WORD)	// keywords cannot mix
335 						break;
336 					cNext = sc.GetRelative(idenPos + 1);
337 					if ((c == '.' || c == ':') && setWordStart.Contains(cNext)) {
338 						ident += static_cast<char>(c);
339 						idenPos++;
340 					} else {
341 						cNext = 0;
342 					}
343 				} while (cNext);
344 				if ((idenStyle == SCE_LUA_WORD) && (ident.compare("goto") == 0)) {
345 					foundGoto = true;
346 				}
347 				sc.SetState(SCE_LUA_IDENTIFIER);
348 			} else if (sc.ch == '\"') {
349 				sc.SetState(SCE_LUA_STRING);
350 				stringWs = 0;
351 			} else if (sc.ch == '\'') {
352 				sc.SetState(SCE_LUA_CHARACTER);
353 				stringWs = 0;
354 			} else if (sc.ch == '[') {
355 				sepCount = LongDelimCheck(sc);
356 				if (sepCount == 0) {
357 					sc.SetState(SCE_LUA_OPERATOR);
358 				} else {
359 					nestLevel = 1;
360 					sc.SetState(SCE_LUA_LITERALSTRING);
361 					sc.Forward(sepCount);
362 				}
363 			} else if (sc.Match('-', '-')) {
364 				sc.SetState(SCE_LUA_COMMENTLINE);
365 				if (sc.Match("--[")) {
366 					sc.Forward(2);
367 					sepCount = LongDelimCheck(sc);
368 					if (sepCount > 0) {
369 						nestLevel = 1;
370 						sc.ChangeState(SCE_LUA_COMMENT);
371 						sc.Forward(sepCount);
372 					}
373 				} else {
374 					sc.Forward();
375 				}
376 			} else if (sc.atLineStart && sc.Match('$')) {
377 				sc.SetState(SCE_LUA_PREPROCESSOR);	// Obsolete since Lua 4.0, but still in old code
378 			} else if (setLuaOperator.Contains(sc.ch)) {
379 				sc.SetState(SCE_LUA_OPERATOR);
380 			}
381 		}
382 	}
383 
384 	sc.Complete();
385 }
386 
FoldLuaDoc(Sci_PositionU startPos,Sci_Position length,int,WordList * [],Accessor & styler)387 static void FoldLuaDoc(Sci_PositionU startPos, Sci_Position length, int /* initStyle */, WordList *[],
388                        Accessor &styler) {
389 	const Sci_PositionU lengthDoc = startPos + length;
390 	int visibleChars = 0;
391 	Sci_Position lineCurrent = styler.GetLine(startPos);
392 	int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
393 	int levelCurrent = levelPrev;
394 	char chNext = styler[startPos];
395 	const bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
396 	int styleNext = styler.StyleAt(startPos);
397 
398 	for (Sci_PositionU i = startPos; i < lengthDoc; i++) {
399 		const char ch = chNext;
400 		chNext = styler.SafeGetCharAt(i + 1);
401 		const int style = styleNext;
402 		styleNext = styler.StyleAt(i + 1);
403 		const bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
404 		if (style == SCE_LUA_WORD) {
405 			if (ch == 'i' || ch == 'd' || ch == 'f' || ch == 'e' || ch == 'r' || ch == 'u') {
406 				char s[10] = "";
407 				for (Sci_PositionU j = 0; j < 8; j++) {
408 					if (!iswordchar(styler[i + j])) {
409 						break;
410 					}
411 					s[j] = styler[i + j];
412 					s[j + 1] = '\0';
413 				}
414 
415 				if ((strcmp(s, "if") == 0) || (strcmp(s, "do") == 0) || (strcmp(s, "function") == 0) || (strcmp(s, "repeat") == 0)) {
416 					levelCurrent++;
417 				}
418 				if ((strcmp(s, "end") == 0) || (strcmp(s, "elseif") == 0) || (strcmp(s, "until") == 0)) {
419 					levelCurrent--;
420 				}
421 			}
422 		} else if (style == SCE_LUA_OPERATOR) {
423 			if (ch == '{' || ch == '(') {
424 				levelCurrent++;
425 			} else if (ch == '}' || ch == ')') {
426 				levelCurrent--;
427 			}
428 		} else if (style == SCE_LUA_LITERALSTRING || style == SCE_LUA_COMMENT) {
429 			if (ch == '[') {
430 				levelCurrent++;
431 			} else if (ch == ']') {
432 				levelCurrent--;
433 			}
434 		}
435 
436 		if (atEOL) {
437 			int lev = levelPrev;
438 			if (visibleChars == 0 && foldCompact) {
439 				lev |= SC_FOLDLEVELWHITEFLAG;
440 			}
441 			if ((levelCurrent > levelPrev) && (visibleChars > 0)) {
442 				lev |= SC_FOLDLEVELHEADERFLAG;
443 			}
444 			if (lev != styler.LevelAt(lineCurrent)) {
445 				styler.SetLevel(lineCurrent, lev);
446 			}
447 			lineCurrent++;
448 			levelPrev = levelCurrent;
449 			visibleChars = 0;
450 		}
451 		if (!isspacechar(ch)) {
452 			visibleChars++;
453 		}
454 	}
455 	// Fill in the real level of the next line, keeping the current flags as they will be filled in later
456 
457 	int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
458 	styler.SetLevel(lineCurrent, levelPrev | flagsNext);
459 }
460 
// Descriptions of the eight word lists, in the order they are indexed by
// ColouriseLuaDoc (keywordlists[0] .. keywordlists[7]).
// The array is terminated by a null pointer.
static const char * const luaWordListDesc[] = {
	"Keywords",
	"Basic functions",
	"String, (table) & math functions",
	"(coroutines), I/O & system facilities",
	"user1",
	"user2",
	"user3",
	"user4",
	nullptr
};
472 
473 namespace {
474 
475 LexicalClass lexicalClasses[] = {
476 	// Lexer Lua SCLEX_LUA SCE_LUA_:
477 	0, "SCE_LUA_DEFAULT", "default", "White space: Visible only in View Whitespace mode (or if it has a back colour)",
478 	1, "SCE_LUA_COMMENT", "comment", "Block comment (Lua 5.0)",
479 	2, "SCE_LUA_COMMENTLINE", "comment line", "Line comment",
480 	3, "SCE_LUA_COMMENTDOC", "comment documentation", "Doc comment -- Not used in Lua (yet?)",
481 	4, "SCE_LUA_NUMBER", "literal numeric", "Number",
482 	5, "SCE_LUA_WORD", "keyword", "Keyword",
483 	6, "SCE_LUA_STRING", "literal string", "(Double quoted) String",
484 	7, "SCE_LUA_CHARACTER", "literal string character", "Character (Single quoted string)",
485 	8, "SCE_LUA_LITERALSTRING", "literal string", "Literal string",
486 	9, "SCE_LUA_PREPROCESSOR", "preprocessor", "Preprocessor (obsolete in Lua 4.0 and up)",
487 	10, "SCE_LUA_OPERATOR", "operator", "Operators",
488 	11, "SCE_LUA_IDENTIFIER", "identifier", "Identifier (everything else...)",
489 	12, "SCE_LUA_STRINGEOL", "error literal string", "End of line where string is not closed",
490 	13, "SCE_LUA_WORD2", "identifier", "Other keywords",
491 	14, "SCE_LUA_WORD3", "identifier", "Other keywords",
492 	15, "SCE_LUA_WORD4", "identifier", "Other keywords",
493 	16, "SCE_LUA_WORD5", "identifier", "Other keywords",
494 	17, "SCE_LUA_WORD6", "identifier", "Other keywords",
495 	18, "SCE_LUA_WORD7", "identifier", "Other keywords",
496 	19, "SCE_LUA_WORD8", "identifier", "Other keywords",
497 	20, "SCE_LUA_LABEL", "label", "Labels",
498 };
499 
500 }
501 
502 LexerModule lmLua(SCLEX_LUA, ColouriseLuaDoc, "lua", FoldLuaDoc, luaWordListDesc, lexicalClasses, ELEMENTS(lexicalClasses));
503