1 // Scintilla source code edit control
2 /** @file LexLua.cxx
3  ** Lexer for Lua language.
4  **
5  ** Written by Paul Winwood.
6  ** Folder by Alexey Yutkin.
7  ** Modified by Marcos E. Wurzius & Philippe Lhoste
8  **/
9 
10 #include <stdlib.h>
11 #include <string.h>
12 #include <stdio.h>
13 #include <stdarg.h>
14 #include <assert.h>
15 #include <ctype.h>
16 
17 #include "ILexer.h"
18 #include "Scintilla.h"
19 #include "SciLexer.h"
20 
21 #include "WordList.h"
22 #include "LexAccessor.h"
23 #include "Accessor.h"
24 #include "StyleContext.h"
25 #include "CharacterSet.h"
26 #include "LexerModule.h"
27 
28 #ifdef SCI_NAMESPACE
29 using namespace Scintilla;
30 #endif
31 
32 // Test for [=[ ... ]=] delimiters, returns 0 if it's only a [ or ],
33 // return 1 for [[ or ]], returns >=2 for [=[ or ]=] and so on.
34 // The maximum number of '=' characters allowed is 254.
LongDelimCheck(StyleContext & sc)35 static int LongDelimCheck(StyleContext &sc) {
36 	int sep = 1;
37 	while (sc.GetRelative(sep) == '=' && sep < 0xFF)
38 		sep++;
39 	if (sc.GetRelative(sep) == sc.ch)
40 		return sep;
41 	return 0;
42 }
43 
ColouriseLuaDoc(unsigned int startPos,int length,int initStyle,WordList * keywordlists[],Accessor & styler)44 static void ColouriseLuaDoc(
45 	unsigned int startPos,
46 	int length,
47 	int initStyle,
48 	WordList *keywordlists[],
49 	Accessor &styler) {
50 
51 	WordList &keywords = *keywordlists[0];
52 	WordList &keywords2 = *keywordlists[1];
53 	WordList &keywords3 = *keywordlists[2];
54 	WordList &keywords4 = *keywordlists[3];
55 	WordList &keywords5 = *keywordlists[4];
56 	WordList &keywords6 = *keywordlists[5];
57 	WordList &keywords7 = *keywordlists[6];
58 	WordList &keywords8 = *keywordlists[7];
59 
60 	// Accepts accented characters
61 	CharacterSet setWordStart(CharacterSet::setAlpha, "_", 0x80, true);
62 	CharacterSet setWord(CharacterSet::setAlphaNum, "._", 0x80, true);
63 	// Not exactly following number definition (several dots are seen as OK, etc.)
64 	// but probably enough in most cases. [pP] is for hex floats.
65 	CharacterSet setNumber(CharacterSet::setDigits, ".-+abcdefpABCDEFP");
66 	CharacterSet setExponent(CharacterSet::setNone, "eEpP");
67 	CharacterSet setLuaOperator(CharacterSet::setNone, "*/-+()={}~[];<>,.^%:#");
68 	CharacterSet setEscapeSkip(CharacterSet::setNone, "\"'\\");
69 
70 	int currentLine = styler.GetLine(startPos);
71 	// Initialize long string [[ ... ]] or block comment --[[ ... ]] nesting level,
72 	// if we are inside such a string. Block comment was introduced in Lua 5.0,
73 	// blocks with separators [=[ ... ]=] in Lua 5.1.
74 	// Continuation of a string (\* whitespace escaping) is controlled by stringWs.
75 	int nestLevel = 0;
76 	int sepCount = 0;
77 	int stringWs = 0;
78 	if (initStyle == SCE_LUA_LITERALSTRING || initStyle == SCE_LUA_COMMENT ||
79 		initStyle == SCE_LUA_STRING || initStyle == SCE_LUA_CHARACTER) {
80 		int lineState = styler.GetLineState(currentLine - 1);
81 		nestLevel = lineState >> 9;
82 		sepCount = lineState & 0xFF;
83 		stringWs = lineState & 0x100;
84 	}
85 
86 	// Do not leak onto next line
87 	if (initStyle == SCE_LUA_STRINGEOL || initStyle == SCE_LUA_COMMENTLINE || initStyle == SCE_LUA_PREPROCESSOR) {
88 		initStyle = SCE_LUA_DEFAULT;
89 	}
90 
91 	StyleContext sc(startPos, length, initStyle, styler);
92 	if (startPos == 0 && sc.ch == '#') {
93 		// shbang line: # is a comment only if first char of the script
94 		sc.SetState(SCE_LUA_COMMENTLINE);
95 	}
96 	for (; sc.More(); sc.Forward()) {
97 		if (sc.atLineEnd) {
98 			// Update the line state, so it can be seen by next line
99 			currentLine = styler.GetLine(sc.currentPos);
100 			switch (sc.state) {
101 			case SCE_LUA_LITERALSTRING:
102 			case SCE_LUA_COMMENT:
103 			case SCE_LUA_STRING:
104 			case SCE_LUA_CHARACTER:
105 				// Inside a literal string, block comment or string, we set the line state
106 				styler.SetLineState(currentLine, (nestLevel << 9) | stringWs | sepCount);
107 				break;
108 			default:
109 				// Reset the line state
110 				styler.SetLineState(currentLine, 0);
111 				break;
112 			}
113 		}
114 		if (sc.atLineStart && (sc.state == SCE_LUA_STRING)) {
115 			// Prevent SCE_LUA_STRINGEOL from leaking back to previous line
116 			sc.SetState(SCE_LUA_STRING);
117 		}
118 
119 		// Handle string line continuation
120 		if ((sc.state == SCE_LUA_STRING || sc.state == SCE_LUA_CHARACTER) &&
121 				sc.ch == '\\') {
122 			if (sc.chNext == '\n' || sc.chNext == '\r') {
123 				sc.Forward();
124 				if (sc.ch == '\r' && sc.chNext == '\n') {
125 					sc.Forward();
126 				}
127 				continue;
128 			}
129 		}
130 
131 		// Determine if the current state should terminate.
132 		if (sc.state == SCE_LUA_OPERATOR) {
133 			sc.SetState(SCE_LUA_DEFAULT);
134 		} else if (sc.state == SCE_LUA_NUMBER) {
135 			// We stop the number definition on non-numerical non-dot non-eEpP non-sign non-hexdigit char
136 			if (!setNumber.Contains(sc.ch)) {
137 				sc.SetState(SCE_LUA_DEFAULT);
138 			} else if (sc.ch == '-' || sc.ch == '+') {
139 				if (!setExponent.Contains(sc.chPrev))
140 					sc.SetState(SCE_LUA_DEFAULT);
141 			}
142 		} else if (sc.state == SCE_LUA_IDENTIFIER) {
143 			if (!setWord.Contains(sc.ch) || sc.Match('.', '.')) {
144 				char s[100];
145 				sc.GetCurrent(s, sizeof(s));
146 				if (keywords.InList(s)) {
147 					sc.ChangeState(SCE_LUA_WORD);
148 				} else if (keywords2.InList(s)) {
149 					sc.ChangeState(SCE_LUA_WORD2);
150 				} else if (keywords3.InList(s)) {
151 					sc.ChangeState(SCE_LUA_WORD3);
152 				} else if (keywords4.InList(s)) {
153 					sc.ChangeState(SCE_LUA_WORD4);
154 				} else if (keywords5.InList(s)) {
155 					sc.ChangeState(SCE_LUA_WORD5);
156 				} else if (keywords6.InList(s)) {
157 					sc.ChangeState(SCE_LUA_WORD6);
158 				} else if (keywords7.InList(s)) {
159 					sc.ChangeState(SCE_LUA_WORD7);
160 				} else if (keywords8.InList(s)) {
161 					sc.ChangeState(SCE_LUA_WORD8);
162 				}
163 				sc.SetState(SCE_LUA_DEFAULT);
164 			}
165 		} else if (sc.state == SCE_LUA_COMMENTLINE || sc.state == SCE_LUA_PREPROCESSOR) {
166 			if (sc.atLineEnd) {
167 				sc.ForwardSetState(SCE_LUA_DEFAULT);
168 			}
169 		} else if (sc.state == SCE_LUA_STRING) {
170 			if (stringWs) {
171 				if (!IsASpace(sc.ch))
172 					stringWs = 0;
173 			}
174 			if (sc.ch == '\\') {
175 				if (setEscapeSkip.Contains(sc.chNext)) {
176 					sc.Forward();
177 				} else if (sc.chNext == '*') {
178 					sc.Forward();
179 					stringWs = 0x100;
180 				}
181 			} else if (sc.ch == '\"') {
182 				sc.ForwardSetState(SCE_LUA_DEFAULT);
183 			} else if (stringWs == 0 && sc.atLineEnd) {
184 				sc.ChangeState(SCE_LUA_STRINGEOL);
185 				sc.ForwardSetState(SCE_LUA_DEFAULT);
186 			}
187 		} else if (sc.state == SCE_LUA_CHARACTER) {
188 			if (stringWs) {
189 				if (!IsASpace(sc.ch))
190 					stringWs = 0;
191 			}
192 			if (sc.ch == '\\') {
193 				if (setEscapeSkip.Contains(sc.chNext)) {
194 					sc.Forward();
195 				} else if (sc.chNext == '*') {
196 					sc.Forward();
197 					stringWs = 0x100;
198 				}
199 			} else if (sc.ch == '\'') {
200 				sc.ForwardSetState(SCE_LUA_DEFAULT);
201 			} else if (stringWs == 0 && sc.atLineEnd) {
202 				sc.ChangeState(SCE_LUA_STRINGEOL);
203 				sc.ForwardSetState(SCE_LUA_DEFAULT);
204 			}
205 		} else if (sc.state == SCE_LUA_LITERALSTRING || sc.state == SCE_LUA_COMMENT) {
206 			if (sc.ch == '[') {
207 				int sep = LongDelimCheck(sc);
208 				if (sep == 1 && sepCount == 1) {    // [[-only allowed to nest
209 					nestLevel++;
210 					sc.Forward();
211 				}
212 			} else if (sc.ch == ']') {
213 				int sep = LongDelimCheck(sc);
214 				if (sep == 1 && sepCount == 1) {    // un-nest with ]]-only
215 					nestLevel--;
216 					sc.Forward();
217 					if (nestLevel == 0) {
218 						sc.ForwardSetState(SCE_LUA_DEFAULT);
219 					}
220 				} else if (sep > 1 && sep == sepCount) {   // ]=]-style delim
221 					sc.Forward(sep);
222 					sc.ForwardSetState(SCE_LUA_DEFAULT);
223 				}
224 			}
225 		}
226 
227 		// Determine if a new state should be entered.
228 		if (sc.state == SCE_LUA_DEFAULT) {
229 			if (IsADigit(sc.ch) || (sc.ch == '.' && IsADigit(sc.chNext))) {
230 				sc.SetState(SCE_LUA_NUMBER);
231 				if (sc.ch == '0' && toupper(sc.chNext) == 'X') {
232 					sc.Forward();
233 				}
234 			} else if (setWordStart.Contains(sc.ch)) {
235 				sc.SetState(SCE_LUA_IDENTIFIER);
236 			} else if (sc.ch == '\"') {
237 				sc.SetState(SCE_LUA_STRING);
238 				stringWs = 0;
239 			} else if (sc.ch == '\'') {
240 				sc.SetState(SCE_LUA_CHARACTER);
241 				stringWs = 0;
242 			} else if (sc.ch == '[') {
243 				sepCount = LongDelimCheck(sc);
244 				if (sepCount == 0) {
245 					sc.SetState(SCE_LUA_OPERATOR);
246 				} else {
247 					nestLevel = 1;
248 					sc.SetState(SCE_LUA_LITERALSTRING);
249 					sc.Forward(sepCount);
250 				}
251 			} else if (sc.Match('-', '-')) {
252 				sc.SetState(SCE_LUA_COMMENTLINE);
253 				if (sc.Match("--[")) {
254 					sc.Forward(2);
255 					sepCount = LongDelimCheck(sc);
256 					if (sepCount > 0) {
257 						nestLevel = 1;
258 						sc.ChangeState(SCE_LUA_COMMENT);
259 						sc.Forward(sepCount);
260 					}
261 				} else {
262 					sc.Forward();
263 				}
264 			} else if (sc.atLineStart && sc.Match('$')) {
265 				sc.SetState(SCE_LUA_PREPROCESSOR);	// Obsolete since Lua 4.0, but still in old code
266 			} else if (setLuaOperator.Contains(sc.ch)) {
267 				sc.SetState(SCE_LUA_OPERATOR);
268 			}
269 		}
270 	}
271 
272 	if (setWord.Contains(sc.chPrev)) {
273 		char s[100];
274 		sc.GetCurrent(s, sizeof(s));
275 		if (keywords.InList(s)) {
276 			sc.ChangeState(SCE_LUA_WORD);
277 		} else if (keywords2.InList(s)) {
278 			sc.ChangeState(SCE_LUA_WORD2);
279 		} else if (keywords3.InList(s)) {
280 			sc.ChangeState(SCE_LUA_WORD3);
281 		} else if (keywords4.InList(s)) {
282 			sc.ChangeState(SCE_LUA_WORD4);
283 		} else if (keywords5.InList(s)) {
284 			sc.ChangeState(SCE_LUA_WORD5);
285 		} else if (keywords6.InList(s)) {
286 			sc.ChangeState(SCE_LUA_WORD6);
287 		} else if (keywords7.InList(s)) {
288 			sc.ChangeState(SCE_LUA_WORD7);
289 		} else if (keywords8.InList(s)) {
290 			sc.ChangeState(SCE_LUA_WORD8);
291 		}
292 	}
293 
294 	sc.Complete();
295 }
296 
FoldLuaDoc(unsigned int startPos,int length,int,WordList * [],Accessor & styler)297 static void FoldLuaDoc(unsigned int startPos, int length, int /* initStyle */, WordList *[],
298                        Accessor &styler) {
299 	unsigned int lengthDoc = startPos + length;
300 	int visibleChars = 0;
301 	int lineCurrent = styler.GetLine(startPos);
302 	int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
303 	int levelCurrent = levelPrev;
304 	char chNext = styler[startPos];
305 	bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
306 	int styleNext = styler.StyleAt(startPos);
307 	char s[10];
308 
309 	for (unsigned int i = startPos; i < lengthDoc; i++) {
310 		char ch = chNext;
311 		chNext = styler.SafeGetCharAt(i + 1);
312 		int style = styleNext;
313 		styleNext = styler.StyleAt(i + 1);
314 		bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
315 		if (style == SCE_LUA_WORD) {
316 			if (ch == 'i' || ch == 'd' || ch == 'f' || ch == 'e' || ch == 'r' || ch == 'u') {
317 				for (unsigned int j = 0; j < 8; j++) {
318 					if (!iswordchar(styler[i + j])) {
319 						break;
320 					}
321 					s[j] = styler[i + j];
322 					s[j + 1] = '\0';
323 				}
324 
325 				if ((strcmp(s, "if") == 0) || (strcmp(s, "do") == 0) || (strcmp(s, "function") == 0) || (strcmp(s, "repeat") == 0)) {
326 					levelCurrent++;
327 				}
328 				if ((strcmp(s, "end") == 0) || (strcmp(s, "elseif") == 0) || (strcmp(s, "until") == 0)) {
329 					levelCurrent--;
330 				}
331 			}
332 		} else if (style == SCE_LUA_OPERATOR) {
333 			if (ch == '{' || ch == '(') {
334 				levelCurrent++;
335 			} else if (ch == '}' || ch == ')') {
336 				levelCurrent--;
337 			}
338 		} else if (style == SCE_LUA_LITERALSTRING || style == SCE_LUA_COMMENT) {
339 			if (ch == '[') {
340 				levelCurrent++;
341 			} else if (ch == ']') {
342 				levelCurrent--;
343 			}
344 		}
345 
346 		if (atEOL) {
347 			int lev = levelPrev;
348 			if (visibleChars == 0 && foldCompact) {
349 				lev |= SC_FOLDLEVELWHITEFLAG;
350 			}
351 			if ((levelCurrent > levelPrev) && (visibleChars > 0)) {
352 				lev |= SC_FOLDLEVELHEADERFLAG;
353 			}
354 			if (lev != styler.LevelAt(lineCurrent)) {
355 				styler.SetLevel(lineCurrent, lev);
356 			}
357 			lineCurrent++;
358 			levelPrev = levelCurrent;
359 			visibleChars = 0;
360 		}
361 		if (!isspacechar(ch)) {
362 			visibleChars++;
363 		}
364 	}
365 	// Fill in the real level of the next line, keeping the current flags as they will be filled in later
366 
367 	int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
368 	styler.SetLevel(lineCurrent, levelPrev | flagsNext);
369 }
370 
371 static const char * const luaWordListDesc[] = {
372 	"Keywords",
373 	"Basic functions",
374 	"String, (table) & math functions",
375 	"(coroutines), I/O & system facilities",
376 	"user1",
377 	"user2",
378 	"user3",
379 	"user4",
380 	0
381 };
382 
383 LexerModule lmLua(SCLEX_LUA, ColouriseLuaDoc, "lua", FoldLuaDoc, luaWordListDesc);
384