// Scintilla source code edit control
/** @file LexLua.cxx
 ** Lexer for Lua language.
 **
 ** Written by Paul Winwood.
 ** Folder by Alexey Yutkin.
 ** Modified by Marcos E. Wurzius & Philippe Lhoste
 **/
9 
10 #include <stdlib.h>
11 #include <string.h>
12 #include <ctype.h>
13 #include <stdarg.h>
14 #include <stdio.h>
15 
16 #include "Platform.h"
17 
18 #include "PropSet.h"
19 #include "Accessor.h"
20 #include "StyleContext.h"
21 #include "KeyWords.h"
22 #include "Scintilla.h"
23 #include "SciLexer.h"
24 
// Tells whether ch may appear inside a word as seen by this lexer.
// Accepts ASCII letters and digits, '_' and '.', and every value >= 0x80
// so that accented characters are treated as word characters.
static inline bool IsAWordChar(int ch) {
	if (ch >= 0x80) {
		return true;
	}
	if (ch == '.' || ch == '_') {
		return true;
	}
	return isalnum(ch) != 0;
}
30 
// Tells whether ch may begin a word: an ASCII letter, '_',
// or any value >= 0x80 (accented characters).
static inline bool IsAWordStart(int ch) {
	if (ch >= 0x80) {
		return true;
	}
	if (ch == '_') {
		return true;
	}
	return isalpha(ch) != 0;
}
35 
// Tells whether ch may continue a numeric literal.
// This is deliberately loose (several dots are accepted, signs are accepted
// anywhere, etc.) but is good enough in most cases.
// Accepted: decimal digits, '.', '+', '-', the exponent marker e/E and
// the hexadecimal letters a-f / A-F. Values >= 0x80 are never accepted.
static inline bool IsANumberChar(int ch) {
	if (ch >= 0x80) {
		return false;
	}
	if (isdigit(ch)) {
		return true;
	}
	if (toupper(ch) == 'E') {
		return true;
	}
	if (ch == '.' || ch == '-' || ch == '+') {
		return true;
	}
	const bool lowerHex = (ch >= 'a' && ch <= 'f');
	const bool upperHex = (ch >= 'A' && ch <= 'F');
	return lowerHex || upperHex;
}
44 
// Tells whether ch is one of the single characters this lexer styles as an
// operator: * / - + ( ) = { } ~ [ ] ; < > , . ^ % : #
// Letters, digits and anything outside the 0..0x7F range are never operators.
static inline bool IsLuaOperator(int ch) {
	// The caller passes a value narrowed to char, which may be negative where
	// char is signed. Reject such values up front so that they are never
	// handed to isalnum(), whose behaviour is undefined for them.
	if (ch < 0 || ch >= 0x80) {
		return false;
	}
	if (isalnum(ch)) {
		return false;
	}
	switch (ch) {
	case '*': case '/': case '-': case '+':
	case '(': case ')': case '=':
	case '{': case '}': case '~':
	case '[': case ']': case ';':
	case '<': case '>': case ',':
	case '.': case '^': case '%': case ':':
	case '#':
		return true;
	default:
		return false;
	}
}
61 
62 // Test for [=[ ... ]=] delimiters, returns 0 if it's only a [ or ],
63 // return 1 for [[ or ]], returns >=2 for [=[ or ]=] and so on.
64 // The maximum number of '=' characters allowed is 254.
LongDelimCheck(StyleContext & sc)65 static int LongDelimCheck(StyleContext &sc) {
66 	int sep = 1;
67 	while (sc.GetRelative(sep) == '=' && sep < 0xFF)
68 		sep++;
69 	if (sc.GetRelative(sep) == sc.ch)
70 		return sep;
71 	return 0;
72 }
73 
ColouriseLuaDoc(unsigned int startPos,int length,int initStyle,WordList * keywordlists[],Accessor & styler)74 static void ColouriseLuaDoc(
75 	unsigned int startPos,
76 	int length,
77 	int initStyle,
78 	WordList *keywordlists[],
79 	Accessor &styler) {
80 
81 	WordList &keywords = *keywordlists[0];
82 	WordList &keywords2 = *keywordlists[1];
83 	WordList &keywords3 = *keywordlists[2];
84 	WordList &keywords4 = *keywordlists[3];
85 	WordList &keywords5 = *keywordlists[4];
86 	WordList &keywords6 = *keywordlists[5];
87 	WordList &keywords7 = *keywordlists[6];
88 	WordList &keywords8 = *keywordlists[7];
89 
90 	int currentLine = styler.GetLine(startPos);
91 	// Initialize long string [[ ... ]] or block comment --[[ ... ]] nesting level,
92 	// if we are inside such a string. Block comment was introduced in Lua 5.0,
93 	// blocks with separators [=[ ... ]=] in Lua 5.1.
94 	int nestLevel = 0;
95 	int sepCount = 0;
96 	if (initStyle == SCE_LUA_LITERALSTRING || initStyle == SCE_LUA_COMMENT) {
97 		int lineState = styler.GetLineState(currentLine - 1);
98 		nestLevel = lineState >> 8;
99 		sepCount = lineState & 0xFF;
100 	}
101 
102 	// Do not leak onto next line
103 	if (initStyle == SCE_LUA_STRINGEOL || initStyle == SCE_LUA_COMMENTLINE || initStyle == SCE_LUA_PREPROCESSOR) {
104 		initStyle = SCE_LUA_DEFAULT;
105 	}
106 
107 	StyleContext sc(startPos, length, initStyle, styler);
108 	if (startPos == 0 && sc.ch == '#') {
109 		// shbang line: # is a comment only if first char of the script
110 		sc.SetState(SCE_LUA_COMMENTLINE);
111 	}
112 	for (; sc.More(); sc.Forward()) {
113 		if (sc.atLineEnd) {
114 			// Update the line state, so it can be seen by next line
115 			currentLine = styler.GetLine(sc.currentPos);
116 			switch (sc.state) {
117 			case SCE_LUA_LITERALSTRING:
118 			case SCE_LUA_COMMENT:
119 				// Inside a literal string or block comment, we set the line state
120 				styler.SetLineState(currentLine, (nestLevel << 8) | sepCount);
121 				break;
122 			default:
123 				// Reset the line state
124 				styler.SetLineState(currentLine, 0);
125 				break;
126 			}
127 		}
128 		if (sc.atLineStart && (sc.state == SCE_LUA_STRING)) {
129 			// Prevent SCE_LUA_STRINGEOL from leaking back to previous line
130 			sc.SetState(SCE_LUA_STRING);
131 		}
132 
133 		// Handle string line continuation
134 		if ((sc.state == SCE_LUA_STRING || sc.state == SCE_LUA_CHARACTER) &&
135 		        sc.ch == '\\') {
136 			if (sc.chNext == '\n' || sc.chNext == '\r') {
137 				sc.Forward();
138 				if (sc.ch == '\r' && sc.chNext == '\n') {
139 					sc.Forward();
140 				}
141 				continue;
142 			}
143 		}
144 
145 		// Determine if the current state should terminate.
146 		if (sc.state == SCE_LUA_OPERATOR) {
147 			sc.SetState(SCE_LUA_DEFAULT);
148 		} else if (sc.state == SCE_LUA_NUMBER) {
149 			// We stop the number definition on non-numerical non-dot non-eE non-sign non-hexdigit char
150 			if (!IsANumberChar(sc.ch)) {
151 				sc.SetState(SCE_LUA_DEFAULT);
152 			}
153 		} else if (sc.state == SCE_LUA_IDENTIFIER) {
154 			if (!IsAWordChar(sc.ch) || sc.Match('.', '.')) {
155 				char s[100];
156 				sc.GetCurrent(s, sizeof(s));
157 				if (keywords.InList(s)) {
158 					sc.ChangeState(SCE_LUA_WORD);
159 				} else if (keywords2.InList(s)) {
160 					sc.ChangeState(SCE_LUA_WORD2);
161 				} else if (keywords3.InList(s)) {
162 					sc.ChangeState(SCE_LUA_WORD3);
163 				} else if (keywords4.InList(s)) {
164 					sc.ChangeState(SCE_LUA_WORD4);
165 				} else if (keywords5.InList(s)) {
166 					sc.ChangeState(SCE_LUA_WORD5);
167 				} else if (keywords6.InList(s)) {
168 					sc.ChangeState(SCE_LUA_WORD6);
169 				} else if (keywords6.InList(s)) {
170 					sc.ChangeState(SCE_LUA_WORD6);
171 				} else if (keywords7.InList(s)) {
172 					sc.ChangeState(SCE_LUA_WORD7);
173 				} else if (keywords8.InList(s)) {
174 					sc.ChangeState(SCE_LUA_WORD8);
175 				}
176 				sc.SetState(SCE_LUA_DEFAULT);
177 			}
178 		} else if (sc.state == SCE_LUA_COMMENTLINE || sc.state == SCE_LUA_PREPROCESSOR) {
179 			if (sc.atLineEnd) {
180 				sc.ForwardSetState(SCE_LUA_DEFAULT);
181 			}
182 		} else if (sc.state == SCE_LUA_STRING) {
183 			if (sc.ch == '\\') {
184 				if (sc.chNext == '\"' || sc.chNext == '\'' || sc.chNext == '\\') {
185 					sc.Forward();
186 				}
187 			} else if (sc.ch == '\"') {
188 				sc.ForwardSetState(SCE_LUA_DEFAULT);
189 			} else if (sc.atLineEnd) {
190 				sc.ChangeState(SCE_LUA_STRINGEOL);
191 				sc.ForwardSetState(SCE_LUA_DEFAULT);
192 			}
193 		} else if (sc.state == SCE_LUA_CHARACTER) {
194 			if (sc.ch == '\\') {
195 				if (sc.chNext == '\"' || sc.chNext == '\'' || sc.chNext == '\\') {
196 					sc.Forward();
197 				}
198 			} else if (sc.ch == '\'') {
199 				sc.ForwardSetState(SCE_LUA_DEFAULT);
200 			} else if (sc.atLineEnd) {
201 				sc.ChangeState(SCE_LUA_STRINGEOL);
202 				sc.ForwardSetState(SCE_LUA_DEFAULT);
203 			}
204 		} else if (sc.state == SCE_LUA_LITERALSTRING || sc.state == SCE_LUA_COMMENT) {
205 			if (sc.ch == '[') {
206 				int sep = LongDelimCheck(sc);
207 				if (sep == 1 && sepCount == 1) {    // [[-only allowed to nest
208 					nestLevel++;
209 					sc.Forward();
210 				}
211 			} else if (sc.ch == ']') {
212 				int sep = LongDelimCheck(sc);
213 				if (sep == 1 && sepCount == 1) {    // un-nest with ]]-only
214 					nestLevel--;
215 					sc.Forward();
216 					if (nestLevel == 0) {
217 						sc.ForwardSetState(SCE_LUA_DEFAULT);
218 					}
219 				} else if (sep > 1 && sep == sepCount) {   // ]=]-style delim
220 					sc.Forward(sep);
221 					sc.ForwardSetState(SCE_LUA_DEFAULT);
222 				}
223 			}
224 		}
225 
226 		// Determine if a new state should be entered.
227 		if (sc.state == SCE_LUA_DEFAULT) {
228 			if (IsADigit(sc.ch) || (sc.ch == '.' && IsADigit(sc.chNext))) {
229 				sc.SetState(SCE_LUA_NUMBER);
230 				if (sc.ch == '0' && toupper(sc.chNext) == 'X') {
231 					sc.Forward(1);
232 				}
233 			} else if (IsAWordStart(sc.ch)) {
234 				sc.SetState(SCE_LUA_IDENTIFIER);
235 			} else if (sc.ch == '\"') {
236 				sc.SetState(SCE_LUA_STRING);
237 			} else if (sc.ch == '\'') {
238 				sc.SetState(SCE_LUA_CHARACTER);
239 			} else if (sc.ch == '[') {
240 				sepCount = LongDelimCheck(sc);
241 				if (sepCount == 0) {
242 					sc.SetState(SCE_LUA_OPERATOR);
243 				} else {
244 					nestLevel = 1;
245 					sc.SetState(SCE_LUA_LITERALSTRING);
246 					sc.Forward(sepCount);
247 				}
248 			} else if (sc.Match('-', '-')) {
249 				sc.SetState(SCE_LUA_COMMENTLINE);
250 				if (sc.Match("--[")) {
251 					sc.Forward(2);
252 					sepCount = LongDelimCheck(sc);
253 					if (sepCount > 0) {
254 						nestLevel = 1;
255 						sc.ChangeState(SCE_LUA_COMMENT);
256 						sc.Forward(sepCount);
257 					}
258 				} else {
259 					sc.Forward();
260 				}
261 			} else if (sc.atLineStart && sc.Match('$')) {
262 				sc.SetState(SCE_LUA_PREPROCESSOR);	// Obsolete since Lua 4.0, but still in old code
263 			} else if (IsLuaOperator(static_cast<char>(sc.ch))) {
264 				sc.SetState(SCE_LUA_OPERATOR);
265 			}
266 		}
267 	}
268 	sc.Complete();
269 }
270 
FoldLuaDoc(unsigned int startPos,int length,int,WordList * [],Accessor & styler)271 static void FoldLuaDoc(unsigned int startPos, int length, int /* initStyle */, WordList *[],
272                        Accessor &styler) {
273 	unsigned int lengthDoc = startPos + length;
274 	int visibleChars = 0;
275 	int lineCurrent = styler.GetLine(startPos);
276 	int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
277 	int levelCurrent = levelPrev;
278 	char chNext = styler[startPos];
279 	bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
280 	int styleNext = styler.StyleAt(startPos);
281 	char s[10];
282 
283 	for (unsigned int i = startPos; i < lengthDoc; i++) {
284 		char ch = chNext;
285 		chNext = styler.SafeGetCharAt(i + 1);
286 		int style = styleNext;
287 		styleNext = styler.StyleAt(i + 1);
288 		bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
289 		if (style == SCE_LUA_WORD) {
290 			if (ch == 'i' || ch == 'd' || ch == 'f' || ch == 'e' || ch == 'r' || ch == 'u') {
291 				for (unsigned int j = 0; j < 8; j++) {
292 					if (!iswordchar(styler[i + j])) {
293 						break;
294 					}
295 					s[j] = styler[i + j];
296 					s[j + 1] = '\0';
297 				}
298 
299 				if ((strcmp(s, "if") == 0) || (strcmp(s, "do") == 0) || (strcmp(s, "function") == 0) || (strcmp(s, "repeat") == 0)) {
300 					levelCurrent++;
301 				}
302 				if ((strcmp(s, "end") == 0) || (strcmp(s, "elseif") == 0) || (strcmp(s, "until") == 0)) {
303 					levelCurrent--;
304 				}
305 			}
306 		} else if (style == SCE_LUA_OPERATOR) {
307 			if (ch == '{' || ch == '(') {
308 				levelCurrent++;
309 			} else if (ch == '}' || ch == ')') {
310 				levelCurrent--;
311 			}
312 		} else if (style == SCE_LUA_LITERALSTRING || style == SCE_LUA_COMMENT) {
313 			if (ch == '[') {
314 				levelCurrent++;
315 			} else if (ch == ']') {
316 				levelCurrent--;
317 			}
318 		}
319 
320 		if (atEOL) {
321 			int lev = levelPrev;
322 			if (visibleChars == 0 && foldCompact) {
323 				lev |= SC_FOLDLEVELWHITEFLAG;
324 			}
325 			if ((levelCurrent > levelPrev) && (visibleChars > 0)) {
326 				lev |= SC_FOLDLEVELHEADERFLAG;
327 			}
328 			if (lev != styler.LevelAt(lineCurrent)) {
329 				styler.SetLevel(lineCurrent, lev);
330 			}
331 			lineCurrent++;
332 			levelPrev = levelCurrent;
333 			visibleChars = 0;
334 		}
335 		if (!isspacechar(ch)) {
336 			visibleChars++;
337 		}
338 	}
339 	// Fill in the real level of the next line, keeping the current flags as they will be filled in later
340 
341 	int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
342 	styler.SetLevel(lineCurrent, levelPrev | flagsNext);
343 }
344 
// Descriptions of the eight word lists accepted by the Lua lexer,
// in list order, terminated by a null entry.
static const char * const luaWordListDesc[] = {
	"Keywords",                               // list 1
	"Basic functions",                        // list 2
	"String, (table) & math functions",       // list 3
	"(coroutines), I/O & system facilities",  // list 4
	"user1",                                  // list 5
	"user2",                                  // list 6
	"user3",                                  // list 7
	"user4",                                  // list 8
	0                                         // end of table
};
356 
357 LexerModule lmLua(SCLEX_LUA, ColouriseLuaDoc, "lua", FoldLuaDoc, luaWordListDesc);
358