// Scintilla source code edit control
/** @file LexLua.cxx
 ** Lexer for Lua language.
 **
 ** Written by Paul Winwood.
 ** Folder by Alexey Yutkin.
 ** Modified by Marcos E. Wurzius & Philippe Lhoste
 **/

#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <stdarg.h>
#include <assert.h>
#include <ctype.h>

#include <string>

#include "ILexer.h"
#include "Scintilla.h"
#include "SciLexer.h"

#include "StringCopy.h"
#include "WordList.h"
#include "LexAccessor.h"
#include "Accessor.h"
#include "StyleContext.h"
#include "CharacterSet.h"
#include "LexerModule.h"

using namespace Scintilla;

33 // Test for [=[ ... ]=] delimiters, returns 0 if it's only a [ or ],
34 // return 1 for [[ or ]], returns >=2 for [=[ or ]=] and so on.
35 // The maximum number of '=' characters allowed is 254.
LongDelimCheck(StyleContext & sc)36 static int LongDelimCheck(StyleContext &sc) {
37 int sep = 1;
38 while (sc.GetRelative(sep) == '=' && sep < 0xFF)
39 sep++;
40 if (sc.GetRelative(sep) == sc.ch)
41 return sep;
42 return 0;
43 }
44
ColouriseLuaDoc(Sci_PositionU startPos,Sci_Position length,int initStyle,WordList * keywordlists[],Accessor & styler)45 static void ColouriseLuaDoc(
46 Sci_PositionU startPos,
47 Sci_Position length,
48 int initStyle,
49 WordList *keywordlists[],
50 Accessor &styler) {
51
52 const WordList &keywords = *keywordlists[0];
53 const WordList &keywords2 = *keywordlists[1];
54 const WordList &keywords3 = *keywordlists[2];
55 const WordList &keywords4 = *keywordlists[3];
56 const WordList &keywords5 = *keywordlists[4];
57 const WordList &keywords6 = *keywordlists[5];
58 const WordList &keywords7 = *keywordlists[6];
59 const WordList &keywords8 = *keywordlists[7];
60
61 // Accepts accented characters
62 CharacterSet setWordStart(CharacterSet::setAlpha, "_", 0x80, true);
63 CharacterSet setWord(CharacterSet::setAlphaNum, "_", 0x80, true);
64 // Not exactly following number definition (several dots are seen as OK, etc.)
65 // but probably enough in most cases. [pP] is for hex floats.
66 CharacterSet setNumber(CharacterSet::setDigits, ".-+abcdefpABCDEFP");
67 CharacterSet setExponent(CharacterSet::setNone, "eEpP");
68 CharacterSet setLuaOperator(CharacterSet::setNone, "*/-+()={}~[];<>,.^%:#&|");
69 CharacterSet setEscapeSkip(CharacterSet::setNone, "\"'\\");
70
71 Sci_Position currentLine = styler.GetLine(startPos);
72 // Initialize long string [[ ... ]] or block comment --[[ ... ]] nesting level,
73 // if we are inside such a string. Block comment was introduced in Lua 5.0,
74 // blocks with separators [=[ ... ]=] in Lua 5.1.
75 // Continuation of a string (\z whitespace escaping) is controlled by stringWs.
76 int nestLevel = 0;
77 int sepCount = 0;
78 int stringWs = 0;
79 if (initStyle == SCE_LUA_LITERALSTRING || initStyle == SCE_LUA_COMMENT ||
80 initStyle == SCE_LUA_STRING || initStyle == SCE_LUA_CHARACTER) {
81 const int lineState = styler.GetLineState(currentLine - 1);
82 nestLevel = lineState >> 9;
83 sepCount = lineState & 0xFF;
84 stringWs = lineState & 0x100;
85 }
86
87 // results of identifier/keyword matching
88 Sci_Position idenPos = 0;
89 Sci_Position idenWordPos = 0;
90 int idenStyle = SCE_LUA_IDENTIFIER;
91 bool foundGoto = false;
92
93 // Do not leak onto next line
94 if (initStyle == SCE_LUA_STRINGEOL || initStyle == SCE_LUA_COMMENTLINE || initStyle == SCE_LUA_PREPROCESSOR) {
95 initStyle = SCE_LUA_DEFAULT;
96 }
97
98 StyleContext sc(startPos, length, initStyle, styler);
99 if (startPos == 0 && sc.ch == '#' && sc.chNext == '!') {
100 // shbang line: "#!" is a comment only if located at the start of the script
101 sc.SetState(SCE_LUA_COMMENTLINE);
102 }
103 for (; sc.More(); sc.Forward()) {
104 if (sc.atLineEnd) {
105 // Update the line state, so it can be seen by next line
106 currentLine = styler.GetLine(sc.currentPos);
107 switch (sc.state) {
108 case SCE_LUA_LITERALSTRING:
109 case SCE_LUA_COMMENT:
110 case SCE_LUA_STRING:
111 case SCE_LUA_CHARACTER:
112 // Inside a literal string, block comment or string, we set the line state
113 styler.SetLineState(currentLine, (nestLevel << 9) | stringWs | sepCount);
114 break;
115 default:
116 // Reset the line state
117 styler.SetLineState(currentLine, 0);
118 break;
119 }
120 }
121 if (sc.atLineStart && (sc.state == SCE_LUA_STRING)) {
122 // Prevent SCE_LUA_STRINGEOL from leaking back to previous line
123 sc.SetState(SCE_LUA_STRING);
124 }
125
126 // Handle string line continuation
127 if ((sc.state == SCE_LUA_STRING || sc.state == SCE_LUA_CHARACTER) &&
128 sc.ch == '\\') {
129 if (sc.chNext == '\n' || sc.chNext == '\r') {
130 sc.Forward();
131 if (sc.ch == '\r' && sc.chNext == '\n') {
132 sc.Forward();
133 }
134 continue;
135 }
136 }
137
138 // Determine if the current state should terminate.
139 if (sc.state == SCE_LUA_OPERATOR) {
140 if (sc.ch == ':' && sc.chPrev == ':') { // :: <label> :: forward scan
141 sc.Forward();
142 Sci_Position ln = 0;
143 while (IsASpaceOrTab(sc.GetRelative(ln))) // skip over spaces/tabs
144 ln++;
145 Sci_Position ws1 = ln;
146 if (setWordStart.Contains(sc.GetRelative(ln))) {
147 int c, i = 0;
148 char s[100];
149 while (setWord.Contains(c = sc.GetRelative(ln))) { // get potential label
150 if (i < 90)
151 s[i++] = static_cast<char>(c);
152 ln++;
153 }
154 s[i] = '\0'; Sci_Position lbl = ln;
155 if (!keywords.InList(s)) {
156 while (IsASpaceOrTab(sc.GetRelative(ln))) // skip over spaces/tabs
157 ln++;
158 Sci_Position ws2 = ln - lbl;
159 if (sc.GetRelative(ln) == ':' && sc.GetRelative(ln + 1) == ':') {
160 // final :: found, complete valid label construct
161 sc.ChangeState(SCE_LUA_LABEL);
162 if (ws1) {
163 sc.SetState(SCE_LUA_DEFAULT);
164 sc.ForwardBytes(ws1);
165 }
166 sc.SetState(SCE_LUA_LABEL);
167 sc.ForwardBytes(lbl - ws1);
168 if (ws2) {
169 sc.SetState(SCE_LUA_DEFAULT);
170 sc.ForwardBytes(ws2);
171 }
172 sc.SetState(SCE_LUA_LABEL);
173 sc.ForwardBytes(2);
174 }
175 }
176 }
177 }
178 sc.SetState(SCE_LUA_DEFAULT);
179 } else if (sc.state == SCE_LUA_NUMBER) {
180 // We stop the number definition on non-numerical non-dot non-eEpP non-sign non-hexdigit char
181 if (!setNumber.Contains(sc.ch)) {
182 sc.SetState(SCE_LUA_DEFAULT);
183 } else if (sc.ch == '-' || sc.ch == '+') {
184 if (!setExponent.Contains(sc.chPrev))
185 sc.SetState(SCE_LUA_DEFAULT);
186 }
187 } else if (sc.state == SCE_LUA_IDENTIFIER) {
188 idenPos--; // commit already-scanned identitier/word parts
189 if (idenWordPos > 0) {
190 idenWordPos--;
191 sc.ChangeState(idenStyle);
192 sc.ForwardBytes(idenWordPos);
193 idenPos -= idenWordPos;
194 if (idenPos > 0) {
195 sc.SetState(SCE_LUA_IDENTIFIER);
196 sc.ForwardBytes(idenPos);
197 }
198 } else {
199 sc.ForwardBytes(idenPos);
200 }
201 sc.SetState(SCE_LUA_DEFAULT);
202 if (foundGoto) { // goto <label> forward scan
203 while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd)
204 sc.Forward();
205 if (setWordStart.Contains(sc.ch)) {
206 sc.SetState(SCE_LUA_LABEL);
207 sc.Forward();
208 while (setWord.Contains(sc.ch))
209 sc.Forward();
210 char s[100];
211 sc.GetCurrent(s, sizeof(s));
212 if (keywords.InList(s)) // labels cannot be keywords
213 sc.ChangeState(SCE_LUA_WORD);
214 }
215 sc.SetState(SCE_LUA_DEFAULT);
216 }
217 } else if (sc.state == SCE_LUA_COMMENTLINE || sc.state == SCE_LUA_PREPROCESSOR) {
218 if (sc.atLineEnd) {
219 sc.ForwardSetState(SCE_LUA_DEFAULT);
220 }
221 } else if (sc.state == SCE_LUA_STRING) {
222 if (stringWs) {
223 if (!IsASpace(sc.ch))
224 stringWs = 0;
225 }
226 if (sc.ch == '\\') {
227 if (setEscapeSkip.Contains(sc.chNext)) {
228 sc.Forward();
229 } else if (sc.chNext == 'z') {
230 sc.Forward();
231 stringWs = 0x100;
232 }
233 } else if (sc.ch == '\"') {
234 sc.ForwardSetState(SCE_LUA_DEFAULT);
235 } else if (stringWs == 0 && sc.atLineEnd) {
236 sc.ChangeState(SCE_LUA_STRINGEOL);
237 sc.ForwardSetState(SCE_LUA_DEFAULT);
238 }
239 } else if (sc.state == SCE_LUA_CHARACTER) {
240 if (stringWs) {
241 if (!IsASpace(sc.ch))
242 stringWs = 0;
243 }
244 if (sc.ch == '\\') {
245 if (setEscapeSkip.Contains(sc.chNext)) {
246 sc.Forward();
247 } else if (sc.chNext == 'z') {
248 sc.Forward();
249 stringWs = 0x100;
250 }
251 } else if (sc.ch == '\'') {
252 sc.ForwardSetState(SCE_LUA_DEFAULT);
253 } else if (stringWs == 0 && sc.atLineEnd) {
254 sc.ChangeState(SCE_LUA_STRINGEOL);
255 sc.ForwardSetState(SCE_LUA_DEFAULT);
256 }
257 } else if (sc.state == SCE_LUA_LITERALSTRING || sc.state == SCE_LUA_COMMENT) {
258 if (sc.ch == '[') {
259 const int sep = LongDelimCheck(sc);
260 if (sep == 1 && sepCount == 1) { // [[-only allowed to nest
261 nestLevel++;
262 sc.Forward();
263 }
264 } else if (sc.ch == ']') {
265 int sep = LongDelimCheck(sc);
266 if (sep == 1 && sepCount == 1) { // un-nest with ]]-only
267 nestLevel--;
268 sc.Forward();
269 if (nestLevel == 0) {
270 sc.ForwardSetState(SCE_LUA_DEFAULT);
271 }
272 } else if (sep > 1 && sep == sepCount) { // ]=]-style delim
273 sc.Forward(sep);
274 sc.ForwardSetState(SCE_LUA_DEFAULT);
275 }
276 }
277 }
278
279 // Determine if a new state should be entered.
280 if (sc.state == SCE_LUA_DEFAULT) {
281 if (IsADigit(sc.ch) || (sc.ch == '.' && IsADigit(sc.chNext))) {
282 sc.SetState(SCE_LUA_NUMBER);
283 if (sc.ch == '0' && toupper(sc.chNext) == 'X') {
284 sc.Forward();
285 }
286 } else if (setWordStart.Contains(sc.ch)) {
287 // For matching various identifiers with dots and colons, multiple
288 // matches are done as identifier segments are added. Longest match is
289 // set to a word style. The non-matched part is in identifier style.
290 std::string ident;
291 idenPos = 0;
292 idenWordPos = 0;
293 idenStyle = SCE_LUA_IDENTIFIER;
294 foundGoto = false;
295 int cNext;
296 do {
297 int c;
298 const Sci_Position idenPosOld = idenPos;
299 std::string identSeg;
300 identSeg += static_cast<char>(sc.GetRelative(idenPos++));
301 while (setWord.Contains(c = sc.GetRelative(idenPos))) {
302 identSeg += static_cast<char>(c);
303 idenPos++;
304 }
305 if (keywords.InList(identSeg.c_str()) && (idenPosOld > 0)) {
306 idenPos = idenPosOld - 1; // keywords cannot mix
307 ident.pop_back();
308 break;
309 }
310 ident += identSeg;
311 const char* s = ident.c_str();
312 int newStyle = SCE_LUA_IDENTIFIER;
313 if (keywords.InList(s)) {
314 newStyle = SCE_LUA_WORD;
315 } else if (keywords2.InList(s)) {
316 newStyle = SCE_LUA_WORD2;
317 } else if (keywords3.InList(s)) {
318 newStyle = SCE_LUA_WORD3;
319 } else if (keywords4.InList(s)) {
320 newStyle = SCE_LUA_WORD4;
321 } else if (keywords5.InList(s)) {
322 newStyle = SCE_LUA_WORD5;
323 } else if (keywords6.InList(s)) {
324 newStyle = SCE_LUA_WORD6;
325 } else if (keywords7.InList(s)) {
326 newStyle = SCE_LUA_WORD7;
327 } else if (keywords8.InList(s)) {
328 newStyle = SCE_LUA_WORD8;
329 }
330 if (newStyle != SCE_LUA_IDENTIFIER) {
331 idenStyle = newStyle;
332 idenWordPos = idenPos;
333 }
334 if (idenStyle == SCE_LUA_WORD) // keywords cannot mix
335 break;
336 cNext = sc.GetRelative(idenPos + 1);
337 if ((c == '.' || c == ':') && setWordStart.Contains(cNext)) {
338 ident += static_cast<char>(c);
339 idenPos++;
340 } else {
341 cNext = 0;
342 }
343 } while (cNext);
344 if ((idenStyle == SCE_LUA_WORD) && (ident.compare("goto") == 0)) {
345 foundGoto = true;
346 }
347 sc.SetState(SCE_LUA_IDENTIFIER);
348 } else if (sc.ch == '\"') {
349 sc.SetState(SCE_LUA_STRING);
350 stringWs = 0;
351 } else if (sc.ch == '\'') {
352 sc.SetState(SCE_LUA_CHARACTER);
353 stringWs = 0;
354 } else if (sc.ch == '[') {
355 sepCount = LongDelimCheck(sc);
356 if (sepCount == 0) {
357 sc.SetState(SCE_LUA_OPERATOR);
358 } else {
359 nestLevel = 1;
360 sc.SetState(SCE_LUA_LITERALSTRING);
361 sc.Forward(sepCount);
362 }
363 } else if (sc.Match('-', '-')) {
364 sc.SetState(SCE_LUA_COMMENTLINE);
365 if (sc.Match("--[")) {
366 sc.Forward(2);
367 sepCount = LongDelimCheck(sc);
368 if (sepCount > 0) {
369 nestLevel = 1;
370 sc.ChangeState(SCE_LUA_COMMENT);
371 sc.Forward(sepCount);
372 }
373 } else {
374 sc.Forward();
375 }
376 } else if (sc.atLineStart && sc.Match('$')) {
377 sc.SetState(SCE_LUA_PREPROCESSOR); // Obsolete since Lua 4.0, but still in old code
378 } else if (setLuaOperator.Contains(sc.ch)) {
379 sc.SetState(SCE_LUA_OPERATOR);
380 }
381 }
382 }
383
384 sc.Complete();
385 }
386
FoldLuaDoc(Sci_PositionU startPos,Sci_Position length,int,WordList * [],Accessor & styler)387 static void FoldLuaDoc(Sci_PositionU startPos, Sci_Position length, int /* initStyle */, WordList *[],
388 Accessor &styler) {
389 const Sci_PositionU lengthDoc = startPos + length;
390 int visibleChars = 0;
391 Sci_Position lineCurrent = styler.GetLine(startPos);
392 int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
393 int levelCurrent = levelPrev;
394 char chNext = styler[startPos];
395 const bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
396 int styleNext = styler.StyleAt(startPos);
397
398 for (Sci_PositionU i = startPos; i < lengthDoc; i++) {
399 const char ch = chNext;
400 chNext = styler.SafeGetCharAt(i + 1);
401 const int style = styleNext;
402 styleNext = styler.StyleAt(i + 1);
403 const bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
404 if (style == SCE_LUA_WORD) {
405 if (ch == 'i' || ch == 'd' || ch == 'f' || ch == 'e' || ch == 'r' || ch == 'u') {
406 char s[10] = "";
407 for (Sci_PositionU j = 0; j < 8; j++) {
408 if (!iswordchar(styler[i + j])) {
409 break;
410 }
411 s[j] = styler[i + j];
412 s[j + 1] = '\0';
413 }
414
415 if ((strcmp(s, "if") == 0) || (strcmp(s, "do") == 0) || (strcmp(s, "function") == 0) || (strcmp(s, "repeat") == 0)) {
416 levelCurrent++;
417 }
418 if ((strcmp(s, "end") == 0) || (strcmp(s, "elseif") == 0) || (strcmp(s, "until") == 0)) {
419 levelCurrent--;
420 }
421 }
422 } else if (style == SCE_LUA_OPERATOR) {
423 if (ch == '{' || ch == '(') {
424 levelCurrent++;
425 } else if (ch == '}' || ch == ')') {
426 levelCurrent--;
427 }
428 } else if (style == SCE_LUA_LITERALSTRING || style == SCE_LUA_COMMENT) {
429 if (ch == '[') {
430 levelCurrent++;
431 } else if (ch == ']') {
432 levelCurrent--;
433 }
434 }
435
436 if (atEOL) {
437 int lev = levelPrev;
438 if (visibleChars == 0 && foldCompact) {
439 lev |= SC_FOLDLEVELWHITEFLAG;
440 }
441 if ((levelCurrent > levelPrev) && (visibleChars > 0)) {
442 lev |= SC_FOLDLEVELHEADERFLAG;
443 }
444 if (lev != styler.LevelAt(lineCurrent)) {
445 styler.SetLevel(lineCurrent, lev);
446 }
447 lineCurrent++;
448 levelPrev = levelCurrent;
449 visibleChars = 0;
450 }
451 if (!isspacechar(ch)) {
452 visibleChars++;
453 }
454 }
455 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
456
457 int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
458 styler.SetLevel(lineCurrent, levelPrev | flagsNext);
459 }
460
// Descriptions of the eight keyword lists accepted by the Lua lexer, in list
// index order. The array is terminated by a null pointer.
static const char * const luaWordListDesc[] = {
	"Keywords",
	"Basic functions",
	"String, (table) & math functions",
	"(coroutines), I/O & system facilities",
	"user1",
	"user2",
	"user3",
	"user4",
	nullptr
};

473 namespace {
474
475 LexicalClass lexicalClasses[] = {
476 // Lexer Lua SCLEX_LUA SCE_LUA_:
477 0, "SCE_LUA_DEFAULT", "default", "White space: Visible only in View Whitespace mode (or if it has a back colour)",
478 1, "SCE_LUA_COMMENT", "comment", "Block comment (Lua 5.0)",
479 2, "SCE_LUA_COMMENTLINE", "comment line", "Line comment",
480 3, "SCE_LUA_COMMENTDOC", "comment documentation", "Doc comment -- Not used in Lua (yet?)",
481 4, "SCE_LUA_NUMBER", "literal numeric", "Number",
482 5, "SCE_LUA_WORD", "keyword", "Keyword",
483 6, "SCE_LUA_STRING", "literal string", "(Double quoted) String",
484 7, "SCE_LUA_CHARACTER", "literal string character", "Character (Single quoted string)",
485 8, "SCE_LUA_LITERALSTRING", "literal string", "Literal string",
486 9, "SCE_LUA_PREPROCESSOR", "preprocessor", "Preprocessor (obsolete in Lua 4.0 and up)",
487 10, "SCE_LUA_OPERATOR", "operator", "Operators",
488 11, "SCE_LUA_IDENTIFIER", "identifier", "Identifier (everything else...)",
489 12, "SCE_LUA_STRINGEOL", "error literal string", "End of line where string is not closed",
490 13, "SCE_LUA_WORD2", "identifier", "Other keywords",
491 14, "SCE_LUA_WORD3", "identifier", "Other keywords",
492 15, "SCE_LUA_WORD4", "identifier", "Other keywords",
493 16, "SCE_LUA_WORD5", "identifier", "Other keywords",
494 17, "SCE_LUA_WORD6", "identifier", "Other keywords",
495 18, "SCE_LUA_WORD7", "identifier", "Other keywords",
496 19, "SCE_LUA_WORD8", "identifier", "Other keywords",
497 20, "SCE_LUA_LABEL", "label", "Labels",
498 };
499
500 }
501
502 LexerModule lmLua(SCLEX_LUA, ColouriseLuaDoc, "lua", FoldLuaDoc, luaWordListDesc, lexicalClasses, ELEMENTS(lexicalClasses));
503