1 // Scintilla source code edit control
2 // Encoding: UTF-8
3 // Copyright 1998-2001 by Neil Hodgson <neilh@scintilla.org>
4 // The License.txt file describes the conditions under which this software may be distributed.
5 /** @file LexErlang.cxx
6 ** Lexer for Erlang.
7 ** Enhanced by Etienne 'Lenain' Girondel (lenaing@gmail.com)
8 ** Originally written by Peter-Henry Mander,
9 ** based on Matlab lexer by José Fonseca.
10 **/
11
12 #include <stdlib.h>
13 #include <string.h>
14 #include <stdio.h>
15 #include <stdarg.h>
16 #include <assert.h>
17 #include <ctype.h>
18
19 #include "ILexer.h"
20 #include "Scintilla.h"
21 #include "SciLexer.h"
22
23 #include "WordList.h"
24 #include "LexAccessor.h"
25 #include "Accessor.h"
26 #include "StyleContext.h"
27 #include "CharacterSet.h"
28 #include "LexerModule.h"
29
30 using namespace Scintilla;
31
/**
 * Tell whether @a ch is a valid digit of a number written in base @a radix.
 *
 * Digit values are '0'-'9' (0..9) followed by the ASCII letters, upper or
 * lower case (10..35), which is why the largest accepted radix is 36.
 *
 * The character is classified with explicit ASCII range tests rather than
 * the <ctype.h> functions: those are undefined for arguments that are
 * neither EOF nor representable as unsigned char, and @a ch is a plain int.
 *
 * @param radix  numeric base; anything outside 2..36 yields 0.
 * @param ch     character to test; any int value is accepted.
 * @return 1 when @a ch is a digit of base @a radix, 0 otherwise.
 */
static int is_radix(int radix, int ch) {
	if (radix < 2 || radix > 36)
		return 0;

	int digit;
	if (ch >= '0' && ch <= '9') {
		digit = ch - '0';
	} else if (ch >= 'a' && ch <= 'z') {
		digit = ch - 'a' + 10;
	} else if (ch >= 'A' && ch <= 'Z') {
		digit = ch - 'A' + 10;
	} else {
		return 0;
	}

	return (digit < radix) ? 1 : 0;
}
48
/**
 * Sub-state of the Erlang colouriser.
 *
 * ColouriseErlangDoc keeps one of these values next to the style held by its
 * StyleContext. While the value is STATE_NULL the colouriser dispatches on
 * the current style only; any other value selects the matching case of its
 * token state machine.
 */
enum atom_parse_state_t {
	// No token-specific sub-state is active.
	STATE_NULL,

	// Comments: plain, the two deeper '%' levels, and documentation tags.
	COMMENT,
	COMMENT_FUNCTION,
	COMMENT_MODULE,
	COMMENT_DOC,
	COMMENT_DOC_MACRO,

	// Atoms.
	ATOM_UNQUOTED,
	ATOM_QUOTED,

	// Node names (entered when '@' is met inside an atom).
	NODE_NAME_UNQUOTED,
	NODE_NAME_QUOTED,

	// Macros (introduced by '?').
	MACRO_START,
	MACRO_UNQUOTED,
	MACRO_QUOTED,

	// Records (introduced by '#').
	RECORD_START,
	RECORD_UNQUOTED,
	RECORD_QUOTED,

	// Numbers: integer, radix#value, float and exponent parts.
	NUMERAL_START,
	NUMERAL_BASE_VALUE,
	NUMERAL_FLOAT,
	NUMERAL_EXPONENT,

	// Word following a '-' that does not start a number.
	PREPROCESSOR
};
72
/**
 * Tell whether @a ch may appear inside an unquoted word: an ASCII letter,
 * an ASCII digit or '_'.
 *
 * Values outside 0..0x7F are rejected before isalnum() is consulted, because
 * isalnum() is undefined for arguments that are neither EOF nor
 * representable as unsigned char.
 *
 * @param ch  character to test; any int value is accepted.
 * @return true when @a ch is a word character.
 */
static inline bool IsAWordChar(const int ch) {
	if (ch < 0 || ch >= 0x80)
		return false;
	return isalnum(ch) || ch == '_';
}
76
ColouriseErlangDoc(Sci_PositionU startPos,Sci_Position length,int initStyle,WordList * keywordlists[],Accessor & styler)77 static void ColouriseErlangDoc(Sci_PositionU startPos, Sci_Position length, int initStyle,
78 WordList *keywordlists[], Accessor &styler) {
79
80 StyleContext sc(startPos, length, initStyle, styler);
81 WordList &reservedWords = *keywordlists[0];
82 WordList &erlangBIFs = *keywordlists[1];
83 WordList &erlangPreproc = *keywordlists[2];
84 WordList &erlangModulesAtt = *keywordlists[3];
85 WordList &erlangDoc = *keywordlists[4];
86 WordList &erlangDocMacro = *keywordlists[5];
87 int radix_digits = 0;
88 int exponent_digits = 0;
89 atom_parse_state_t parse_state = STATE_NULL;
90 atom_parse_state_t old_parse_state = STATE_NULL;
91 bool to_late_to_comment = false;
92 char cur[100];
93 int old_style = SCE_ERLANG_DEFAULT;
94
95 styler.StartAt(startPos);
96
97 for (; sc.More(); sc.Forward()) {
98 int style = SCE_ERLANG_DEFAULT;
99 if (STATE_NULL != parse_state) {
100
101 switch (parse_state) {
102
103 case STATE_NULL : sc.SetState(SCE_ERLANG_DEFAULT); break;
104
105 /* COMMENTS ------------------------------------------------------*/
106 case COMMENT : {
107 if (sc.ch != '%') {
108 to_late_to_comment = true;
109 } else if (!to_late_to_comment && sc.ch == '%') {
110 // Switch to comment level 2 (Function)
111 sc.ChangeState(SCE_ERLANG_COMMENT_FUNCTION);
112 old_style = SCE_ERLANG_COMMENT_FUNCTION;
113 parse_state = COMMENT_FUNCTION;
114 sc.Forward();
115 }
116 }
117 // V--- Falling through!
118 // Falls through.
119 case COMMENT_FUNCTION : {
120 if (sc.ch != '%') {
121 to_late_to_comment = true;
122 } else if (!to_late_to_comment && sc.ch == '%') {
123 // Switch to comment level 3 (Module)
124 sc.ChangeState(SCE_ERLANG_COMMENT_MODULE);
125 old_style = SCE_ERLANG_COMMENT_MODULE;
126 parse_state = COMMENT_MODULE;
127 sc.Forward();
128 }
129 }
130 // V--- Falling through!
131 // Falls through.
132 case COMMENT_MODULE : {
133 if (parse_state != COMMENT) {
134 // Search for comment documentation
135 if (sc.chNext == '@') {
136 old_parse_state = parse_state;
137 parse_state = ('{' == sc.ch)
138 ? COMMENT_DOC_MACRO
139 : COMMENT_DOC;
140 sc.ForwardSetState(sc.state);
141 }
142 }
143
144 // All comments types fall here.
145 if (sc.atLineEnd) {
146 to_late_to_comment = false;
147 sc.SetState(SCE_ERLANG_DEFAULT);
148 parse_state = STATE_NULL;
149 }
150 } break;
151
152 case COMMENT_DOC :
153 // V--- Falling through!
154 case COMMENT_DOC_MACRO : {
155
156 if (!isalnum(sc.ch)) {
157 // Try to match documentation comment
158 sc.GetCurrent(cur, sizeof(cur));
159
160 if (parse_state == COMMENT_DOC_MACRO
161 && erlangDocMacro.InList(cur)) {
162 sc.ChangeState(SCE_ERLANG_COMMENT_DOC_MACRO);
163 while (sc.ch != '}' && !sc.atLineEnd)
164 sc.Forward();
165 } else if (erlangDoc.InList(cur)) {
166 sc.ChangeState(SCE_ERLANG_COMMENT_DOC);
167 } else {
168 sc.ChangeState(old_style);
169 }
170
171 // Switch back to old state
172 sc.SetState(old_style);
173 parse_state = old_parse_state;
174 }
175
176 if (sc.atLineEnd) {
177 to_late_to_comment = false;
178 sc.ChangeState(old_style);
179 sc.SetState(SCE_ERLANG_DEFAULT);
180 parse_state = STATE_NULL;
181 }
182 } break;
183
184 /* -------------------------------------------------------------- */
185 /* Atoms ---------------------------------------------------------*/
186 case ATOM_UNQUOTED : {
187 if ('@' == sc.ch){
188 parse_state = NODE_NAME_UNQUOTED;
189 } else if (sc.ch == ':') {
190 // Searching for module name
191 if (sc.chNext == ' ') {
192 // error
193 sc.ChangeState(SCE_ERLANG_UNKNOWN);
194 parse_state = STATE_NULL;
195 } else {
196 sc.Forward();
197 if (isalnum(sc.ch)) {
198 sc.GetCurrent(cur, sizeof(cur));
199 sc.ChangeState(SCE_ERLANG_MODULES);
200 sc.SetState(SCE_ERLANG_MODULES);
201 }
202 }
203 } else if (!IsAWordChar(sc.ch)) {
204
205 sc.GetCurrent(cur, sizeof(cur));
206 if (reservedWords.InList(cur)) {
207 style = SCE_ERLANG_KEYWORD;
208 } else if (erlangBIFs.InList(cur)
209 && strcmp(cur,"erlang:")){
210 style = SCE_ERLANG_BIFS;
211 } else if (sc.ch == '(' || '/' == sc.ch){
212 style = SCE_ERLANG_FUNCTION_NAME;
213 } else {
214 style = SCE_ERLANG_ATOM;
215 }
216
217 sc.ChangeState(style);
218 sc.SetState(SCE_ERLANG_DEFAULT);
219 parse_state = STATE_NULL;
220 }
221
222 } break;
223
224 case ATOM_QUOTED : {
225 if ( '@' == sc.ch ){
226 parse_state = NODE_NAME_QUOTED;
227 } else if ('\'' == sc.ch && '\\' != sc.chPrev) {
228 sc.ChangeState(SCE_ERLANG_ATOM);
229 sc.ForwardSetState(SCE_ERLANG_DEFAULT);
230 parse_state = STATE_NULL;
231 }
232 } break;
233
234 /* -------------------------------------------------------------- */
235 /* Node names ----------------------------------------------------*/
236 case NODE_NAME_UNQUOTED : {
237 if ('@' == sc.ch) {
238 sc.SetState(SCE_ERLANG_DEFAULT);
239 parse_state = STATE_NULL;
240 } else if (!IsAWordChar(sc.ch)) {
241 sc.ChangeState(SCE_ERLANG_NODE_NAME);
242 sc.SetState(SCE_ERLANG_DEFAULT);
243 parse_state = STATE_NULL;
244 }
245 } break;
246
247 case NODE_NAME_QUOTED : {
248 if ('@' == sc.ch) {
249 sc.SetState(SCE_ERLANG_DEFAULT);
250 parse_state = STATE_NULL;
251 } else if ('\'' == sc.ch && '\\' != sc.chPrev) {
252 sc.ChangeState(SCE_ERLANG_NODE_NAME_QUOTED);
253 sc.ForwardSetState(SCE_ERLANG_DEFAULT);
254 parse_state = STATE_NULL;
255 }
256 } break;
257
258 /* -------------------------------------------------------------- */
259 /* Records -------------------------------------------------------*/
260 case RECORD_START : {
261 if ('\'' == sc.ch) {
262 parse_state = RECORD_QUOTED;
263 } else if (isalpha(sc.ch) && islower(sc.ch)) {
264 parse_state = RECORD_UNQUOTED;
265 } else { // error
266 sc.SetState(SCE_ERLANG_DEFAULT);
267 parse_state = STATE_NULL;
268 }
269 } break;
270
271 case RECORD_UNQUOTED : {
272 if (!IsAWordChar(sc.ch)) {
273 sc.ChangeState(SCE_ERLANG_RECORD);
274 sc.SetState(SCE_ERLANG_DEFAULT);
275 parse_state = STATE_NULL;
276 }
277 } break;
278
279 case RECORD_QUOTED : {
280 if ('\'' == sc.ch && '\\' != sc.chPrev) {
281 sc.ChangeState(SCE_ERLANG_RECORD_QUOTED);
282 sc.ForwardSetState(SCE_ERLANG_DEFAULT);
283 parse_state = STATE_NULL;
284 }
285 } break;
286
287 /* -------------------------------------------------------------- */
288 /* Macros --------------------------------------------------------*/
289 case MACRO_START : {
290 if ('\'' == sc.ch) {
291 parse_state = MACRO_QUOTED;
292 } else if (isalpha(sc.ch)) {
293 parse_state = MACRO_UNQUOTED;
294 } else { // error
295 sc.SetState(SCE_ERLANG_DEFAULT);
296 parse_state = STATE_NULL;
297 }
298 } break;
299
300 case MACRO_UNQUOTED : {
301 if (!IsAWordChar(sc.ch)) {
302 sc.ChangeState(SCE_ERLANG_MACRO);
303 sc.SetState(SCE_ERLANG_DEFAULT);
304 parse_state = STATE_NULL;
305 }
306 } break;
307
308 case MACRO_QUOTED : {
309 if ('\'' == sc.ch && '\\' != sc.chPrev) {
310 sc.ChangeState(SCE_ERLANG_MACRO_QUOTED);
311 sc.ForwardSetState(SCE_ERLANG_DEFAULT);
312 parse_state = STATE_NULL;
313 }
314 } break;
315
316 /* -------------------------------------------------------------- */
317 /* Numerics ------------------------------------------------------*/
318 /* Simple integer */
319 case NUMERAL_START : {
320 if (isdigit(sc.ch)) {
321 radix_digits *= 10;
322 radix_digits += sc.ch - '0'; // Assuming ASCII here!
323 } else if ('#' == sc.ch) {
324 if (2 > radix_digits || 36 < radix_digits) {
325 sc.SetState(SCE_ERLANG_DEFAULT);
326 parse_state = STATE_NULL;
327 } else {
328 parse_state = NUMERAL_BASE_VALUE;
329 }
330 } else if ('.' == sc.ch && isdigit(sc.chNext)) {
331 radix_digits = 0;
332 parse_state = NUMERAL_FLOAT;
333 } else if ('e' == sc.ch || 'E' == sc.ch) {
334 exponent_digits = 0;
335 parse_state = NUMERAL_EXPONENT;
336 } else {
337 radix_digits = 0;
338 sc.ChangeState(SCE_ERLANG_NUMBER);
339 sc.SetState(SCE_ERLANG_DEFAULT);
340 parse_state = STATE_NULL;
341 }
342 } break;
343
344 /* Integer in other base than 10 (x#yyy) */
345 case NUMERAL_BASE_VALUE : {
346 if (!is_radix(radix_digits,sc.ch)) {
347 radix_digits = 0;
348
349 if (!isalnum(sc.ch))
350 sc.ChangeState(SCE_ERLANG_NUMBER);
351
352 sc.SetState(SCE_ERLANG_DEFAULT);
353 parse_state = STATE_NULL;
354 }
355 } break;
356
357 /* Float (x.yyy) */
358 case NUMERAL_FLOAT : {
359 if ('e' == sc.ch || 'E' == sc.ch) {
360 exponent_digits = 0;
361 parse_state = NUMERAL_EXPONENT;
362 } else if (!isdigit(sc.ch)) {
363 sc.ChangeState(SCE_ERLANG_NUMBER);
364 sc.SetState(SCE_ERLANG_DEFAULT);
365 parse_state = STATE_NULL;
366 }
367 } break;
368
369 /* Exponent, either integer or float (xEyy, x.yyEzzz) */
370 case NUMERAL_EXPONENT : {
371 if (('-' == sc.ch || '+' == sc.ch)
372 && (isdigit(sc.chNext))) {
373 sc.Forward();
374 } else if (!isdigit(sc.ch)) {
375 if (0 < exponent_digits)
376 sc.ChangeState(SCE_ERLANG_NUMBER);
377 sc.SetState(SCE_ERLANG_DEFAULT);
378 parse_state = STATE_NULL;
379 } else {
380 ++exponent_digits;
381 }
382 } break;
383
384 /* -------------------------------------------------------------- */
385 /* Preprocessor --------------------------------------------------*/
386 case PREPROCESSOR : {
387 if (!IsAWordChar(sc.ch)) {
388
389 sc.GetCurrent(cur, sizeof(cur));
390 if (erlangPreproc.InList(cur)) {
391 style = SCE_ERLANG_PREPROC;
392 } else if (erlangModulesAtt.InList(cur)) {
393 style = SCE_ERLANG_MODULES_ATT;
394 }
395
396 sc.ChangeState(style);
397 sc.SetState(SCE_ERLANG_DEFAULT);
398 parse_state = STATE_NULL;
399 }
400 } break;
401
402 }
403
404 } /* End of : STATE_NULL != parse_state */
405 else
406 {
407 switch (sc.state) {
408 case SCE_ERLANG_VARIABLE : {
409 if (!IsAWordChar(sc.ch))
410 sc.SetState(SCE_ERLANG_DEFAULT);
411 } break;
412 case SCE_ERLANG_STRING : {
413 if (sc.ch == '\"' && sc.chPrev != '\\')
414 sc.ForwardSetState(SCE_ERLANG_DEFAULT);
415 } break;
416 case SCE_ERLANG_COMMENT : {
417 if (sc.atLineEnd)
418 sc.SetState(SCE_ERLANG_DEFAULT);
419 } break;
420 case SCE_ERLANG_CHARACTER : {
421 if (sc.chPrev == '\\') {
422 sc.ForwardSetState(SCE_ERLANG_DEFAULT);
423 } else if (sc.ch != '\\') {
424 sc.ForwardSetState(SCE_ERLANG_DEFAULT);
425 }
426 } break;
427 case SCE_ERLANG_OPERATOR : {
428 if (sc.chPrev == '.') {
429 if (sc.ch == '*' || sc.ch == '/' || sc.ch == '\\'
430 || sc.ch == '^') {
431 sc.ForwardSetState(SCE_ERLANG_DEFAULT);
432 } else if (sc.ch == '\'') {
433 sc.ForwardSetState(SCE_ERLANG_DEFAULT);
434 } else {
435 sc.SetState(SCE_ERLANG_DEFAULT);
436 }
437 } else {
438 sc.SetState(SCE_ERLANG_DEFAULT);
439 }
440 } break;
441 }
442 }
443
444 if (sc.state == SCE_ERLANG_DEFAULT) {
445 bool no_new_state = false;
446
447 switch (sc.ch) {
448 case '\"' : sc.SetState(SCE_ERLANG_STRING); break;
449 case '$' : sc.SetState(SCE_ERLANG_CHARACTER); break;
450 case '%' : {
451 parse_state = COMMENT;
452 sc.SetState(SCE_ERLANG_COMMENT);
453 } break;
454 case '#' : {
455 parse_state = RECORD_START;
456 sc.SetState(SCE_ERLANG_UNKNOWN);
457 } break;
458 case '?' : {
459 parse_state = MACRO_START;
460 sc.SetState(SCE_ERLANG_UNKNOWN);
461 } break;
462 case '\'' : {
463 parse_state = ATOM_QUOTED;
464 sc.SetState(SCE_ERLANG_UNKNOWN);
465 } break;
466 case '+' :
467 case '-' : {
468 if (IsADigit(sc.chNext)) {
469 parse_state = NUMERAL_START;
470 radix_digits = 0;
471 sc.SetState(SCE_ERLANG_UNKNOWN);
472 } else if (sc.ch != '+') {
473 parse_state = PREPROCESSOR;
474 sc.SetState(SCE_ERLANG_UNKNOWN);
475 }
476 } break;
477 default : no_new_state = true;
478 }
479
480 if (no_new_state) {
481 if (isdigit(sc.ch)) {
482 parse_state = NUMERAL_START;
483 radix_digits = sc.ch - '0';
484 sc.SetState(SCE_ERLANG_UNKNOWN);
485 } else if (isupper(sc.ch) || '_' == sc.ch) {
486 sc.SetState(SCE_ERLANG_VARIABLE);
487 } else if (isalpha(sc.ch)) {
488 parse_state = ATOM_UNQUOTED;
489 sc.SetState(SCE_ERLANG_UNKNOWN);
490 } else if (isoperator(static_cast<char>(sc.ch))
491 || sc.ch == '\\') {
492 sc.SetState(SCE_ERLANG_OPERATOR);
493 }
494 }
495 }
496
497 }
498 sc.Complete();
499 }
500
ClassifyErlangFoldPoint(Accessor & styler,int styleNext,Sci_Position keyword_start)501 static int ClassifyErlangFoldPoint(
502 Accessor &styler,
503 int styleNext,
504 Sci_Position keyword_start
505 ) {
506 int lev = 0;
507 if (styler.Match(keyword_start,"case")
508 || (
509 styler.Match(keyword_start,"fun")
510 && (SCE_ERLANG_FUNCTION_NAME != styleNext)
511 )
512 || styler.Match(keyword_start,"if")
513 || styler.Match(keyword_start,"query")
514 || styler.Match(keyword_start,"receive")
515 ) {
516 ++lev;
517 } else if (styler.Match(keyword_start,"end")) {
518 --lev;
519 }
520
521 return lev;
522 }
523
FoldErlangDoc(Sci_PositionU startPos,Sci_Position length,int initStyle,WordList **,Accessor & styler)524 static void FoldErlangDoc(
525 Sci_PositionU startPos, Sci_Position length, int initStyle,
526 WordList** /*keywordlists*/, Accessor &styler
527 ) {
528 Sci_PositionU endPos = startPos + length;
529 Sci_Position currentLine = styler.GetLine(startPos);
530 int lev;
531 int previousLevel = styler.LevelAt(currentLine) & SC_FOLDLEVELNUMBERMASK;
532 int currentLevel = previousLevel;
533 int styleNext = styler.StyleAt(startPos);
534 int style = initStyle;
535 int stylePrev;
536 Sci_Position keyword_start = 0;
537 char ch;
538 char chNext = styler.SafeGetCharAt(startPos);
539 bool atEOL;
540
541 for (Sci_PositionU i = startPos; i < endPos; i++) {
542 ch = chNext;
543 chNext = styler.SafeGetCharAt(i + 1);
544
545 // Get styles
546 stylePrev = style;
547 style = styleNext;
548 styleNext = styler.StyleAt(i + 1);
549 atEOL = ((ch == '\r') && (chNext != '\n')) || (ch == '\n');
550
551 if (stylePrev != SCE_ERLANG_KEYWORD
552 && style == SCE_ERLANG_KEYWORD) {
553 keyword_start = i;
554 }
555
556 // Fold on keywords
557 if (stylePrev == SCE_ERLANG_KEYWORD
558 && style != SCE_ERLANG_KEYWORD
559 && style != SCE_ERLANG_ATOM
560 ) {
561 currentLevel += ClassifyErlangFoldPoint(styler,
562 styleNext,
563 keyword_start);
564 }
565
566 // Fold on comments
567 if (style == SCE_ERLANG_COMMENT
568 || style == SCE_ERLANG_COMMENT_MODULE
569 || style == SCE_ERLANG_COMMENT_FUNCTION) {
570
571 if (ch == '%' && chNext == '{') {
572 currentLevel++;
573 } else if (ch == '%' && chNext == '}') {
574 currentLevel--;
575 }
576 }
577
578 // Fold on braces
579 if (style == SCE_ERLANG_OPERATOR) {
580 if (ch == '{' || ch == '(' || ch == '[') {
581 currentLevel++;
582 } else if (ch == '}' || ch == ')' || ch == ']') {
583 currentLevel--;
584 }
585 }
586
587
588 if (atEOL) {
589 lev = previousLevel;
590
591 if (currentLevel > previousLevel)
592 lev |= SC_FOLDLEVELHEADERFLAG;
593
594 if (lev != styler.LevelAt(currentLine))
595 styler.SetLevel(currentLine, lev);
596
597 currentLine++;
598 previousLevel = currentLevel;
599 }
600
601 }
602
603 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
604 styler.SetLevel(currentLine,
605 previousLevel
606 | (styler.LevelAt(currentLine) & ~SC_FOLDLEVELNUMBERMASK));
607 }
608
// Descriptions of the word lists used by the Erlang lexer, terminated by a
// null entry. The order is the order in which ColouriseErlangDoc indexes its
// keywordlists argument.
static const char * const erlangWordListDesc[] = {
	"Erlang Reserved words",       // keywordlists[0]
	"Erlang BIFs",                 // keywordlists[1]
	"Erlang Preprocessor",         // keywordlists[2]
	"Erlang Module Attributes",    // keywordlists[3]
	"Erlang Documentation",        // keywordlists[4]
	"Erlang Documentation Macro",  // keywordlists[5]
	0                              // end of list
};
618
619 LexerModule lmErlang(
620 SCLEX_ERLANG,
621 ColouriseErlangDoc,
622 "erlang",
623 FoldErlangDoc,
624 erlangWordListDesc);
625