1 // Scintilla source code edit control
2 // Copyright 1998-2001 by Neil Hodgson <neilh@scintilla.org>
3 // The License.txt file describes the conditions under which this software may be distributed.
4 /** @file LexErlang.cxx
5  ** Lexer for Erlang.
6  ** Enhanced by Etienne 'Lenain' Girondel (lenaing@gmail.com)
7  ** Originally written by Peter-Henry Mander,
8  ** based on Matlab lexer by José Fonseca.
9  **/
10 
11 #include <stdlib.h>
12 #include <string.h>
13 #include <stdio.h>
14 #include <stdarg.h>
15 #include <assert.h>
16 #include <ctype.h>
17 
18 #include "ILexer.h"
19 #include "Scintilla.h"
20 #include "SciLexer.h"
21 
22 #include "WordList.h"
23 #include "LexAccessor.h"
24 #include "Accessor.h"
25 #include "StyleContext.h"
26 #include "CharacterSet.h"
27 #include "LexerModule.h"
28 
29 #ifdef SCI_NAMESPACE
30 using namespace Scintilla;
31 #endif
32 
/**
 * Tell whether @a ch is a valid digit in base @a radix.
 *
 * Digits are '0'-'9', followed by the letters 'A'-'Z' (either case) standing
 * for the values 10 to 35.
 *
 * The test uses explicit ASCII range checks instead of the <ctype.h>
 * functions: those have undefined behaviour when given a value that is
 * neither EOF nor representable as unsigned char, and nothing here restricts
 * the plain int argument to that range.
 *
 * @param radix Numeric base; only 2..36 is accepted.
 * @param ch    Character code to classify (ASCII assumed).
 * @return 1 when @a ch is a digit of base @a radix, 0 otherwise (including
 *         when @a radix itself is out of range).
 */
static int is_radix(int radix, int ch) {
	if (radix < 2 || radix > 36)
		return 0;

	int digit;
	if (ch >= '0' && ch <= '9') {
		digit = ch - '0';
	} else if (ch >= 'A' && ch <= 'Z') {
		digit = ch - 'A' + 10;
	} else if (ch >= 'a' && ch <= 'z') {
		digit = ch - 'a' + 10;
	} else {
		// Not an ASCII digit or letter: cannot be a digit in any base.
		return 0;
	}

	return (digit < radix) ? 1 : 0;
}
49 
/**
 * Scanner states used by ColouriseErlangDoc while it is in the middle of a
 * token whose final style is not known yet.
 * The enumerator order is significant only in that STATE_NULL is the first
 * (zero) value.
 */
enum atom_parse_state_t {
	STATE_NULL,             // No token in progress
	COMMENT,                // After a first '%'
	COMMENT_FUNCTION,       // After a second leading '%'
	COMMENT_MODULE,         // After a third leading '%'
	COMMENT_DOC,            // '@' tag inside a comment
	COMMENT_DOC_MACRO,      // '{@' tag inside a comment
	ATOM_UNQUOTED,          // Atom starting with a letter
	ATOM_QUOTED,            // Atom opened by a single quote
	NODE_NAME_UNQUOTED,     // Unquoted atom that contained '@'
	NODE_NAME_QUOTED,       // Quoted atom that contained '@'
	MACRO_START,            // Just after '?'
	MACRO_UNQUOTED,         // '?' followed by a letter
	MACRO_QUOTED,           // '?' followed by a single quote
	RECORD_START,           // Just after '#'
	RECORD_UNQUOTED,        // '#' followed by a lower-case letter
	RECORD_QUOTED,          // '#' followed by a single quote
	NUMERAL_START,          // Leading digits (or sign) of a number
	NUMERAL_BASE_VALUE,     // Digits after 'radix#'
	NUMERAL_FLOAT,          // Digits after the decimal point
	NUMERAL_EXPONENT,       // After 'e' / 'E'
	PREPROCESSOR            // After a '-' that does not start a number
};
73 
/**
 * Report whether @a ch may appear inside an unquoted word: a 7-bit character
 * that is alphanumeric or an underscore.
 */
static inline bool IsAWordChar(const int ch) {
	// Anything at or above 0x80, and the space character, is never a word character.
	if (ch >= 0x80 || ch == ' ')
		return false;

	if (isalnum(ch))
		return true;

	return ch == '_';
}
77 
ColouriseErlangDoc(unsigned int startPos,int length,int initStyle,WordList * keywordlists[],Accessor & styler)78 static void ColouriseErlangDoc(unsigned int startPos, int length, int initStyle,
79 								WordList *keywordlists[], Accessor &styler) {
80 
81 	StyleContext sc(startPos, length, initStyle, styler);
82 	WordList &reservedWords = *keywordlists[0];
83 	WordList &erlangBIFs = *keywordlists[1];
84 	WordList &erlangPreproc = *keywordlists[2];
85 	WordList &erlangModulesAtt = *keywordlists[3];
86 	WordList &erlangDoc = *keywordlists[4];
87 	WordList &erlangDocMacro = *keywordlists[5];
88 	int radix_digits = 0;
89 	int exponent_digits = 0;
90 	atom_parse_state_t parse_state = STATE_NULL;
91 	atom_parse_state_t old_parse_state = STATE_NULL;
92 	bool to_late_to_comment = false;
93 	char cur[100];
94 	int old_style = SCE_ERLANG_DEFAULT;
95 
96 	styler.StartAt(startPos);
97 
98 	for (; sc.More(); sc.Forward()) {
99 		int style = SCE_ERLANG_DEFAULT;
100 		if (STATE_NULL != parse_state) {
101 
102 			switch (parse_state) {
103 
104 				case STATE_NULL : sc.SetState(SCE_ERLANG_DEFAULT); break;
105 
106 			/* COMMENTS ------------------------------------------------------*/
107 				case COMMENT : {
108 					if (sc.ch != '%') {
109 						to_late_to_comment = true;
110 					} else if (!to_late_to_comment && sc.ch == '%') {
111 						// Switch to comment level 2 (Function)
112 						sc.ChangeState(SCE_ERLANG_COMMENT_FUNCTION);
113 						old_style = SCE_ERLANG_COMMENT_FUNCTION;
114 						parse_state = COMMENT_FUNCTION;
115 						sc.Forward();
116 					}
117 				}
118 				// V--- Falling through!
119 				case COMMENT_FUNCTION : {
120 					if (sc.ch != '%') {
121 						to_late_to_comment = true;
122 					} else if (!to_late_to_comment && sc.ch == '%') {
123 						// Switch to comment level 3 (Module)
124 						sc.ChangeState(SCE_ERLANG_COMMENT_MODULE);
125 						old_style = SCE_ERLANG_COMMENT_MODULE;
126 						parse_state = COMMENT_MODULE;
127 						sc.Forward();
128 					}
129 				}
130 				// V--- Falling through!
131 				case COMMENT_MODULE : {
132 					if (parse_state != COMMENT) {
133 						// Search for comment documentation
134 						if (sc.chNext == '@') {
135 							old_parse_state = parse_state;
136 							parse_state = ('{' == sc.ch)
137 											? COMMENT_DOC_MACRO
138 											: COMMENT_DOC;
139 							sc.ForwardSetState(sc.state);
140 						}
141 					}
142 
143 					// All comments types fall here.
144 					if (sc.atLineEnd) {
145 						to_late_to_comment = false;
146 						sc.SetState(SCE_ERLANG_DEFAULT);
147 						parse_state = STATE_NULL;
148 					}
149 				} break;
150 
151 				case COMMENT_DOC :
152 				// V--- Falling through!
153 				case COMMENT_DOC_MACRO : {
154 
155 					if (!isalnum(sc.ch)) {
156 						// Try to match documentation comment
157 						sc.GetCurrent(cur, sizeof(cur));
158 
159 						if (parse_state == COMMENT_DOC_MACRO
160 							&& erlangDocMacro.InList(cur)) {
161 								sc.ChangeState(SCE_ERLANG_COMMENT_DOC_MACRO);
162 								while (sc.ch != '}' && !sc.atLineEnd)
163 									sc.Forward();
164 						} else if (erlangDoc.InList(cur)) {
165 							sc.ChangeState(SCE_ERLANG_COMMENT_DOC);
166 						} else {
167 							sc.ChangeState(old_style);
168 						}
169 
170 						// Switch back to old state
171 						sc.SetState(old_style);
172 						parse_state = old_parse_state;
173 					}
174 
175 					if (sc.atLineEnd) {
176 						to_late_to_comment = false;
177 						sc.ChangeState(old_style);
178 						sc.SetState(SCE_ERLANG_DEFAULT);
179 						parse_state = STATE_NULL;
180 					}
181 				} break;
182 
183 			/* -------------------------------------------------------------- */
184 			/* Atoms ---------------------------------------------------------*/
185 				case ATOM_UNQUOTED : {
186 					if ('@' == sc.ch){
187 						parse_state = NODE_NAME_UNQUOTED;
188 					} else if (sc.ch == ':') {
189 						// Searching for module name
190 						if (sc.chNext == ' ') {
191 							// error
192 							sc.ChangeState(SCE_ERLANG_UNKNOWN);
193 							parse_state = STATE_NULL;
194 						} else {
195 							sc.Forward();
196 							if (isalnum(sc.ch))  {
197 								sc.GetCurrent(cur, sizeof(cur));
198 								sc.ChangeState(SCE_ERLANG_MODULES);
199 								sc.SetState(SCE_ERLANG_MODULES);
200 							}
201 						}
202 					} else if (!IsAWordChar(sc.ch)) {
203 
204 						sc.GetCurrent(cur, sizeof(cur));
205 						if (reservedWords.InList(cur)) {
206 							style = SCE_ERLANG_KEYWORD;
207 						} else if (erlangBIFs.InList(cur)
208 									&& strcmp(cur,"erlang:")){
209 							style = SCE_ERLANG_BIFS;
210 						} else if (sc.ch == '(' || '/' == sc.ch){
211 							style = SCE_ERLANG_FUNCTION_NAME;
212 						} else {
213 							style = SCE_ERLANG_ATOM;
214 						}
215 
216 						sc.ChangeState(style);
217 						sc.SetState(SCE_ERLANG_DEFAULT);
218 						parse_state = STATE_NULL;
219 					}
220 
221 				} break;
222 
223 				case ATOM_QUOTED : {
224 					if ( '@' == sc.ch ){
225 						parse_state = NODE_NAME_QUOTED;
226 					} else if ('\'' == sc.ch && '\\' != sc.chPrev) {
227 						sc.ChangeState(SCE_ERLANG_ATOM);
228 						sc.ForwardSetState(SCE_ERLANG_DEFAULT);
229 						parse_state = STATE_NULL;
230 					}
231 				} break;
232 
233 			/* -------------------------------------------------------------- */
234 			/* Node names ----------------------------------------------------*/
235 				case NODE_NAME_UNQUOTED : {
236 					if ('@' == sc.ch) {
237 						sc.SetState(SCE_ERLANG_DEFAULT);
238 						parse_state = STATE_NULL;
239 					} else if (!IsAWordChar(sc.ch)) {
240 						sc.ChangeState(SCE_ERLANG_NODE_NAME);
241 						sc.SetState(SCE_ERLANG_DEFAULT);
242 						parse_state = STATE_NULL;
243 					}
244 				} break;
245 
246 				case NODE_NAME_QUOTED : {
247 					if ('@' == sc.ch) {
248 						sc.SetState(SCE_ERLANG_DEFAULT);
249 						parse_state = STATE_NULL;
250 					} else if ('\'' == sc.ch && '\\' != sc.chPrev) {
251 						sc.ChangeState(SCE_ERLANG_NODE_NAME_QUOTED);
252 						sc.ForwardSetState(SCE_ERLANG_DEFAULT);
253 						parse_state = STATE_NULL;
254 					}
255 				} break;
256 
257 			/* -------------------------------------------------------------- */
258 			/* Records -------------------------------------------------------*/
259 				case RECORD_START : {
260 					if ('\'' == sc.ch) {
261 						parse_state = RECORD_QUOTED;
262 					} else if (isalpha(sc.ch) && islower(sc.ch)) {
263 						parse_state = RECORD_UNQUOTED;
264 					} else { // error
265 						sc.SetState(SCE_ERLANG_DEFAULT);
266 						parse_state = STATE_NULL;
267 					}
268 				} break;
269 
270 				case RECORD_UNQUOTED : {
271 					if (!IsAWordChar(sc.ch)) {
272 						sc.ChangeState(SCE_ERLANG_RECORD);
273 						sc.SetState(SCE_ERLANG_DEFAULT);
274 						parse_state = STATE_NULL;
275 					}
276 				} break;
277 
278 				case RECORD_QUOTED : {
279 					if ('\'' == sc.ch && '\\' != sc.chPrev) {
280 						sc.ChangeState(SCE_ERLANG_RECORD_QUOTED);
281 						sc.ForwardSetState(SCE_ERLANG_DEFAULT);
282 						parse_state = STATE_NULL;
283 					}
284 				} break;
285 
286 			/* -------------------------------------------------------------- */
287 			/* Macros --------------------------------------------------------*/
288 				case MACRO_START : {
289 					if ('\'' == sc.ch) {
290 						parse_state = MACRO_QUOTED;
291 					} else if (isalpha(sc.ch)) {
292 						parse_state = MACRO_UNQUOTED;
293 					} else { // error
294 						sc.SetState(SCE_ERLANG_DEFAULT);
295 						parse_state = STATE_NULL;
296 					}
297 				} break;
298 
299 				case MACRO_UNQUOTED : {
300 					if (!IsAWordChar(sc.ch)) {
301 						sc.ChangeState(SCE_ERLANG_MACRO);
302 						sc.SetState(SCE_ERLANG_DEFAULT);
303 						parse_state = STATE_NULL;
304 					}
305 				} break;
306 
307 				case MACRO_QUOTED : {
308 					if ('\'' == sc.ch && '\\' != sc.chPrev) {
309 						sc.ChangeState(SCE_ERLANG_MACRO_QUOTED);
310 						sc.ForwardSetState(SCE_ERLANG_DEFAULT);
311 						parse_state = STATE_NULL;
312 					}
313 				} break;
314 
315 			/* -------------------------------------------------------------- */
316 			/* Numerics ------------------------------------------------------*/
317 			/* Simple integer */
318 				case NUMERAL_START : {
319 					if (isdigit(sc.ch)) {
320 						radix_digits *= 10;
321 						radix_digits += sc.ch - '0'; // Assuming ASCII here!
322 					} else if ('#' == sc.ch) {
323 						if (2 > radix_digits || 36 < radix_digits) {
324 							sc.SetState(SCE_ERLANG_DEFAULT);
325 							parse_state = STATE_NULL;
326 						} else {
327 							parse_state = NUMERAL_BASE_VALUE;
328 						}
329 					} else if ('.' == sc.ch && isdigit(sc.chNext)) {
330 						radix_digits = 0;
331 						parse_state = NUMERAL_FLOAT;
332 					} else if ('e' == sc.ch || 'E' == sc.ch) {
333 						exponent_digits = 0;
334 						parse_state = NUMERAL_EXPONENT;
335 					} else {
336 						radix_digits = 0;
337 						sc.ChangeState(SCE_ERLANG_NUMBER);
338 						sc.SetState(SCE_ERLANG_DEFAULT);
339 						parse_state = STATE_NULL;
340 					}
341 				} break;
342 
343 			/* Integer in other base than 10 (x#yyy) */
344 				case NUMERAL_BASE_VALUE : {
345 					if (!is_radix(radix_digits,sc.ch)) {
346 						radix_digits = 0;
347 
348 						if (!isalnum(sc.ch))
349 							sc.ChangeState(SCE_ERLANG_NUMBER);
350 
351 						sc.SetState(SCE_ERLANG_DEFAULT);
352 						parse_state = STATE_NULL;
353 					}
354 				} break;
355 
356 			/* Float (x.yyy) */
357 				case NUMERAL_FLOAT : {
358 					if ('e' == sc.ch || 'E' == sc.ch) {
359 						exponent_digits = 0;
360 						parse_state = NUMERAL_EXPONENT;
361 					} else if (!isdigit(sc.ch)) {
362 						sc.ChangeState(SCE_ERLANG_NUMBER);
363 						sc.SetState(SCE_ERLANG_DEFAULT);
364 						parse_state = STATE_NULL;
365 					}
366 				} break;
367 
368 			/* Exponent, either integer or float (xEyy, x.yyEzzz) */
369 				case NUMERAL_EXPONENT : {
370 					if (('-' == sc.ch || '+' == sc.ch)
371 							&& (isdigit(sc.chNext))) {
372 						sc.Forward();
373 					} else if (!isdigit(sc.ch)) {
374 						if (0 < exponent_digits)
375 							sc.ChangeState(SCE_ERLANG_NUMBER);
376 						sc.SetState(SCE_ERLANG_DEFAULT);
377 						parse_state = STATE_NULL;
378 					} else {
379 						++exponent_digits;
380 					}
381 				} break;
382 
383 			/* -------------------------------------------------------------- */
384 			/* Preprocessor --------------------------------------------------*/
385 				case PREPROCESSOR : {
386 					if (!IsAWordChar(sc.ch)) {
387 
388 						sc.GetCurrent(cur, sizeof(cur));
389 						if (erlangPreproc.InList(cur)) {
390 							style = SCE_ERLANG_PREPROC;
391 						} else if (erlangModulesAtt.InList(cur)) {
392 							style = SCE_ERLANG_MODULES_ATT;
393 						}
394 
395 						sc.ChangeState(style);
396 						sc.SetState(SCE_ERLANG_DEFAULT);
397 						parse_state = STATE_NULL;
398 					}
399 				} break;
400 
401 			}
402 
403 		} /* End of : STATE_NULL != parse_state */
404 		else
405 		{
406 			switch (sc.state) {
407 				case SCE_ERLANG_VARIABLE : {
408 					if (!IsAWordChar(sc.ch))
409 						sc.SetState(SCE_ERLANG_DEFAULT);
410 				} break;
411 				case SCE_ERLANG_STRING : {
412 					 if (sc.ch == '\"' && sc.chPrev != '\\')
413 						sc.ForwardSetState(SCE_ERLANG_DEFAULT);
414 				} break;
415 				case SCE_ERLANG_COMMENT : {
416 					 if (sc.atLineEnd)
417 						sc.SetState(SCE_ERLANG_DEFAULT);
418 				} break;
419 				case SCE_ERLANG_CHARACTER : {
420 					if (sc.chPrev == '\\') {
421 						sc.ForwardSetState(SCE_ERLANG_DEFAULT);
422 					} else if (sc.ch != '\\') {
423 						sc.ForwardSetState(SCE_ERLANG_DEFAULT);
424 					}
425 				} break;
426 				case SCE_ERLANG_OPERATOR : {
427 					if (sc.chPrev == '.') {
428 						if (sc.ch == '*' || sc.ch == '/' || sc.ch == '\\'
429 							|| sc.ch == '^') {
430 							sc.ForwardSetState(SCE_ERLANG_DEFAULT);
431 						} else if (sc.ch == '\'') {
432 							sc.ForwardSetState(SCE_ERLANG_DEFAULT);
433 						} else {
434 							sc.SetState(SCE_ERLANG_DEFAULT);
435 						}
436 					} else {
437 						sc.SetState(SCE_ERLANG_DEFAULT);
438 					}
439 				} break;
440 			}
441 		}
442 
443 		if (sc.state == SCE_ERLANG_DEFAULT) {
444 			bool no_new_state = false;
445 
446 			switch (sc.ch) {
447 				case '\"' : sc.SetState(SCE_ERLANG_STRING); break;
448 				case '$' : sc.SetState(SCE_ERLANG_CHARACTER); break;
449 				case '%' : {
450 					parse_state = COMMENT;
451 					sc.SetState(SCE_ERLANG_COMMENT);
452 				} break;
453 				case '#' : {
454 					parse_state = RECORD_START;
455 					sc.SetState(SCE_ERLANG_UNKNOWN);
456 				} break;
457 				case '?' : {
458 					parse_state = MACRO_START;
459 					sc.SetState(SCE_ERLANG_UNKNOWN);
460 				} break;
461 				case '\'' : {
462 					parse_state = ATOM_QUOTED;
463 					sc.SetState(SCE_ERLANG_UNKNOWN);
464 				} break;
465 				case '+' :
466 				case '-' : {
467 					if (IsADigit(sc.chNext)) {
468 						parse_state = NUMERAL_START;
469 						radix_digits = 0;
470 						sc.SetState(SCE_ERLANG_UNKNOWN);
471 					} else if (sc.ch != '+') {
472 						parse_state = PREPROCESSOR;
473 						sc.SetState(SCE_ERLANG_UNKNOWN);
474 					}
475 				} break;
476 				default : no_new_state = true;
477 			}
478 
479 			if (no_new_state) {
480 				if (isdigit(sc.ch)) {
481 					parse_state = NUMERAL_START;
482 					radix_digits = sc.ch - '0';
483 					sc.SetState(SCE_ERLANG_UNKNOWN);
484 				} else if (isupper(sc.ch) || '_' == sc.ch) {
485 					sc.SetState(SCE_ERLANG_VARIABLE);
486 				} else if (isalpha(sc.ch)) {
487 					parse_state = ATOM_UNQUOTED;
488 					sc.SetState(SCE_ERLANG_UNKNOWN);
489 				} else if (isoperator(static_cast<char>(sc.ch))
490 							|| sc.ch == '\\') {
491 					sc.SetState(SCE_ERLANG_OPERATOR);
492 				}
493 			}
494 		}
495 
496 	}
497 	sc.Complete();
498 }
499 
ClassifyErlangFoldPoint(Accessor & styler,int styleNext,int keyword_start)500 static int ClassifyErlangFoldPoint(
501 	Accessor &styler,
502 	int styleNext,
503 	int keyword_start
504 ) {
505 	int lev = 0;
506 	if (styler.Match(keyword_start,"case")
507 		|| (
508 			styler.Match(keyword_start,"fun")
509 			&& (SCE_ERLANG_FUNCTION_NAME != styleNext)
510 			)
511 		|| styler.Match(keyword_start,"if")
512 		|| styler.Match(keyword_start,"query")
513 		|| styler.Match(keyword_start,"receive")
514 	) {
515 		++lev;
516 	} else if (styler.Match(keyword_start,"end")) {
517 		--lev;
518 	}
519 
520 	return lev;
521 }
522 
FoldErlangDoc(unsigned int startPos,int length,int initStyle,WordList **,Accessor & styler)523 static void FoldErlangDoc(
524 	unsigned int startPos, int length, int initStyle,
525 	WordList** /*keywordlists*/, Accessor &styler
526 ) {
527 	unsigned int endPos = startPos + length;
528 	int currentLine = styler.GetLine(startPos);
529 	int lev;
530 	int previousLevel = styler.LevelAt(currentLine) & SC_FOLDLEVELNUMBERMASK;
531 	int currentLevel = previousLevel;
532 	int styleNext = styler.StyleAt(startPos);
533 	int style = initStyle;
534 	int stylePrev;
535 	int keyword_start = 0;
536 	char ch;
537 	char chNext = styler.SafeGetCharAt(startPos);
538 	bool atEOL;
539 
540 	for (unsigned int i = startPos; i < endPos; i++) {
541 		ch = chNext;
542 		chNext = styler.SafeGetCharAt(i + 1);
543 
544 		// Get styles
545 		stylePrev = style;
546 		style = styleNext;
547 		styleNext = styler.StyleAt(i + 1);
548 		atEOL = ((ch == '\r') && (chNext != '\n')) || (ch == '\n');
549 
550 		if (stylePrev != SCE_ERLANG_KEYWORD
551 			&& style == SCE_ERLANG_KEYWORD) {
552 			keyword_start = i;
553 		}
554 
555 		// Fold on keywords
556 		if (stylePrev == SCE_ERLANG_KEYWORD
557 			&& style != SCE_ERLANG_KEYWORD
558 			&& style != SCE_ERLANG_ATOM
559 		) {
560 			currentLevel += ClassifyErlangFoldPoint(styler,
561 													styleNext,
562 													keyword_start);
563 		}
564 
565 		// Fold on comments
566 		if (style == SCE_ERLANG_COMMENT
567 			|| style == SCE_ERLANG_COMMENT_MODULE
568 			|| style == SCE_ERLANG_COMMENT_FUNCTION) {
569 
570 			if (ch == '%' && chNext == '{') {
571 				currentLevel++;
572 			} else if (ch == '%' && chNext == '}') {
573 				currentLevel--;
574 			}
575 		}
576 
577 		// Fold on braces
578 		if (style == SCE_ERLANG_OPERATOR) {
579 			if (ch == '{' || ch == '(' || ch == '[') {
580 				currentLevel++;
581 			} else if (ch == '}' || ch == ')' || ch == ']') {
582 				currentLevel--;
583 			}
584 		}
585 
586 
587 		if (atEOL) {
588 			lev = previousLevel;
589 
590 			if (currentLevel > previousLevel)
591 				lev |= SC_FOLDLEVELHEADERFLAG;
592 
593 			if (lev != styler.LevelAt(currentLine))
594 				styler.SetLevel(currentLine, lev);
595 
596 			currentLine++;
597 			previousLevel = currentLevel;
598 		}
599 
600 	}
601 
602 	// Fill in the real level of the next line, keeping the current flags as they will be filled in later
603 	styler.SetLevel(currentLine,
604 					previousLevel
605 					| (styler.LevelAt(currentLine) & ~SC_FOLDLEVELNUMBERMASK));
606 }
607 
// Descriptions of the keyword sets used by the Erlang lexer.
// The position of each entry is the keywordlists[] index that
// ColouriseErlangDoc reads for it; the list ends with a null entry.
static const char * const erlangWordListDesc[] = {
	"Erlang Reserved words",          // keywordlists[0]
	"Erlang BIFs",                    // keywordlists[1]
	"Erlang Preprocessor",            // keywordlists[2]
	"Erlang Module Attributes",       // keywordlists[3]
	"Erlang Documentation",           // keywordlists[4]
	"Erlang Documentation Macro",     // keywordlists[5]
	0                                 // terminator
};
617 
618 LexerModule lmErlang(
619 	SCLEX_ERLANG,
620 	ColouriseErlangDoc,
621 	"erlang",
622 	FoldErlangDoc,
623 	erlangWordListDesc);
624