1 // Scintilla source code edit control
2 /** @file LexErlang.cxx
3  ** Lexer for Erlang.
4  ** Written by Peter-Henry Mander, based on Matlab lexer by José Fonseca
5  **/
6 // Copyright 1998-2001 by Neil Hodgson <neilh@scintilla.org>
7 // The License.txt file describes the conditions under which this software may be distributed.
8 
9 #include <stdlib.h>
10 #include <string.h>
11 #include <ctype.h>
12 #include <stdio.h>
13 #include <stdarg.h>
14 
15 #include "Platform.h"
16 
17 #include "PropSet.h"
18 #include "Accessor.h"
19 #include "StyleContext.h"
20 #include "KeyWords.h"
21 #include "Scintilla.h"
22 #include "SciLexer.h"
23 
24 /*
25    TODO:
26    o  _Param should be a new lexical type
27 */
28 
/**
 * Test whether a character is a valid digit in the given radix.
 *
 * Digits are the ASCII characters '0'-'9' followed by 'a'-'f' / 'A'-'F'
 * (case-insensitive) for the values 10..15. The character is classified
 * with explicit range comparisons instead of the <ctype.h> functions, so
 * any int value (negative, or above the unsigned char range) is handled
 * safely and the result does not depend on the current locale.
 *
 * @param radix  Numeric base; only 2..16 are supported.
 * @param ch     Character code to test.
 * @return 1 if ch is a digit whose value is below radix, otherwise 0
 *         (including when radix itself is out of range).
 */
static int is_radix(int radix, int ch) {
   int digit;
   if ( radix < 2 || radix > 16 ) {
      return 0;
   }
   if ( ch >= '0' && ch <= '9' ) {
      digit = ch - '0';
   } else if ( ch >= 'a' && ch <= 'f' ) {
      digit = ch - 'a' + 10;
   } else if ( ch >= 'A' && ch <= 'F' ) {
      digit = ch - 'A' + 10;
   } else {
      return 0;
   }
   return ( digit < radix ) ? 1 : 0;
}
47 
// Sub-state of the token currently being scanned by ColouriseErlangDoc.
// While the state is anything other than STATE_NULL the text is held in a
// provisional style and only receives its final style once the token ends.
typedef enum {
   STATE_NULL = 0,                 // no token-level parse in progress
   ATOM_UNQUOTED,                  // after an alphabetic, non-uppercase first character
   ATOM_QUOTED,                    // after an opening single quote
   ATOM_FUN_NAME,                  // unquoted atom followed by '/', reading the digits after it
   NODE_NAME_UNQUOTED,             // '@' seen inside an unquoted atom
   NODE_NAME_QUOTED,               // '@' seen inside a quoted atom
   MACRO_START,                    // just after '?'
   MACRO_UNQUOTED,                 // '?' followed by a letter
   MACRO_QUOTED,                   // '?' followed by a single quote
   RECORD_START,                   // just after '#'
   RECORD_UNQUOTED,                // '#' followed by a lowercase letter
   RECORD_QUOTED,                  // '#' followed by a single quote
   NUMERAL_START,                  // reading the leading decimal digits of a number
   NUMERAL_SIGNED,                 // handled by the colouriser, but no transition there enters it
   NUMERAL_RADIX_LITERAL,          // after "<radix>#", reading digits of that radix
   NUMERAL_SPECULATIVE_MANTISSA,   // '.' seen outside a number: fraction start or operator
   NUMERAL_FLOAT_MANTISSA,         // reading the fractional digits
   NUMERAL_FLOAT_EXPONENT,         // after 'e' / 'E'
   NUMERAL_FLOAT_SIGNED_EXPONENT,  // after the exponent's '+' / '-'
   PARSE_ERROR                     // handled by resetting to default; no transition enters it
} atom_parse_state_t;
70 
ColouriseErlangDoc(unsigned int startPos,int length,int initStyle,WordList * keywordlists[],Accessor & styler)71 static void ColouriseErlangDoc(unsigned int startPos, int length, int initStyle,
72                                WordList *keywordlists[], Accessor &styler) {
73 
74 	WordList &keywords = *keywordlists[0];
75 
76 	styler.StartAt(startPos);
77 
78 	StyleContext sc(startPos, length, initStyle, styler);
79    atom_parse_state_t parse_state = STATE_NULL;
80    int radix_digits = 0;
81    int exponent_digits = 0;
82 	for (; sc.More(); sc.Forward()) {
83       if ( STATE_NULL != parse_state ) {
84          switch (parse_state) {
85          case STATE_NULL:
86 				sc.SetState(SCE_ERLANG_DEFAULT);
87             break;
88          case ATOM_UNQUOTED:
89             if ( '@' == sc.ch ){
90                parse_state = NODE_NAME_UNQUOTED;
91             } else if ( !isalnum(sc.ch) && sc.ch != '_' ) {
92                char s[100];
93                sc.GetCurrent(s, sizeof(s));
94                if (keywords.InList(s)) {
95                   sc.ChangeState(SCE_ERLANG_KEYWORD);
96                   sc.SetState(SCE_ERLANG_DEFAULT);
97                   parse_state = STATE_NULL;
98                } else {
99                   if ( '/' == sc.ch ) {
100                      parse_state = ATOM_FUN_NAME;
101                   } else {
102                      sc.ChangeState(SCE_ERLANG_ATOM);
103                      sc.SetState(SCE_ERLANG_DEFAULT);
104                      parse_state = STATE_NULL;
105                   }
106                }
107             }
108             break;
109          case ATOM_QUOTED:
110             if ( '@' == sc.ch ){
111                parse_state = NODE_NAME_QUOTED;
112             } else if ( '\'' == sc.ch && '\\' != sc.chPrev ) {
113                sc.ChangeState(SCE_ERLANG_ATOM);
114                sc.ForwardSetState(SCE_ERLANG_DEFAULT);
115                parse_state = STATE_NULL;
116             }
117             break;
118          case ATOM_FUN_NAME:
119             if ( !isdigit(sc.ch) ) {
120                sc.ChangeState(SCE_ERLANG_FUNCTION_NAME);
121                sc.SetState(SCE_ERLANG_DEFAULT);
122                parse_state = STATE_NULL;
123             }
124             break;
125          case NODE_NAME_QUOTED:
126             if ( '@' == sc.ch ) {
127                sc.SetState(SCE_ERLANG_DEFAULT);
128                parse_state = STATE_NULL;
129             } else if ( '\'' == sc.ch && '\\' != sc.chPrev ) {
130                sc.ChangeState(SCE_ERLANG_NODE_NAME);
131                sc.ForwardSetState(SCE_ERLANG_DEFAULT);
132                parse_state = STATE_NULL;
133             }
134             break;
135          case NODE_NAME_UNQUOTED:
136             if ( '@' == sc.ch ) {
137                sc.SetState(SCE_ERLANG_DEFAULT);
138                parse_state = STATE_NULL;
139             } else if ( !isalnum(sc.ch) && sc.ch != '_' ) {
140                sc.ChangeState(SCE_ERLANG_NODE_NAME);
141                sc.SetState(SCE_ERLANG_DEFAULT);
142                parse_state = STATE_NULL;
143             }
144             break;
145          case RECORD_START:
146             if ( '\'' == sc.ch ) {
147                parse_state = RECORD_QUOTED;
148             } else if (isalpha(sc.ch) && islower(sc.ch)) {
149                parse_state = RECORD_UNQUOTED;
150             } else { // error
151                sc.SetState(SCE_ERLANG_DEFAULT);
152                parse_state = STATE_NULL;
153             }
154             break;
155          case RECORD_QUOTED:
156             if ( '\'' == sc.ch && '\\' != sc.chPrev ) {
157                sc.ChangeState(SCE_ERLANG_RECORD);
158                sc.ForwardSetState(SCE_ERLANG_DEFAULT);
159                parse_state = STATE_NULL;
160             }
161             break;
162          case RECORD_UNQUOTED:
163             if ( !isalpha(sc.ch) && '_' != sc.ch ) {
164                sc.ChangeState(SCE_ERLANG_RECORD);
165                sc.SetState(SCE_ERLANG_DEFAULT);
166                parse_state = STATE_NULL;
167             }
168             break;
169          case MACRO_START:
170             if ( '\'' == sc.ch ) {
171                parse_state = MACRO_QUOTED;
172             } else if (isalpha(sc.ch)) {
173                parse_state = MACRO_UNQUOTED;
174             } else { // error
175                sc.SetState(SCE_ERLANG_DEFAULT);
176                parse_state = STATE_NULL;
177             }
178             break;
179          case MACRO_UNQUOTED:
180             if ( !isalpha(sc.ch) && '_' != sc.ch ) {
181                sc.ChangeState(SCE_ERLANG_MACRO);
182                sc.SetState(SCE_ERLANG_DEFAULT);
183                parse_state = STATE_NULL;
184             }
185             break;
186          case MACRO_QUOTED:
187             if ( '\'' == sc.ch && '\\' != sc.chPrev ) {
188                sc.ChangeState(SCE_ERLANG_MACRO);
189                sc.ForwardSetState(SCE_ERLANG_DEFAULT);
190                parse_state = STATE_NULL;
191             }
192             break;
193          case NUMERAL_START:
194             if ( isdigit(sc.ch) ) {
195                radix_digits *= 10;
196                radix_digits += sc.ch - '0'; // Assuming ASCII here!
197             } else if ( '#' == sc.ch ) {
198                if ( 2 > radix_digits || 16 < radix_digits) {
199                   sc.SetState(SCE_ERLANG_DEFAULT);
200                   parse_state = STATE_NULL;
201                } else {
202                   parse_state = NUMERAL_RADIX_LITERAL;
203                }
204             } else if ( '.' == sc.ch && isdigit(sc.chNext)) {
205                radix_digits = 0;
206                parse_state = NUMERAL_FLOAT_MANTISSA;
207             } else if ( 'e' == sc.ch || 'E' == sc.ch ) {
208                exponent_digits = 0;
209                parse_state = NUMERAL_FLOAT_EXPONENT;
210             } else {
211                radix_digits = 0;
212                sc.ChangeState(SCE_ERLANG_NUMBER);
213                sc.SetState(SCE_ERLANG_DEFAULT);
214                parse_state = STATE_NULL;
215             }
216             break;
217          case NUMERAL_RADIX_LITERAL:
218             if ( !is_radix(radix_digits,sc.ch) ) {
219                radix_digits = 0;
220                if ( !isalnum(sc.ch) ) {
221                   sc.ChangeState(SCE_ERLANG_NUMBER);
222                }
223                sc.SetState(SCE_ERLANG_DEFAULT);
224                parse_state = STATE_NULL;
225             }
226             break;
227          case NUMERAL_FLOAT_MANTISSA:
228             if ( 'e' == sc.ch || 'E' == sc.ch ) {
229                exponent_digits = 0;
230                parse_state = NUMERAL_FLOAT_EXPONENT;
231             } else if ( !isdigit(sc.ch) ) {
232                sc.ChangeState(SCE_ERLANG_NUMBER);
233                sc.SetState(SCE_ERLANG_DEFAULT);
234                parse_state = STATE_NULL;
235             }
236             break;
237          case NUMERAL_FLOAT_EXPONENT:
238             if ( '-' == sc.ch || '+' == sc.ch ) {
239                parse_state = NUMERAL_FLOAT_SIGNED_EXPONENT;
240             } else if ( !isdigit(sc.ch) ) {
241                if ( 0 < exponent_digits ) {
242                   sc.ChangeState(SCE_ERLANG_NUMBER);
243                }
244                sc.SetState(SCE_ERLANG_DEFAULT);
245                parse_state = STATE_NULL;
246             } else {
247                ++exponent_digits;
248             }
249             break;
250          case NUMERAL_FLOAT_SIGNED_EXPONENT:
251             if ( !isdigit(sc.ch) ) {
252                if ( 0 < exponent_digits ) {
253                   sc.ChangeState(SCE_ERLANG_NUMBER);
254                }
255                sc.SetState(SCE_ERLANG_DEFAULT);
256                parse_state = STATE_NULL;
257             } else {
258                ++exponent_digits;
259             }
260             break;
261          case NUMERAL_SIGNED:
262             if ( !isdigit(sc.ch) ) {
263                sc.ChangeState(SCE_ERLANG_NUMBER);
264                sc.SetState(SCE_ERLANG_DEFAULT);
265                parse_state = STATE_NULL;
266             } else if ( '.' == sc.ch ) {
267                parse_state = NUMERAL_FLOAT_MANTISSA;
268             }
269             break;
270          case NUMERAL_SPECULATIVE_MANTISSA:
271             if ( !isdigit(sc.ch) ) {
272                sc.ChangeState(SCE_ERLANG_OPERATOR);
273                sc.SetState(SCE_ERLANG_DEFAULT);
274                parse_state = STATE_NULL;
275             } else {
276                parse_state = NUMERAL_FLOAT_MANTISSA;
277             }
278             break;
279          case PARSE_ERROR:
280 				sc.SetState(SCE_ERLANG_DEFAULT);
281             parse_state = STATE_NULL;
282             break;
283          }
284       } else if (sc.state == SCE_ERLANG_OPERATOR) {
285 			if (sc.chPrev == '.') {
286 				if (sc.ch == '*' || sc.ch == '/' || sc.ch == '\\' || sc.ch == '^') {
287 					sc.ForwardSetState(SCE_ERLANG_DEFAULT);
288 				} else if (sc.ch == '\'') {
289 					sc.ForwardSetState(SCE_ERLANG_DEFAULT);
290 				} else {
291 					sc.SetState(SCE_ERLANG_DEFAULT);
292 				}
293 			} else {
294 				sc.SetState(SCE_ERLANG_DEFAULT);
295 			}
296 		} else if (sc.state == SCE_ERLANG_VARIABLE) {
297 			if (!isalnum(sc.ch) && sc.ch != '_') {
298             sc.SetState(SCE_ERLANG_DEFAULT);
299 			}
300 		} else if (sc.state == SCE_ERLANG_STRING) {
301 			if (sc.ch == '\"' && sc.chPrev != '\\') {
302 				sc.ForwardSetState(SCE_ERLANG_DEFAULT);
303 			}
304 		} else if (sc.state == SCE_ERLANG_COMMENT ) {
305 			if (sc.atLineEnd) {
306 				sc.SetState(SCE_ERLANG_DEFAULT);
307 			}
308       } else if (sc.state == SCE_ERLANG_CHARACTER ) {
309          if ( sc.chPrev == '\\' ) {
310             sc.ForwardSetState(SCE_ERLANG_DEFAULT);
311          } else if ( sc.ch != '\\' ) {
312             sc.ForwardSetState(SCE_ERLANG_DEFAULT);
313          }
314       }
315 
316 		if (sc.state == SCE_ERLANG_DEFAULT) {
317 			if (sc.ch == '%') {
318 				sc.SetState(SCE_ERLANG_COMMENT);
319 			} else if (sc.ch == '\"') {
320             sc.SetState(SCE_ERLANG_STRING);
321          } else if (sc.ch == '#') {
322             parse_state = RECORD_START;
323 				sc.SetState(SCE_ERLANG_UNKNOWN);
324          } else if (sc.ch == '?') {
325             parse_state = MACRO_START;
326 				sc.SetState(SCE_ERLANG_UNKNOWN);
327          } else if (sc.ch == '$') {
328 				sc.SetState(SCE_ERLANG_CHARACTER);
329          } else if (sc.ch == '\'') {
330             parse_state = ATOM_QUOTED;
331 				sc.SetState(SCE_ERLANG_UNKNOWN);
332 			} else if ( isdigit(sc.ch) ) {
333             parse_state = NUMERAL_START;
334             radix_digits = sc.ch - '0';
335 				sc.SetState(SCE_ERLANG_UNKNOWN);
336          } else if ( '.' == sc.ch ) {
337             parse_state = NUMERAL_SPECULATIVE_MANTISSA;
338 				sc.SetState(SCE_ERLANG_UNKNOWN);
339 			} else if (isalpha(sc.ch) && isupper(sc.ch)) {
340 				sc.SetState(SCE_ERLANG_VARIABLE);
341 			} else if (isalpha(sc.ch)) {
342             parse_state = ATOM_UNQUOTED;
343 				sc.SetState(SCE_ERLANG_UNKNOWN);
344 			} else if (isoperator(static_cast<char>(sc.ch)) || sc.ch == '\\') {
345 				sc.SetState(SCE_ERLANG_OPERATOR);
346 			}
347 		}
348 	}
349 	sc.Complete();
350 }
351 
ClassifyFoldPointErlang(Accessor & styler,int styleNext,int keyword_start)352 static int ClassifyFoldPointErlang(
353    Accessor &styler,
354    int styleNext,
355    int keyword_start
356 ) {
357 	int lev = 0;
358    if ( styler.Match(keyword_start,"case")
359       || (
360             styler.Match(keyword_start,"fun")
361          && SCE_ERLANG_FUNCTION_NAME != styleNext)
362       || styler.Match(keyword_start,"if")
363       || styler.Match(keyword_start,"query")
364       || styler.Match(keyword_start,"receive")
365    ) {
366       ++lev;
367    } else if ( styler.Match(keyword_start,"end") ) {
368       --lev;
369    }
370 	return lev;
371 }
372 
373 
FoldErlangDoc(unsigned int startPos,int length,int initStyle,WordList **,Accessor & styler)374 static void FoldErlangDoc(
375    unsigned int startPos, int length, int initStyle,
376    WordList** /*keywordlists*/, Accessor &styler
377 ) {
378 	unsigned int endPos = startPos + length;
379 	//~ int visibleChars = 0;
380 	int lineCurrent = styler.GetLine(startPos);
381 	int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
382 	int levelCurrent = levelPrev;
383 	char chNext = styler.SafeGetCharAt(startPos);
384 	int styleNext = styler.StyleAt(startPos);
385 	int style = initStyle;
386 	int keyword_start = 0;
387 
388    bool fold_keywords = true;
389    bool fold_comments = true;
390    bool fold_braces = true;
391    bool fold_function_clauses = false;
392    bool fold_clauses = false;
393 
394    //int clause_level = 0;
395 
396 	for (unsigned int i = startPos; i < endPos; i++) {
397 		char ch = chNext;
398 		chNext = styler.SafeGetCharAt(i + 1);
399 		int stylePrev = style;
400 		style = styleNext;
401 		styleNext = styler.StyleAt(i + 1);
402 		bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
403 
404       if ( (stylePrev != SCE_ERLANG_KEYWORD) && (style == SCE_ERLANG_KEYWORD) ) {
405          keyword_start = i;
406       }
407       if ( fold_keywords ) {
408          if ( (stylePrev == SCE_ERLANG_KEYWORD)
409             && (style != SCE_ERLANG_KEYWORD)
410             && (style != SCE_ERLANG_ATOM)
411          ) {
412             levelCurrent += ClassifyFoldPointErlang(styler,styleNext,keyword_start);
413          }
414       }
415 
416       if ( fold_comments ) {
417          if (style == SCE_ERLANG_COMMENT) {
418             if ((ch == '%') && (chNext == '{')) {
419                levelCurrent++;
420             } else if ((ch == '%') && (chNext == '}')) {
421                levelCurrent--;
422             }
423          }
424       }
425 
426       if ( fold_function_clauses ) {
427          if ( (SC_FOLDLEVELBASE == levelCurrent) /*&& (style == SCE_ERLANG_OPERATOR)*/ ) {
428             if ( (ch == '-') && (chNext == '>')) {
429                //~ fprintf(stderr,"levelCurrent=%d\n", levelCurrent);
430                //++clause_level;
431                //~ if ( 0 < clause_level )
432                   ++levelCurrent;
433             }
434          }
435          //~ if (  (stylePrev != SCE_ERLANG_RECORD)
436             //~ && (style != SCE_ERLANG_NUMBER)
437             //~ && (style != SCE_ERLANG_STRING)
438             //~ && (style != SCE_ERLANG_COMMENT)
439          //~ ) {
440             if ( (SC_FOLDLEVELBASE+1 == levelCurrent) && (ch == '.') ) {
441                //--clause_level;
442                //~ if ( 0 == clause_level )
443                   --levelCurrent;
444             }
445          //~ }
446       }
447 
448       if ( fold_clauses ) {
449          if ( (0 < levelCurrent) && (style == SCE_ERLANG_OPERATOR) ) {
450             if ((ch == '-') && (chNext == '>')) {
451                levelCurrent++;
452             }
453             if ( (ch == ';') ) {
454                levelCurrent--;
455             }
456          }
457          if ( (stylePrev != SCE_ERLANG_RECORD)
458             && (style != SCE_ERLANG_NUMBER)
459             && (style != SCE_ERLANG_STRING)
460             && (style != SCE_ERLANG_COMMENT)
461          ) {
462             if ( (ch == '.') ) {
463                levelCurrent--;
464             }
465          }
466          if (  (stylePrev == SCE_ERLANG_KEYWORD)
467             && (style != SCE_ERLANG_KEYWORD)
468             && (style != SCE_ERLANG_ATOM)
469             && (
470                styler.Match(keyword_start,"end") // 'end' counted twice if fold_keywords too
471                || styler.Match(keyword_start,"after") )
472          ) {
473             levelCurrent--;
474          }
475       }
476 
477       if ( fold_braces ) {
478          if (style == SCE_ERLANG_OPERATOR) {
479             if ( (ch == '{') || (ch == '(') || (ch == '[') ) {
480                levelCurrent++;
481             } else if ( (ch == '}') || (ch == ')') || (ch == ']') ) {
482                levelCurrent--;
483             }
484          }
485       }
486 
487 		if (atEOL) {
488 			int lev = levelPrev;
489 			//~ if (visibleChars == 0 && foldCompact)
490 				//~ lev |= SC_FOLDLEVELWHITEFLAG;
491 			//~ if ((levelCurrent > levelPrev) && (visibleChars > 0))
492 			if ((levelCurrent > levelPrev)) {
493 				lev |= SC_FOLDLEVELHEADERFLAG;
494          }
495 			if (lev != styler.LevelAt(lineCurrent)) {
496 				styler.SetLevel(lineCurrent, lev);
497 			}
498 			lineCurrent++;
499 			levelPrev = levelCurrent;
500 			//~ visibleChars = 0;
501 		}
502 		//~ if (!isspacechar(ch))
503 			//~ visibleChars++;
504 
505 	}
506 	// Fill in the real level of the next line, keeping the current flags as they will be filled in later
507 	int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
508 	styler.SetLevel(lineCurrent, levelPrev | flagsNext);
509 }
510 
// Descriptions of the word lists used by the Erlang lexer, terminated by a
// 0 entry. The single list is the one ColouriseErlangDoc reads as
// keywordlists[0].
static const char * const erlangWordListDesc[] = {
	"Keywords",
	0
};
515 
// Lexer module object for Erlang: ties the SCLEX_ERLANG identifier and the
// name "erlang" to the colourising and folding functions and to the
// word-list descriptions.
LexerModule lmErlang(
   SCLEX_ERLANG,
   ColouriseErlangDoc,
   "erlang",
   FoldErlangDoc,
   erlangWordListDesc);
522 
523