1 // Scintilla source code edit control
2 /** @file LexErlang.cxx
3 ** Lexer for Erlang.
4 ** Written by Peter-Henry Mander, based on Matlab lexer by José Fonseca
5 **/
6 // Copyright 1998-2001 by Neil Hodgson <neilh@scintilla.org>
7 // The License.txt file describes the conditions under which this software may be distributed.
8
9 #include <stdlib.h>
10 #include <string.h>
11 #include <ctype.h>
12 #include <stdio.h>
13 #include <stdarg.h>
14
15 #include "Platform.h"
16
17 #include "PropSet.h"
18 #include "Accessor.h"
19 #include "StyleContext.h"
20 #include "KeyWords.h"
21 #include "Scintilla.h"
22 #include "SciLexer.h"
23
24 /*
25 TODO:
26 o _Param should be a new lexical type
27 */
28
// Tell whether ch is a valid digit in the given radix.
//
// radix: numeric base; Erlang Base#Digits literals allow bases 2..36, and
//        any other value makes the function return 0.
// ch:    character code to test. '0'..'9' count as 0..9 and letters of
//        either case count as 10..35.
// Returns 1 when ch is a digit whose value is below radix, otherwise 0.
//
// The character is classified with explicit ASCII range checks rather than
// the <ctype.h> functions: those are only defined for values representable
// as unsigned char (or EOF), and callers pass a plain int.
static int is_radix(int radix, int ch) {
	if ( radix < 2 || radix > 36 ) {
		return 0;
	}
	int digit;
	if ( ch >= '0' && ch <= '9' ) {
		digit = ch - '0';
	} else if ( ch >= 'a' && ch <= 'z' ) {
		digit = ch - 'a' + 10;
	} else if ( ch >= 'A' && ch <= 'Z' ) {
		digit = ch - 'A' + 10;
	} else {
		// Not an ASCII digit or letter, so not a digit in any radix.
		return 0;
	}
	return ( digit < radix ) ? 1 : 0;
}
47
// States of the token sub-parser that ColouriseErlangDoc runs on top of the
// Scintilla style state. Enumerator order (and therefore value) is kept
// as-is: STATE_NULL is 0 and PARSE_ERROR is 19.
enum atom_parse_state_t {
	STATE_NULL,                     // no token in progress; styling follows sc.state
	ATOM_UNQUOTED,                  // inside a bare word that may turn out to be an atom or keyword
	ATOM_QUOTED,                    // inside a '...' quoted atom
	ATOM_FUN_NAME,                  // bare word followed by '/', consuming the digits after it
	NODE_NAME_UNQUOTED,             // bare word in which an '@' has been seen
	NODE_NAME_QUOTED,               // quoted atom in which an '@' has been seen
	MACRO_START,                    // just after '?'
	MACRO_UNQUOTED,                 // inside a bare macro name
	MACRO_QUOTED,                   // inside a quoted macro name
	RECORD_START,                   // just after '#'
	RECORD_UNQUOTED,                // inside a bare record name
	RECORD_QUOTED,                  // inside a quoted record name
	NUMERAL_START,                  // inside the leading digits of a number
	NUMERAL_SIGNED,                 // handled by the colouriser; no transition in this file enters it
	NUMERAL_RADIX_LITERAL,          // inside the digits that follow Base#
	NUMERAL_SPECULATIVE_MANTISSA,   // saw '.' in default state; digit decides float vs operator
	NUMERAL_FLOAT_MANTISSA,         // inside the fractional digits of a float
	NUMERAL_FLOAT_EXPONENT,         // after 'e'/'E', before any exponent sign
	NUMERAL_FLOAT_SIGNED_EXPONENT,  // after the exponent's '+' or '-'
	PARSE_ERROR                     // resets to default; no transition in this file enters it
};
70
ColouriseErlangDoc(unsigned int startPos,int length,int initStyle,WordList * keywordlists[],Accessor & styler)71 static void ColouriseErlangDoc(unsigned int startPos, int length, int initStyle,
72 WordList *keywordlists[], Accessor &styler) {
73
74 WordList &keywords = *keywordlists[0];
75
76 styler.StartAt(startPos);
77
78 StyleContext sc(startPos, length, initStyle, styler);
79 atom_parse_state_t parse_state = STATE_NULL;
80 int radix_digits = 0;
81 int exponent_digits = 0;
82 for (; sc.More(); sc.Forward()) {
83 if ( STATE_NULL != parse_state ) {
84 switch (parse_state) {
85 case STATE_NULL:
86 sc.SetState(SCE_ERLANG_DEFAULT);
87 break;
88 case ATOM_UNQUOTED:
89 if ( '@' == sc.ch ){
90 parse_state = NODE_NAME_UNQUOTED;
91 } else if ( !isalnum(sc.ch) && sc.ch != '_' ) {
92 char s[100];
93 sc.GetCurrent(s, sizeof(s));
94 if (keywords.InList(s)) {
95 sc.ChangeState(SCE_ERLANG_KEYWORD);
96 sc.SetState(SCE_ERLANG_DEFAULT);
97 parse_state = STATE_NULL;
98 } else {
99 if ( '/' == sc.ch ) {
100 parse_state = ATOM_FUN_NAME;
101 } else {
102 sc.ChangeState(SCE_ERLANG_ATOM);
103 sc.SetState(SCE_ERLANG_DEFAULT);
104 parse_state = STATE_NULL;
105 }
106 }
107 }
108 break;
109 case ATOM_QUOTED:
110 if ( '@' == sc.ch ){
111 parse_state = NODE_NAME_QUOTED;
112 } else if ( '\'' == sc.ch && '\\' != sc.chPrev ) {
113 sc.ChangeState(SCE_ERLANG_ATOM);
114 sc.ForwardSetState(SCE_ERLANG_DEFAULT);
115 parse_state = STATE_NULL;
116 }
117 break;
118 case ATOM_FUN_NAME:
119 if ( !isdigit(sc.ch) ) {
120 sc.ChangeState(SCE_ERLANG_FUNCTION_NAME);
121 sc.SetState(SCE_ERLANG_DEFAULT);
122 parse_state = STATE_NULL;
123 }
124 break;
125 case NODE_NAME_QUOTED:
126 if ( '@' == sc.ch ) {
127 sc.SetState(SCE_ERLANG_DEFAULT);
128 parse_state = STATE_NULL;
129 } else if ( '\'' == sc.ch && '\\' != sc.chPrev ) {
130 sc.ChangeState(SCE_ERLANG_NODE_NAME);
131 sc.ForwardSetState(SCE_ERLANG_DEFAULT);
132 parse_state = STATE_NULL;
133 }
134 break;
135 case NODE_NAME_UNQUOTED:
136 if ( '@' == sc.ch ) {
137 sc.SetState(SCE_ERLANG_DEFAULT);
138 parse_state = STATE_NULL;
139 } else if ( !isalnum(sc.ch) && sc.ch != '_' ) {
140 sc.ChangeState(SCE_ERLANG_NODE_NAME);
141 sc.SetState(SCE_ERLANG_DEFAULT);
142 parse_state = STATE_NULL;
143 }
144 break;
145 case RECORD_START:
146 if ( '\'' == sc.ch ) {
147 parse_state = RECORD_QUOTED;
148 } else if (isalpha(sc.ch) && islower(sc.ch)) {
149 parse_state = RECORD_UNQUOTED;
150 } else { // error
151 sc.SetState(SCE_ERLANG_DEFAULT);
152 parse_state = STATE_NULL;
153 }
154 break;
155 case RECORD_QUOTED:
156 if ( '\'' == sc.ch && '\\' != sc.chPrev ) {
157 sc.ChangeState(SCE_ERLANG_RECORD);
158 sc.ForwardSetState(SCE_ERLANG_DEFAULT);
159 parse_state = STATE_NULL;
160 }
161 break;
162 case RECORD_UNQUOTED:
163 if ( !isalpha(sc.ch) && '_' != sc.ch ) {
164 sc.ChangeState(SCE_ERLANG_RECORD);
165 sc.SetState(SCE_ERLANG_DEFAULT);
166 parse_state = STATE_NULL;
167 }
168 break;
169 case MACRO_START:
170 if ( '\'' == sc.ch ) {
171 parse_state = MACRO_QUOTED;
172 } else if (isalpha(sc.ch)) {
173 parse_state = MACRO_UNQUOTED;
174 } else { // error
175 sc.SetState(SCE_ERLANG_DEFAULT);
176 parse_state = STATE_NULL;
177 }
178 break;
179 case MACRO_UNQUOTED:
180 if ( !isalpha(sc.ch) && '_' != sc.ch ) {
181 sc.ChangeState(SCE_ERLANG_MACRO);
182 sc.SetState(SCE_ERLANG_DEFAULT);
183 parse_state = STATE_NULL;
184 }
185 break;
186 case MACRO_QUOTED:
187 if ( '\'' == sc.ch && '\\' != sc.chPrev ) {
188 sc.ChangeState(SCE_ERLANG_MACRO);
189 sc.ForwardSetState(SCE_ERLANG_DEFAULT);
190 parse_state = STATE_NULL;
191 }
192 break;
193 case NUMERAL_START:
194 if ( isdigit(sc.ch) ) {
195 radix_digits *= 10;
196 radix_digits += sc.ch - '0'; // Assuming ASCII here!
197 } else if ( '#' == sc.ch ) {
198 if ( 2 > radix_digits || 16 < radix_digits) {
199 sc.SetState(SCE_ERLANG_DEFAULT);
200 parse_state = STATE_NULL;
201 } else {
202 parse_state = NUMERAL_RADIX_LITERAL;
203 }
204 } else if ( '.' == sc.ch && isdigit(sc.chNext)) {
205 radix_digits = 0;
206 parse_state = NUMERAL_FLOAT_MANTISSA;
207 } else if ( 'e' == sc.ch || 'E' == sc.ch ) {
208 exponent_digits = 0;
209 parse_state = NUMERAL_FLOAT_EXPONENT;
210 } else {
211 radix_digits = 0;
212 sc.ChangeState(SCE_ERLANG_NUMBER);
213 sc.SetState(SCE_ERLANG_DEFAULT);
214 parse_state = STATE_NULL;
215 }
216 break;
217 case NUMERAL_RADIX_LITERAL:
218 if ( !is_radix(radix_digits,sc.ch) ) {
219 radix_digits = 0;
220 if ( !isalnum(sc.ch) ) {
221 sc.ChangeState(SCE_ERLANG_NUMBER);
222 }
223 sc.SetState(SCE_ERLANG_DEFAULT);
224 parse_state = STATE_NULL;
225 }
226 break;
227 case NUMERAL_FLOAT_MANTISSA:
228 if ( 'e' == sc.ch || 'E' == sc.ch ) {
229 exponent_digits = 0;
230 parse_state = NUMERAL_FLOAT_EXPONENT;
231 } else if ( !isdigit(sc.ch) ) {
232 sc.ChangeState(SCE_ERLANG_NUMBER);
233 sc.SetState(SCE_ERLANG_DEFAULT);
234 parse_state = STATE_NULL;
235 }
236 break;
237 case NUMERAL_FLOAT_EXPONENT:
238 if ( '-' == sc.ch || '+' == sc.ch ) {
239 parse_state = NUMERAL_FLOAT_SIGNED_EXPONENT;
240 } else if ( !isdigit(sc.ch) ) {
241 if ( 0 < exponent_digits ) {
242 sc.ChangeState(SCE_ERLANG_NUMBER);
243 }
244 sc.SetState(SCE_ERLANG_DEFAULT);
245 parse_state = STATE_NULL;
246 } else {
247 ++exponent_digits;
248 }
249 break;
250 case NUMERAL_FLOAT_SIGNED_EXPONENT:
251 if ( !isdigit(sc.ch) ) {
252 if ( 0 < exponent_digits ) {
253 sc.ChangeState(SCE_ERLANG_NUMBER);
254 }
255 sc.SetState(SCE_ERLANG_DEFAULT);
256 parse_state = STATE_NULL;
257 } else {
258 ++exponent_digits;
259 }
260 break;
261 case NUMERAL_SIGNED:
262 if ( !isdigit(sc.ch) ) {
263 sc.ChangeState(SCE_ERLANG_NUMBER);
264 sc.SetState(SCE_ERLANG_DEFAULT);
265 parse_state = STATE_NULL;
266 } else if ( '.' == sc.ch ) {
267 parse_state = NUMERAL_FLOAT_MANTISSA;
268 }
269 break;
270 case NUMERAL_SPECULATIVE_MANTISSA:
271 if ( !isdigit(sc.ch) ) {
272 sc.ChangeState(SCE_ERLANG_OPERATOR);
273 sc.SetState(SCE_ERLANG_DEFAULT);
274 parse_state = STATE_NULL;
275 } else {
276 parse_state = NUMERAL_FLOAT_MANTISSA;
277 }
278 break;
279 case PARSE_ERROR:
280 sc.SetState(SCE_ERLANG_DEFAULT);
281 parse_state = STATE_NULL;
282 break;
283 }
284 } else if (sc.state == SCE_ERLANG_OPERATOR) {
285 if (sc.chPrev == '.') {
286 if (sc.ch == '*' || sc.ch == '/' || sc.ch == '\\' || sc.ch == '^') {
287 sc.ForwardSetState(SCE_ERLANG_DEFAULT);
288 } else if (sc.ch == '\'') {
289 sc.ForwardSetState(SCE_ERLANG_DEFAULT);
290 } else {
291 sc.SetState(SCE_ERLANG_DEFAULT);
292 }
293 } else {
294 sc.SetState(SCE_ERLANG_DEFAULT);
295 }
296 } else if (sc.state == SCE_ERLANG_VARIABLE) {
297 if (!isalnum(sc.ch) && sc.ch != '_') {
298 sc.SetState(SCE_ERLANG_DEFAULT);
299 }
300 } else if (sc.state == SCE_ERLANG_STRING) {
301 if (sc.ch == '\"' && sc.chPrev != '\\') {
302 sc.ForwardSetState(SCE_ERLANG_DEFAULT);
303 }
304 } else if (sc.state == SCE_ERLANG_COMMENT ) {
305 if (sc.atLineEnd) {
306 sc.SetState(SCE_ERLANG_DEFAULT);
307 }
308 } else if (sc.state == SCE_ERLANG_CHARACTER ) {
309 if ( sc.chPrev == '\\' ) {
310 sc.ForwardSetState(SCE_ERLANG_DEFAULT);
311 } else if ( sc.ch != '\\' ) {
312 sc.ForwardSetState(SCE_ERLANG_DEFAULT);
313 }
314 }
315
316 if (sc.state == SCE_ERLANG_DEFAULT) {
317 if (sc.ch == '%') {
318 sc.SetState(SCE_ERLANG_COMMENT);
319 } else if (sc.ch == '\"') {
320 sc.SetState(SCE_ERLANG_STRING);
321 } else if (sc.ch == '#') {
322 parse_state = RECORD_START;
323 sc.SetState(SCE_ERLANG_UNKNOWN);
324 } else if (sc.ch == '?') {
325 parse_state = MACRO_START;
326 sc.SetState(SCE_ERLANG_UNKNOWN);
327 } else if (sc.ch == '$') {
328 sc.SetState(SCE_ERLANG_CHARACTER);
329 } else if (sc.ch == '\'') {
330 parse_state = ATOM_QUOTED;
331 sc.SetState(SCE_ERLANG_UNKNOWN);
332 } else if ( isdigit(sc.ch) ) {
333 parse_state = NUMERAL_START;
334 radix_digits = sc.ch - '0';
335 sc.SetState(SCE_ERLANG_UNKNOWN);
336 } else if ( '.' == sc.ch ) {
337 parse_state = NUMERAL_SPECULATIVE_MANTISSA;
338 sc.SetState(SCE_ERLANG_UNKNOWN);
339 } else if (isalpha(sc.ch) && isupper(sc.ch)) {
340 sc.SetState(SCE_ERLANG_VARIABLE);
341 } else if (isalpha(sc.ch)) {
342 parse_state = ATOM_UNQUOTED;
343 sc.SetState(SCE_ERLANG_UNKNOWN);
344 } else if (isoperator(static_cast<char>(sc.ch)) || sc.ch == '\\') {
345 sc.SetState(SCE_ERLANG_OPERATOR);
346 }
347 }
348 }
349 sc.Complete();
350 }
351
ClassifyFoldPointErlang(Accessor & styler,int styleNext,int keyword_start)352 static int ClassifyFoldPointErlang(
353 Accessor &styler,
354 int styleNext,
355 int keyword_start
356 ) {
357 int lev = 0;
358 if ( styler.Match(keyword_start,"case")
359 || (
360 styler.Match(keyword_start,"fun")
361 && SCE_ERLANG_FUNCTION_NAME != styleNext)
362 || styler.Match(keyword_start,"if")
363 || styler.Match(keyword_start,"query")
364 || styler.Match(keyword_start,"receive")
365 ) {
366 ++lev;
367 } else if ( styler.Match(keyword_start,"end") ) {
368 --lev;
369 }
370 return lev;
371 }
372
373
FoldErlangDoc(unsigned int startPos,int length,int initStyle,WordList **,Accessor & styler)374 static void FoldErlangDoc(
375 unsigned int startPos, int length, int initStyle,
376 WordList** /*keywordlists*/, Accessor &styler
377 ) {
378 unsigned int endPos = startPos + length;
379 //~ int visibleChars = 0;
380 int lineCurrent = styler.GetLine(startPos);
381 int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
382 int levelCurrent = levelPrev;
383 char chNext = styler.SafeGetCharAt(startPos);
384 int styleNext = styler.StyleAt(startPos);
385 int style = initStyle;
386 int keyword_start = 0;
387
388 bool fold_keywords = true;
389 bool fold_comments = true;
390 bool fold_braces = true;
391 bool fold_function_clauses = false;
392 bool fold_clauses = false;
393
394 //int clause_level = 0;
395
396 for (unsigned int i = startPos; i < endPos; i++) {
397 char ch = chNext;
398 chNext = styler.SafeGetCharAt(i + 1);
399 int stylePrev = style;
400 style = styleNext;
401 styleNext = styler.StyleAt(i + 1);
402 bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
403
404 if ( (stylePrev != SCE_ERLANG_KEYWORD) && (style == SCE_ERLANG_KEYWORD) ) {
405 keyword_start = i;
406 }
407 if ( fold_keywords ) {
408 if ( (stylePrev == SCE_ERLANG_KEYWORD)
409 && (style != SCE_ERLANG_KEYWORD)
410 && (style != SCE_ERLANG_ATOM)
411 ) {
412 levelCurrent += ClassifyFoldPointErlang(styler,styleNext,keyword_start);
413 }
414 }
415
416 if ( fold_comments ) {
417 if (style == SCE_ERLANG_COMMENT) {
418 if ((ch == '%') && (chNext == '{')) {
419 levelCurrent++;
420 } else if ((ch == '%') && (chNext == '}')) {
421 levelCurrent--;
422 }
423 }
424 }
425
426 if ( fold_function_clauses ) {
427 if ( (SC_FOLDLEVELBASE == levelCurrent) /*&& (style == SCE_ERLANG_OPERATOR)*/ ) {
428 if ( (ch == '-') && (chNext == '>')) {
429 //~ fprintf(stderr,"levelCurrent=%d\n", levelCurrent);
430 //++clause_level;
431 //~ if ( 0 < clause_level )
432 ++levelCurrent;
433 }
434 }
435 //~ if ( (stylePrev != SCE_ERLANG_RECORD)
436 //~ && (style != SCE_ERLANG_NUMBER)
437 //~ && (style != SCE_ERLANG_STRING)
438 //~ && (style != SCE_ERLANG_COMMENT)
439 //~ ) {
440 if ( (SC_FOLDLEVELBASE+1 == levelCurrent) && (ch == '.') ) {
441 //--clause_level;
442 //~ if ( 0 == clause_level )
443 --levelCurrent;
444 }
445 //~ }
446 }
447
448 if ( fold_clauses ) {
449 if ( (0 < levelCurrent) && (style == SCE_ERLANG_OPERATOR) ) {
450 if ((ch == '-') && (chNext == '>')) {
451 levelCurrent++;
452 }
453 if ( (ch == ';') ) {
454 levelCurrent--;
455 }
456 }
457 if ( (stylePrev != SCE_ERLANG_RECORD)
458 && (style != SCE_ERLANG_NUMBER)
459 && (style != SCE_ERLANG_STRING)
460 && (style != SCE_ERLANG_COMMENT)
461 ) {
462 if ( (ch == '.') ) {
463 levelCurrent--;
464 }
465 }
466 if ( (stylePrev == SCE_ERLANG_KEYWORD)
467 && (style != SCE_ERLANG_KEYWORD)
468 && (style != SCE_ERLANG_ATOM)
469 && (
470 styler.Match(keyword_start,"end") // 'end' counted twice if fold_keywords too
471 || styler.Match(keyword_start,"after") )
472 ) {
473 levelCurrent--;
474 }
475 }
476
477 if ( fold_braces ) {
478 if (style == SCE_ERLANG_OPERATOR) {
479 if ( (ch == '{') || (ch == '(') || (ch == '[') ) {
480 levelCurrent++;
481 } else if ( (ch == '}') || (ch == ')') || (ch == ']') ) {
482 levelCurrent--;
483 }
484 }
485 }
486
487 if (atEOL) {
488 int lev = levelPrev;
489 //~ if (visibleChars == 0 && foldCompact)
490 //~ lev |= SC_FOLDLEVELWHITEFLAG;
491 //~ if ((levelCurrent > levelPrev) && (visibleChars > 0))
492 if ((levelCurrent > levelPrev)) {
493 lev |= SC_FOLDLEVELHEADERFLAG;
494 }
495 if (lev != styler.LevelAt(lineCurrent)) {
496 styler.SetLevel(lineCurrent, lev);
497 }
498 lineCurrent++;
499 levelPrev = levelCurrent;
500 //~ visibleChars = 0;
501 }
502 //~ if (!isspacechar(ch))
503 //~ visibleChars++;
504
505 }
506 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
507 int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
508 styler.SetLevel(lineCurrent, levelPrev | flagsNext);
509 }
510
// Descriptions of the keyword lists this lexer takes, ended by a 0 entry.
// Only one list is described; ColouriseErlangDoc reads its keywords from
// keywordlists[0].
static const char * const erlangWordListDesc[] = {
	"Keywords",
	0
};
515
// Lexer module object for Erlang. It is constructed from the SCLEX_ERLANG
// identifier, the colouriser, the name "erlang", the folder and the word
// list descriptions.
// NOTE(review): presumably constructing it also makes the lexer known to
// Scintilla -- LexerModule is declared outside this file; confirm there.
LexerModule lmErlang(
	SCLEX_ERLANG,
	ColouriseErlangDoc,
	"erlang",
	FoldErlangDoc,
	erlangWordListDesc);
522
523