1 // -*- coding: utf-8 -*-
2 // Scintilla source code edit control
3 /**
4 * @file LexModula.cxx
5 * @author Dariusz "DKnoto" Knociński
6 * @date 2011/02/03
7 * @brief Lexer for Modula-2/3 documents.
8 */
9 // The License.txt file describes the conditions under which this software may
10 // be distributed.
11
12 #include <stdlib.h>
13 #include <string.h>
14 #include <stdio.h>
15 #include <stdarg.h>
16 #include <assert.h>
17 #include <ctype.h>
18
19 #include "ILexer.h"
20 #include "Scintilla.h"
21 #include "SciLexer.h"
22
23 #include "PropSetSimple.h"
24 #include "WordList.h"
25 #include "LexAccessor.h"
26 #include "Accessor.h"
27 #include "StyleContext.h"
28 #include "CharacterSet.h"
29 #include "LexerModule.h"
30
31 #ifdef SCI_NAMESPACE
32 using namespace Scintilla;
33 #endif
34
/*
 * DEBUG_STATE( p, c ) reports a character the lexer could not classify.
 * With DEBUG_LEX_MODULA defined it prints the position and the character to
 * stderr; otherwise it expands to an empty statement.
 *
 * Both expansions are a single statement WITHOUT a trailing semicolon, so the
 * usual "DEBUG_STATE( ... );" spelling is safe even as the body of an if/else.
 * The arguments are converted to int so that they always agree with the
 * %d / %c conversions, whatever types the caller passes.
 */
#ifdef DEBUG_LEX_MODULA
#define DEBUG_STATE( p, c ) \
	do { \
		fprintf( stderr, "Unknown state: currentPos = %d, char = '%c'\n", \
		         static_cast<int>( p ), static_cast<int>( c ) ); \
	} while( 0 )
#else
#define DEBUG_STATE( p, c ) do { } while( 0 )
#endif
41
/**
 * Tell whether @a ch is a valid digit for the numeric @a base.
 *
 * Only characters in the window '0'..'f' are ever accepted. For bases up to
 * 10 the digit must be below '0' + base. For larger bases every decimal digit
 * is accepted, as are the first (base - 10) letters counted from 'A' or from
 * 'a'.
 *
 * @param ch    character code to test
 * @param base  numeric base
 * @return true when @a ch is a digit of @a base
 */
static inline bool IsDigitOfBase( unsigned ch, unsigned base ) {
	// Nothing outside '0'..'f' is accepted, whatever the base.
	if( ch < '0' || ch > 'f' )
		return false;

	// Pure decimal-digit bases: one upper bound decides.
	if( base <= 10 )
		return ch < ( '0' + base );

	// Decimal digits are valid in every base above 10.
	if( ch <= '9' )
		return true;

	const unsigned letters = base - 10;
	const bool upperOk = ( ch >= 'A' ) && ( ch < ( 'A' + letters ) );
	const bool lowerOk = ( ch >= 'a' ) && ( ch < ( 'a' + letters ) );
	return upperOk || lowerOk;
}
58
IsOperator(StyleContext & sc,WordList & op)59 static inline unsigned IsOperator( StyleContext & sc, WordList & op ) {
60 int i;
61 char s[3];
62
63 s[0] = sc.ch;
64 s[1] = sc.chNext;
65 s[2] = 0;
66 for( i = 0; i < op.len; i++ ) {
67 if( ( strlen( op.words[i] ) == 2 ) &&
68 ( s[0] == op.words[i][0] && s[1] == op.words[i][1] ) ) {
69 return 2;
70 }
71 }
72 s[1] = 0;
73 for( i = 0; i < op.len; i++ ) {
74 if( ( strlen( op.words[i] ) == 1 ) &&
75 ( s[0] == op.words[i][0] ) ) {
76 return 1;
77 }
78 }
79 return 0;
80 }
81
IsEOL(Accessor & styler,unsigned curPos)82 static inline bool IsEOL( Accessor &styler, unsigned curPos ) {
83 unsigned ch = styler.SafeGetCharAt( curPos );
84 if( ( ch == '\r' && styler.SafeGetCharAt( curPos + 1 ) == '\n' ) ||
85 ( ch == '\n' ) ) {
86 return true;
87 }
88 return false;
89 }
90
checkStatement(Accessor & styler,int & curPos,const char * stt,bool spaceAfter=true)91 static inline bool checkStatement(
92 Accessor &styler,
93 int &curPos,
94 const char *stt, bool spaceAfter = true ) {
95 int len = static_cast<int>(strlen( stt ));
96 int i;
97 for( i = 0; i < len; i++ ) {
98 if( styler.SafeGetCharAt( curPos + i ) != stt[i] ) {
99 return false;
100 }
101 }
102 if( spaceAfter ) {
103 if( ! isspace( styler.SafeGetCharAt( curPos + i ) ) ) {
104 return false;
105 }
106 }
107 curPos += ( len - 1 );
108 return true;
109 }
110
checkEndSemicolon(Accessor & styler,int & curPos,int endPos)111 static inline bool checkEndSemicolon(
112 Accessor &styler,
113 int &curPos, int endPos )
114 {
115 const char *stt = "END";
116 int len = static_cast<int>(strlen( stt ));
117 int i;
118 for( i = 0; i < len; i++ ) {
119 if( styler.SafeGetCharAt( curPos + i ) != stt[i] ) {
120 return false;
121 }
122 }
123 while( isspace( styler.SafeGetCharAt( curPos + i ) ) ) {
124 i++;
125 if( ( curPos + i ) >= endPos ) return false;
126 }
127 if( styler.SafeGetCharAt( curPos + i ) != ';' ) {
128 return false;
129 }
130 curPos += ( i - 1 );
131 return true;
132 }
133
checkKeyIdentOper(Accessor & styler,int & curPos,int endPos,const char * stt,const char etk)134 static inline bool checkKeyIdentOper(
135
136 Accessor &styler,
137 int &curPos, int endPos,
138 const char *stt, const char etk ) {
139 int newPos = curPos;
140 if( ! checkStatement( styler, newPos, stt ) )
141 return false;
142 newPos++;
143 if( newPos >= endPos )
144 return false;
145 if( ! isspace( styler.SafeGetCharAt( newPos ) ) )
146 return false;
147 newPos++;
148 if( newPos >= endPos )
149 return false;
150 while( isspace( styler.SafeGetCharAt( newPos ) ) ) {
151 newPos++;
152 if( newPos >= endPos )
153 return false;
154 }
155 if( ! isalpha( styler.SafeGetCharAt( newPos ) ) )
156 return false;
157 newPos++;
158 if( newPos >= endPos )
159 return false;
160 char ch;
161 ch = styler.SafeGetCharAt( newPos );
162 while( isalpha( ch ) || isdigit( ch ) || ch == '_' ) {
163 newPos++;
164 if( newPos >= endPos ) return false;
165 ch = styler.SafeGetCharAt( newPos );
166 }
167 while( isspace( styler.SafeGetCharAt( newPos ) ) ) {
168 newPos++;
169 if( newPos >= endPos ) return false;
170 }
171 if( styler.SafeGetCharAt( newPos ) != etk )
172 return false;
173 curPos = newPos;
174 return true;
175 }
176
FoldModulaDoc(unsigned int startPos,int length,int,WordList * [],Accessor & styler)177 static void FoldModulaDoc( unsigned int startPos,
178 int length,
179 int , WordList *[],
180 Accessor &styler)
181 {
182 int curLine = styler.GetLine(startPos);
183 int curLevel = SC_FOLDLEVELBASE;
184 int endPos = startPos + length;
185 if( curLine > 0 )
186 curLevel = styler.LevelAt( curLine - 1 ) >> 16;
187 int curPos = startPos;
188 int style = styler.StyleAt( curPos );
189 int visChars = 0;
190 int nextLevel = curLevel;
191
192 while( curPos < endPos ) {
193 if( ! isspace( styler.SafeGetCharAt( curPos ) ) ) visChars++;
194
195 switch( style ) {
196 case SCE_MODULA_COMMENT:
197 if( checkStatement( styler, curPos, "(*" ) )
198 nextLevel++;
199 else
200 if( checkStatement( styler, curPos, "*)" ) )
201 nextLevel--;
202 break;
203
204 case SCE_MODULA_DOXYCOMM:
205 if( checkStatement( styler, curPos, "(**", false ) )
206 nextLevel++;
207 else
208 if( checkStatement( styler, curPos, "*)" ) )
209 nextLevel--;
210 break;
211
212 case SCE_MODULA_KEYWORD:
213 if( checkStatement( styler, curPos, "IF" ) )
214 nextLevel++;
215 else
216 if( checkStatement( styler, curPos, "BEGIN" ) )
217 nextLevel++;
218 else
219 if( checkStatement( styler, curPos, "TRY" ) )
220 nextLevel++;
221 else
222 if( checkStatement( styler, curPos, "LOOP" ) )
223 nextLevel++;
224 else
225 if( checkStatement( styler, curPos, "FOR" ) )
226 nextLevel++;
227 else
228 if( checkStatement( styler, curPos, "WHILE" ) )
229 nextLevel++;
230 else
231 if( checkStatement( styler, curPos, "REPEAT" ) )
232 nextLevel++;
233 else
234 if( checkStatement( styler, curPos, "UNTIL" ) )
235 nextLevel--;
236 else
237 if( checkStatement( styler, curPos, "WITH" ) )
238 nextLevel++;
239 else
240 if( checkStatement( styler, curPos, "CASE" ) )
241 nextLevel++;
242 else
243 if( checkStatement( styler, curPos, "TYPECASE" ) )
244 nextLevel++;
245 else
246 if( checkStatement( styler, curPos, "LOCK" ) )
247 nextLevel++;
248 else
249 if( checkKeyIdentOper( styler, curPos, endPos, "PROCEDURE", '(' ) )
250 nextLevel++;
251 else
252 if( checkKeyIdentOper( styler, curPos, endPos, "END", ';' ) ) {
253 int cln = curLine;
254 int clv_old = curLevel;
255 int pos;
256 char ch;
257 int clv_new;
258 while( cln > 0 ) {
259 clv_new = styler.LevelAt( cln - 1 ) >> 16;
260 if( clv_new < clv_old ) {
261 nextLevel--;
262 pos = styler.LineStart( cln );
263 while( ( ch = styler.SafeGetCharAt( pos ) ) != '\n' ) {
264 if( ch == 'P' ) {
265 if( styler.StyleAt(pos) == SCE_MODULA_KEYWORD ) {
266 if( checkKeyIdentOper( styler, pos, endPos,
267 "PROCEDURE", '(' ) ) {
268 break;
269 }
270 }
271 }
272 pos++;
273 }
274 clv_old = clv_new;
275 }
276 cln--;
277 }
278 }
279 else
280 if( checkKeyIdentOper( styler, curPos, endPos, "END", '.' ) )
281 nextLevel--;
282 else
283 if( checkEndSemicolon( styler, curPos, endPos ) )
284 nextLevel--;
285 else {
286 while( styler.StyleAt( curPos + 1 ) == SCE_MODULA_KEYWORD )
287 curPos++;
288 }
289 break;
290
291 default:
292 break;
293 }
294
295 if( IsEOL( styler, curPos ) || ( curPos == endPos - 1 ) ) {
296 int efectiveLevel = curLevel | nextLevel << 16;
297 if( visChars == 0 )
298 efectiveLevel |= SC_FOLDLEVELWHITEFLAG;
299 if( curLevel < nextLevel )
300 efectiveLevel |= SC_FOLDLEVELHEADERFLAG;
301 if( efectiveLevel != styler.LevelAt(curLine) ) {
302 styler.SetLevel(curLine, efectiveLevel );
303 }
304 curLine++;
305 curLevel = nextLevel;
306 if( IsEOL( styler, curPos ) && ( curPos == endPos - 1 ) ) {
307 styler.SetLevel( curLine, ( curLevel | curLevel << 16)
308 | SC_FOLDLEVELWHITEFLAG);
309 }
310 visChars = 0;
311 }
312 curPos++;
313 style = styler.StyleAt( curPos );
314 }
315 }
316
skipWhiteSpaces(StyleContext & sc)317 static inline bool skipWhiteSpaces( StyleContext & sc ) {
318 while( isspace( sc.ch ) ) {
319 sc.SetState( SCE_MODULA_DEFAULT );
320 if( sc.More() )
321 sc.Forward();
322 else
323 return false;
324 }
325 return true;
326 }
327
ColouriseModulaDoc(unsigned int startPos,int length,int initStyle,WordList * wl[],Accessor & styler)328 static void ColouriseModulaDoc( unsigned int startPos,
329 int length,
330 int initStyle,
331 WordList *wl[],
332 Accessor &styler ) {
333 WordList& keyWords = *wl[0];
334 WordList& reservedWords = *wl[1];
335 WordList& operators = *wl[2];
336 WordList& pragmaWords = *wl[3];
337 WordList& escapeCodes = *wl[4];
338 WordList& doxyKeys = *wl[5];
339
340 const int BUFLEN = 128;
341
342 char buf[BUFLEN];
343 int i, kl;
344
345 int charPos = 0;
346
347 StyleContext sc( startPos, length, initStyle, styler );
348
349 while( sc.More() ) {
350 switch( sc.state ) {
351 case SCE_MODULA_DEFAULT:
352 if( ! skipWhiteSpaces( sc ) ) break;
353
354 if( sc.ch == '(' && sc.chNext == '*' ) {
355 if( sc.GetRelative(2) == '*' ) {
356 sc.SetState( SCE_MODULA_DOXYCOMM );
357 sc.Forward();
358 } else {
359 sc.SetState( SCE_MODULA_COMMENT );
360 }
361 sc.Forward();
362 }
363 else
364 if( isalpha( sc.ch ) ) {
365 if( isupper( sc.ch ) && isupper( sc.chNext ) ) {
366 for( i = 0; i < BUFLEN - 1; i++ ) {
367 buf[i] = sc.GetRelative(i);
368 if( !isalpha( buf[i] ) && !(buf[i] == '_') )
369 break;
370 }
371 kl = i;
372 buf[kl] = 0;
373
374 if( keyWords.InList( buf ) ) {
375 sc.SetState( SCE_MODULA_KEYWORD );
376 sc.Forward( kl );
377 sc.SetState( SCE_MODULA_DEFAULT );
378 continue;
379 }
380 else
381 if( reservedWords.InList( buf ) ) {
382 sc.SetState( SCE_MODULA_RESERVED );
383 sc.Forward( kl );
384 sc.SetState( SCE_MODULA_DEFAULT );
385 continue;
386 } else {
387 /** check procedure identifier */
388 }
389 } else {
390 for( i = 0; i < BUFLEN - 1; i++ ) {
391 buf[i] = sc.GetRelative(i);
392 if( !isalpha( buf[i] ) &&
393 !isdigit( buf[i] ) &&
394 !(buf[i] == '_') )
395 break;
396 }
397 kl = i;
398 buf[kl] = 0;
399
400 sc.SetState( SCE_MODULA_DEFAULT );
401 sc.Forward( kl );
402 continue;
403 }
404 }
405 else
406 if( isdigit( sc.ch ) ) {
407 sc.SetState( SCE_MODULA_NUMBER );
408 continue;
409 }
410 else
411 if( sc.ch == '\"' ) {
412 sc.SetState( SCE_MODULA_STRING );
413 }
414 else
415 if( sc.ch == '\'' ) {
416 charPos = sc.currentPos;
417 sc.SetState( SCE_MODULA_CHAR );
418 }
419 else
420 if( sc.ch == '<' && sc.chNext == '*' ) {
421 sc.SetState( SCE_MODULA_PRAGMA );
422 sc.Forward();
423 } else {
424 unsigned len = IsOperator( sc, operators );
425 if( len > 0 ) {
426 sc.SetState( SCE_MODULA_OPERATOR );
427 sc.Forward( len );
428 sc.SetState( SCE_MODULA_DEFAULT );
429 continue;
430 } else {
431 DEBUG_STATE( sc.currentPos, sc.ch );
432 }
433 }
434 break;
435
436 case SCE_MODULA_COMMENT:
437 if( sc.ch == '*' && sc.chNext == ')' ) {
438 sc.Forward( 2 );
439 sc.SetState( SCE_MODULA_DEFAULT );
440 continue;
441 }
442 break;
443
444 case SCE_MODULA_DOXYCOMM:
445 switch( sc.ch ) {
446 case '*':
447 if( sc.chNext == ')' ) {
448 sc.Forward( 2 );
449 sc.SetState( SCE_MODULA_DEFAULT );
450 continue;
451 }
452 break;
453
454 case '@':
455 if( islower( sc.chNext ) ) {
456 for( i = 0; i < BUFLEN - 1; i++ ) {
457 buf[i] = sc.GetRelative(i+1);
458 if( isspace( buf[i] ) ) break;
459 }
460 buf[i] = 0;
461 kl = i;
462
463 if( doxyKeys.InList( buf ) ) {
464 sc.SetState( SCE_MODULA_DOXYKEY );
465 sc.Forward( kl + 1 );
466 sc.SetState( SCE_MODULA_DOXYCOMM );
467 }
468 }
469 break;
470
471 default:
472 break;
473 }
474 break;
475
476 case SCE_MODULA_NUMBER:
477 {
478 buf[0] = sc.ch;
479 for( i = 1; i < BUFLEN - 1; i++ ) {
480 buf[i] = sc.GetRelative(i);
481 if( ! isdigit( buf[i] ) )
482 break;
483 }
484 kl = i;
485 buf[kl] = 0;
486
487 switch( sc.GetRelative(kl) ) {
488 case '_':
489 {
490 int base = atoi( buf );
491 if( base < 2 || base > 16 ) {
492 sc.SetState( SCE_MODULA_BADSTR );
493 } else {
494 int imax;
495
496 kl++;
497 for( i = 0; i < BUFLEN - 1; i++ ) {
498 buf[i] = sc.GetRelative(kl+i);
499 if( ! IsDigitOfBase( buf[i], 16 ) ) {
500 break;
501 }
502 }
503 imax = i;
504 for( i = 0; i < imax; i++ ) {
505 if( ! IsDigitOfBase( buf[i], base ) ) {
506 sc.SetState( SCE_MODULA_BADSTR );
507 break;
508 }
509 }
510 kl += imax;
511 }
512 sc.SetState( SCE_MODULA_BASENUM );
513 for( i = 0; i < kl; i++ ) {
514 sc.Forward();
515 }
516 sc.SetState( SCE_MODULA_DEFAULT );
517 continue;
518 }
519 break;
520
521 case '.':
522 if( sc.GetRelative(kl+1) == '.' ) {
523 kl--;
524 for( i = 0; i < kl; i++ ) {
525 sc.Forward();
526 }
527 sc.Forward();
528 sc.SetState( SCE_MODULA_DEFAULT );
529 continue;
530 } else {
531 bool doNext = false;
532
533 kl++;
534
535 buf[0] = sc.GetRelative(kl);
536 if( isdigit( buf[0] ) ) {
537 for( i = 0;; i++ ) {
538 if( !isdigit(sc.GetRelative(kl+i)) )
539 break;
540 }
541 kl += i;
542 buf[0] = sc.GetRelative(kl);
543
544 switch( buf[0] )
545 {
546 case 'E':
547 case 'e':
548 case 'D':
549 case 'd':
550 case 'X':
551 case 'x':
552 kl++;
553 buf[0] = sc.GetRelative(kl);
554 if( buf[0] == '-' || buf[0] == '+' ) {
555 kl++;
556 }
557 buf[0] = sc.GetRelative(kl);
558 if( isdigit( buf[0] ) ) {
559 for( i = 0;; i++ ) {
560 if( !isdigit(sc.GetRelative(kl+i)) ) {
561 buf[0] = sc.GetRelative(kl+i);
562 break;
563 }
564 }
565 kl += i;
566 doNext = true;
567 } else {
568 sc.SetState( SCE_MODULA_BADSTR );
569 }
570 break;
571
572 default:
573 doNext = true;
574 break;
575 }
576 } else {
577 sc.SetState( SCE_MODULA_BADSTR );
578 }
579
580 if( doNext ) {
581 if( ! isspace( buf[0] ) &&
582 buf[0] != ')' &&
583 buf[0] != '>' &&
584 buf[0] != '<' &&
585 buf[0] != '=' &&
586 buf[0] != '#' &&
587 buf[0] != '+' &&
588 buf[0] != '-' &&
589 buf[0] != '*' &&
590 buf[0] != '/' &&
591 buf[0] != ',' &&
592 buf[0] != ';'
593 ) {
594 sc.SetState( SCE_MODULA_BADSTR );
595 } else {
596 kl--;
597 }
598 }
599 }
600 sc.SetState( SCE_MODULA_FLOAT );
601 for( i = 0; i < kl; i++ ) {
602 sc.Forward();
603 }
604 sc.SetState( SCE_MODULA_DEFAULT );
605 continue;
606 break;
607
608 default:
609 for( i = 0; i < kl; i++ ) {
610 sc.Forward();
611 }
612 break;
613 }
614 sc.SetState( SCE_MODULA_DEFAULT );
615 continue;
616 }
617 break;
618
619 case SCE_MODULA_STRING:
620 if( sc.ch == '\"' ) {
621 sc.Forward();
622 sc.SetState( SCE_MODULA_DEFAULT );
623 continue;
624 } else {
625 if( sc.ch == '\\' ) {
626 i = 1;
627 if( IsDigitOfBase( sc.chNext, 8 ) ) {
628 for( i = 1; i < BUFLEN - 1; i++ ) {
629 if( ! IsDigitOfBase(sc.GetRelative(i+1), 8 ) )
630 break;
631 }
632 if( i == 3 ) {
633 sc.SetState( SCE_MODULA_STRSPEC );
634 } else {
635 sc.SetState( SCE_MODULA_BADSTR );
636 }
637 } else {
638 buf[0] = sc.chNext;
639 buf[1] = 0;
640
641 if( escapeCodes.InList( buf ) ) {
642 sc.SetState( SCE_MODULA_STRSPEC );
643 } else {
644 sc.SetState( SCE_MODULA_BADSTR );
645 }
646 }
647 sc.Forward(i+1);
648 sc.SetState( SCE_MODULA_STRING );
649 continue;
650 }
651 }
652 break;
653
654 case SCE_MODULA_CHAR:
655 if( sc.ch == '\'' ) {
656 sc.Forward();
657 sc.SetState( SCE_MODULA_DEFAULT );
658 continue;
659 }
660 else
661 if( ( sc.currentPos - charPos ) == 1 ) {
662 if( sc.ch == '\\' ) {
663 i = 1;
664 if( IsDigitOfBase( sc.chNext, 8 ) ) {
665 for( i = 1; i < BUFLEN - 1; i++ ) {
666 if( ! IsDigitOfBase(sc.GetRelative(i+1), 8 ) )
667 break;
668 }
669 if( i == 3 ) {
670 sc.SetState( SCE_MODULA_CHARSPEC );
671 } else {
672 sc.SetState( SCE_MODULA_BADSTR );
673 }
674 } else {
675 buf[0] = sc.chNext;
676 buf[1] = 0;
677
678 if( escapeCodes.InList( buf ) ) {
679 sc.SetState( SCE_MODULA_CHARSPEC );
680 } else {
681 sc.SetState( SCE_MODULA_BADSTR );
682 }
683 }
684 sc.Forward(i+1);
685 sc.SetState( SCE_MODULA_CHAR );
686 continue;
687 }
688 } else {
689 sc.SetState( SCE_MODULA_BADSTR );
690 sc.Forward();
691 sc.SetState( SCE_MODULA_CHAR );
692 continue;
693 }
694 break;
695
696 case SCE_MODULA_PRAGMA:
697 if( sc.ch == '*' && sc.chNext == '>' ) {
698 sc.Forward();
699 sc.Forward();
700 sc.SetState( SCE_MODULA_DEFAULT );
701 continue;
702 }
703 else
704 if( isupper( sc.ch ) && isupper( sc.chNext ) ) {
705 buf[0] = sc.ch;
706 buf[1] = sc.chNext;
707 for( i = 2; i < BUFLEN - 1; i++ ) {
708 buf[i] = sc.GetRelative(i);
709 if( !isupper( buf[i] ) )
710 break;
711 }
712 kl = i;
713 buf[kl] = 0;
714 if( pragmaWords.InList( buf ) ) {
715 sc.SetState( SCE_MODULA_PRGKEY );
716 sc.Forward( kl );
717 sc.SetState( SCE_MODULA_PRAGMA );
718 continue;
719 }
720 }
721 break;
722
723 default:
724 break;
725 }
726 sc.Forward();
727 }
728 sc.Complete();
729 }
730
/**
 * Descriptions of the word lists used by this lexer, in the order in which
 * ColouriseModulaDoc reads them from its word-list array. The array is
 * terminated by a null entry.
 */
static const char *const modulaWordListDesc[] =
{
	"Keywords",           // list 0
	"ReservedKeywords",   // list 1
	"Operators",          // list 2
	"PragmaKeyswords",    // list 3
	"EscapeCodes",        // list 4
	"DoxygeneKeywords",   // list 5
	0                     // end marker
};
741
742 LexerModule lmModula( SCLEX_MODULA, ColouriseModulaDoc, "modula", FoldModulaDoc,
743 modulaWordListDesc);
744