1 // Scintilla source code edit control
2 // Nimrod lexer
3 // (c) 2009 Andreas Rumpf
4 /** @file LexNimrod.cxx
5  ** Lexer for Nimrod.
6  **/
7 // Copyright 1998-2002 by Neil Hodgson <neilh@scintilla.org>
8 // The License.txt file describes the conditions under which this software may be distributed.
9 
10 #include <stdlib.h>
11 #include <string.h>
12 #include <stdio.h>
13 #include <stdarg.h>
14 #include <assert.h>
15 #include <ctype.h>
16 
17 #include "ILexer.h"
18 #include "Scintilla.h"
19 #include "SciLexer.h"
20 
21 #include "WordList.h"
22 #include "LexAccessor.h"
23 #include "Accessor.h"
24 #include "StyleContext.h"
25 #include "CharacterSet.h"
26 #include "LexerModule.h"
27 
28 #ifdef SCI_NAMESPACE
29 using namespace Scintilla;
30 #endif
31 
/**
 * Tell whether ch may appear inside an identifier.
 *
 * Values of 0x80 and above are accepted, as are ASCII letters, ASCII
 * digits and the underscore.
 *
 * @param ch character code to classify
 * @return true when ch is an identifier character
 */
static inline bool IsAWordChar(int ch) {
	if (ch >= 0x80)
		return true;
	// A byte >= 0x80 held in a signed char arrives here as a negative int.
	// Passing such a value to isalnum() is undefined behaviour, so negative
	// values are rejected before the ctype call.
	if (ch < 0)
		return false;
	return isalnum(ch) || ch == '_';
}
35 
tillEndOfTripleQuote(Accessor & styler,int pos,int max)36 static int tillEndOfTripleQuote(Accessor &styler, int pos, int max) {
37   /* search for """ */
38   for (;;) {
39     if (styler.SafeGetCharAt(pos, '\0') == '\0') return pos;
40     if (pos >= max) return pos;
41     if (styler.Match(pos, "\"\"\"")) {
42       return pos + 2;
43     }
44     pos++;
45   }
46 }
47 
/* Both line-end characters are checked because SciTE lets the user change
   the line ending. */
#define CR 13
#define LF 10

/**
 * Tell whether ch is one of the two line-end characters.
 *
 * @param ch character code to test
 * @return true for CR or LF, false for anything else
 */
static inline bool isNewLine(int ch) {
  switch (ch) {
    case CR:
    case LF:
      return true;
    default:
      return false;
  }
}
54 
scanString(Accessor & styler,int pos,int max,bool rawMode)55 static int scanString(Accessor &styler, int pos, int max, bool rawMode) {
56   for (;;) {
57     if (pos >= max) return pos;
58     char ch = styler.SafeGetCharAt(pos, '\0');
59     if (ch == CR || ch == LF || ch == '\0') return pos;
60     if (ch == '"') return pos;
61     if (ch == '\\' && !rawMode) {
62       pos += 2;
63     } else {
64       pos++;
65     }
66   }
67 }
68 
scanChar(Accessor & styler,int pos,int max)69 static int scanChar(Accessor &styler, int pos, int max) {
70   for (;;) {
71     if (pos >= max) return pos;
72     char ch = styler.SafeGetCharAt(pos, '\0');
73     if (ch == CR || ch == LF || ch == '\0') return pos;
74     if (ch == '\'' && !isalnum(styler.SafeGetCharAt(pos+1, '\0')) )
75       return pos;
76     if (ch == '\\') {
77       pos += 2;
78     } else {
79       pos++;
80     }
81   }
82 }
83 
scanIdent(Accessor & styler,int pos,WordList & keywords)84 static int scanIdent(Accessor &styler, int pos, WordList &keywords) {
85   char buf[100]; /* copy to lowercase and ignore underscores */
86   int i = 0;
87 
88   for (;;) {
89     char ch = styler.SafeGetCharAt(pos, '\0');
90     if (!IsAWordChar(ch)) break;
91     if (ch != '_' && i < ((int)sizeof(buf))-1) {
92       buf[i] = static_cast<char>(tolower(ch));
93       i++;
94     }
95     pos++;
96   }
97   buf[i] = '\0';
98   /* look for keyword */
99   if (keywords.InList(buf)) {
100     styler.ColourTo(pos-1, SCE_P_WORD);
101   } else {
102     styler.ColourTo(pos-1, SCE_P_IDENTIFIER);
103   }
104   return pos;
105 }
106 
scanNumber(Accessor & styler,int pos)107 static int scanNumber(Accessor &styler, int pos) {
108   char ch, ch2;
109   ch = styler.SafeGetCharAt(pos, '\0');
110   ch2 = styler.SafeGetCharAt(pos+1, '\0');
111   if (ch == '0' && (ch2 == 'b' || ch2 == 'B')) {
112     /* binary number: */
113     pos += 2;
114     for (;;) {
115       ch = styler.SafeGetCharAt(pos, '\0');
116       if (ch == '_' || (ch >= '0' && ch <= '1')) ++pos;
117       else break;
118     }
119   } else if (ch == '0' &&
120             (ch2 == 'o' || ch2 == 'O' || ch2 == 'c' || ch2 == 'C')) {
121     /* octal number: */
122     pos += 2;
123     for (;;) {
124       ch = styler.SafeGetCharAt(pos, '\0');
125       if (ch == '_' || (ch >= '0' && ch <= '7')) ++pos;
126       else break;
127     }
128   } else if (ch == '0' && (ch2 == 'x' || ch2 == 'X')) {
129     /* hexadecimal number: */
130     pos += 2;
131     for (;;) {
132       ch = styler.SafeGetCharAt(pos, '\0');
133       if (ch == '_' || (ch >= '0' && ch <= '9')
134           || (ch >= 'a' && ch <= 'f')
135           || (ch >= 'A' && ch <= 'F')) ++pos;
136       else break;
137     }
138   } else {
139     // skip decimal part:
140     for (;;) {
141       ch = styler.SafeGetCharAt(pos, '\0');
142       if (ch == '_' || (ch >= '0' && ch <= '9')) ++pos;
143       else break;
144     }
145     ch2 = styler.SafeGetCharAt(pos+1, '\0');
146     if (ch == '.' && ch2 >= '0' && ch2 <= '9') {
147       ++pos; // skip '.'
148       for (;;) {
149         ch = styler.SafeGetCharAt(pos, '\0');
150         if (ch == '_' || (ch >= '0' && ch <= '9')) ++pos;
151         else break;
152       }
153     }
154     if (ch == 'e' || ch == 'E') {
155       ++pos;
156       ch = styler.SafeGetCharAt(pos, '\0');
157       if (ch == '-' || ch == '+') ++pos;
158       for (;;) {
159         ch = styler.SafeGetCharAt(pos, '\0');
160         if (ch == '_' || (ch >= '0' && ch <= '9')) ++pos;
161         else break;
162       }
163     }
164   }
165   if (ch == '\'') {
166     /* a type suffix: */
167     pos++;
168     for (;;) {
169       ch = styler.SafeGetCharAt(pos);
170       if ((ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'Z')
171          || (ch >= 'a' && ch <= 'z') || ch == '_') ++pos;
172       else break;
173     }
174   }
175   styler.ColourTo(pos-1, SCE_P_NUMBER);
176   return pos;
177 }
178 
179 /* rewritten from scratch, because I couldn't get rid of the bugs...
180    (A character based approach sucks!)
181 */
ColouriseNimrodDoc(unsigned int startPos,int length,int initStyle,WordList * keywordlists[],Accessor & styler)182 static void ColouriseNimrodDoc(unsigned int startPos, int length, int initStyle,
183                                 WordList *keywordlists[], Accessor &styler) {
184   int pos = startPos;
185   int max = startPos + length;
186   char ch;
187   WordList &keywords = *keywordlists[0];
188 
189   styler.StartAt(startPos);
190   styler.StartSegment(startPos);
191 
192   switch (initStyle) {
193     /* check where we are: */
194     case SCE_P_TRIPLEDOUBLE:
195       pos = tillEndOfTripleQuote(styler, pos, max);
196       styler.ColourTo(pos, SCE_P_TRIPLEDOUBLE);
197       pos++;
198     break;
199     default: /* nothing to do: */
200     break;
201   }
202   while (pos < max) {
203     ch = styler.SafeGetCharAt(pos, '\0');
204     switch (ch) {
205       case '\0': return;
206       case '#': {
207         bool doccomment = (styler.SafeGetCharAt(pos+1) == '#');
208         while (pos < max && !isNewLine(styler.SafeGetCharAt(pos, LF))) pos++;
209         if (doccomment)
210           styler.ColourTo(pos, SCE_C_COMMENTLINEDOC);
211         else
212           styler.ColourTo(pos, SCE_P_COMMENTLINE);
213       } break;
214       case 'r': case 'R': {
215         if (styler.SafeGetCharAt(pos+1) == '"') {
216           pos = scanString(styler, pos+2, max, true);
217           styler.ColourTo(pos, SCE_P_STRING);
218           pos++;
219         } else {
220           pos = scanIdent(styler, pos, keywords);
221         }
222       } break;
223       case '"':
224         if (styler.Match(pos+1, "\"\"")) {
225           pos = tillEndOfTripleQuote(styler, pos+3, max);
226           styler.ColourTo(pos, SCE_P_TRIPLEDOUBLE);
227         } else {
228           pos = scanString(styler, pos+1, max, false);
229           styler.ColourTo(pos, SCE_P_STRING);
230         }
231         pos++;
232       break;
233       case '\'':
234         pos = scanChar(styler, pos+1, max);
235         styler.ColourTo(pos, SCE_P_CHARACTER);
236         pos++;
237       break;
238       default: // identifers, numbers, operators, whitespace
239         if (ch >= '0' && ch <= '9') {
240           pos = scanNumber(styler, pos);
241         } else if (IsAWordChar(ch)) {
242           pos = scanIdent(styler, pos, keywords);
243         } else if (ch == '`') {
244           pos++;
245           while (pos < max) {
246             ch = styler.SafeGetCharAt(pos, LF);
247             if (ch == '`') {
248               ++pos;
249               break;
250             }
251             if (ch == CR || ch == LF) break;
252             ++pos;
253           }
254           styler.ColourTo(pos, SCE_P_IDENTIFIER);
255         } else if (strchr("()[]{}:=;-\\/&%$!+<>|^?,.*~@", ch)) {
256           styler.ColourTo(pos, SCE_P_OPERATOR);
257           pos++;
258         } else {
259           styler.ColourTo(pos, SCE_P_DEFAULT);
260           pos++;
261         }
262       break;
263     }
264   }
265 }
266 
IsCommentLine(int line,Accessor & styler)267 static bool IsCommentLine(int line, Accessor &styler) {
268 	int pos = styler.LineStart(line);
269 	int eol_pos = styler.LineStart(line + 1) - 1;
270 	for (int i = pos; i < eol_pos; i++) {
271 		char ch = styler[i];
272 		if (ch == '#')
273 			return true;
274 		else if (ch != ' ' && ch != '\t')
275 			return false;
276 	}
277 	return false;
278 }
279 
IsQuoteLine(int line,Accessor & styler)280 static bool IsQuoteLine(int line, Accessor &styler) {
281 	int style = styler.StyleAt(styler.LineStart(line)) & 31;
282 	return ((style == SCE_P_TRIPLE) || (style == SCE_P_TRIPLEDOUBLE));
283 }
284 
285 
FoldNimrodDoc(unsigned int startPos,int length,int,WordList * [],Accessor & styler)286 static void FoldNimrodDoc(unsigned int startPos, int length,
287                           int /*initStyle - unused*/,
288                           WordList *[], Accessor &styler) {
289 	const int maxPos = startPos + length;
290 	const int maxLines = styler.GetLine(maxPos - 1); // Requested last line
291 	const int docLines = styler.GetLine(styler.Length() - 1); // Available last line
292 	const bool foldComment = styler.GetPropertyInt("fold.comment.nimrod") != 0;
293 	const bool foldQuotes = styler.GetPropertyInt("fold.quotes.nimrod") != 0;
294 
295 	// Backtrack to previous non-blank line so we can determine indent level
296 	// for any white space lines (needed esp. within triple quoted strings)
297 	// and so we can fix any preceding fold level (which is why we go back
298 	// at least one line in all cases)
299 	int spaceFlags = 0;
300 	int lineCurrent = styler.GetLine(startPos);
301 	int indentCurrent = styler.IndentAmount(lineCurrent, &spaceFlags, NULL);
302 	while (lineCurrent > 0) {
303 		lineCurrent--;
304 		indentCurrent = styler.IndentAmount(lineCurrent, &spaceFlags, NULL);
305 		if (!(indentCurrent & SC_FOLDLEVELWHITEFLAG) &&
306 		        (!IsCommentLine(lineCurrent, styler)) &&
307 		        (!IsQuoteLine(lineCurrent, styler)))
308 			break;
309 	}
310 	int indentCurrentLevel = indentCurrent & SC_FOLDLEVELNUMBERMASK;
311 
312 	// Set up initial loop state
313 	startPos = styler.LineStart(lineCurrent);
314 	int prev_state = SCE_P_DEFAULT & 31;
315 	if (lineCurrent >= 1)
316 		prev_state = styler.StyleAt(startPos - 1) & 31;
317 	int prevQuote = foldQuotes && ((prev_state == SCE_P_TRIPLE) ||
318 	                               (prev_state == SCE_P_TRIPLEDOUBLE));
319 	int prevComment = 0;
320 	if (lineCurrent >= 1)
321 		prevComment = foldComment && IsCommentLine(lineCurrent - 1, styler);
322 
323 	// Process all characters to end of requested range or end of any triple quote
324 	// or comment that hangs over the end of the range.  Cap processing in all cases
325 	// to end of document (in case of unclosed quote or comment at end).
326 	while ((lineCurrent <= docLines) && ((lineCurrent <= maxLines) ||
327 	                                      prevQuote || prevComment)) {
328 
329 		// Gather info
330 		int lev = indentCurrent;
331 		int lineNext = lineCurrent + 1;
332 		int indentNext = indentCurrent;
333 		int quote = false;
334 		if (lineNext <= docLines) {
335 			// Information about next line is only available if not at end of document
336 			indentNext = styler.IndentAmount(lineNext, &spaceFlags, NULL);
337 			int style = styler.StyleAt(styler.LineStart(lineNext)) & 31;
338 			quote = foldQuotes && ((style == SCE_P_TRIPLE) || (style == SCE_P_TRIPLEDOUBLE));
339 		}
340 		const int quote_start = (quote && !prevQuote);
341 		const int quote_continue = (quote && prevQuote);
342 		const int comment = foldComment && IsCommentLine(lineCurrent, styler);
343 		const int comment_start = (comment && !prevComment && (lineNext <= docLines) &&
344 		                           IsCommentLine(lineNext, styler) &&
345 		                           (lev > SC_FOLDLEVELBASE));
346 		const int comment_continue = (comment && prevComment);
347 		if ((!quote || !prevQuote) && !comment)
348 			indentCurrentLevel = indentCurrent & SC_FOLDLEVELNUMBERMASK;
349 		if (quote)
350 			indentNext = indentCurrentLevel;
351 		if (indentNext & SC_FOLDLEVELWHITEFLAG)
352 			indentNext = SC_FOLDLEVELWHITEFLAG | indentCurrentLevel;
353 
354 		if (quote_start) {
355 			// Place fold point at start of triple quoted string
356 			lev |= SC_FOLDLEVELHEADERFLAG;
357 		} else if (quote_continue || prevQuote) {
358 			// Add level to rest of lines in the string
359 			lev = lev + 1;
360 		} else if (comment_start) {
361 			// Place fold point at start of a block of comments
362 			lev |= SC_FOLDLEVELHEADERFLAG;
363 		} else if (comment_continue) {
364 			// Add level to rest of lines in the block
365 			lev = lev + 1;
366 		}
367 
368 		// Skip past any blank lines for next indent level info; we skip also
369 		// comments (all comments, not just those starting in column 0)
370 		// which effectively folds them into surrounding code rather
371 		// than screwing up folding.
372 
373 		while (!quote &&
374 		        (lineNext < docLines) &&
375 		        ((indentNext & SC_FOLDLEVELWHITEFLAG) ||
376 		         (lineNext <= docLines && IsCommentLine(lineNext, styler)))) {
377 
378 			lineNext++;
379 			indentNext = styler.IndentAmount(lineNext, &spaceFlags, NULL);
380 		}
381 
382 		const int levelAfterComments = indentNext & SC_FOLDLEVELNUMBERMASK;
383 		const int levelBeforeComments =
384 		    Maximum(indentCurrentLevel,levelAfterComments);
385 
386 		// Now set all the indent levels on the lines we skipped
387 		// Do this from end to start.  Once we encounter one line
388 		// which is indented more than the line after the end of
389 		// the comment-block, use the level of the block before
390 
391 		int skipLine = lineNext;
392 		int skipLevel = levelAfterComments;
393 
394 		while (--skipLine > lineCurrent) {
395 			int skipLineIndent = styler.IndentAmount(skipLine, &spaceFlags, NULL);
396 
397 			if ((skipLineIndent & SC_FOLDLEVELNUMBERMASK) > levelAfterComments)
398 				skipLevel = levelBeforeComments;
399 
400 			int whiteFlag = skipLineIndent & SC_FOLDLEVELWHITEFLAG;
401 
402 			styler.SetLevel(skipLine, skipLevel | whiteFlag);
403 		}
404 
405 		// Set fold header on non-quote/non-comment line
406 		if (!quote && !comment && !(indentCurrent & SC_FOLDLEVELWHITEFLAG) ) {
407 			if ((indentCurrent & SC_FOLDLEVELNUMBERMASK) <
408 			     (indentNext & SC_FOLDLEVELNUMBERMASK))
409 				lev |= SC_FOLDLEVELHEADERFLAG;
410 		}
411 
412 		// Keep track of triple quote and block comment state of previous line
413 		prevQuote = quote;
414 		prevComment = comment_start || comment_continue;
415 
416 		// Set fold level for this line and move to next line
417 		styler.SetLevel(lineCurrent, lev);
418 		indentCurrent = indentNext;
419 		lineCurrent = lineNext;
420 	}
421 
422 	// NOTE: Cannot set level of last line here because indentCurrent doesn't have
423 	// header flag set; the loop above is crafted to take care of this case!
424 	//styler.SetLevel(lineCurrent, indentCurrent);
425 }
426 
427 static const char * const nimrodWordListDesc[] = {
428 	"Keywords",
429 	0
430 };
431 
432 LexerModule lmNimrod(SCLEX_NIMROD, ColouriseNimrodDoc, "nimrod", FoldNimrodDoc,
433 				     nimrodWordListDesc);
434