1 /******************************************************************
2  *    LexHaskell.cxx
3  *
4  *    A haskell lexer for the scintilla code control.
5  *    Some stuff "lended" from LexPython.cxx and LexCPP.cxx.
6  *    External lexer stuff inspired from the caml external lexer.
7  *
8  *    Written by Tobias Engvall - tumm at dtek dot chalmers dot se
9  *
10  *    Several bug fixes by Krasimir Angelov - kr.angelov at gmail.com
11  *
12  *    TODO:
13  *    * Implement a folder :)
14  *    * Nice Character-lexing (stuff inside '\''), LexPython has
15  *      this.
16  *
17  *
18  *****************************************************************/
19 
20 #include <stdlib.h>
21 #include <string.h>
22 #include <stdio.h>
23 #include <stdarg.h>
24 #include <assert.h>
25 #include <ctype.h>
26 
27 #include "ILexer.h"
28 #include "Scintilla.h"
29 #include "SciLexer.h"
30 
31 #include "PropSetSimple.h"
32 #include "WordList.h"
33 #include "LexAccessor.h"
34 #include "Accessor.h"
35 #include "StyleContext.h"
36 #include "CharacterSet.h"
37 #include "LexerModule.h"
38 
39 #ifdef SCI_NAMESPACE
40 using namespace Scintilla;
41 #endif
42 
43 #ifdef BUILD_AS_EXTERNAL_LEXER
44 
45 #include "ExternalLexer.h"
46 #include "WindowAccessor.h"
47 
48 #define BUILD_EXTERNAL_LEXER 0
49 
50 #endif
51 
// Declaration "modes" tracked by the lexer while it scans a line.  The current
// mode is kept in the low four bits of the per-line state, so every value
// must fit in 0..15.
static const int HA_MODE_DEFAULT = 0;   // no special context
static const int HA_MODE_IMPORT1 = 1;   // just after "import" (or "import qualified")
static const int HA_MODE_IMPORT2 = 2;   // after the module name of an import
static const int HA_MODE_IMPORT3 = 3;   // after "as" in an import
static const int HA_MODE_MODULE  = 4;   // just after "module"
static const int HA_MODE_FFI     = 5;   // just after "foreign" (or an FFI word)
static const int HA_MODE_TYPE    = 6;   // just after "type"
59 
// Tells whether ch is one of the two line-terminator characters
// (line feed or carriage return).
static inline bool IsNewline(const int ch) {
   switch (ch) {
      case '\n':
      case '\r':
         return true;
      default:
         return false;
   }
}
63 
IsWhitespace(const int ch)64 static inline bool IsWhitespace(const int ch) {
65    return (  ch == ' '
66           || ch == '\t'
67           || IsNewline(ch) );
68 }
69 
// Tells whether ch may begin an identifier: an ASCII letter, an ASCII digit
// or an underscore.  (The lexer tests for digits before it tests for a word
// start, so a leading digit never actually begins an identifier.)
//
// Only values in 0..0x7F are handed to isalnum(); passing it a negative value
// other than EOF is undefined behaviour, so negatives are rejected up front.
static inline bool IsAWordStart(const int ch) {
   if (ch < 0 || ch >= 0x80) {
      return false;
   }
   return isalnum(ch) || ch == '_';
}
73 
// Tells whether ch may continue an identifier: an ASCII letter or digit, a
// dot (qualified names), an underscore or a single quote (primed names).
//
// Only values in 0..0x7F are handed to isalnum(); passing it a negative value
// other than EOF is undefined behaviour, so negatives are rejected up front.
static inline bool IsAWordChar(const int ch) {
   if (ch < 0 || ch >= 0x80) {
      return false;
   }
   return isalnum(ch) || ch == '.' || ch == '_' || ch == '\'';
}
77 
ColorizeHaskellDoc(unsigned int startPos,int length,int initStyle,WordList * keywordlists[],Accessor & styler)78 static void ColorizeHaskellDoc(unsigned int startPos, int length, int initStyle,
79                                WordList *keywordlists[], Accessor &styler) {
80 
81    WordList &keywords = *keywordlists[0];
82    WordList &ffi      = *keywordlists[1];
83 
84    StyleContext sc(startPos, length, initStyle, styler);
85 
86    int lineCurrent = styler.GetLine(startPos);
87    int state = lineCurrent ? styler.GetLineState(lineCurrent-1)
88                            : HA_MODE_DEFAULT;
89    int mode  = state & 0xF;
90    int xmode = state >> 4;
91 
92    while (sc.More()) {
93       // Check for state end
94 
95          // Operator
96       if (sc.state == SCE_HA_OPERATOR) {
97          if (isascii(sc.ch) && isoperator(static_cast<char>(sc.ch))) {
98             sc.Forward();
99          } else {
100             styler.ColourTo(sc.currentPos - 1, sc.state);
101             sc.ChangeState(SCE_HA_DEFAULT);
102          }
103       }
104          // String
105       else if (sc.state == SCE_HA_STRING) {
106          if (sc.ch == '\"') {
107 			sc.Forward();
108             styler.ColourTo(sc.currentPos-1, sc.state);
109             sc.ChangeState(SCE_HA_DEFAULT);
110          } else if (sc.ch == '\\') {
111             sc.Forward(2);
112          } else if (sc.atLineEnd) {
113 			styler.ColourTo(sc.currentPos-1, sc.state);
114 			sc.ChangeState(SCE_HA_DEFAULT);
115 		 } else {
116 			sc.Forward();
117 		 }
118       }
119          // Char
120       else if (sc.state == SCE_HA_CHARACTER) {
121          if (sc.ch == '\'') {
122 			sc.Forward();
123             styler.ColourTo(sc.currentPos-1, sc.state);
124             sc.ChangeState(SCE_HA_DEFAULT);
125          } else if (sc.ch == '\\') {
126             sc.Forward(2);
127          } else if (sc.atLineEnd) {
128 			styler.ColourTo(sc.currentPos-1, sc.state);
129 			sc.ChangeState(SCE_HA_DEFAULT);
130 		 } else {
131 			sc.Forward();
132 		 }
133       }
134          // Number
135       else if (sc.state == SCE_HA_NUMBER) {
136          if (IsADigit(sc.ch, xmode)) {
137             sc.Forward();
138          } else if ((xmode == 10) &&
139                     (sc.ch == 'e' || sc.ch == 'E') &&
140                     (IsADigit(sc.chNext) || sc.chNext == '+' || sc.chNext == '-')) {
141 			sc.Forward();
142 			if (sc.ch == '+' || sc.ch == '-')
143 				sc.Forward();
144          } else {
145             styler.ColourTo(sc.currentPos - 1, sc.state);
146             sc.ChangeState(SCE_HA_DEFAULT);
147          }
148       }
149          // Identifier
150       else if (sc.state == SCE_HA_IDENTIFIER) {
151          if (IsAWordChar(sc.ch)) {
152             sc.Forward();
153          } else {
154             char s[100];
155             sc.GetCurrent(s, sizeof(s));
156             int style = sc.state;
157             int new_mode = 0;
158             if (keywords.InList(s)) {
159                style = SCE_HA_KEYWORD;
160             } else if (isupper(s[0])) {
161                if (mode >= HA_MODE_IMPORT1 && mode <= HA_MODE_IMPORT3) {
162                   style    = SCE_HA_MODULE;
163                   new_mode = HA_MODE_IMPORT2;
164                } else if (mode == HA_MODE_MODULE)
165                   style = SCE_HA_MODULE;
166                else
167                   style = SCE_HA_CAPITAL;
168             } else if (mode == HA_MODE_IMPORT1 &&
169                        strcmp(s,"qualified") == 0) {
170                 style    = SCE_HA_KEYWORD;
171                 new_mode = HA_MODE_IMPORT1;
172             } else if (mode == HA_MODE_IMPORT2) {
173                 if (strcmp(s,"as") == 0) {
174                    style    = SCE_HA_KEYWORD;
175                    new_mode = HA_MODE_IMPORT3;
176                } else if (strcmp(s,"hiding") == 0) {
177                    style     = SCE_HA_KEYWORD;
178                }
179             } else if (mode == HA_MODE_FFI) {
180 			   if (ffi.InList(s)) {
181                   style = SCE_HA_KEYWORD;
182                   new_mode = HA_MODE_FFI;
183                }
184             }
185             else if (mode == HA_MODE_TYPE) {
186                if (strcmp(s,"family") == 0)
187                   style    = SCE_HA_KEYWORD;
188 			}
189             styler.ColourTo(sc.currentPos - 1, style);
190             if (strcmp(s,"import") == 0 && mode != HA_MODE_FFI)
191                new_mode = HA_MODE_IMPORT1;
192             else if (strcmp(s,"module") == 0)
193                new_mode = HA_MODE_MODULE;
194             else if (strcmp(s,"foreign") == 0)
195                new_mode = HA_MODE_FFI;
196             else if (strcmp(s,"type") == 0)
197                new_mode = HA_MODE_TYPE;
198             sc.ChangeState(SCE_HA_DEFAULT);
199             mode = new_mode;
200          }
201       }
202 
203          // Comments
204             // Oneliner
205       else if (sc.state == SCE_HA_COMMENTLINE) {
206          if (sc.atLineEnd) {
207             styler.ColourTo(sc.currentPos - 1, sc.state);
208             sc.ChangeState(SCE_HA_DEFAULT);
209          } else {
210             sc.Forward();
211          }
212       }
213             // Nested
214       else if (sc.state == SCE_HA_COMMENTBLOCK) {
215          if (sc.Match("{-")) {
216             sc.Forward(2);
217             xmode++;
218          }
219          else if (sc.Match("-}")) {
220             sc.Forward(2);
221             xmode--;
222             if (xmode == 0) {
223                styler.ColourTo(sc.currentPos - 1, sc.state);
224                sc.ChangeState(SCE_HA_DEFAULT);
225             }
226          } else {
227             if (sc.atLineEnd) {
228 				// Remember the line state for future incremental lexing
229 				styler.SetLineState(lineCurrent, (xmode << 4) | mode);
230 				lineCurrent++;
231 			}
232             sc.Forward();
233          }
234       }
235       // New state?
236       if (sc.state == SCE_HA_DEFAULT) {
237          // Digit
238          if (IsADigit(sc.ch) ||
239              (sc.ch == '.' && IsADigit(sc.chNext)) ||
240              (sc.ch == '-' && IsADigit(sc.chNext))) {
241             styler.ColourTo(sc.currentPos - 1, sc.state);
242             sc.ChangeState(SCE_HA_NUMBER);
243             if (sc.ch == '0' && (sc.chNext == 'X' || sc.chNext == 'x')) {
244 				// Match anything starting with "0x" or "0X", too
245 				sc.Forward(2);
246 				xmode = 16;
247             } else if (sc.ch == '0' && (sc.chNext == 'O' || sc.chNext == 'o')) {
248 				// Match anything starting with "0x" or "0X", too
249 				sc.Forward(2);
250 				xmode = 8;
251             } else {
252 				sc.Forward();
253 				xmode = 10;
254 			}
255             mode = HA_MODE_DEFAULT;
256          }
257          // Comment line
258          else if (sc.Match("--")) {
259             styler.ColourTo(sc.currentPos - 1, sc.state);
260             sc.Forward(2);
261             sc.ChangeState(SCE_HA_COMMENTLINE);
262          // Comment block
263          }
264          else if (sc.Match("{-")) {
265             styler.ColourTo(sc.currentPos - 1, sc.state);
266             sc.Forward(2);
267             sc.ChangeState(SCE_HA_COMMENTBLOCK);
268             xmode = 1;
269          }
270          // String
271          else if (sc.Match('\"')) {
272             styler.ColourTo(sc.currentPos - 1, sc.state);
273             sc.Forward();
274             sc.ChangeState(SCE_HA_STRING);
275          }
276          // Character
277          else if (sc.Match('\'')) {
278             styler.ColourTo(sc.currentPos - 1, sc.state);
279             sc.Forward();
280             sc.ChangeState(SCE_HA_CHARACTER);
281          }
282          else if (sc.ch == '(' || sc.ch == ')' ||
283                   sc.ch == '{' || sc.ch == '}' ||
284                   sc.ch == '[' || sc.ch == ']') {
285 			styler.ColourTo(sc.currentPos - 1, sc.state);
286 			sc.Forward();
287 			styler.ColourTo(sc.currentPos - 1, SCE_HA_OPERATOR);
288 			mode = HA_MODE_DEFAULT;
289 		 }
290          // Operator
291          else if (isascii(sc.ch) && isoperator(static_cast<char>(sc.ch))) {
292             styler.ColourTo(sc.currentPos - 1, sc.state);
293             sc.Forward();
294             sc.ChangeState(SCE_HA_OPERATOR);
295             mode = HA_MODE_DEFAULT;
296          }
297          // Keyword
298          else if (IsAWordStart(sc.ch)) {
299             styler.ColourTo(sc.currentPos - 1, sc.state);
300             sc.Forward();
301             sc.ChangeState(SCE_HA_IDENTIFIER);
302          } else {
303             if (sc.atLineEnd) {
304 				// Remember the line state for future incremental lexing
305 				styler.SetLineState(lineCurrent, (xmode << 4) | mode);
306 				lineCurrent++;
307 			}
308             sc.Forward();
309          }
310       }
311    }
312    sc.Complete();
313 }
314 
315 // External stuff - used for dynamic-loading, not implemented in wxStyledTextCtrl yet.
316 // Inspired by the caml external lexer - Credits to Robert Roessler - http://www.rftp.com
317 #ifdef BUILD_EXTERNAL_LEXER
// Name reported to the host by GetLexerName().  Both the characters and the
// pointer are constant so the name cannot be re-pointed by accident.
static const char* const LexerName = "haskell";
319 
Lex(unsigned int lexer,unsigned int startPos,int length,int initStyle,char * words[],WindowID window,char * props)320 void EXT_LEXER_DECL Lex(unsigned int lexer, unsigned int startPos, int length, int initStyle,
321                         char *words[], WindowID window, char *props)
322 {
323    PropSetSimple ps;
324    ps.SetMultiple(props);
325    WindowAccessor wa(window, ps);
326 
327    int nWL = 0;
328    for (; words[nWL]; nWL++) ;
329    WordList** wl = new WordList* [nWL + 1];
330    int i = 0;
331    for (; i<nWL; i++)
332    {
333       wl[i] = new WordList();
334       wl[i]->Set(words[i]);
335    }
336    wl[i] = 0;
337 
338    ColorizeHaskellDoc(startPos, length, initStyle, wl, wa);
339    wa.Flush();
340    for (i=nWL-1;i>=0;i--)
341       delete wl[i];
342    delete [] wl;
343 }
344 
Fold(unsigned int lexer,unsigned int startPos,int length,int initStyle,char * words[],WindowID window,char * props)345 void EXT_LEXER_DECL Fold (unsigned int lexer, unsigned int startPos, int length, int initStyle,
346                         char *words[], WindowID window, char *props)
347 {
348 
349 }
350 
GetLexerCount()351 int EXT_LEXER_DECL GetLexerCount()
352 {
353    return 1;
354 }
355 
GetLexerName(unsigned int Index,char * name,int buflength)356 void EXT_LEXER_DECL GetLexerName(unsigned int Index, char *name, int buflength)
357 {
358    if (buflength > 0) {
359       buflength--;
360       int n = strlen(LexerName);
361       if (n > buflength)
362          n = buflength;
363       memcpy(name, LexerName, n), name[n] = '\0';
364    }
365 }
366 #endif
367 
368 LexerModule lmHaskell(SCLEX_HASKELL, ColorizeHaskellDoc, "haskell");
369