1 /******************************************************************
2  *    LexHaskell.cxx
3  *
4  *    A haskell lexer for the scintilla code control.
5  *    Some stuff "lended" from LexPython.cxx and LexCPP.cxx.
6  *    External lexer stuff inspired from the caml external lexer.
7  *    Folder copied from Python's.
8  *
9  *    Written by Tobias Engvall - tumm at dtek dot chalmers dot se
10  *
11  *    Several bug fixes by Krasimir Angelov - kr.angelov at gmail.com
12  *
13  *    Improved by kudah <kudahkukarek@gmail.com>
14  *
15  *    TODO:
16  *    * A proper lexical folder to fold group declarations, comments, pragmas,
17  *      #ifdefs, explicit layout, lists, tuples, quasi-quotes, splces, etc, etc,
18  *      etc.
19  *
20  *****************************************************************/
21 #include <stdlib.h>
22 #include <string.h>
23 #include <stdio.h>
24 #include <stdarg.h>
25 #include <assert.h>
26 #include <ctype.h>
27 
28 #include <string>
29 #include <map>
30 
31 #include "ILexer.h"
32 #include "Scintilla.h"
33 #include "SciLexer.h"
34 
35 #include "PropSetSimple.h"
36 #include "WordList.h"
37 #include "LexAccessor.h"
38 #include "Accessor.h"
39 #include "StyleContext.h"
40 #include "CharacterSet.h"
41 #include "CharacterCategory.h"
42 #include "LexerModule.h"
43 #include "OptionSet.h"
44 
45 #ifdef SCI_NAMESPACE
46 using namespace Scintilla;
47 #endif
48 
49 // See https://github.com/ghc/ghc/blob/master/compiler/parser/Lexer.x#L1682
50 // Note, letter modifiers are prohibited.
51 
u_iswupper(int ch)52 static int u_iswupper (int ch) {
53    CharacterCategory c = CategoriseCharacter(ch);
54    return c == ccLu || c == ccLt;
55 }
56 
u_iswalpha(int ch)57 static int u_iswalpha (int ch) {
58    CharacterCategory c = CategoriseCharacter(ch);
59    return c == ccLl || c == ccLu || c == ccLt || c == ccLo;
60 }
61 
u_iswalnum(int ch)62 static int u_iswalnum (int ch) {
63    CharacterCategory c = CategoriseCharacter(ch);
64    return c == ccLl || c == ccLu || c == ccLt || c == ccLo
65        || c == ccNd || c == ccNo;
66 }
67 
u_IsHaskellSymbol(int ch)68 static int u_IsHaskellSymbol(int ch) {
69    CharacterCategory c = CategoriseCharacter(ch);
70    return c == ccPc || c == ccPd || c == ccPo
71        || c == ccSm || c == ccSc || c == ccSk || c == ccSo;
72 }
73 
IsHaskellLetter(const int ch)74 static inline bool IsHaskellLetter(const int ch) {
75    if (IsASCII(ch)) {
76       return (ch >= 'a' && ch <= 'z')
77           || (ch >= 'A' && ch <= 'Z');
78    } else {
79       return u_iswalpha(ch) != 0;
80    }
81 }
82 
IsHaskellAlphaNumeric(const int ch)83 static inline bool IsHaskellAlphaNumeric(const int ch) {
84    if (IsASCII(ch)) {
85       return IsAlphaNumeric(ch);
86    } else {
87       return u_iswalnum(ch) != 0;
88    }
89 }
90 
IsHaskellUpperCase(const int ch)91 static inline bool IsHaskellUpperCase(const int ch) {
92    if (IsASCII(ch)) {
93       return ch >= 'A' && ch <= 'Z';
94    } else {
95       return u_iswupper(ch) != 0;
96    }
97 }
98 
IsAnHaskellOperatorChar(const int ch)99 static inline bool IsAnHaskellOperatorChar(const int ch) {
100    if (IsASCII(ch)) {
101       return
102          (  ch == '!' || ch == '#' || ch == '$' || ch == '%'
103          || ch == '&' || ch == '*' || ch == '+' || ch == '-'
104          || ch == '.' || ch == '/' || ch == ':' || ch == '<'
105          || ch == '=' || ch == '>' || ch == '?' || ch == '@'
106          || ch == '^' || ch == '|' || ch == '~' || ch == '\\');
107    } else {
108       return u_IsHaskellSymbol(ch) != 0;
109    }
110 }
111 
IsAHaskellWordStart(const int ch)112 static inline bool IsAHaskellWordStart(const int ch) {
113    return IsHaskellLetter(ch) || ch == '_';
114 }
115 
IsAHaskellWordChar(const int ch)116 static inline bool IsAHaskellWordChar(const int ch) {
117    return (  IsHaskellAlphaNumeric(ch)
118           || ch == '_'
119           || ch == '\'');
120 }
121 
IsCommentBlockStyle(int style)122 static inline bool IsCommentBlockStyle(int style) {
123    return (style >= SCE_HA_COMMENTBLOCK && style <= SCE_HA_COMMENTBLOCK3);
124 }
125 
IsCommentStyle(int style)126 static inline bool IsCommentStyle(int style) {
127    return (style >= SCE_HA_COMMENTLINE && style <= SCE_HA_COMMENTBLOCK3)
128        || ( style == SCE_HA_LITERATE_COMMENT
129          || style == SCE_HA_LITERATE_CODEDELIM);
130 }
131 
132 // styles which do not belong to Haskell, but to external tools
IsExternalStyle(int style)133 static inline bool IsExternalStyle(int style) {
134    return ( style == SCE_HA_PREPROCESSOR
135          || style == SCE_HA_LITERATE_COMMENT
136          || style == SCE_HA_LITERATE_CODEDELIM);
137 }
138 
CommentBlockStyleFromNestLevel(const unsigned int nestLevel)139 static inline int CommentBlockStyleFromNestLevel(const unsigned int nestLevel) {
140    return SCE_HA_COMMENTBLOCK + (nestLevel % 3);
141 }
142 
143 // Mangled version of lexlib/Accessor.cxx IndentAmount.
144 // Modified to treat comment blocks as whitespace
145 // plus special case for commentline/preprocessor.
HaskellIndentAmount(Accessor & styler,const Sci_Position line)146 static int HaskellIndentAmount(Accessor &styler, const Sci_Position line) {
147 
148    // Determines the indentation level of the current line
149    // Comment blocks are treated as whitespace
150 
151    Sci_Position pos = styler.LineStart(line);
152    Sci_Position eol_pos = styler.LineStart(line + 1) - 1;
153 
154    char ch = styler[pos];
155    int style = styler.StyleAt(pos);
156 
157    int indent = 0;
158    bool inPrevPrefix = line > 0;
159 
160    Sci_Position posPrev = inPrevPrefix ? styler.LineStart(line-1) : 0;
161 
162    while ((  ch == ' ' || ch == '\t'
163           || IsCommentBlockStyle(style)
164           || style == SCE_HA_LITERATE_CODEDELIM)
165          && (pos < eol_pos)) {
166       if (inPrevPrefix) {
167          char chPrev = styler[posPrev++];
168          if (chPrev != ' ' && chPrev != '\t') {
169             inPrevPrefix = false;
170          }
171       }
172       if (ch == '\t') {
173          indent = (indent / 8 + 1) * 8;
174       } else { // Space or comment block
175          indent++;
176       }
177       pos++;
178       ch = styler[pos];
179       style = styler.StyleAt(pos);
180    }
181 
182    indent += SC_FOLDLEVELBASE;
183    // if completely empty line or the start of a comment or preprocessor...
184    if (  styler.LineStart(line) == styler.Length()
185       || ch == ' '
186       || ch == '\t'
187       || ch == '\n'
188       || ch == '\r'
189       || IsCommentStyle(style)
190       || style == SCE_HA_PREPROCESSOR)
191       return indent | SC_FOLDLEVELWHITEFLAG;
192    else
193       return indent;
194 }
195 
196 struct OptionsHaskell {
197    bool magicHash;
198    bool allowQuotes;
199    bool implicitParams;
200    bool highlightSafe;
201    bool cpp;
202    bool stylingWithinPreprocessor;
203    bool fold;
204    bool foldComment;
205    bool foldCompact;
206    bool foldImports;
OptionsHaskellOptionsHaskell207    OptionsHaskell() {
208       magicHash = true;       // Widespread use, enabled by default.
209       allowQuotes = true;     // Widespread use, enabled by default.
210       implicitParams = false; // Fell out of favor, seldom used, disabled.
211       highlightSafe = true;   // Moderately used, doesn't hurt to enable.
212       cpp = true;             // Widespread use, enabled by default;
213       stylingWithinPreprocessor = false;
214       fold = false;
215       foldComment = false;
216       foldCompact = false;
217       foldImports = false;
218    }
219 };
220 
221 static const char * const haskellWordListDesc[] = {
222    "Keywords",
223    "FFI",
224    "Reserved operators",
225    0
226 };
227 
228 struct OptionSetHaskell : public OptionSet<OptionsHaskell> {
OptionSetHaskellOptionSetHaskell229    OptionSetHaskell() {
230       DefineProperty("lexer.haskell.allow.hash", &OptionsHaskell::magicHash,
231          "Set to 0 to disallow the '#' character at the end of identifiers and "
232          "literals with the haskell lexer "
233          "(GHC -XMagicHash extension)");
234 
235       DefineProperty("lexer.haskell.allow.quotes", &OptionsHaskell::allowQuotes,
236          "Set to 0 to disable highlighting of Template Haskell name quotations "
237          "and promoted constructors "
238          "(GHC -XTemplateHaskell and -XDataKinds extensions)");
239 
240       DefineProperty("lexer.haskell.allow.questionmark", &OptionsHaskell::implicitParams,
241          "Set to 1 to allow the '?' character at the start of identifiers "
242          "with the haskell lexer "
243          "(GHC & Hugs -XImplicitParams extension)");
244 
245       DefineProperty("lexer.haskell.import.safe", &OptionsHaskell::highlightSafe,
246          "Set to 0 to disallow \"safe\" keyword in imports "
247          "(GHC -XSafe, -XTrustworthy, -XUnsafe extensions)");
248 
249       DefineProperty("lexer.haskell.cpp", &OptionsHaskell::cpp,
250          "Set to 0 to disable C-preprocessor highlighting "
251          "(-XCPP extension)");
252 
253       DefineProperty("styling.within.preprocessor", &OptionsHaskell::stylingWithinPreprocessor,
254          "For Haskell code, determines whether all preprocessor code is styled in the "
255          "preprocessor style (0, the default) or only from the initial # to the end "
256          "of the command word(1)."
257          );
258 
259       DefineProperty("fold", &OptionsHaskell::fold);
260 
261       DefineProperty("fold.comment", &OptionsHaskell::foldComment);
262 
263       DefineProperty("fold.compact", &OptionsHaskell::foldCompact);
264 
265       DefineProperty("fold.haskell.imports", &OptionsHaskell::foldImports,
266          "Set to 1 to enable folding of import declarations");
267 
268       DefineWordListSets(haskellWordListDesc);
269    }
270 };
271 
272 class LexerHaskell : public ILexer {
273    bool literate;
274    Sci_Position firstImportLine;
275    int firstImportIndent;
276    WordList keywords;
277    WordList ffi;
278    WordList reserved_operators;
279    OptionsHaskell options;
280    OptionSetHaskell osHaskell;
281 
282    enum HashCount {
283        oneHash
284       ,twoHashes
285       ,unlimitedHashes
286    };
287 
288    enum KeywordMode {
289        HA_MODE_DEFAULT = 0
290       ,HA_MODE_IMPORT1 = 1 // after "import", before "qualified" or "safe" or package name or module name.
291       ,HA_MODE_IMPORT2 = 2 // after module name, before "as" or "hiding".
292       ,HA_MODE_IMPORT3 = 3 // after "as", before "hiding"
293       ,HA_MODE_MODULE  = 4 // after "module", before module name.
294       ,HA_MODE_FFI     = 5 // after "foreign", before FFI keywords
295       ,HA_MODE_TYPE    = 6 // after "type" or "data", before "family"
296    };
297 
298    enum LiterateMode {
299        LITERATE_BIRD  = 0 // if '>' is the first character on the line,
300                           //   color '>' as a codedelim and the rest of
301                           //   the line as code.
302                           // else if "\begin{code}" is the only word on the
303                           //    line except whitespace, switch to LITERATE_BLOCK
304                           // otherwise color the line as a literate comment.
305       ,LITERATE_BLOCK = 1 // if the string "\end{code}" is encountered at column
306                           //   0 ignoring all later characters, color the line
307                           //   as a codedelim and switch to LITERATE_BIRD
308                           // otherwise color the line as code.
309    };
310 
311    struct HaskellLineInfo {
312       unsigned int nestLevel; // 22 bits ought to be enough for anybody
313       unsigned int nonexternalStyle; // 5 bits, widen if number of styles goes
314                                      // beyond 31.
315       bool pragma;
316       LiterateMode lmode;
317       KeywordMode mode;
318 
HaskellLineInfoLexerHaskell::HaskellLineInfo319       HaskellLineInfo(int state) :
320          nestLevel (state >> 10)
321        , nonexternalStyle ((state >> 5) & 0x1F)
322        , pragma ((state >> 4) & 0x1)
323        , lmode (static_cast<LiterateMode>((state >> 3) & 0x1))
324        , mode (static_cast<KeywordMode>(state & 0x7))
325          {}
326 
ToLineStateLexerHaskell::HaskellLineInfo327       int ToLineState() {
328          return
329               (nestLevel << 10)
330             | (nonexternalStyle << 5)
331             | (pragma << 4)
332             | (lmode << 3)
333             | mode;
334       }
335    };
336 
skipMagicHash(StyleContext & sc,const HashCount hashes) const337    inline void skipMagicHash(StyleContext &sc, const HashCount hashes) const {
338       if (options.magicHash && sc.ch == '#') {
339          sc.Forward();
340          if (hashes == twoHashes && sc.ch == '#') {
341             sc.Forward();
342          } else if (hashes == unlimitedHashes) {
343             while (sc.ch == '#') {
344                sc.Forward();
345             }
346          }
347       }
348    }
349 
LineContainsImport(const Sci_Position line,Accessor & styler) const350    bool LineContainsImport(const Sci_Position line, Accessor &styler) const {
351       if (options.foldImports) {
352          Sci_Position currentPos = styler.LineStart(line);
353          int style = styler.StyleAt(currentPos);
354 
355          Sci_Position eol_pos = styler.LineStart(line + 1) - 1;
356 
357          while (currentPos < eol_pos) {
358             int ch = styler[currentPos];
359             style = styler.StyleAt(currentPos);
360 
361             if (ch == ' ' || ch == '\t'
362              || IsCommentBlockStyle(style)
363              || style == SCE_HA_LITERATE_CODEDELIM) {
364                currentPos++;
365             } else {
366                break;
367             }
368          }
369 
370          return (style == SCE_HA_KEYWORD
371               && styler.Match(currentPos, "import"));
372       } else {
373          return false;
374       }
375    }
376 
IndentAmountWithOffset(Accessor & styler,const Sci_Position line) const377    inline int IndentAmountWithOffset(Accessor &styler, const Sci_Position line) const {
378       const int indent = HaskellIndentAmount(styler, line);
379       const int indentLevel = indent & SC_FOLDLEVELNUMBERMASK;
380       return indentLevel <= ((firstImportIndent - 1) + SC_FOLDLEVELBASE)
381                ? indent
382                : (indentLevel + firstImportIndent) | (indent & ~SC_FOLDLEVELNUMBERMASK);
383    }
384 
IndentLevelRemoveIndentOffset(const int indentLevel) const385    inline int IndentLevelRemoveIndentOffset(const int indentLevel) const {
386       return indentLevel <= ((firstImportIndent - 1) + SC_FOLDLEVELBASE)
387             ? indentLevel
388             : indentLevel - firstImportIndent;
389    }
390 
391 public:
LexerHaskell(bool literate_)392    LexerHaskell(bool literate_)
393       : literate(literate_)
394       , firstImportLine(-1)
395       , firstImportIndent(0)
396       {}
~LexerHaskell()397    virtual ~LexerHaskell() {}
398 
Release()399    void SCI_METHOD Release() {
400       delete this;
401    }
402 
Version() const403    int SCI_METHOD Version() const {
404       return lvOriginal;
405    }
406 
PropertyNames()407    const char * SCI_METHOD PropertyNames() {
408       return osHaskell.PropertyNames();
409    }
410 
PropertyType(const char * name)411    int SCI_METHOD PropertyType(const char *name) {
412       return osHaskell.PropertyType(name);
413    }
414 
DescribeProperty(const char * name)415    const char * SCI_METHOD DescribeProperty(const char *name) {
416       return osHaskell.DescribeProperty(name);
417    }
418 
419    Sci_Position SCI_METHOD PropertySet(const char *key, const char *val);
420 
DescribeWordListSets()421    const char * SCI_METHOD DescribeWordListSets() {
422       return osHaskell.DescribeWordListSets();
423    }
424 
425    Sci_Position SCI_METHOD WordListSet(int n, const char *wl);
426 
427    void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess);
428 
429    void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess);
430 
PrivateCall(int,void *)431    void * SCI_METHOD PrivateCall(int, void *) {
432       return 0;
433    }
434 
LexerFactoryHaskell()435    static ILexer *LexerFactoryHaskell() {
436       return new LexerHaskell(false);
437    }
438 
LexerFactoryLiterateHaskell()439    static ILexer *LexerFactoryLiterateHaskell() {
440       return new LexerHaskell(true);
441    }
442 };
443 
PropertySet(const char * key,const char * val)444 Sci_Position SCI_METHOD LexerHaskell::PropertySet(const char *key, const char *val) {
445    if (osHaskell.PropertySet(&options, key, val)) {
446       return 0;
447    }
448    return -1;
449 }
450 
WordListSet(int n,const char * wl)451 Sci_Position SCI_METHOD LexerHaskell::WordListSet(int n, const char *wl) {
452    WordList *wordListN = 0;
453    switch (n) {
454    case 0:
455       wordListN = &keywords;
456       break;
457    case 1:
458       wordListN = &ffi;
459       break;
460    case 2:
461       wordListN = &reserved_operators;
462       break;
463    }
464    Sci_Position firstModification = -1;
465    if (wordListN) {
466       WordList wlNew;
467       wlNew.Set(wl);
468       if (*wordListN != wlNew) {
469          wordListN->Set(wl);
470          firstModification = 0;
471       }
472    }
473    return firstModification;
474 }
475 
Lex(Sci_PositionU startPos,Sci_Position length,int initStyle,IDocument * pAccess)476 void SCI_METHOD LexerHaskell::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle
477                                  ,IDocument *pAccess) {
478    LexAccessor styler(pAccess);
479 
480    Sci_Position lineCurrent = styler.GetLine(startPos);
481 
482    HaskellLineInfo hs = HaskellLineInfo(lineCurrent ? styler.GetLineState(lineCurrent-1) : 0);
483 
484    // Do not leak onto next line
485    if (initStyle == SCE_HA_STRINGEOL)
486       initStyle = SCE_HA_DEFAULT;
487    else if (initStyle == SCE_HA_LITERATE_CODEDELIM)
488       initStyle = hs.nonexternalStyle;
489 
490    StyleContext sc(startPos, length, initStyle, styler);
491 
492    int base = 10;
493    bool dot = false;
494 
495    bool inDashes = false;
496    bool alreadyInTheMiddleOfOperator = false;
497 
498    assert(!(IsCommentBlockStyle(initStyle) && hs.nestLevel == 0));
499 
500    while (sc.More()) {
501       // Check for state end
502 
503       if (!IsExternalStyle(sc.state)) {
504          hs.nonexternalStyle = sc.state;
505       }
506 
507       // For lexer to work, states should unconditionally forward at least one
508       // character.
509       // If they don't, they should still check if they are at line end and
510       // forward if so.
511       // If a state forwards more than one character, it should check every time
512       // that it is not a line end and cease forwarding otherwise.
513       if (sc.atLineEnd) {
514          // Remember the line state for future incremental lexing
515          styler.SetLineState(lineCurrent, hs.ToLineState());
516          lineCurrent++;
517       }
518 
519       // Handle line continuation generically.
520       if (sc.ch == '\\' && (sc.chNext == '\n' || sc.chNext == '\r')
521          && (  sc.state == SCE_HA_STRING
522             || sc.state == SCE_HA_PREPROCESSOR)) {
523          // Remember the line state for future incremental lexing
524          styler.SetLineState(lineCurrent, hs.ToLineState());
525          lineCurrent++;
526 
527          sc.Forward();
528          if (sc.ch == '\r' && sc.chNext == '\n') {
529             sc.Forward();
530          }
531          sc.Forward();
532 
533          continue;
534       }
535 
536       if (sc.atLineStart) {
537 
538          if (sc.state == SCE_HA_STRING || sc.state == SCE_HA_CHARACTER) {
539             // Prevent SCE_HA_STRINGEOL from leaking back to previous line
540             sc.SetState(sc.state);
541          }
542 
543          if (literate && hs.lmode == LITERATE_BIRD) {
544             if (!IsExternalStyle(sc.state)) {
545                sc.SetState(SCE_HA_LITERATE_COMMENT);
546             }
547          }
548       }
549 
550       // External
551          // Literate
552       if (  literate && hs.lmode == LITERATE_BIRD && sc.atLineStart
553          && sc.ch == '>') {
554             sc.SetState(SCE_HA_LITERATE_CODEDELIM);
555             sc.ForwardSetState(hs.nonexternalStyle);
556       }
557       else if (literate && hs.lmode == LITERATE_BIRD && sc.atLineStart
558             && (  sc.ch == ' ' || sc.ch == '\t'
559                || sc.Match("\\begin{code}"))) {
560          sc.SetState(sc.state);
561 
562          while ((sc.ch == ' ' || sc.ch == '\t') && sc.More())
563             sc.Forward();
564 
565          if (sc.Match("\\begin{code}")) {
566             sc.Forward(static_cast<int>(strlen("\\begin{code}")));
567 
568             bool correct = true;
569 
570             while (!sc.atLineEnd && sc.More()) {
571                if (sc.ch != ' ' && sc.ch != '\t') {
572                   correct = false;
573                }
574                sc.Forward();
575             }
576 
577             if (correct) {
578                sc.ChangeState(SCE_HA_LITERATE_CODEDELIM); // color the line end
579                hs.lmode = LITERATE_BLOCK;
580             }
581          }
582       }
583       else if (literate && hs.lmode == LITERATE_BLOCK && sc.atLineStart
584             && sc.Match("\\end{code}")) {
585          sc.SetState(SCE_HA_LITERATE_CODEDELIM);
586 
587          sc.Forward(static_cast<int>(strlen("\\end{code}")));
588 
589          while (!sc.atLineEnd && sc.More()) {
590             sc.Forward();
591          }
592 
593          sc.SetState(SCE_HA_LITERATE_COMMENT);
594          hs.lmode = LITERATE_BIRD;
595       }
596          // Preprocessor
597       else if (sc.atLineStart && sc.ch == '#' && options.cpp
598             && (!options.stylingWithinPreprocessor || sc.state == SCE_HA_DEFAULT)) {
599          sc.SetState(SCE_HA_PREPROCESSOR);
600          sc.Forward();
601       }
602             // Literate
603       else if (sc.state == SCE_HA_LITERATE_COMMENT) {
604          sc.Forward();
605       }
606       else if (sc.state == SCE_HA_LITERATE_CODEDELIM) {
607          sc.ForwardSetState(hs.nonexternalStyle);
608       }
609             // Preprocessor
610       else if (sc.state == SCE_HA_PREPROCESSOR) {
611          if (sc.atLineEnd) {
612             sc.SetState(options.stylingWithinPreprocessor
613                         ? SCE_HA_DEFAULT
614                         : hs.nonexternalStyle);
615             sc.Forward(); // prevent double counting a line
616          } else if (options.stylingWithinPreprocessor && !IsHaskellLetter(sc.ch)) {
617             sc.SetState(SCE_HA_DEFAULT);
618          } else {
619             sc.Forward();
620          }
621       }
622       // Haskell
623          // Operator
624       else if (sc.state == SCE_HA_OPERATOR) {
625          int style = SCE_HA_OPERATOR;
626 
627          if ( sc.ch == ':'
628             && !alreadyInTheMiddleOfOperator
629             // except "::"
630             && !( sc.chNext == ':'
631                && !IsAnHaskellOperatorChar(sc.GetRelative(2)))) {
632             style = SCE_HA_CAPITAL;
633          }
634 
635          alreadyInTheMiddleOfOperator = false;
636 
637          while (IsAnHaskellOperatorChar(sc.ch))
638                sc.Forward();
639 
640          char s[100];
641          sc.GetCurrent(s, sizeof(s));
642 
643          if (reserved_operators.InList(s))
644             style = SCE_HA_RESERVED_OPERATOR;
645 
646          sc.ChangeState(style);
647          sc.SetState(SCE_HA_DEFAULT);
648       }
649          // String
650       else if (sc.state == SCE_HA_STRING) {
651          if (sc.atLineEnd) {
652             sc.ChangeState(SCE_HA_STRINGEOL);
653             sc.ForwardSetState(SCE_HA_DEFAULT);
654          } else if (sc.ch == '\"') {
655             sc.Forward();
656             skipMagicHash(sc, oneHash);
657             sc.SetState(SCE_HA_DEFAULT);
658          } else if (sc.ch == '\\') {
659             sc.Forward(2);
660          } else {
661             sc.Forward();
662          }
663       }
664          // Char
665       else if (sc.state == SCE_HA_CHARACTER) {
666          if (sc.atLineEnd) {
667             sc.ChangeState(SCE_HA_STRINGEOL);
668             sc.ForwardSetState(SCE_HA_DEFAULT);
669          } else if (sc.ch == '\'') {
670             sc.Forward();
671             skipMagicHash(sc, oneHash);
672             sc.SetState(SCE_HA_DEFAULT);
673          } else if (sc.ch == '\\') {
674             sc.Forward(2);
675          } else {
676             sc.Forward();
677          }
678       }
679          // Number
680       else if (sc.state == SCE_HA_NUMBER) {
681          if (sc.atLineEnd) {
682             sc.SetState(SCE_HA_DEFAULT);
683             sc.Forward(); // prevent double counting a line
684          } else if (IsADigit(sc.ch, base)) {
685             sc.Forward();
686          } else if (sc.ch=='.' && dot && IsADigit(sc.chNext, base)) {
687             sc.Forward(2);
688             dot = false;
689          } else if ((base == 10) &&
690                     (sc.ch == 'e' || sc.ch == 'E') &&
691                     (IsADigit(sc.chNext) || sc.chNext == '+' || sc.chNext == '-')) {
692             sc.Forward();
693             if (sc.ch == '+' || sc.ch == '-')
694                 sc.Forward();
695          } else {
696             skipMagicHash(sc, twoHashes);
697             sc.SetState(SCE_HA_DEFAULT);
698          }
699       }
700          // Keyword or Identifier
701       else if (sc.state == SCE_HA_IDENTIFIER) {
702          int style = IsHaskellUpperCase(sc.ch) ? SCE_HA_CAPITAL : SCE_HA_IDENTIFIER;
703 
704          assert(IsAHaskellWordStart(sc.ch));
705 
706          sc.Forward();
707 
708          while (sc.More()) {
709             if (IsAHaskellWordChar(sc.ch)) {
710                sc.Forward();
711             } else if (sc.ch == '.' && style == SCE_HA_CAPITAL) {
712                if (IsHaskellUpperCase(sc.chNext)) {
713                   sc.Forward();
714                   style = SCE_HA_CAPITAL;
715                } else if (IsAHaskellWordStart(sc.chNext)) {
716                   sc.Forward();
717                   style = SCE_HA_IDENTIFIER;
718                } else if (IsAnHaskellOperatorChar(sc.chNext)) {
719                   sc.Forward();
720                   style = sc.ch == ':' ? SCE_HA_CAPITAL : SCE_HA_OPERATOR;
721                   while (IsAnHaskellOperatorChar(sc.ch))
722                      sc.Forward();
723                   break;
724                } else {
725                   break;
726                }
727             } else {
728                break;
729             }
730          }
731 
732          skipMagicHash(sc, unlimitedHashes);
733 
734          char s[100];
735          sc.GetCurrent(s, sizeof(s));
736 
737          KeywordMode new_mode = HA_MODE_DEFAULT;
738 
739          if (keywords.InList(s)) {
740             style = SCE_HA_KEYWORD;
741          } else if (style == SCE_HA_CAPITAL) {
742             if (hs.mode == HA_MODE_IMPORT1 || hs.mode == HA_MODE_IMPORT3) {
743                style    = SCE_HA_MODULE;
744                new_mode = HA_MODE_IMPORT2;
745             } else if (hs.mode == HA_MODE_MODULE) {
746                style = SCE_HA_MODULE;
747             }
748          } else if (hs.mode == HA_MODE_IMPORT1 &&
749                     strcmp(s,"qualified") == 0) {
750              style    = SCE_HA_KEYWORD;
751              new_mode = HA_MODE_IMPORT1;
752          } else if (options.highlightSafe &&
753                     hs.mode == HA_MODE_IMPORT1 &&
754                     strcmp(s,"safe") == 0) {
755              style    = SCE_HA_KEYWORD;
756              new_mode = HA_MODE_IMPORT1;
757          } else if (hs.mode == HA_MODE_IMPORT2) {
758              if (strcmp(s,"as") == 0) {
759                 style    = SCE_HA_KEYWORD;
760                 new_mode = HA_MODE_IMPORT3;
761             } else if (strcmp(s,"hiding") == 0) {
762                 style     = SCE_HA_KEYWORD;
763             }
764          } else if (hs.mode == HA_MODE_TYPE) {
765             if (strcmp(s,"family") == 0)
766                style    = SCE_HA_KEYWORD;
767          }
768 
769          if (hs.mode == HA_MODE_FFI) {
770             if (ffi.InList(s)) {
771                style = SCE_HA_KEYWORD;
772                new_mode = HA_MODE_FFI;
773             }
774          }
775 
776          sc.ChangeState(style);
777          sc.SetState(SCE_HA_DEFAULT);
778 
779          if (strcmp(s,"import") == 0 && hs.mode != HA_MODE_FFI)
780             new_mode = HA_MODE_IMPORT1;
781          else if (strcmp(s,"module") == 0)
782             new_mode = HA_MODE_MODULE;
783          else if (strcmp(s,"foreign") == 0)
784             new_mode = HA_MODE_FFI;
785          else if (strcmp(s,"type") == 0
786                || strcmp(s,"data") == 0)
787             new_mode = HA_MODE_TYPE;
788 
789          hs.mode = new_mode;
790       }
791 
792          // Comments
793             // Oneliner
794       else if (sc.state == SCE_HA_COMMENTLINE) {
795          if (sc.atLineEnd) {
796             sc.SetState(hs.pragma ? SCE_HA_PRAGMA : SCE_HA_DEFAULT);
797             sc.Forward(); // prevent double counting a line
798          } else if (inDashes && sc.ch != '-' && !hs.pragma) {
799             inDashes = false;
800             if (IsAnHaskellOperatorChar(sc.ch)) {
801                alreadyInTheMiddleOfOperator = true;
802                sc.ChangeState(SCE_HA_OPERATOR);
803             }
804          } else {
805             sc.Forward();
806          }
807       }
808             // Nested
809       else if (IsCommentBlockStyle(sc.state)) {
810          if (sc.Match('{','-')) {
811             sc.SetState(CommentBlockStyleFromNestLevel(hs.nestLevel));
812             sc.Forward(2);
813             hs.nestLevel++;
814          } else if (sc.Match('-','}')) {
815             sc.Forward(2);
816             assert(hs.nestLevel > 0);
817             if (hs.nestLevel > 0)
818                hs.nestLevel--;
819             sc.SetState(
820                hs.nestLevel == 0
821                   ? (hs.pragma ? SCE_HA_PRAGMA : SCE_HA_DEFAULT)
822                   : CommentBlockStyleFromNestLevel(hs.nestLevel - 1));
823          } else {
824             sc.Forward();
825          }
826       }
827             // Pragma
828       else if (sc.state == SCE_HA_PRAGMA) {
829          if (sc.Match("#-}")) {
830             hs.pragma = false;
831             sc.Forward(3);
832             sc.SetState(SCE_HA_DEFAULT);
833          } else if (sc.Match('-','-')) {
834             sc.SetState(SCE_HA_COMMENTLINE);
835             sc.Forward(2);
836             inDashes = false;
837          } else if (sc.Match('{','-')) {
838             sc.SetState(CommentBlockStyleFromNestLevel(hs.nestLevel));
839             sc.Forward(2);
840             hs.nestLevel = 1;
841          } else {
842             sc.Forward();
843          }
844       }
845             // New state?
846       else if (sc.state == SCE_HA_DEFAULT) {
847          // Digit
848          if (IsADigit(sc.ch)) {
849             hs.mode = HA_MODE_DEFAULT;
850 
851             sc.SetState(SCE_HA_NUMBER);
852             if (sc.ch == '0' && (sc.chNext == 'X' || sc.chNext == 'x')) {
853                // Match anything starting with "0x" or "0X", too
854                sc.Forward(2);
855                base = 16;
856                dot = false;
857             } else if (sc.ch == '0' && (sc.chNext == 'O' || sc.chNext == 'o')) {
858                // Match anything starting with "0o" or "0O", too
859                sc.Forward(2);
860                base = 8;
861                dot = false;
862             } else {
863                sc.Forward();
864                base = 10;
865                dot = true;
866             }
867          }
868          // Pragma
869          else if (sc.Match("{-#")) {
870             hs.pragma = true;
871             sc.SetState(SCE_HA_PRAGMA);
872             sc.Forward(3);
873          }
874          // Comment line
875          else if (sc.Match('-','-')) {
876             sc.SetState(SCE_HA_COMMENTLINE);
877             sc.Forward(2);
878             inDashes = true;
879          }
880          // Comment block
881          else if (sc.Match('{','-')) {
882             sc.SetState(CommentBlockStyleFromNestLevel(hs.nestLevel));
883             sc.Forward(2);
884             hs.nestLevel = 1;
885          }
886          // String
887          else if (sc.ch == '\"') {
888             sc.SetState(SCE_HA_STRING);
889             sc.Forward();
890          }
891          // Character or quoted name or promoted term
892          else if (sc.ch == '\'') {
893             hs.mode = HA_MODE_DEFAULT;
894 
895             sc.SetState(SCE_HA_CHARACTER);
896             sc.Forward();
897 
898             if (options.allowQuotes) {
899                // Quoted type ''T
900                if (sc.ch=='\'' && IsAHaskellWordStart(sc.chNext)) {
901                   sc.Forward();
902                   sc.ChangeState(SCE_HA_IDENTIFIER);
903                } else if (sc.chNext != '\'') {
904                   // Quoted name 'n or promoted constructor 'N
905                   if (IsAHaskellWordStart(sc.ch)) {
906                      sc.ChangeState(SCE_HA_IDENTIFIER);
907                   // Promoted constructor operator ':~>
908                   } else if (sc.ch == ':') {
909                      alreadyInTheMiddleOfOperator = false;
910                      sc.ChangeState(SCE_HA_OPERATOR);
911                   // Promoted list or tuple '[T]
912                   } else if (sc.ch == '[' || sc.ch== '(') {
913                      sc.ChangeState(SCE_HA_OPERATOR);
914                      sc.ForwardSetState(SCE_HA_DEFAULT);
915                   }
916                }
917             }
918          }
919          // Operator starting with '?' or an implicit parameter
920          else if (sc.ch == '?') {
921             hs.mode = HA_MODE_DEFAULT;
922 
923             alreadyInTheMiddleOfOperator = false;
924             sc.SetState(SCE_HA_OPERATOR);
925 
926             if (  options.implicitParams
927                && IsAHaskellWordStart(sc.chNext)
928                && !IsHaskellUpperCase(sc.chNext)) {
929                sc.Forward();
930                sc.ChangeState(SCE_HA_IDENTIFIER);
931             }
932          }
933          // Operator
934          else if (IsAnHaskellOperatorChar(sc.ch)) {
935             hs.mode = HA_MODE_DEFAULT;
936 
937             sc.SetState(SCE_HA_OPERATOR);
938          }
939          // Braces and punctuation
940          else if (sc.ch == ',' || sc.ch == ';'
941                || sc.ch == '(' || sc.ch == ')'
942                || sc.ch == '[' || sc.ch == ']'
943                || sc.ch == '{' || sc.ch == '}') {
944             sc.SetState(SCE_HA_OPERATOR);
945             sc.ForwardSetState(SCE_HA_DEFAULT);
946          }
947          // Keyword or Identifier
948          else if (IsAHaskellWordStart(sc.ch)) {
949             sc.SetState(SCE_HA_IDENTIFIER);
950          // Something we don't care about
951          } else {
952             sc.Forward();
953          }
954       }
955             // This branch should never be reached.
956       else {
957          assert(false);
958          sc.Forward();
959       }
960    }
961    sc.Complete();
962 }
963 
Fold(Sci_PositionU startPos,Sci_Position length,int,IDocument * pAccess)964 void SCI_METHOD LexerHaskell::Fold(Sci_PositionU startPos, Sci_Position length, int // initStyle
965                                   ,IDocument *pAccess) {
966    if (!options.fold)
967       return;
968 
969    Accessor styler(pAccess, NULL);
970 
971    Sci_Position lineCurrent = styler.GetLine(startPos);
972 
973    if (lineCurrent <= firstImportLine) {
974       firstImportLine = -1; // readjust first import position
975       firstImportIndent = 0;
976    }
977 
978    const Sci_Position maxPos = startPos + length;
979    const Sci_Position maxLines =
980       maxPos == styler.Length()
981          ? styler.GetLine(maxPos)
982          : styler.GetLine(maxPos - 1);  // Requested last line
983    const Sci_Position docLines = styler.GetLine(styler.Length()); // Available last line
984 
985    // Backtrack to previous non-blank line so we can determine indent level
986    // for any white space lines
987    // and so we can fix any preceding fold level (which is why we go back
988    // at least one line in all cases)
989    bool importHere = LineContainsImport(lineCurrent, styler);
990    int indentCurrent = IndentAmountWithOffset(styler, lineCurrent);
991 
992    while (lineCurrent > 0) {
993       lineCurrent--;
994       importHere = LineContainsImport(lineCurrent, styler);
995       indentCurrent = IndentAmountWithOffset(styler, lineCurrent);
996       if (!(indentCurrent & SC_FOLDLEVELWHITEFLAG))
997          break;
998    }
999 
1000    int indentCurrentLevel = indentCurrent & SC_FOLDLEVELNUMBERMASK;
1001 
1002    if (importHere) {
1003       indentCurrentLevel = IndentLevelRemoveIndentOffset(indentCurrentLevel);
1004       if (firstImportLine == -1) {
1005          firstImportLine = lineCurrent;
1006          firstImportIndent = (1 + indentCurrentLevel) - SC_FOLDLEVELBASE;
1007       }
1008       if (firstImportLine != lineCurrent) {
1009          indentCurrentLevel++;
1010       }
1011    }
1012 
1013    indentCurrent = indentCurrentLevel | (indentCurrent & ~SC_FOLDLEVELNUMBERMASK);
1014 
1015    // Process all characters to end of requested range
1016    //that hangs over the end of the range.  Cap processing in all cases
1017    // to end of document.
1018    while (lineCurrent <= docLines && lineCurrent <= maxLines) {
1019 
1020       // Gather info
1021       Sci_Position lineNext = lineCurrent + 1;
1022       importHere = false;
1023       int indentNext = indentCurrent;
1024 
1025       if (lineNext <= docLines) {
1026          // Information about next line is only available if not at end of document
1027          importHere = LineContainsImport(lineNext, styler);
1028          indentNext = IndentAmountWithOffset(styler, lineNext);
1029       }
1030       if (indentNext & SC_FOLDLEVELWHITEFLAG)
1031          indentNext = SC_FOLDLEVELWHITEFLAG | indentCurrentLevel;
1032 
1033       // Skip past any blank lines for next indent level info; we skip also
1034       // comments (all comments, not just those starting in column 0)
1035       // which effectively folds them into surrounding code rather
1036       // than screwing up folding.
1037 
1038       while (lineNext < docLines && (indentNext & SC_FOLDLEVELWHITEFLAG)) {
1039          lineNext++;
1040          importHere = LineContainsImport(lineNext, styler);
1041          indentNext = IndentAmountWithOffset(styler, lineNext);
1042       }
1043 
1044       int indentNextLevel = indentNext & SC_FOLDLEVELNUMBERMASK;
1045 
1046       if (importHere) {
1047          indentNextLevel = IndentLevelRemoveIndentOffset(indentNextLevel);
1048          if (firstImportLine == -1) {
1049             firstImportLine = lineNext;
1050             firstImportIndent = (1 + indentNextLevel) - SC_FOLDLEVELBASE;
1051          }
1052          if (firstImportLine != lineNext) {
1053             indentNextLevel++;
1054          }
1055       }
1056 
1057       indentNext = indentNextLevel | (indentNext & ~SC_FOLDLEVELNUMBERMASK);
1058 
1059       const int levelBeforeComments = Maximum(indentCurrentLevel,indentNextLevel);
1060 
1061       // Now set all the indent levels on the lines we skipped
1062       // Do this from end to start.  Once we encounter one line
1063       // which is indented more than the line after the end of
1064       // the comment-block, use the level of the block before
1065 
1066       Sci_Position skipLine = lineNext;
1067       int skipLevel = indentNextLevel;
1068 
1069       while (--skipLine > lineCurrent) {
1070          int skipLineIndent = IndentAmountWithOffset(styler, skipLine);
1071 
1072          if (options.foldCompact) {
1073             if ((skipLineIndent & SC_FOLDLEVELNUMBERMASK) > indentNextLevel) {
1074                skipLevel = levelBeforeComments;
1075             }
1076 
1077             int whiteFlag = skipLineIndent & SC_FOLDLEVELWHITEFLAG;
1078 
1079             styler.SetLevel(skipLine, skipLevel | whiteFlag);
1080          } else {
1081             if (  (skipLineIndent & SC_FOLDLEVELNUMBERMASK) > indentNextLevel
1082                && !(skipLineIndent & SC_FOLDLEVELWHITEFLAG)) {
1083                skipLevel = levelBeforeComments;
1084             }
1085 
1086             styler.SetLevel(skipLine, skipLevel);
1087          }
1088       }
1089 
1090       int lev = indentCurrent;
1091 
1092       if (!(indentCurrent & SC_FOLDLEVELWHITEFLAG)) {
1093          if ((indentCurrent & SC_FOLDLEVELNUMBERMASK) < (indentNext & SC_FOLDLEVELNUMBERMASK))
1094             lev |= SC_FOLDLEVELHEADERFLAG;
1095       }
1096 
1097       // Set fold level for this line and move to next line
1098       styler.SetLevel(lineCurrent, options.foldCompact ? lev : lev & ~SC_FOLDLEVELWHITEFLAG);
1099 
1100       indentCurrent = indentNext;
1101       indentCurrentLevel = indentNextLevel;
1102       lineCurrent = lineNext;
1103    }
1104 
1105    // NOTE: Cannot set level of last line here because indentCurrent doesn't have
1106    // header flag set; the loop above is crafted to take care of this case!
1107    //styler.SetLevel(lineCurrent, indentCurrent);
1108 }
1109 
1110 LexerModule lmHaskell(SCLEX_HASKELL, LexerHaskell::LexerFactoryHaskell, "haskell", haskellWordListDesc);
1111 LexerModule lmLiterateHaskell(SCLEX_LITERATEHASKELL, LexerHaskell::LexerFactoryLiterateHaskell, "literatehaskell", haskellWordListDesc);
1112