1 /******************************************************************
2  *    LexHaskell.cxx
3  *
4  *    A haskell lexer for the scintilla code control.
5  *    Some stuff "lended" from LexPython.cxx and LexCPP.cxx.
6  *    External lexer stuff inspired from the caml external lexer.
7  *    Folder copied from Python's.
8  *
9  *    Written by Tobias Engvall - tumm at dtek dot chalmers dot se
10  *
11  *    Several bug fixes by Krasimir Angelov - kr.angelov at gmail.com
12  *
13  *    Improved by kudah <kudahkukarek@gmail.com>
14  *
15  *    TODO:
16  *    * A proper lexical folder to fold group declarations, comments, pragmas,
17  *      #ifdefs, explicit layout, lists, tuples, quasi-quotes, splces, etc, etc,
18  *      etc.
19  *
20  *****************************************************************/
21 #include <stdlib.h>
22 #include <string.h>
23 #include <stdio.h>
24 #include <stdarg.h>
25 #include <assert.h>
26 #include <ctype.h>
27 
28 #include <string>
29 #include <map>
30 
31 #include "ILexer.h"
32 #include "Scintilla.h"
33 #include "SciLexer.h"
34 
35 #include "PropSetSimple.h"
36 #include "WordList.h"
37 #include "LexAccessor.h"
38 #include "Accessor.h"
39 #include "StyleContext.h"
40 #include "CharacterSet.h"
41 #include "CharacterCategory.h"
42 #include "LexerModule.h"
43 #include "OptionSet.h"
44 #include "DefaultLexer.h"
45 
46 using namespace Scintilla;
47 
48 // See https://github.com/ghc/ghc/blob/master/compiler/parser/Lexer.x#L1682
49 // Note, letter modifiers are prohibited.
50 
u_iswupper(int ch)51 static int u_iswupper (int ch) {
52    CharacterCategory c = CategoriseCharacter(ch);
53    return c == ccLu || c == ccLt;
54 }
55 
u_iswalpha(int ch)56 static int u_iswalpha (int ch) {
57    CharacterCategory c = CategoriseCharacter(ch);
58    return c == ccLl || c == ccLu || c == ccLt || c == ccLo;
59 }
60 
u_iswalnum(int ch)61 static int u_iswalnum (int ch) {
62    CharacterCategory c = CategoriseCharacter(ch);
63    return c == ccLl || c == ccLu || c == ccLt || c == ccLo
64        || c == ccNd || c == ccNo;
65 }
66 
u_IsHaskellSymbol(int ch)67 static int u_IsHaskellSymbol(int ch) {
68    CharacterCategory c = CategoriseCharacter(ch);
69    return c == ccPc || c == ccPd || c == ccPo
70        || c == ccSm || c == ccSc || c == ccSk || c == ccSo;
71 }
72 
IsHaskellLetter(const int ch)73 static inline bool IsHaskellLetter(const int ch) {
74    if (IsASCII(ch)) {
75       return (ch >= 'a' && ch <= 'z')
76           || (ch >= 'A' && ch <= 'Z');
77    } else {
78       return u_iswalpha(ch) != 0;
79    }
80 }
81 
IsHaskellAlphaNumeric(const int ch)82 static inline bool IsHaskellAlphaNumeric(const int ch) {
83    if (IsASCII(ch)) {
84       return IsAlphaNumeric(ch);
85    } else {
86       return u_iswalnum(ch) != 0;
87    }
88 }
89 
IsHaskellUpperCase(const int ch)90 static inline bool IsHaskellUpperCase(const int ch) {
91    if (IsASCII(ch)) {
92       return ch >= 'A' && ch <= 'Z';
93    } else {
94       return u_iswupper(ch) != 0;
95    }
96 }
97 
IsAnHaskellOperatorChar(const int ch)98 static inline bool IsAnHaskellOperatorChar(const int ch) {
99    if (IsASCII(ch)) {
100       return
101          (  ch == '!' || ch == '#' || ch == '$' || ch == '%'
102          || ch == '&' || ch == '*' || ch == '+' || ch == '-'
103          || ch == '.' || ch == '/' || ch == ':' || ch == '<'
104          || ch == '=' || ch == '>' || ch == '?' || ch == '@'
105          || ch == '^' || ch == '|' || ch == '~' || ch == '\\');
106    } else {
107       return u_IsHaskellSymbol(ch) != 0;
108    }
109 }
110 
IsAHaskellWordStart(const int ch)111 static inline bool IsAHaskellWordStart(const int ch) {
112    return IsHaskellLetter(ch) || ch == '_';
113 }
114 
IsAHaskellWordChar(const int ch)115 static inline bool IsAHaskellWordChar(const int ch) {
116    return (  IsHaskellAlphaNumeric(ch)
117           || ch == '_'
118           || ch == '\'');
119 }
120 
IsCommentBlockStyle(int style)121 static inline bool IsCommentBlockStyle(int style) {
122    return (style >= SCE_HA_COMMENTBLOCK && style <= SCE_HA_COMMENTBLOCK3);
123 }
124 
IsCommentStyle(int style)125 static inline bool IsCommentStyle(int style) {
126    return (style >= SCE_HA_COMMENTLINE && style <= SCE_HA_COMMENTBLOCK3)
127        || ( style == SCE_HA_LITERATE_COMMENT
128          || style == SCE_HA_LITERATE_CODEDELIM);
129 }
130 
131 // styles which do not belong to Haskell, but to external tools
IsExternalStyle(int style)132 static inline bool IsExternalStyle(int style) {
133    return ( style == SCE_HA_PREPROCESSOR
134          || style == SCE_HA_LITERATE_COMMENT
135          || style == SCE_HA_LITERATE_CODEDELIM);
136 }
137 
CommentBlockStyleFromNestLevel(const unsigned int nestLevel)138 static inline int CommentBlockStyleFromNestLevel(const unsigned int nestLevel) {
139    return SCE_HA_COMMENTBLOCK + (nestLevel % 3);
140 }
141 
142 // Mangled version of lexlib/Accessor.cxx IndentAmount.
143 // Modified to treat comment blocks as whitespace
144 // plus special case for commentline/preprocessor.
HaskellIndentAmount(Accessor & styler,const Sci_Position line)145 static int HaskellIndentAmount(Accessor &styler, const Sci_Position line) {
146 
147    // Determines the indentation level of the current line
148    // Comment blocks are treated as whitespace
149 
150    Sci_Position pos = styler.LineStart(line);
151    Sci_Position eol_pos = styler.LineStart(line + 1) - 1;
152 
153    char ch = styler[pos];
154    int style = styler.StyleAt(pos);
155 
156    int indent = 0;
157    bool inPrevPrefix = line > 0;
158 
159    Sci_Position posPrev = inPrevPrefix ? styler.LineStart(line-1) : 0;
160 
161    while ((  ch == ' ' || ch == '\t'
162           || IsCommentBlockStyle(style)
163           || style == SCE_HA_LITERATE_CODEDELIM)
164          && (pos < eol_pos)) {
165       if (inPrevPrefix) {
166          char chPrev = styler[posPrev++];
167          if (chPrev != ' ' && chPrev != '\t') {
168             inPrevPrefix = false;
169          }
170       }
171       if (ch == '\t') {
172          indent = (indent / 8 + 1) * 8;
173       } else { // Space or comment block
174          indent++;
175       }
176       pos++;
177       ch = styler[pos];
178       style = styler.StyleAt(pos);
179    }
180 
181    indent += SC_FOLDLEVELBASE;
182    // if completely empty line or the start of a comment or preprocessor...
183    if (  styler.LineStart(line) == styler.Length()
184       || ch == ' '
185       || ch == '\t'
186       || ch == '\n'
187       || ch == '\r'
188       || IsCommentStyle(style)
189       || style == SCE_HA_PREPROCESSOR)
190       return indent | SC_FOLDLEVELWHITEFLAG;
191    else
192       return indent;
193 }
194 
195 struct OptionsHaskell {
196    bool magicHash;
197    bool allowQuotes;
198    bool implicitParams;
199    bool highlightSafe;
200    bool cpp;
201    bool stylingWithinPreprocessor;
202    bool fold;
203    bool foldComment;
204    bool foldCompact;
205    bool foldImports;
OptionsHaskellOptionsHaskell206    OptionsHaskell() {
207       magicHash = true;       // Widespread use, enabled by default.
208       allowQuotes = true;     // Widespread use, enabled by default.
209       implicitParams = false; // Fell out of favor, seldom used, disabled.
210       highlightSafe = true;   // Moderately used, doesn't hurt to enable.
211       cpp = true;             // Widespread use, enabled by default;
212       stylingWithinPreprocessor = false;
213       fold = false;
214       foldComment = false;
215       foldCompact = false;
216       foldImports = false;
217    }
218 };
219 
220 static const char * const haskellWordListDesc[] = {
221    "Keywords",
222    "FFI",
223    "Reserved operators",
224    0
225 };
226 
227 struct OptionSetHaskell : public OptionSet<OptionsHaskell> {
OptionSetHaskellOptionSetHaskell228    OptionSetHaskell() {
229       DefineProperty("lexer.haskell.allow.hash", &OptionsHaskell::magicHash,
230          "Set to 0 to disallow the '#' character at the end of identifiers and "
231          "literals with the haskell lexer "
232          "(GHC -XMagicHash extension)");
233 
234       DefineProperty("lexer.haskell.allow.quotes", &OptionsHaskell::allowQuotes,
235          "Set to 0 to disable highlighting of Template Haskell name quotations "
236          "and promoted constructors "
237          "(GHC -XTemplateHaskell and -XDataKinds extensions)");
238 
239       DefineProperty("lexer.haskell.allow.questionmark", &OptionsHaskell::implicitParams,
240          "Set to 1 to allow the '?' character at the start of identifiers "
241          "with the haskell lexer "
242          "(GHC & Hugs -XImplicitParams extension)");
243 
244       DefineProperty("lexer.haskell.import.safe", &OptionsHaskell::highlightSafe,
245          "Set to 0 to disallow \"safe\" keyword in imports "
246          "(GHC -XSafe, -XTrustworthy, -XUnsafe extensions)");
247 
248       DefineProperty("lexer.haskell.cpp", &OptionsHaskell::cpp,
249          "Set to 0 to disable C-preprocessor highlighting "
250          "(-XCPP extension)");
251 
252       DefineProperty("styling.within.preprocessor", &OptionsHaskell::stylingWithinPreprocessor,
253          "For Haskell code, determines whether all preprocessor code is styled in the "
254          "preprocessor style (0, the default) or only from the initial # to the end "
255          "of the command word(1)."
256          );
257 
258       DefineProperty("fold", &OptionsHaskell::fold);
259 
260       DefineProperty("fold.comment", &OptionsHaskell::foldComment);
261 
262       DefineProperty("fold.compact", &OptionsHaskell::foldCompact);
263 
264       DefineProperty("fold.haskell.imports", &OptionsHaskell::foldImports,
265          "Set to 1 to enable folding of import declarations");
266 
267       DefineWordListSets(haskellWordListDesc);
268    }
269 };
270 
271 class LexerHaskell : public DefaultLexer {
272    bool literate;
273    Sci_Position firstImportLine;
274    int firstImportIndent;
275    WordList keywords;
276    WordList ffi;
277    WordList reserved_operators;
278    OptionsHaskell options;
279    OptionSetHaskell osHaskell;
280 
281    enum HashCount {
282        oneHash
283       ,twoHashes
284       ,unlimitedHashes
285    };
286 
287    enum KeywordMode {
288        HA_MODE_DEFAULT = 0
289       ,HA_MODE_IMPORT1 = 1 // after "import", before "qualified" or "safe" or package name or module name.
290       ,HA_MODE_IMPORT2 = 2 // after module name, before "as" or "hiding".
291       ,HA_MODE_IMPORT3 = 3 // after "as", before "hiding"
292       ,HA_MODE_MODULE  = 4 // after "module", before module name.
293       ,HA_MODE_FFI     = 5 // after "foreign", before FFI keywords
294       ,HA_MODE_TYPE    = 6 // after "type" or "data", before "family"
295    };
296 
297    enum LiterateMode {
298        LITERATE_BIRD  = 0 // if '>' is the first character on the line,
299                           //   color '>' as a codedelim and the rest of
300                           //   the line as code.
301                           // else if "\begin{code}" is the only word on the
302                           //    line except whitespace, switch to LITERATE_BLOCK
303                           // otherwise color the line as a literate comment.
304       ,LITERATE_BLOCK = 1 // if the string "\end{code}" is encountered at column
305                           //   0 ignoring all later characters, color the line
306                           //   as a codedelim and switch to LITERATE_BIRD
307                           // otherwise color the line as code.
308    };
309 
310    struct HaskellLineInfo {
311       unsigned int nestLevel; // 22 bits ought to be enough for anybody
312       unsigned int nonexternalStyle; // 5 bits, widen if number of styles goes
313                                      // beyond 31.
314       bool pragma;
315       LiterateMode lmode;
316       KeywordMode mode;
317 
HaskellLineInfoLexerHaskell::HaskellLineInfo318       HaskellLineInfo(int state) :
319          nestLevel (state >> 10)
320        , nonexternalStyle ((state >> 5) & 0x1F)
321        , pragma ((state >> 4) & 0x1)
322        , lmode (static_cast<LiterateMode>((state >> 3) & 0x1))
323        , mode (static_cast<KeywordMode>(state & 0x7))
324          {}
325 
ToLineStateLexerHaskell::HaskellLineInfo326       int ToLineState() {
327          return
328               (nestLevel << 10)
329             | (nonexternalStyle << 5)
330             | (pragma << 4)
331             | (lmode << 3)
332             | mode;
333       }
334    };
335 
skipMagicHash(StyleContext & sc,const HashCount hashes) const336    inline void skipMagicHash(StyleContext &sc, const HashCount hashes) const {
337       if (options.magicHash && sc.ch == '#') {
338          sc.Forward();
339          if (hashes == twoHashes && sc.ch == '#') {
340             sc.Forward();
341          } else if (hashes == unlimitedHashes) {
342             while (sc.ch == '#') {
343                sc.Forward();
344             }
345          }
346       }
347    }
348 
LineContainsImport(const Sci_Position line,Accessor & styler) const349    bool LineContainsImport(const Sci_Position line, Accessor &styler) const {
350       if (options.foldImports) {
351          Sci_Position currentPos = styler.LineStart(line);
352          int style = styler.StyleAt(currentPos);
353 
354          Sci_Position eol_pos = styler.LineStart(line + 1) - 1;
355 
356          while (currentPos < eol_pos) {
357             int ch = styler[currentPos];
358             style = styler.StyleAt(currentPos);
359 
360             if (ch == ' ' || ch == '\t'
361              || IsCommentBlockStyle(style)
362              || style == SCE_HA_LITERATE_CODEDELIM) {
363                currentPos++;
364             } else {
365                break;
366             }
367          }
368 
369          return (style == SCE_HA_KEYWORD
370               && styler.Match(currentPos, "import"));
371       } else {
372          return false;
373       }
374    }
375 
IndentAmountWithOffset(Accessor & styler,const Sci_Position line) const376    inline int IndentAmountWithOffset(Accessor &styler, const Sci_Position line) const {
377       const int indent = HaskellIndentAmount(styler, line);
378       const int indentLevel = indent & SC_FOLDLEVELNUMBERMASK;
379       return indentLevel <= ((firstImportIndent - 1) + SC_FOLDLEVELBASE)
380                ? indent
381                : (indentLevel + firstImportIndent) | (indent & ~SC_FOLDLEVELNUMBERMASK);
382    }
383 
IndentLevelRemoveIndentOffset(const int indentLevel) const384    inline int IndentLevelRemoveIndentOffset(const int indentLevel) const {
385       return indentLevel <= ((firstImportIndent - 1) + SC_FOLDLEVELBASE)
386             ? indentLevel
387             : indentLevel - firstImportIndent;
388    }
389 
390 public:
LexerHaskell(bool literate_)391    LexerHaskell(bool literate_)
392       : literate(literate_)
393       , firstImportLine(-1)
394       , firstImportIndent(0)
395       {}
~LexerHaskell()396    virtual ~LexerHaskell() {}
397 
Release()398    void SCI_METHOD Release() override {
399       delete this;
400    }
401 
Version() const402    int SCI_METHOD Version() const override {
403       return lvOriginal;
404    }
405 
PropertyNames()406    const char * SCI_METHOD PropertyNames() override {
407       return osHaskell.PropertyNames();
408    }
409 
PropertyType(const char * name)410    int SCI_METHOD PropertyType(const char *name) override {
411       return osHaskell.PropertyType(name);
412    }
413 
DescribeProperty(const char * name)414    const char * SCI_METHOD DescribeProperty(const char *name) override {
415       return osHaskell.DescribeProperty(name);
416    }
417 
418    Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override;
419 
DescribeWordListSets()420    const char * SCI_METHOD DescribeWordListSets() override {
421       return osHaskell.DescribeWordListSets();
422    }
423 
424    Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;
425 
426    void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
427 
428    void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
429 
PrivateCall(int,void *)430    void * SCI_METHOD PrivateCall(int, void *) override {
431       return 0;
432    }
433 
LexerFactoryHaskell()434    static ILexer *LexerFactoryHaskell() {
435       return new LexerHaskell(false);
436    }
437 
LexerFactoryLiterateHaskell()438    static ILexer *LexerFactoryLiterateHaskell() {
439       return new LexerHaskell(true);
440    }
441 };
442 
PropertySet(const char * key,const char * val)443 Sci_Position SCI_METHOD LexerHaskell::PropertySet(const char *key, const char *val) {
444    if (osHaskell.PropertySet(&options, key, val)) {
445       return 0;
446    }
447    return -1;
448 }
449 
WordListSet(int n,const char * wl)450 Sci_Position SCI_METHOD LexerHaskell::WordListSet(int n, const char *wl) {
451    WordList *wordListN = 0;
452    switch (n) {
453    case 0:
454       wordListN = &keywords;
455       break;
456    case 1:
457       wordListN = &ffi;
458       break;
459    case 2:
460       wordListN = &reserved_operators;
461       break;
462    }
463    Sci_Position firstModification = -1;
464    if (wordListN) {
465       WordList wlNew;
466       wlNew.Set(wl);
467       if (*wordListN != wlNew) {
468          wordListN->Set(wl);
469          firstModification = 0;
470       }
471    }
472    return firstModification;
473 }
474 
Lex(Sci_PositionU startPos,Sci_Position length,int initStyle,IDocument * pAccess)475 void SCI_METHOD LexerHaskell::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle
476                                  ,IDocument *pAccess) {
477    LexAccessor styler(pAccess);
478 
479    Sci_Position lineCurrent = styler.GetLine(startPos);
480 
481    HaskellLineInfo hs = HaskellLineInfo(lineCurrent ? styler.GetLineState(lineCurrent-1) : 0);
482 
483    // Do not leak onto next line
484    if (initStyle == SCE_HA_STRINGEOL)
485       initStyle = SCE_HA_DEFAULT;
486    else if (initStyle == SCE_HA_LITERATE_CODEDELIM)
487       initStyle = hs.nonexternalStyle;
488 
489    StyleContext sc(startPos, length, initStyle, styler);
490 
491    int base = 10;
492    bool dot = false;
493 
494    bool inDashes = false;
495    bool alreadyInTheMiddleOfOperator = false;
496 
497    assert(!(IsCommentBlockStyle(initStyle) && hs.nestLevel == 0));
498 
499    while (sc.More()) {
500       // Check for state end
501 
502       if (!IsExternalStyle(sc.state)) {
503          hs.nonexternalStyle = sc.state;
504       }
505 
506       // For lexer to work, states should unconditionally forward at least one
507       // character.
508       // If they don't, they should still check if they are at line end and
509       // forward if so.
510       // If a state forwards more than one character, it should check every time
511       // that it is not a line end and cease forwarding otherwise.
512       if (sc.atLineEnd) {
513          // Remember the line state for future incremental lexing
514          styler.SetLineState(lineCurrent, hs.ToLineState());
515          lineCurrent++;
516       }
517 
518       // Handle line continuation generically.
519       if (sc.ch == '\\' && (sc.chNext == '\n' || sc.chNext == '\r')
520          && (  sc.state == SCE_HA_STRING
521             || sc.state == SCE_HA_PREPROCESSOR)) {
522          // Remember the line state for future incremental lexing
523          styler.SetLineState(lineCurrent, hs.ToLineState());
524          lineCurrent++;
525 
526          sc.Forward();
527          if (sc.ch == '\r' && sc.chNext == '\n') {
528             sc.Forward();
529          }
530          sc.Forward();
531 
532          continue;
533       }
534 
535       if (sc.atLineStart) {
536 
537          if (sc.state == SCE_HA_STRING || sc.state == SCE_HA_CHARACTER) {
538             // Prevent SCE_HA_STRINGEOL from leaking back to previous line
539             sc.SetState(sc.state);
540          }
541 
542          if (literate && hs.lmode == LITERATE_BIRD) {
543             if (!IsExternalStyle(sc.state)) {
544                sc.SetState(SCE_HA_LITERATE_COMMENT);
545             }
546          }
547       }
548 
549       // External
550          // Literate
551       if (  literate && hs.lmode == LITERATE_BIRD && sc.atLineStart
552          && sc.ch == '>') {
553             sc.SetState(SCE_HA_LITERATE_CODEDELIM);
554             sc.ForwardSetState(hs.nonexternalStyle);
555       }
556       else if (literate && hs.lmode == LITERATE_BIRD && sc.atLineStart
557             && (  sc.ch == ' ' || sc.ch == '\t'
558                || sc.Match("\\begin{code}"))) {
559          sc.SetState(sc.state);
560 
561          while ((sc.ch == ' ' || sc.ch == '\t') && sc.More())
562             sc.Forward();
563 
564          if (sc.Match("\\begin{code}")) {
565             sc.Forward(static_cast<int>(strlen("\\begin{code}")));
566 
567             bool correct = true;
568 
569             while (!sc.atLineEnd && sc.More()) {
570                if (sc.ch != ' ' && sc.ch != '\t') {
571                   correct = false;
572                }
573                sc.Forward();
574             }
575 
576             if (correct) {
577                sc.ChangeState(SCE_HA_LITERATE_CODEDELIM); // color the line end
578                hs.lmode = LITERATE_BLOCK;
579             }
580          }
581       }
582       else if (literate && hs.lmode == LITERATE_BLOCK && sc.atLineStart
583             && sc.Match("\\end{code}")) {
584          sc.SetState(SCE_HA_LITERATE_CODEDELIM);
585 
586          sc.Forward(static_cast<int>(strlen("\\end{code}")));
587 
588          while (!sc.atLineEnd && sc.More()) {
589             sc.Forward();
590          }
591 
592          sc.SetState(SCE_HA_LITERATE_COMMENT);
593          hs.lmode = LITERATE_BIRD;
594       }
595          // Preprocessor
596       else if (sc.atLineStart && sc.ch == '#' && options.cpp
597             && (!options.stylingWithinPreprocessor || sc.state == SCE_HA_DEFAULT)) {
598          sc.SetState(SCE_HA_PREPROCESSOR);
599          sc.Forward();
600       }
601             // Literate
602       else if (sc.state == SCE_HA_LITERATE_COMMENT) {
603          sc.Forward();
604       }
605       else if (sc.state == SCE_HA_LITERATE_CODEDELIM) {
606          sc.ForwardSetState(hs.nonexternalStyle);
607       }
608             // Preprocessor
609       else if (sc.state == SCE_HA_PREPROCESSOR) {
610          if (sc.atLineEnd) {
611             sc.SetState(options.stylingWithinPreprocessor
612                         ? SCE_HA_DEFAULT
613                         : hs.nonexternalStyle);
614             sc.Forward(); // prevent double counting a line
615          } else if (options.stylingWithinPreprocessor && !IsHaskellLetter(sc.ch)) {
616             sc.SetState(SCE_HA_DEFAULT);
617          } else {
618             sc.Forward();
619          }
620       }
621       // Haskell
622          // Operator
623       else if (sc.state == SCE_HA_OPERATOR) {
624          int style = SCE_HA_OPERATOR;
625 
626          if ( sc.ch == ':'
627             && !alreadyInTheMiddleOfOperator
628             // except "::"
629             && !( sc.chNext == ':'
630                && !IsAnHaskellOperatorChar(sc.GetRelative(2)))) {
631             style = SCE_HA_CAPITAL;
632          }
633 
634          alreadyInTheMiddleOfOperator = false;
635 
636          while (IsAnHaskellOperatorChar(sc.ch))
637                sc.Forward();
638 
639          char s[100];
640          sc.GetCurrent(s, sizeof(s));
641 
642          if (reserved_operators.InList(s))
643             style = SCE_HA_RESERVED_OPERATOR;
644 
645          sc.ChangeState(style);
646          sc.SetState(SCE_HA_DEFAULT);
647       }
648          // String
649       else if (sc.state == SCE_HA_STRING) {
650          if (sc.atLineEnd) {
651             sc.ChangeState(SCE_HA_STRINGEOL);
652             sc.ForwardSetState(SCE_HA_DEFAULT);
653          } else if (sc.ch == '\"') {
654             sc.Forward();
655             skipMagicHash(sc, oneHash);
656             sc.SetState(SCE_HA_DEFAULT);
657          } else if (sc.ch == '\\') {
658             sc.Forward(2);
659          } else {
660             sc.Forward();
661          }
662       }
663          // Char
664       else if (sc.state == SCE_HA_CHARACTER) {
665          if (sc.atLineEnd) {
666             sc.ChangeState(SCE_HA_STRINGEOL);
667             sc.ForwardSetState(SCE_HA_DEFAULT);
668          } else if (sc.ch == '\'') {
669             sc.Forward();
670             skipMagicHash(sc, oneHash);
671             sc.SetState(SCE_HA_DEFAULT);
672          } else if (sc.ch == '\\') {
673             sc.Forward(2);
674          } else {
675             sc.Forward();
676          }
677       }
678          // Number
679       else if (sc.state == SCE_HA_NUMBER) {
680          if (sc.atLineEnd) {
681             sc.SetState(SCE_HA_DEFAULT);
682             sc.Forward(); // prevent double counting a line
683          } else if (IsADigit(sc.ch, base)) {
684             sc.Forward();
685          } else if (sc.ch=='.' && dot && IsADigit(sc.chNext, base)) {
686             sc.Forward(2);
687             dot = false;
688          } else if ((base == 10) &&
689                     (sc.ch == 'e' || sc.ch == 'E') &&
690                     (IsADigit(sc.chNext) || sc.chNext == '+' || sc.chNext == '-')) {
691             sc.Forward();
692             if (sc.ch == '+' || sc.ch == '-')
693                 sc.Forward();
694          } else {
695             skipMagicHash(sc, twoHashes);
696             sc.SetState(SCE_HA_DEFAULT);
697          }
698       }
699          // Keyword or Identifier
700       else if (sc.state == SCE_HA_IDENTIFIER) {
701          int style = IsHaskellUpperCase(sc.ch) ? SCE_HA_CAPITAL : SCE_HA_IDENTIFIER;
702 
703          assert(IsAHaskellWordStart(sc.ch));
704 
705          sc.Forward();
706 
707          while (sc.More()) {
708             if (IsAHaskellWordChar(sc.ch)) {
709                sc.Forward();
710             } else if (sc.ch == '.' && style == SCE_HA_CAPITAL) {
711                if (IsHaskellUpperCase(sc.chNext)) {
712                   sc.Forward();
713                   style = SCE_HA_CAPITAL;
714                } else if (IsAHaskellWordStart(sc.chNext)) {
715                   sc.Forward();
716                   style = SCE_HA_IDENTIFIER;
717                } else if (IsAnHaskellOperatorChar(sc.chNext)) {
718                   sc.Forward();
719                   style = sc.ch == ':' ? SCE_HA_CAPITAL : SCE_HA_OPERATOR;
720                   while (IsAnHaskellOperatorChar(sc.ch))
721                      sc.Forward();
722                   break;
723                } else {
724                   break;
725                }
726             } else {
727                break;
728             }
729          }
730 
731          skipMagicHash(sc, unlimitedHashes);
732 
733          char s[100];
734          sc.GetCurrent(s, sizeof(s));
735 
736          KeywordMode new_mode = HA_MODE_DEFAULT;
737 
738          if (keywords.InList(s)) {
739             style = SCE_HA_KEYWORD;
740          } else if (style == SCE_HA_CAPITAL) {
741             if (hs.mode == HA_MODE_IMPORT1 || hs.mode == HA_MODE_IMPORT3) {
742                style    = SCE_HA_MODULE;
743                new_mode = HA_MODE_IMPORT2;
744             } else if (hs.mode == HA_MODE_MODULE) {
745                style = SCE_HA_MODULE;
746             }
747          } else if (hs.mode == HA_MODE_IMPORT1 &&
748                     strcmp(s,"qualified") == 0) {
749              style    = SCE_HA_KEYWORD;
750              new_mode = HA_MODE_IMPORT1;
751          } else if (options.highlightSafe &&
752                     hs.mode == HA_MODE_IMPORT1 &&
753                     strcmp(s,"safe") == 0) {
754              style    = SCE_HA_KEYWORD;
755              new_mode = HA_MODE_IMPORT1;
756          } else if (hs.mode == HA_MODE_IMPORT2) {
757              if (strcmp(s,"as") == 0) {
758                 style    = SCE_HA_KEYWORD;
759                 new_mode = HA_MODE_IMPORT3;
760             } else if (strcmp(s,"hiding") == 0) {
761                 style     = SCE_HA_KEYWORD;
762             }
763          } else if (hs.mode == HA_MODE_TYPE) {
764             if (strcmp(s,"family") == 0)
765                style    = SCE_HA_KEYWORD;
766          }
767 
768          if (hs.mode == HA_MODE_FFI) {
769             if (ffi.InList(s)) {
770                style = SCE_HA_KEYWORD;
771                new_mode = HA_MODE_FFI;
772             }
773          }
774 
775          sc.ChangeState(style);
776          sc.SetState(SCE_HA_DEFAULT);
777 
778          if (strcmp(s,"import") == 0 && hs.mode != HA_MODE_FFI)
779             new_mode = HA_MODE_IMPORT1;
780          else if (strcmp(s,"module") == 0)
781             new_mode = HA_MODE_MODULE;
782          else if (strcmp(s,"foreign") == 0)
783             new_mode = HA_MODE_FFI;
784          else if (strcmp(s,"type") == 0
785                || strcmp(s,"data") == 0)
786             new_mode = HA_MODE_TYPE;
787 
788          hs.mode = new_mode;
789       }
790 
791          // Comments
792             // Oneliner
793       else if (sc.state == SCE_HA_COMMENTLINE) {
794          if (sc.atLineEnd) {
795             sc.SetState(hs.pragma ? SCE_HA_PRAGMA : SCE_HA_DEFAULT);
796             sc.Forward(); // prevent double counting a line
797          } else if (inDashes && sc.ch != '-' && !hs.pragma) {
798             inDashes = false;
799             if (IsAnHaskellOperatorChar(sc.ch)) {
800                alreadyInTheMiddleOfOperator = true;
801                sc.ChangeState(SCE_HA_OPERATOR);
802             }
803          } else {
804             sc.Forward();
805          }
806       }
807             // Nested
808       else if (IsCommentBlockStyle(sc.state)) {
809          if (sc.Match('{','-')) {
810             sc.SetState(CommentBlockStyleFromNestLevel(hs.nestLevel));
811             sc.Forward(2);
812             hs.nestLevel++;
813          } else if (sc.Match('-','}')) {
814             sc.Forward(2);
815             assert(hs.nestLevel > 0);
816             if (hs.nestLevel > 0)
817                hs.nestLevel--;
818             sc.SetState(
819                hs.nestLevel == 0
820                   ? (hs.pragma ? SCE_HA_PRAGMA : SCE_HA_DEFAULT)
821                   : CommentBlockStyleFromNestLevel(hs.nestLevel - 1));
822          } else {
823             sc.Forward();
824          }
825       }
826             // Pragma
827       else if (sc.state == SCE_HA_PRAGMA) {
828          if (sc.Match("#-}")) {
829             hs.pragma = false;
830             sc.Forward(3);
831             sc.SetState(SCE_HA_DEFAULT);
832          } else if (sc.Match('-','-')) {
833             sc.SetState(SCE_HA_COMMENTLINE);
834             sc.Forward(2);
835             inDashes = false;
836          } else if (sc.Match('{','-')) {
837             sc.SetState(CommentBlockStyleFromNestLevel(hs.nestLevel));
838             sc.Forward(2);
839             hs.nestLevel = 1;
840          } else {
841             sc.Forward();
842          }
843       }
844             // New state?
845       else if (sc.state == SCE_HA_DEFAULT) {
846          // Digit
847          if (IsADigit(sc.ch)) {
848             hs.mode = HA_MODE_DEFAULT;
849 
850             sc.SetState(SCE_HA_NUMBER);
851             if (sc.ch == '0' && (sc.chNext == 'X' || sc.chNext == 'x')) {
852                // Match anything starting with "0x" or "0X", too
853                sc.Forward(2);
854                base = 16;
855                dot = false;
856             } else if (sc.ch == '0' && (sc.chNext == 'O' || sc.chNext == 'o')) {
857                // Match anything starting with "0o" or "0O", too
858                sc.Forward(2);
859                base = 8;
860                dot = false;
861             } else {
862                sc.Forward();
863                base = 10;
864                dot = true;
865             }
866          }
867          // Pragma
868          else if (sc.Match("{-#")) {
869             hs.pragma = true;
870             sc.SetState(SCE_HA_PRAGMA);
871             sc.Forward(3);
872          }
873          // Comment line
874          else if (sc.Match('-','-')) {
875             sc.SetState(SCE_HA_COMMENTLINE);
876             sc.Forward(2);
877             inDashes = true;
878          }
879          // Comment block
880          else if (sc.Match('{','-')) {
881             sc.SetState(CommentBlockStyleFromNestLevel(hs.nestLevel));
882             sc.Forward(2);
883             hs.nestLevel = 1;
884          }
885          // String
886          else if (sc.ch == '\"') {
887             sc.SetState(SCE_HA_STRING);
888             sc.Forward();
889          }
890          // Character or quoted name or promoted term
891          else if (sc.ch == '\'') {
892             hs.mode = HA_MODE_DEFAULT;
893 
894             sc.SetState(SCE_HA_CHARACTER);
895             sc.Forward();
896 
897             if (options.allowQuotes) {
898                // Quoted type ''T
899                if (sc.ch=='\'' && IsAHaskellWordStart(sc.chNext)) {
900                   sc.Forward();
901                   sc.ChangeState(SCE_HA_IDENTIFIER);
902                } else if (sc.chNext != '\'') {
903                   // Quoted name 'n or promoted constructor 'N
904                   if (IsAHaskellWordStart(sc.ch)) {
905                      sc.ChangeState(SCE_HA_IDENTIFIER);
906                   // Promoted constructor operator ':~>
907                   } else if (sc.ch == ':') {
908                      alreadyInTheMiddleOfOperator = false;
909                      sc.ChangeState(SCE_HA_OPERATOR);
910                   // Promoted list or tuple '[T]
911                   } else if (sc.ch == '[' || sc.ch== '(') {
912                      sc.ChangeState(SCE_HA_OPERATOR);
913                      sc.ForwardSetState(SCE_HA_DEFAULT);
914                   }
915                }
916             }
917          }
918          // Operator starting with '?' or an implicit parameter
919          else if (sc.ch == '?') {
920             hs.mode = HA_MODE_DEFAULT;
921 
922             alreadyInTheMiddleOfOperator = false;
923             sc.SetState(SCE_HA_OPERATOR);
924 
925             if (  options.implicitParams
926                && IsAHaskellWordStart(sc.chNext)
927                && !IsHaskellUpperCase(sc.chNext)) {
928                sc.Forward();
929                sc.ChangeState(SCE_HA_IDENTIFIER);
930             }
931          }
932          // Operator
933          else if (IsAnHaskellOperatorChar(sc.ch)) {
934             hs.mode = HA_MODE_DEFAULT;
935 
936             sc.SetState(SCE_HA_OPERATOR);
937          }
938          // Braces and punctuation
939          else if (sc.ch == ',' || sc.ch == ';'
940                || sc.ch == '(' || sc.ch == ')'
941                || sc.ch == '[' || sc.ch == ']'
942                || sc.ch == '{' || sc.ch == '}') {
943             sc.SetState(SCE_HA_OPERATOR);
944             sc.ForwardSetState(SCE_HA_DEFAULT);
945          }
946          // Keyword or Identifier
947          else if (IsAHaskellWordStart(sc.ch)) {
948             sc.SetState(SCE_HA_IDENTIFIER);
949          // Something we don't care about
950          } else {
951             sc.Forward();
952          }
953       }
954             // This branch should never be reached.
955       else {
956          assert(false);
957          sc.Forward();
958       }
959    }
960    sc.Complete();
961 }
962 
Fold(Sci_PositionU startPos,Sci_Position length,int,IDocument * pAccess)963 void SCI_METHOD LexerHaskell::Fold(Sci_PositionU startPos, Sci_Position length, int // initStyle
964                                   ,IDocument *pAccess) {
965    if (!options.fold)
966       return;
967 
968    Accessor styler(pAccess, NULL);
969 
970    Sci_Position lineCurrent = styler.GetLine(startPos);
971 
972    if (lineCurrent <= firstImportLine) {
973       firstImportLine = -1; // readjust first import position
974       firstImportIndent = 0;
975    }
976 
977    const Sci_Position maxPos = startPos + length;
978    const Sci_Position maxLines =
979       maxPos == styler.Length()
980          ? styler.GetLine(maxPos)
981          : styler.GetLine(maxPos - 1);  // Requested last line
982    const Sci_Position docLines = styler.GetLine(styler.Length()); // Available last line
983 
984    // Backtrack to previous non-blank line so we can determine indent level
985    // for any white space lines
986    // and so we can fix any preceding fold level (which is why we go back
987    // at least one line in all cases)
988    bool importHere = LineContainsImport(lineCurrent, styler);
989    int indentCurrent = IndentAmountWithOffset(styler, lineCurrent);
990 
991    while (lineCurrent > 0) {
992       lineCurrent--;
993       importHere = LineContainsImport(lineCurrent, styler);
994       indentCurrent = IndentAmountWithOffset(styler, lineCurrent);
995       if (!(indentCurrent & SC_FOLDLEVELWHITEFLAG))
996          break;
997    }
998 
999    int indentCurrentLevel = indentCurrent & SC_FOLDLEVELNUMBERMASK;
1000 
1001    if (importHere) {
1002       indentCurrentLevel = IndentLevelRemoveIndentOffset(indentCurrentLevel);
1003       if (firstImportLine == -1) {
1004          firstImportLine = lineCurrent;
1005          firstImportIndent = (1 + indentCurrentLevel) - SC_FOLDLEVELBASE;
1006       }
1007       if (firstImportLine != lineCurrent) {
1008          indentCurrentLevel++;
1009       }
1010    }
1011 
1012    indentCurrent = indentCurrentLevel | (indentCurrent & ~SC_FOLDLEVELNUMBERMASK);
1013 
1014    // Process all characters to end of requested range
1015    //that hangs over the end of the range.  Cap processing in all cases
1016    // to end of document.
1017    while (lineCurrent <= docLines && lineCurrent <= maxLines) {
1018 
1019       // Gather info
1020       Sci_Position lineNext = lineCurrent + 1;
1021       importHere = false;
1022       int indentNext = indentCurrent;
1023 
1024       if (lineNext <= docLines) {
1025          // Information about next line is only available if not at end of document
1026          importHere = LineContainsImport(lineNext, styler);
1027          indentNext = IndentAmountWithOffset(styler, lineNext);
1028       }
1029       if (indentNext & SC_FOLDLEVELWHITEFLAG)
1030          indentNext = SC_FOLDLEVELWHITEFLAG | indentCurrentLevel;
1031 
1032       // Skip past any blank lines for next indent level info; we skip also
1033       // comments (all comments, not just those starting in column 0)
1034       // which effectively folds them into surrounding code rather
1035       // than screwing up folding.
1036 
1037       while (lineNext < docLines && (indentNext & SC_FOLDLEVELWHITEFLAG)) {
1038          lineNext++;
1039          importHere = LineContainsImport(lineNext, styler);
1040          indentNext = IndentAmountWithOffset(styler, lineNext);
1041       }
1042 
1043       int indentNextLevel = indentNext & SC_FOLDLEVELNUMBERMASK;
1044 
1045       if (importHere) {
1046          indentNextLevel = IndentLevelRemoveIndentOffset(indentNextLevel);
1047          if (firstImportLine == -1) {
1048             firstImportLine = lineNext;
1049             firstImportIndent = (1 + indentNextLevel) - SC_FOLDLEVELBASE;
1050          }
1051          if (firstImportLine != lineNext) {
1052             indentNextLevel++;
1053          }
1054       }
1055 
1056       indentNext = indentNextLevel | (indentNext & ~SC_FOLDLEVELNUMBERMASK);
1057 
1058       const int levelBeforeComments = Maximum(indentCurrentLevel,indentNextLevel);
1059 
1060       // Now set all the indent levels on the lines we skipped
1061       // Do this from end to start.  Once we encounter one line
1062       // which is indented more than the line after the end of
1063       // the comment-block, use the level of the block before
1064 
1065       Sci_Position skipLine = lineNext;
1066       int skipLevel = indentNextLevel;
1067 
1068       while (--skipLine > lineCurrent) {
1069          int skipLineIndent = IndentAmountWithOffset(styler, skipLine);
1070 
1071          if (options.foldCompact) {
1072             if ((skipLineIndent & SC_FOLDLEVELNUMBERMASK) > indentNextLevel) {
1073                skipLevel = levelBeforeComments;
1074             }
1075 
1076             int whiteFlag = skipLineIndent & SC_FOLDLEVELWHITEFLAG;
1077 
1078             styler.SetLevel(skipLine, skipLevel | whiteFlag);
1079          } else {
1080             if (  (skipLineIndent & SC_FOLDLEVELNUMBERMASK) > indentNextLevel
1081                && !(skipLineIndent & SC_FOLDLEVELWHITEFLAG)) {
1082                skipLevel = levelBeforeComments;
1083             }
1084 
1085             styler.SetLevel(skipLine, skipLevel);
1086          }
1087       }
1088 
1089       int lev = indentCurrent;
1090 
1091       if (!(indentCurrent & SC_FOLDLEVELWHITEFLAG)) {
1092          if ((indentCurrent & SC_FOLDLEVELNUMBERMASK) < (indentNext & SC_FOLDLEVELNUMBERMASK))
1093             lev |= SC_FOLDLEVELHEADERFLAG;
1094       }
1095 
1096       // Set fold level for this line and move to next line
1097       styler.SetLevel(lineCurrent, options.foldCompact ? lev : lev & ~SC_FOLDLEVELWHITEFLAG);
1098 
1099       indentCurrent = indentNext;
1100       indentCurrentLevel = indentNextLevel;
1101       lineCurrent = lineNext;
1102    }
1103 
1104    // NOTE: Cannot set level of last line here because indentCurrent doesn't have
1105    // header flag set; the loop above is crafted to take care of this case!
1106    //styler.SetLevel(lineCurrent, indentCurrent);
1107 }
1108 
1109 LexerModule lmHaskell(SCLEX_HASKELL, LexerHaskell::LexerFactoryHaskell, "haskell", haskellWordListDesc);
1110 LexerModule lmLiterateHaskell(SCLEX_LITERATEHASKELL, LexerHaskell::LexerFactoryLiterateHaskell, "literatehaskell", haskellWordListDesc);
1111