1 /******************************************************************
2  *    LexHaskell.cxx
3  *
4  *    A haskell lexer for the scintilla code control.
5  *    Some stuff "lended" from LexPython.cxx and LexCPP.cxx.
6  *    External lexer stuff inspired from the caml external lexer.
7  *    Folder copied from Python's.
8  *
9  *    Written by Tobias Engvall - tumm at dtek dot chalmers dot se
10  *
11  *    Several bug fixes by Krasimir Angelov - kr.angelov at gmail.com
12  *
13  *    Improved by kudah <kudahkukarek@gmail.com>
14  *
15  *    TODO:
16  *    * A proper lexical folder to fold group declarations, comments, pragmas,
17  *      #ifdefs, explicit layout, lists, tuples, quasi-quotes, splces, etc, etc,
18  *      etc.
19  *
20  *****************************************************************/
21 #include <stdlib.h>
22 #include <string.h>
23 #include <stdio.h>
24 #include <stdarg.h>
25 #include <assert.h>
26 #include <ctype.h>
27 
28 #include <string>
29 #include <vector>
30 #include <map>
31 
32 #include "ILexer.h"
33 #include "Scintilla.h"
34 #include "SciLexer.h"
35 
36 #include "PropSetSimple.h"
37 #include "WordList.h"
38 #include "LexAccessor.h"
39 #include "Accessor.h"
40 #include "StyleContext.h"
41 #include "CharacterSet.h"
42 #include "CharacterCategory.h"
43 #include "LexerModule.h"
44 #include "OptionSet.h"
45 #include "DefaultLexer.h"
46 
47 using namespace Scintilla;
48 
49 // See https://github.com/ghc/ghc/blob/master/compiler/parser/Lexer.x#L1682
50 // Note, letter modifiers are prohibited.
51 
u_iswupper(int ch)52 static int u_iswupper (int ch) {
53    CharacterCategory c = CategoriseCharacter(ch);
54    return c == ccLu || c == ccLt;
55 }
56 
u_iswalpha(int ch)57 static int u_iswalpha (int ch) {
58    CharacterCategory c = CategoriseCharacter(ch);
59    return c == ccLl || c == ccLu || c == ccLt || c == ccLo;
60 }
61 
u_iswalnum(int ch)62 static int u_iswalnum (int ch) {
63    CharacterCategory c = CategoriseCharacter(ch);
64    return c == ccLl || c == ccLu || c == ccLt || c == ccLo
65        || c == ccNd || c == ccNo;
66 }
67 
u_IsHaskellSymbol(int ch)68 static int u_IsHaskellSymbol(int ch) {
69    CharacterCategory c = CategoriseCharacter(ch);
70    return c == ccPc || c == ccPd || c == ccPo
71        || c == ccSm || c == ccSc || c == ccSk || c == ccSo;
72 }
73 
IsHaskellLetter(const int ch)74 static inline bool IsHaskellLetter(const int ch) {
75    if (IsASCII(ch)) {
76       return (ch >= 'a' && ch <= 'z')
77           || (ch >= 'A' && ch <= 'Z');
78    } else {
79       return u_iswalpha(ch) != 0;
80    }
81 }
82 
IsHaskellAlphaNumeric(const int ch)83 static inline bool IsHaskellAlphaNumeric(const int ch) {
84    if (IsASCII(ch)) {
85       return IsAlphaNumeric(ch);
86    } else {
87       return u_iswalnum(ch) != 0;
88    }
89 }
90 
IsHaskellUpperCase(const int ch)91 static inline bool IsHaskellUpperCase(const int ch) {
92    if (IsASCII(ch)) {
93       return ch >= 'A' && ch <= 'Z';
94    } else {
95       return u_iswupper(ch) != 0;
96    }
97 }
98 
IsAnHaskellOperatorChar(const int ch)99 static inline bool IsAnHaskellOperatorChar(const int ch) {
100    if (IsASCII(ch)) {
101       return
102          (  ch == '!' || ch == '#' || ch == '$' || ch == '%'
103          || ch == '&' || ch == '*' || ch == '+' || ch == '-'
104          || ch == '.' || ch == '/' || ch == ':' || ch == '<'
105          || ch == '=' || ch == '>' || ch == '?' || ch == '@'
106          || ch == '^' || ch == '|' || ch == '~' || ch == '\\');
107    } else {
108       return u_IsHaskellSymbol(ch) != 0;
109    }
110 }
111 
IsAHaskellWordStart(const int ch)112 static inline bool IsAHaskellWordStart(const int ch) {
113    return IsHaskellLetter(ch) || ch == '_';
114 }
115 
IsAHaskellWordChar(const int ch)116 static inline bool IsAHaskellWordChar(const int ch) {
117    return (  IsHaskellAlphaNumeric(ch)
118           || ch == '_'
119           || ch == '\'');
120 }
121 
IsCommentBlockStyle(int style)122 static inline bool IsCommentBlockStyle(int style) {
123    return (style >= SCE_HA_COMMENTBLOCK && style <= SCE_HA_COMMENTBLOCK3);
124 }
125 
IsCommentStyle(int style)126 static inline bool IsCommentStyle(int style) {
127    return (style >= SCE_HA_COMMENTLINE && style <= SCE_HA_COMMENTBLOCK3)
128        || ( style == SCE_HA_LITERATE_COMMENT
129          || style == SCE_HA_LITERATE_CODEDELIM);
130 }
131 
132 // styles which do not belong to Haskell, but to external tools
IsExternalStyle(int style)133 static inline bool IsExternalStyle(int style) {
134    return ( style == SCE_HA_PREPROCESSOR
135          || style == SCE_HA_LITERATE_COMMENT
136          || style == SCE_HA_LITERATE_CODEDELIM);
137 }
138 
CommentBlockStyleFromNestLevel(const unsigned int nestLevel)139 static inline int CommentBlockStyleFromNestLevel(const unsigned int nestLevel) {
140    return SCE_HA_COMMENTBLOCK + (nestLevel % 3);
141 }
142 
143 // Mangled version of lexlib/Accessor.cxx IndentAmount.
144 // Modified to treat comment blocks as whitespace
145 // plus special case for commentline/preprocessor.
HaskellIndentAmount(Accessor & styler,const Sci_Position line)146 static int HaskellIndentAmount(Accessor &styler, const Sci_Position line) {
147 
148    // Determines the indentation level of the current line
149    // Comment blocks are treated as whitespace
150 
151    Sci_Position pos = styler.LineStart(line);
152    Sci_Position eol_pos = styler.LineStart(line + 1) - 1;
153 
154    char ch = styler[pos];
155    int style = styler.StyleAt(pos);
156 
157    int indent = 0;
158    bool inPrevPrefix = line > 0;
159 
160    Sci_Position posPrev = inPrevPrefix ? styler.LineStart(line-1) : 0;
161 
162    while ((  ch == ' ' || ch == '\t'
163           || IsCommentBlockStyle(style)
164           || style == SCE_HA_LITERATE_CODEDELIM)
165          && (pos < eol_pos)) {
166       if (inPrevPrefix) {
167          char chPrev = styler[posPrev++];
168          if (chPrev != ' ' && chPrev != '\t') {
169             inPrevPrefix = false;
170          }
171       }
172       if (ch == '\t') {
173          indent = (indent / 8 + 1) * 8;
174       } else { // Space or comment block
175          indent++;
176       }
177       pos++;
178       ch = styler[pos];
179       style = styler.StyleAt(pos);
180    }
181 
182    indent += SC_FOLDLEVELBASE;
183    // if completely empty line or the start of a comment or preprocessor...
184    if (  styler.LineStart(line) == styler.Length()
185       || ch == ' '
186       || ch == '\t'
187       || ch == '\n'
188       || ch == '\r'
189       || IsCommentStyle(style)
190       || style == SCE_HA_PREPROCESSOR)
191       return indent | SC_FOLDLEVELWHITEFLAG;
192    else
193       return indent;
194 }
195 
196 struct OptionsHaskell {
197    bool magicHash;
198    bool allowQuotes;
199    bool implicitParams;
200    bool highlightSafe;
201    bool cpp;
202    bool stylingWithinPreprocessor;
203    bool fold;
204    bool foldComment;
205    bool foldCompact;
206    bool foldImports;
OptionsHaskellOptionsHaskell207    OptionsHaskell() {
208       magicHash = true;       // Widespread use, enabled by default.
209       allowQuotes = true;     // Widespread use, enabled by default.
210       implicitParams = false; // Fell out of favor, seldom used, disabled.
211       highlightSafe = true;   // Moderately used, doesn't hurt to enable.
212       cpp = true;             // Widespread use, enabled by default;
213       stylingWithinPreprocessor = false;
214       fold = false;
215       foldComment = false;
216       foldCompact = false;
217       foldImports = false;
218    }
219 };
220 
221 static const char * const haskellWordListDesc[] = {
222    "Keywords",
223    "FFI",
224    "Reserved operators",
225    0
226 };
227 
228 struct OptionSetHaskell : public OptionSet<OptionsHaskell> {
OptionSetHaskellOptionSetHaskell229    OptionSetHaskell() {
230       DefineProperty("lexer.haskell.allow.hash", &OptionsHaskell::magicHash,
231          "Set to 0 to disallow the '#' character at the end of identifiers and "
232          "literals with the haskell lexer "
233          "(GHC -XMagicHash extension)");
234 
235       DefineProperty("lexer.haskell.allow.quotes", &OptionsHaskell::allowQuotes,
236          "Set to 0 to disable highlighting of Template Haskell name quotations "
237          "and promoted constructors "
238          "(GHC -XTemplateHaskell and -XDataKinds extensions)");
239 
240       DefineProperty("lexer.haskell.allow.questionmark", &OptionsHaskell::implicitParams,
241          "Set to 1 to allow the '?' character at the start of identifiers "
242          "with the haskell lexer "
243          "(GHC & Hugs -XImplicitParams extension)");
244 
245       DefineProperty("lexer.haskell.import.safe", &OptionsHaskell::highlightSafe,
246          "Set to 0 to disallow \"safe\" keyword in imports "
247          "(GHC -XSafe, -XTrustworthy, -XUnsafe extensions)");
248 
249       DefineProperty("lexer.haskell.cpp", &OptionsHaskell::cpp,
250          "Set to 0 to disable C-preprocessor highlighting "
251          "(-XCPP extension)");
252 
253       DefineProperty("styling.within.preprocessor", &OptionsHaskell::stylingWithinPreprocessor,
254          "For Haskell code, determines whether all preprocessor code is styled in the "
255          "preprocessor style (0, the default) or only from the initial # to the end "
256          "of the command word(1)."
257          );
258 
259       DefineProperty("fold", &OptionsHaskell::fold);
260 
261       DefineProperty("fold.comment", &OptionsHaskell::foldComment);
262 
263       DefineProperty("fold.compact", &OptionsHaskell::foldCompact);
264 
265       DefineProperty("fold.haskell.imports", &OptionsHaskell::foldImports,
266          "Set to 1 to enable folding of import declarations");
267 
268       DefineWordListSets(haskellWordListDesc);
269    }
270 };
271 
272 class LexerHaskell : public DefaultLexer {
273    bool literate;
274    Sci_Position firstImportLine;
275    int firstImportIndent;
276    WordList keywords;
277    WordList ffi;
278    WordList reserved_operators;
279    OptionsHaskell options;
280    OptionSetHaskell osHaskell;
281 
282    enum HashCount {
283        oneHash
284       ,twoHashes
285       ,unlimitedHashes
286    };
287 
288    enum KeywordMode {
289        HA_MODE_DEFAULT = 0
290       ,HA_MODE_IMPORT1 = 1 // after "import", before "qualified" or "safe" or package name or module name.
291       ,HA_MODE_IMPORT2 = 2 // after module name, before "as" or "hiding".
292       ,HA_MODE_IMPORT3 = 3 // after "as", before "hiding"
293       ,HA_MODE_MODULE  = 4 // after "module", before module name.
294       ,HA_MODE_FFI     = 5 // after "foreign", before FFI keywords
295       ,HA_MODE_TYPE    = 6 // after "type" or "data", before "family"
296    };
297 
298    enum LiterateMode {
299        LITERATE_BIRD  = 0 // if '>' is the first character on the line,
300                           //   color '>' as a codedelim and the rest of
301                           //   the line as code.
302                           // else if "\begin{code}" is the only word on the
303                           //    line except whitespace, switch to LITERATE_BLOCK
304                           // otherwise color the line as a literate comment.
305       ,LITERATE_BLOCK = 1 // if the string "\end{code}" is encountered at column
306                           //   0 ignoring all later characters, color the line
307                           //   as a codedelim and switch to LITERATE_BIRD
308                           // otherwise color the line as code.
309    };
310 
311    struct HaskellLineInfo {
312       unsigned int nestLevel; // 22 bits ought to be enough for anybody
313       unsigned int nonexternalStyle; // 5 bits, widen if number of styles goes
314                                      // beyond 31.
315       bool pragma;
316       LiterateMode lmode;
317       KeywordMode mode;
318 
HaskellLineInfoLexerHaskell::HaskellLineInfo319       HaskellLineInfo(int state) :
320          nestLevel (state >> 10)
321        , nonexternalStyle ((state >> 5) & 0x1F)
322        , pragma ((state >> 4) & 0x1)
323        , lmode (static_cast<LiterateMode>((state >> 3) & 0x1))
324        , mode (static_cast<KeywordMode>(state & 0x7))
325          {}
326 
ToLineStateLexerHaskell::HaskellLineInfo327       int ToLineState() {
328          return
329               (nestLevel << 10)
330             | (nonexternalStyle << 5)
331             | (pragma << 4)
332             | (lmode << 3)
333             | mode;
334       }
335    };
336 
skipMagicHash(StyleContext & sc,const HashCount hashes) const337    inline void skipMagicHash(StyleContext &sc, const HashCount hashes) const {
338       if (options.magicHash && sc.ch == '#') {
339          sc.Forward();
340          if (hashes == twoHashes && sc.ch == '#') {
341             sc.Forward();
342          } else if (hashes == unlimitedHashes) {
343             while (sc.ch == '#') {
344                sc.Forward();
345             }
346          }
347       }
348    }
349 
LineContainsImport(const Sci_Position line,Accessor & styler) const350    bool LineContainsImport(const Sci_Position line, Accessor &styler) const {
351       if (options.foldImports) {
352          Sci_Position currentPos = styler.LineStart(line);
353          int style = styler.StyleAt(currentPos);
354 
355          Sci_Position eol_pos = styler.LineStart(line + 1) - 1;
356 
357          while (currentPos < eol_pos) {
358             int ch = styler[currentPos];
359             style = styler.StyleAt(currentPos);
360 
361             if (ch == ' ' || ch == '\t'
362              || IsCommentBlockStyle(style)
363              || style == SCE_HA_LITERATE_CODEDELIM) {
364                currentPos++;
365             } else {
366                break;
367             }
368          }
369 
370          return (style == SCE_HA_KEYWORD
371               && styler.Match(currentPos, "import"));
372       } else {
373          return false;
374       }
375    }
376 
IndentAmountWithOffset(Accessor & styler,const Sci_Position line) const377    inline int IndentAmountWithOffset(Accessor &styler, const Sci_Position line) const {
378       const int indent = HaskellIndentAmount(styler, line);
379       const int indentLevel = indent & SC_FOLDLEVELNUMBERMASK;
380       return indentLevel <= ((firstImportIndent - 1) + SC_FOLDLEVELBASE)
381                ? indent
382                : (indentLevel + firstImportIndent) | (indent & ~SC_FOLDLEVELNUMBERMASK);
383    }
384 
IndentLevelRemoveIndentOffset(const int indentLevel) const385    inline int IndentLevelRemoveIndentOffset(const int indentLevel) const {
386       return indentLevel <= ((firstImportIndent - 1) + SC_FOLDLEVELBASE)
387             ? indentLevel
388             : indentLevel - firstImportIndent;
389    }
390 
391 public:
LexerHaskell(bool literate_)392    LexerHaskell(bool literate_)
393       : DefaultLexer(literate_ ? "literatehaskell" : "haskell", literate_ ? SCLEX_LITERATEHASKELL : SCLEX_HASKELL)
394 	  , literate(literate_)
395       , firstImportLine(-1)
396       , firstImportIndent(0)
397       {}
~LexerHaskell()398    virtual ~LexerHaskell() {}
399 
Release()400    void SCI_METHOD Release() override {
401       delete this;
402    }
403 
Version() const404    int SCI_METHOD Version() const override {
405       return lvRelease5;
406    }
407 
PropertyNames()408    const char * SCI_METHOD PropertyNames() override {
409       return osHaskell.PropertyNames();
410    }
411 
PropertyType(const char * name)412    int SCI_METHOD PropertyType(const char *name) override {
413       return osHaskell.PropertyType(name);
414    }
415 
DescribeProperty(const char * name)416    const char * SCI_METHOD DescribeProperty(const char *name) override {
417       return osHaskell.DescribeProperty(name);
418    }
419 
420    Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override;
421 
PropertyGet(const char * key)422    const char * SCI_METHOD PropertyGet(const char *key) override {
423 	   return osHaskell.PropertyGet(key);
424    }
425 
DescribeWordListSets()426    const char * SCI_METHOD DescribeWordListSets() override {
427       return osHaskell.DescribeWordListSets();
428    }
429 
430    Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;
431 
432    void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
433 
434    void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
435 
PrivateCall(int,void *)436    void * SCI_METHOD PrivateCall(int, void *) override {
437       return 0;
438    }
439 
LexerFactoryHaskell()440    static ILexer5 *LexerFactoryHaskell() {
441       return new LexerHaskell(false);
442    }
443 
LexerFactoryLiterateHaskell()444    static ILexer5 *LexerFactoryLiterateHaskell() {
445       return new LexerHaskell(true);
446    }
447 };
448 
PropertySet(const char * key,const char * val)449 Sci_Position SCI_METHOD LexerHaskell::PropertySet(const char *key, const char *val) {
450    if (osHaskell.PropertySet(&options, key, val)) {
451       return 0;
452    }
453    return -1;
454 }
455 
WordListSet(int n,const char * wl)456 Sci_Position SCI_METHOD LexerHaskell::WordListSet(int n, const char *wl) {
457    WordList *wordListN = 0;
458    switch (n) {
459    case 0:
460       wordListN = &keywords;
461       break;
462    case 1:
463       wordListN = &ffi;
464       break;
465    case 2:
466       wordListN = &reserved_operators;
467       break;
468    }
469    Sci_Position firstModification = -1;
470    if (wordListN) {
471       WordList wlNew;
472       wlNew.Set(wl);
473       if (*wordListN != wlNew) {
474          wordListN->Set(wl);
475          firstModification = 0;
476       }
477    }
478    return firstModification;
479 }
480 
Lex(Sci_PositionU startPos,Sci_Position length,int initStyle,IDocument * pAccess)481 void SCI_METHOD LexerHaskell::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle
482                                  ,IDocument *pAccess) {
483    LexAccessor styler(pAccess);
484 
485    Sci_Position lineCurrent = styler.GetLine(startPos);
486 
487    HaskellLineInfo hs = HaskellLineInfo(lineCurrent ? styler.GetLineState(lineCurrent-1) : 0);
488 
489    // Do not leak onto next line
490    if (initStyle == SCE_HA_STRINGEOL)
491       initStyle = SCE_HA_DEFAULT;
492    else if (initStyle == SCE_HA_LITERATE_CODEDELIM)
493       initStyle = hs.nonexternalStyle;
494 
495    StyleContext sc(startPos, length, initStyle, styler);
496 
497    int base = 10;
498    bool dot = false;
499 
500    bool inDashes = false;
501    bool alreadyInTheMiddleOfOperator = false;
502 
503    assert(!(IsCommentBlockStyle(initStyle) && hs.nestLevel == 0));
504 
505    while (sc.More()) {
506       // Check for state end
507 
508       if (!IsExternalStyle(sc.state)) {
509          hs.nonexternalStyle = sc.state;
510       }
511 
512       // For lexer to work, states should unconditionally forward at least one
513       // character.
514       // If they don't, they should still check if they are at line end and
515       // forward if so.
516       // If a state forwards more than one character, it should check every time
517       // that it is not a line end and cease forwarding otherwise.
518       if (sc.atLineEnd) {
519          // Remember the line state for future incremental lexing
520          styler.SetLineState(lineCurrent, hs.ToLineState());
521          lineCurrent++;
522       }
523 
524       // Handle line continuation generically.
525       if (sc.ch == '\\' && (sc.chNext == '\n' || sc.chNext == '\r')
526          && (  sc.state == SCE_HA_STRING
527             || sc.state == SCE_HA_PREPROCESSOR)) {
528          // Remember the line state for future incremental lexing
529          styler.SetLineState(lineCurrent, hs.ToLineState());
530          lineCurrent++;
531 
532          sc.Forward();
533          if (sc.ch == '\r' && sc.chNext == '\n') {
534             sc.Forward();
535          }
536          sc.Forward();
537 
538          continue;
539       }
540 
541       if (sc.atLineStart) {
542 
543          if (sc.state == SCE_HA_STRING || sc.state == SCE_HA_CHARACTER) {
544             // Prevent SCE_HA_STRINGEOL from leaking back to previous line
545             sc.SetState(sc.state);
546          }
547 
548          if (literate && hs.lmode == LITERATE_BIRD) {
549             if (!IsExternalStyle(sc.state)) {
550                sc.SetState(SCE_HA_LITERATE_COMMENT);
551             }
552          }
553       }
554 
555       // External
556          // Literate
557       if (  literate && hs.lmode == LITERATE_BIRD && sc.atLineStart
558          && sc.ch == '>') {
559             sc.SetState(SCE_HA_LITERATE_CODEDELIM);
560             sc.ForwardSetState(hs.nonexternalStyle);
561       }
562       else if (literate && hs.lmode == LITERATE_BIRD && sc.atLineStart
563             && (  sc.ch == ' ' || sc.ch == '\t'
564                || sc.Match("\\begin{code}"))) {
565          sc.SetState(sc.state);
566 
567          while ((sc.ch == ' ' || sc.ch == '\t') && sc.More())
568             sc.Forward();
569 
570          if (sc.Match("\\begin{code}")) {
571             sc.Forward(static_cast<int>(strlen("\\begin{code}")));
572 
573             bool correct = true;
574 
575             while (!sc.atLineEnd && sc.More()) {
576                if (sc.ch != ' ' && sc.ch != '\t') {
577                   correct = false;
578                }
579                sc.Forward();
580             }
581 
582             if (correct) {
583                sc.ChangeState(SCE_HA_LITERATE_CODEDELIM); // color the line end
584                hs.lmode = LITERATE_BLOCK;
585             }
586          }
587       }
588       else if (literate && hs.lmode == LITERATE_BLOCK && sc.atLineStart
589             && sc.Match("\\end{code}")) {
590          sc.SetState(SCE_HA_LITERATE_CODEDELIM);
591 
592          sc.Forward(static_cast<int>(strlen("\\end{code}")));
593 
594          while (!sc.atLineEnd && sc.More()) {
595             sc.Forward();
596          }
597 
598          sc.SetState(SCE_HA_LITERATE_COMMENT);
599          hs.lmode = LITERATE_BIRD;
600       }
601          // Preprocessor
602       else if (sc.atLineStart && sc.ch == '#' && options.cpp
603             && (!options.stylingWithinPreprocessor || sc.state == SCE_HA_DEFAULT)) {
604          sc.SetState(SCE_HA_PREPROCESSOR);
605          sc.Forward();
606       }
607             // Literate
608       else if (sc.state == SCE_HA_LITERATE_COMMENT) {
609          sc.Forward();
610       }
611       else if (sc.state == SCE_HA_LITERATE_CODEDELIM) {
612          sc.ForwardSetState(hs.nonexternalStyle);
613       }
614             // Preprocessor
615       else if (sc.state == SCE_HA_PREPROCESSOR) {
616          if (sc.atLineEnd) {
617             sc.SetState(options.stylingWithinPreprocessor
618                         ? SCE_HA_DEFAULT
619                         : hs.nonexternalStyle);
620             sc.Forward(); // prevent double counting a line
621          } else if (options.stylingWithinPreprocessor && !IsHaskellLetter(sc.ch)) {
622             sc.SetState(SCE_HA_DEFAULT);
623          } else {
624             sc.Forward();
625          }
626       }
627       // Haskell
628          // Operator
629       else if (sc.state == SCE_HA_OPERATOR) {
630          int style = SCE_HA_OPERATOR;
631 
632          if ( sc.ch == ':'
633             && !alreadyInTheMiddleOfOperator
634             // except "::"
635             && !( sc.chNext == ':'
636                && !IsAnHaskellOperatorChar(sc.GetRelative(2)))) {
637             style = SCE_HA_CAPITAL;
638          }
639 
640          alreadyInTheMiddleOfOperator = false;
641 
642          while (IsAnHaskellOperatorChar(sc.ch))
643                sc.Forward();
644 
645          char s[100];
646          sc.GetCurrent(s, sizeof(s));
647 
648          if (reserved_operators.InList(s))
649             style = SCE_HA_RESERVED_OPERATOR;
650 
651          sc.ChangeState(style);
652          sc.SetState(SCE_HA_DEFAULT);
653       }
654          // String
655       else if (sc.state == SCE_HA_STRING) {
656          if (sc.atLineEnd) {
657             sc.ChangeState(SCE_HA_STRINGEOL);
658             sc.ForwardSetState(SCE_HA_DEFAULT);
659          } else if (sc.ch == '\"') {
660             sc.Forward();
661             skipMagicHash(sc, oneHash);
662             sc.SetState(SCE_HA_DEFAULT);
663          } else if (sc.ch == '\\') {
664             sc.Forward(2);
665          } else {
666             sc.Forward();
667          }
668       }
669          // Char
670       else if (sc.state == SCE_HA_CHARACTER) {
671          if (sc.atLineEnd) {
672             sc.ChangeState(SCE_HA_STRINGEOL);
673             sc.ForwardSetState(SCE_HA_DEFAULT);
674          } else if (sc.ch == '\'') {
675             sc.Forward();
676             skipMagicHash(sc, oneHash);
677             sc.SetState(SCE_HA_DEFAULT);
678          } else if (sc.ch == '\\') {
679             sc.Forward(2);
680          } else {
681             sc.Forward();
682          }
683       }
684          // Number
685       else if (sc.state == SCE_HA_NUMBER) {
686          if (sc.atLineEnd) {
687             sc.SetState(SCE_HA_DEFAULT);
688             sc.Forward(); // prevent double counting a line
689          } else if (IsADigit(sc.ch, base)) {
690             sc.Forward();
691          } else if (sc.ch=='.' && dot && IsADigit(sc.chNext, base)) {
692             sc.Forward(2);
693             dot = false;
694          } else if ((base == 10) &&
695                     (sc.ch == 'e' || sc.ch == 'E') &&
696                     (IsADigit(sc.chNext) || sc.chNext == '+' || sc.chNext == '-')) {
697             sc.Forward();
698             if (sc.ch == '+' || sc.ch == '-')
699                 sc.Forward();
700          } else {
701             skipMagicHash(sc, twoHashes);
702             sc.SetState(SCE_HA_DEFAULT);
703          }
704       }
705          // Keyword or Identifier
706       else if (sc.state == SCE_HA_IDENTIFIER) {
707          int style = IsHaskellUpperCase(sc.ch) ? SCE_HA_CAPITAL : SCE_HA_IDENTIFIER;
708 
709          assert(IsAHaskellWordStart(sc.ch));
710 
711          sc.Forward();
712 
713          while (sc.More()) {
714             if (IsAHaskellWordChar(sc.ch)) {
715                sc.Forward();
716             } else if (sc.ch == '.' && style == SCE_HA_CAPITAL) {
717                if (IsHaskellUpperCase(sc.chNext)) {
718                   sc.Forward();
719                   style = SCE_HA_CAPITAL;
720                } else if (IsAHaskellWordStart(sc.chNext)) {
721                   sc.Forward();
722                   style = SCE_HA_IDENTIFIER;
723                } else if (IsAnHaskellOperatorChar(sc.chNext)) {
724                   sc.Forward();
725                   style = sc.ch == ':' ? SCE_HA_CAPITAL : SCE_HA_OPERATOR;
726                   while (IsAnHaskellOperatorChar(sc.ch))
727                      sc.Forward();
728                   break;
729                } else {
730                   break;
731                }
732             } else {
733                break;
734             }
735          }
736 
737          skipMagicHash(sc, unlimitedHashes);
738 
739          char s[100];
740          sc.GetCurrent(s, sizeof(s));
741 
742          KeywordMode new_mode = HA_MODE_DEFAULT;
743 
744          if (keywords.InList(s)) {
745             style = SCE_HA_KEYWORD;
746          } else if (style == SCE_HA_CAPITAL) {
747             if (hs.mode == HA_MODE_IMPORT1 || hs.mode == HA_MODE_IMPORT3) {
748                style    = SCE_HA_MODULE;
749                new_mode = HA_MODE_IMPORT2;
750             } else if (hs.mode == HA_MODE_MODULE) {
751                style = SCE_HA_MODULE;
752             }
753          } else if (hs.mode == HA_MODE_IMPORT1 &&
754                     strcmp(s,"qualified") == 0) {
755              style    = SCE_HA_KEYWORD;
756              new_mode = HA_MODE_IMPORT1;
757          } else if (options.highlightSafe &&
758                     hs.mode == HA_MODE_IMPORT1 &&
759                     strcmp(s,"safe") == 0) {
760              style    = SCE_HA_KEYWORD;
761              new_mode = HA_MODE_IMPORT1;
762          } else if (hs.mode == HA_MODE_IMPORT2) {
763              if (strcmp(s,"as") == 0) {
764                 style    = SCE_HA_KEYWORD;
765                 new_mode = HA_MODE_IMPORT3;
766             } else if (strcmp(s,"hiding") == 0) {
767                 style     = SCE_HA_KEYWORD;
768             }
769          } else if (hs.mode == HA_MODE_TYPE) {
770             if (strcmp(s,"family") == 0)
771                style    = SCE_HA_KEYWORD;
772          }
773 
774          if (hs.mode == HA_MODE_FFI) {
775             if (ffi.InList(s)) {
776                style = SCE_HA_KEYWORD;
777                new_mode = HA_MODE_FFI;
778             }
779          }
780 
781          sc.ChangeState(style);
782          sc.SetState(SCE_HA_DEFAULT);
783 
784          if (strcmp(s,"import") == 0 && hs.mode != HA_MODE_FFI)
785             new_mode = HA_MODE_IMPORT1;
786          else if (strcmp(s,"module") == 0)
787             new_mode = HA_MODE_MODULE;
788          else if (strcmp(s,"foreign") == 0)
789             new_mode = HA_MODE_FFI;
790          else if (strcmp(s,"type") == 0
791                || strcmp(s,"data") == 0)
792             new_mode = HA_MODE_TYPE;
793 
794          hs.mode = new_mode;
795       }
796 
797          // Comments
798             // Oneliner
799       else if (sc.state == SCE_HA_COMMENTLINE) {
800          if (sc.atLineEnd) {
801             sc.SetState(hs.pragma ? SCE_HA_PRAGMA : SCE_HA_DEFAULT);
802             sc.Forward(); // prevent double counting a line
803          } else if (inDashes && sc.ch != '-' && !hs.pragma) {
804             inDashes = false;
805             if (IsAnHaskellOperatorChar(sc.ch)) {
806                alreadyInTheMiddleOfOperator = true;
807                sc.ChangeState(SCE_HA_OPERATOR);
808             }
809          } else {
810             sc.Forward();
811          }
812       }
813             // Nested
814       else if (IsCommentBlockStyle(sc.state)) {
815          if (sc.Match('{','-')) {
816             sc.SetState(CommentBlockStyleFromNestLevel(hs.nestLevel));
817             sc.Forward(2);
818             hs.nestLevel++;
819          } else if (sc.Match('-','}')) {
820             sc.Forward(2);
821             assert(hs.nestLevel > 0);
822             if (hs.nestLevel > 0)
823                hs.nestLevel--;
824             sc.SetState(
825                hs.nestLevel == 0
826                   ? (hs.pragma ? SCE_HA_PRAGMA : SCE_HA_DEFAULT)
827                   : CommentBlockStyleFromNestLevel(hs.nestLevel - 1));
828          } else {
829             sc.Forward();
830          }
831       }
832             // Pragma
833       else if (sc.state == SCE_HA_PRAGMA) {
834          if (sc.Match("#-}")) {
835             hs.pragma = false;
836             sc.Forward(3);
837             sc.SetState(SCE_HA_DEFAULT);
838          } else if (sc.Match('-','-')) {
839             sc.SetState(SCE_HA_COMMENTLINE);
840             sc.Forward(2);
841             inDashes = false;
842          } else if (sc.Match('{','-')) {
843             sc.SetState(CommentBlockStyleFromNestLevel(hs.nestLevel));
844             sc.Forward(2);
845             hs.nestLevel = 1;
846          } else {
847             sc.Forward();
848          }
849       }
850             // New state?
851       else if (sc.state == SCE_HA_DEFAULT) {
852          // Digit
853          if (IsADigit(sc.ch)) {
854             hs.mode = HA_MODE_DEFAULT;
855 
856             sc.SetState(SCE_HA_NUMBER);
857             if (sc.ch == '0' && (sc.chNext == 'X' || sc.chNext == 'x')) {
858                // Match anything starting with "0x" or "0X", too
859                sc.Forward(2);
860                base = 16;
861                dot = false;
862             } else if (sc.ch == '0' && (sc.chNext == 'O' || sc.chNext == 'o')) {
863                // Match anything starting with "0o" or "0O", too
864                sc.Forward(2);
865                base = 8;
866                dot = false;
867             } else {
868                sc.Forward();
869                base = 10;
870                dot = true;
871             }
872          }
873          // Pragma
874          else if (sc.Match("{-#")) {
875             hs.pragma = true;
876             sc.SetState(SCE_HA_PRAGMA);
877             sc.Forward(3);
878          }
879          // Comment line
880          else if (sc.Match('-','-')) {
881             sc.SetState(SCE_HA_COMMENTLINE);
882             sc.Forward(2);
883             inDashes = true;
884          }
885          // Comment block
886          else if (sc.Match('{','-')) {
887             sc.SetState(CommentBlockStyleFromNestLevel(hs.nestLevel));
888             sc.Forward(2);
889             hs.nestLevel = 1;
890          }
891          // String
892          else if (sc.ch == '\"') {
893             sc.SetState(SCE_HA_STRING);
894             sc.Forward();
895          }
896          // Character or quoted name or promoted term
897          else if (sc.ch == '\'') {
898             hs.mode = HA_MODE_DEFAULT;
899 
900             sc.SetState(SCE_HA_CHARACTER);
901             sc.Forward();
902 
903             if (options.allowQuotes) {
904                // Quoted type ''T
905                if (sc.ch=='\'' && IsAHaskellWordStart(sc.chNext)) {
906                   sc.Forward();
907                   sc.ChangeState(SCE_HA_IDENTIFIER);
908                } else if (sc.chNext != '\'') {
909                   // Quoted name 'n or promoted constructor 'N
910                   if (IsAHaskellWordStart(sc.ch)) {
911                      sc.ChangeState(SCE_HA_IDENTIFIER);
912                   // Promoted constructor operator ':~>
913                   } else if (sc.ch == ':') {
914                      alreadyInTheMiddleOfOperator = false;
915                      sc.ChangeState(SCE_HA_OPERATOR);
916                   // Promoted list or tuple '[T]
917                   } else if (sc.ch == '[' || sc.ch== '(') {
918                      sc.ChangeState(SCE_HA_OPERATOR);
919                      sc.ForwardSetState(SCE_HA_DEFAULT);
920                   }
921                }
922             }
923          }
924          // Operator starting with '?' or an implicit parameter
925          else if (sc.ch == '?') {
926             hs.mode = HA_MODE_DEFAULT;
927 
928             alreadyInTheMiddleOfOperator = false;
929             sc.SetState(SCE_HA_OPERATOR);
930 
931             if (  options.implicitParams
932                && IsAHaskellWordStart(sc.chNext)
933                && !IsHaskellUpperCase(sc.chNext)) {
934                sc.Forward();
935                sc.ChangeState(SCE_HA_IDENTIFIER);
936             }
937          }
938          // Operator
939          else if (IsAnHaskellOperatorChar(sc.ch)) {
940             hs.mode = HA_MODE_DEFAULT;
941 
942             sc.SetState(SCE_HA_OPERATOR);
943          }
944          // Braces and punctuation
945          else if (sc.ch == ',' || sc.ch == ';'
946                || sc.ch == '(' || sc.ch == ')'
947                || sc.ch == '[' || sc.ch == ']'
948                || sc.ch == '{' || sc.ch == '}') {
949             sc.SetState(SCE_HA_OPERATOR);
950             sc.ForwardSetState(SCE_HA_DEFAULT);
951          }
952          // Keyword or Identifier
953          else if (IsAHaskellWordStart(sc.ch)) {
954             sc.SetState(SCE_HA_IDENTIFIER);
955          // Something we don't care about
956          } else {
957             sc.Forward();
958          }
959       }
960             // This branch should never be reached.
961       else {
962          assert(false);
963          sc.Forward();
964       }
965    }
966    sc.Complete();
967 }
968 
Fold(Sci_PositionU startPos,Sci_Position length,int,IDocument * pAccess)969 void SCI_METHOD LexerHaskell::Fold(Sci_PositionU startPos, Sci_Position length, int // initStyle
970                                   ,IDocument *pAccess) {
971    if (!options.fold)
972       return;
973 
974    Accessor styler(pAccess, NULL);
975 
976    Sci_Position lineCurrent = styler.GetLine(startPos);
977 
978    if (lineCurrent <= firstImportLine) {
979       firstImportLine = -1; // readjust first import position
980       firstImportIndent = 0;
981    }
982 
983    const Sci_Position maxPos = startPos + length;
984    const Sci_Position maxLines =
985       maxPos == styler.Length()
986          ? styler.GetLine(maxPos)
987          : styler.GetLine(maxPos - 1);  // Requested last line
988    const Sci_Position docLines = styler.GetLine(styler.Length()); // Available last line
989 
990    // Backtrack to previous non-blank line so we can determine indent level
991    // for any white space lines
992    // and so we can fix any preceding fold level (which is why we go back
993    // at least one line in all cases)
994    bool importHere = LineContainsImport(lineCurrent, styler);
995    int indentCurrent = IndentAmountWithOffset(styler, lineCurrent);
996 
997    while (lineCurrent > 0) {
998       lineCurrent--;
999       importHere = LineContainsImport(lineCurrent, styler);
1000       indentCurrent = IndentAmountWithOffset(styler, lineCurrent);
1001       if (!(indentCurrent & SC_FOLDLEVELWHITEFLAG))
1002          break;
1003    }
1004 
1005    int indentCurrentLevel = indentCurrent & SC_FOLDLEVELNUMBERMASK;
1006 
1007    if (importHere) {
1008       indentCurrentLevel = IndentLevelRemoveIndentOffset(indentCurrentLevel);
1009       if (firstImportLine == -1) {
1010          firstImportLine = lineCurrent;
1011          firstImportIndent = (1 + indentCurrentLevel) - SC_FOLDLEVELBASE;
1012       }
1013       if (firstImportLine != lineCurrent) {
1014          indentCurrentLevel++;
1015       }
1016    }
1017 
1018    indentCurrent = indentCurrentLevel | (indentCurrent & ~SC_FOLDLEVELNUMBERMASK);
1019 
1020    // Process all characters to end of requested range
1021    //that hangs over the end of the range.  Cap processing in all cases
1022    // to end of document.
1023    while (lineCurrent <= docLines && lineCurrent <= maxLines) {
1024 
1025       // Gather info
1026       Sci_Position lineNext = lineCurrent + 1;
1027       importHere = false;
1028       int indentNext = indentCurrent;
1029 
1030       if (lineNext <= docLines) {
1031          // Information about next line is only available if not at end of document
1032          importHere = LineContainsImport(lineNext, styler);
1033          indentNext = IndentAmountWithOffset(styler, lineNext);
1034       }
1035       if (indentNext & SC_FOLDLEVELWHITEFLAG)
1036          indentNext = SC_FOLDLEVELWHITEFLAG | indentCurrentLevel;
1037 
1038       // Skip past any blank lines for next indent level info; we skip also
1039       // comments (all comments, not just those starting in column 0)
1040       // which effectively folds them into surrounding code rather
1041       // than screwing up folding.
1042 
1043       while (lineNext < docLines && (indentNext & SC_FOLDLEVELWHITEFLAG)) {
1044          lineNext++;
1045          importHere = LineContainsImport(lineNext, styler);
1046          indentNext = IndentAmountWithOffset(styler, lineNext);
1047       }
1048 
1049       int indentNextLevel = indentNext & SC_FOLDLEVELNUMBERMASK;
1050 
1051       if (importHere) {
1052          indentNextLevel = IndentLevelRemoveIndentOffset(indentNextLevel);
1053          if (firstImportLine == -1) {
1054             firstImportLine = lineNext;
1055             firstImportIndent = (1 + indentNextLevel) - SC_FOLDLEVELBASE;
1056          }
1057          if (firstImportLine != lineNext) {
1058             indentNextLevel++;
1059          }
1060       }
1061 
1062       indentNext = indentNextLevel | (indentNext & ~SC_FOLDLEVELNUMBERMASK);
1063 
1064       const int levelBeforeComments = Maximum(indentCurrentLevel,indentNextLevel);
1065 
1066       // Now set all the indent levels on the lines we skipped
1067       // Do this from end to start.  Once we encounter one line
1068       // which is indented more than the line after the end of
1069       // the comment-block, use the level of the block before
1070 
1071       Sci_Position skipLine = lineNext;
1072       int skipLevel = indentNextLevel;
1073 
1074       while (--skipLine > lineCurrent) {
1075          int skipLineIndent = IndentAmountWithOffset(styler, skipLine);
1076 
1077          if (options.foldCompact) {
1078             if ((skipLineIndent & SC_FOLDLEVELNUMBERMASK) > indentNextLevel) {
1079                skipLevel = levelBeforeComments;
1080             }
1081 
1082             int whiteFlag = skipLineIndent & SC_FOLDLEVELWHITEFLAG;
1083 
1084             styler.SetLevel(skipLine, skipLevel | whiteFlag);
1085          } else {
1086             if (  (skipLineIndent & SC_FOLDLEVELNUMBERMASK) > indentNextLevel
1087                && !(skipLineIndent & SC_FOLDLEVELWHITEFLAG)) {
1088                skipLevel = levelBeforeComments;
1089             }
1090 
1091             styler.SetLevel(skipLine, skipLevel);
1092          }
1093       }
1094 
1095       int lev = indentCurrent;
1096 
1097       if (!(indentCurrent & SC_FOLDLEVELWHITEFLAG)) {
1098          if ((indentCurrent & SC_FOLDLEVELNUMBERMASK) < (indentNext & SC_FOLDLEVELNUMBERMASK))
1099             lev |= SC_FOLDLEVELHEADERFLAG;
1100       }
1101 
1102       // Set fold level for this line and move to next line
1103       styler.SetLevel(lineCurrent, options.foldCompact ? lev : lev & ~SC_FOLDLEVELWHITEFLAG);
1104 
1105       indentCurrent = indentNext;
1106       indentCurrentLevel = indentNextLevel;
1107       lineCurrent = lineNext;
1108    }
1109 
1110    // NOTE: Cannot set level of last line here because indentCurrent doesn't have
1111    // header flag set; the loop above is crafted to take care of this case!
1112    //styler.SetLevel(lineCurrent, indentCurrent);
1113 }
1114 
1115 LexerModule lmHaskell(SCLEX_HASKELL, LexerHaskell::LexerFactoryHaskell, "haskell", haskellWordListDesc);
1116 LexerModule lmLiterateHaskell(SCLEX_LITERATEHASKELL, LexerHaskell::LexerFactoryLiterateHaskell, "literatehaskell", haskellWordListDesc);
1117