1 /******************************************************************
2 * LexHaskell.cxx
3 *
4 * A haskell lexer for the scintilla code control.
5 * Some stuff "lended" from LexPython.cxx and LexCPP.cxx.
6 * External lexer stuff inspired from the caml external lexer.
7 * Folder copied from Python's.
8 *
9 * Written by Tobias Engvall - tumm at dtek dot chalmers dot se
10 *
11 * Several bug fixes by Krasimir Angelov - kr.angelov at gmail.com
12 *
13 * Improved by kudah <kudahkukarek@gmail.com>
14 *
15 * TODO:
16 * * A proper lexical folder to fold group declarations, comments, pragmas,
17 * #ifdefs, explicit layout, lists, tuples, quasi-quotes, splces, etc, etc,
18 * etc.
19 *
20 *****************************************************************/
21 #include <stdlib.h>
22 #include <string.h>
23 #include <stdio.h>
24 #include <stdarg.h>
25 #include <assert.h>
26 #include <ctype.h>
27
28 #include <string>
29 #include <vector>
30 #include <map>
31
32 #include "ILexer.h"
33 #include "Scintilla.h"
34 #include "SciLexer.h"
35
36 #include "PropSetSimple.h"
37 #include "WordList.h"
38 #include "LexAccessor.h"
39 #include "Accessor.h"
40 #include "StyleContext.h"
41 #include "CharacterSet.h"
42 #include "CharacterCategory.h"
43 #include "LexerModule.h"
44 #include "OptionSet.h"
45 #include "DefaultLexer.h"
46
47 using namespace Scintilla;
48
49 // See https://github.com/ghc/ghc/blob/master/compiler/parser/Lexer.x#L1682
50 // Note, letter modifiers are prohibited.
51
u_iswupper(int ch)52 static int u_iswupper (int ch) {
53 CharacterCategory c = CategoriseCharacter(ch);
54 return c == ccLu || c == ccLt;
55 }
56
u_iswalpha(int ch)57 static int u_iswalpha (int ch) {
58 CharacterCategory c = CategoriseCharacter(ch);
59 return c == ccLl || c == ccLu || c == ccLt || c == ccLo;
60 }
61
u_iswalnum(int ch)62 static int u_iswalnum (int ch) {
63 CharacterCategory c = CategoriseCharacter(ch);
64 return c == ccLl || c == ccLu || c == ccLt || c == ccLo
65 || c == ccNd || c == ccNo;
66 }
67
u_IsHaskellSymbol(int ch)68 static int u_IsHaskellSymbol(int ch) {
69 CharacterCategory c = CategoriseCharacter(ch);
70 return c == ccPc || c == ccPd || c == ccPo
71 || c == ccSm || c == ccSc || c == ccSk || c == ccSo;
72 }
73
IsHaskellLetter(const int ch)74 static inline bool IsHaskellLetter(const int ch) {
75 if (IsASCII(ch)) {
76 return (ch >= 'a' && ch <= 'z')
77 || (ch >= 'A' && ch <= 'Z');
78 } else {
79 return u_iswalpha(ch) != 0;
80 }
81 }
82
IsHaskellAlphaNumeric(const int ch)83 static inline bool IsHaskellAlphaNumeric(const int ch) {
84 if (IsASCII(ch)) {
85 return IsAlphaNumeric(ch);
86 } else {
87 return u_iswalnum(ch) != 0;
88 }
89 }
90
IsHaskellUpperCase(const int ch)91 static inline bool IsHaskellUpperCase(const int ch) {
92 if (IsASCII(ch)) {
93 return ch >= 'A' && ch <= 'Z';
94 } else {
95 return u_iswupper(ch) != 0;
96 }
97 }
98
IsAnHaskellOperatorChar(const int ch)99 static inline bool IsAnHaskellOperatorChar(const int ch) {
100 if (IsASCII(ch)) {
101 return
102 ( ch == '!' || ch == '#' || ch == '$' || ch == '%'
103 || ch == '&' || ch == '*' || ch == '+' || ch == '-'
104 || ch == '.' || ch == '/' || ch == ':' || ch == '<'
105 || ch == '=' || ch == '>' || ch == '?' || ch == '@'
106 || ch == '^' || ch == '|' || ch == '~' || ch == '\\');
107 } else {
108 return u_IsHaskellSymbol(ch) != 0;
109 }
110 }
111
IsAHaskellWordStart(const int ch)112 static inline bool IsAHaskellWordStart(const int ch) {
113 return IsHaskellLetter(ch) || ch == '_';
114 }
115
IsAHaskellWordChar(const int ch)116 static inline bool IsAHaskellWordChar(const int ch) {
117 return ( IsHaskellAlphaNumeric(ch)
118 || ch == '_'
119 || ch == '\'');
120 }
121
IsCommentBlockStyle(int style)122 static inline bool IsCommentBlockStyle(int style) {
123 return (style >= SCE_HA_COMMENTBLOCK && style <= SCE_HA_COMMENTBLOCK3);
124 }
125
IsCommentStyle(int style)126 static inline bool IsCommentStyle(int style) {
127 return (style >= SCE_HA_COMMENTLINE && style <= SCE_HA_COMMENTBLOCK3)
128 || ( style == SCE_HA_LITERATE_COMMENT
129 || style == SCE_HA_LITERATE_CODEDELIM);
130 }
131
132 // styles which do not belong to Haskell, but to external tools
IsExternalStyle(int style)133 static inline bool IsExternalStyle(int style) {
134 return ( style == SCE_HA_PREPROCESSOR
135 || style == SCE_HA_LITERATE_COMMENT
136 || style == SCE_HA_LITERATE_CODEDELIM);
137 }
138
CommentBlockStyleFromNestLevel(const unsigned int nestLevel)139 static inline int CommentBlockStyleFromNestLevel(const unsigned int nestLevel) {
140 return SCE_HA_COMMENTBLOCK + (nestLevel % 3);
141 }
142
143 // Mangled version of lexlib/Accessor.cxx IndentAmount.
144 // Modified to treat comment blocks as whitespace
145 // plus special case for commentline/preprocessor.
HaskellIndentAmount(Accessor & styler,const Sci_Position line)146 static int HaskellIndentAmount(Accessor &styler, const Sci_Position line) {
147
148 // Determines the indentation level of the current line
149 // Comment blocks are treated as whitespace
150
151 Sci_Position pos = styler.LineStart(line);
152 Sci_Position eol_pos = styler.LineStart(line + 1) - 1;
153
154 char ch = styler[pos];
155 int style = styler.StyleAt(pos);
156
157 int indent = 0;
158 bool inPrevPrefix = line > 0;
159
160 Sci_Position posPrev = inPrevPrefix ? styler.LineStart(line-1) : 0;
161
162 while (( ch == ' ' || ch == '\t'
163 || IsCommentBlockStyle(style)
164 || style == SCE_HA_LITERATE_CODEDELIM)
165 && (pos < eol_pos)) {
166 if (inPrevPrefix) {
167 char chPrev = styler[posPrev++];
168 if (chPrev != ' ' && chPrev != '\t') {
169 inPrevPrefix = false;
170 }
171 }
172 if (ch == '\t') {
173 indent = (indent / 8 + 1) * 8;
174 } else { // Space or comment block
175 indent++;
176 }
177 pos++;
178 ch = styler[pos];
179 style = styler.StyleAt(pos);
180 }
181
182 indent += SC_FOLDLEVELBASE;
183 // if completely empty line or the start of a comment or preprocessor...
184 if ( styler.LineStart(line) == styler.Length()
185 || ch == ' '
186 || ch == '\t'
187 || ch == '\n'
188 || ch == '\r'
189 || IsCommentStyle(style)
190 || style == SCE_HA_PREPROCESSOR)
191 return indent | SC_FOLDLEVELWHITEFLAG;
192 else
193 return indent;
194 }
195
196 struct OptionsHaskell {
197 bool magicHash;
198 bool allowQuotes;
199 bool implicitParams;
200 bool highlightSafe;
201 bool cpp;
202 bool stylingWithinPreprocessor;
203 bool fold;
204 bool foldComment;
205 bool foldCompact;
206 bool foldImports;
OptionsHaskellOptionsHaskell207 OptionsHaskell() {
208 magicHash = true; // Widespread use, enabled by default.
209 allowQuotes = true; // Widespread use, enabled by default.
210 implicitParams = false; // Fell out of favor, seldom used, disabled.
211 highlightSafe = true; // Moderately used, doesn't hurt to enable.
212 cpp = true; // Widespread use, enabled by default;
213 stylingWithinPreprocessor = false;
214 fold = false;
215 foldComment = false;
216 foldCompact = false;
217 foldImports = false;
218 }
219 };
220
221 static const char * const haskellWordListDesc[] = {
222 "Keywords",
223 "FFI",
224 "Reserved operators",
225 0
226 };
227
228 struct OptionSetHaskell : public OptionSet<OptionsHaskell> {
OptionSetHaskellOptionSetHaskell229 OptionSetHaskell() {
230 DefineProperty("lexer.haskell.allow.hash", &OptionsHaskell::magicHash,
231 "Set to 0 to disallow the '#' character at the end of identifiers and "
232 "literals with the haskell lexer "
233 "(GHC -XMagicHash extension)");
234
235 DefineProperty("lexer.haskell.allow.quotes", &OptionsHaskell::allowQuotes,
236 "Set to 0 to disable highlighting of Template Haskell name quotations "
237 "and promoted constructors "
238 "(GHC -XTemplateHaskell and -XDataKinds extensions)");
239
240 DefineProperty("lexer.haskell.allow.questionmark", &OptionsHaskell::implicitParams,
241 "Set to 1 to allow the '?' character at the start of identifiers "
242 "with the haskell lexer "
243 "(GHC & Hugs -XImplicitParams extension)");
244
245 DefineProperty("lexer.haskell.import.safe", &OptionsHaskell::highlightSafe,
246 "Set to 0 to disallow \"safe\" keyword in imports "
247 "(GHC -XSafe, -XTrustworthy, -XUnsafe extensions)");
248
249 DefineProperty("lexer.haskell.cpp", &OptionsHaskell::cpp,
250 "Set to 0 to disable C-preprocessor highlighting "
251 "(-XCPP extension)");
252
253 DefineProperty("styling.within.preprocessor", &OptionsHaskell::stylingWithinPreprocessor,
254 "For Haskell code, determines whether all preprocessor code is styled in the "
255 "preprocessor style (0, the default) or only from the initial # to the end "
256 "of the command word(1)."
257 );
258
259 DefineProperty("fold", &OptionsHaskell::fold);
260
261 DefineProperty("fold.comment", &OptionsHaskell::foldComment);
262
263 DefineProperty("fold.compact", &OptionsHaskell::foldCompact);
264
265 DefineProperty("fold.haskell.imports", &OptionsHaskell::foldImports,
266 "Set to 1 to enable folding of import declarations");
267
268 DefineWordListSets(haskellWordListDesc);
269 }
270 };
271
272 class LexerHaskell : public DefaultLexer {
273 bool literate;
274 Sci_Position firstImportLine;
275 int firstImportIndent;
276 WordList keywords;
277 WordList ffi;
278 WordList reserved_operators;
279 OptionsHaskell options;
280 OptionSetHaskell osHaskell;
281
282 enum HashCount {
283 oneHash
284 ,twoHashes
285 ,unlimitedHashes
286 };
287
288 enum KeywordMode {
289 HA_MODE_DEFAULT = 0
290 ,HA_MODE_IMPORT1 = 1 // after "import", before "qualified" or "safe" or package name or module name.
291 ,HA_MODE_IMPORT2 = 2 // after module name, before "as" or "hiding".
292 ,HA_MODE_IMPORT3 = 3 // after "as", before "hiding"
293 ,HA_MODE_MODULE = 4 // after "module", before module name.
294 ,HA_MODE_FFI = 5 // after "foreign", before FFI keywords
295 ,HA_MODE_TYPE = 6 // after "type" or "data", before "family"
296 };
297
298 enum LiterateMode {
299 LITERATE_BIRD = 0 // if '>' is the first character on the line,
300 // color '>' as a codedelim and the rest of
301 // the line as code.
302 // else if "\begin{code}" is the only word on the
303 // line except whitespace, switch to LITERATE_BLOCK
304 // otherwise color the line as a literate comment.
305 ,LITERATE_BLOCK = 1 // if the string "\end{code}" is encountered at column
306 // 0 ignoring all later characters, color the line
307 // as a codedelim and switch to LITERATE_BIRD
308 // otherwise color the line as code.
309 };
310
311 struct HaskellLineInfo {
312 unsigned int nestLevel; // 22 bits ought to be enough for anybody
313 unsigned int nonexternalStyle; // 5 bits, widen if number of styles goes
314 // beyond 31.
315 bool pragma;
316 LiterateMode lmode;
317 KeywordMode mode;
318
HaskellLineInfoLexerHaskell::HaskellLineInfo319 HaskellLineInfo(int state) :
320 nestLevel (state >> 10)
321 , nonexternalStyle ((state >> 5) & 0x1F)
322 , pragma ((state >> 4) & 0x1)
323 , lmode (static_cast<LiterateMode>((state >> 3) & 0x1))
324 , mode (static_cast<KeywordMode>(state & 0x7))
325 {}
326
ToLineStateLexerHaskell::HaskellLineInfo327 int ToLineState() {
328 return
329 (nestLevel << 10)
330 | (nonexternalStyle << 5)
331 | (pragma << 4)
332 | (lmode << 3)
333 | mode;
334 }
335 };
336
skipMagicHash(StyleContext & sc,const HashCount hashes) const337 inline void skipMagicHash(StyleContext &sc, const HashCount hashes) const {
338 if (options.magicHash && sc.ch == '#') {
339 sc.Forward();
340 if (hashes == twoHashes && sc.ch == '#') {
341 sc.Forward();
342 } else if (hashes == unlimitedHashes) {
343 while (sc.ch == '#') {
344 sc.Forward();
345 }
346 }
347 }
348 }
349
LineContainsImport(const Sci_Position line,Accessor & styler) const350 bool LineContainsImport(const Sci_Position line, Accessor &styler) const {
351 if (options.foldImports) {
352 Sci_Position currentPos = styler.LineStart(line);
353 int style = styler.StyleAt(currentPos);
354
355 Sci_Position eol_pos = styler.LineStart(line + 1) - 1;
356
357 while (currentPos < eol_pos) {
358 int ch = styler[currentPos];
359 style = styler.StyleAt(currentPos);
360
361 if (ch == ' ' || ch == '\t'
362 || IsCommentBlockStyle(style)
363 || style == SCE_HA_LITERATE_CODEDELIM) {
364 currentPos++;
365 } else {
366 break;
367 }
368 }
369
370 return (style == SCE_HA_KEYWORD
371 && styler.Match(currentPos, "import"));
372 } else {
373 return false;
374 }
375 }
376
IndentAmountWithOffset(Accessor & styler,const Sci_Position line) const377 inline int IndentAmountWithOffset(Accessor &styler, const Sci_Position line) const {
378 const int indent = HaskellIndentAmount(styler, line);
379 const int indentLevel = indent & SC_FOLDLEVELNUMBERMASK;
380 return indentLevel <= ((firstImportIndent - 1) + SC_FOLDLEVELBASE)
381 ? indent
382 : (indentLevel + firstImportIndent) | (indent & ~SC_FOLDLEVELNUMBERMASK);
383 }
384
IndentLevelRemoveIndentOffset(const int indentLevel) const385 inline int IndentLevelRemoveIndentOffset(const int indentLevel) const {
386 return indentLevel <= ((firstImportIndent - 1) + SC_FOLDLEVELBASE)
387 ? indentLevel
388 : indentLevel - firstImportIndent;
389 }
390
391 public:
LexerHaskell(bool literate_)392 LexerHaskell(bool literate_)
393 : DefaultLexer(literate_ ? "literatehaskell" : "haskell", literate_ ? SCLEX_LITERATEHASKELL : SCLEX_HASKELL)
394 , literate(literate_)
395 , firstImportLine(-1)
396 , firstImportIndent(0)
397 {}
~LexerHaskell()398 virtual ~LexerHaskell() {}
399
Release()400 void SCI_METHOD Release() override {
401 delete this;
402 }
403
Version() const404 int SCI_METHOD Version() const override {
405 return lvIdentity;
406 }
407
PropertyNames()408 const char * SCI_METHOD PropertyNames() override {
409 return osHaskell.PropertyNames();
410 }
411
PropertyType(const char * name)412 int SCI_METHOD PropertyType(const char *name) override {
413 return osHaskell.PropertyType(name);
414 }
415
DescribeProperty(const char * name)416 const char * SCI_METHOD DescribeProperty(const char *name) override {
417 return osHaskell.DescribeProperty(name);
418 }
419
420 Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override;
421
PropertyGet(const char * key)422 const char * SCI_METHOD PropertyGet(const char *key) override {
423 return osHaskell.PropertyGet(key);
424 }
425
DescribeWordListSets()426 const char * SCI_METHOD DescribeWordListSets() override {
427 return osHaskell.DescribeWordListSets();
428 }
429
430 Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;
431
432 void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
433
434 void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
435
PrivateCall(int,void *)436 void * SCI_METHOD PrivateCall(int, void *) override {
437 return 0;
438 }
439
LexerFactoryHaskell()440 static ILexer *LexerFactoryHaskell() {
441 return new LexerHaskell(false);
442 }
443
LexerFactoryLiterateHaskell()444 static ILexer *LexerFactoryLiterateHaskell() {
445 return new LexerHaskell(true);
446 }
447 };
448
PropertySet(const char * key,const char * val)449 Sci_Position SCI_METHOD LexerHaskell::PropertySet(const char *key, const char *val) {
450 if (osHaskell.PropertySet(&options, key, val)) {
451 return 0;
452 }
453 return -1;
454 }
455
WordListSet(int n,const char * wl)456 Sci_Position SCI_METHOD LexerHaskell::WordListSet(int n, const char *wl) {
457 WordList *wordListN = 0;
458 switch (n) {
459 case 0:
460 wordListN = &keywords;
461 break;
462 case 1:
463 wordListN = &ffi;
464 break;
465 case 2:
466 wordListN = &reserved_operators;
467 break;
468 }
469 Sci_Position firstModification = -1;
470 if (wordListN) {
471 WordList wlNew;
472 wlNew.Set(wl);
473 if (*wordListN != wlNew) {
474 wordListN->Set(wl);
475 firstModification = 0;
476 }
477 }
478 return firstModification;
479 }
480
Lex(Sci_PositionU startPos,Sci_Position length,int initStyle,IDocument * pAccess)481 void SCI_METHOD LexerHaskell::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle
482 ,IDocument *pAccess) {
483 LexAccessor styler(pAccess);
484
485 Sci_Position lineCurrent = styler.GetLine(startPos);
486
487 HaskellLineInfo hs = HaskellLineInfo(lineCurrent ? styler.GetLineState(lineCurrent-1) : 0);
488
489 // Do not leak onto next line
490 if (initStyle == SCE_HA_STRINGEOL)
491 initStyle = SCE_HA_DEFAULT;
492 else if (initStyle == SCE_HA_LITERATE_CODEDELIM)
493 initStyle = hs.nonexternalStyle;
494
495 StyleContext sc(startPos, length, initStyle, styler);
496
497 int base = 10;
498 bool dot = false;
499
500 bool inDashes = false;
501 bool alreadyInTheMiddleOfOperator = false;
502
503 assert(!(IsCommentBlockStyle(initStyle) && hs.nestLevel == 0));
504
505 while (sc.More()) {
506 // Check for state end
507
508 if (!IsExternalStyle(sc.state)) {
509 hs.nonexternalStyle = sc.state;
510 }
511
512 // For lexer to work, states should unconditionally forward at least one
513 // character.
514 // If they don't, they should still check if they are at line end and
515 // forward if so.
516 // If a state forwards more than one character, it should check every time
517 // that it is not a line end and cease forwarding otherwise.
518 if (sc.atLineEnd) {
519 // Remember the line state for future incremental lexing
520 styler.SetLineState(lineCurrent, hs.ToLineState());
521 lineCurrent++;
522 }
523
524 // Handle line continuation generically.
525 if (sc.ch == '\\' && (sc.chNext == '\n' || sc.chNext == '\r')
526 && ( sc.state == SCE_HA_STRING
527 || sc.state == SCE_HA_PREPROCESSOR)) {
528 // Remember the line state for future incremental lexing
529 styler.SetLineState(lineCurrent, hs.ToLineState());
530 lineCurrent++;
531
532 sc.Forward();
533 if (sc.ch == '\r' && sc.chNext == '\n') {
534 sc.Forward();
535 }
536 sc.Forward();
537
538 continue;
539 }
540
541 if (sc.atLineStart) {
542
543 if (sc.state == SCE_HA_STRING || sc.state == SCE_HA_CHARACTER) {
544 // Prevent SCE_HA_STRINGEOL from leaking back to previous line
545 sc.SetState(sc.state);
546 }
547
548 if (literate && hs.lmode == LITERATE_BIRD) {
549 if (!IsExternalStyle(sc.state)) {
550 sc.SetState(SCE_HA_LITERATE_COMMENT);
551 }
552 }
553 }
554
555 // External
556 // Literate
557 if ( literate && hs.lmode == LITERATE_BIRD && sc.atLineStart
558 && sc.ch == '>') {
559 sc.SetState(SCE_HA_LITERATE_CODEDELIM);
560 sc.ForwardSetState(hs.nonexternalStyle);
561 }
562 else if (literate && hs.lmode == LITERATE_BIRD && sc.atLineStart
563 && ( sc.ch == ' ' || sc.ch == '\t'
564 || sc.Match("\\begin{code}"))) {
565 sc.SetState(sc.state);
566
567 while ((sc.ch == ' ' || sc.ch == '\t') && sc.More())
568 sc.Forward();
569
570 if (sc.Match("\\begin{code}")) {
571 sc.Forward(static_cast<int>(strlen("\\begin{code}")));
572
573 bool correct = true;
574
575 while (!sc.atLineEnd && sc.More()) {
576 if (sc.ch != ' ' && sc.ch != '\t') {
577 correct = false;
578 }
579 sc.Forward();
580 }
581
582 if (correct) {
583 sc.ChangeState(SCE_HA_LITERATE_CODEDELIM); // color the line end
584 hs.lmode = LITERATE_BLOCK;
585 }
586 }
587 }
588 else if (literate && hs.lmode == LITERATE_BLOCK && sc.atLineStart
589 && sc.Match("\\end{code}")) {
590 sc.SetState(SCE_HA_LITERATE_CODEDELIM);
591
592 sc.Forward(static_cast<int>(strlen("\\end{code}")));
593
594 while (!sc.atLineEnd && sc.More()) {
595 sc.Forward();
596 }
597
598 sc.SetState(SCE_HA_LITERATE_COMMENT);
599 hs.lmode = LITERATE_BIRD;
600 }
601 // Preprocessor
602 else if (sc.atLineStart && sc.ch == '#' && options.cpp
603 && (!options.stylingWithinPreprocessor || sc.state == SCE_HA_DEFAULT)) {
604 sc.SetState(SCE_HA_PREPROCESSOR);
605 sc.Forward();
606 }
607 // Literate
608 else if (sc.state == SCE_HA_LITERATE_COMMENT) {
609 sc.Forward();
610 }
611 else if (sc.state == SCE_HA_LITERATE_CODEDELIM) {
612 sc.ForwardSetState(hs.nonexternalStyle);
613 }
614 // Preprocessor
615 else if (sc.state == SCE_HA_PREPROCESSOR) {
616 if (sc.atLineEnd) {
617 sc.SetState(options.stylingWithinPreprocessor
618 ? SCE_HA_DEFAULT
619 : hs.nonexternalStyle);
620 sc.Forward(); // prevent double counting a line
621 } else if (options.stylingWithinPreprocessor && !IsHaskellLetter(sc.ch)) {
622 sc.SetState(SCE_HA_DEFAULT);
623 } else {
624 sc.Forward();
625 }
626 }
627 // Haskell
628 // Operator
629 else if (sc.state == SCE_HA_OPERATOR) {
630 int style = SCE_HA_OPERATOR;
631
632 if ( sc.ch == ':'
633 && !alreadyInTheMiddleOfOperator
634 // except "::"
635 && !( sc.chNext == ':'
636 && !IsAnHaskellOperatorChar(sc.GetRelative(2)))) {
637 style = SCE_HA_CAPITAL;
638 }
639
640 alreadyInTheMiddleOfOperator = false;
641
642 while (IsAnHaskellOperatorChar(sc.ch))
643 sc.Forward();
644
645 char s[100];
646 sc.GetCurrent(s, sizeof(s));
647
648 if (reserved_operators.InList(s))
649 style = SCE_HA_RESERVED_OPERATOR;
650
651 sc.ChangeState(style);
652 sc.SetState(SCE_HA_DEFAULT);
653 }
654 // String
655 else if (sc.state == SCE_HA_STRING) {
656 if (sc.atLineEnd) {
657 sc.ChangeState(SCE_HA_STRINGEOL);
658 sc.ForwardSetState(SCE_HA_DEFAULT);
659 } else if (sc.ch == '\"') {
660 sc.Forward();
661 skipMagicHash(sc, oneHash);
662 sc.SetState(SCE_HA_DEFAULT);
663 } else if (sc.ch == '\\') {
664 sc.Forward(2);
665 } else {
666 sc.Forward();
667 }
668 }
669 // Char
670 else if (sc.state == SCE_HA_CHARACTER) {
671 if (sc.atLineEnd) {
672 sc.ChangeState(SCE_HA_STRINGEOL);
673 sc.ForwardSetState(SCE_HA_DEFAULT);
674 } else if (sc.ch == '\'') {
675 sc.Forward();
676 skipMagicHash(sc, oneHash);
677 sc.SetState(SCE_HA_DEFAULT);
678 } else if (sc.ch == '\\') {
679 sc.Forward(2);
680 } else {
681 sc.Forward();
682 }
683 }
684 // Number
685 else if (sc.state == SCE_HA_NUMBER) {
686 if (sc.atLineEnd) {
687 sc.SetState(SCE_HA_DEFAULT);
688 sc.Forward(); // prevent double counting a line
689 } else if (IsADigit(sc.ch, base)) {
690 sc.Forward();
691 } else if (sc.ch=='.' && dot && IsADigit(sc.chNext, base)) {
692 sc.Forward(2);
693 dot = false;
694 } else if ((base == 10) &&
695 (sc.ch == 'e' || sc.ch == 'E') &&
696 (IsADigit(sc.chNext) || sc.chNext == '+' || sc.chNext == '-')) {
697 sc.Forward();
698 if (sc.ch == '+' || sc.ch == '-')
699 sc.Forward();
700 } else {
701 skipMagicHash(sc, twoHashes);
702 sc.SetState(SCE_HA_DEFAULT);
703 }
704 }
705 // Keyword or Identifier
706 else if (sc.state == SCE_HA_IDENTIFIER) {
707 int style = IsHaskellUpperCase(sc.ch) ? SCE_HA_CAPITAL : SCE_HA_IDENTIFIER;
708
709 assert(IsAHaskellWordStart(sc.ch));
710
711 sc.Forward();
712
713 while (sc.More()) {
714 if (IsAHaskellWordChar(sc.ch)) {
715 sc.Forward();
716 } else if (sc.ch == '.' && style == SCE_HA_CAPITAL) {
717 if (IsHaskellUpperCase(sc.chNext)) {
718 sc.Forward();
719 style = SCE_HA_CAPITAL;
720 } else if (IsAHaskellWordStart(sc.chNext)) {
721 sc.Forward();
722 style = SCE_HA_IDENTIFIER;
723 } else if (IsAnHaskellOperatorChar(sc.chNext)) {
724 sc.Forward();
725 style = sc.ch == ':' ? SCE_HA_CAPITAL : SCE_HA_OPERATOR;
726 while (IsAnHaskellOperatorChar(sc.ch))
727 sc.Forward();
728 break;
729 } else {
730 break;
731 }
732 } else {
733 break;
734 }
735 }
736
737 skipMagicHash(sc, unlimitedHashes);
738
739 char s[100];
740 sc.GetCurrent(s, sizeof(s));
741
742 KeywordMode new_mode = HA_MODE_DEFAULT;
743
744 if (keywords.InList(s)) {
745 style = SCE_HA_KEYWORD;
746 } else if (style == SCE_HA_CAPITAL) {
747 if (hs.mode == HA_MODE_IMPORT1 || hs.mode == HA_MODE_IMPORT3) {
748 style = SCE_HA_MODULE;
749 new_mode = HA_MODE_IMPORT2;
750 } else if (hs.mode == HA_MODE_MODULE) {
751 style = SCE_HA_MODULE;
752 }
753 } else if (hs.mode == HA_MODE_IMPORT1 &&
754 strcmp(s,"qualified") == 0) {
755 style = SCE_HA_KEYWORD;
756 new_mode = HA_MODE_IMPORT1;
757 } else if (options.highlightSafe &&
758 hs.mode == HA_MODE_IMPORT1 &&
759 strcmp(s,"safe") == 0) {
760 style = SCE_HA_KEYWORD;
761 new_mode = HA_MODE_IMPORT1;
762 } else if (hs.mode == HA_MODE_IMPORT2) {
763 if (strcmp(s,"as") == 0) {
764 style = SCE_HA_KEYWORD;
765 new_mode = HA_MODE_IMPORT3;
766 } else if (strcmp(s,"hiding") == 0) {
767 style = SCE_HA_KEYWORD;
768 }
769 } else if (hs.mode == HA_MODE_TYPE) {
770 if (strcmp(s,"family") == 0)
771 style = SCE_HA_KEYWORD;
772 }
773
774 if (hs.mode == HA_MODE_FFI) {
775 if (ffi.InList(s)) {
776 style = SCE_HA_KEYWORD;
777 new_mode = HA_MODE_FFI;
778 }
779 }
780
781 sc.ChangeState(style);
782 sc.SetState(SCE_HA_DEFAULT);
783
784 if (strcmp(s,"import") == 0 && hs.mode != HA_MODE_FFI)
785 new_mode = HA_MODE_IMPORT1;
786 else if (strcmp(s,"module") == 0)
787 new_mode = HA_MODE_MODULE;
788 else if (strcmp(s,"foreign") == 0)
789 new_mode = HA_MODE_FFI;
790 else if (strcmp(s,"type") == 0
791 || strcmp(s,"data") == 0)
792 new_mode = HA_MODE_TYPE;
793
794 hs.mode = new_mode;
795 }
796
797 // Comments
798 // Oneliner
799 else if (sc.state == SCE_HA_COMMENTLINE) {
800 if (sc.atLineEnd) {
801 sc.SetState(hs.pragma ? SCE_HA_PRAGMA : SCE_HA_DEFAULT);
802 sc.Forward(); // prevent double counting a line
803 } else if (inDashes && sc.ch != '-' && !hs.pragma) {
804 inDashes = false;
805 if (IsAnHaskellOperatorChar(sc.ch)) {
806 alreadyInTheMiddleOfOperator = true;
807 sc.ChangeState(SCE_HA_OPERATOR);
808 }
809 } else {
810 sc.Forward();
811 }
812 }
813 // Nested
814 else if (IsCommentBlockStyle(sc.state)) {
815 if (sc.Match('{','-')) {
816 sc.SetState(CommentBlockStyleFromNestLevel(hs.nestLevel));
817 sc.Forward(2);
818 hs.nestLevel++;
819 } else if (sc.Match('-','}')) {
820 sc.Forward(2);
821 assert(hs.nestLevel > 0);
822 if (hs.nestLevel > 0)
823 hs.nestLevel--;
824 sc.SetState(
825 hs.nestLevel == 0
826 ? (hs.pragma ? SCE_HA_PRAGMA : SCE_HA_DEFAULT)
827 : CommentBlockStyleFromNestLevel(hs.nestLevel - 1));
828 } else {
829 sc.Forward();
830 }
831 }
832 // Pragma
833 else if (sc.state == SCE_HA_PRAGMA) {
834 if (sc.Match("#-}")) {
835 hs.pragma = false;
836 sc.Forward(3);
837 sc.SetState(SCE_HA_DEFAULT);
838 } else if (sc.Match('-','-')) {
839 sc.SetState(SCE_HA_COMMENTLINE);
840 sc.Forward(2);
841 inDashes = false;
842 } else if (sc.Match('{','-')) {
843 sc.SetState(CommentBlockStyleFromNestLevel(hs.nestLevel));
844 sc.Forward(2);
845 hs.nestLevel = 1;
846 } else {
847 sc.Forward();
848 }
849 }
850 // New state?
851 else if (sc.state == SCE_HA_DEFAULT) {
852 // Digit
853 if (IsADigit(sc.ch)) {
854 hs.mode = HA_MODE_DEFAULT;
855
856 sc.SetState(SCE_HA_NUMBER);
857 if (sc.ch == '0' && (sc.chNext == 'X' || sc.chNext == 'x')) {
858 // Match anything starting with "0x" or "0X", too
859 sc.Forward(2);
860 base = 16;
861 dot = false;
862 } else if (sc.ch == '0' && (sc.chNext == 'O' || sc.chNext == 'o')) {
863 // Match anything starting with "0o" or "0O", too
864 sc.Forward(2);
865 base = 8;
866 dot = false;
867 } else {
868 sc.Forward();
869 base = 10;
870 dot = true;
871 }
872 }
873 // Pragma
874 else if (sc.Match("{-#")) {
875 hs.pragma = true;
876 sc.SetState(SCE_HA_PRAGMA);
877 sc.Forward(3);
878 }
879 // Comment line
880 else if (sc.Match('-','-')) {
881 sc.SetState(SCE_HA_COMMENTLINE);
882 sc.Forward(2);
883 inDashes = true;
884 }
885 // Comment block
886 else if (sc.Match('{','-')) {
887 sc.SetState(CommentBlockStyleFromNestLevel(hs.nestLevel));
888 sc.Forward(2);
889 hs.nestLevel = 1;
890 }
891 // String
892 else if (sc.ch == '\"') {
893 sc.SetState(SCE_HA_STRING);
894 sc.Forward();
895 }
896 // Character or quoted name or promoted term
897 else if (sc.ch == '\'') {
898 hs.mode = HA_MODE_DEFAULT;
899
900 sc.SetState(SCE_HA_CHARACTER);
901 sc.Forward();
902
903 if (options.allowQuotes) {
904 // Quoted type ''T
905 if (sc.ch=='\'' && IsAHaskellWordStart(sc.chNext)) {
906 sc.Forward();
907 sc.ChangeState(SCE_HA_IDENTIFIER);
908 } else if (sc.chNext != '\'') {
909 // Quoted name 'n or promoted constructor 'N
910 if (IsAHaskellWordStart(sc.ch)) {
911 sc.ChangeState(SCE_HA_IDENTIFIER);
912 // Promoted constructor operator ':~>
913 } else if (sc.ch == ':') {
914 alreadyInTheMiddleOfOperator = false;
915 sc.ChangeState(SCE_HA_OPERATOR);
916 // Promoted list or tuple '[T]
917 } else if (sc.ch == '[' || sc.ch== '(') {
918 sc.ChangeState(SCE_HA_OPERATOR);
919 sc.ForwardSetState(SCE_HA_DEFAULT);
920 }
921 }
922 }
923 }
924 // Operator starting with '?' or an implicit parameter
925 else if (sc.ch == '?') {
926 hs.mode = HA_MODE_DEFAULT;
927
928 alreadyInTheMiddleOfOperator = false;
929 sc.SetState(SCE_HA_OPERATOR);
930
931 if ( options.implicitParams
932 && IsAHaskellWordStart(sc.chNext)
933 && !IsHaskellUpperCase(sc.chNext)) {
934 sc.Forward();
935 sc.ChangeState(SCE_HA_IDENTIFIER);
936 }
937 }
938 // Operator
939 else if (IsAnHaskellOperatorChar(sc.ch)) {
940 hs.mode = HA_MODE_DEFAULT;
941
942 sc.SetState(SCE_HA_OPERATOR);
943 }
944 // Braces and punctuation
945 else if (sc.ch == ',' || sc.ch == ';'
946 || sc.ch == '(' || sc.ch == ')'
947 || sc.ch == '[' || sc.ch == ']'
948 || sc.ch == '{' || sc.ch == '}') {
949 sc.SetState(SCE_HA_OPERATOR);
950 sc.ForwardSetState(SCE_HA_DEFAULT);
951 }
952 // Keyword or Identifier
953 else if (IsAHaskellWordStart(sc.ch)) {
954 sc.SetState(SCE_HA_IDENTIFIER);
955 // Something we don't care about
956 } else {
957 sc.Forward();
958 }
959 }
960 // This branch should never be reached.
961 else {
962 assert(false);
963 sc.Forward();
964 }
965 }
966 sc.Complete();
967 }
968
Fold(Sci_PositionU startPos,Sci_Position length,int,IDocument * pAccess)969 void SCI_METHOD LexerHaskell::Fold(Sci_PositionU startPos, Sci_Position length, int // initStyle
970 ,IDocument *pAccess) {
971 if (!options.fold)
972 return;
973
974 Accessor styler(pAccess, NULL);
975
976 Sci_Position lineCurrent = styler.GetLine(startPos);
977
978 if (lineCurrent <= firstImportLine) {
979 firstImportLine = -1; // readjust first import position
980 firstImportIndent = 0;
981 }
982
983 const Sci_Position maxPos = startPos + length;
984 const Sci_Position maxLines =
985 maxPos == styler.Length()
986 ? styler.GetLine(maxPos)
987 : styler.GetLine(maxPos - 1); // Requested last line
988 const Sci_Position docLines = styler.GetLine(styler.Length()); // Available last line
989
990 // Backtrack to previous non-blank line so we can determine indent level
991 // for any white space lines
992 // and so we can fix any preceding fold level (which is why we go back
993 // at least one line in all cases)
994 bool importHere = LineContainsImport(lineCurrent, styler);
995 int indentCurrent = IndentAmountWithOffset(styler, lineCurrent);
996
997 while (lineCurrent > 0) {
998 lineCurrent--;
999 importHere = LineContainsImport(lineCurrent, styler);
1000 indentCurrent = IndentAmountWithOffset(styler, lineCurrent);
1001 if (!(indentCurrent & SC_FOLDLEVELWHITEFLAG))
1002 break;
1003 }
1004
1005 int indentCurrentLevel = indentCurrent & SC_FOLDLEVELNUMBERMASK;
1006
1007 if (importHere) {
1008 indentCurrentLevel = IndentLevelRemoveIndentOffset(indentCurrentLevel);
1009 if (firstImportLine == -1) {
1010 firstImportLine = lineCurrent;
1011 firstImportIndent = (1 + indentCurrentLevel) - SC_FOLDLEVELBASE;
1012 }
1013 if (firstImportLine != lineCurrent) {
1014 indentCurrentLevel++;
1015 }
1016 }
1017
1018 indentCurrent = indentCurrentLevel | (indentCurrent & ~SC_FOLDLEVELNUMBERMASK);
1019
1020 // Process all characters to end of requested range
1021 //that hangs over the end of the range. Cap processing in all cases
1022 // to end of document.
1023 while (lineCurrent <= docLines && lineCurrent <= maxLines) {
1024
1025 // Gather info
1026 Sci_Position lineNext = lineCurrent + 1;
1027 importHere = false;
1028 int indentNext = indentCurrent;
1029
1030 if (lineNext <= docLines) {
1031 // Information about next line is only available if not at end of document
1032 importHere = LineContainsImport(lineNext, styler);
1033 indentNext = IndentAmountWithOffset(styler, lineNext);
1034 }
1035 if (indentNext & SC_FOLDLEVELWHITEFLAG)
1036 indentNext = SC_FOLDLEVELWHITEFLAG | indentCurrentLevel;
1037
1038 // Skip past any blank lines for next indent level info; we skip also
1039 // comments (all comments, not just those starting in column 0)
1040 // which effectively folds them into surrounding code rather
1041 // than screwing up folding.
1042
1043 while (lineNext < docLines && (indentNext & SC_FOLDLEVELWHITEFLAG)) {
1044 lineNext++;
1045 importHere = LineContainsImport(lineNext, styler);
1046 indentNext = IndentAmountWithOffset(styler, lineNext);
1047 }
1048
1049 int indentNextLevel = indentNext & SC_FOLDLEVELNUMBERMASK;
1050
1051 if (importHere) {
1052 indentNextLevel = IndentLevelRemoveIndentOffset(indentNextLevel);
1053 if (firstImportLine == -1) {
1054 firstImportLine = lineNext;
1055 firstImportIndent = (1 + indentNextLevel) - SC_FOLDLEVELBASE;
1056 }
1057 if (firstImportLine != lineNext) {
1058 indentNextLevel++;
1059 }
1060 }
1061
1062 indentNext = indentNextLevel | (indentNext & ~SC_FOLDLEVELNUMBERMASK);
1063
1064 const int levelBeforeComments = Maximum(indentCurrentLevel,indentNextLevel);
1065
1066 // Now set all the indent levels on the lines we skipped
1067 // Do this from end to start. Once we encounter one line
1068 // which is indented more than the line after the end of
1069 // the comment-block, use the level of the block before
1070
1071 Sci_Position skipLine = lineNext;
1072 int skipLevel = indentNextLevel;
1073
1074 while (--skipLine > lineCurrent) {
1075 int skipLineIndent = IndentAmountWithOffset(styler, skipLine);
1076
1077 if (options.foldCompact) {
1078 if ((skipLineIndent & SC_FOLDLEVELNUMBERMASK) > indentNextLevel) {
1079 skipLevel = levelBeforeComments;
1080 }
1081
1082 int whiteFlag = skipLineIndent & SC_FOLDLEVELWHITEFLAG;
1083
1084 styler.SetLevel(skipLine, skipLevel | whiteFlag);
1085 } else {
1086 if ( (skipLineIndent & SC_FOLDLEVELNUMBERMASK) > indentNextLevel
1087 && !(skipLineIndent & SC_FOLDLEVELWHITEFLAG)) {
1088 skipLevel = levelBeforeComments;
1089 }
1090
1091 styler.SetLevel(skipLine, skipLevel);
1092 }
1093 }
1094
1095 int lev = indentCurrent;
1096
1097 if (!(indentCurrent & SC_FOLDLEVELWHITEFLAG)) {
1098 if ((indentCurrent & SC_FOLDLEVELNUMBERMASK) < (indentNext & SC_FOLDLEVELNUMBERMASK))
1099 lev |= SC_FOLDLEVELHEADERFLAG;
1100 }
1101
1102 // Set fold level for this line and move to next line
1103 styler.SetLevel(lineCurrent, options.foldCompact ? lev : lev & ~SC_FOLDLEVELWHITEFLAG);
1104
1105 indentCurrent = indentNext;
1106 indentCurrentLevel = indentNextLevel;
1107 lineCurrent = lineNext;
1108 }
1109
1110 // NOTE: Cannot set level of last line here because indentCurrent doesn't have
1111 // header flag set; the loop above is crafted to take care of this case!
1112 //styler.SetLevel(lineCurrent, indentCurrent);
1113 }
1114
1115 LexerModule lmHaskell(SCLEX_HASKELL, LexerHaskell::LexerFactoryHaskell, "haskell", haskellWordListDesc);
1116 LexerModule lmLiterateHaskell(SCLEX_LITERATEHASKELL, LexerHaskell::LexerFactoryLiterateHaskell, "literatehaskell", haskellWordListDesc);
1117