1 /******************************************************************
2 * LexHaskell.cxx
3 *
4 * A haskell lexer for the scintilla code control.
5 * Some stuff "lended" from LexPython.cxx and LexCPP.cxx.
6 * External lexer stuff inspired from the caml external lexer.
7 * Folder copied from Python's.
8 *
9 * Written by Tobias Engvall - tumm at dtek dot chalmers dot se
10 *
11 * Several bug fixes by Krasimir Angelov - kr.angelov at gmail.com
12 *
13 * Improved by kudah <kudahkukarek@gmail.com>
14 *
15 * TODO:
16 * * A proper lexical folder to fold group declarations, comments, pragmas,
17 * #ifdefs, explicit layout, lists, tuples, quasi-quotes, splces, etc, etc,
18 * etc.
19 *
20 *****************************************************************/
21 #include <stdlib.h>
22 #include <string.h>
23 #include <stdio.h>
24 #include <stdarg.h>
25 #include <assert.h>
26 #include <ctype.h>
27
28 #include <string>
29 #include <map>
30
31 #include "ILexer.h"
32 #include "Scintilla.h"
33 #include "SciLexer.h"
34
35 #include "PropSetSimple.h"
36 #include "WordList.h"
37 #include "LexAccessor.h"
38 #include "Accessor.h"
39 #include "StyleContext.h"
40 #include "CharacterSet.h"
41 #include "CharacterCategory.h"
42 #include "LexerModule.h"
43 #include "OptionSet.h"
44
45 #ifdef SCI_NAMESPACE
46 using namespace Scintilla;
47 #endif
48
49 // See https://github.com/ghc/ghc/blob/master/compiler/parser/Lexer.x#L1682
50 // Note, letter modifiers are prohibited.
51
u_iswupper(int ch)52 static int u_iswupper (int ch) {
53 CharacterCategory c = CategoriseCharacter(ch);
54 return c == ccLu || c == ccLt;
55 }
56
u_iswalpha(int ch)57 static int u_iswalpha (int ch) {
58 CharacterCategory c = CategoriseCharacter(ch);
59 return c == ccLl || c == ccLu || c == ccLt || c == ccLo;
60 }
61
u_iswalnum(int ch)62 static int u_iswalnum (int ch) {
63 CharacterCategory c = CategoriseCharacter(ch);
64 return c == ccLl || c == ccLu || c == ccLt || c == ccLo
65 || c == ccNd || c == ccNo;
66 }
67
u_IsHaskellSymbol(int ch)68 static int u_IsHaskellSymbol(int ch) {
69 CharacterCategory c = CategoriseCharacter(ch);
70 return c == ccPc || c == ccPd || c == ccPo
71 || c == ccSm || c == ccSc || c == ccSk || c == ccSo;
72 }
73
IsHaskellLetter(const int ch)74 static inline bool IsHaskellLetter(const int ch) {
75 if (IsASCII(ch)) {
76 return (ch >= 'a' && ch <= 'z')
77 || (ch >= 'A' && ch <= 'Z');
78 } else {
79 return u_iswalpha(ch) != 0;
80 }
81 }
82
IsHaskellAlphaNumeric(const int ch)83 static inline bool IsHaskellAlphaNumeric(const int ch) {
84 if (IsASCII(ch)) {
85 return IsAlphaNumeric(ch);
86 } else {
87 return u_iswalnum(ch) != 0;
88 }
89 }
90
IsHaskellUpperCase(const int ch)91 static inline bool IsHaskellUpperCase(const int ch) {
92 if (IsASCII(ch)) {
93 return ch >= 'A' && ch <= 'Z';
94 } else {
95 return u_iswupper(ch) != 0;
96 }
97 }
98
IsAnHaskellOperatorChar(const int ch)99 static inline bool IsAnHaskellOperatorChar(const int ch) {
100 if (IsASCII(ch)) {
101 return
102 ( ch == '!' || ch == '#' || ch == '$' || ch == '%'
103 || ch == '&' || ch == '*' || ch == '+' || ch == '-'
104 || ch == '.' || ch == '/' || ch == ':' || ch == '<'
105 || ch == '=' || ch == '>' || ch == '?' || ch == '@'
106 || ch == '^' || ch == '|' || ch == '~' || ch == '\\');
107 } else {
108 return u_IsHaskellSymbol(ch) != 0;
109 }
110 }
111
IsAHaskellWordStart(const int ch)112 static inline bool IsAHaskellWordStart(const int ch) {
113 return IsHaskellLetter(ch) || ch == '_';
114 }
115
IsAHaskellWordChar(const int ch)116 static inline bool IsAHaskellWordChar(const int ch) {
117 return ( IsHaskellAlphaNumeric(ch)
118 || ch == '_'
119 || ch == '\'');
120 }
121
IsCommentBlockStyle(int style)122 static inline bool IsCommentBlockStyle(int style) {
123 return (style >= SCE_HA_COMMENTBLOCK && style <= SCE_HA_COMMENTBLOCK3);
124 }
125
IsCommentStyle(int style)126 static inline bool IsCommentStyle(int style) {
127 return (style >= SCE_HA_COMMENTLINE && style <= SCE_HA_COMMENTBLOCK3)
128 || ( style == SCE_HA_LITERATE_COMMENT
129 || style == SCE_HA_LITERATE_CODEDELIM);
130 }
131
132 // styles which do not belong to Haskell, but to external tools
IsExternalStyle(int style)133 static inline bool IsExternalStyle(int style) {
134 return ( style == SCE_HA_PREPROCESSOR
135 || style == SCE_HA_LITERATE_COMMENT
136 || style == SCE_HA_LITERATE_CODEDELIM);
137 }
138
CommentBlockStyleFromNestLevel(const unsigned int nestLevel)139 static inline int CommentBlockStyleFromNestLevel(const unsigned int nestLevel) {
140 return SCE_HA_COMMENTBLOCK + (nestLevel % 3);
141 }
142
143 // Mangled version of lexlib/Accessor.cxx IndentAmount.
144 // Modified to treat comment blocks as whitespace
145 // plus special case for commentline/preprocessor.
HaskellIndentAmount(Accessor & styler,const Sci_Position line)146 static int HaskellIndentAmount(Accessor &styler, const Sci_Position line) {
147
148 // Determines the indentation level of the current line
149 // Comment blocks are treated as whitespace
150
151 Sci_Position pos = styler.LineStart(line);
152 Sci_Position eol_pos = styler.LineStart(line + 1) - 1;
153
154 char ch = styler[pos];
155 int style = styler.StyleAt(pos);
156
157 int indent = 0;
158 bool inPrevPrefix = line > 0;
159
160 Sci_Position posPrev = inPrevPrefix ? styler.LineStart(line-1) : 0;
161
162 while (( ch == ' ' || ch == '\t'
163 || IsCommentBlockStyle(style)
164 || style == SCE_HA_LITERATE_CODEDELIM)
165 && (pos < eol_pos)) {
166 if (inPrevPrefix) {
167 char chPrev = styler[posPrev++];
168 if (chPrev != ' ' && chPrev != '\t') {
169 inPrevPrefix = false;
170 }
171 }
172 if (ch == '\t') {
173 indent = (indent / 8 + 1) * 8;
174 } else { // Space or comment block
175 indent++;
176 }
177 pos++;
178 ch = styler[pos];
179 style = styler.StyleAt(pos);
180 }
181
182 indent += SC_FOLDLEVELBASE;
183 // if completely empty line or the start of a comment or preprocessor...
184 if ( styler.LineStart(line) == styler.Length()
185 || ch == ' '
186 || ch == '\t'
187 || ch == '\n'
188 || ch == '\r'
189 || IsCommentStyle(style)
190 || style == SCE_HA_PREPROCESSOR)
191 return indent | SC_FOLDLEVELWHITEFLAG;
192 else
193 return indent;
194 }
195
196 struct OptionsHaskell {
197 bool magicHash;
198 bool allowQuotes;
199 bool implicitParams;
200 bool highlightSafe;
201 bool cpp;
202 bool stylingWithinPreprocessor;
203 bool fold;
204 bool foldComment;
205 bool foldCompact;
206 bool foldImports;
OptionsHaskellOptionsHaskell207 OptionsHaskell() {
208 magicHash = true; // Widespread use, enabled by default.
209 allowQuotes = true; // Widespread use, enabled by default.
210 implicitParams = false; // Fell out of favor, seldom used, disabled.
211 highlightSafe = true; // Moderately used, doesn't hurt to enable.
212 cpp = true; // Widespread use, enabled by default;
213 stylingWithinPreprocessor = false;
214 fold = false;
215 foldComment = false;
216 foldCompact = false;
217 foldImports = false;
218 }
219 };
220
221 static const char * const haskellWordListDesc[] = {
222 "Keywords",
223 "FFI",
224 "Reserved operators",
225 0
226 };
227
228 struct OptionSetHaskell : public OptionSet<OptionsHaskell> {
OptionSetHaskellOptionSetHaskell229 OptionSetHaskell() {
230 DefineProperty("lexer.haskell.allow.hash", &OptionsHaskell::magicHash,
231 "Set to 0 to disallow the '#' character at the end of identifiers and "
232 "literals with the haskell lexer "
233 "(GHC -XMagicHash extension)");
234
235 DefineProperty("lexer.haskell.allow.quotes", &OptionsHaskell::allowQuotes,
236 "Set to 0 to disable highlighting of Template Haskell name quotations "
237 "and promoted constructors "
238 "(GHC -XTemplateHaskell and -XDataKinds extensions)");
239
240 DefineProperty("lexer.haskell.allow.questionmark", &OptionsHaskell::implicitParams,
241 "Set to 1 to allow the '?' character at the start of identifiers "
242 "with the haskell lexer "
243 "(GHC & Hugs -XImplicitParams extension)");
244
245 DefineProperty("lexer.haskell.import.safe", &OptionsHaskell::highlightSafe,
246 "Set to 0 to disallow \"safe\" keyword in imports "
247 "(GHC -XSafe, -XTrustworthy, -XUnsafe extensions)");
248
249 DefineProperty("lexer.haskell.cpp", &OptionsHaskell::cpp,
250 "Set to 0 to disable C-preprocessor highlighting "
251 "(-XCPP extension)");
252
253 DefineProperty("styling.within.preprocessor", &OptionsHaskell::stylingWithinPreprocessor,
254 "For Haskell code, determines whether all preprocessor code is styled in the "
255 "preprocessor style (0, the default) or only from the initial # to the end "
256 "of the command word(1)."
257 );
258
259 DefineProperty("fold", &OptionsHaskell::fold);
260
261 DefineProperty("fold.comment", &OptionsHaskell::foldComment);
262
263 DefineProperty("fold.compact", &OptionsHaskell::foldCompact);
264
265 DefineProperty("fold.haskell.imports", &OptionsHaskell::foldImports,
266 "Set to 1 to enable folding of import declarations");
267
268 DefineWordListSets(haskellWordListDesc);
269 }
270 };
271
272 class LexerHaskell : public ILexer {
273 bool literate;
274 Sci_Position firstImportLine;
275 int firstImportIndent;
276 WordList keywords;
277 WordList ffi;
278 WordList reserved_operators;
279 OptionsHaskell options;
280 OptionSetHaskell osHaskell;
281
282 enum HashCount {
283 oneHash
284 ,twoHashes
285 ,unlimitedHashes
286 };
287
288 enum KeywordMode {
289 HA_MODE_DEFAULT = 0
290 ,HA_MODE_IMPORT1 = 1 // after "import", before "qualified" or "safe" or package name or module name.
291 ,HA_MODE_IMPORT2 = 2 // after module name, before "as" or "hiding".
292 ,HA_MODE_IMPORT3 = 3 // after "as", before "hiding"
293 ,HA_MODE_MODULE = 4 // after "module", before module name.
294 ,HA_MODE_FFI = 5 // after "foreign", before FFI keywords
295 ,HA_MODE_TYPE = 6 // after "type" or "data", before "family"
296 };
297
298 enum LiterateMode {
299 LITERATE_BIRD = 0 // if '>' is the first character on the line,
300 // color '>' as a codedelim and the rest of
301 // the line as code.
302 // else if "\begin{code}" is the only word on the
303 // line except whitespace, switch to LITERATE_BLOCK
304 // otherwise color the line as a literate comment.
305 ,LITERATE_BLOCK = 1 // if the string "\end{code}" is encountered at column
306 // 0 ignoring all later characters, color the line
307 // as a codedelim and switch to LITERATE_BIRD
308 // otherwise color the line as code.
309 };
310
311 struct HaskellLineInfo {
312 unsigned int nestLevel; // 22 bits ought to be enough for anybody
313 unsigned int nonexternalStyle; // 5 bits, widen if number of styles goes
314 // beyond 31.
315 bool pragma;
316 LiterateMode lmode;
317 KeywordMode mode;
318
HaskellLineInfoLexerHaskell::HaskellLineInfo319 HaskellLineInfo(int state) :
320 nestLevel (state >> 10)
321 , nonexternalStyle ((state >> 5) & 0x1F)
322 , pragma ((state >> 4) & 0x1)
323 , lmode (static_cast<LiterateMode>((state >> 3) & 0x1))
324 , mode (static_cast<KeywordMode>(state & 0x7))
325 {}
326
ToLineStateLexerHaskell::HaskellLineInfo327 int ToLineState() {
328 return
329 (nestLevel << 10)
330 | (nonexternalStyle << 5)
331 | (pragma << 4)
332 | (lmode << 3)
333 | mode;
334 }
335 };
336
skipMagicHash(StyleContext & sc,const HashCount hashes) const337 inline void skipMagicHash(StyleContext &sc, const HashCount hashes) const {
338 if (options.magicHash && sc.ch == '#') {
339 sc.Forward();
340 if (hashes == twoHashes && sc.ch == '#') {
341 sc.Forward();
342 } else if (hashes == unlimitedHashes) {
343 while (sc.ch == '#') {
344 sc.Forward();
345 }
346 }
347 }
348 }
349
LineContainsImport(const Sci_Position line,Accessor & styler) const350 bool LineContainsImport(const Sci_Position line, Accessor &styler) const {
351 if (options.foldImports) {
352 Sci_Position currentPos = styler.LineStart(line);
353 int style = styler.StyleAt(currentPos);
354
355 Sci_Position eol_pos = styler.LineStart(line + 1) - 1;
356
357 while (currentPos < eol_pos) {
358 int ch = styler[currentPos];
359 style = styler.StyleAt(currentPos);
360
361 if (ch == ' ' || ch == '\t'
362 || IsCommentBlockStyle(style)
363 || style == SCE_HA_LITERATE_CODEDELIM) {
364 currentPos++;
365 } else {
366 break;
367 }
368 }
369
370 return (style == SCE_HA_KEYWORD
371 && styler.Match(currentPos, "import"));
372 } else {
373 return false;
374 }
375 }
376
IndentAmountWithOffset(Accessor & styler,const Sci_Position line) const377 inline int IndentAmountWithOffset(Accessor &styler, const Sci_Position line) const {
378 const int indent = HaskellIndentAmount(styler, line);
379 const int indentLevel = indent & SC_FOLDLEVELNUMBERMASK;
380 return indentLevel <= ((firstImportIndent - 1) + SC_FOLDLEVELBASE)
381 ? indent
382 : (indentLevel + firstImportIndent) | (indent & ~SC_FOLDLEVELNUMBERMASK);
383 }
384
IndentLevelRemoveIndentOffset(const int indentLevel) const385 inline int IndentLevelRemoveIndentOffset(const int indentLevel) const {
386 return indentLevel <= ((firstImportIndent - 1) + SC_FOLDLEVELBASE)
387 ? indentLevel
388 : indentLevel - firstImportIndent;
389 }
390
391 public:
LexerHaskell(bool literate_)392 LexerHaskell(bool literate_)
393 : literate(literate_)
394 , firstImportLine(-1)
395 , firstImportIndent(0)
396 {}
~LexerHaskell()397 virtual ~LexerHaskell() {}
398
Release()399 void SCI_METHOD Release() {
400 delete this;
401 }
402
Version() const403 int SCI_METHOD Version() const {
404 return lvOriginal;
405 }
406
PropertyNames()407 const char * SCI_METHOD PropertyNames() {
408 return osHaskell.PropertyNames();
409 }
410
PropertyType(const char * name)411 int SCI_METHOD PropertyType(const char *name) {
412 return osHaskell.PropertyType(name);
413 }
414
DescribeProperty(const char * name)415 const char * SCI_METHOD DescribeProperty(const char *name) {
416 return osHaskell.DescribeProperty(name);
417 }
418
419 Sci_Position SCI_METHOD PropertySet(const char *key, const char *val);
420
DescribeWordListSets()421 const char * SCI_METHOD DescribeWordListSets() {
422 return osHaskell.DescribeWordListSets();
423 }
424
425 Sci_Position SCI_METHOD WordListSet(int n, const char *wl);
426
427 void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess);
428
429 void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess);
430
PrivateCall(int,void *)431 void * SCI_METHOD PrivateCall(int, void *) {
432 return 0;
433 }
434
LexerFactoryHaskell()435 static ILexer *LexerFactoryHaskell() {
436 return new LexerHaskell(false);
437 }
438
LexerFactoryLiterateHaskell()439 static ILexer *LexerFactoryLiterateHaskell() {
440 return new LexerHaskell(true);
441 }
442 };
443
PropertySet(const char * key,const char * val)444 Sci_Position SCI_METHOD LexerHaskell::PropertySet(const char *key, const char *val) {
445 if (osHaskell.PropertySet(&options, key, val)) {
446 return 0;
447 }
448 return -1;
449 }
450
WordListSet(int n,const char * wl)451 Sci_Position SCI_METHOD LexerHaskell::WordListSet(int n, const char *wl) {
452 WordList *wordListN = 0;
453 switch (n) {
454 case 0:
455 wordListN = &keywords;
456 break;
457 case 1:
458 wordListN = &ffi;
459 break;
460 case 2:
461 wordListN = &reserved_operators;
462 break;
463 }
464 Sci_Position firstModification = -1;
465 if (wordListN) {
466 WordList wlNew;
467 wlNew.Set(wl);
468 if (*wordListN != wlNew) {
469 wordListN->Set(wl);
470 firstModification = 0;
471 }
472 }
473 return firstModification;
474 }
475
Lex(Sci_PositionU startPos,Sci_Position length,int initStyle,IDocument * pAccess)476 void SCI_METHOD LexerHaskell::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle
477 ,IDocument *pAccess) {
478 LexAccessor styler(pAccess);
479
480 Sci_Position lineCurrent = styler.GetLine(startPos);
481
482 HaskellLineInfo hs = HaskellLineInfo(lineCurrent ? styler.GetLineState(lineCurrent-1) : 0);
483
484 // Do not leak onto next line
485 if (initStyle == SCE_HA_STRINGEOL)
486 initStyle = SCE_HA_DEFAULT;
487 else if (initStyle == SCE_HA_LITERATE_CODEDELIM)
488 initStyle = hs.nonexternalStyle;
489
490 StyleContext sc(startPos, length, initStyle, styler);
491
492 int base = 10;
493 bool dot = false;
494
495 bool inDashes = false;
496 bool alreadyInTheMiddleOfOperator = false;
497
498 assert(!(IsCommentBlockStyle(initStyle) && hs.nestLevel == 0));
499
500 while (sc.More()) {
501 // Check for state end
502
503 if (!IsExternalStyle(sc.state)) {
504 hs.nonexternalStyle = sc.state;
505 }
506
507 // For lexer to work, states should unconditionally forward at least one
508 // character.
509 // If they don't, they should still check if they are at line end and
510 // forward if so.
511 // If a state forwards more than one character, it should check every time
512 // that it is not a line end and cease forwarding otherwise.
513 if (sc.atLineEnd) {
514 // Remember the line state for future incremental lexing
515 styler.SetLineState(lineCurrent, hs.ToLineState());
516 lineCurrent++;
517 }
518
519 // Handle line continuation generically.
520 if (sc.ch == '\\' && (sc.chNext == '\n' || sc.chNext == '\r')
521 && ( sc.state == SCE_HA_STRING
522 || sc.state == SCE_HA_PREPROCESSOR)) {
523 // Remember the line state for future incremental lexing
524 styler.SetLineState(lineCurrent, hs.ToLineState());
525 lineCurrent++;
526
527 sc.Forward();
528 if (sc.ch == '\r' && sc.chNext == '\n') {
529 sc.Forward();
530 }
531 sc.Forward();
532
533 continue;
534 }
535
536 if (sc.atLineStart) {
537
538 if (sc.state == SCE_HA_STRING || sc.state == SCE_HA_CHARACTER) {
539 // Prevent SCE_HA_STRINGEOL from leaking back to previous line
540 sc.SetState(sc.state);
541 }
542
543 if (literate && hs.lmode == LITERATE_BIRD) {
544 if (!IsExternalStyle(sc.state)) {
545 sc.SetState(SCE_HA_LITERATE_COMMENT);
546 }
547 }
548 }
549
550 // External
551 // Literate
552 if ( literate && hs.lmode == LITERATE_BIRD && sc.atLineStart
553 && sc.ch == '>') {
554 sc.SetState(SCE_HA_LITERATE_CODEDELIM);
555 sc.ForwardSetState(hs.nonexternalStyle);
556 }
557 else if (literate && hs.lmode == LITERATE_BIRD && sc.atLineStart
558 && ( sc.ch == ' ' || sc.ch == '\t'
559 || sc.Match("\\begin{code}"))) {
560 sc.SetState(sc.state);
561
562 while ((sc.ch == ' ' || sc.ch == '\t') && sc.More())
563 sc.Forward();
564
565 if (sc.Match("\\begin{code}")) {
566 sc.Forward(static_cast<int>(strlen("\\begin{code}")));
567
568 bool correct = true;
569
570 while (!sc.atLineEnd && sc.More()) {
571 if (sc.ch != ' ' && sc.ch != '\t') {
572 correct = false;
573 }
574 sc.Forward();
575 }
576
577 if (correct) {
578 sc.ChangeState(SCE_HA_LITERATE_CODEDELIM); // color the line end
579 hs.lmode = LITERATE_BLOCK;
580 }
581 }
582 }
583 else if (literate && hs.lmode == LITERATE_BLOCK && sc.atLineStart
584 && sc.Match("\\end{code}")) {
585 sc.SetState(SCE_HA_LITERATE_CODEDELIM);
586
587 sc.Forward(static_cast<int>(strlen("\\end{code}")));
588
589 while (!sc.atLineEnd && sc.More()) {
590 sc.Forward();
591 }
592
593 sc.SetState(SCE_HA_LITERATE_COMMENT);
594 hs.lmode = LITERATE_BIRD;
595 }
596 // Preprocessor
597 else if (sc.atLineStart && sc.ch == '#' && options.cpp
598 && (!options.stylingWithinPreprocessor || sc.state == SCE_HA_DEFAULT)) {
599 sc.SetState(SCE_HA_PREPROCESSOR);
600 sc.Forward();
601 }
602 // Literate
603 else if (sc.state == SCE_HA_LITERATE_COMMENT) {
604 sc.Forward();
605 }
606 else if (sc.state == SCE_HA_LITERATE_CODEDELIM) {
607 sc.ForwardSetState(hs.nonexternalStyle);
608 }
609 // Preprocessor
610 else if (sc.state == SCE_HA_PREPROCESSOR) {
611 if (sc.atLineEnd) {
612 sc.SetState(options.stylingWithinPreprocessor
613 ? SCE_HA_DEFAULT
614 : hs.nonexternalStyle);
615 sc.Forward(); // prevent double counting a line
616 } else if (options.stylingWithinPreprocessor && !IsHaskellLetter(sc.ch)) {
617 sc.SetState(SCE_HA_DEFAULT);
618 } else {
619 sc.Forward();
620 }
621 }
622 // Haskell
623 // Operator
624 else if (sc.state == SCE_HA_OPERATOR) {
625 int style = SCE_HA_OPERATOR;
626
627 if ( sc.ch == ':'
628 && !alreadyInTheMiddleOfOperator
629 // except "::"
630 && !( sc.chNext == ':'
631 && !IsAnHaskellOperatorChar(sc.GetRelative(2)))) {
632 style = SCE_HA_CAPITAL;
633 }
634
635 alreadyInTheMiddleOfOperator = false;
636
637 while (IsAnHaskellOperatorChar(sc.ch))
638 sc.Forward();
639
640 char s[100];
641 sc.GetCurrent(s, sizeof(s));
642
643 if (reserved_operators.InList(s))
644 style = SCE_HA_RESERVED_OPERATOR;
645
646 sc.ChangeState(style);
647 sc.SetState(SCE_HA_DEFAULT);
648 }
649 // String
650 else if (sc.state == SCE_HA_STRING) {
651 if (sc.atLineEnd) {
652 sc.ChangeState(SCE_HA_STRINGEOL);
653 sc.ForwardSetState(SCE_HA_DEFAULT);
654 } else if (sc.ch == '\"') {
655 sc.Forward();
656 skipMagicHash(sc, oneHash);
657 sc.SetState(SCE_HA_DEFAULT);
658 } else if (sc.ch == '\\') {
659 sc.Forward(2);
660 } else {
661 sc.Forward();
662 }
663 }
664 // Char
665 else if (sc.state == SCE_HA_CHARACTER) {
666 if (sc.atLineEnd) {
667 sc.ChangeState(SCE_HA_STRINGEOL);
668 sc.ForwardSetState(SCE_HA_DEFAULT);
669 } else if (sc.ch == '\'') {
670 sc.Forward();
671 skipMagicHash(sc, oneHash);
672 sc.SetState(SCE_HA_DEFAULT);
673 } else if (sc.ch == '\\') {
674 sc.Forward(2);
675 } else {
676 sc.Forward();
677 }
678 }
679 // Number
680 else if (sc.state == SCE_HA_NUMBER) {
681 if (sc.atLineEnd) {
682 sc.SetState(SCE_HA_DEFAULT);
683 sc.Forward(); // prevent double counting a line
684 } else if (IsADigit(sc.ch, base)) {
685 sc.Forward();
686 } else if (sc.ch=='.' && dot && IsADigit(sc.chNext, base)) {
687 sc.Forward(2);
688 dot = false;
689 } else if ((base == 10) &&
690 (sc.ch == 'e' || sc.ch == 'E') &&
691 (IsADigit(sc.chNext) || sc.chNext == '+' || sc.chNext == '-')) {
692 sc.Forward();
693 if (sc.ch == '+' || sc.ch == '-')
694 sc.Forward();
695 } else {
696 skipMagicHash(sc, twoHashes);
697 sc.SetState(SCE_HA_DEFAULT);
698 }
699 }
700 // Keyword or Identifier
701 else if (sc.state == SCE_HA_IDENTIFIER) {
702 int style = IsHaskellUpperCase(sc.ch) ? SCE_HA_CAPITAL : SCE_HA_IDENTIFIER;
703
704 assert(IsAHaskellWordStart(sc.ch));
705
706 sc.Forward();
707
708 while (sc.More()) {
709 if (IsAHaskellWordChar(sc.ch)) {
710 sc.Forward();
711 } else if (sc.ch == '.' && style == SCE_HA_CAPITAL) {
712 if (IsHaskellUpperCase(sc.chNext)) {
713 sc.Forward();
714 style = SCE_HA_CAPITAL;
715 } else if (IsAHaskellWordStart(sc.chNext)) {
716 sc.Forward();
717 style = SCE_HA_IDENTIFIER;
718 } else if (IsAnHaskellOperatorChar(sc.chNext)) {
719 sc.Forward();
720 style = sc.ch == ':' ? SCE_HA_CAPITAL : SCE_HA_OPERATOR;
721 while (IsAnHaskellOperatorChar(sc.ch))
722 sc.Forward();
723 break;
724 } else {
725 break;
726 }
727 } else {
728 break;
729 }
730 }
731
732 skipMagicHash(sc, unlimitedHashes);
733
734 char s[100];
735 sc.GetCurrent(s, sizeof(s));
736
737 KeywordMode new_mode = HA_MODE_DEFAULT;
738
739 if (keywords.InList(s)) {
740 style = SCE_HA_KEYWORD;
741 } else if (style == SCE_HA_CAPITAL) {
742 if (hs.mode == HA_MODE_IMPORT1 || hs.mode == HA_MODE_IMPORT3) {
743 style = SCE_HA_MODULE;
744 new_mode = HA_MODE_IMPORT2;
745 } else if (hs.mode == HA_MODE_MODULE) {
746 style = SCE_HA_MODULE;
747 }
748 } else if (hs.mode == HA_MODE_IMPORT1 &&
749 strcmp(s,"qualified") == 0) {
750 style = SCE_HA_KEYWORD;
751 new_mode = HA_MODE_IMPORT1;
752 } else if (options.highlightSafe &&
753 hs.mode == HA_MODE_IMPORT1 &&
754 strcmp(s,"safe") == 0) {
755 style = SCE_HA_KEYWORD;
756 new_mode = HA_MODE_IMPORT1;
757 } else if (hs.mode == HA_MODE_IMPORT2) {
758 if (strcmp(s,"as") == 0) {
759 style = SCE_HA_KEYWORD;
760 new_mode = HA_MODE_IMPORT3;
761 } else if (strcmp(s,"hiding") == 0) {
762 style = SCE_HA_KEYWORD;
763 }
764 } else if (hs.mode == HA_MODE_TYPE) {
765 if (strcmp(s,"family") == 0)
766 style = SCE_HA_KEYWORD;
767 }
768
769 if (hs.mode == HA_MODE_FFI) {
770 if (ffi.InList(s)) {
771 style = SCE_HA_KEYWORD;
772 new_mode = HA_MODE_FFI;
773 }
774 }
775
776 sc.ChangeState(style);
777 sc.SetState(SCE_HA_DEFAULT);
778
779 if (strcmp(s,"import") == 0 && hs.mode != HA_MODE_FFI)
780 new_mode = HA_MODE_IMPORT1;
781 else if (strcmp(s,"module") == 0)
782 new_mode = HA_MODE_MODULE;
783 else if (strcmp(s,"foreign") == 0)
784 new_mode = HA_MODE_FFI;
785 else if (strcmp(s,"type") == 0
786 || strcmp(s,"data") == 0)
787 new_mode = HA_MODE_TYPE;
788
789 hs.mode = new_mode;
790 }
791
792 // Comments
793 // Oneliner
794 else if (sc.state == SCE_HA_COMMENTLINE) {
795 if (sc.atLineEnd) {
796 sc.SetState(hs.pragma ? SCE_HA_PRAGMA : SCE_HA_DEFAULT);
797 sc.Forward(); // prevent double counting a line
798 } else if (inDashes && sc.ch != '-' && !hs.pragma) {
799 inDashes = false;
800 if (IsAnHaskellOperatorChar(sc.ch)) {
801 alreadyInTheMiddleOfOperator = true;
802 sc.ChangeState(SCE_HA_OPERATOR);
803 }
804 } else {
805 sc.Forward();
806 }
807 }
808 // Nested
809 else if (IsCommentBlockStyle(sc.state)) {
810 if (sc.Match('{','-')) {
811 sc.SetState(CommentBlockStyleFromNestLevel(hs.nestLevel));
812 sc.Forward(2);
813 hs.nestLevel++;
814 } else if (sc.Match('-','}')) {
815 sc.Forward(2);
816 assert(hs.nestLevel > 0);
817 if (hs.nestLevel > 0)
818 hs.nestLevel--;
819 sc.SetState(
820 hs.nestLevel == 0
821 ? (hs.pragma ? SCE_HA_PRAGMA : SCE_HA_DEFAULT)
822 : CommentBlockStyleFromNestLevel(hs.nestLevel - 1));
823 } else {
824 sc.Forward();
825 }
826 }
827 // Pragma
828 else if (sc.state == SCE_HA_PRAGMA) {
829 if (sc.Match("#-}")) {
830 hs.pragma = false;
831 sc.Forward(3);
832 sc.SetState(SCE_HA_DEFAULT);
833 } else if (sc.Match('-','-')) {
834 sc.SetState(SCE_HA_COMMENTLINE);
835 sc.Forward(2);
836 inDashes = false;
837 } else if (sc.Match('{','-')) {
838 sc.SetState(CommentBlockStyleFromNestLevel(hs.nestLevel));
839 sc.Forward(2);
840 hs.nestLevel = 1;
841 } else {
842 sc.Forward();
843 }
844 }
845 // New state?
846 else if (sc.state == SCE_HA_DEFAULT) {
847 // Digit
848 if (IsADigit(sc.ch)) {
849 hs.mode = HA_MODE_DEFAULT;
850
851 sc.SetState(SCE_HA_NUMBER);
852 if (sc.ch == '0' && (sc.chNext == 'X' || sc.chNext == 'x')) {
853 // Match anything starting with "0x" or "0X", too
854 sc.Forward(2);
855 base = 16;
856 dot = false;
857 } else if (sc.ch == '0' && (sc.chNext == 'O' || sc.chNext == 'o')) {
858 // Match anything starting with "0o" or "0O", too
859 sc.Forward(2);
860 base = 8;
861 dot = false;
862 } else {
863 sc.Forward();
864 base = 10;
865 dot = true;
866 }
867 }
868 // Pragma
869 else if (sc.Match("{-#")) {
870 hs.pragma = true;
871 sc.SetState(SCE_HA_PRAGMA);
872 sc.Forward(3);
873 }
874 // Comment line
875 else if (sc.Match('-','-')) {
876 sc.SetState(SCE_HA_COMMENTLINE);
877 sc.Forward(2);
878 inDashes = true;
879 }
880 // Comment block
881 else if (sc.Match('{','-')) {
882 sc.SetState(CommentBlockStyleFromNestLevel(hs.nestLevel));
883 sc.Forward(2);
884 hs.nestLevel = 1;
885 }
886 // String
887 else if (sc.ch == '\"') {
888 sc.SetState(SCE_HA_STRING);
889 sc.Forward();
890 }
891 // Character or quoted name or promoted term
892 else if (sc.ch == '\'') {
893 hs.mode = HA_MODE_DEFAULT;
894
895 sc.SetState(SCE_HA_CHARACTER);
896 sc.Forward();
897
898 if (options.allowQuotes) {
899 // Quoted type ''T
900 if (sc.ch=='\'' && IsAHaskellWordStart(sc.chNext)) {
901 sc.Forward();
902 sc.ChangeState(SCE_HA_IDENTIFIER);
903 } else if (sc.chNext != '\'') {
904 // Quoted name 'n or promoted constructor 'N
905 if (IsAHaskellWordStart(sc.ch)) {
906 sc.ChangeState(SCE_HA_IDENTIFIER);
907 // Promoted constructor operator ':~>
908 } else if (sc.ch == ':') {
909 alreadyInTheMiddleOfOperator = false;
910 sc.ChangeState(SCE_HA_OPERATOR);
911 // Promoted list or tuple '[T]
912 } else if (sc.ch == '[' || sc.ch== '(') {
913 sc.ChangeState(SCE_HA_OPERATOR);
914 sc.ForwardSetState(SCE_HA_DEFAULT);
915 }
916 }
917 }
918 }
919 // Operator starting with '?' or an implicit parameter
920 else if (sc.ch == '?') {
921 hs.mode = HA_MODE_DEFAULT;
922
923 alreadyInTheMiddleOfOperator = false;
924 sc.SetState(SCE_HA_OPERATOR);
925
926 if ( options.implicitParams
927 && IsAHaskellWordStart(sc.chNext)
928 && !IsHaskellUpperCase(sc.chNext)) {
929 sc.Forward();
930 sc.ChangeState(SCE_HA_IDENTIFIER);
931 }
932 }
933 // Operator
934 else if (IsAnHaskellOperatorChar(sc.ch)) {
935 hs.mode = HA_MODE_DEFAULT;
936
937 sc.SetState(SCE_HA_OPERATOR);
938 }
939 // Braces and punctuation
940 else if (sc.ch == ',' || sc.ch == ';'
941 || sc.ch == '(' || sc.ch == ')'
942 || sc.ch == '[' || sc.ch == ']'
943 || sc.ch == '{' || sc.ch == '}') {
944 sc.SetState(SCE_HA_OPERATOR);
945 sc.ForwardSetState(SCE_HA_DEFAULT);
946 }
947 // Keyword or Identifier
948 else if (IsAHaskellWordStart(sc.ch)) {
949 sc.SetState(SCE_HA_IDENTIFIER);
950 // Something we don't care about
951 } else {
952 sc.Forward();
953 }
954 }
955 // This branch should never be reached.
956 else {
957 assert(false);
958 sc.Forward();
959 }
960 }
961 sc.Complete();
962 }
963
Fold(Sci_PositionU startPos,Sci_Position length,int,IDocument * pAccess)964 void SCI_METHOD LexerHaskell::Fold(Sci_PositionU startPos, Sci_Position length, int // initStyle
965 ,IDocument *pAccess) {
966 if (!options.fold)
967 return;
968
969 Accessor styler(pAccess, NULL);
970
971 Sci_Position lineCurrent = styler.GetLine(startPos);
972
973 if (lineCurrent <= firstImportLine) {
974 firstImportLine = -1; // readjust first import position
975 firstImportIndent = 0;
976 }
977
978 const Sci_Position maxPos = startPos + length;
979 const Sci_Position maxLines =
980 maxPos == styler.Length()
981 ? styler.GetLine(maxPos)
982 : styler.GetLine(maxPos - 1); // Requested last line
983 const Sci_Position docLines = styler.GetLine(styler.Length()); // Available last line
984
985 // Backtrack to previous non-blank line so we can determine indent level
986 // for any white space lines
987 // and so we can fix any preceding fold level (which is why we go back
988 // at least one line in all cases)
989 bool importHere = LineContainsImport(lineCurrent, styler);
990 int indentCurrent = IndentAmountWithOffset(styler, lineCurrent);
991
992 while (lineCurrent > 0) {
993 lineCurrent--;
994 importHere = LineContainsImport(lineCurrent, styler);
995 indentCurrent = IndentAmountWithOffset(styler, lineCurrent);
996 if (!(indentCurrent & SC_FOLDLEVELWHITEFLAG))
997 break;
998 }
999
1000 int indentCurrentLevel = indentCurrent & SC_FOLDLEVELNUMBERMASK;
1001
1002 if (importHere) {
1003 indentCurrentLevel = IndentLevelRemoveIndentOffset(indentCurrentLevel);
1004 if (firstImportLine == -1) {
1005 firstImportLine = lineCurrent;
1006 firstImportIndent = (1 + indentCurrentLevel) - SC_FOLDLEVELBASE;
1007 }
1008 if (firstImportLine != lineCurrent) {
1009 indentCurrentLevel++;
1010 }
1011 }
1012
1013 indentCurrent = indentCurrentLevel | (indentCurrent & ~SC_FOLDLEVELNUMBERMASK);
1014
1015 // Process all characters to end of requested range
1016 //that hangs over the end of the range. Cap processing in all cases
1017 // to end of document.
1018 while (lineCurrent <= docLines && lineCurrent <= maxLines) {
1019
1020 // Gather info
1021 Sci_Position lineNext = lineCurrent + 1;
1022 importHere = false;
1023 int indentNext = indentCurrent;
1024
1025 if (lineNext <= docLines) {
1026 // Information about next line is only available if not at end of document
1027 importHere = LineContainsImport(lineNext, styler);
1028 indentNext = IndentAmountWithOffset(styler, lineNext);
1029 }
1030 if (indentNext & SC_FOLDLEVELWHITEFLAG)
1031 indentNext = SC_FOLDLEVELWHITEFLAG | indentCurrentLevel;
1032
1033 // Skip past any blank lines for next indent level info; we skip also
1034 // comments (all comments, not just those starting in column 0)
1035 // which effectively folds them into surrounding code rather
1036 // than screwing up folding.
1037
1038 while (lineNext < docLines && (indentNext & SC_FOLDLEVELWHITEFLAG)) {
1039 lineNext++;
1040 importHere = LineContainsImport(lineNext, styler);
1041 indentNext = IndentAmountWithOffset(styler, lineNext);
1042 }
1043
1044 int indentNextLevel = indentNext & SC_FOLDLEVELNUMBERMASK;
1045
1046 if (importHere) {
1047 indentNextLevel = IndentLevelRemoveIndentOffset(indentNextLevel);
1048 if (firstImportLine == -1) {
1049 firstImportLine = lineNext;
1050 firstImportIndent = (1 + indentNextLevel) - SC_FOLDLEVELBASE;
1051 }
1052 if (firstImportLine != lineNext) {
1053 indentNextLevel++;
1054 }
1055 }
1056
1057 indentNext = indentNextLevel | (indentNext & ~SC_FOLDLEVELNUMBERMASK);
1058
1059 const int levelBeforeComments = Maximum(indentCurrentLevel,indentNextLevel);
1060
1061 // Now set all the indent levels on the lines we skipped
1062 // Do this from end to start. Once we encounter one line
1063 // which is indented more than the line after the end of
1064 // the comment-block, use the level of the block before
1065
1066 Sci_Position skipLine = lineNext;
1067 int skipLevel = indentNextLevel;
1068
1069 while (--skipLine > lineCurrent) {
1070 int skipLineIndent = IndentAmountWithOffset(styler, skipLine);
1071
1072 if (options.foldCompact) {
1073 if ((skipLineIndent & SC_FOLDLEVELNUMBERMASK) > indentNextLevel) {
1074 skipLevel = levelBeforeComments;
1075 }
1076
1077 int whiteFlag = skipLineIndent & SC_FOLDLEVELWHITEFLAG;
1078
1079 styler.SetLevel(skipLine, skipLevel | whiteFlag);
1080 } else {
1081 if ( (skipLineIndent & SC_FOLDLEVELNUMBERMASK) > indentNextLevel
1082 && !(skipLineIndent & SC_FOLDLEVELWHITEFLAG)) {
1083 skipLevel = levelBeforeComments;
1084 }
1085
1086 styler.SetLevel(skipLine, skipLevel);
1087 }
1088 }
1089
1090 int lev = indentCurrent;
1091
1092 if (!(indentCurrent & SC_FOLDLEVELWHITEFLAG)) {
1093 if ((indentCurrent & SC_FOLDLEVELNUMBERMASK) < (indentNext & SC_FOLDLEVELNUMBERMASK))
1094 lev |= SC_FOLDLEVELHEADERFLAG;
1095 }
1096
1097 // Set fold level for this line and move to next line
1098 styler.SetLevel(lineCurrent, options.foldCompact ? lev : lev & ~SC_FOLDLEVELWHITEFLAG);
1099
1100 indentCurrent = indentNext;
1101 indentCurrentLevel = indentNextLevel;
1102 lineCurrent = lineNext;
1103 }
1104
1105 // NOTE: Cannot set level of last line here because indentCurrent doesn't have
1106 // header flag set; the loop above is crafted to take care of this case!
1107 //styler.SetLevel(lineCurrent, indentCurrent);
1108 }
1109
1110 LexerModule lmHaskell(SCLEX_HASKELL, LexerHaskell::LexerFactoryHaskell, "haskell", haskellWordListDesc);
1111 LexerModule lmLiterateHaskell(SCLEX_LITERATEHASKELL, LexerHaskell::LexerFactoryLiterateHaskell, "literatehaskell", haskellWordListDesc);
1112