1 /******************************************************************
2 * LexHaskell.cxx
3 *
4 * A haskell lexer for the scintilla code control.
5 * Some stuff "lended" from LexPython.cxx and LexCPP.cxx.
6 * External lexer stuff inspired from the caml external lexer.
7 * Folder copied from Python's.
8 *
9 * Written by Tobias Engvall - tumm at dtek dot chalmers dot se
10 *
11 * Several bug fixes by Krasimir Angelov - kr.angelov at gmail.com
12 *
13 * Improved by kudah <kudahkukarek@gmail.com>
14 *
15 * TODO:
16 * * A proper lexical folder to fold group declarations, comments, pragmas,
17 * #ifdefs, explicit layout, lists, tuples, quasi-quotes, splces, etc, etc,
18 * etc.
19 *
20 *****************************************************************/
21 #include <stdlib.h>
22 #include <string.h>
23 #include <stdio.h>
24 #include <stdarg.h>
25 #include <assert.h>
26 #include <ctype.h>
27
28 #include <string>
29 #include <map>
30
31 #include "ILexer.h"
32 #include "Scintilla.h"
33 #include "SciLexer.h"
34
35 #include "PropSetSimple.h"
36 #include "WordList.h"
37 #include "LexAccessor.h"
38 #include "Accessor.h"
39 #include "StyleContext.h"
40 #include "CharacterSet.h"
41 #include "CharacterCategory.h"
42 #include "LexerModule.h"
43 #include "OptionSet.h"
44 #include "DefaultLexer.h"
45
46 using namespace Scintilla;
47
48 // See https://github.com/ghc/ghc/blob/master/compiler/parser/Lexer.x#L1682
49 // Note, letter modifiers are prohibited.
50
u_iswupper(int ch)51 static int u_iswupper (int ch) {
52 CharacterCategory c = CategoriseCharacter(ch);
53 return c == ccLu || c == ccLt;
54 }
55
u_iswalpha(int ch)56 static int u_iswalpha (int ch) {
57 CharacterCategory c = CategoriseCharacter(ch);
58 return c == ccLl || c == ccLu || c == ccLt || c == ccLo;
59 }
60
u_iswalnum(int ch)61 static int u_iswalnum (int ch) {
62 CharacterCategory c = CategoriseCharacter(ch);
63 return c == ccLl || c == ccLu || c == ccLt || c == ccLo
64 || c == ccNd || c == ccNo;
65 }
66
u_IsHaskellSymbol(int ch)67 static int u_IsHaskellSymbol(int ch) {
68 CharacterCategory c = CategoriseCharacter(ch);
69 return c == ccPc || c == ccPd || c == ccPo
70 || c == ccSm || c == ccSc || c == ccSk || c == ccSo;
71 }
72
IsHaskellLetter(const int ch)73 static inline bool IsHaskellLetter(const int ch) {
74 if (IsASCII(ch)) {
75 return (ch >= 'a' && ch <= 'z')
76 || (ch >= 'A' && ch <= 'Z');
77 } else {
78 return u_iswalpha(ch) != 0;
79 }
80 }
81
IsHaskellAlphaNumeric(const int ch)82 static inline bool IsHaskellAlphaNumeric(const int ch) {
83 if (IsASCII(ch)) {
84 return IsAlphaNumeric(ch);
85 } else {
86 return u_iswalnum(ch) != 0;
87 }
88 }
89
IsHaskellUpperCase(const int ch)90 static inline bool IsHaskellUpperCase(const int ch) {
91 if (IsASCII(ch)) {
92 return ch >= 'A' && ch <= 'Z';
93 } else {
94 return u_iswupper(ch) != 0;
95 }
96 }
97
IsAnHaskellOperatorChar(const int ch)98 static inline bool IsAnHaskellOperatorChar(const int ch) {
99 if (IsASCII(ch)) {
100 return
101 ( ch == '!' || ch == '#' || ch == '$' || ch == '%'
102 || ch == '&' || ch == '*' || ch == '+' || ch == '-'
103 || ch == '.' || ch == '/' || ch == ':' || ch == '<'
104 || ch == '=' || ch == '>' || ch == '?' || ch == '@'
105 || ch == '^' || ch == '|' || ch == '~' || ch == '\\');
106 } else {
107 return u_IsHaskellSymbol(ch) != 0;
108 }
109 }
110
IsAHaskellWordStart(const int ch)111 static inline bool IsAHaskellWordStart(const int ch) {
112 return IsHaskellLetter(ch) || ch == '_';
113 }
114
IsAHaskellWordChar(const int ch)115 static inline bool IsAHaskellWordChar(const int ch) {
116 return ( IsHaskellAlphaNumeric(ch)
117 || ch == '_'
118 || ch == '\'');
119 }
120
IsCommentBlockStyle(int style)121 static inline bool IsCommentBlockStyle(int style) {
122 return (style >= SCE_HA_COMMENTBLOCK && style <= SCE_HA_COMMENTBLOCK3);
123 }
124
IsCommentStyle(int style)125 static inline bool IsCommentStyle(int style) {
126 return (style >= SCE_HA_COMMENTLINE && style <= SCE_HA_COMMENTBLOCK3)
127 || ( style == SCE_HA_LITERATE_COMMENT
128 || style == SCE_HA_LITERATE_CODEDELIM);
129 }
130
131 // styles which do not belong to Haskell, but to external tools
IsExternalStyle(int style)132 static inline bool IsExternalStyle(int style) {
133 return ( style == SCE_HA_PREPROCESSOR
134 || style == SCE_HA_LITERATE_COMMENT
135 || style == SCE_HA_LITERATE_CODEDELIM);
136 }
137
CommentBlockStyleFromNestLevel(const unsigned int nestLevel)138 static inline int CommentBlockStyleFromNestLevel(const unsigned int nestLevel) {
139 return SCE_HA_COMMENTBLOCK + (nestLevel % 3);
140 }
141
142 // Mangled version of lexlib/Accessor.cxx IndentAmount.
143 // Modified to treat comment blocks as whitespace
144 // plus special case for commentline/preprocessor.
HaskellIndentAmount(Accessor & styler,const Sci_Position line)145 static int HaskellIndentAmount(Accessor &styler, const Sci_Position line) {
146
147 // Determines the indentation level of the current line
148 // Comment blocks are treated as whitespace
149
150 Sci_Position pos = styler.LineStart(line);
151 Sci_Position eol_pos = styler.LineStart(line + 1) - 1;
152
153 char ch = styler[pos];
154 int style = styler.StyleAt(pos);
155
156 int indent = 0;
157 bool inPrevPrefix = line > 0;
158
159 Sci_Position posPrev = inPrevPrefix ? styler.LineStart(line-1) : 0;
160
161 while (( ch == ' ' || ch == '\t'
162 || IsCommentBlockStyle(style)
163 || style == SCE_HA_LITERATE_CODEDELIM)
164 && (pos < eol_pos)) {
165 if (inPrevPrefix) {
166 char chPrev = styler[posPrev++];
167 if (chPrev != ' ' && chPrev != '\t') {
168 inPrevPrefix = false;
169 }
170 }
171 if (ch == '\t') {
172 indent = (indent / 8 + 1) * 8;
173 } else { // Space or comment block
174 indent++;
175 }
176 pos++;
177 ch = styler[pos];
178 style = styler.StyleAt(pos);
179 }
180
181 indent += SC_FOLDLEVELBASE;
182 // if completely empty line or the start of a comment or preprocessor...
183 if ( styler.LineStart(line) == styler.Length()
184 || ch == ' '
185 || ch == '\t'
186 || ch == '\n'
187 || ch == '\r'
188 || IsCommentStyle(style)
189 || style == SCE_HA_PREPROCESSOR)
190 return indent | SC_FOLDLEVELWHITEFLAG;
191 else
192 return indent;
193 }
194
195 struct OptionsHaskell {
196 bool magicHash;
197 bool allowQuotes;
198 bool implicitParams;
199 bool highlightSafe;
200 bool cpp;
201 bool stylingWithinPreprocessor;
202 bool fold;
203 bool foldComment;
204 bool foldCompact;
205 bool foldImports;
OptionsHaskellOptionsHaskell206 OptionsHaskell() {
207 magicHash = true; // Widespread use, enabled by default.
208 allowQuotes = true; // Widespread use, enabled by default.
209 implicitParams = false; // Fell out of favor, seldom used, disabled.
210 highlightSafe = true; // Moderately used, doesn't hurt to enable.
211 cpp = true; // Widespread use, enabled by default;
212 stylingWithinPreprocessor = false;
213 fold = false;
214 foldComment = false;
215 foldCompact = false;
216 foldImports = false;
217 }
218 };
219
220 static const char * const haskellWordListDesc[] = {
221 "Keywords",
222 "FFI",
223 "Reserved operators",
224 0
225 };
226
227 struct OptionSetHaskell : public OptionSet<OptionsHaskell> {
OptionSetHaskellOptionSetHaskell228 OptionSetHaskell() {
229 DefineProperty("lexer.haskell.allow.hash", &OptionsHaskell::magicHash,
230 "Set to 0 to disallow the '#' character at the end of identifiers and "
231 "literals with the haskell lexer "
232 "(GHC -XMagicHash extension)");
233
234 DefineProperty("lexer.haskell.allow.quotes", &OptionsHaskell::allowQuotes,
235 "Set to 0 to disable highlighting of Template Haskell name quotations "
236 "and promoted constructors "
237 "(GHC -XTemplateHaskell and -XDataKinds extensions)");
238
239 DefineProperty("lexer.haskell.allow.questionmark", &OptionsHaskell::implicitParams,
240 "Set to 1 to allow the '?' character at the start of identifiers "
241 "with the haskell lexer "
242 "(GHC & Hugs -XImplicitParams extension)");
243
244 DefineProperty("lexer.haskell.import.safe", &OptionsHaskell::highlightSafe,
245 "Set to 0 to disallow \"safe\" keyword in imports "
246 "(GHC -XSafe, -XTrustworthy, -XUnsafe extensions)");
247
248 DefineProperty("lexer.haskell.cpp", &OptionsHaskell::cpp,
249 "Set to 0 to disable C-preprocessor highlighting "
250 "(-XCPP extension)");
251
252 DefineProperty("styling.within.preprocessor", &OptionsHaskell::stylingWithinPreprocessor,
253 "For Haskell code, determines whether all preprocessor code is styled in the "
254 "preprocessor style (0, the default) or only from the initial # to the end "
255 "of the command word(1)."
256 );
257
258 DefineProperty("fold", &OptionsHaskell::fold);
259
260 DefineProperty("fold.comment", &OptionsHaskell::foldComment);
261
262 DefineProperty("fold.compact", &OptionsHaskell::foldCompact);
263
264 DefineProperty("fold.haskell.imports", &OptionsHaskell::foldImports,
265 "Set to 1 to enable folding of import declarations");
266
267 DefineWordListSets(haskellWordListDesc);
268 }
269 };
270
271 class LexerHaskell : public DefaultLexer {
272 bool literate;
273 Sci_Position firstImportLine;
274 int firstImportIndent;
275 WordList keywords;
276 WordList ffi;
277 WordList reserved_operators;
278 OptionsHaskell options;
279 OptionSetHaskell osHaskell;
280
281 enum HashCount {
282 oneHash
283 ,twoHashes
284 ,unlimitedHashes
285 };
286
287 enum KeywordMode {
288 HA_MODE_DEFAULT = 0
289 ,HA_MODE_IMPORT1 = 1 // after "import", before "qualified" or "safe" or package name or module name.
290 ,HA_MODE_IMPORT2 = 2 // after module name, before "as" or "hiding".
291 ,HA_MODE_IMPORT3 = 3 // after "as", before "hiding"
292 ,HA_MODE_MODULE = 4 // after "module", before module name.
293 ,HA_MODE_FFI = 5 // after "foreign", before FFI keywords
294 ,HA_MODE_TYPE = 6 // after "type" or "data", before "family"
295 };
296
297 enum LiterateMode {
298 LITERATE_BIRD = 0 // if '>' is the first character on the line,
299 // color '>' as a codedelim and the rest of
300 // the line as code.
301 // else if "\begin{code}" is the only word on the
302 // line except whitespace, switch to LITERATE_BLOCK
303 // otherwise color the line as a literate comment.
304 ,LITERATE_BLOCK = 1 // if the string "\end{code}" is encountered at column
305 // 0 ignoring all later characters, color the line
306 // as a codedelim and switch to LITERATE_BIRD
307 // otherwise color the line as code.
308 };
309
310 struct HaskellLineInfo {
311 unsigned int nestLevel; // 22 bits ought to be enough for anybody
312 unsigned int nonexternalStyle; // 5 bits, widen if number of styles goes
313 // beyond 31.
314 bool pragma;
315 LiterateMode lmode;
316 KeywordMode mode;
317
HaskellLineInfoLexerHaskell::HaskellLineInfo318 HaskellLineInfo(int state) :
319 nestLevel (state >> 10)
320 , nonexternalStyle ((state >> 5) & 0x1F)
321 , pragma ((state >> 4) & 0x1)
322 , lmode (static_cast<LiterateMode>((state >> 3) & 0x1))
323 , mode (static_cast<KeywordMode>(state & 0x7))
324 {}
325
ToLineStateLexerHaskell::HaskellLineInfo326 int ToLineState() {
327 return
328 (nestLevel << 10)
329 | (nonexternalStyle << 5)
330 | (pragma << 4)
331 | (lmode << 3)
332 | mode;
333 }
334 };
335
skipMagicHash(StyleContext & sc,const HashCount hashes) const336 inline void skipMagicHash(StyleContext &sc, const HashCount hashes) const {
337 if (options.magicHash && sc.ch == '#') {
338 sc.Forward();
339 if (hashes == twoHashes && sc.ch == '#') {
340 sc.Forward();
341 } else if (hashes == unlimitedHashes) {
342 while (sc.ch == '#') {
343 sc.Forward();
344 }
345 }
346 }
347 }
348
LineContainsImport(const Sci_Position line,Accessor & styler) const349 bool LineContainsImport(const Sci_Position line, Accessor &styler) const {
350 if (options.foldImports) {
351 Sci_Position currentPos = styler.LineStart(line);
352 int style = styler.StyleAt(currentPos);
353
354 Sci_Position eol_pos = styler.LineStart(line + 1) - 1;
355
356 while (currentPos < eol_pos) {
357 int ch = styler[currentPos];
358 style = styler.StyleAt(currentPos);
359
360 if (ch == ' ' || ch == '\t'
361 || IsCommentBlockStyle(style)
362 || style == SCE_HA_LITERATE_CODEDELIM) {
363 currentPos++;
364 } else {
365 break;
366 }
367 }
368
369 return (style == SCE_HA_KEYWORD
370 && styler.Match(currentPos, "import"));
371 } else {
372 return false;
373 }
374 }
375
IndentAmountWithOffset(Accessor & styler,const Sci_Position line) const376 inline int IndentAmountWithOffset(Accessor &styler, const Sci_Position line) const {
377 const int indent = HaskellIndentAmount(styler, line);
378 const int indentLevel = indent & SC_FOLDLEVELNUMBERMASK;
379 return indentLevel <= ((firstImportIndent - 1) + SC_FOLDLEVELBASE)
380 ? indent
381 : (indentLevel + firstImportIndent) | (indent & ~SC_FOLDLEVELNUMBERMASK);
382 }
383
IndentLevelRemoveIndentOffset(const int indentLevel) const384 inline int IndentLevelRemoveIndentOffset(const int indentLevel) const {
385 return indentLevel <= ((firstImportIndent - 1) + SC_FOLDLEVELBASE)
386 ? indentLevel
387 : indentLevel - firstImportIndent;
388 }
389
390 public:
LexerHaskell(bool literate_)391 LexerHaskell(bool literate_)
392 : literate(literate_)
393 , firstImportLine(-1)
394 , firstImportIndent(0)
395 {}
~LexerHaskell()396 virtual ~LexerHaskell() {}
397
Release()398 void SCI_METHOD Release() override {
399 delete this;
400 }
401
Version() const402 int SCI_METHOD Version() const override {
403 return lvOriginal;
404 }
405
PropertyNames()406 const char * SCI_METHOD PropertyNames() override {
407 return osHaskell.PropertyNames();
408 }
409
PropertyType(const char * name)410 int SCI_METHOD PropertyType(const char *name) override {
411 return osHaskell.PropertyType(name);
412 }
413
DescribeProperty(const char * name)414 const char * SCI_METHOD DescribeProperty(const char *name) override {
415 return osHaskell.DescribeProperty(name);
416 }
417
418 Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override;
419
DescribeWordListSets()420 const char * SCI_METHOD DescribeWordListSets() override {
421 return osHaskell.DescribeWordListSets();
422 }
423
424 Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;
425
426 void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
427
428 void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
429
PrivateCall(int,void *)430 void * SCI_METHOD PrivateCall(int, void *) override {
431 return 0;
432 }
433
LexerFactoryHaskell()434 static ILexer *LexerFactoryHaskell() {
435 return new LexerHaskell(false);
436 }
437
LexerFactoryLiterateHaskell()438 static ILexer *LexerFactoryLiterateHaskell() {
439 return new LexerHaskell(true);
440 }
441 };
442
PropertySet(const char * key,const char * val)443 Sci_Position SCI_METHOD LexerHaskell::PropertySet(const char *key, const char *val) {
444 if (osHaskell.PropertySet(&options, key, val)) {
445 return 0;
446 }
447 return -1;
448 }
449
WordListSet(int n,const char * wl)450 Sci_Position SCI_METHOD LexerHaskell::WordListSet(int n, const char *wl) {
451 WordList *wordListN = 0;
452 switch (n) {
453 case 0:
454 wordListN = &keywords;
455 break;
456 case 1:
457 wordListN = &ffi;
458 break;
459 case 2:
460 wordListN = &reserved_operators;
461 break;
462 }
463 Sci_Position firstModification = -1;
464 if (wordListN) {
465 WordList wlNew;
466 wlNew.Set(wl);
467 if (*wordListN != wlNew) {
468 wordListN->Set(wl);
469 firstModification = 0;
470 }
471 }
472 return firstModification;
473 }
474
Lex(Sci_PositionU startPos,Sci_Position length,int initStyle,IDocument * pAccess)475 void SCI_METHOD LexerHaskell::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle
476 ,IDocument *pAccess) {
477 LexAccessor styler(pAccess);
478
479 Sci_Position lineCurrent = styler.GetLine(startPos);
480
481 HaskellLineInfo hs = HaskellLineInfo(lineCurrent ? styler.GetLineState(lineCurrent-1) : 0);
482
483 // Do not leak onto next line
484 if (initStyle == SCE_HA_STRINGEOL)
485 initStyle = SCE_HA_DEFAULT;
486 else if (initStyle == SCE_HA_LITERATE_CODEDELIM)
487 initStyle = hs.nonexternalStyle;
488
489 StyleContext sc(startPos, length, initStyle, styler);
490
491 int base = 10;
492 bool dot = false;
493
494 bool inDashes = false;
495 bool alreadyInTheMiddleOfOperator = false;
496
497 assert(!(IsCommentBlockStyle(initStyle) && hs.nestLevel == 0));
498
499 while (sc.More()) {
500 // Check for state end
501
502 if (!IsExternalStyle(sc.state)) {
503 hs.nonexternalStyle = sc.state;
504 }
505
506 // For lexer to work, states should unconditionally forward at least one
507 // character.
508 // If they don't, they should still check if they are at line end and
509 // forward if so.
510 // If a state forwards more than one character, it should check every time
511 // that it is not a line end and cease forwarding otherwise.
512 if (sc.atLineEnd) {
513 // Remember the line state for future incremental lexing
514 styler.SetLineState(lineCurrent, hs.ToLineState());
515 lineCurrent++;
516 }
517
518 // Handle line continuation generically.
519 if (sc.ch == '\\' && (sc.chNext == '\n' || sc.chNext == '\r')
520 && ( sc.state == SCE_HA_STRING
521 || sc.state == SCE_HA_PREPROCESSOR)) {
522 // Remember the line state for future incremental lexing
523 styler.SetLineState(lineCurrent, hs.ToLineState());
524 lineCurrent++;
525
526 sc.Forward();
527 if (sc.ch == '\r' && sc.chNext == '\n') {
528 sc.Forward();
529 }
530 sc.Forward();
531
532 continue;
533 }
534
535 if (sc.atLineStart) {
536
537 if (sc.state == SCE_HA_STRING || sc.state == SCE_HA_CHARACTER) {
538 // Prevent SCE_HA_STRINGEOL from leaking back to previous line
539 sc.SetState(sc.state);
540 }
541
542 if (literate && hs.lmode == LITERATE_BIRD) {
543 if (!IsExternalStyle(sc.state)) {
544 sc.SetState(SCE_HA_LITERATE_COMMENT);
545 }
546 }
547 }
548
549 // External
550 // Literate
551 if ( literate && hs.lmode == LITERATE_BIRD && sc.atLineStart
552 && sc.ch == '>') {
553 sc.SetState(SCE_HA_LITERATE_CODEDELIM);
554 sc.ForwardSetState(hs.nonexternalStyle);
555 }
556 else if (literate && hs.lmode == LITERATE_BIRD && sc.atLineStart
557 && ( sc.ch == ' ' || sc.ch == '\t'
558 || sc.Match("\\begin{code}"))) {
559 sc.SetState(sc.state);
560
561 while ((sc.ch == ' ' || sc.ch == '\t') && sc.More())
562 sc.Forward();
563
564 if (sc.Match("\\begin{code}")) {
565 sc.Forward(static_cast<int>(strlen("\\begin{code}")));
566
567 bool correct = true;
568
569 while (!sc.atLineEnd && sc.More()) {
570 if (sc.ch != ' ' && sc.ch != '\t') {
571 correct = false;
572 }
573 sc.Forward();
574 }
575
576 if (correct) {
577 sc.ChangeState(SCE_HA_LITERATE_CODEDELIM); // color the line end
578 hs.lmode = LITERATE_BLOCK;
579 }
580 }
581 }
582 else if (literate && hs.lmode == LITERATE_BLOCK && sc.atLineStart
583 && sc.Match("\\end{code}")) {
584 sc.SetState(SCE_HA_LITERATE_CODEDELIM);
585
586 sc.Forward(static_cast<int>(strlen("\\end{code}")));
587
588 while (!sc.atLineEnd && sc.More()) {
589 sc.Forward();
590 }
591
592 sc.SetState(SCE_HA_LITERATE_COMMENT);
593 hs.lmode = LITERATE_BIRD;
594 }
595 // Preprocessor
596 else if (sc.atLineStart && sc.ch == '#' && options.cpp
597 && (!options.stylingWithinPreprocessor || sc.state == SCE_HA_DEFAULT)) {
598 sc.SetState(SCE_HA_PREPROCESSOR);
599 sc.Forward();
600 }
601 // Literate
602 else if (sc.state == SCE_HA_LITERATE_COMMENT) {
603 sc.Forward();
604 }
605 else if (sc.state == SCE_HA_LITERATE_CODEDELIM) {
606 sc.ForwardSetState(hs.nonexternalStyle);
607 }
608 // Preprocessor
609 else if (sc.state == SCE_HA_PREPROCESSOR) {
610 if (sc.atLineEnd) {
611 sc.SetState(options.stylingWithinPreprocessor
612 ? SCE_HA_DEFAULT
613 : hs.nonexternalStyle);
614 sc.Forward(); // prevent double counting a line
615 } else if (options.stylingWithinPreprocessor && !IsHaskellLetter(sc.ch)) {
616 sc.SetState(SCE_HA_DEFAULT);
617 } else {
618 sc.Forward();
619 }
620 }
621 // Haskell
622 // Operator
623 else if (sc.state == SCE_HA_OPERATOR) {
624 int style = SCE_HA_OPERATOR;
625
626 if ( sc.ch == ':'
627 && !alreadyInTheMiddleOfOperator
628 // except "::"
629 && !( sc.chNext == ':'
630 && !IsAnHaskellOperatorChar(sc.GetRelative(2)))) {
631 style = SCE_HA_CAPITAL;
632 }
633
634 alreadyInTheMiddleOfOperator = false;
635
636 while (IsAnHaskellOperatorChar(sc.ch))
637 sc.Forward();
638
639 char s[100];
640 sc.GetCurrent(s, sizeof(s));
641
642 if (reserved_operators.InList(s))
643 style = SCE_HA_RESERVED_OPERATOR;
644
645 sc.ChangeState(style);
646 sc.SetState(SCE_HA_DEFAULT);
647 }
648 // String
649 else if (sc.state == SCE_HA_STRING) {
650 if (sc.atLineEnd) {
651 sc.ChangeState(SCE_HA_STRINGEOL);
652 sc.ForwardSetState(SCE_HA_DEFAULT);
653 } else if (sc.ch == '\"') {
654 sc.Forward();
655 skipMagicHash(sc, oneHash);
656 sc.SetState(SCE_HA_DEFAULT);
657 } else if (sc.ch == '\\') {
658 sc.Forward(2);
659 } else {
660 sc.Forward();
661 }
662 }
663 // Char
664 else if (sc.state == SCE_HA_CHARACTER) {
665 if (sc.atLineEnd) {
666 sc.ChangeState(SCE_HA_STRINGEOL);
667 sc.ForwardSetState(SCE_HA_DEFAULT);
668 } else if (sc.ch == '\'') {
669 sc.Forward();
670 skipMagicHash(sc, oneHash);
671 sc.SetState(SCE_HA_DEFAULT);
672 } else if (sc.ch == '\\') {
673 sc.Forward(2);
674 } else {
675 sc.Forward();
676 }
677 }
678 // Number
679 else if (sc.state == SCE_HA_NUMBER) {
680 if (sc.atLineEnd) {
681 sc.SetState(SCE_HA_DEFAULT);
682 sc.Forward(); // prevent double counting a line
683 } else if (IsADigit(sc.ch, base)) {
684 sc.Forward();
685 } else if (sc.ch=='.' && dot && IsADigit(sc.chNext, base)) {
686 sc.Forward(2);
687 dot = false;
688 } else if ((base == 10) &&
689 (sc.ch == 'e' || sc.ch == 'E') &&
690 (IsADigit(sc.chNext) || sc.chNext == '+' || sc.chNext == '-')) {
691 sc.Forward();
692 if (sc.ch == '+' || sc.ch == '-')
693 sc.Forward();
694 } else {
695 skipMagicHash(sc, twoHashes);
696 sc.SetState(SCE_HA_DEFAULT);
697 }
698 }
699 // Keyword or Identifier
700 else if (sc.state == SCE_HA_IDENTIFIER) {
701 int style = IsHaskellUpperCase(sc.ch) ? SCE_HA_CAPITAL : SCE_HA_IDENTIFIER;
702
703 assert(IsAHaskellWordStart(sc.ch));
704
705 sc.Forward();
706
707 while (sc.More()) {
708 if (IsAHaskellWordChar(sc.ch)) {
709 sc.Forward();
710 } else if (sc.ch == '.' && style == SCE_HA_CAPITAL) {
711 if (IsHaskellUpperCase(sc.chNext)) {
712 sc.Forward();
713 style = SCE_HA_CAPITAL;
714 } else if (IsAHaskellWordStart(sc.chNext)) {
715 sc.Forward();
716 style = SCE_HA_IDENTIFIER;
717 } else if (IsAnHaskellOperatorChar(sc.chNext)) {
718 sc.Forward();
719 style = sc.ch == ':' ? SCE_HA_CAPITAL : SCE_HA_OPERATOR;
720 while (IsAnHaskellOperatorChar(sc.ch))
721 sc.Forward();
722 break;
723 } else {
724 break;
725 }
726 } else {
727 break;
728 }
729 }
730
731 skipMagicHash(sc, unlimitedHashes);
732
733 char s[100];
734 sc.GetCurrent(s, sizeof(s));
735
736 KeywordMode new_mode = HA_MODE_DEFAULT;
737
738 if (keywords.InList(s)) {
739 style = SCE_HA_KEYWORD;
740 } else if (style == SCE_HA_CAPITAL) {
741 if (hs.mode == HA_MODE_IMPORT1 || hs.mode == HA_MODE_IMPORT3) {
742 style = SCE_HA_MODULE;
743 new_mode = HA_MODE_IMPORT2;
744 } else if (hs.mode == HA_MODE_MODULE) {
745 style = SCE_HA_MODULE;
746 }
747 } else if (hs.mode == HA_MODE_IMPORT1 &&
748 strcmp(s,"qualified") == 0) {
749 style = SCE_HA_KEYWORD;
750 new_mode = HA_MODE_IMPORT1;
751 } else if (options.highlightSafe &&
752 hs.mode == HA_MODE_IMPORT1 &&
753 strcmp(s,"safe") == 0) {
754 style = SCE_HA_KEYWORD;
755 new_mode = HA_MODE_IMPORT1;
756 } else if (hs.mode == HA_MODE_IMPORT2) {
757 if (strcmp(s,"as") == 0) {
758 style = SCE_HA_KEYWORD;
759 new_mode = HA_MODE_IMPORT3;
760 } else if (strcmp(s,"hiding") == 0) {
761 style = SCE_HA_KEYWORD;
762 }
763 } else if (hs.mode == HA_MODE_TYPE) {
764 if (strcmp(s,"family") == 0)
765 style = SCE_HA_KEYWORD;
766 }
767
768 if (hs.mode == HA_MODE_FFI) {
769 if (ffi.InList(s)) {
770 style = SCE_HA_KEYWORD;
771 new_mode = HA_MODE_FFI;
772 }
773 }
774
775 sc.ChangeState(style);
776 sc.SetState(SCE_HA_DEFAULT);
777
778 if (strcmp(s,"import") == 0 && hs.mode != HA_MODE_FFI)
779 new_mode = HA_MODE_IMPORT1;
780 else if (strcmp(s,"module") == 0)
781 new_mode = HA_MODE_MODULE;
782 else if (strcmp(s,"foreign") == 0)
783 new_mode = HA_MODE_FFI;
784 else if (strcmp(s,"type") == 0
785 || strcmp(s,"data") == 0)
786 new_mode = HA_MODE_TYPE;
787
788 hs.mode = new_mode;
789 }
790
791 // Comments
792 // Oneliner
793 else if (sc.state == SCE_HA_COMMENTLINE) {
794 if (sc.atLineEnd) {
795 sc.SetState(hs.pragma ? SCE_HA_PRAGMA : SCE_HA_DEFAULT);
796 sc.Forward(); // prevent double counting a line
797 } else if (inDashes && sc.ch != '-' && !hs.pragma) {
798 inDashes = false;
799 if (IsAnHaskellOperatorChar(sc.ch)) {
800 alreadyInTheMiddleOfOperator = true;
801 sc.ChangeState(SCE_HA_OPERATOR);
802 }
803 } else {
804 sc.Forward();
805 }
806 }
807 // Nested
808 else if (IsCommentBlockStyle(sc.state)) {
809 if (sc.Match('{','-')) {
810 sc.SetState(CommentBlockStyleFromNestLevel(hs.nestLevel));
811 sc.Forward(2);
812 hs.nestLevel++;
813 } else if (sc.Match('-','}')) {
814 sc.Forward(2);
815 assert(hs.nestLevel > 0);
816 if (hs.nestLevel > 0)
817 hs.nestLevel--;
818 sc.SetState(
819 hs.nestLevel == 0
820 ? (hs.pragma ? SCE_HA_PRAGMA : SCE_HA_DEFAULT)
821 : CommentBlockStyleFromNestLevel(hs.nestLevel - 1));
822 } else {
823 sc.Forward();
824 }
825 }
826 // Pragma
827 else if (sc.state == SCE_HA_PRAGMA) {
828 if (sc.Match("#-}")) {
829 hs.pragma = false;
830 sc.Forward(3);
831 sc.SetState(SCE_HA_DEFAULT);
832 } else if (sc.Match('-','-')) {
833 sc.SetState(SCE_HA_COMMENTLINE);
834 sc.Forward(2);
835 inDashes = false;
836 } else if (sc.Match('{','-')) {
837 sc.SetState(CommentBlockStyleFromNestLevel(hs.nestLevel));
838 sc.Forward(2);
839 hs.nestLevel = 1;
840 } else {
841 sc.Forward();
842 }
843 }
844 // New state?
845 else if (sc.state == SCE_HA_DEFAULT) {
846 // Digit
847 if (IsADigit(sc.ch)) {
848 hs.mode = HA_MODE_DEFAULT;
849
850 sc.SetState(SCE_HA_NUMBER);
851 if (sc.ch == '0' && (sc.chNext == 'X' || sc.chNext == 'x')) {
852 // Match anything starting with "0x" or "0X", too
853 sc.Forward(2);
854 base = 16;
855 dot = false;
856 } else if (sc.ch == '0' && (sc.chNext == 'O' || sc.chNext == 'o')) {
857 // Match anything starting with "0o" or "0O", too
858 sc.Forward(2);
859 base = 8;
860 dot = false;
861 } else {
862 sc.Forward();
863 base = 10;
864 dot = true;
865 }
866 }
867 // Pragma
868 else if (sc.Match("{-#")) {
869 hs.pragma = true;
870 sc.SetState(SCE_HA_PRAGMA);
871 sc.Forward(3);
872 }
873 // Comment line
874 else if (sc.Match('-','-')) {
875 sc.SetState(SCE_HA_COMMENTLINE);
876 sc.Forward(2);
877 inDashes = true;
878 }
879 // Comment block
880 else if (sc.Match('{','-')) {
881 sc.SetState(CommentBlockStyleFromNestLevel(hs.nestLevel));
882 sc.Forward(2);
883 hs.nestLevel = 1;
884 }
885 // String
886 else if (sc.ch == '\"') {
887 sc.SetState(SCE_HA_STRING);
888 sc.Forward();
889 }
890 // Character or quoted name or promoted term
891 else if (sc.ch == '\'') {
892 hs.mode = HA_MODE_DEFAULT;
893
894 sc.SetState(SCE_HA_CHARACTER);
895 sc.Forward();
896
897 if (options.allowQuotes) {
898 // Quoted type ''T
899 if (sc.ch=='\'' && IsAHaskellWordStart(sc.chNext)) {
900 sc.Forward();
901 sc.ChangeState(SCE_HA_IDENTIFIER);
902 } else if (sc.chNext != '\'') {
903 // Quoted name 'n or promoted constructor 'N
904 if (IsAHaskellWordStart(sc.ch)) {
905 sc.ChangeState(SCE_HA_IDENTIFIER);
906 // Promoted constructor operator ':~>
907 } else if (sc.ch == ':') {
908 alreadyInTheMiddleOfOperator = false;
909 sc.ChangeState(SCE_HA_OPERATOR);
910 // Promoted list or tuple '[T]
911 } else if (sc.ch == '[' || sc.ch== '(') {
912 sc.ChangeState(SCE_HA_OPERATOR);
913 sc.ForwardSetState(SCE_HA_DEFAULT);
914 }
915 }
916 }
917 }
918 // Operator starting with '?' or an implicit parameter
919 else if (sc.ch == '?') {
920 hs.mode = HA_MODE_DEFAULT;
921
922 alreadyInTheMiddleOfOperator = false;
923 sc.SetState(SCE_HA_OPERATOR);
924
925 if ( options.implicitParams
926 && IsAHaskellWordStart(sc.chNext)
927 && !IsHaskellUpperCase(sc.chNext)) {
928 sc.Forward();
929 sc.ChangeState(SCE_HA_IDENTIFIER);
930 }
931 }
932 // Operator
933 else if (IsAnHaskellOperatorChar(sc.ch)) {
934 hs.mode = HA_MODE_DEFAULT;
935
936 sc.SetState(SCE_HA_OPERATOR);
937 }
938 // Braces and punctuation
939 else if (sc.ch == ',' || sc.ch == ';'
940 || sc.ch == '(' || sc.ch == ')'
941 || sc.ch == '[' || sc.ch == ']'
942 || sc.ch == '{' || sc.ch == '}') {
943 sc.SetState(SCE_HA_OPERATOR);
944 sc.ForwardSetState(SCE_HA_DEFAULT);
945 }
946 // Keyword or Identifier
947 else if (IsAHaskellWordStart(sc.ch)) {
948 sc.SetState(SCE_HA_IDENTIFIER);
949 // Something we don't care about
950 } else {
951 sc.Forward();
952 }
953 }
954 // This branch should never be reached.
955 else {
956 assert(false);
957 sc.Forward();
958 }
959 }
960 sc.Complete();
961 }
962
Fold(Sci_PositionU startPos,Sci_Position length,int,IDocument * pAccess)963 void SCI_METHOD LexerHaskell::Fold(Sci_PositionU startPos, Sci_Position length, int // initStyle
964 ,IDocument *pAccess) {
965 if (!options.fold)
966 return;
967
968 Accessor styler(pAccess, NULL);
969
970 Sci_Position lineCurrent = styler.GetLine(startPos);
971
972 if (lineCurrent <= firstImportLine) {
973 firstImportLine = -1; // readjust first import position
974 firstImportIndent = 0;
975 }
976
977 const Sci_Position maxPos = startPos + length;
978 const Sci_Position maxLines =
979 maxPos == styler.Length()
980 ? styler.GetLine(maxPos)
981 : styler.GetLine(maxPos - 1); // Requested last line
982 const Sci_Position docLines = styler.GetLine(styler.Length()); // Available last line
983
984 // Backtrack to previous non-blank line so we can determine indent level
985 // for any white space lines
986 // and so we can fix any preceding fold level (which is why we go back
987 // at least one line in all cases)
988 bool importHere = LineContainsImport(lineCurrent, styler);
989 int indentCurrent = IndentAmountWithOffset(styler, lineCurrent);
990
991 while (lineCurrent > 0) {
992 lineCurrent--;
993 importHere = LineContainsImport(lineCurrent, styler);
994 indentCurrent = IndentAmountWithOffset(styler, lineCurrent);
995 if (!(indentCurrent & SC_FOLDLEVELWHITEFLAG))
996 break;
997 }
998
999 int indentCurrentLevel = indentCurrent & SC_FOLDLEVELNUMBERMASK;
1000
1001 if (importHere) {
1002 indentCurrentLevel = IndentLevelRemoveIndentOffset(indentCurrentLevel);
1003 if (firstImportLine == -1) {
1004 firstImportLine = lineCurrent;
1005 firstImportIndent = (1 + indentCurrentLevel) - SC_FOLDLEVELBASE;
1006 }
1007 if (firstImportLine != lineCurrent) {
1008 indentCurrentLevel++;
1009 }
1010 }
1011
1012 indentCurrent = indentCurrentLevel | (indentCurrent & ~SC_FOLDLEVELNUMBERMASK);
1013
1014 // Process all characters to end of requested range
1015 //that hangs over the end of the range. Cap processing in all cases
1016 // to end of document.
1017 while (lineCurrent <= docLines && lineCurrent <= maxLines) {
1018
1019 // Gather info
1020 Sci_Position lineNext = lineCurrent + 1;
1021 importHere = false;
1022 int indentNext = indentCurrent;
1023
1024 if (lineNext <= docLines) {
1025 // Information about next line is only available if not at end of document
1026 importHere = LineContainsImport(lineNext, styler);
1027 indentNext = IndentAmountWithOffset(styler, lineNext);
1028 }
1029 if (indentNext & SC_FOLDLEVELWHITEFLAG)
1030 indentNext = SC_FOLDLEVELWHITEFLAG | indentCurrentLevel;
1031
1032 // Skip past any blank lines for next indent level info; we skip also
1033 // comments (all comments, not just those starting in column 0)
1034 // which effectively folds them into surrounding code rather
1035 // than screwing up folding.
1036
1037 while (lineNext < docLines && (indentNext & SC_FOLDLEVELWHITEFLAG)) {
1038 lineNext++;
1039 importHere = LineContainsImport(lineNext, styler);
1040 indentNext = IndentAmountWithOffset(styler, lineNext);
1041 }
1042
1043 int indentNextLevel = indentNext & SC_FOLDLEVELNUMBERMASK;
1044
1045 if (importHere) {
1046 indentNextLevel = IndentLevelRemoveIndentOffset(indentNextLevel);
1047 if (firstImportLine == -1) {
1048 firstImportLine = lineNext;
1049 firstImportIndent = (1 + indentNextLevel) - SC_FOLDLEVELBASE;
1050 }
1051 if (firstImportLine != lineNext) {
1052 indentNextLevel++;
1053 }
1054 }
1055
1056 indentNext = indentNextLevel | (indentNext & ~SC_FOLDLEVELNUMBERMASK);
1057
1058 const int levelBeforeComments = Maximum(indentCurrentLevel,indentNextLevel);
1059
1060 // Now set all the indent levels on the lines we skipped
1061 // Do this from end to start. Once we encounter one line
1062 // which is indented more than the line after the end of
1063 // the comment-block, use the level of the block before
1064
1065 Sci_Position skipLine = lineNext;
1066 int skipLevel = indentNextLevel;
1067
1068 while (--skipLine > lineCurrent) {
1069 int skipLineIndent = IndentAmountWithOffset(styler, skipLine);
1070
1071 if (options.foldCompact) {
1072 if ((skipLineIndent & SC_FOLDLEVELNUMBERMASK) > indentNextLevel) {
1073 skipLevel = levelBeforeComments;
1074 }
1075
1076 int whiteFlag = skipLineIndent & SC_FOLDLEVELWHITEFLAG;
1077
1078 styler.SetLevel(skipLine, skipLevel | whiteFlag);
1079 } else {
1080 if ( (skipLineIndent & SC_FOLDLEVELNUMBERMASK) > indentNextLevel
1081 && !(skipLineIndent & SC_FOLDLEVELWHITEFLAG)) {
1082 skipLevel = levelBeforeComments;
1083 }
1084
1085 styler.SetLevel(skipLine, skipLevel);
1086 }
1087 }
1088
1089 int lev = indentCurrent;
1090
1091 if (!(indentCurrent & SC_FOLDLEVELWHITEFLAG)) {
1092 if ((indentCurrent & SC_FOLDLEVELNUMBERMASK) < (indentNext & SC_FOLDLEVELNUMBERMASK))
1093 lev |= SC_FOLDLEVELHEADERFLAG;
1094 }
1095
1096 // Set fold level for this line and move to next line
1097 styler.SetLevel(lineCurrent, options.foldCompact ? lev : lev & ~SC_FOLDLEVELWHITEFLAG);
1098
1099 indentCurrent = indentNext;
1100 indentCurrentLevel = indentNextLevel;
1101 lineCurrent = lineNext;
1102 }
1103
1104 // NOTE: Cannot set level of last line here because indentCurrent doesn't have
1105 // header flag set; the loop above is crafted to take care of this case!
1106 //styler.SetLevel(lineCurrent, indentCurrent);
1107 }
1108
1109 LexerModule lmHaskell(SCLEX_HASKELL, LexerHaskell::LexerFactoryHaskell, "haskell", haskellWordListDesc);
1110 LexerModule lmLiterateHaskell(SCLEX_LITERATEHASKELL, LexerHaskell::LexerFactoryLiterateHaskell, "literatehaskell", haskellWordListDesc);
1111