1 // Scintilla source code edit control
2 /** @file LexPerl.cxx
3 ** Lexer for Perl.
4 ** Converted to lexer object by "Udo Lechner" <dlchnr(at)gmx(dot)net>
5 **/
6 // Copyright 1998-2008 by Neil Hodgson <neilh@scintilla.org>
7 // Lexical analysis fixes by Kein-Hong Man <mkh@pl.jaring.my>
8 // The License.txt file describes the conditions under which this software may be distributed.
9
10 #include <stdlib.h>
11 #include <string.h>
12 #include <stdio.h>
13 #include <stdarg.h>
14 #include <assert.h>
15 #include <ctype.h>
16
17 #include <string>
18 #include <map>
19
20 #include "ILexer.h"
21 #include "Scintilla.h"
22 #include "SciLexer.h"
23
24 #include "WordList.h"
25 #include "LexAccessor.h"
26 #include "StyleContext.h"
27 #include "CharacterSet.h"
28 #include "LexerModule.h"
29 #include "OptionSet.h"
30 #include "DefaultLexer.h"
31
32 using namespace Scintilla;
33
34 // Info for HERE document handling from perldata.pod (reformatted):
35 // ----------------------------------------------------------------
36 // A line-oriented form of quoting is based on the shell ``here-doc'' syntax.
37 // Following a << you specify a string to terminate the quoted material, and
38 // all lines following the current line down to the terminating string are
39 // the value of the item.
40 // * The terminating string may be either an identifier (a word), or some
41 // quoted text.
42 // * If quoted, the type of quotes you use determines the treatment of the
43 // text, just as in regular quoting.
44 // * An unquoted identifier works like double quotes.
45 // * There must be no space between the << and the identifier.
46 // (If you put a space it will be treated as a null identifier,
47 // which is valid, and matches the first empty line.)
48 // (This is deprecated, -w warns of this syntax)
49 // * The terminating string must appear by itself (unquoted and
50 // with no surrounding whitespace) on the terminating line.
51
// maximum length of HERE doc delimiter
#define HERE_DELIM_MAX 256

// Kinds of numeric literal tracked while a number is being lexed.
// The order is significant:
//   1-3 cannot have a dot
//   1-5 are numbers; 6-7 are strings
#define PERLNUM_BINARY 1
#define PERLNUM_OCTAL 2
#define PERLNUM_FLOAT_EXP 3	// exponent part only
#define PERLNUM_HEX 4		// may be a hex float
#define PERLNUM_DECIMAL 5
#define PERLNUM_VECTOR 6
#define PERLNUM_V_VECTOR 7
#define PERLNUM_BAD 8

// Lookback state for bareword disambiguation: whitespace/comments are
// insignificant, operators/keywords are needed for disambiguation.
#define BACK_NONE 0
#define BACK_OPERATOR 1
#define BACK_KEYWORD 2

// States for the subroutine prototype scan.
#define SUB_BEGIN 0
#define SUB_HAS_PROTO 1		// only 'prototype' attribute allows prototypes
#define SUB_HAS_ATTRIB 2	// other attributes can exist leftward
#define SUB_HAS_MODULE 3	// sub name can have a ::identifier part
#define SUB_HAS_SUB 4		// 'sub' keyword

// All interpolated styles differ from their parent styles by a constant
// difference; SCE_PL_STRING_VAR is also assumed to be the interpolated
// style with the smallest value.
#define INTERPOLATE_SHIFT (SCE_PL_STRING_VAR - SCE_PL_STRING)
76
isPerlKeyword(Sci_PositionU start,Sci_PositionU end,WordList & keywords,LexAccessor & styler)77 static bool isPerlKeyword(Sci_PositionU start, Sci_PositionU end, WordList &keywords, LexAccessor &styler) {
78 // old-style keyword matcher; needed because GetCurrent() needs
79 // current segment to be committed, but we may abandon early...
80 char s[100];
81 Sci_PositionU i, len = end - start;
82 if (len > 30) { len = 30; }
83 for (i = 0; i < len; i++, start++) s[i] = styler[start];
84 s[i] = '\0';
85 return keywords.InList(s);
86 }
87
disambiguateBareword(LexAccessor & styler,Sci_PositionU bk,Sci_PositionU fw,int backFlag,Sci_PositionU backPos,Sci_PositionU endPos)88 static int disambiguateBareword(LexAccessor &styler, Sci_PositionU bk, Sci_PositionU fw,
89 int backFlag, Sci_PositionU backPos, Sci_PositionU endPos) {
90 // identifiers are recognized by Perl as barewords under some
91 // conditions, the following attempts to do the disambiguation
92 // by looking backward and forward; result in 2 LSB
93 int result = 0;
94 bool moreback = false; // true if passed newline/comments
95 bool brace = false; // true if opening brace found
96 // if BACK_NONE, neither operator nor keyword, so skip test
97 if (backFlag == BACK_NONE)
98 return result;
99 // first look backwards past whitespace/comments to set EOL flag
100 // (some disambiguation patterns must be on a single line)
101 if (backPos <= static_cast<Sci_PositionU>(styler.LineStart(styler.GetLine(bk))))
102 moreback = true;
103 // look backwards at last significant lexed item for disambiguation
104 bk = backPos - 1;
105 int ch = static_cast<unsigned char>(styler.SafeGetCharAt(bk));
106 if (ch == '{' && !moreback) {
107 // {bareword: possible variable spec
108 brace = true;
109 } else if ((ch == '&' && styler.SafeGetCharAt(bk - 1) != '&')
110 // &bareword: subroutine call
111 || styler.Match(bk - 1, "->")
112 // ->bareword: part of variable spec
113 || styler.Match(bk - 1, "::")
114 // ::bareword: part of module spec
115 || styler.Match(bk - 2, "sub")) {
116 // sub bareword: subroutine declaration
117 // (implied BACK_KEYWORD, no keywords end in 'sub'!)
118 result |= 1;
119 }
120 // next, scan forward after word past tab/spaces only;
121 // if ch isn't one of '[{(,' we can skip the test
122 if ((ch == '{' || ch == '(' || ch == '['|| ch == ',')
123 && fw < endPos) {
124 while (IsASpaceOrTab(ch = static_cast<unsigned char>(styler.SafeGetCharAt(fw)))
125 && fw < endPos) {
126 fw++;
127 }
128 if ((ch == '}' && brace)
129 // {bareword}: variable spec
130 || styler.Match(fw, "=>")) {
131 // [{(, bareword=>: hash literal
132 result |= 2;
133 }
134 }
135 return result;
136 }
137
skipWhitespaceComment(LexAccessor & styler,Sci_PositionU & p)138 static void skipWhitespaceComment(LexAccessor &styler, Sci_PositionU &p) {
139 // when backtracking, we need to skip whitespace and comments
140 while (p > 0) {
141 const int style = styler.StyleAt(p);
142 if (style != SCE_PL_DEFAULT && style != SCE_PL_COMMENTLINE)
143 break;
144 p--;
145 }
146 }
147
findPrevLexeme(LexAccessor & styler,Sci_PositionU & bk,int & style)148 static int findPrevLexeme(LexAccessor &styler, Sci_PositionU &bk, int &style) {
149 // scan backward past whitespace and comments to find a lexeme
150 skipWhitespaceComment(styler, bk);
151 if (bk == 0)
152 return 0;
153 int sz = 1;
154 style = styler.StyleAt(bk);
155 while (bk > 0) { // find extent of lexeme
156 if (styler.StyleAt(bk - 1) == style) {
157 bk--; sz++;
158 } else
159 break;
160 }
161 return sz;
162 }
163
styleBeforeBracePair(LexAccessor & styler,Sci_PositionU bk)164 static int styleBeforeBracePair(LexAccessor &styler, Sci_PositionU bk) {
165 // backtrack to find open '{' corresponding to a '}', balanced
166 // return significant style to be tested for '/' disambiguation
167 int braceCount = 1;
168 if (bk == 0)
169 return SCE_PL_DEFAULT;
170 while (--bk > 0) {
171 if (styler.StyleAt(bk) == SCE_PL_OPERATOR) {
172 int bkch = static_cast<unsigned char>(styler.SafeGetCharAt(bk));
173 if (bkch == ';') { // early out
174 break;
175 } else if (bkch == '}') {
176 braceCount++;
177 } else if (bkch == '{') {
178 if (--braceCount == 0) break;
179 }
180 }
181 }
182 if (bk > 0 && braceCount == 0) {
183 // balanced { found, bk > 0, skip more whitespace/comments
184 bk--;
185 skipWhitespaceComment(styler, bk);
186 return styler.StyleAt(bk);
187 }
188 return SCE_PL_DEFAULT;
189 }
190
styleCheckIdentifier(LexAccessor & styler,Sci_PositionU bk)191 static int styleCheckIdentifier(LexAccessor &styler, Sci_PositionU bk) {
192 // backtrack to classify sub-styles of identifier under test
193 // return sub-style to be tested for '/' disambiguation
194 if (styler.SafeGetCharAt(bk) == '>') // inputsymbol, like <foo>
195 return 1;
196 // backtrack to check for possible "->" or "::" before identifier
197 while (bk > 0 && styler.StyleAt(bk) == SCE_PL_IDENTIFIER) {
198 bk--;
199 }
200 while (bk > 0) {
201 int bkstyle = styler.StyleAt(bk);
202 if (bkstyle == SCE_PL_DEFAULT
203 || bkstyle == SCE_PL_COMMENTLINE) {
204 // skip whitespace, comments
205 } else if (bkstyle == SCE_PL_OPERATOR) {
206 // test for "->" and "::"
207 if (styler.Match(bk - 1, "->") || styler.Match(bk - 1, "::"))
208 return 2;
209 } else
210 return 3; // bare identifier
211 bk--;
212 }
213 return 0;
214 }
215
podLineScan(LexAccessor & styler,Sci_PositionU & pos,Sci_PositionU endPos)216 static int podLineScan(LexAccessor &styler, Sci_PositionU &pos, Sci_PositionU endPos) {
217 // forward scan the current line to classify line for POD style
218 int state = -1;
219 while (pos < endPos) {
220 int ch = static_cast<unsigned char>(styler.SafeGetCharAt(pos));
221 if (ch == '\n' || ch == '\r') {
222 if (ch == '\r' && styler.SafeGetCharAt(pos + 1) == '\n') pos++;
223 break;
224 }
225 if (IsASpaceOrTab(ch)) { // whitespace, take note
226 if (state == -1)
227 state = SCE_PL_DEFAULT;
228 } else if (state == SCE_PL_DEFAULT) { // verbatim POD line
229 state = SCE_PL_POD_VERB;
230 } else if (state != SCE_PL_POD_VERB) { // regular POD line
231 state = SCE_PL_POD;
232 }
233 pos++;
234 }
235 if (state == -1)
236 state = SCE_PL_DEFAULT;
237 return state;
238 }
239
styleCheckSubPrototype(LexAccessor & styler,Sci_PositionU bk)240 static bool styleCheckSubPrototype(LexAccessor &styler, Sci_PositionU bk) {
241 // backtrack to identify if we're starting a subroutine prototype
242 // we also need to ignore whitespace/comments, format is like:
243 // sub abc::pqr :const :prototype(...)
244 // lexemes are tested in pairs, e.g. '::'+'pqr', ':'+'const', etc.
245 // and a state machine generates legal subroutine syntax matches
246 styler.Flush();
247 int state = SUB_BEGIN;
248 do {
249 // find two lexemes, lexeme 2 follows lexeme 1
250 int style2 = SCE_PL_DEFAULT;
251 Sci_PositionU pos2 = bk;
252 int len2 = findPrevLexeme(styler, pos2, style2);
253 int style1 = SCE_PL_DEFAULT;
254 Sci_PositionU pos1 = pos2;
255 if (pos1 > 0) pos1--;
256 int len1 = findPrevLexeme(styler, pos1, style1);
257 if (len1 == 0 || len2 == 0) // lexeme pair must exist
258 break;
259
260 // match parts of syntax, if invalid subroutine syntax, break off
261 if (style1 == SCE_PL_OPERATOR && len1 == 1 &&
262 styler.SafeGetCharAt(pos1) == ':') { // ':'
263 if (style2 == SCE_PL_IDENTIFIER || style2 == SCE_PL_WORD) {
264 if (len2 == 9 && styler.Match(pos2, "prototype")) { // ':' 'prototype'
265 if (state == SUB_BEGIN) {
266 state = SUB_HAS_PROTO;
267 } else
268 break;
269 } else { // ':' <attribute>
270 if (state == SUB_HAS_PROTO || state == SUB_HAS_ATTRIB) {
271 state = SUB_HAS_ATTRIB;
272 } else
273 break;
274 }
275 } else
276 break;
277 } else if (style1 == SCE_PL_OPERATOR && len1 == 2 &&
278 styler.Match(pos1, "::")) { // '::'
279 if (style2 == SCE_PL_IDENTIFIER) { // '::' <identifier>
280 state = SUB_HAS_MODULE;
281 } else
282 break;
283 } else if (style1 == SCE_PL_WORD && len1 == 3 &&
284 styler.Match(pos1, "sub")) { // 'sub'
285 if (style2 == SCE_PL_IDENTIFIER) { // 'sub' <identifier>
286 state = SUB_HAS_SUB;
287 } else
288 break;
289 } else
290 break;
291 bk = pos1; // set position for finding next lexeme pair
292 if (bk > 0) bk--;
293 } while (state != SUB_HAS_SUB);
294 return (state == SUB_HAS_SUB);
295 }
296
actualNumStyle(int numberStyle)297 static int actualNumStyle(int numberStyle) {
298 if (numberStyle == PERLNUM_VECTOR || numberStyle == PERLNUM_V_VECTOR) {
299 return SCE_PL_STRING;
300 } else if (numberStyle == PERLNUM_BAD) {
301 return SCE_PL_ERROR;
302 }
303 return SCE_PL_NUMBER;
304 }
305
// Return the closing delimiter that pairs with an opening bracket character
// ( [ { or <; any other character is its own closing delimiter.
static int opposite(int ch) {
	switch (ch) {
	case '(':
		return ')';
	case '[':
		return ']';
	case '{':
		return '}';
	case '<':
		return '>';
	default:
		return ch;
	}
}
313
IsCommentLine(Sci_Position line,LexAccessor & styler)314 static bool IsCommentLine(Sci_Position line, LexAccessor &styler) {
315 Sci_Position pos = styler.LineStart(line);
316 Sci_Position eol_pos = styler.LineStart(line + 1) - 1;
317 for (Sci_Position i = pos; i < eol_pos; i++) {
318 char ch = styler[i];
319 int style = styler.StyleAt(i);
320 if (ch == '#' && style == SCE_PL_COMMENTLINE)
321 return true;
322 else if (!IsASpaceOrTab(ch))
323 return false;
324 }
325 return false;
326 }
327
IsPackageLine(Sci_Position line,LexAccessor & styler)328 static bool IsPackageLine(Sci_Position line, LexAccessor &styler) {
329 Sci_Position pos = styler.LineStart(line);
330 int style = styler.StyleAt(pos);
331 if (style == SCE_PL_WORD && styler.Match(pos, "package")) {
332 return true;
333 }
334 return false;
335 }
336
PodHeadingLevel(Sci_Position pos,LexAccessor & styler)337 static int PodHeadingLevel(Sci_Position pos, LexAccessor &styler) {
338 int lvl = static_cast<unsigned char>(styler.SafeGetCharAt(pos + 5));
339 if (lvl >= '1' && lvl <= '4') {
340 return lvl - '0';
341 }
342 return 0;
343 }
344
345 // An individual named option for use in an OptionSet
346
347 // Options used for LexerPerl
// Folding switches for the Perl lexer; each member is bound to a named
// property by OptionSetPerl.
struct OptionsPerl {
	bool fold;
	bool foldComment;
	bool foldCompact;
	// Custom folding of POD and packages
	bool foldPOD;			// fold.perl.pod
	// Enable folding Pod blocks when using the Perl lexer.
	bool foldPackage;		// fold.perl.package
	// Enable folding packages when using the Perl lexer.

	bool foldCommentExplicit;

	bool foldAtElse;

	// Defaults: compact, POD, package and explicit comment folding are on;
	// everything else is off.
	OptionsPerl() :
		fold(false),
		foldComment(false),
		foldCompact(true),
		foldPOD(true),
		foldPackage(true),
		foldCommentExplicit(true),
		foldAtElse(false) {
	}
};
372
// Descriptions of the lexer's word lists; the array ends with a null entry.
static const char *const perlWordListDesc[] = {
	"Keywords",
	nullptr
};
377
378 struct OptionSetPerl : public OptionSet<OptionsPerl> {
OptionSetPerlOptionSetPerl379 OptionSetPerl() {
380 DefineProperty("fold", &OptionsPerl::fold);
381
382 DefineProperty("fold.comment", &OptionsPerl::foldComment);
383
384 DefineProperty("fold.compact", &OptionsPerl::foldCompact);
385
386 DefineProperty("fold.perl.pod", &OptionsPerl::foldPOD,
387 "Set to 0 to disable folding Pod blocks when using the Perl lexer.");
388
389 DefineProperty("fold.perl.package", &OptionsPerl::foldPackage,
390 "Set to 0 to disable folding packages when using the Perl lexer.");
391
392 DefineProperty("fold.perl.comment.explicit", &OptionsPerl::foldCommentExplicit,
393 "Set to 0 to disable explicit folding.");
394
395 DefineProperty("fold.perl.at.else", &OptionsPerl::foldAtElse,
396 "This option enables Perl folding on a \"} else {\" line of an if statement.");
397
398 DefineWordListSets(perlWordListDesc);
399 }
400 };
401
402 class LexerPerl : public DefaultLexer {
403 CharacterSet setWordStart;
404 CharacterSet setWord;
405 CharacterSet setSpecialVar;
406 CharacterSet setControlVar;
407 WordList keywords;
408 OptionsPerl options;
409 OptionSetPerl osPerl;
410 public:
LexerPerl()411 LexerPerl() :
412 setWordStart(CharacterSet::setAlpha, "_", 0x80, true),
413 setWord(CharacterSet::setAlphaNum, "_", 0x80, true),
414 setSpecialVar(CharacterSet::setNone, "\"$;<>&`'+,./\\%:=~!?@[]"),
415 setControlVar(CharacterSet::setNone, "ACDEFHILMNOPRSTVWX") {
416 }
~LexerPerl()417 virtual ~LexerPerl() {
418 }
Release()419 void SCI_METHOD Release() override {
420 delete this;
421 }
Version() const422 int SCI_METHOD Version() const override {
423 return lvOriginal;
424 }
PropertyNames()425 const char *SCI_METHOD PropertyNames() override {
426 return osPerl.PropertyNames();
427 }
PropertyType(const char * name)428 int SCI_METHOD PropertyType(const char *name) override {
429 return osPerl.PropertyType(name);
430 }
DescribeProperty(const char * name)431 const char *SCI_METHOD DescribeProperty(const char *name) override {
432 return osPerl.DescribeProperty(name);
433 }
434 Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override;
DescribeWordListSets()435 const char *SCI_METHOD DescribeWordListSets() override {
436 return osPerl.DescribeWordListSets();
437 }
438 Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;
439 void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
440 void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
441
PrivateCall(int,void *)442 void *SCI_METHOD PrivateCall(int, void *) override {
443 return 0;
444 }
445
LexerFactoryPerl()446 static ILexer *LexerFactoryPerl() {
447 return new LexerPerl();
448 }
449 int InputSymbolScan(StyleContext &sc);
450 void InterpolateSegment(StyleContext &sc, int maxSeg, bool isPattern=false);
451 };
452
PropertySet(const char * key,const char * val)453 Sci_Position SCI_METHOD LexerPerl::PropertySet(const char *key, const char *val) {
454 if (osPerl.PropertySet(&options, key, val)) {
455 return 0;
456 }
457 return -1;
458 }
459
WordListSet(int n,const char * wl)460 Sci_Position SCI_METHOD LexerPerl::WordListSet(int n, const char *wl) {
461 WordList *wordListN = 0;
462 switch (n) {
463 case 0:
464 wordListN = &keywords;
465 break;
466 }
467 Sci_Position firstModification = -1;
468 if (wordListN) {
469 WordList wlNew;
470 wlNew.Set(wl);
471 if (*wordListN != wlNew) {
472 wordListN->Set(wl);
473 firstModification = 0;
474 }
475 }
476 return firstModification;
477 }
478
InputSymbolScan(StyleContext & sc)479 int LexerPerl::InputSymbolScan(StyleContext &sc) {
480 // forward scan for matching > on same line; file handles
481 int c, sLen = 0;
482 while ((c = sc.GetRelativeCharacter(++sLen)) != 0) {
483 if (c == '\r' || c == '\n') {
484 return 0;
485 } else if (c == '>') {
486 if (sc.Match("<=>")) // '<=>' case
487 return 0;
488 return sLen;
489 }
490 }
491 return 0;
492 }
493
InterpolateSegment(StyleContext & sc,int maxSeg,bool isPattern)494 void LexerPerl::InterpolateSegment(StyleContext &sc, int maxSeg, bool isPattern) {
495 // interpolate a segment (with no active backslashes or delimiters within)
496 // switch in or out of an interpolation style or continue current style
497 // commit variable patterns if found, trim segment, repeat until done
498 while (maxSeg > 0) {
499 bool isVar = false;
500 int sLen = 0;
501 if ((maxSeg > 1) && (sc.ch == '$' || sc.ch == '@')) {
502 // $#[$]*word [$@][$]*word (where word or {word} is always present)
503 bool braces = false;
504 sLen = 1;
505 if (sc.ch == '$' && sc.chNext == '#') { // starts with $#
506 sLen++;
507 }
508 while ((maxSeg > sLen) && (sc.GetRelativeCharacter(sLen) == '$')) // >0 $ dereference within
509 sLen++;
510 if ((maxSeg > sLen) && (sc.GetRelativeCharacter(sLen) == '{')) { // { start for {word}
511 sLen++;
512 braces = true;
513 }
514 if (maxSeg > sLen) {
515 int c = sc.GetRelativeCharacter(sLen);
516 if (setWordStart.Contains(c)) { // word (various)
517 sLen++;
518 isVar = true;
519 while (maxSeg > sLen) {
520 if (!setWord.Contains(sc.GetRelativeCharacter(sLen)))
521 break;
522 sLen++;
523 }
524 } else if (braces && IsADigit(c) && (sLen == 2)) { // digit for ${digit}
525 sLen++;
526 isVar = true;
527 }
528 }
529 if (braces) {
530 if ((maxSeg > sLen) && (sc.GetRelativeCharacter(sLen) == '}')) { // } end for {word}
531 sLen++;
532 } else
533 isVar = false;
534 }
535 }
536 if (!isVar && (maxSeg > 1)) { // $- or @-specific variable patterns
537 int c = sc.chNext;
538 if (sc.ch == '$') {
539 sLen = 1;
540 if (IsADigit(c)) { // $[0-9] and slurp trailing digits
541 sLen++;
542 isVar = true;
543 while ((maxSeg > sLen) && IsADigit(sc.GetRelativeCharacter(sLen)))
544 sLen++;
545 } else if (setSpecialVar.Contains(c)) { // $ special variables
546 sLen++;
547 isVar = true;
548 } else if (!isPattern && ((c == '(') || (c == ')') || (c == '|'))) { // $ additional
549 sLen++;
550 isVar = true;
551 } else if (c == '^') { // $^A control-char style
552 sLen++;
553 if ((maxSeg > sLen) && setControlVar.Contains(sc.GetRelativeCharacter(sLen))) {
554 sLen++;
555 isVar = true;
556 }
557 }
558 } else if (sc.ch == '@') {
559 sLen = 1;
560 if (!isPattern && ((c == '+') || (c == '-'))) { // @ specials non-pattern
561 sLen++;
562 isVar = true;
563 }
564 }
565 }
566 if (isVar) { // commit as interpolated variable or normal character
567 if (sc.state < SCE_PL_STRING_VAR)
568 sc.SetState(sc.state + INTERPOLATE_SHIFT);
569 sc.Forward(sLen);
570 maxSeg -= sLen;
571 } else {
572 if (sc.state >= SCE_PL_STRING_VAR)
573 sc.SetState(sc.state - INTERPOLATE_SHIFT);
574 sc.Forward();
575 maxSeg--;
576 }
577 }
578 if (sc.state >= SCE_PL_STRING_VAR)
579 sc.SetState(sc.state - INTERPOLATE_SHIFT);
580 }
581
Lex(Sci_PositionU startPos,Sci_Position length,int initStyle,IDocument * pAccess)582 void SCI_METHOD LexerPerl::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
583 LexAccessor styler(pAccess);
584
585 // keywords that forces /PATTERN/ at all times; should track vim's behaviour
586 WordList reWords;
587 reWords.Set("elsif if split while");
588
589 // charset classes
590 CharacterSet setSingleCharOp(CharacterSet::setNone, "rwxoRWXOezsfdlpSbctugkTBMAC");
591 // lexing of "%*</" operators is non-trivial; these are missing in the set below
592 CharacterSet setPerlOperator(CharacterSet::setNone, "^&\\()-+=|{}[]:;>,?!.~");
593 CharacterSet setQDelim(CharacterSet::setNone, "qrwx");
594 CharacterSet setModifiers(CharacterSet::setAlpha);
595 CharacterSet setPreferRE(CharacterSet::setNone, "*/<%");
596 // setArray and setHash also accepts chars for special vars like $_,
597 // which are then truncated when the next char does not match setVar
598 CharacterSet setVar(CharacterSet::setAlphaNum, "#$_'", 0x80, true);
599 CharacterSet setArray(CharacterSet::setAlpha, "#$_+-", 0x80, true);
600 CharacterSet setHash(CharacterSet::setAlpha, "#$_!^+-", 0x80, true);
601 CharacterSet &setPOD = setModifiers;
602 CharacterSet setNonHereDoc(CharacterSet::setDigits, "=$@");
603 CharacterSet setHereDocDelim(CharacterSet::setAlphaNum, "_");
604 CharacterSet setSubPrototype(CharacterSet::setNone, "\\[$@%&*+];_ \t");
605 CharacterSet setRepetition(CharacterSet::setDigits, ")\"'");
606 // for format identifiers
607 CharacterSet setFormatStart(CharacterSet::setAlpha, "_=");
608 CharacterSet &setFormat = setHereDocDelim;
609
610 // Lexer for perl often has to backtrack to start of current style to determine
611 // which characters are being used as quotes, how deeply nested is the
612 // start position and what the termination string is for HERE documents.
613
614 class HereDocCls { // Class to manage HERE doc sequence
615 public:
616 int State;
617 // 0: '<<' encountered
618 // 1: collect the delimiter
619 // 2: here doc text (lines after the delimiter)
620 int Quote; // the char after '<<'
621 bool Quoted; // true if Quote in ('\'','"','`')
622 int DelimiterLength; // strlen(Delimiter)
623 char Delimiter[HERE_DELIM_MAX]; // the Delimiter
624 HereDocCls() {
625 State = 0;
626 Quote = 0;
627 Quoted = false;
628 DelimiterLength = 0;
629 Delimiter[0] = '\0';
630 }
631 void Append(int ch) {
632 Delimiter[DelimiterLength++] = static_cast<char>(ch);
633 Delimiter[DelimiterLength] = '\0';
634 }
635 ~HereDocCls() {
636 }
637 };
638 HereDocCls HereDoc; // TODO: FIFO for stacked here-docs
639
640 class QuoteCls { // Class to manage quote pairs
641 public:
642 int Rep;
643 int Count;
644 int Up, Down;
645 QuoteCls() {
646 New(1);
647 }
648 void New(int r = 1) {
649 Rep = r;
650 Count = 0;
651 Up = '\0';
652 Down = '\0';
653 }
654 void Open(int u) {
655 Count++;
656 Up = u;
657 Down = opposite(Up);
658 }
659 };
660 QuoteCls Quote;
661
662 // additional state for number lexing
663 int numState = PERLNUM_DECIMAL;
664 int dotCount = 0;
665
666 Sci_PositionU endPos = startPos + length;
667
668 // Backtrack to beginning of style if required...
669 // If in a long distance lexical state, backtrack to find quote characters.
670 // Includes strings (may be multi-line), numbers (additional state), format
671 // bodies, as well as POD sections.
672 if (initStyle == SCE_PL_HERE_Q
673 || initStyle == SCE_PL_HERE_QQ
674 || initStyle == SCE_PL_HERE_QX
675 || initStyle == SCE_PL_FORMAT
676 || initStyle == SCE_PL_HERE_QQ_VAR
677 || initStyle == SCE_PL_HERE_QX_VAR
678 ) {
679 // backtrack through multiple styles to reach the delimiter start
680 int delim = (initStyle == SCE_PL_FORMAT) ? SCE_PL_FORMAT_IDENT:SCE_PL_HERE_DELIM;
681 while ((startPos > 1) && (styler.StyleAt(startPos) != delim)) {
682 startPos--;
683 }
684 startPos = styler.LineStart(styler.GetLine(startPos));
685 initStyle = styler.StyleAt(startPos - 1);
686 }
687 if (initStyle == SCE_PL_STRING
688 || initStyle == SCE_PL_STRING_QQ
689 || initStyle == SCE_PL_BACKTICKS
690 || initStyle == SCE_PL_STRING_QX
691 || initStyle == SCE_PL_REGEX
692 || initStyle == SCE_PL_STRING_QR
693 || initStyle == SCE_PL_REGSUBST
694 || initStyle == SCE_PL_STRING_VAR
695 || initStyle == SCE_PL_STRING_QQ_VAR
696 || initStyle == SCE_PL_BACKTICKS_VAR
697 || initStyle == SCE_PL_STRING_QX_VAR
698 || initStyle == SCE_PL_REGEX_VAR
699 || initStyle == SCE_PL_STRING_QR_VAR
700 || initStyle == SCE_PL_REGSUBST_VAR
701 ) {
702 // for interpolation, must backtrack through a mix of two different styles
703 int otherStyle = (initStyle >= SCE_PL_STRING_VAR) ?
704 initStyle - INTERPOLATE_SHIFT : initStyle + INTERPOLATE_SHIFT;
705 while (startPos > 1) {
706 int st = styler.StyleAt(startPos - 1);
707 if ((st != initStyle) && (st != otherStyle))
708 break;
709 startPos--;
710 }
711 initStyle = SCE_PL_DEFAULT;
712 } else if (initStyle == SCE_PL_STRING_Q
713 || initStyle == SCE_PL_STRING_QW
714 || initStyle == SCE_PL_XLAT
715 || initStyle == SCE_PL_CHARACTER
716 || initStyle == SCE_PL_NUMBER
717 || initStyle == SCE_PL_IDENTIFIER
718 || initStyle == SCE_PL_ERROR
719 || initStyle == SCE_PL_SUB_PROTOTYPE
720 ) {
721 while ((startPos > 1) && (styler.StyleAt(startPos - 1) == initStyle)) {
722 startPos--;
723 }
724 initStyle = SCE_PL_DEFAULT;
725 } else if (initStyle == SCE_PL_POD
726 || initStyle == SCE_PL_POD_VERB
727 ) {
728 // POD backtracking finds preceding blank lines and goes back past them
729 Sci_Position ln = styler.GetLine(startPos);
730 if (ln > 0) {
731 initStyle = styler.StyleAt(styler.LineStart(--ln));
732 if (initStyle == SCE_PL_POD || initStyle == SCE_PL_POD_VERB) {
733 while (ln > 0 && styler.GetLineState(ln) == SCE_PL_DEFAULT)
734 ln--;
735 }
736 startPos = styler.LineStart(++ln);
737 initStyle = styler.StyleAt(startPos - 1);
738 } else {
739 startPos = 0;
740 initStyle = SCE_PL_DEFAULT;
741 }
742 }
743
744 // backFlag, backPos are additional state to aid identifier corner cases.
745 // Look backwards past whitespace and comments in order to detect either
746 // operator or keyword. Later updated as we go along.
747 int backFlag = BACK_NONE;
748 Sci_PositionU backPos = startPos;
749 if (backPos > 0) {
750 backPos--;
751 skipWhitespaceComment(styler, backPos);
752 if (styler.StyleAt(backPos) == SCE_PL_OPERATOR)
753 backFlag = BACK_OPERATOR;
754 else if (styler.StyleAt(backPos) == SCE_PL_WORD)
755 backFlag = BACK_KEYWORD;
756 backPos++;
757 }
758
759 StyleContext sc(startPos, endPos - startPos, initStyle, styler);
760
761 for (; sc.More(); sc.Forward()) {
762
763 // Determine if the current state should terminate.
764 switch (sc.state) {
765 case SCE_PL_OPERATOR:
766 sc.SetState(SCE_PL_DEFAULT);
767 backFlag = BACK_OPERATOR;
768 backPos = sc.currentPos;
769 break;
770 case SCE_PL_IDENTIFIER: // identifier, bareword, inputsymbol
771 if ((!setWord.Contains(sc.ch) && sc.ch != '\'')
772 || sc.Match('.', '.')
773 || sc.chPrev == '>') { // end of inputsymbol
774 sc.SetState(SCE_PL_DEFAULT);
775 }
776 break;
777 case SCE_PL_WORD: // keyword, plus special cases
778 if (!setWord.Contains(sc.ch)) {
779 char s[100];
780 sc.GetCurrent(s, sizeof(s));
781 if ((strcmp(s, "__DATA__") == 0) || (strcmp(s, "__END__") == 0)) {
782 sc.ChangeState(SCE_PL_DATASECTION);
783 } else {
784 if ((strcmp(s, "format") == 0)) {
785 sc.SetState(SCE_PL_FORMAT_IDENT);
786 HereDoc.State = 0;
787 } else {
788 sc.SetState(SCE_PL_DEFAULT);
789 }
790 backFlag = BACK_KEYWORD;
791 backPos = sc.currentPos;
792 }
793 }
794 break;
795 case SCE_PL_SCALAR:
796 case SCE_PL_ARRAY:
797 case SCE_PL_HASH:
798 case SCE_PL_SYMBOLTABLE:
799 if (sc.Match(':', ':')) { // skip ::
800 sc.Forward();
801 } else if (!setVar.Contains(sc.ch)) {
802 if (sc.LengthCurrent() == 1) {
803 // Special variable: $(, $_ etc.
804 sc.Forward();
805 }
806 sc.SetState(SCE_PL_DEFAULT);
807 }
808 break;
809 case SCE_PL_NUMBER:
810 // if no early break, number style is terminated at "(go through)"
811 if (sc.ch == '.') {
812 if (sc.chNext == '.') {
813 // double dot is always an operator (go through)
814 } else if (numState <= PERLNUM_FLOAT_EXP) {
815 // non-decimal number or float exponent, consume next dot
816 sc.SetState(SCE_PL_OPERATOR);
817 break;
818 } else { // decimal or vectors allows dots
819 dotCount++;
820 if (numState == PERLNUM_DECIMAL) {
821 if (dotCount <= 1) // number with one dot in it
822 break;
823 if (IsADigit(sc.chNext)) { // really a vector
824 numState = PERLNUM_VECTOR;
825 break;
826 }
827 // number then dot (go through)
828 } else if (numState == PERLNUM_HEX) {
829 if (dotCount <= 1 && IsADigit(sc.chNext, 16)) {
830 break; // hex with one dot is a hex float
831 } else {
832 sc.SetState(SCE_PL_OPERATOR);
833 break;
834 }
835 // hex then dot (go through)
836 } else if (IsADigit(sc.chNext)) // vectors
837 break;
838 // vector then dot (go through)
839 }
840 } else if (sc.ch == '_') {
841 // permissive underscoring for number and vector literals
842 break;
843 } else if (numState == PERLNUM_DECIMAL) {
844 if (sc.ch == 'E' || sc.ch == 'e') { // exponent, sign
845 numState = PERLNUM_FLOAT_EXP;
846 if (sc.chNext == '+' || sc.chNext == '-') {
847 sc.Forward();
848 }
849 break;
850 } else if (IsADigit(sc.ch))
851 break;
852 // number then word (go through)
853 } else if (numState == PERLNUM_HEX) {
854 if (sc.ch == 'P' || sc.ch == 'p') { // hex float exponent, sign
855 numState = PERLNUM_FLOAT_EXP;
856 if (sc.chNext == '+' || sc.chNext == '-') {
857 sc.Forward();
858 }
859 break;
860 } else if (IsADigit(sc.ch, 16))
861 break;
862 // hex or hex float then word (go through)
863 } else if (numState == PERLNUM_VECTOR || numState == PERLNUM_V_VECTOR) {
864 if (IsADigit(sc.ch)) // vector
865 break;
866 if (setWord.Contains(sc.ch) && dotCount == 0) { // change to word
867 sc.ChangeState(SCE_PL_IDENTIFIER);
868 break;
869 }
870 // vector then word (go through)
871 } else if (IsADigit(sc.ch)) {
872 if (numState == PERLNUM_FLOAT_EXP) {
873 break;
874 } else if (numState == PERLNUM_OCTAL) {
875 if (sc.ch <= '7') break;
876 } else if (numState == PERLNUM_BINARY) {
877 if (sc.ch <= '1') break;
878 }
879 // mark invalid octal, binary numbers (go through)
880 numState = PERLNUM_BAD;
881 break;
882 }
883 // complete current number or vector
884 sc.ChangeState(actualNumStyle(numState));
885 sc.SetState(SCE_PL_DEFAULT);
886 break;
887 case SCE_PL_COMMENTLINE:
888 if (sc.atLineEnd) {
889 sc.SetState(SCE_PL_DEFAULT);
890 }
891 break;
892 case SCE_PL_HERE_DELIM:
893 if (HereDoc.State == 0) { // '<<' encountered
894 int delim_ch = sc.chNext;
895 Sci_Position ws_skip = 0;
896 HereDoc.State = 1; // pre-init HERE doc class
897 HereDoc.Quote = sc.chNext;
898 HereDoc.Quoted = false;
899 HereDoc.DelimiterLength = 0;
900 HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
901 if (IsASpaceOrTab(delim_ch)) {
902 // skip whitespace; legal only for quoted delimiters
903 Sci_PositionU i = sc.currentPos + 1;
904 while ((i < endPos) && IsASpaceOrTab(delim_ch)) {
905 i++;
906 delim_ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
907 }
908 ws_skip = i - sc.currentPos - 1;
909 }
910 if (delim_ch == '\'' || delim_ch == '"' || delim_ch == '`') {
911 // a quoted here-doc delimiter; skip any whitespace
912 sc.Forward(ws_skip + 1);
913 HereDoc.Quote = delim_ch;
914 HereDoc.Quoted = true;
915 } else if ((ws_skip == 0 && setNonHereDoc.Contains(sc.chNext))
916 || ws_skip > 0) {
917 // left shift << or <<= operator cases
918 // restore position if operator
919 sc.ChangeState(SCE_PL_OPERATOR);
920 sc.ForwardSetState(SCE_PL_DEFAULT);
921 backFlag = BACK_OPERATOR;
922 backPos = sc.currentPos;
923 HereDoc.State = 0;
924 } else {
925 // specially handle initial '\' for identifier
926 if (ws_skip == 0 && HereDoc.Quote == '\\')
927 sc.Forward();
928 // an unquoted here-doc delimiter, no special handling
929 // (cannot be prefixed by spaces/tabs), or
930 // symbols terminates; deprecated zero-length delimiter
931 }
932 } else if (HereDoc.State == 1) { // collect the delimiter
933 backFlag = BACK_NONE;
934 if (HereDoc.Quoted) { // a quoted here-doc delimiter
935 if (sc.ch == HereDoc.Quote) { // closing quote => end of delimiter
936 sc.ForwardSetState(SCE_PL_DEFAULT);
937 } else if (!sc.atLineEnd) {
938 if (sc.Match('\\', static_cast<char>(HereDoc.Quote))) { // escaped quote
939 sc.Forward();
940 }
941 if (sc.ch != '\r') { // skip CR if CRLF
942 int i = 0; // else append char, possibly an extended char
943 while (i < sc.width) {
944 HereDoc.Append(static_cast<unsigned char>(styler.SafeGetCharAt(sc.currentPos + i)));
945 i++;
946 }
947 }
948 }
949 } else { // an unquoted here-doc delimiter, no extended charsets
950 if (setHereDocDelim.Contains(sc.ch)) {
951 HereDoc.Append(sc.ch);
952 } else {
953 sc.SetState(SCE_PL_DEFAULT);
954 }
955 }
956 if (HereDoc.DelimiterLength >= HERE_DELIM_MAX - 1) {
957 sc.SetState(SCE_PL_ERROR);
958 HereDoc.State = 0;
959 }
960 }
961 break;
962 case SCE_PL_HERE_Q:
963 case SCE_PL_HERE_QQ:
964 case SCE_PL_HERE_QX:
965 // also implies HereDoc.State == 2
966 sc.Complete();
967 if (HereDoc.DelimiterLength == 0 || sc.Match(HereDoc.Delimiter)) {
968 int c = sc.GetRelative(HereDoc.DelimiterLength);
969 if (c == '\r' || c == '\n') { // peek first, do not consume match
970 sc.ForwardBytes(HereDoc.DelimiterLength);
971 sc.SetState(SCE_PL_DEFAULT);
972 backFlag = BACK_NONE;
973 HereDoc.State = 0;
974 if (!sc.atLineEnd)
975 sc.Forward();
976 break;
977 }
978 }
979 if (sc.state == SCE_PL_HERE_Q) { // \EOF and 'EOF' non-interpolated
980 while (!sc.atLineEnd)
981 sc.Forward();
982 break;
983 }
984 while (!sc.atLineEnd) { // "EOF" and `EOF` interpolated
985 int c, sLen = 0, endType = 0;
986 while ((c = sc.GetRelativeCharacter(sLen)) != 0) {
987 // scan to break string into segments
988 if (c == '\\') {
989 endType = 1; break;
990 } else if (c == '\r' || c == '\n') {
991 endType = 2; break;
992 }
993 sLen++;
994 }
995 if (sLen > 0) // process non-empty segments
996 InterpolateSegment(sc, sLen);
997 if (endType == 1) {
998 sc.Forward();
999 // \ at end-of-line does not appear to have any effect, skip
1000 if (sc.ch != '\r' && sc.ch != '\n')
1001 sc.Forward();
1002 } else if (endType == 2) {
1003 if (!sc.atLineEnd)
1004 sc.Forward();
1005 }
1006 }
1007 break;
1008 case SCE_PL_POD:
1009 case SCE_PL_POD_VERB: {
1010 Sci_PositionU fw = sc.currentPos;
1011 Sci_Position ln = styler.GetLine(fw);
1012 if (sc.atLineStart && sc.Match("=cut")) { // end of POD
1013 sc.SetState(SCE_PL_POD);
1014 sc.Forward(4);
1015 sc.SetState(SCE_PL_DEFAULT);
1016 styler.SetLineState(ln, SCE_PL_POD);
1017 break;
1018 }
1019 int pod = podLineScan(styler, fw, endPos); // classify POD line
1020 styler.SetLineState(ln, pod);
1021 if (pod == SCE_PL_DEFAULT) {
1022 if (sc.state == SCE_PL_POD_VERB) {
1023 Sci_PositionU fw2 = fw;
1024 while (fw2 < (endPos - 1) && pod == SCE_PL_DEFAULT) {
1025 fw = fw2++; // penultimate line (last blank line)
1026 pod = podLineScan(styler, fw2, endPos);
1027 styler.SetLineState(styler.GetLine(fw2), pod);
1028 }
1029 if (pod == SCE_PL_POD) { // truncate verbatim POD early
1030 sc.SetState(SCE_PL_POD);
1031 } else
1032 fw = fw2;
1033 }
1034 } else {
1035 if (pod == SCE_PL_POD_VERB // still part of current paragraph
1036 && (styler.GetLineState(ln - 1) == SCE_PL_POD)) {
1037 pod = SCE_PL_POD;
1038 styler.SetLineState(ln, pod);
1039 } else if (pod == SCE_PL_POD
1040 && (styler.GetLineState(ln - 1) == SCE_PL_POD_VERB)) {
1041 pod = SCE_PL_POD_VERB;
1042 styler.SetLineState(ln, pod);
1043 }
1044 sc.SetState(pod);
1045 }
1046 sc.ForwardBytes(fw - sc.currentPos); // commit style
1047 }
1048 break;
1049 case SCE_PL_REGEX:
1050 case SCE_PL_STRING_QR:
1051 if (Quote.Rep <= 0) {
1052 if (!setModifiers.Contains(sc.ch))
1053 sc.SetState(SCE_PL_DEFAULT);
1054 } else if (!Quote.Up && !IsASpace(sc.ch)) {
1055 Quote.Open(sc.ch);
1056 } else {
1057 int c, sLen = 0, endType = 0;
1058 while ((c = sc.GetRelativeCharacter(sLen)) != 0) {
1059 // scan to break string into segments
1060 if (IsASpace(c)) {
1061 break;
1062 } else if (c == '\\' && Quote.Up != '\\') {
1063 endType = 1; break;
1064 } else if (c == Quote.Down) {
1065 Quote.Count--;
1066 if (Quote.Count == 0) {
1067 Quote.Rep--;
1068 break;
1069 }
1070 } else if (c == Quote.Up)
1071 Quote.Count++;
1072 sLen++;
1073 }
1074 if (sLen > 0) { // process non-empty segments
1075 if (Quote.Up != '\'') {
1076 InterpolateSegment(sc, sLen, true);
1077 } else // non-interpolated path
1078 sc.Forward(sLen);
1079 }
1080 if (endType == 1)
1081 sc.Forward();
1082 }
1083 break;
1084 case SCE_PL_REGSUBST:
1085 case SCE_PL_XLAT:
1086 if (Quote.Rep <= 0) {
1087 if (!setModifiers.Contains(sc.ch))
1088 sc.SetState(SCE_PL_DEFAULT);
1089 } else if (!Quote.Up && !IsASpace(sc.ch)) {
1090 Quote.Open(sc.ch);
1091 } else {
1092 int c, sLen = 0, endType = 0;
1093 bool isPattern = (Quote.Rep == 2);
1094 while ((c = sc.GetRelativeCharacter(sLen)) != 0) {
1095 // scan to break string into segments
1096 if (c == '\\' && Quote.Up != '\\') {
1097 endType = 2; break;
1098 } else if (Quote.Count == 0 && Quote.Rep == 1) {
1099 // We matched something like s(...) or tr{...}, Perl 5.10
1100 // appears to allow almost any character for use as the
1101 // next delimiters. Whitespace and comments are accepted in
1102 // between, but we'll limit to whitespace here.
1103 // For '#', if no whitespace in between, it's a delimiter.
1104 if (IsASpace(c)) {
1105 // Keep going
1106 } else if (c == '#' && IsASpaceOrTab(sc.GetRelativeCharacter(sLen - 1))) {
1107 endType = 3;
1108 } else
1109 Quote.Open(c);
1110 break;
1111 } else if (c == Quote.Down) {
1112 Quote.Count--;
1113 if (Quote.Count == 0) {
1114 Quote.Rep--;
1115 endType = 1;
1116 }
1117 if (Quote.Up == Quote.Down)
1118 Quote.Count++;
1119 if (endType == 1)
1120 break;
1121 } else if (c == Quote.Up) {
1122 Quote.Count++;
1123 } else if (IsASpace(c))
1124 break;
1125 sLen++;
1126 }
1127 if (sLen > 0) { // process non-empty segments
1128 if (sc.state == SCE_PL_REGSUBST && Quote.Up != '\'') {
1129 InterpolateSegment(sc, sLen, isPattern);
1130 } else // non-interpolated path
1131 sc.Forward(sLen);
1132 }
1133 if (endType == 2) {
1134 sc.Forward();
1135 } else if (endType == 3)
1136 sc.SetState(SCE_PL_DEFAULT);
1137 }
1138 break;
1139 case SCE_PL_STRING_Q:
1140 case SCE_PL_STRING_QQ:
1141 case SCE_PL_STRING_QX:
1142 case SCE_PL_STRING_QW:
1143 case SCE_PL_STRING:
1144 case SCE_PL_CHARACTER:
1145 case SCE_PL_BACKTICKS:
1146 if (!Quote.Down && !IsASpace(sc.ch)) {
1147 Quote.Open(sc.ch);
1148 } else {
1149 int c, sLen = 0, endType = 0;
1150 while ((c = sc.GetRelativeCharacter(sLen)) != 0) {
1151 // scan to break string into segments
1152 if (IsASpace(c)) {
1153 break;
1154 } else if (c == '\\' && Quote.Up != '\\') {
1155 endType = 2; break;
1156 } else if (c == Quote.Down) {
1157 Quote.Count--;
1158 if (Quote.Count == 0) {
1159 endType = 3; break;
1160 }
1161 } else if (c == Quote.Up)
1162 Quote.Count++;
1163 sLen++;
1164 }
1165 if (sLen > 0) { // process non-empty segments
1166 switch (sc.state) {
1167 case SCE_PL_STRING:
1168 case SCE_PL_STRING_QQ:
1169 case SCE_PL_BACKTICKS:
1170 InterpolateSegment(sc, sLen);
1171 break;
1172 case SCE_PL_STRING_QX:
1173 if (Quote.Up != '\'') {
1174 InterpolateSegment(sc, sLen);
1175 break;
1176 }
1177 // (continued for ' delim)
1178 // Falls through.
1179 default: // non-interpolated path
1180 sc.Forward(sLen);
1181 }
1182 }
1183 if (endType == 2) {
1184 sc.Forward();
1185 } else if (endType == 3)
1186 sc.ForwardSetState(SCE_PL_DEFAULT);
1187 }
1188 break;
1189 case SCE_PL_SUB_PROTOTYPE: {
1190 int i = 0;
1191 // forward scan; must all be valid proto characters
1192 while (setSubPrototype.Contains(sc.GetRelative(i)))
1193 i++;
1194 if (sc.GetRelative(i) == ')') { // valid sub prototype
1195 sc.ForwardBytes(i);
1196 sc.ForwardSetState(SCE_PL_DEFAULT);
1197 } else {
1198 // abandon prototype, restart from '('
1199 sc.ChangeState(SCE_PL_OPERATOR);
1200 sc.SetState(SCE_PL_DEFAULT);
1201 }
1202 }
1203 break;
1204 case SCE_PL_FORMAT: {
1205 sc.Complete();
1206 if (sc.Match('.')) {
1207 sc.Forward();
1208 if (sc.atLineEnd || ((sc.ch == '\r' && sc.chNext == '\n')))
1209 sc.SetState(SCE_PL_DEFAULT);
1210 }
1211 while (!sc.atLineEnd)
1212 sc.Forward();
1213 }
1214 break;
1215 case SCE_PL_ERROR:
1216 break;
1217 }
1218 // Needed for specific continuation styles (one follows the other)
1219 switch (sc.state) {
1220 // continued from SCE_PL_WORD
1221 case SCE_PL_FORMAT_IDENT:
1222 // occupies HereDoc state 3 to avoid clashing with HERE docs
1223 if (IsASpaceOrTab(sc.ch)) { // skip whitespace
1224 sc.ChangeState(SCE_PL_DEFAULT);
1225 while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd)
1226 sc.Forward();
1227 sc.SetState(SCE_PL_FORMAT_IDENT);
1228 }
1229 if (setFormatStart.Contains(sc.ch)) { // identifier or '='
1230 if (sc.ch != '=') {
1231 do {
1232 sc.Forward();
1233 } while (setFormat.Contains(sc.ch));
1234 }
1235 while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd)
1236 sc.Forward();
1237 if (sc.ch == '=') {
1238 sc.ForwardSetState(SCE_PL_DEFAULT);
1239 HereDoc.State = 3;
1240 } else {
1241 // invalid identifier; inexact fallback, but hey
1242 sc.ChangeState(SCE_PL_IDENTIFIER);
1243 sc.SetState(SCE_PL_DEFAULT);
1244 }
1245 } else {
1246 sc.ChangeState(SCE_PL_DEFAULT); // invalid identifier
1247 }
1248 backFlag = BACK_NONE;
1249 break;
1250 }
1251
1252 // Must check end of HereDoc states here before default state is handled
1253 if (HereDoc.State == 1 && sc.atLineEnd) {
1254 // Begin of here-doc (the line after the here-doc delimiter):
// Lexically, the here-doc starts from the next line after the <<, but the
// first line of here-doc seems to follow the style of the last EOL sequence
1257 int st_new = SCE_PL_HERE_QQ;
1258 HereDoc.State = 2;
1259 if (HereDoc.Quoted) {
1260 if (sc.state == SCE_PL_HERE_DELIM) {
1261 // Missing quote at end of string! We are stricter than perl.
1262 // Colour here-doc anyway while marking this bit as an error.
1263 sc.ChangeState(SCE_PL_ERROR);
1264 }
1265 switch (HereDoc.Quote) {
1266 case '\'':
1267 st_new = SCE_PL_HERE_Q;
1268 break;
1269 case '"' :
1270 st_new = SCE_PL_HERE_QQ;
1271 break;
1272 case '`' :
1273 st_new = SCE_PL_HERE_QX;
1274 break;
1275 }
1276 } else {
1277 if (HereDoc.Quote == '\\')
1278 st_new = SCE_PL_HERE_Q;
1279 }
1280 sc.SetState(st_new);
1281 }
1282 if (HereDoc.State == 3 && sc.atLineEnd) {
1283 // Start of format body.
1284 HereDoc.State = 0;
1285 sc.SetState(SCE_PL_FORMAT);
1286 }
1287
1288 // Determine if a new state should be entered.
1289 if (sc.state == SCE_PL_DEFAULT) {
1290 if (IsADigit(sc.ch) ||
1291 (IsADigit(sc.chNext) && (sc.ch == '.' || sc.ch == 'v'))) {
1292 sc.SetState(SCE_PL_NUMBER);
1293 backFlag = BACK_NONE;
1294 numState = PERLNUM_DECIMAL;
1295 dotCount = 0;
1296 if (sc.ch == '0') { // hex,bin,octal
1297 if (sc.chNext == 'x' || sc.chNext == 'X') {
1298 numState = PERLNUM_HEX;
1299 } else if (sc.chNext == 'b' || sc.chNext == 'B') {
1300 numState = PERLNUM_BINARY;
1301 } else if (IsADigit(sc.chNext)) {
1302 numState = PERLNUM_OCTAL;
1303 }
1304 if (numState != PERLNUM_DECIMAL) {
1305 sc.Forward();
1306 }
1307 } else if (sc.ch == 'v') { // vector
1308 numState = PERLNUM_V_VECTOR;
1309 }
1310 } else if (setWord.Contains(sc.ch)) {
1311 // if immediately prefixed by '::', always a bareword
1312 sc.SetState(SCE_PL_WORD);
1313 if (sc.chPrev == ':' && sc.GetRelative(-2) == ':') {
1314 sc.ChangeState(SCE_PL_IDENTIFIER);
1315 }
1316 Sci_PositionU bk = sc.currentPos;
1317 Sci_PositionU fw = sc.currentPos + 1;
1318 // first check for possible quote-like delimiter
1319 if (sc.ch == 's' && !setWord.Contains(sc.chNext)) {
1320 sc.ChangeState(SCE_PL_REGSUBST);
1321 Quote.New(2);
1322 } else if (sc.ch == 'm' && !setWord.Contains(sc.chNext)) {
1323 sc.ChangeState(SCE_PL_REGEX);
1324 Quote.New();
1325 } else if (sc.ch == 'q' && !setWord.Contains(sc.chNext)) {
1326 sc.ChangeState(SCE_PL_STRING_Q);
1327 Quote.New();
1328 } else if (sc.ch == 'y' && !setWord.Contains(sc.chNext)) {
1329 sc.ChangeState(SCE_PL_XLAT);
1330 Quote.New(2);
1331 } else if (sc.Match('t', 'r') && !setWord.Contains(sc.GetRelative(2))) {
1332 sc.ChangeState(SCE_PL_XLAT);
1333 Quote.New(2);
1334 sc.Forward();
1335 fw++;
1336 } else if (sc.ch == 'q' && setQDelim.Contains(sc.chNext)
1337 && !setWord.Contains(sc.GetRelative(2))) {
1338 if (sc.chNext == 'q') sc.ChangeState(SCE_PL_STRING_QQ);
1339 else if (sc.chNext == 'x') sc.ChangeState(SCE_PL_STRING_QX);
1340 else if (sc.chNext == 'r') sc.ChangeState(SCE_PL_STRING_QR);
1341 else sc.ChangeState(SCE_PL_STRING_QW); // sc.chNext == 'w'
1342 Quote.New();
1343 sc.Forward();
1344 fw++;
1345 } else if (sc.ch == 'x' && (sc.chNext == '=' || // repetition
1346 !setWord.Contains(sc.chNext) ||
1347 (setRepetition.Contains(sc.chPrev) && IsADigit(sc.chNext)))) {
1348 sc.ChangeState(SCE_PL_OPERATOR);
1349 }
1350 // if potentially a keyword, scan forward and grab word, then check
1351 // if it's really one; if yes, disambiguation test is performed
1352 // otherwise it is always a bareword and we skip a lot of scanning
1353 if (sc.state == SCE_PL_WORD) {
1354 while (setWord.Contains(static_cast<unsigned char>(styler.SafeGetCharAt(fw))))
1355 fw++;
1356 if (!isPerlKeyword(styler.GetStartSegment(), fw, keywords, styler)) {
1357 sc.ChangeState(SCE_PL_IDENTIFIER);
1358 }
1359 }
1360 // if already SCE_PL_IDENTIFIER, then no ambiguity, skip this
1361 // for quote-like delimiters/keywords, attempt to disambiguate
1362 // to select for bareword, change state -> SCE_PL_IDENTIFIER
1363 if (sc.state != SCE_PL_IDENTIFIER && bk > 0) {
1364 if (disambiguateBareword(styler, bk, fw, backFlag, backPos, endPos))
1365 sc.ChangeState(SCE_PL_IDENTIFIER);
1366 }
1367 backFlag = BACK_NONE;
1368 } else if (sc.ch == '#') {
1369 sc.SetState(SCE_PL_COMMENTLINE);
1370 } else if (sc.ch == '\"') {
1371 sc.SetState(SCE_PL_STRING);
1372 Quote.New();
1373 Quote.Open(sc.ch);
1374 backFlag = BACK_NONE;
1375 } else if (sc.ch == '\'') {
1376 if (sc.chPrev == '&' && setWordStart.Contains(sc.chNext)) {
1377 // Archaic call
1378 sc.SetState(SCE_PL_IDENTIFIER);
1379 } else {
1380 sc.SetState(SCE_PL_CHARACTER);
1381 Quote.New();
1382 Quote.Open(sc.ch);
1383 }
1384 backFlag = BACK_NONE;
1385 } else if (sc.ch == '`') {
1386 sc.SetState(SCE_PL_BACKTICKS);
1387 Quote.New();
1388 Quote.Open(sc.ch);
1389 backFlag = BACK_NONE;
1390 } else if (sc.ch == '$') {
1391 sc.SetState(SCE_PL_SCALAR);
1392 if (sc.chNext == '{') {
1393 sc.ForwardSetState(SCE_PL_OPERATOR);
1394 } else if (IsASpace(sc.chNext)) {
1395 sc.ForwardSetState(SCE_PL_DEFAULT);
1396 } else {
1397 sc.Forward();
1398 if (sc.Match('`', '`') || sc.Match(':', ':')) {
1399 sc.Forward();
1400 }
1401 }
1402 backFlag = BACK_NONE;
1403 } else if (sc.ch == '@') {
1404 sc.SetState(SCE_PL_ARRAY);
1405 if (setArray.Contains(sc.chNext)) {
1406 // no special treatment
1407 } else if (sc.chNext == ':' && sc.GetRelative(2) == ':') {
1408 sc.ForwardBytes(2);
1409 } else if (sc.chNext == '{' || sc.chNext == '[') {
1410 sc.ForwardSetState(SCE_PL_OPERATOR);
1411 } else {
1412 sc.ChangeState(SCE_PL_OPERATOR);
1413 }
1414 backFlag = BACK_NONE;
1415 } else if (setPreferRE.Contains(sc.ch)) {
1416 // Explicit backward peeking to set a consistent preferRE for
1417 // any slash found, so no longer need to track preferRE state.
1418 // Find first previous significant lexed element and interpret.
1419 // A few symbols shares this code for disambiguation.
1420 bool preferRE = false;
1421 bool isHereDoc = sc.Match('<', '<');
1422 bool hereDocSpace = false; // for: SCALAR [whitespace] '<<'
1423 Sci_PositionU bk = (sc.currentPos > 0) ? sc.currentPos - 1: 0;
1424 sc.Complete();
1425 styler.Flush();
1426 if (styler.StyleAt(bk) == SCE_PL_DEFAULT)
1427 hereDocSpace = true;
1428 skipWhitespaceComment(styler, bk);
1429 if (bk == 0) {
1430 // avoid backward scanning breakage
1431 preferRE = true;
1432 } else {
1433 int bkstyle = styler.StyleAt(bk);
1434 int bkch = static_cast<unsigned char>(styler.SafeGetCharAt(bk));
1435 switch (bkstyle) {
1436 case SCE_PL_OPERATOR:
1437 preferRE = true;
1438 if (bkch == ')' || bkch == ']') {
1439 preferRE = false;
1440 } else if (bkch == '}') {
1441 // backtrack by counting balanced brace pairs
1442 // needed to test for variables like ${}, @{} etc.
1443 bkstyle = styleBeforeBracePair(styler, bk);
1444 if (bkstyle == SCE_PL_SCALAR
1445 || bkstyle == SCE_PL_ARRAY
1446 || bkstyle == SCE_PL_HASH
1447 || bkstyle == SCE_PL_SYMBOLTABLE
1448 || bkstyle == SCE_PL_OPERATOR) {
1449 preferRE = false;
1450 }
1451 } else if (bkch == '+' || bkch == '-') {
1452 if (bkch == static_cast<unsigned char>(styler.SafeGetCharAt(bk - 1))
1453 && bkch != static_cast<unsigned char>(styler.SafeGetCharAt(bk - 2)))
1454 // exceptions for operators: unary suffixes ++, --
1455 preferRE = false;
1456 }
1457 break;
1458 case SCE_PL_IDENTIFIER:
1459 preferRE = true;
1460 bkstyle = styleCheckIdentifier(styler, bk);
1461 if ((bkstyle == 1) || (bkstyle == 2)) {
1462 // inputsymbol or var with "->" or "::" before identifier
1463 preferRE = false;
1464 } else if (bkstyle == 3) {
1465 // bare identifier, test cases follows:
1466 if (sc.ch == '/') {
1467 // if '/', /PATTERN/ unless digit/space immediately after '/'
1468 // if '//', always expect defined-or operator to follow identifier
1469 if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.chNext == '/')
1470 preferRE = false;
1471 } else if (sc.ch == '*' || sc.ch == '%') {
1472 if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.Match('*', '*'))
1473 preferRE = false;
1474 } else if (sc.ch == '<') {
1475 if (IsASpace(sc.chNext) || sc.chNext == '=')
1476 preferRE = false;
1477 }
1478 }
1479 break;
1480 case SCE_PL_SCALAR: // for $var<< case:
1481 if (isHereDoc && hereDocSpace) // if SCALAR whitespace '<<', *always* a HERE doc
1482 preferRE = true;
1483 break;
1484 case SCE_PL_WORD:
1485 preferRE = true;
1486 // for HERE docs, always true
1487 if (sc.ch == '/') {
1488 // adopt heuristics similar to vim-style rules:
1489 // keywords always forced as /PATTERN/: split, if, elsif, while
1490 // everything else /PATTERN/ unless digit/space immediately after '/'
1491 // for '//', defined-or favoured unless special keywords
1492 Sci_PositionU bkend = bk + 1;
1493 while (bk > 0 && styler.StyleAt(bk - 1) == SCE_PL_WORD) {
1494 bk--;
1495 }
1496 if (isPerlKeyword(bk, bkend, reWords, styler))
1497 break;
1498 if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.chNext == '/')
1499 preferRE = false;
1500 } else if (sc.ch == '*' || sc.ch == '%') {
1501 if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.Match('*', '*'))
1502 preferRE = false;
1503 } else if (sc.ch == '<') {
1504 if (IsASpace(sc.chNext) || sc.chNext == '=')
1505 preferRE = false;
1506 }
1507 break;
1508
1509 // other styles uses the default, preferRE=false
1510 case SCE_PL_POD:
1511 case SCE_PL_HERE_Q:
1512 case SCE_PL_HERE_QQ:
1513 case SCE_PL_HERE_QX:
1514 preferRE = true;
1515 break;
1516 }
1517 }
1518 backFlag = BACK_NONE;
1519 if (isHereDoc) { // handle '<<', HERE doc
1520 if (sc.Match("<<>>")) { // double-diamond operator (5.22)
1521 sc.SetState(SCE_PL_OPERATOR);
1522 sc.Forward(3);
1523 } else if (preferRE) {
1524 sc.SetState(SCE_PL_HERE_DELIM);
1525 HereDoc.State = 0;
1526 } else { // << operator
1527 sc.SetState(SCE_PL_OPERATOR);
1528 sc.Forward();
1529 }
1530 } else if (sc.ch == '*') { // handle '*', typeglob
1531 if (preferRE) {
1532 sc.SetState(SCE_PL_SYMBOLTABLE);
1533 if (sc.chNext == ':' && sc.GetRelative(2) == ':') {
1534 sc.ForwardBytes(2);
1535 } else if (sc.chNext == '{') {
1536 sc.ForwardSetState(SCE_PL_OPERATOR);
1537 } else {
1538 sc.Forward();
1539 }
1540 } else {
1541 sc.SetState(SCE_PL_OPERATOR);
1542 if (sc.chNext == '*') // exponentiation
1543 sc.Forward();
1544 }
1545 } else if (sc.ch == '%') { // handle '%', hash
1546 if (preferRE) {
1547 sc.SetState(SCE_PL_HASH);
1548 if (setHash.Contains(sc.chNext)) {
1549 sc.Forward();
1550 } else if (sc.chNext == ':' && sc.GetRelative(2) == ':') {
1551 sc.ForwardBytes(2);
1552 } else if (sc.chNext == '{') {
1553 sc.ForwardSetState(SCE_PL_OPERATOR);
1554 } else {
1555 sc.ChangeState(SCE_PL_OPERATOR);
1556 }
1557 } else {
1558 sc.SetState(SCE_PL_OPERATOR);
1559 }
1560 } else if (sc.ch == '<') { // handle '<', inputsymbol
1561 if (preferRE) {
1562 // forward scan
1563 int i = InputSymbolScan(sc);
1564 if (i > 0) {
1565 sc.SetState(SCE_PL_IDENTIFIER);
1566 sc.Forward(i);
1567 } else {
1568 sc.SetState(SCE_PL_OPERATOR);
1569 }
1570 } else {
1571 sc.SetState(SCE_PL_OPERATOR);
1572 }
1573 } else { // handle '/', regexp
1574 if (preferRE) {
1575 sc.SetState(SCE_PL_REGEX);
1576 Quote.New();
1577 Quote.Open(sc.ch);
1578 } else { // / and // operators
1579 sc.SetState(SCE_PL_OPERATOR);
1580 if (sc.chNext == '/') {
1581 sc.Forward();
1582 }
1583 }
1584 }
1585 } else if (sc.ch == '=' // POD
1586 && setPOD.Contains(sc.chNext)
1587 && sc.atLineStart) {
1588 sc.SetState(SCE_PL_POD);
1589 backFlag = BACK_NONE;
1590 } else if (sc.ch == '-' && setWordStart.Contains(sc.chNext)) { // extended '-' cases
1591 Sci_PositionU bk = sc.currentPos;
1592 Sci_PositionU fw = 2;
1593 if (setSingleCharOp.Contains(sc.chNext) && // file test operators
1594 !setWord.Contains(sc.GetRelative(2))) {
1595 sc.SetState(SCE_PL_WORD);
1596 } else {
1597 // nominally a minus and bareword; find extent of bareword
1598 while (setWord.Contains(sc.GetRelative(fw)))
1599 fw++;
1600 sc.SetState(SCE_PL_OPERATOR);
1601 }
1602 // force to bareword for hash key => or {variable literal} cases
1603 if (disambiguateBareword(styler, bk, bk + fw, backFlag, backPos, endPos) & 2) {
1604 sc.ChangeState(SCE_PL_IDENTIFIER);
1605 }
1606 backFlag = BACK_NONE;
1607 } else if (sc.ch == '(' && sc.currentPos > 0) { // '(' or subroutine prototype
1608 sc.Complete();
1609 if (styleCheckSubPrototype(styler, sc.currentPos - 1)) {
1610 sc.SetState(SCE_PL_SUB_PROTOTYPE);
1611 backFlag = BACK_NONE;
1612 } else {
1613 sc.SetState(SCE_PL_OPERATOR);
1614 }
1615 } else if (setPerlOperator.Contains(sc.ch)) { // operators
1616 sc.SetState(SCE_PL_OPERATOR);
1617 if (sc.Match('.', '.')) { // .. and ...
1618 sc.Forward();
1619 if (sc.chNext == '.') sc.Forward();
1620 }
1621 } else if (sc.ch == 4 || sc.ch == 26) { // ^D and ^Z ends valid perl source
1622 sc.SetState(SCE_PL_DATASECTION);
1623 } else {
1624 // keep colouring defaults
1625 sc.Complete();
1626 }
1627 }
1628 }
1629 sc.Complete();
1630 if (sc.state == SCE_PL_HERE_Q
1631 || sc.state == SCE_PL_HERE_QQ
1632 || sc.state == SCE_PL_HERE_QX
1633 || sc.state == SCE_PL_FORMAT) {
1634 styler.ChangeLexerState(sc.currentPos, styler.Length());
1635 }
1636 sc.Complete();
1637 }
1638
// POD heading depth is stored in a 4-bit field of the fold level number
// (bits 7-4): the shift gives the field's position, the mask its extent.
#define PERL_HEADFOLD_SHIFT 4
#define PERL_HEADFOLD_MASK (0xF << PERL_HEADFOLD_SHIFT)
1641
Fold(Sci_PositionU startPos,Sci_Position length,int,IDocument * pAccess)1642 void SCI_METHOD LexerPerl::Fold(Sci_PositionU startPos, Sci_Position length, int /* initStyle */, IDocument *pAccess) {
1643
1644 if (!options.fold)
1645 return;
1646
1647 LexAccessor styler(pAccess);
1648
1649 Sci_PositionU endPos = startPos + length;
1650 int visibleChars = 0;
1651 Sci_Position lineCurrent = styler.GetLine(startPos);
1652
1653 // Backtrack to previous line in case need to fix its fold status
1654 if (startPos > 0) {
1655 if (lineCurrent > 0) {
1656 lineCurrent--;
1657 startPos = styler.LineStart(lineCurrent);
1658 }
1659 }
1660
1661 int levelPrev = SC_FOLDLEVELBASE;
1662 if (lineCurrent > 0)
1663 levelPrev = styler.LevelAt(lineCurrent - 1) >> 16;
1664 int levelCurrent = levelPrev;
1665 char chNext = styler[startPos];
1666 char chPrev = styler.SafeGetCharAt(startPos - 1);
1667 int styleNext = styler.StyleAt(startPos);
1668 // Used at end of line to determine if the line was a package definition
1669 bool isPackageLine = false;
1670 int podHeading = 0;
1671 for (Sci_PositionU i = startPos; i < endPos; i++) {
1672 char ch = chNext;
1673 chNext = styler.SafeGetCharAt(i + 1);
1674 int style = styleNext;
1675 styleNext = styler.StyleAt(i + 1);
1676 int stylePrevCh = (i) ? styler.StyleAt(i - 1):SCE_PL_DEFAULT;
1677 bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
1678 bool atLineStart = ((chPrev == '\r') || (chPrev == '\n')) || i == 0;
1679 // Comment folding
1680 if (options.foldComment && atEOL && IsCommentLine(lineCurrent, styler)) {
1681 if (!IsCommentLine(lineCurrent - 1, styler)
1682 && IsCommentLine(lineCurrent + 1, styler))
1683 levelCurrent++;
1684 else if (IsCommentLine(lineCurrent - 1, styler)
1685 && !IsCommentLine(lineCurrent + 1, styler))
1686 levelCurrent--;
1687 }
1688 // {} [] block folding
1689 if (style == SCE_PL_OPERATOR) {
1690 if (ch == '{') {
1691 if (options.foldAtElse && levelCurrent < levelPrev)
1692 --levelPrev;
1693 levelCurrent++;
1694 } else if (ch == '}') {
1695 levelCurrent--;
1696 }
1697 if (ch == '[') {
1698 if (options.foldAtElse && levelCurrent < levelPrev)
1699 --levelPrev;
1700 levelCurrent++;
1701 } else if (ch == ']') {
1702 levelCurrent--;
1703 }
1704 }
1705 // POD folding
1706 if (options.foldPOD && atLineStart) {
1707 if (style == SCE_PL_POD) {
1708 if (stylePrevCh != SCE_PL_POD && stylePrevCh != SCE_PL_POD_VERB)
1709 levelCurrent++;
1710 else if (styler.Match(i, "=cut"))
1711 levelCurrent = (levelCurrent & ~PERL_HEADFOLD_MASK) - 1;
1712 else if (styler.Match(i, "=head"))
1713 podHeading = PodHeadingLevel(i, styler);
1714 } else if (style == SCE_PL_DATASECTION) {
1715 if (ch == '=' && IsASCII(chNext) && isalpha(chNext) && levelCurrent == SC_FOLDLEVELBASE)
1716 levelCurrent++;
1717 else if (styler.Match(i, "=cut") && levelCurrent > SC_FOLDLEVELBASE)
1718 levelCurrent = (levelCurrent & ~PERL_HEADFOLD_MASK) - 1;
1719 else if (styler.Match(i, "=head"))
1720 podHeading = PodHeadingLevel(i, styler);
1721 // if package used or unclosed brace, level > SC_FOLDLEVELBASE!
1722 // reset needed as level test is vs. SC_FOLDLEVELBASE
1723 else if (stylePrevCh != SCE_PL_DATASECTION)
1724 levelCurrent = SC_FOLDLEVELBASE;
1725 }
1726 }
1727 // package folding
1728 if (options.foldPackage && atLineStart) {
1729 if (IsPackageLine(lineCurrent, styler)
1730 && !IsPackageLine(lineCurrent + 1, styler))
1731 isPackageLine = true;
1732 }
1733
1734 //heredoc folding
1735 switch (style) {
1736 case SCE_PL_HERE_QQ :
1737 case SCE_PL_HERE_Q :
1738 case SCE_PL_HERE_QX :
1739 switch (stylePrevCh) {
1740 case SCE_PL_HERE_QQ :
1741 case SCE_PL_HERE_Q :
1742 case SCE_PL_HERE_QX :
1743 //do nothing;
1744 break;
1745 default :
1746 levelCurrent++;
1747 break;
1748 }
1749 break;
1750 default:
1751 switch (stylePrevCh) {
1752 case SCE_PL_HERE_QQ :
1753 case SCE_PL_HERE_Q :
1754 case SCE_PL_HERE_QX :
1755 levelCurrent--;
1756 break;
1757 default :
1758 //do nothing;
1759 break;
1760 }
1761 break;
1762 }
1763
1764 //explicit folding
1765 if (options.foldCommentExplicit && style == SCE_PL_COMMENTLINE && ch == '#') {
1766 if (chNext == '{') {
1767 levelCurrent++;
1768 } else if (levelCurrent > SC_FOLDLEVELBASE && chNext == '}') {
1769 levelCurrent--;
1770 }
1771 }
1772
1773 if (atEOL) {
1774 int lev = levelPrev;
1775 // POD headings occupy bits 7-4, leaving some breathing room for
1776 // non-standard practice -- POD sections stuck in blocks, etc.
1777 if (podHeading > 0) {
1778 levelCurrent = (lev & ~PERL_HEADFOLD_MASK) | (podHeading << PERL_HEADFOLD_SHIFT);
1779 lev = levelCurrent - 1;
1780 lev |= SC_FOLDLEVELHEADERFLAG;
1781 podHeading = 0;
1782 }
1783 // Check if line was a package declaration
1784 // because packages need "special" treatment
1785 if (isPackageLine) {
1786 lev = SC_FOLDLEVELBASE | SC_FOLDLEVELHEADERFLAG;
1787 levelCurrent = SC_FOLDLEVELBASE + 1;
1788 isPackageLine = false;
1789 }
1790 lev |= levelCurrent << 16;
1791 if (visibleChars == 0 && options.foldCompact)
1792 lev |= SC_FOLDLEVELWHITEFLAG;
1793 if ((levelCurrent > levelPrev) && (visibleChars > 0))
1794 lev |= SC_FOLDLEVELHEADERFLAG;
1795 if (lev != styler.LevelAt(lineCurrent)) {
1796 styler.SetLevel(lineCurrent, lev);
1797 }
1798 lineCurrent++;
1799 levelPrev = levelCurrent;
1800 visibleChars = 0;
1801 }
1802 if (!isspacechar(ch))
1803 visibleChars++;
1804 chPrev = ch;
1805 }
1806 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
1807 int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
1808 styler.SetLevel(lineCurrent, levelPrev | flagsNext);
1809 }
1810
1811 LexerModule lmPerl(SCLEX_PERL, LexerPerl::LexerFactoryPerl, "perl", perlWordListDesc);
1812