1 // Scintilla source code edit control
2 /** @file LexPerl.cxx
3  ** Lexer for Perl.
4  ** Converted to lexer object by "Udo Lechner" <dlchnr(at)gmx(dot)net>
5  **/
6 // Copyright 1998-2008 by Neil Hodgson <neilh@scintilla.org>
7 // Lexical analysis fixes by Kein-Hong Man <mkh@pl.jaring.my>
8 // The License.txt file describes the conditions under which this software may be distributed.
9 
10 #include <stdlib.h>
11 #include <string.h>
12 #include <stdio.h>
13 #include <stdarg.h>
14 #include <assert.h>
15 #include <ctype.h>
16 
17 #include <string>
18 #include <map>
19 
20 #include "ILexer.h"
21 #include "Scintilla.h"
22 #include "SciLexer.h"
23 
24 #include "WordList.h"
25 #include "LexAccessor.h"
26 #include "StyleContext.h"
27 #include "CharacterSet.h"
28 #include "LexerModule.h"
29 #include "OptionSet.h"
30 
31 #ifdef SCI_NAMESPACE
32 using namespace Scintilla;
33 #endif
34 
35 // Info for HERE document handling from perldata.pod (reformatted):
36 // ----------------------------------------------------------------
37 // A line-oriented form of quoting is based on the shell ``here-doc'' syntax.
38 // Following a << you specify a string to terminate the quoted material, and
39 // all lines following the current line down to the terminating string are
40 // the value of the item.
41 // * The terminating string may be either an identifier (a word), or some
42 //   quoted text.
43 // * If quoted, the type of quotes you use determines the treatment of the
44 //   text, just as in regular quoting.
45 // * An unquoted identifier works like double quotes.
46 // * There must be no space between the << and the identifier.
47 //   (If you put a space it will be treated as a null identifier,
48 //    which is valid, and matches the first empty line.)
49 //   (This is deprecated, -w warns of this syntax)
50 // * The terminating string must appear by itself (unquoted and
51 //   with no surrounding whitespace) on the terminating line.
52 
// maximum length of HERE doc delimiter
#define HERE_DELIM_MAX 256

// Sub-states used while lexing numeric literals.
// Order is significant:
//   1-4 cannot have a dot
//   1-5 are numbers; 6-7 are strings
#define PERLNUM_BINARY		1
#define PERLNUM_HEX			2
#define PERLNUM_OCTAL		3
#define PERLNUM_FLOAT_EXP	4	// exponent part only
#define PERLNUM_DECIMAL		5
#define PERLNUM_VECTOR		6
#define PERLNUM_V_VECTOR	7
#define PERLNUM_BAD			8

// Lookback state for bareword disambiguation:
// whitespace/comments are insignificant, while operators/keywords
// are needed for disambiguation.
#define BACK_NONE		0
#define BACK_OPERATOR	1
#define BACK_KEYWORD	2

// All interpolated styles differ from their parent styles by a constant
// difference; SCE_PL_STRING_VAR is also assumed to be the interpolated
// style with the smallest value.
#define	INTERPOLATE_SHIFT	(SCE_PL_STRING_VAR - SCE_PL_STRING)
71 
isPerlKeyword(unsigned int start,unsigned int end,WordList & keywords,LexAccessor & styler)72 static bool isPerlKeyword(unsigned int start, unsigned int end, WordList &keywords, LexAccessor &styler) {
73 	// old-style keyword matcher; needed because GetCurrent() needs
74 	// current segment to be committed, but we may abandon early...
75 	char s[100];
76 	unsigned int i, len = end - start;
77 	if (len > 30) { len = 30; }
78 	for (i = 0; i < len; i++, start++) s[i] = styler[start];
79 	s[i] = '\0';
80 	return keywords.InList(s);
81 }
82 
disambiguateBareword(LexAccessor & styler,unsigned int bk,unsigned int fw,int backFlag,unsigned int backPos,unsigned int endPos)83 static int disambiguateBareword(LexAccessor &styler, unsigned int bk, unsigned int fw,
84         int backFlag, unsigned int backPos, unsigned int endPos) {
85 	// identifiers are recognized by Perl as barewords under some
86 	// conditions, the following attempts to do the disambiguation
87 	// by looking backward and forward; result in 2 LSB
88 	int result = 0;
89 	bool moreback = false;		// true if passed newline/comments
90 	bool brace = false;			// true if opening brace found
91 	// if BACK_NONE, neither operator nor keyword, so skip test
92 	if (backFlag == BACK_NONE)
93 		return result;
94 	// first look backwards past whitespace/comments to set EOL flag
95 	// (some disambiguation patterns must be on a single line)
96 	if (backPos <= static_cast<unsigned int>(styler.LineStart(styler.GetLine(bk))))
97 		moreback = true;
98 	// look backwards at last significant lexed item for disambiguation
99 	bk = backPos - 1;
100 	int ch = static_cast<unsigned char>(styler.SafeGetCharAt(bk));
101 	if (ch == '{' && !moreback) {
102 		// {bareword: possible variable spec
103 		brace = true;
104 	} else if ((ch == '&' && styler.SafeGetCharAt(bk - 1) != '&')
105 	        // &bareword: subroutine call
106 	        || styler.Match(bk - 1, "->")
107 	        // ->bareword: part of variable spec
108 	        || styler.Match(bk - 2, "sub")) {
109 		// sub bareword: subroutine declaration
110 		// (implied BACK_KEYWORD, no keywords end in 'sub'!)
111 		result |= 1;
112 	}
113 	// next, scan forward after word past tab/spaces only;
114 	// if ch isn't one of '[{(,' we can skip the test
115 	if ((ch == '{' || ch == '(' || ch == '['|| ch == ',')
116 	        && fw < endPos) {
117 		while (ch = static_cast<unsigned char>(styler.SafeGetCharAt(fw)),
118 		        IsASpaceOrTab(ch) && fw < endPos) {
119 			fw++;
120 		}
121 		if ((ch == '}' && brace)
122 		        // {bareword}: variable spec
123 		        || styler.Match(fw, "=>")) {
124 			// [{(, bareword=>: hash literal
125 			result |= 2;
126 		}
127 	}
128 	return result;
129 }
130 
skipWhitespaceComment(LexAccessor & styler,unsigned int & p)131 static void skipWhitespaceComment(LexAccessor &styler, unsigned int &p) {
132 	// when backtracking, we need to skip whitespace and comments
133 	int style;
134 	while ((p > 0) && (style = styler.StyleAt(p),
135 	        style == SCE_PL_DEFAULT || style == SCE_PL_COMMENTLINE))
136 		p--;
137 }
138 
styleBeforeBracePair(LexAccessor & styler,unsigned int bk)139 static int styleBeforeBracePair(LexAccessor &styler, unsigned int bk) {
140 	// backtrack to find open '{' corresponding to a '}', balanced
141 	// return significant style to be tested for '/' disambiguation
142 	int braceCount = 1;
143 	if (bk == 0)
144 		return SCE_PL_DEFAULT;
145 	while (--bk > 0) {
146 		if (styler.StyleAt(bk) == SCE_PL_OPERATOR) {
147 			int bkch = static_cast<unsigned char>(styler.SafeGetCharAt(bk));
148 			if (bkch == ';') {	// early out
149 				break;
150 			} else if (bkch == '}') {
151 				braceCount++;
152 			} else if (bkch == '{') {
153 				if (--braceCount == 0) break;
154 			}
155 		}
156 	}
157 	if (bk > 0 && braceCount == 0) {
158 		// balanced { found, bk > 0, skip more whitespace/comments
159 		bk--;
160 		skipWhitespaceComment(styler, bk);
161 		return styler.StyleAt(bk);
162 	}
163 	return SCE_PL_DEFAULT;
164 }
165 
styleCheckIdentifier(LexAccessor & styler,unsigned int bk)166 static int styleCheckIdentifier(LexAccessor &styler, unsigned int bk) {
167 	// backtrack to classify sub-styles of identifier under test
168 	// return sub-style to be tested for '/' disambiguation
169 	if (styler.SafeGetCharAt(bk) == '>')	// inputsymbol, like <foo>
170 		return 1;
171 	// backtrack to check for possible "->" or "::" before identifier
172 	while (bk > 0 && styler.StyleAt(bk) == SCE_PL_IDENTIFIER) {
173 		bk--;
174 	}
175 	while (bk > 0) {
176 		int bkstyle = styler.StyleAt(bk);
177 		if (bkstyle == SCE_PL_DEFAULT
178 		        || bkstyle == SCE_PL_COMMENTLINE) {
179 			// skip whitespace, comments
180 		} else if (bkstyle == SCE_PL_OPERATOR) {
181 			// test for "->" and "::"
182 			if (styler.Match(bk - 1, "->") || styler.Match(bk - 1, "::"))
183 				return 2;
184 		} else
185 			return 3;	// bare identifier
186 		bk--;
187 	}
188 	return 0;
189 }
190 
podLineScan(LexAccessor & styler,unsigned int & pos,unsigned int endPos)191 static int podLineScan(LexAccessor &styler, unsigned int &pos, unsigned int endPos) {
192 	// forward scan the current line to classify line for POD style
193 	int state = -1;
194 	while (pos < endPos) {
195 		int ch = static_cast<unsigned char>(styler.SafeGetCharAt(pos));
196 		if (ch == '\n' || ch == '\r') {
197 			if (ch == '\r' && styler.SafeGetCharAt(pos + 1) == '\n') pos++;
198 			break;
199 		}
200 		if (IsASpaceOrTab(ch)) {	// whitespace, take note
201 			if (state == -1)
202 				state = SCE_PL_DEFAULT;
203 		} else if (state == SCE_PL_DEFAULT) {	// verbatim POD line
204 			state = SCE_PL_POD_VERB;
205 		} else if (state != SCE_PL_POD_VERB) {	// regular POD line
206 			state = SCE_PL_POD;
207 		}
208 		pos++;
209 	}
210 	if (state == -1)
211 		state = SCE_PL_DEFAULT;
212 	return state;
213 }
214 
styleCheckSubPrototype(LexAccessor & styler,unsigned int bk)215 static bool styleCheckSubPrototype(LexAccessor &styler, unsigned int bk) {
216 	// backtrack to identify if we're starting a subroutine prototype
217 	// we also need to ignore whitespace/comments:
218 	// 'sub' [whitespace|comment] <identifier> [whitespace|comment]
219 	styler.Flush();
220 	skipWhitespaceComment(styler, bk);
221 	if (bk == 0 || styler.StyleAt(bk) != SCE_PL_IDENTIFIER)	// check identifier
222 		return false;
223 	while (bk > 0 && (styler.StyleAt(bk) == SCE_PL_IDENTIFIER)) {
224 		bk--;
225 	}
226 	skipWhitespaceComment(styler, bk);
227 	if (bk < 2 || styler.StyleAt(bk) != SCE_PL_WORD	// check "sub" keyword
228 	        || !styler.Match(bk - 2, "sub"))	// assume suffix is unique!
229 		return false;
230 	return true;
231 }
232 
actualNumStyle(int numberStyle)233 static int actualNumStyle(int numberStyle) {
234 	if (numberStyle == PERLNUM_VECTOR || numberStyle == PERLNUM_V_VECTOR) {
235 		return SCE_PL_STRING;
236 	} else if (numberStyle == PERLNUM_BAD) {
237 		return SCE_PL_ERROR;
238 	}
239 	return SCE_PL_NUMBER;
240 }
241 
static int opposite(int ch) {
	// Return the closing delimiter that pairs with an opening bracket
	// character; any other character is its own closing delimiter.
	switch (ch) {
	case '(':
		return ')';
	case '[':
		return ']';
	case '{':
		return '}';
	case '<':
		return '>';
	default:
		return ch;
	}
}
249 
IsCommentLine(int line,LexAccessor & styler)250 static bool IsCommentLine(int line, LexAccessor &styler) {
251 	int pos = styler.LineStart(line);
252 	int eol_pos = styler.LineStart(line + 1) - 1;
253 	for (int i = pos; i < eol_pos; i++) {
254 		char ch = styler[i];
255 		int style = styler.StyleAt(i);
256 		if (ch == '#' && style == SCE_PL_COMMENTLINE)
257 			return true;
258 		else if (!IsASpaceOrTab(ch))
259 			return false;
260 	}
261 	return false;
262 }
263 
IsPackageLine(int line,LexAccessor & styler)264 static bool IsPackageLine(int line, LexAccessor &styler) {
265 	int pos = styler.LineStart(line);
266 	int style = styler.StyleAt(pos);
267 	if (style == SCE_PL_WORD && styler.Match(pos, "package")) {
268 		return true;
269 	}
270 	return false;
271 }
272 
PodHeadingLevel(int pos,LexAccessor & styler)273 static int PodHeadingLevel(int pos, LexAccessor &styler) {
274 	int lvl = static_cast<unsigned char>(styler.SafeGetCharAt(pos + 5));
275 	if (lvl >= '1' && lvl <= '4') {
276 		return lvl - '0';
277 	}
278 	return 0;
279 }
280 
281 // An individual named option for use in an OptionSet
282 
283 // Options used for LexerPerl
struct OptionsPerl {
	bool fold;
	bool foldComment;
	bool foldCompact;
	// Custom folding of POD and packages
	bool foldPOD;            // fold.perl.pod
	// Enable folding Pod blocks when using the Perl lexer.
	bool foldPackage;        // fold.perl.package
	// Enable folding packages when using the Perl lexer.

	bool foldCommentExplicit;

	bool foldAtElse;

	// Defaults: fold, foldComment and foldAtElse start off; every other
	// option starts on.
	OptionsPerl() :
		fold(false),
		foldComment(false),
		foldCompact(true),
		foldPOD(true),
		foldPackage(true),
		foldCommentExplicit(true),
		foldAtElse(false) {
	}
};
308 
// Descriptions of the word lists used by the Perl lexer; the list is
// terminated by a null pointer.
static const char *const perlWordListDesc[] = { "Keywords", 0 };
313 
314 struct OptionSetPerl : public OptionSet<OptionsPerl> {
OptionSetPerlOptionSetPerl315 	OptionSetPerl() {
316 		DefineProperty("fold", &OptionsPerl::fold);
317 
318 		DefineProperty("fold.comment", &OptionsPerl::foldComment);
319 
320 		DefineProperty("fold.compact", &OptionsPerl::foldCompact);
321 
322 		DefineProperty("fold.perl.pod", &OptionsPerl::foldPOD,
323 		        "Set to 0 to disable folding Pod blocks when using the Perl lexer.");
324 
325 		DefineProperty("fold.perl.package", &OptionsPerl::foldPackage,
326 		        "Set to 0 to disable folding packages when using the Perl lexer.");
327 
328 		DefineProperty("fold.perl.comment.explicit", &OptionsPerl::foldCommentExplicit,
329 		        "Set to 0 to disable explicit folding.");
330 
331 		DefineProperty("fold.perl.at.else", &OptionsPerl::foldAtElse,
332 		               "This option enables Perl folding on a \"} else {\" line of an if statement.");
333 
334 		DefineWordListSets(perlWordListDesc);
335 	}
336 };
337 
338 class LexerPerl : public ILexer {
339 	CharacterSet setWordStart;
340 	CharacterSet setWord;
341 	CharacterSet setSpecialVar;
342 	CharacterSet setControlVar;
343 	WordList keywords;
344 	OptionsPerl options;
345 	OptionSetPerl osPerl;
346 public:
LexerPerl()347 	LexerPerl() :
348 		setWordStart(CharacterSet::setAlpha, "_", 0x80, true),
349 		setWord(CharacterSet::setAlphaNum, "_", 0x80, true),
350 		setSpecialVar(CharacterSet::setNone, "\"$;<>&`'+,./\\%:=~!?@[]"),
351 		setControlVar(CharacterSet::setNone, "ACDEFHILMNOPRSTVWX") {
352 	}
~LexerPerl()353 	virtual ~LexerPerl() {
354 	}
Release()355 	void SCI_METHOD Release() {
356 		delete this;
357 	}
Version() const358 	int SCI_METHOD Version() const {
359 		return lvOriginal;
360 	}
PropertyNames()361 	const char *SCI_METHOD PropertyNames() {
362 		return osPerl.PropertyNames();
363 	}
PropertyType(const char * name)364 	int SCI_METHOD PropertyType(const char *name) {
365 		return osPerl.PropertyType(name);
366 	}
DescribeProperty(const char * name)367 	const char *SCI_METHOD DescribeProperty(const char *name) {
368 		return osPerl.DescribeProperty(name);
369 	}
370 	int SCI_METHOD PropertySet(const char *key, const char *val);
DescribeWordListSets()371 	const char *SCI_METHOD DescribeWordListSets() {
372 		return osPerl.DescribeWordListSets();
373 	}
374 	int SCI_METHOD WordListSet(int n, const char *wl);
375 	void SCI_METHOD Lex(unsigned int startPos, int length, int initStyle, IDocument *pAccess);
376 	void SCI_METHOD Fold(unsigned int startPos, int length, int initStyle, IDocument *pAccess);
377 
PrivateCall(int,void *)378 	void *SCI_METHOD PrivateCall(int, void *) {
379 		return 0;
380 	}
381 
LexerFactoryPerl()382 	static ILexer *LexerFactoryPerl() {
383 		return new LexerPerl();
384 	}
385 	int InputSymbolScan(StyleContext &sc);
386 	void InterpolateSegment(StyleContext &sc, int maxSeg, bool isPattern=false);
387 };
388 
PropertySet(const char * key,const char * val)389 int SCI_METHOD LexerPerl::PropertySet(const char *key, const char *val) {
390 	if (osPerl.PropertySet(&options, key, val)) {
391 		return 0;
392 	}
393 	return -1;
394 }
395 
WordListSet(int n,const char * wl)396 int SCI_METHOD LexerPerl::WordListSet(int n, const char *wl) {
397 	WordList *wordListN = 0;
398 	switch (n) {
399 	case 0:
400 		wordListN = &keywords;
401 		break;
402 	}
403 	int firstModification = -1;
404 	if (wordListN) {
405 		WordList wlNew;
406 		wlNew.Set(wl);
407 		if (*wordListN != wlNew) {
408 			wordListN->Set(wl);
409 			firstModification = 0;
410 		}
411 	}
412 	return firstModification;
413 }
414 
InputSymbolScan(StyleContext & sc)415 int LexerPerl::InputSymbolScan(StyleContext &sc) {
416 	// forward scan for matching > on same line; file handles
417 	int c, sLen = 0;
418 	while ((c = sc.GetRelativeCharacter(++sLen)) != 0) {
419 		if (c == '\r' || c == '\n') {
420 			return 0;
421 		} else if (c == '>') {
422 			if (sc.Match("<=>"))	// '<=>' case
423 				return 0;
424 			return sLen;
425 		}
426 	}
427 	return 0;
428 }
429 
InterpolateSegment(StyleContext & sc,int maxSeg,bool isPattern)430 void LexerPerl::InterpolateSegment(StyleContext &sc, int maxSeg, bool isPattern) {
431 	// interpolate a segment (with no active backslashes or delimiters within)
432 	// switch in or out of an interpolation style or continue current style
433 	// commit variable patterns if found, trim segment, repeat until done
434 	while (maxSeg > 0) {
435 		bool isVar = false;
436 		int sLen = 0;
437 		if ((maxSeg > 1) && (sc.ch == '$' || sc.ch == '@')) {
438 			// $#[$]*word [$@][$]*word (where word or {word} is always present)
439 			bool braces = false;
440 			sLen = 1;
441 			if (sc.ch == '$' && sc.chNext == '#') {	// starts with $#
442 				sLen++;
443 			}
444 			while ((maxSeg > sLen) && (sc.GetRelativeCharacter(sLen) == '$'))	// >0 $ dereference within
445 				sLen++;
446 			if ((maxSeg > sLen) && (sc.GetRelativeCharacter(sLen) == '{')) {	// { start for {word}
447 				sLen++;
448 				braces = true;
449 			}
450 			if (maxSeg > sLen) {
451 				int c = sc.GetRelativeCharacter(sLen);
452 				if (setWordStart.Contains(c)) {	// word (various)
453 					sLen++;
454 					isVar = true;
455 					while (maxSeg > sLen) {
456 						if (!setWord.Contains(sc.GetRelativeCharacter(sLen)))
457 							break;
458 						sLen++;
459 					}
460 				} else if (braces && IsADigit(c) && (sLen == 2)) {	// digit for ${digit}
461 					sLen++;
462 					isVar = true;
463 				}
464 			}
465 			if (braces) {
466 				if ((maxSeg > sLen) && (sc.GetRelativeCharacter(sLen) == '}')) {	// } end for {word}
467 					sLen++;
468 				} else
469 					isVar = false;
470 			}
471 		}
472 		if (!isVar && (maxSeg > 1)) {	// $- or @-specific variable patterns
473 			int c = sc.chNext;
474 			if (sc.ch == '$') {
475 				sLen = 1;
476 				if (IsADigit(c)) {	// $[0-9] and slurp trailing digits
477 					sLen++;
478 					isVar = true;
479 					while ((maxSeg > sLen) && IsADigit(sc.GetRelativeCharacter(sLen)))
480 						sLen++;
481 				} else if (setSpecialVar.Contains(c)) {	// $ special variables
482 					sLen++;
483 					isVar = true;
484 				} else if (!isPattern && ((c == '(') || (c == ')') || (c == '|'))) {	// $ additional
485 					sLen++;
486 					isVar = true;
487 				} else if (c == '^') {	// $^A control-char style
488 					sLen++;
489 					if ((maxSeg > sLen) && setControlVar.Contains(sc.GetRelativeCharacter(sLen))) {
490 						sLen++;
491 						isVar = true;
492 					}
493 				}
494 			} else if (sc.ch == '@') {
495 				sLen = 1;
496 				if (!isPattern && ((c == '+') || (c == '-'))) {	// @ specials non-pattern
497 					sLen++;
498 					isVar = true;
499 				}
500 			}
501 		}
502 		if (isVar) {	// commit as interpolated variable or normal character
503 			if (sc.state < SCE_PL_STRING_VAR)
504 				sc.SetState(sc.state + INTERPOLATE_SHIFT);
505 			sc.Forward(sLen);
506 			maxSeg -= sLen;
507 		} else {
508 			if (sc.state >= SCE_PL_STRING_VAR)
509 				sc.SetState(sc.state - INTERPOLATE_SHIFT);
510 			sc.Forward();
511 			maxSeg--;
512 		}
513 	}
514 	if (sc.state >= SCE_PL_STRING_VAR)
515 		sc.SetState(sc.state - INTERPOLATE_SHIFT);
516 }
517 
Lex(unsigned int startPos,int length,int initStyle,IDocument * pAccess)518 void SCI_METHOD LexerPerl::Lex(unsigned int startPos, int length, int initStyle, IDocument *pAccess) {
519 	LexAccessor styler(pAccess);
520 
521 	// keywords that forces /PATTERN/ at all times; should track vim's behaviour
522 	WordList reWords;
523 	reWords.Set("elsif if split while");
524 
525 	// charset classes
526 	CharacterSet setSingleCharOp(CharacterSet::setNone, "rwxoRWXOezsfdlpSbctugkTBMAC");
527 	// lexing of "%*</" operators is non-trivial; these are missing in the set below
528 	CharacterSet setPerlOperator(CharacterSet::setNone, "^&\\()-+=|{}[]:;>,?!.~");
529 	CharacterSet setQDelim(CharacterSet::setNone, "qrwx");
530 	CharacterSet setModifiers(CharacterSet::setAlpha);
531 	CharacterSet setPreferRE(CharacterSet::setNone, "*/<%");
532 	// setArray and setHash also accepts chars for special vars like $_,
533 	// which are then truncated when the next char does not match setVar
534 	CharacterSet setVar(CharacterSet::setAlphaNum, "#$_'", 0x80, true);
535 	CharacterSet setArray(CharacterSet::setAlpha, "#$_+-", 0x80, true);
536 	CharacterSet setHash(CharacterSet::setAlpha, "#$_!^+-", 0x80, true);
537 	CharacterSet &setPOD = setModifiers;
538 	CharacterSet setNonHereDoc(CharacterSet::setDigits, "=$@");
539 	CharacterSet setHereDocDelim(CharacterSet::setAlphaNum, "_");
540 	CharacterSet setSubPrototype(CharacterSet::setNone, "\\[$@%&*+];");
541 	// for format identifiers
542 	CharacterSet setFormatStart(CharacterSet::setAlpha, "_=");
543 	CharacterSet &setFormat = setHereDocDelim;
544 
545 	// Lexer for perl often has to backtrack to start of current style to determine
546 	// which characters are being used as quotes, how deeply nested is the
547 	// start position and what the termination string is for HERE documents.
548 
549 	class HereDocCls {	// Class to manage HERE doc sequence
550 	public:
551 		int State;
552 		// 0: '<<' encountered
553 		// 1: collect the delimiter
554 		// 2: here doc text (lines after the delimiter)
555 		int Quote;		// the char after '<<'
556 		bool Quoted;		// true if Quote in ('\'','"','`')
557 		int DelimiterLength;	// strlen(Delimiter)
558 		char *Delimiter;	// the Delimiter, 256: sizeof PL_tokenbuf
559 		HereDocCls() {
560 			State = 0;
561 			Quote = 0;
562 			Quoted = false;
563 			DelimiterLength = 0;
564 			Delimiter = new char[HERE_DELIM_MAX];
565 			Delimiter[0] = '\0';
566 		}
567 		void Append(int ch) {
568 			Delimiter[DelimiterLength++] = static_cast<char>(ch);
569 			Delimiter[DelimiterLength] = '\0';
570 		}
571 		~HereDocCls() {
572 			delete []Delimiter;
573 		}
574 	};
575 	HereDocCls HereDoc;		// TODO: FIFO for stacked here-docs
576 
577 	class QuoteCls {	// Class to manage quote pairs
578 	public:
579 		int Rep;
580 		int Count;
581 		int Up, Down;
582 		QuoteCls() {
583 			New(1);
584 		}
585 		void New(int r = 1) {
586 			Rep   = r;
587 			Count = 0;
588 			Up    = '\0';
589 			Down  = '\0';
590 		}
591 		void Open(int u) {
592 			Count++;
593 			Up    = u;
594 			Down  = opposite(Up);
595 		}
596 	};
597 	QuoteCls Quote;
598 
599 	// additional state for number lexing
600 	int numState = PERLNUM_DECIMAL;
601 	int dotCount = 0;
602 
603 	unsigned int endPos = startPos + length;
604 
605 	// Backtrack to beginning of style if required...
606 	// If in a long distance lexical state, backtrack to find quote characters.
607 	// Includes strings (may be multi-line), numbers (additional state), format
608 	// bodies, as well as POD sections.
609 	if (initStyle == SCE_PL_HERE_Q
610 	    || initStyle == SCE_PL_HERE_QQ
611 	    || initStyle == SCE_PL_HERE_QX
612 	    || initStyle == SCE_PL_FORMAT
613 	    || initStyle == SCE_PL_HERE_QQ_VAR
614 	    || initStyle == SCE_PL_HERE_QX_VAR
615 	   ) {
616 		// backtrack through multiple styles to reach the delimiter start
617 		int delim = (initStyle == SCE_PL_FORMAT) ? SCE_PL_FORMAT_IDENT:SCE_PL_HERE_DELIM;
618 		while ((startPos > 1) && (styler.StyleAt(startPos) != delim)) {
619 			startPos--;
620 		}
621 		startPos = styler.LineStart(styler.GetLine(startPos));
622 		initStyle = styler.StyleAt(startPos - 1);
623 	}
624 	if (initStyle == SCE_PL_STRING
625 	    || initStyle == SCE_PL_STRING_QQ
626 	    || initStyle == SCE_PL_BACKTICKS
627 	    || initStyle == SCE_PL_STRING_QX
628 	    || initStyle == SCE_PL_REGEX
629 	    || initStyle == SCE_PL_STRING_QR
630 	    || initStyle == SCE_PL_REGSUBST
631 	    || initStyle == SCE_PL_STRING_VAR
632 	    || initStyle == SCE_PL_STRING_QQ_VAR
633 	    || initStyle == SCE_PL_BACKTICKS_VAR
634 	    || initStyle == SCE_PL_STRING_QX_VAR
635 	    || initStyle == SCE_PL_REGEX_VAR
636 	    || initStyle == SCE_PL_STRING_QR_VAR
637 	    || initStyle == SCE_PL_REGSUBST_VAR
638 	   ) {
639 		// for interpolation, must backtrack through a mix of two different styles
640 		int otherStyle = (initStyle >= SCE_PL_STRING_VAR) ?
641 			initStyle - INTERPOLATE_SHIFT : initStyle + INTERPOLATE_SHIFT;
642 		while (startPos > 1) {
643 			int st = styler.StyleAt(startPos - 1);
644 			if ((st != initStyle) && (st != otherStyle))
645 				break;
646 			startPos--;
647 		}
648 		initStyle = SCE_PL_DEFAULT;
649 	} else if (initStyle == SCE_PL_STRING_Q
650 	        || initStyle == SCE_PL_STRING_QW
651 	        || initStyle == SCE_PL_XLAT
652 	        || initStyle == SCE_PL_CHARACTER
653 	        || initStyle == SCE_PL_NUMBER
654 	        || initStyle == SCE_PL_IDENTIFIER
655 	        || initStyle == SCE_PL_ERROR
656 	        || initStyle == SCE_PL_SUB_PROTOTYPE
657 	   ) {
658 		while ((startPos > 1) && (styler.StyleAt(startPos - 1) == initStyle)) {
659 			startPos--;
660 		}
661 		initStyle = SCE_PL_DEFAULT;
662 	} else if (initStyle == SCE_PL_POD
663 	        || initStyle == SCE_PL_POD_VERB
664 	          ) {
665 		// POD backtracking finds preceding blank lines and goes back past them
666 		int ln = styler.GetLine(startPos);
667 		if (ln > 0) {
668 			initStyle = styler.StyleAt(styler.LineStart(--ln));
669 			if (initStyle == SCE_PL_POD || initStyle == SCE_PL_POD_VERB) {
670 				while (ln > 0 && styler.GetLineState(ln) == SCE_PL_DEFAULT)
671 					ln--;
672 			}
673 			startPos = styler.LineStart(++ln);
674 			initStyle = styler.StyleAt(startPos - 1);
675 		} else {
676 			startPos = 0;
677 			initStyle = SCE_PL_DEFAULT;
678 		}
679 	}
680 
681 	// backFlag, backPos are additional state to aid identifier corner cases.
682 	// Look backwards past whitespace and comments in order to detect either
683 	// operator or keyword. Later updated as we go along.
684 	int backFlag = BACK_NONE;
685 	unsigned int backPos = startPos;
686 	if (backPos > 0) {
687 		backPos--;
688 		skipWhitespaceComment(styler, backPos);
689 		if (styler.StyleAt(backPos) == SCE_PL_OPERATOR)
690 			backFlag = BACK_OPERATOR;
691 		else if (styler.StyleAt(backPos) == SCE_PL_WORD)
692 			backFlag = BACK_KEYWORD;
693 		backPos++;
694 	}
695 
696 	StyleContext sc(startPos, endPos - startPos, initStyle, styler, static_cast<char>(STYLE_MAX));
697 
698 	for (; sc.More(); sc.Forward()) {
699 
700 		// Determine if the current state should terminate.
701 		switch (sc.state) {
702 		case SCE_PL_OPERATOR:
703 			sc.SetState(SCE_PL_DEFAULT);
704 			backFlag = BACK_OPERATOR;
705 			backPos = sc.currentPos;
706 			break;
707 		case SCE_PL_IDENTIFIER:		// identifier, bareword, inputsymbol
708 			if ((!setWord.Contains(sc.ch) && sc.ch != '\'')
709 			        || sc.Match('.', '.')
710 			        || sc.chPrev == '>') {	// end of inputsymbol
711 				sc.SetState(SCE_PL_DEFAULT);
712 			}
713 			break;
714 		case SCE_PL_WORD:		// keyword, plus special cases
715 			if (!setWord.Contains(sc.ch)) {
716 				char s[100];
717 				sc.GetCurrent(s, sizeof(s));
718 				if ((strcmp(s, "__DATA__") == 0) || (strcmp(s, "__END__") == 0)) {
719 					sc.ChangeState(SCE_PL_DATASECTION);
720 				} else {
721 					if ((strcmp(s, "format") == 0)) {
722 						sc.SetState(SCE_PL_FORMAT_IDENT);
723 						HereDoc.State = 0;
724 					} else {
725 						sc.SetState(SCE_PL_DEFAULT);
726 					}
727 					backFlag = BACK_KEYWORD;
728 					backPos = sc.currentPos;
729 				}
730 			}
731 			break;
732 		case SCE_PL_SCALAR:
733 		case SCE_PL_ARRAY:
734 		case SCE_PL_HASH:
735 		case SCE_PL_SYMBOLTABLE:
736 			if (sc.Match(':', ':')) {	// skip ::
737 				sc.Forward();
738 			} else if (!setVar.Contains(sc.ch)) {
739 				if (sc.LengthCurrent() == 1) {
740 					// Special variable: $(, $_ etc.
741 					sc.Forward();
742 				}
743 				sc.SetState(SCE_PL_DEFAULT);
744 			}
745 			break;
746 		case SCE_PL_NUMBER:
747 			// if no early break, number style is terminated at "(go through)"
748 			if (sc.ch == '.') {
749 				if (sc.chNext == '.') {
750 					// double dot is always an operator (go through)
751 				} else if (numState <= PERLNUM_FLOAT_EXP) {
752 					// non-decimal number or float exponent, consume next dot
753 					sc.SetState(SCE_PL_OPERATOR);
754 					break;
755 				} else {	// decimal or vectors allows dots
756 					dotCount++;
757 					if (numState == PERLNUM_DECIMAL) {
758 						if (dotCount <= 1)	// number with one dot in it
759 							break;
760 						if (IsADigit(sc.chNext)) {	// really a vector
761 							numState = PERLNUM_VECTOR;
762 							break;
763 						}
764 						// number then dot (go through)
765 					} else if (IsADigit(sc.chNext))	// vectors
766 						break;
767 					// vector then dot (go through)
768 				}
769 			} else if (sc.ch == '_') {
770 				// permissive underscoring for number and vector literals
771 				break;
772 			} else if (numState == PERLNUM_DECIMAL) {
773 				if (sc.ch == 'E' || sc.ch == 'e') {	// exponent, sign
774 					numState = PERLNUM_FLOAT_EXP;
775 					if (sc.chNext == '+' || sc.chNext == '-') {
776 						sc.Forward();
777 					}
778 					break;
779 				} else if (IsADigit(sc.ch))
780 					break;
781 				// number then word (go through)
782 			} else if (numState == PERLNUM_HEX) {
783 				if (IsADigit(sc.ch, 16))
784 					break;
785 			} else if (numState == PERLNUM_VECTOR || numState == PERLNUM_V_VECTOR) {
786 				if (IsADigit(sc.ch))	// vector
787 					break;
788 				if (setWord.Contains(sc.ch) && dotCount == 0) {	// change to word
789 					sc.ChangeState(SCE_PL_IDENTIFIER);
790 					break;
791 				}
792 				// vector then word (go through)
793 			} else if (IsADigit(sc.ch)) {
794 				if (numState == PERLNUM_FLOAT_EXP) {
795 					break;
796 				} else if (numState == PERLNUM_OCTAL) {
797 					if (sc.ch <= '7') break;
798 				} else if (numState == PERLNUM_BINARY) {
799 					if (sc.ch <= '1') break;
800 				}
801 				// mark invalid octal, binary numbers (go through)
802 				numState = PERLNUM_BAD;
803 				break;
804 			}
805 			// complete current number or vector
806 			sc.ChangeState(actualNumStyle(numState));
807 			sc.SetState(SCE_PL_DEFAULT);
808 			break;
809 		case SCE_PL_COMMENTLINE:
810 			if (sc.atLineEnd) {
811 				sc.SetState(SCE_PL_DEFAULT);
812 			}
813 			break;
814 		case SCE_PL_HERE_DELIM:
815 			if (HereDoc.State == 0) { // '<<' encountered
816 				int delim_ch = sc.chNext;
817 				int ws_skip = 0;
818 				HereDoc.State = 1;	// pre-init HERE doc class
819 				HereDoc.Quote = sc.chNext;
820 				HereDoc.Quoted = false;
821 				HereDoc.DelimiterLength = 0;
822 				HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
823 				if (IsASpaceOrTab(delim_ch)) {
824 					// skip whitespace; legal only for quoted delimiters
825 					unsigned int i = sc.currentPos + 1;
826 					while ((i < endPos) && IsASpaceOrTab(delim_ch)) {
827 						i++;
828 						delim_ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
829 					}
830 					ws_skip = i - sc.currentPos - 1;
831 				}
832 				if (delim_ch == '\'' || delim_ch == '"' || delim_ch == '`') {
833 					// a quoted here-doc delimiter; skip any whitespace
834 					sc.Forward(ws_skip + 1);
835 					HereDoc.Quote = delim_ch;
836 					HereDoc.Quoted = true;
837 				} else if ((ws_skip == 0 && setNonHereDoc.Contains(sc.chNext))
838 				        || ws_skip > 0) {
839 					// left shift << or <<= operator cases
840 					// restore position if operator
841 					sc.ChangeState(SCE_PL_OPERATOR);
842 					sc.ForwardSetState(SCE_PL_DEFAULT);
843 					backFlag = BACK_OPERATOR;
844 					backPos = sc.currentPos;
845 					HereDoc.State = 0;
846 				} else {
847 					// specially handle initial '\' for identifier
848 					if (ws_skip == 0 && HereDoc.Quote == '\\')
849 						sc.Forward();
850 					// an unquoted here-doc delimiter, no special handling
851 					// (cannot be prefixed by spaces/tabs), or
852 					// symbols terminates; deprecated zero-length delimiter
853 				}
854 			} else if (HereDoc.State == 1) { // collect the delimiter
855 				backFlag = BACK_NONE;
856 				if (HereDoc.Quoted) { // a quoted here-doc delimiter
857 					if (sc.ch == HereDoc.Quote) { // closing quote => end of delimiter
858 						sc.ForwardSetState(SCE_PL_DEFAULT);
859 					} else if (!sc.atLineEnd) {
860 						if (sc.Match('\\', static_cast<char>(HereDoc.Quote))) { // escaped quote
861 							sc.Forward();
862 						}
863 						if (sc.ch != '\r') {	// skip CR if CRLF
864 							int i = 0;			// else append char, possibly an extended char
865 							while (i < sc.width) {
866 								HereDoc.Append(static_cast<unsigned char>(styler.SafeGetCharAt(sc.currentPos + i)));
867 								i++;
868 							}
869 						}
870 					}
871 				} else { // an unquoted here-doc delimiter, no extended charsets
872 					if (setHereDocDelim.Contains(sc.ch)) {
873 						HereDoc.Append(sc.ch);
874 					} else {
875 						sc.SetState(SCE_PL_DEFAULT);
876 					}
877 				}
878 				if (HereDoc.DelimiterLength >= HERE_DELIM_MAX - 1) {
879 					sc.SetState(SCE_PL_ERROR);
880 					HereDoc.State = 0;
881 				}
882 			}
883 			break;
884 		case SCE_PL_HERE_Q:
885 		case SCE_PL_HERE_QQ:
886 		case SCE_PL_HERE_QX:
887 			// also implies HereDoc.State == 2
888 			sc.Complete();
889 			if (HereDoc.DelimiterLength == 0 || sc.Match(HereDoc.Delimiter)) {
890 				int c = sc.GetRelative(HereDoc.DelimiterLength);
891 				if (c == '\r' || c == '\n') {	// peek first, do not consume match
892 					sc.ForwardBytes(HereDoc.DelimiterLength);
893 					sc.SetState(SCE_PL_DEFAULT);
894 					backFlag = BACK_NONE;
895 					HereDoc.State = 0;
896 					if (!sc.atLineEnd)
897 						sc.Forward();
898 					break;
899 				}
900 			}
901 			if (sc.state == SCE_PL_HERE_Q) {	// \EOF and 'EOF' non-interpolated
902 				while (!sc.atLineEnd)
903 					sc.Forward();
904 				break;
905 			}
906 			while (!sc.atLineEnd) {		// "EOF" and `EOF` interpolated
907 				int c, sLen = 0, endType = 0;
908 				while ((c = sc.GetRelativeCharacter(sLen)) != 0) {
909 					// scan to break string into segments
910 					if (c == '\\') {
911 						endType = 1; break;
912 					} else if (c == '\r' || c == '\n') {
913 						endType = 2; break;
914 					}
915 					sLen++;
916 				}
917 				if (sLen > 0)	// process non-empty segments
918 					InterpolateSegment(sc, sLen);
919 				if (endType == 1) {
920 					sc.Forward();
921 					// \ at end-of-line does not appear to have any effect, skip
922 					if (sc.ch != '\r' && sc.ch != '\n')
923 						sc.Forward();
924 				} else if (endType == 2) {
925 					if (!sc.atLineEnd)
926 						sc.Forward();
927 				}
928 			}
929 			break;
930 		case SCE_PL_POD:
931 		case SCE_PL_POD_VERB: {
932 				unsigned int fw = sc.currentPos;
933 				int ln = styler.GetLine(fw);
934 				if (sc.atLineStart && sc.Match("=cut")) {	// end of POD
935 					sc.SetState(SCE_PL_POD);
936 					sc.Forward(4);
937 					sc.SetState(SCE_PL_DEFAULT);
938 					styler.SetLineState(ln, SCE_PL_POD);
939 					break;
940 				}
941 				int pod = podLineScan(styler, fw, endPos);	// classify POD line
942 				styler.SetLineState(ln, pod);
943 				if (pod == SCE_PL_DEFAULT) {
944 					if (sc.state == SCE_PL_POD_VERB) {
945 						unsigned int fw2 = fw;
946 						while (fw2 < (endPos - 1) && pod == SCE_PL_DEFAULT) {
947 							fw = fw2++;	// penultimate line (last blank line)
948 							pod = podLineScan(styler, fw2, endPos);
949 							styler.SetLineState(styler.GetLine(fw2), pod);
950 						}
951 						if (pod == SCE_PL_POD) {	// truncate verbatim POD early
952 							sc.SetState(SCE_PL_POD);
953 						} else
954 							fw = fw2;
955 					}
956 				} else {
957 					if (pod == SCE_PL_POD_VERB	// still part of current paragraph
958 					        && (styler.GetLineState(ln - 1) == SCE_PL_POD)) {
959 						pod = SCE_PL_POD;
960 						styler.SetLineState(ln, pod);
961 					} else if (pod == SCE_PL_POD
962 					        && (styler.GetLineState(ln - 1) == SCE_PL_POD_VERB)) {
963 						pod = SCE_PL_POD_VERB;
964 						styler.SetLineState(ln, pod);
965 					}
966 					sc.SetState(pod);
967 				}
968 				sc.ForwardBytes(fw - sc.currentPos);	// commit style
969 			}
970 			break;
971 		case SCE_PL_REGEX:
972 		case SCE_PL_STRING_QR:
973 			if (Quote.Rep <= 0) {
974 				if (!setModifiers.Contains(sc.ch))
975 					sc.SetState(SCE_PL_DEFAULT);
976 			} else if (!Quote.Up && !IsASpace(sc.ch)) {
977 				Quote.Open(sc.ch);
978 			} else {
979 				int c, sLen = 0, endType = 0;
980 				while ((c = sc.GetRelativeCharacter(sLen)) != 0) {
981 					// scan to break string into segments
982 					if (IsASpace(c)) {
983 						break;
984 					} else if (c == '\\' && Quote.Up != '\\') {
985 						endType = 1; break;
986 					} else if (c == Quote.Down) {
987 						Quote.Count--;
988 						if (Quote.Count == 0) {
989 							Quote.Rep--;
990 							break;
991 						}
992 					} else if (c == Quote.Up)
993 						Quote.Count++;
994 					sLen++;
995 				}
996 				if (sLen > 0) {	// process non-empty segments
997 					if (Quote.Up != '\'') {
998 						InterpolateSegment(sc, sLen, true);
999 					} else		// non-interpolated path
1000 						sc.Forward(sLen);
1001 				}
1002 				if (endType == 1)
1003 					sc.Forward();
1004 			}
1005 			break;
1006 		case SCE_PL_REGSUBST:
1007 		case SCE_PL_XLAT:
1008 			if (Quote.Rep <= 0) {
1009 				if (!setModifiers.Contains(sc.ch))
1010 					sc.SetState(SCE_PL_DEFAULT);
1011 			} else if (!Quote.Up && !IsASpace(sc.ch)) {
1012 				Quote.Open(sc.ch);
1013 			} else {
1014 				int c, sLen = 0, endType = 0;
1015 				bool isPattern = (Quote.Rep == 2);
1016 				while ((c = sc.GetRelativeCharacter(sLen)) != 0) {
1017 					// scan to break string into segments
1018 					if (c == '\\' && Quote.Up != '\\') {
1019 						endType = 2; break;
1020 					} else if (Quote.Count == 0 && Quote.Rep == 1) {
1021 						// We matched something like s(...) or tr{...}, Perl 5.10
1022 						// appears to allow almost any character for use as the
1023 						// next delimiters. Whitespace and comments are accepted in
1024 						// between, but we'll limit to whitespace here.
1025 						// For '#', if no whitespace in between, it's a delimiter.
1026 						if (IsASpace(c)) {
1027 							// Keep going
1028 						} else if (c == '#' && IsASpaceOrTab(sc.GetRelativeCharacter(sLen - 1))) {
1029 							endType = 3;
1030 						} else
1031 							Quote.Open(c);
1032 						break;
1033 					} else if (c == Quote.Down) {
1034 						Quote.Count--;
1035 						if (Quote.Count == 0) {
1036 							Quote.Rep--;
1037 							endType = 1;
1038 						}
1039 						if (Quote.Up == Quote.Down)
1040 							Quote.Count++;
1041 						if (endType == 1)
1042 							break;
1043 					} else if (c == Quote.Up) {
1044 						Quote.Count++;
1045 					} else if (IsASpace(c))
1046 						break;
1047 					sLen++;
1048 				}
1049 				if (sLen > 0) {	// process non-empty segments
1050 					if (sc.state == SCE_PL_REGSUBST && Quote.Up != '\'') {
1051 						InterpolateSegment(sc, sLen, isPattern);
1052 					} else		// non-interpolated path
1053 						sc.Forward(sLen);
1054 				}
1055 				if (endType == 2) {
1056 					sc.Forward();
1057 				} else if (endType == 3)
1058 					sc.SetState(SCE_PL_DEFAULT);
1059 			}
1060 			break;
1061 		case SCE_PL_STRING_Q:
1062 		case SCE_PL_STRING_QQ:
1063 		case SCE_PL_STRING_QX:
1064 		case SCE_PL_STRING_QW:
1065 		case SCE_PL_STRING:
1066 		case SCE_PL_CHARACTER:
1067 		case SCE_PL_BACKTICKS:
1068 			if (!Quote.Down && !IsASpace(sc.ch)) {
1069 				Quote.Open(sc.ch);
1070 			} else {
1071 				int c, sLen = 0, endType = 0;
1072 				while ((c = sc.GetRelativeCharacter(sLen)) != 0) {
1073 					// scan to break string into segments
1074 					if (IsASpace(c)) {
1075 						break;
1076 					} else if (c == '\\' && Quote.Up != '\\') {
1077 						endType = 2; break;
1078 					} else if (c == Quote.Down) {
1079 						Quote.Count--;
1080 						if (Quote.Count == 0) {
1081 							endType = 3; break;
1082 						}
1083 					} else if (c == Quote.Up)
1084 						Quote.Count++;
1085 					sLen++;
1086 				}
1087 				if (sLen > 0) {	// process non-empty segments
1088 					switch (sc.state) {
1089 					case SCE_PL_STRING:
1090 					case SCE_PL_STRING_QQ:
1091 					case SCE_PL_BACKTICKS:
1092 						InterpolateSegment(sc, sLen);
1093 						break;
1094 					case SCE_PL_STRING_QX:
1095 						if (Quote.Up != '\'') {
1096 							InterpolateSegment(sc, sLen);
1097 							break;
1098 						}
1099 						// (continued for ' delim)
1100 					default:	// non-interpolated path
1101 						sc.Forward(sLen);
1102 					}
1103 				}
1104 				if (endType == 2) {
1105 					sc.Forward();
1106 				} else if (endType == 3)
1107 					sc.ForwardSetState(SCE_PL_DEFAULT);
1108 			}
1109 			break;
1110 		case SCE_PL_SUB_PROTOTYPE: {
1111 				int i = 0;
1112 				// forward scan; must all be valid proto characters
1113 				while (setSubPrototype.Contains(sc.GetRelative(i)))
1114 					i++;
1115 				if (sc.GetRelative(i) == ')') {	// valid sub prototype
1116 					sc.ForwardBytes(i);
1117 					sc.ForwardSetState(SCE_PL_DEFAULT);
1118 				} else {
1119 					// abandon prototype, restart from '('
1120 					sc.ChangeState(SCE_PL_OPERATOR);
1121 					sc.SetState(SCE_PL_DEFAULT);
1122 				}
1123 			}
1124 			break;
1125 		case SCE_PL_FORMAT: {
1126 				sc.Complete();
1127 				if (sc.Match('.')) {
1128 					sc.Forward();
1129 					if (sc.atLineEnd || ((sc.ch == '\r' && sc.chNext == '\n')))
1130 						sc.SetState(SCE_PL_DEFAULT);
1131 				}
1132 				while (!sc.atLineEnd)
1133 					sc.Forward();
1134 			}
1135 			break;
1136 		case SCE_PL_ERROR:
1137 			break;
1138 		}
1139 		// Needed for specific continuation styles (one follows the other)
1140 		switch (sc.state) {
1141 			// continued from SCE_PL_WORD
1142 		case SCE_PL_FORMAT_IDENT:
1143 			// occupies HereDoc state 3 to avoid clashing with HERE docs
1144 			if (IsASpaceOrTab(sc.ch)) {		// skip whitespace
1145 				sc.ChangeState(SCE_PL_DEFAULT);
1146 				while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd)
1147 					sc.Forward();
1148 				sc.SetState(SCE_PL_FORMAT_IDENT);
1149 			}
1150 			if (setFormatStart.Contains(sc.ch)) {	// identifier or '='
1151 				if (sc.ch != '=') {
1152 					do {
1153 						sc.Forward();
1154 					} while (setFormat.Contains(sc.ch));
1155 				}
1156 				while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd)
1157 					sc.Forward();
1158 				if (sc.ch == '=') {
1159 					sc.ForwardSetState(SCE_PL_DEFAULT);
1160 					HereDoc.State = 3;
1161 				} else {
1162 					// invalid identifier; inexact fallback, but hey
1163 					sc.ChangeState(SCE_PL_IDENTIFIER);
1164 					sc.SetState(SCE_PL_DEFAULT);
1165 				}
1166 			} else {
1167 				sc.ChangeState(SCE_PL_DEFAULT);	// invalid identifier
1168 			}
1169 			backFlag = BACK_NONE;
1170 			break;
1171 		}
1172 
1173 		// Must check end of HereDoc states here before default state is handled
1174 		if (HereDoc.State == 1 && sc.atLineEnd) {
1175 			// Begin of here-doc (the line after the here-doc delimiter):
1176 			// Lexically, the here-doc starts from the next line after the <<, but the
1177 			// first line of the here-doc seems to follow the style of the last EOL sequence
1178 			int st_new = SCE_PL_HERE_QQ;
1179 			HereDoc.State = 2;
1180 			if (HereDoc.Quoted) {
1181 				if (sc.state == SCE_PL_HERE_DELIM) {
1182 					// Missing quote at end of string! We are stricter than perl.
1183 					// Colour here-doc anyway while marking this bit as an error.
1184 					sc.ChangeState(SCE_PL_ERROR);
1185 				}
1186 				switch (HereDoc.Quote) {
1187 				case '\'':
1188 					st_new = SCE_PL_HERE_Q;
1189 					break;
1190 				case '"' :
1191 					st_new = SCE_PL_HERE_QQ;
1192 					break;
1193 				case '`' :
1194 					st_new = SCE_PL_HERE_QX;
1195 					break;
1196 				}
1197 			} else {
1198 				if (HereDoc.Quote == '\\')
1199 					st_new = SCE_PL_HERE_Q;
1200 			}
1201 			sc.SetState(st_new);
1202 		}
1203 		if (HereDoc.State == 3 && sc.atLineEnd) {
1204 			// Start of format body.
1205 			HereDoc.State = 0;
1206 			sc.SetState(SCE_PL_FORMAT);
1207 		}
1208 
1209 		// Determine if a new state should be entered.
1210 		if (sc.state == SCE_PL_DEFAULT) {
1211 			if (IsADigit(sc.ch) ||
1212 			        (IsADigit(sc.chNext) && (sc.ch == '.' || sc.ch == 'v'))) {
1213 				sc.SetState(SCE_PL_NUMBER);
1214 				backFlag = BACK_NONE;
1215 				numState = PERLNUM_DECIMAL;
1216 				dotCount = 0;
1217 				if (sc.ch == '0') {		// hex,bin,octal
1218 					if (sc.chNext == 'x' || sc.chNext == 'X') {
1219 						numState = PERLNUM_HEX;
1220 					} else if (sc.chNext == 'b' || sc.chNext == 'B') {
1221 						numState = PERLNUM_BINARY;
1222 					} else if (IsADigit(sc.chNext)) {
1223 						numState = PERLNUM_OCTAL;
1224 					}
1225 					if (numState != PERLNUM_DECIMAL) {
1226 						sc.Forward();
1227 					}
1228 				} else if (sc.ch == 'v') {		// vector
1229 					numState = PERLNUM_V_VECTOR;
1230 				}
1231 			} else if (setWord.Contains(sc.ch)) {
1232 				// if immediately prefixed by '::', always a bareword
1233 				sc.SetState(SCE_PL_WORD);
1234 				if (sc.chPrev == ':' && sc.GetRelative(-2) == ':') {
1235 					sc.ChangeState(SCE_PL_IDENTIFIER);
1236 				}
1237 				unsigned int bk = sc.currentPos;
1238 				unsigned int fw = sc.currentPos + 1;
1239 				// first check for possible quote-like delimiter
1240 				if (sc.ch == 's' && !setWord.Contains(sc.chNext)) {
1241 					sc.ChangeState(SCE_PL_REGSUBST);
1242 					Quote.New(2);
1243 				} else if (sc.ch == 'm' && !setWord.Contains(sc.chNext)) {
1244 					sc.ChangeState(SCE_PL_REGEX);
1245 					Quote.New();
1246 				} else if (sc.ch == 'q' && !setWord.Contains(sc.chNext)) {
1247 					sc.ChangeState(SCE_PL_STRING_Q);
1248 					Quote.New();
1249 				} else if (sc.ch == 'y' && !setWord.Contains(sc.chNext)) {
1250 					sc.ChangeState(SCE_PL_XLAT);
1251 					Quote.New(2);
1252 				} else if (sc.Match('t', 'r') && !setWord.Contains(sc.GetRelative(2))) {
1253 					sc.ChangeState(SCE_PL_XLAT);
1254 					Quote.New(2);
1255 					sc.Forward();
1256 					fw++;
1257 				} else if (sc.ch == 'q' && setQDelim.Contains(sc.chNext)
1258 				        && !setWord.Contains(sc.GetRelative(2))) {
1259 					if (sc.chNext == 'q') sc.ChangeState(SCE_PL_STRING_QQ);
1260 					else if (sc.chNext == 'x') sc.ChangeState(SCE_PL_STRING_QX);
1261 					else if (sc.chNext == 'r') sc.ChangeState(SCE_PL_STRING_QR);
1262 					else sc.ChangeState(SCE_PL_STRING_QW);	// sc.chNext == 'w'
1263 					Quote.New();
1264 					sc.Forward();
1265 					fw++;
1266 				} else if (sc.ch == 'x' && (sc.chNext == '=' ||	// repetition
1267 				        !setWord.Contains(sc.chNext) ||
1268 				        (IsADigit(sc.chPrev) && IsADigit(sc.chNext)))) {
1269 					sc.ChangeState(SCE_PL_OPERATOR);
1270 				}
1271 				// if potentially a keyword, scan forward and grab word, then check
1272 				// if it's really one; if yes, disambiguation test is performed
1273 				// otherwise it is always a bareword and we skip a lot of scanning
1274 				if (sc.state == SCE_PL_WORD) {
1275 					while (setWord.Contains(static_cast<unsigned char>(styler.SafeGetCharAt(fw))))
1276 						fw++;
1277 					if (!isPerlKeyword(styler.GetStartSegment(), fw, keywords, styler)) {
1278 						sc.ChangeState(SCE_PL_IDENTIFIER);
1279 					}
1280 				}
1281 				// if already SCE_PL_IDENTIFIER, then no ambiguity, skip this
1282 				// for quote-like delimiters/keywords, attempt to disambiguate
1283 				// to select for bareword, change state -> SCE_PL_IDENTIFIER
1284 				if (sc.state != SCE_PL_IDENTIFIER && bk > 0) {
1285 					if (disambiguateBareword(styler, bk, fw, backFlag, backPos, endPos))
1286 						sc.ChangeState(SCE_PL_IDENTIFIER);
1287 				}
1288 				backFlag = BACK_NONE;
1289 			} else if (sc.ch == '#') {
1290 				sc.SetState(SCE_PL_COMMENTLINE);
1291 			} else if (sc.ch == '\"') {
1292 				sc.SetState(SCE_PL_STRING);
1293 				Quote.New();
1294 				Quote.Open(sc.ch);
1295 				backFlag = BACK_NONE;
1296 			} else if (sc.ch == '\'') {
1297 				if (sc.chPrev == '&' && setWordStart.Contains(sc.chNext)) {
1298 					// Archaic call
1299 					sc.SetState(SCE_PL_IDENTIFIER);
1300 				} else {
1301 					sc.SetState(SCE_PL_CHARACTER);
1302 					Quote.New();
1303 					Quote.Open(sc.ch);
1304 				}
1305 				backFlag = BACK_NONE;
1306 			} else if (sc.ch == '`') {
1307 				sc.SetState(SCE_PL_BACKTICKS);
1308 				Quote.New();
1309 				Quote.Open(sc.ch);
1310 				backFlag = BACK_NONE;
1311 			} else if (sc.ch == '$') {
1312 				sc.SetState(SCE_PL_SCALAR);
1313 				if (sc.chNext == '{') {
1314 					sc.ForwardSetState(SCE_PL_OPERATOR);
1315 				} else if (IsASpace(sc.chNext)) {
1316 					sc.ForwardSetState(SCE_PL_DEFAULT);
1317 				} else {
1318 					sc.Forward();
1319 					if (sc.Match('`', '`') || sc.Match(':', ':')) {
1320 						sc.Forward();
1321 					}
1322 				}
1323 				backFlag = BACK_NONE;
1324 			} else if (sc.ch == '@') {
1325 				sc.SetState(SCE_PL_ARRAY);
1326 				if (setArray.Contains(sc.chNext)) {
1327 					// no special treatment
1328 				} else if (sc.chNext == ':' && sc.GetRelative(2) == ':') {
1329 					sc.ForwardBytes(2);
1330 				} else if (sc.chNext == '{' || sc.chNext == '[') {
1331 					sc.ForwardSetState(SCE_PL_OPERATOR);
1332 				} else {
1333 					sc.ChangeState(SCE_PL_OPERATOR);
1334 				}
1335 				backFlag = BACK_NONE;
1336 			} else if (setPreferRE.Contains(sc.ch)) {
1337 				// Explicit backward peeking to set a consistent preferRE for
1338 				// any slash found, so no longer need to track preferRE state.
1339 				// Find first previous significant lexed element and interpret.
1340 				// A few symbols share this code for disambiguation.
1341 				bool preferRE = false;
1342 				bool isHereDoc = sc.Match('<', '<');
1343 				bool hereDocSpace = false;		// for: SCALAR [whitespace] '<<'
1344 				unsigned int bk = (sc.currentPos > 0) ? sc.currentPos - 1: 0;
1345 				sc.Complete();
1346 				styler.Flush();
1347 				if (styler.StyleAt(bk) == SCE_PL_DEFAULT)
1348 					hereDocSpace = true;
1349 				skipWhitespaceComment(styler, bk);
1350 				if (bk == 0) {
1351 					// avoid backward scanning breakage
1352 					preferRE = true;
1353 				} else {
1354 					int bkstyle = styler.StyleAt(bk);
1355 					int bkch = static_cast<unsigned char>(styler.SafeGetCharAt(bk));
1356 					switch (bkstyle) {
1357 					case SCE_PL_OPERATOR:
1358 						preferRE = true;
1359 						if (bkch == ')' || bkch == ']') {
1360 							preferRE = false;
1361 						} else if (bkch == '}') {
1362 							// backtrack by counting balanced brace pairs
1363 							// needed to test for variables like ${}, @{} etc.
1364 							bkstyle = styleBeforeBracePair(styler, bk);
1365 							if (bkstyle == SCE_PL_SCALAR
1366 							        || bkstyle == SCE_PL_ARRAY
1367 							        || bkstyle == SCE_PL_HASH
1368 							        || bkstyle == SCE_PL_SYMBOLTABLE
1369 							        || bkstyle == SCE_PL_OPERATOR) {
1370 								preferRE = false;
1371 							}
1372 						} else if (bkch == '+' || bkch == '-') {
1373 							if (bkch == static_cast<unsigned char>(styler.SafeGetCharAt(bk - 1))
1374 							        && bkch != static_cast<unsigned char>(styler.SafeGetCharAt(bk - 2)))
1375 								// exceptions for operators: unary suffixes ++, --
1376 								preferRE = false;
1377 						}
1378 						break;
1379 					case SCE_PL_IDENTIFIER:
1380 						preferRE = true;
1381 						bkstyle = styleCheckIdentifier(styler, bk);
1382 						if ((bkstyle == 1) || (bkstyle == 2)) {
1383 							// inputsymbol or var with "->" or "::" before identifier
1384 							preferRE = false;
1385 						} else if (bkstyle == 3) {
1386 							// bare identifier, test cases follows:
1387 							if (sc.ch == '/') {
1388 								// if '/', /PATTERN/ unless digit/space immediately after '/'
1389 								// if '//', always expect defined-or operator to follow identifier
1390 								if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.chNext == '/')
1391 									preferRE = false;
1392 							} else if (sc.ch == '*' || sc.ch == '%') {
1393 								if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.Match('*', '*'))
1394 									preferRE = false;
1395 							} else if (sc.ch == '<') {
1396 								if (IsASpace(sc.chNext) || sc.chNext == '=')
1397 									preferRE = false;
1398 							}
1399 						}
1400 						break;
1401 					case SCE_PL_SCALAR:		// for $var<< case:
1402 						if (isHereDoc && hereDocSpace)	// if SCALAR whitespace '<<', *always* a HERE doc
1403 							preferRE = true;
1404 						break;
1405 					case SCE_PL_WORD:
1406 						preferRE = true;
1407 						// for HERE docs, always true
1408 						if (sc.ch == '/') {
1409 							// adopt heuristics similar to vim-style rules:
1410 							// keywords always forced as /PATTERN/: split, if, elsif, while
1411 							// everything else /PATTERN/ unless digit/space immediately after '/'
1412 							// for '//', defined-or favoured unless special keywords
1413 							unsigned int bkend = bk + 1;
1414 							while (bk > 0 && styler.StyleAt(bk - 1) == SCE_PL_WORD) {
1415 								bk--;
1416 							}
1417 							if (isPerlKeyword(bk, bkend, reWords, styler))
1418 								break;
1419 							if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.chNext == '/')
1420 								preferRE = false;
1421 						} else if (sc.ch == '*' || sc.ch == '%') {
1422 							if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.Match('*', '*'))
1423 								preferRE = false;
1424 						} else if (sc.ch == '<') {
1425 							if (IsASpace(sc.chNext) || sc.chNext == '=')
1426 								preferRE = false;
1427 						}
1428 						break;
1429 
1430 						// other styles uses the default, preferRE=false
1431 					case SCE_PL_POD:
1432 					case SCE_PL_HERE_Q:
1433 					case SCE_PL_HERE_QQ:
1434 					case SCE_PL_HERE_QX:
1435 						preferRE = true;
1436 						break;
1437 					}
1438 				}
1439 				backFlag = BACK_NONE;
1440 				if (isHereDoc) {	// handle '<<', HERE doc
1441 					if (preferRE) {
1442 						sc.SetState(SCE_PL_HERE_DELIM);
1443 						HereDoc.State = 0;
1444 					} else {		// << operator
1445 						sc.SetState(SCE_PL_OPERATOR);
1446 						sc.Forward();
1447 					}
1448 				} else if (sc.ch == '*') {	// handle '*', typeglob
1449 					if (preferRE) {
1450 						sc.SetState(SCE_PL_SYMBOLTABLE);
1451 						if (sc.chNext == ':' && sc.GetRelative(2) == ':') {
1452 							sc.ForwardBytes(2);
1453 						} else if (sc.chNext == '{') {
1454 							sc.ForwardSetState(SCE_PL_OPERATOR);
1455 						} else {
1456 							sc.Forward();
1457 						}
1458 					} else {
1459 						sc.SetState(SCE_PL_OPERATOR);
1460 						if (sc.chNext == '*') 	// exponentiation
1461 							sc.Forward();
1462 					}
1463 				} else if (sc.ch == '%') {	// handle '%', hash
1464 					if (preferRE) {
1465 						sc.SetState(SCE_PL_HASH);
1466 						if (setHash.Contains(sc.chNext)) {
1467 							sc.Forward();
1468 						} else if (sc.chNext == ':' && sc.GetRelative(2) == ':') {
1469 							sc.ForwardBytes(2);
1470 						} else if (sc.chNext == '{') {
1471 							sc.ForwardSetState(SCE_PL_OPERATOR);
1472 						} else {
1473 							sc.ChangeState(SCE_PL_OPERATOR);
1474 						}
1475 					} else {
1476 						sc.SetState(SCE_PL_OPERATOR);
1477 					}
1478 				} else if (sc.ch == '<') {	// handle '<', inputsymbol
1479 					if (preferRE) {
1480 						// forward scan
1481 						int i = InputSymbolScan(sc);
1482 						if (i > 0) {
1483 							sc.SetState(SCE_PL_IDENTIFIER);
1484 							sc.Forward(i);
1485 						} else {
1486 							sc.SetState(SCE_PL_OPERATOR);
1487 						}
1488 					} else {
1489 						sc.SetState(SCE_PL_OPERATOR);
1490 					}
1491 				} else {			// handle '/', regexp
1492 					if (preferRE) {
1493 						sc.SetState(SCE_PL_REGEX);
1494 						Quote.New();
1495 						Quote.Open(sc.ch);
1496 					} else {		// / and // operators
1497 						sc.SetState(SCE_PL_OPERATOR);
1498 						if (sc.chNext == '/') {
1499 							sc.Forward();
1500 						}
1501 					}
1502 				}
1503 			} else if (sc.ch == '='		// POD
1504 			        && setPOD.Contains(sc.chNext)
1505 			        && sc.atLineStart) {
1506 				sc.SetState(SCE_PL_POD);
1507 				backFlag = BACK_NONE;
1508 			} else if (sc.ch == '-' && setWordStart.Contains(sc.chNext)) {	// extended '-' cases
1509 				unsigned int bk = sc.currentPos;
1510 				unsigned int fw = 2;
1511 				if (setSingleCharOp.Contains(sc.chNext) &&	// file test operators
1512 				        !setWord.Contains(sc.GetRelative(2))) {
1513 					sc.SetState(SCE_PL_WORD);
1514 				} else {
1515 					// nominally a minus and bareword; find extent of bareword
1516 					while (setWord.Contains(sc.GetRelative(fw)))
1517 						fw++;
1518 					sc.SetState(SCE_PL_OPERATOR);
1519 				}
1520 				// force to bareword for hash key => or {variable literal} cases
1521 				if (disambiguateBareword(styler, bk, bk + fw, backFlag, backPos, endPos) & 2) {
1522 					sc.ChangeState(SCE_PL_IDENTIFIER);
1523 				}
1524 				backFlag = BACK_NONE;
1525 			} else if (sc.ch == '(' && sc.currentPos > 0) {	// '(' or subroutine prototype
1526 				sc.Complete();
1527 				if (styleCheckSubPrototype(styler, sc.currentPos - 1)) {
1528 					sc.SetState(SCE_PL_SUB_PROTOTYPE);
1529 					backFlag = BACK_NONE;
1530 				} else {
1531 					sc.SetState(SCE_PL_OPERATOR);
1532 				}
1533 			} else if (setPerlOperator.Contains(sc.ch)) {	// operators
1534 				sc.SetState(SCE_PL_OPERATOR);
1535 				if (sc.Match('.', '.')) {	// .. and ...
1536 					sc.Forward();
1537 					if (sc.chNext == '.') sc.Forward();
1538 				}
1539 			} else if (sc.ch == 4 || sc.ch == 26) {		// ^D and ^Z ends valid perl source
1540 				sc.SetState(SCE_PL_DATASECTION);
1541 			} else {
1542 				// keep colouring defaults
1543 				sc.Complete();
1544 			}
1545 		}
1546 	}
1547 	sc.Complete();
1548 	if (sc.state == SCE_PL_HERE_Q
1549 	        || sc.state == SCE_PL_HERE_QQ
1550 	        || sc.state == SCE_PL_HERE_QX
1551 	        || sc.state == SCE_PL_FORMAT) {
1552 		styler.ChangeLexerState(sc.currentPos, styler.Length());
1553 	}
1554 	sc.Complete();
1555 }
1556 
1557 #define PERL_HEADFOLD_SHIFT		4	// POD heading level is shifted left by this many bits inside a fold level
1558 #define PERL_HEADFOLD_MASK		0xF0	// bits 7-4 of a fold level, the field that holds the POD heading level
1559 
Fold(unsigned int startPos,int length,int,IDocument * pAccess)1560 void SCI_METHOD LexerPerl::Fold(unsigned int startPos, int length, int /* initStyle */, IDocument *pAccess) {
1561 
1562 	if (!options.fold)
1563 		return;
1564 
1565 	LexAccessor styler(pAccess);
1566 
1567 	unsigned int endPos = startPos + length;
1568 	int visibleChars = 0;
1569 	int lineCurrent = styler.GetLine(startPos);
1570 
1571 	// Backtrack to previous line in case need to fix its fold status
1572 	if (startPos > 0) {
1573 		if (lineCurrent > 0) {
1574 			lineCurrent--;
1575 			startPos = styler.LineStart(lineCurrent);
1576 		}
1577 	}
1578 
1579 	int levelPrev = SC_FOLDLEVELBASE;
1580 	if (lineCurrent > 0)
1581 		levelPrev = styler.LevelAt(lineCurrent - 1) >> 16;
1582 	int levelCurrent = levelPrev;
1583 	char chNext = styler[startPos];
1584 	char chPrev = styler.SafeGetCharAt(startPos - 1);
1585 	int styleNext = styler.StyleAt(startPos);
1586 	// Used at end of line to determine if the line was a package definition
1587 	bool isPackageLine = false;
1588 	int podHeading = 0;
1589 	for (unsigned int i = startPos; i < endPos; i++) {
1590 		char ch = chNext;
1591 		chNext = styler.SafeGetCharAt(i + 1);
1592 		int style = styleNext;
1593 		styleNext = styler.StyleAt(i + 1);
1594 		int stylePrevCh = (i) ? styler.StyleAt(i - 1):SCE_PL_DEFAULT;
1595 		bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
1596 		bool atLineStart = ((chPrev == '\r') || (chPrev == '\n')) || i == 0;
1597 		// Comment folding
1598 		if (options.foldComment && atEOL && IsCommentLine(lineCurrent, styler)) {
1599 			if (!IsCommentLine(lineCurrent - 1, styler)
1600 			        && IsCommentLine(lineCurrent + 1, styler))
1601 				levelCurrent++;
1602 			else if (IsCommentLine(lineCurrent - 1, styler)
1603 			        && !IsCommentLine(lineCurrent + 1, styler))
1604 				levelCurrent--;
1605 		}
1606 		// {} [] block folding
1607 		if (style == SCE_PL_OPERATOR) {
1608 			if (ch == '{') {
1609 				if (options.foldAtElse && levelCurrent < levelPrev)
1610 					--levelPrev;
1611 				levelCurrent++;
1612 			} else if (ch == '}') {
1613 				levelCurrent--;
1614 			}
1615 			if (ch == '[') {
1616 				if (options.foldAtElse && levelCurrent < levelPrev)
1617 					--levelPrev;
1618 				levelCurrent++;
1619 			} else if (ch == ']') {
1620 				levelCurrent--;
1621 			}
1622 		}
1623 		// POD folding
1624 		if (options.foldPOD && atLineStart) {
1625 			if (style == SCE_PL_POD) {
1626 				if (stylePrevCh != SCE_PL_POD && stylePrevCh != SCE_PL_POD_VERB)
1627 					levelCurrent++;
1628 				else if (styler.Match(i, "=cut"))
1629 					levelCurrent = (levelCurrent & ~PERL_HEADFOLD_MASK) - 1;
1630 				else if (styler.Match(i, "=head"))
1631 					podHeading = PodHeadingLevel(i, styler);
1632 			} else if (style == SCE_PL_DATASECTION) {
1633 				if (ch == '=' && IsASCII(chNext) && isalpha(chNext) && levelCurrent == SC_FOLDLEVELBASE)
1634 					levelCurrent++;
1635 				else if (styler.Match(i, "=cut") && levelCurrent > SC_FOLDLEVELBASE)
1636 					levelCurrent = (levelCurrent & ~PERL_HEADFOLD_MASK) - 1;
1637 				else if (styler.Match(i, "=head"))
1638 					podHeading = PodHeadingLevel(i, styler);
1639 				// if package used or unclosed brace, level > SC_FOLDLEVELBASE!
1640 				// reset needed as level test is vs. SC_FOLDLEVELBASE
1641 				else if (stylePrevCh != SCE_PL_DATASECTION)
1642 					levelCurrent = SC_FOLDLEVELBASE;
1643 			}
1644 		}
1645 		// package folding
1646 		if (options.foldPackage && atLineStart) {
1647 			if (IsPackageLine(lineCurrent, styler)
1648 			        && !IsPackageLine(lineCurrent + 1, styler))
1649 				isPackageLine = true;
1650 		}
1651 
1652 		//heredoc folding
1653 		switch (style) {
1654 		case SCE_PL_HERE_QQ :
1655 		case SCE_PL_HERE_Q :
1656 		case SCE_PL_HERE_QX :
1657 			switch (stylePrevCh) {
1658 			case SCE_PL_HERE_QQ :
1659 			case SCE_PL_HERE_Q :
1660 			case SCE_PL_HERE_QX :
1661 				//do nothing;
1662 				break;
1663 			default :
1664 				levelCurrent++;
1665 				break;
1666 			}
1667 			break;
1668 		default:
1669 			switch (stylePrevCh) {
1670 			case SCE_PL_HERE_QQ :
1671 			case SCE_PL_HERE_Q :
1672 			case SCE_PL_HERE_QX :
1673 				levelCurrent--;
1674 				break;
1675 			default :
1676 				//do nothing;
1677 				break;
1678 			}
1679 			break;
1680 		}
1681 
1682 		//explicit folding
1683 		if (options.foldCommentExplicit && style == SCE_PL_COMMENTLINE && ch == '#') {
1684 			if (chNext == '{') {
1685 				levelCurrent++;
1686 			} else if (levelCurrent > SC_FOLDLEVELBASE  && chNext == '}') {
1687 				levelCurrent--;
1688 			}
1689 		}
1690 
1691 		if (atEOL) {
1692 			int lev = levelPrev;
1693 			// POD headings occupy bits 7-4, leaving some breathing room for
1694 			// non-standard practice -- POD sections stuck in blocks, etc.
1695 			if (podHeading > 0) {
1696 				levelCurrent = (lev & ~PERL_HEADFOLD_MASK) | (podHeading << PERL_HEADFOLD_SHIFT);
1697 				lev = levelCurrent - 1;
1698 				lev |= SC_FOLDLEVELHEADERFLAG;
1699 				podHeading = 0;
1700 			}
1701 			// Check if line was a package declaration
1702 			// because packages need "special" treatment
1703 			if (isPackageLine) {
1704 				lev = SC_FOLDLEVELBASE | SC_FOLDLEVELHEADERFLAG;
1705 				levelCurrent = SC_FOLDLEVELBASE + 1;
1706 				isPackageLine = false;
1707 			}
1708 			lev |= levelCurrent << 16;
1709 			if (visibleChars == 0 && options.foldCompact)
1710 				lev |= SC_FOLDLEVELWHITEFLAG;
1711 			if ((levelCurrent > levelPrev) && (visibleChars > 0))
1712 				lev |= SC_FOLDLEVELHEADERFLAG;
1713 			if (lev != styler.LevelAt(lineCurrent)) {
1714 				styler.SetLevel(lineCurrent, lev);
1715 			}
1716 			lineCurrent++;
1717 			levelPrev = levelCurrent;
1718 			visibleChars = 0;
1719 		}
1720 		if (!isspacechar(ch))
1721 			visibleChars++;
1722 		chPrev = ch;
1723 	}
1724 	// Fill in the real level of the next line, keeping the current flags as they will be filled in later
1725 	int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
1726 	styler.SetLevel(lineCurrent, levelPrev | flagsNext);
1727 }
1728 
// Module object for the Perl lexer: constructed with the SCLEX_PERL id, the
// LexerPerl factory function, the name "perl" and the word list descriptions.
1729 LexerModule lmPerl(SCLEX_PERL, LexerPerl::LexerFactoryPerl, "perl", perlWordListDesc);
1730