1 // Scintilla source code edit control
2 /** @file LexPerl.cxx
3  ** Lexer for Perl.
4  ** Converted to lexer object by "Udo Lechner" <dlchnr(at)gmx(dot)net>
5  **/
6 // Copyright 1998-2008 by Neil Hodgson <neilh@scintilla.org>
7 // Lexical analysis fixes by Kein-Hong Man <mkh@pl.jaring.my>
8 // The License.txt file describes the conditions under which this software may be distributed.
9 
10 #include <stdlib.h>
11 #include <string.h>
12 #include <stdio.h>
13 #include <stdarg.h>
14 #include <assert.h>
15 #include <ctype.h>
16 
17 #include <string>
18 #include <map>
19 
20 #include "ILexer.h"
21 #include "Scintilla.h"
22 #include "SciLexer.h"
23 
24 #include "WordList.h"
25 #include "LexAccessor.h"
26 #include "StyleContext.h"
27 #include "CharacterSet.h"
28 #include "LexerModule.h"
29 #include "OptionSet.h"
30 #include "DefaultLexer.h"
31 
32 using namespace Scintilla;
33 
34 // Info for HERE document handling from perldata.pod (reformatted):
35 // ----------------------------------------------------------------
36 // A line-oriented form of quoting is based on the shell ``here-doc'' syntax.
37 // Following a << you specify a string to terminate the quoted material, and
38 // all lines following the current line down to the terminating string are
39 // the value of the item.
40 // * The terminating string may be either an identifier (a word), or some
41 //   quoted text.
42 // * If quoted, the type of quotes you use determines the treatment of the
43 //   text, just as in regular quoting.
44 // * An unquoted identifier works like double quotes.
45 // * There must be no space between the << and the identifier.
46 //   (If you put a space it will be treated as a null identifier,
47 //    which is valid, and matches the first empty line.)
48 //   (This is deprecated, -w warns of this syntax)
49 // * The terminating string must appear by itself (unquoted and
50 //   with no surrounding whitespace) on the terminating line.
51 
// Size of the HereDocCls::Delimiter buffer used while lexing.
#define HERE_DELIM_MAX 256		// maximum length of HERE doc delimiter

// Sub-states for number lexing (held in numState during Lex and mapped to a
// final style by actualNumStyle()).
#define PERLNUM_BINARY		1	// order is significant: 1-3 cannot have a dot
#define PERLNUM_OCTAL		2
#define PERLNUM_FLOAT_EXP	3	// exponent part only
#define PERLNUM_HEX			4	// may be a hex float
#define PERLNUM_DECIMAL		5	// 1-5 are numbers; 6-7 are strings
#define PERLNUM_VECTOR		6
#define PERLNUM_V_VECTOR	7
#define PERLNUM_BAD			8

// Values of backFlag, recording what kind of significant item precedes the
// current position (consumed by disambiguateBareword()).
#define BACK_NONE		0	// lookback state for bareword disambiguation:
#define BACK_OPERATOR	1	// whitespace/comments are insignificant
#define BACK_KEYWORD	2	// operators/keywords are needed for disambiguation

// States of the backward scan performed by styleCheckSubPrototype().
#define SUB_BEGIN		0	// states for subroutine prototype scan:
#define SUB_HAS_PROTO	1	// only 'prototype' attribute allows prototypes
#define SUB_HAS_ATTRIB	2	// other attributes can exist leftward
#define SUB_HAS_MODULE	3	// sub name can have a ::identifier part
#define SUB_HAS_SUB		4	// 'sub' keyword

// all interpolated styles are different from their parent styles by a constant difference
// we also assume SCE_PL_STRING_VAR is the interpolated style with the smallest value
// (InterpolateSegment() adds or subtracts this shift to switch between a
// parent style and its interpolated counterpart)
#define	INTERPOLATE_SHIFT	(SCE_PL_STRING_VAR - SCE_PL_STRING)
76 
isPerlKeyword(Sci_PositionU start,Sci_PositionU end,WordList & keywords,LexAccessor & styler)77 static bool isPerlKeyword(Sci_PositionU start, Sci_PositionU end, WordList &keywords, LexAccessor &styler) {
78 	// old-style keyword matcher; needed because GetCurrent() needs
79 	// current segment to be committed, but we may abandon early...
80 	char s[100];
81 	Sci_PositionU i, len = end - start;
82 	if (len > 30) { len = 30; }
83 	for (i = 0; i < len; i++, start++) s[i] = styler[start];
84 	s[i] = '\0';
85 	return keywords.InList(s);
86 }
87 
disambiguateBareword(LexAccessor & styler,Sci_PositionU bk,Sci_PositionU fw,int backFlag,Sci_PositionU backPos,Sci_PositionU endPos)88 static int disambiguateBareword(LexAccessor &styler, Sci_PositionU bk, Sci_PositionU fw,
89         int backFlag, Sci_PositionU backPos, Sci_PositionU endPos) {
90 	// identifiers are recognized by Perl as barewords under some
91 	// conditions, the following attempts to do the disambiguation
92 	// by looking backward and forward; result in 2 LSB
93 	int result = 0;
94 	bool moreback = false;		// true if passed newline/comments
95 	bool brace = false;			// true if opening brace found
96 	// if BACK_NONE, neither operator nor keyword, so skip test
97 	if (backFlag == BACK_NONE)
98 		return result;
99 	// first look backwards past whitespace/comments to set EOL flag
100 	// (some disambiguation patterns must be on a single line)
101 	if (backPos <= static_cast<Sci_PositionU>(styler.LineStart(styler.GetLine(bk))))
102 		moreback = true;
103 	// look backwards at last significant lexed item for disambiguation
104 	bk = backPos - 1;
105 	int ch = static_cast<unsigned char>(styler.SafeGetCharAt(bk));
106 	if (ch == '{' && !moreback) {
107 		// {bareword: possible variable spec
108 		brace = true;
109 	} else if ((ch == '&' && styler.SafeGetCharAt(bk - 1) != '&')
110 	        // &bareword: subroutine call
111 	        || styler.Match(bk - 1, "->")
112 	        // ->bareword: part of variable spec
113 	        || styler.Match(bk - 1, "::")
114 	        // ::bareword: part of module spec
115 	        || styler.Match(bk - 2, "sub")) {
116 	        // sub bareword: subroutine declaration
117 	        // (implied BACK_KEYWORD, no keywords end in 'sub'!)
118 		result |= 1;
119 	}
120 	// next, scan forward after word past tab/spaces only;
121 	// if ch isn't one of '[{(,' we can skip the test
122 	if ((ch == '{' || ch == '(' || ch == '['|| ch == ',')
123 	        && fw < endPos) {
124 		while (IsASpaceOrTab(ch = static_cast<unsigned char>(styler.SafeGetCharAt(fw)))
125 		        && fw < endPos) {
126 			fw++;
127 		}
128 		if ((ch == '}' && brace)
129 		        // {bareword}: variable spec
130 		        || styler.Match(fw, "=>")) {
131 		        // [{(, bareword=>: hash literal
132 			result |= 2;
133 		}
134 	}
135 	return result;
136 }
137 
skipWhitespaceComment(LexAccessor & styler,Sci_PositionU & p)138 static void skipWhitespaceComment(LexAccessor &styler, Sci_PositionU &p) {
139 	// when backtracking, we need to skip whitespace and comments
140 	while (p > 0) {
141 		const int style = styler.StyleAt(p);
142 		if (style != SCE_PL_DEFAULT && style != SCE_PL_COMMENTLINE)
143 			break;
144 		p--;
145 	}
146 }
147 
findPrevLexeme(LexAccessor & styler,Sci_PositionU & bk,int & style)148 static int findPrevLexeme(LexAccessor &styler, Sci_PositionU &bk, int &style) {
149 	// scan backward past whitespace and comments to find a lexeme
150 	skipWhitespaceComment(styler, bk);
151 	if (bk == 0)
152 		return 0;
153 	int sz = 1;
154 	style = styler.StyleAt(bk);
155 	while (bk > 0) {	// find extent of lexeme
156 		if (styler.StyleAt(bk - 1) == style) {
157 			bk--; sz++;
158 		} else
159 			break;
160 	}
161 	return sz;
162 }
163 
styleBeforeBracePair(LexAccessor & styler,Sci_PositionU bk)164 static int styleBeforeBracePair(LexAccessor &styler, Sci_PositionU bk) {
165 	// backtrack to find open '{' corresponding to a '}', balanced
166 	// return significant style to be tested for '/' disambiguation
167 	int braceCount = 1;
168 	if (bk == 0)
169 		return SCE_PL_DEFAULT;
170 	while (--bk > 0) {
171 		if (styler.StyleAt(bk) == SCE_PL_OPERATOR) {
172 			int bkch = static_cast<unsigned char>(styler.SafeGetCharAt(bk));
173 			if (bkch == ';') {	// early out
174 				break;
175 			} else if (bkch == '}') {
176 				braceCount++;
177 			} else if (bkch == '{') {
178 				if (--braceCount == 0) break;
179 			}
180 		}
181 	}
182 	if (bk > 0 && braceCount == 0) {
183 		// balanced { found, bk > 0, skip more whitespace/comments
184 		bk--;
185 		skipWhitespaceComment(styler, bk);
186 		return styler.StyleAt(bk);
187 	}
188 	return SCE_PL_DEFAULT;
189 }
190 
styleCheckIdentifier(LexAccessor & styler,Sci_PositionU bk)191 static int styleCheckIdentifier(LexAccessor &styler, Sci_PositionU bk) {
192 	// backtrack to classify sub-styles of identifier under test
193 	// return sub-style to be tested for '/' disambiguation
194 	if (styler.SafeGetCharAt(bk) == '>')	// inputsymbol, like <foo>
195 		return 1;
196 	// backtrack to check for possible "->" or "::" before identifier
197 	while (bk > 0 && styler.StyleAt(bk) == SCE_PL_IDENTIFIER) {
198 		bk--;
199 	}
200 	while (bk > 0) {
201 		int bkstyle = styler.StyleAt(bk);
202 		if (bkstyle == SCE_PL_DEFAULT
203 		        || bkstyle == SCE_PL_COMMENTLINE) {
204 			// skip whitespace, comments
205 		} else if (bkstyle == SCE_PL_OPERATOR) {
206 			// test for "->" and "::"
207 			if (styler.Match(bk - 1, "->") || styler.Match(bk - 1, "::"))
208 				return 2;
209 		} else
210 			return 3;	// bare identifier
211 		bk--;
212 	}
213 	return 0;
214 }
215 
podLineScan(LexAccessor & styler,Sci_PositionU & pos,Sci_PositionU endPos)216 static int podLineScan(LexAccessor &styler, Sci_PositionU &pos, Sci_PositionU endPos) {
217 	// forward scan the current line to classify line for POD style
218 	int state = -1;
219 	while (pos < endPos) {
220 		int ch = static_cast<unsigned char>(styler.SafeGetCharAt(pos));
221 		if (ch == '\n' || ch == '\r') {
222 			if (ch == '\r' && styler.SafeGetCharAt(pos + 1) == '\n') pos++;
223 			break;
224 		}
225 		if (IsASpaceOrTab(ch)) {	// whitespace, take note
226 			if (state == -1)
227 				state = SCE_PL_DEFAULT;
228 		} else if (state == SCE_PL_DEFAULT) {	// verbatim POD line
229 			state = SCE_PL_POD_VERB;
230 		} else if (state != SCE_PL_POD_VERB) {	// regular POD line
231 			state = SCE_PL_POD;
232 		}
233 		pos++;
234 	}
235 	if (state == -1)
236 		state = SCE_PL_DEFAULT;
237 	return state;
238 }
239 
styleCheckSubPrototype(LexAccessor & styler,Sci_PositionU bk)240 static bool styleCheckSubPrototype(LexAccessor &styler, Sci_PositionU bk) {
241 	// backtrack to identify if we're starting a subroutine prototype
242 	// we also need to ignore whitespace/comments, format is like:
243 	//     sub abc::pqr :const :prototype(...)
244 	// lexemes are tested in pairs, e.g. '::'+'pqr', ':'+'const', etc.
245 	// and a state machine generates legal subroutine syntax matches
246 	styler.Flush();
247 	int state = SUB_BEGIN;
248 	do {
249 		// find two lexemes, lexeme 2 follows lexeme 1
250 		int style2 = SCE_PL_DEFAULT;
251 		Sci_PositionU pos2 = bk;
252 		int len2 = findPrevLexeme(styler, pos2, style2);
253 		int style1 = SCE_PL_DEFAULT;
254 		Sci_PositionU pos1 = pos2;
255 		if (pos1 > 0) pos1--;
256 		int len1 = findPrevLexeme(styler, pos1, style1);
257 		if (len1 == 0 || len2 == 0)		// lexeme pair must exist
258 			break;
259 
260 		// match parts of syntax, if invalid subroutine syntax, break off
261 		if (style1 == SCE_PL_OPERATOR && len1 == 1 &&
262 		    styler.SafeGetCharAt(pos1) == ':') {	// ':'
263 			if (style2 == SCE_PL_IDENTIFIER || style2 == SCE_PL_WORD) {
264 				if (len2 == 9 && styler.Match(pos2, "prototype")) {	// ':' 'prototype'
265 					if (state == SUB_BEGIN) {
266 						state = SUB_HAS_PROTO;
267 					} else
268 						break;
269 				} else {	// ':' <attribute>
270 					if (state == SUB_HAS_PROTO || state == SUB_HAS_ATTRIB) {
271 						state = SUB_HAS_ATTRIB;
272 					} else
273 						break;
274 				}
275 			} else
276 				break;
277 		} else if (style1 == SCE_PL_OPERATOR && len1 == 2 &&
278 		           styler.Match(pos1, "::")) {	// '::'
279 			if (style2 == SCE_PL_IDENTIFIER) {	// '::' <identifier>
280 				state = SUB_HAS_MODULE;
281 			} else
282 				break;
283 		} else if (style1 == SCE_PL_WORD && len1 == 3 &&
284 		           styler.Match(pos1, "sub")) {	// 'sub'
285 			if (style2 == SCE_PL_IDENTIFIER) {	// 'sub' <identifier>
286 				state = SUB_HAS_SUB;
287 			} else
288 				break;
289 		} else
290 			break;
291 		bk = pos1;			// set position for finding next lexeme pair
292 		if (bk > 0) bk--;
293 	} while (state != SUB_HAS_SUB);
294 	return (state == SUB_HAS_SUB);
295 }
296 
actualNumStyle(int numberStyle)297 static int actualNumStyle(int numberStyle) {
298 	if (numberStyle == PERLNUM_VECTOR || numberStyle == PERLNUM_V_VECTOR) {
299 		return SCE_PL_STRING;
300 	} else if (numberStyle == PERLNUM_BAD) {
301 		return SCE_PL_ERROR;
302 	}
303 	return SCE_PL_NUMBER;
304 }
305 
static int opposite(int ch) {
	// Returns the closing delimiter paired with an opening bracket
	// character; any other character is its own closing delimiter.
	switch (ch) {
	case '(':
		return ')';
	case '[':
		return ']';
	case '{':
		return '}';
	case '<':
		return '>';
	default:
		return ch;
	}
}
313 
IsCommentLine(Sci_Position line,LexAccessor & styler)314 static bool IsCommentLine(Sci_Position line, LexAccessor &styler) {
315 	Sci_Position pos = styler.LineStart(line);
316 	Sci_Position eol_pos = styler.LineStart(line + 1) - 1;
317 	for (Sci_Position i = pos; i < eol_pos; i++) {
318 		char ch = styler[i];
319 		int style = styler.StyleAt(i);
320 		if (ch == '#' && style == SCE_PL_COMMENTLINE)
321 			return true;
322 		else if (!IsASpaceOrTab(ch))
323 			return false;
324 	}
325 	return false;
326 }
327 
IsPackageLine(Sci_Position line,LexAccessor & styler)328 static bool IsPackageLine(Sci_Position line, LexAccessor &styler) {
329 	Sci_Position pos = styler.LineStart(line);
330 	int style = styler.StyleAt(pos);
331 	if (style == SCE_PL_WORD && styler.Match(pos, "package")) {
332 		return true;
333 	}
334 	return false;
335 }
336 
PodHeadingLevel(Sci_Position pos,LexAccessor & styler)337 static int PodHeadingLevel(Sci_Position pos, LexAccessor &styler) {
338 	int lvl = static_cast<unsigned char>(styler.SafeGetCharAt(pos + 5));
339 	if (lvl >= '1' && lvl <= '4') {
340 		return lvl - '0';
341 	}
342 	return 0;
343 }
344 
345 // An individual named option for use in an OptionSet
346 
347 // Options used for LexerPerl
struct OptionsPerl {
	// Property-backed switches; each is bound to a property name in OptionSetPerl.
	bool fold;                   // fold
	bool foldComment;            // fold.comment
	bool foldCompact;            // fold.compact
	// Custom folding of POD and packages
	bool foldPOD;                // fold.perl.pod
	// Enable folding Pod blocks when using the Perl lexer.
	bool foldPackage;            // fold.perl.package
	// Enable folding packages when using the Perl lexer.

	bool foldCommentExplicit;    // fold.perl.comment.explicit

	bool foldAtElse;             // fold.perl.at.else

	// Defaults: compact, POD, package and explicit-comment folding are on;
	// everything else starts off.
	OptionsPerl() :
		fold(false),
		foldComment(false),
		foldCompact(true),
		foldPOD(true),
		foldPackage(true),
		foldCommentExplicit(true),
		foldAtElse(false) {
	}
};
372 
// Descriptions of the word lists this lexer accepts (null-terminated array).
static const char *const perlWordListDesc[] = { "Keywords", nullptr };
377 
378 struct OptionSetPerl : public OptionSet<OptionsPerl> {
OptionSetPerlOptionSetPerl379 	OptionSetPerl() {
380 		DefineProperty("fold", &OptionsPerl::fold);
381 
382 		DefineProperty("fold.comment", &OptionsPerl::foldComment);
383 
384 		DefineProperty("fold.compact", &OptionsPerl::foldCompact);
385 
386 		DefineProperty("fold.perl.pod", &OptionsPerl::foldPOD,
387 		        "Set to 0 to disable folding Pod blocks when using the Perl lexer.");
388 
389 		DefineProperty("fold.perl.package", &OptionsPerl::foldPackage,
390 		        "Set to 0 to disable folding packages when using the Perl lexer.");
391 
392 		DefineProperty("fold.perl.comment.explicit", &OptionsPerl::foldCommentExplicit,
393 		        "Set to 0 to disable explicit folding.");
394 
395 		DefineProperty("fold.perl.at.else", &OptionsPerl::foldAtElse,
396 		               "This option enables Perl folding on a \"} else {\" line of an if statement.");
397 
398 		DefineWordListSets(perlWordListDesc);
399 	}
400 };
401 
402 class LexerPerl : public DefaultLexer {
403 	CharacterSet setWordStart;
404 	CharacterSet setWord;
405 	CharacterSet setSpecialVar;
406 	CharacterSet setControlVar;
407 	WordList keywords;
408 	OptionsPerl options;
409 	OptionSetPerl osPerl;
410 public:
LexerPerl()411 	LexerPerl() :
412 		setWordStart(CharacterSet::setAlpha, "_", 0x80, true),
413 		setWord(CharacterSet::setAlphaNum, "_", 0x80, true),
414 		setSpecialVar(CharacterSet::setNone, "\"$;<>&`'+,./\\%:=~!?@[]"),
415 		setControlVar(CharacterSet::setNone, "ACDEFHILMNOPRSTVWX") {
416 	}
~LexerPerl()417 	virtual ~LexerPerl() {
418 	}
Release()419 	void SCI_METHOD Release() override {
420 		delete this;
421 	}
Version() const422 	int SCI_METHOD Version() const override {
423 		return lvOriginal;
424 	}
PropertyNames()425 	const char *SCI_METHOD PropertyNames() override {
426 		return osPerl.PropertyNames();
427 	}
PropertyType(const char * name)428 	int SCI_METHOD PropertyType(const char *name) override {
429 		return osPerl.PropertyType(name);
430 	}
DescribeProperty(const char * name)431 	const char *SCI_METHOD DescribeProperty(const char *name) override {
432 		return osPerl.DescribeProperty(name);
433 	}
434 	Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override;
DescribeWordListSets()435 	const char *SCI_METHOD DescribeWordListSets() override {
436 		return osPerl.DescribeWordListSets();
437 	}
438 	Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;
439 	void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
440 	void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
441 
PrivateCall(int,void *)442 	void *SCI_METHOD PrivateCall(int, void *) override {
443 		return 0;
444 	}
445 
LexerFactoryPerl()446 	static ILexer *LexerFactoryPerl() {
447 		return new LexerPerl();
448 	}
449 	int InputSymbolScan(StyleContext &sc);
450 	void InterpolateSegment(StyleContext &sc, int maxSeg, bool isPattern=false);
451 };
452 
PropertySet(const char * key,const char * val)453 Sci_Position SCI_METHOD LexerPerl::PropertySet(const char *key, const char *val) {
454 	if (osPerl.PropertySet(&options, key, val)) {
455 		return 0;
456 	}
457 	return -1;
458 }
459 
WordListSet(int n,const char * wl)460 Sci_Position SCI_METHOD LexerPerl::WordListSet(int n, const char *wl) {
461 	WordList *wordListN = 0;
462 	switch (n) {
463 	case 0:
464 		wordListN = &keywords;
465 		break;
466 	}
467 	Sci_Position firstModification = -1;
468 	if (wordListN) {
469 		WordList wlNew;
470 		wlNew.Set(wl);
471 		if (*wordListN != wlNew) {
472 			wordListN->Set(wl);
473 			firstModification = 0;
474 		}
475 	}
476 	return firstModification;
477 }
478 
InputSymbolScan(StyleContext & sc)479 int LexerPerl::InputSymbolScan(StyleContext &sc) {
480 	// forward scan for matching > on same line; file handles
481 	int c, sLen = 0;
482 	while ((c = sc.GetRelativeCharacter(++sLen)) != 0) {
483 		if (c == '\r' || c == '\n') {
484 			return 0;
485 		} else if (c == '>') {
486 			if (sc.Match("<=>"))	// '<=>' case
487 				return 0;
488 			return sLen;
489 		}
490 	}
491 	return 0;
492 }
493 
InterpolateSegment(StyleContext & sc,int maxSeg,bool isPattern)494 void LexerPerl::InterpolateSegment(StyleContext &sc, int maxSeg, bool isPattern) {
495 	// interpolate a segment (with no active backslashes or delimiters within)
496 	// switch in or out of an interpolation style or continue current style
497 	// commit variable patterns if found, trim segment, repeat until done
498 	while (maxSeg > 0) {
499 		bool isVar = false;
500 		int sLen = 0;
501 		if ((maxSeg > 1) && (sc.ch == '$' || sc.ch == '@')) {
502 			// $#[$]*word [$@][$]*word (where word or {word} is always present)
503 			bool braces = false;
504 			sLen = 1;
505 			if (sc.ch == '$' && sc.chNext == '#') {	// starts with $#
506 				sLen++;
507 			}
508 			while ((maxSeg > sLen) && (sc.GetRelativeCharacter(sLen) == '$'))	// >0 $ dereference within
509 				sLen++;
510 			if ((maxSeg > sLen) && (sc.GetRelativeCharacter(sLen) == '{')) {	// { start for {word}
511 				sLen++;
512 				braces = true;
513 			}
514 			if (maxSeg > sLen) {
515 				int c = sc.GetRelativeCharacter(sLen);
516 				if (setWordStart.Contains(c)) {	// word (various)
517 					sLen++;
518 					isVar = true;
519 					while (maxSeg > sLen) {
520 						if (!setWord.Contains(sc.GetRelativeCharacter(sLen)))
521 							break;
522 						sLen++;
523 					}
524 				} else if (braces && IsADigit(c) && (sLen == 2)) {	// digit for ${digit}
525 					sLen++;
526 					isVar = true;
527 				}
528 			}
529 			if (braces) {
530 				if ((maxSeg > sLen) && (sc.GetRelativeCharacter(sLen) == '}')) {	// } end for {word}
531 					sLen++;
532 				} else
533 					isVar = false;
534 			}
535 		}
536 		if (!isVar && (maxSeg > 1)) {	// $- or @-specific variable patterns
537 			int c = sc.chNext;
538 			if (sc.ch == '$') {
539 				sLen = 1;
540 				if (IsADigit(c)) {	// $[0-9] and slurp trailing digits
541 					sLen++;
542 					isVar = true;
543 					while ((maxSeg > sLen) && IsADigit(sc.GetRelativeCharacter(sLen)))
544 						sLen++;
545 				} else if (setSpecialVar.Contains(c)) {	// $ special variables
546 					sLen++;
547 					isVar = true;
548 				} else if (!isPattern && ((c == '(') || (c == ')') || (c == '|'))) {	// $ additional
549 					sLen++;
550 					isVar = true;
551 				} else if (c == '^') {	// $^A control-char style
552 					sLen++;
553 					if ((maxSeg > sLen) && setControlVar.Contains(sc.GetRelativeCharacter(sLen))) {
554 						sLen++;
555 						isVar = true;
556 					}
557 				}
558 			} else if (sc.ch == '@') {
559 				sLen = 1;
560 				if (!isPattern && ((c == '+') || (c == '-'))) {	// @ specials non-pattern
561 					sLen++;
562 					isVar = true;
563 				}
564 			}
565 		}
566 		if (isVar) {	// commit as interpolated variable or normal character
567 			if (sc.state < SCE_PL_STRING_VAR)
568 				sc.SetState(sc.state + INTERPOLATE_SHIFT);
569 			sc.Forward(sLen);
570 			maxSeg -= sLen;
571 		} else {
572 			if (sc.state >= SCE_PL_STRING_VAR)
573 				sc.SetState(sc.state - INTERPOLATE_SHIFT);
574 			sc.Forward();
575 			maxSeg--;
576 		}
577 	}
578 	if (sc.state >= SCE_PL_STRING_VAR)
579 		sc.SetState(sc.state - INTERPOLATE_SHIFT);
580 }
581 
Lex(Sci_PositionU startPos,Sci_Position length,int initStyle,IDocument * pAccess)582 void SCI_METHOD LexerPerl::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
583 	LexAccessor styler(pAccess);
584 
585 	// keywords that forces /PATTERN/ at all times; should track vim's behaviour
586 	WordList reWords;
587 	reWords.Set("elsif if split while");
588 
589 	// charset classes
590 	CharacterSet setSingleCharOp(CharacterSet::setNone, "rwxoRWXOezsfdlpSbctugkTBMAC");
591 	// lexing of "%*</" operators is non-trivial; these are missing in the set below
592 	CharacterSet setPerlOperator(CharacterSet::setNone, "^&\\()-+=|{}[]:;>,?!.~");
593 	CharacterSet setQDelim(CharacterSet::setNone, "qrwx");
594 	CharacterSet setModifiers(CharacterSet::setAlpha);
595 	CharacterSet setPreferRE(CharacterSet::setNone, "*/<%");
596 	// setArray and setHash also accepts chars for special vars like $_,
597 	// which are then truncated when the next char does not match setVar
598 	CharacterSet setVar(CharacterSet::setAlphaNum, "#$_'", 0x80, true);
599 	CharacterSet setArray(CharacterSet::setAlpha, "#$_+-", 0x80, true);
600 	CharacterSet setHash(CharacterSet::setAlpha, "#$_!^+-", 0x80, true);
601 	CharacterSet &setPOD = setModifiers;
602 	CharacterSet setNonHereDoc(CharacterSet::setDigits, "=$@");
603 	CharacterSet setHereDocDelim(CharacterSet::setAlphaNum, "_");
604 	CharacterSet setSubPrototype(CharacterSet::setNone, "\\[$@%&*+];_ \t");
605 	CharacterSet setRepetition(CharacterSet::setDigits, ")\"'");
606 	// for format identifiers
607 	CharacterSet setFormatStart(CharacterSet::setAlpha, "_=");
608 	CharacterSet &setFormat = setHereDocDelim;
609 
610 	// Lexer for perl often has to backtrack to start of current style to determine
611 	// which characters are being used as quotes, how deeply nested is the
612 	// start position and what the termination string is for HERE documents.
613 
614 	class HereDocCls {	// Class to manage HERE doc sequence
615 	public:
616 		int State;
617 		// 0: '<<' encountered
618 		// 1: collect the delimiter
619 		// 2: here doc text (lines after the delimiter)
620 		int Quote;		// the char after '<<'
621 		bool Quoted;		// true if Quote in ('\'','"','`')
622 		int DelimiterLength;	// strlen(Delimiter)
623 		char Delimiter[HERE_DELIM_MAX];	// the Delimiter
624 		HereDocCls() {
625 			State = 0;
626 			Quote = 0;
627 			Quoted = false;
628 			DelimiterLength = 0;
629 			Delimiter[0] = '\0';
630 		}
631 		void Append(int ch) {
632 			Delimiter[DelimiterLength++] = static_cast<char>(ch);
633 			Delimiter[DelimiterLength] = '\0';
634 		}
635 		~HereDocCls() {
636 		}
637 	};
638 	HereDocCls HereDoc;		// TODO: FIFO for stacked here-docs
639 
640 	class QuoteCls {	// Class to manage quote pairs
641 	public:
642 		int Rep;
643 		int Count;
644 		int Up, Down;
645 		QuoteCls() {
646 			New(1);
647 		}
648 		void New(int r = 1) {
649 			Rep   = r;
650 			Count = 0;
651 			Up    = '\0';
652 			Down  = '\0';
653 		}
654 		void Open(int u) {
655 			Count++;
656 			Up    = u;
657 			Down  = opposite(Up);
658 		}
659 	};
660 	QuoteCls Quote;
661 
662 	// additional state for number lexing
663 	int numState = PERLNUM_DECIMAL;
664 	int dotCount = 0;
665 
666 	Sci_PositionU endPos = startPos + length;
667 
668 	// Backtrack to beginning of style if required...
669 	// If in a long distance lexical state, backtrack to find quote characters.
670 	// Includes strings (may be multi-line), numbers (additional state), format
671 	// bodies, as well as POD sections.
672 	if (initStyle == SCE_PL_HERE_Q
673 	    || initStyle == SCE_PL_HERE_QQ
674 	    || initStyle == SCE_PL_HERE_QX
675 	    || initStyle == SCE_PL_FORMAT
676 	    || initStyle == SCE_PL_HERE_QQ_VAR
677 	    || initStyle == SCE_PL_HERE_QX_VAR
678 	   ) {
679 		// backtrack through multiple styles to reach the delimiter start
680 		int delim = (initStyle == SCE_PL_FORMAT) ? SCE_PL_FORMAT_IDENT:SCE_PL_HERE_DELIM;
681 		while ((startPos > 1) && (styler.StyleAt(startPos) != delim)) {
682 			startPos--;
683 		}
684 		startPos = styler.LineStart(styler.GetLine(startPos));
685 		initStyle = styler.StyleAt(startPos - 1);
686 	}
687 	if (initStyle == SCE_PL_STRING
688 	    || initStyle == SCE_PL_STRING_QQ
689 	    || initStyle == SCE_PL_BACKTICKS
690 	    || initStyle == SCE_PL_STRING_QX
691 	    || initStyle == SCE_PL_REGEX
692 	    || initStyle == SCE_PL_STRING_QR
693 	    || initStyle == SCE_PL_REGSUBST
694 	    || initStyle == SCE_PL_STRING_VAR
695 	    || initStyle == SCE_PL_STRING_QQ_VAR
696 	    || initStyle == SCE_PL_BACKTICKS_VAR
697 	    || initStyle == SCE_PL_STRING_QX_VAR
698 	    || initStyle == SCE_PL_REGEX_VAR
699 	    || initStyle == SCE_PL_STRING_QR_VAR
700 	    || initStyle == SCE_PL_REGSUBST_VAR
701 	   ) {
702 		// for interpolation, must backtrack through a mix of two different styles
703 		int otherStyle = (initStyle >= SCE_PL_STRING_VAR) ?
704 			initStyle - INTERPOLATE_SHIFT : initStyle + INTERPOLATE_SHIFT;
705 		while (startPos > 1) {
706 			int st = styler.StyleAt(startPos - 1);
707 			if ((st != initStyle) && (st != otherStyle))
708 				break;
709 			startPos--;
710 		}
711 		initStyle = SCE_PL_DEFAULT;
712 	} else if (initStyle == SCE_PL_STRING_Q
713 	        || initStyle == SCE_PL_STRING_QW
714 	        || initStyle == SCE_PL_XLAT
715 	        || initStyle == SCE_PL_CHARACTER
716 	        || initStyle == SCE_PL_NUMBER
717 	        || initStyle == SCE_PL_IDENTIFIER
718 	        || initStyle == SCE_PL_ERROR
719 	        || initStyle == SCE_PL_SUB_PROTOTYPE
720 	   ) {
721 		while ((startPos > 1) && (styler.StyleAt(startPos - 1) == initStyle)) {
722 			startPos--;
723 		}
724 		initStyle = SCE_PL_DEFAULT;
725 	} else if (initStyle == SCE_PL_POD
726 	        || initStyle == SCE_PL_POD_VERB
727 	          ) {
728 		// POD backtracking finds preceding blank lines and goes back past them
729 		Sci_Position ln = styler.GetLine(startPos);
730 		if (ln > 0) {
731 			initStyle = styler.StyleAt(styler.LineStart(--ln));
732 			if (initStyle == SCE_PL_POD || initStyle == SCE_PL_POD_VERB) {
733 				while (ln > 0 && styler.GetLineState(ln) == SCE_PL_DEFAULT)
734 					ln--;
735 			}
736 			startPos = styler.LineStart(++ln);
737 			initStyle = styler.StyleAt(startPos - 1);
738 		} else {
739 			startPos = 0;
740 			initStyle = SCE_PL_DEFAULT;
741 		}
742 	}
743 
744 	// backFlag, backPos are additional state to aid identifier corner cases.
745 	// Look backwards past whitespace and comments in order to detect either
746 	// operator or keyword. Later updated as we go along.
747 	int backFlag = BACK_NONE;
748 	Sci_PositionU backPos = startPos;
749 	if (backPos > 0) {
750 		backPos--;
751 		skipWhitespaceComment(styler, backPos);
752 		if (styler.StyleAt(backPos) == SCE_PL_OPERATOR)
753 			backFlag = BACK_OPERATOR;
754 		else if (styler.StyleAt(backPos) == SCE_PL_WORD)
755 			backFlag = BACK_KEYWORD;
756 		backPos++;
757 	}
758 
759 	StyleContext sc(startPos, endPos - startPos, initStyle, styler);
760 
761 	for (; sc.More(); sc.Forward()) {
762 
763 		// Determine if the current state should terminate.
764 		switch (sc.state) {
765 		case SCE_PL_OPERATOR:
766 			sc.SetState(SCE_PL_DEFAULT);
767 			backFlag = BACK_OPERATOR;
768 			backPos = sc.currentPos;
769 			break;
770 		case SCE_PL_IDENTIFIER:		// identifier, bareword, inputsymbol
771 			if ((!setWord.Contains(sc.ch) && sc.ch != '\'')
772 			        || sc.Match('.', '.')
773 			        || sc.chPrev == '>') {	// end of inputsymbol
774 				sc.SetState(SCE_PL_DEFAULT);
775 			}
776 			break;
777 		case SCE_PL_WORD:		// keyword, plus special cases
778 			if (!setWord.Contains(sc.ch)) {
779 				char s[100];
780 				sc.GetCurrent(s, sizeof(s));
781 				if ((strcmp(s, "__DATA__") == 0) || (strcmp(s, "__END__") == 0)) {
782 					sc.ChangeState(SCE_PL_DATASECTION);
783 				} else {
784 					if ((strcmp(s, "format") == 0)) {
785 						sc.SetState(SCE_PL_FORMAT_IDENT);
786 						HereDoc.State = 0;
787 					} else {
788 						sc.SetState(SCE_PL_DEFAULT);
789 					}
790 					backFlag = BACK_KEYWORD;
791 					backPos = sc.currentPos;
792 				}
793 			}
794 			break;
795 		case SCE_PL_SCALAR:
796 		case SCE_PL_ARRAY:
797 		case SCE_PL_HASH:
798 		case SCE_PL_SYMBOLTABLE:
799 			if (sc.Match(':', ':')) {	// skip ::
800 				sc.Forward();
801 			} else if (!setVar.Contains(sc.ch)) {
802 				if (sc.LengthCurrent() == 1) {
803 					// Special variable: $(, $_ etc.
804 					sc.Forward();
805 				}
806 				sc.SetState(SCE_PL_DEFAULT);
807 			}
808 			break;
809 		case SCE_PL_NUMBER:
810 			// if no early break, number style is terminated at "(go through)"
811 			if (sc.ch == '.') {
812 				if (sc.chNext == '.') {
813 					// double dot is always an operator (go through)
814 				} else if (numState <= PERLNUM_FLOAT_EXP) {
815 					// non-decimal number or float exponent, consume next dot
816 					sc.SetState(SCE_PL_OPERATOR);
817 					break;
818 				} else {	// decimal or vectors allows dots
819 					dotCount++;
820 					if (numState == PERLNUM_DECIMAL) {
821 						if (dotCount <= 1)	// number with one dot in it
822 							break;
823 						if (IsADigit(sc.chNext)) {	// really a vector
824 							numState = PERLNUM_VECTOR;
825 							break;
826 						}
827 						// number then dot (go through)
828 					} else if (numState == PERLNUM_HEX) {
829 						if (dotCount <= 1 && IsADigit(sc.chNext, 16)) {
830 							break;	// hex with one dot is a hex float
831 						} else {
832 							sc.SetState(SCE_PL_OPERATOR);
833 							break;
834 						}
835 						// hex then dot (go through)
836 					} else if (IsADigit(sc.chNext))	// vectors
837 						break;
838 					// vector then dot (go through)
839 				}
840 			} else if (sc.ch == '_') {
841 				// permissive underscoring for number and vector literals
842 				break;
843 			} else if (numState == PERLNUM_DECIMAL) {
844 				if (sc.ch == 'E' || sc.ch == 'e') {	// exponent, sign
845 					numState = PERLNUM_FLOAT_EXP;
846 					if (sc.chNext == '+' || sc.chNext == '-') {
847 						sc.Forward();
848 					}
849 					break;
850 				} else if (IsADigit(sc.ch))
851 					break;
852 				// number then word (go through)
853 			} else if (numState == PERLNUM_HEX) {
854 				if (sc.ch == 'P' || sc.ch == 'p') {	// hex float exponent, sign
855 					numState = PERLNUM_FLOAT_EXP;
856 					if (sc.chNext == '+' || sc.chNext == '-') {
857 						sc.Forward();
858 					}
859 					break;
860 				} else if (IsADigit(sc.ch, 16))
861 					break;
862 				// hex or hex float then word (go through)
863 			} else if (numState == PERLNUM_VECTOR || numState == PERLNUM_V_VECTOR) {
864 				if (IsADigit(sc.ch))	// vector
865 					break;
866 				if (setWord.Contains(sc.ch) && dotCount == 0) {	// change to word
867 					sc.ChangeState(SCE_PL_IDENTIFIER);
868 					break;
869 				}
870 				// vector then word (go through)
871 			} else if (IsADigit(sc.ch)) {
872 				if (numState == PERLNUM_FLOAT_EXP) {
873 					break;
874 				} else if (numState == PERLNUM_OCTAL) {
875 					if (sc.ch <= '7') break;
876 				} else if (numState == PERLNUM_BINARY) {
877 					if (sc.ch <= '1') break;
878 				}
879 				// mark invalid octal, binary numbers (go through)
880 				numState = PERLNUM_BAD;
881 				break;
882 			}
883 			// complete current number or vector
884 			sc.ChangeState(actualNumStyle(numState));
885 			sc.SetState(SCE_PL_DEFAULT);
886 			break;
887 		case SCE_PL_COMMENTLINE:
888 			if (sc.atLineEnd) {
889 				sc.SetState(SCE_PL_DEFAULT);
890 			}
891 			break;
892 		case SCE_PL_HERE_DELIM:
893 			if (HereDoc.State == 0) { // '<<' encountered
894 				int delim_ch = sc.chNext;
895 				Sci_Position ws_skip = 0;
896 				HereDoc.State = 1;	// pre-init HERE doc class
897 				HereDoc.Quote = sc.chNext;
898 				HereDoc.Quoted = false;
899 				HereDoc.DelimiterLength = 0;
900 				HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
901 				if (IsASpaceOrTab(delim_ch)) {
902 					// skip whitespace; legal only for quoted delimiters
903 					Sci_PositionU i = sc.currentPos + 1;
904 					while ((i < endPos) && IsASpaceOrTab(delim_ch)) {
905 						i++;
906 						delim_ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
907 					}
908 					ws_skip = i - sc.currentPos - 1;
909 				}
910 				if (delim_ch == '\'' || delim_ch == '"' || delim_ch == '`') {
911 					// a quoted here-doc delimiter; skip any whitespace
912 					sc.Forward(ws_skip + 1);
913 					HereDoc.Quote = delim_ch;
914 					HereDoc.Quoted = true;
915 				} else if ((ws_skip == 0 && setNonHereDoc.Contains(sc.chNext))
916 				        || ws_skip > 0) {
917 					// left shift << or <<= operator cases
918 					// restore position if operator
919 					sc.ChangeState(SCE_PL_OPERATOR);
920 					sc.ForwardSetState(SCE_PL_DEFAULT);
921 					backFlag = BACK_OPERATOR;
922 					backPos = sc.currentPos;
923 					HereDoc.State = 0;
924 				} else {
925 					// specially handle initial '\' for identifier
926 					if (ws_skip == 0 && HereDoc.Quote == '\\')
927 						sc.Forward();
928 					// an unquoted here-doc delimiter, no special handling
929 					// (cannot be prefixed by spaces/tabs), or
930 					// symbols terminates; deprecated zero-length delimiter
931 				}
932 			} else if (HereDoc.State == 1) { // collect the delimiter
933 				backFlag = BACK_NONE;
934 				if (HereDoc.Quoted) { // a quoted here-doc delimiter
935 					if (sc.ch == HereDoc.Quote) { // closing quote => end of delimiter
936 						sc.ForwardSetState(SCE_PL_DEFAULT);
937 					} else if (!sc.atLineEnd) {
938 						if (sc.Match('\\', static_cast<char>(HereDoc.Quote))) { // escaped quote
939 							sc.Forward();
940 						}
941 						if (sc.ch != '\r') {	// skip CR if CRLF
942 							int i = 0;			// else append char, possibly an extended char
943 							while (i < sc.width) {
944 								HereDoc.Append(static_cast<unsigned char>(styler.SafeGetCharAt(sc.currentPos + i)));
945 								i++;
946 							}
947 						}
948 					}
949 				} else { // an unquoted here-doc delimiter, no extended charsets
950 					if (setHereDocDelim.Contains(sc.ch)) {
951 						HereDoc.Append(sc.ch);
952 					} else {
953 						sc.SetState(SCE_PL_DEFAULT);
954 					}
955 				}
956 				if (HereDoc.DelimiterLength >= HERE_DELIM_MAX - 1) {
957 					sc.SetState(SCE_PL_ERROR);
958 					HereDoc.State = 0;
959 				}
960 			}
961 			break;
962 		case SCE_PL_HERE_Q:
963 		case SCE_PL_HERE_QQ:
964 		case SCE_PL_HERE_QX:
965 			// also implies HereDoc.State == 2
966 			sc.Complete();
967 			if (HereDoc.DelimiterLength == 0 || sc.Match(HereDoc.Delimiter)) {
968 				int c = sc.GetRelative(HereDoc.DelimiterLength);
969 				if (c == '\r' || c == '\n') {	// peek first, do not consume match
970 					sc.ForwardBytes(HereDoc.DelimiterLength);
971 					sc.SetState(SCE_PL_DEFAULT);
972 					backFlag = BACK_NONE;
973 					HereDoc.State = 0;
974 					if (!sc.atLineEnd)
975 						sc.Forward();
976 					break;
977 				}
978 			}
979 			if (sc.state == SCE_PL_HERE_Q) {	// \EOF and 'EOF' non-interpolated
980 				while (!sc.atLineEnd)
981 					sc.Forward();
982 				break;
983 			}
984 			while (!sc.atLineEnd) {		// "EOF" and `EOF` interpolated
985 				int c, sLen = 0, endType = 0;
986 				while ((c = sc.GetRelativeCharacter(sLen)) != 0) {
987 					// scan to break string into segments
988 					if (c == '\\') {
989 						endType = 1; break;
990 					} else if (c == '\r' || c == '\n') {
991 						endType = 2; break;
992 					}
993 					sLen++;
994 				}
995 				if (sLen > 0)	// process non-empty segments
996 					InterpolateSegment(sc, sLen);
997 				if (endType == 1) {
998 					sc.Forward();
999 					// \ at end-of-line does not appear to have any effect, skip
1000 					if (sc.ch != '\r' && sc.ch != '\n')
1001 						sc.Forward();
1002 				} else if (endType == 2) {
1003 					if (!sc.atLineEnd)
1004 						sc.Forward();
1005 				}
1006 			}
1007 			break;
1008 		case SCE_PL_POD:
1009 		case SCE_PL_POD_VERB: {
1010 				Sci_PositionU fw = sc.currentPos;
1011 				Sci_Position ln = styler.GetLine(fw);
1012 				if (sc.atLineStart && sc.Match("=cut")) {	// end of POD
1013 					sc.SetState(SCE_PL_POD);
1014 					sc.Forward(4);
1015 					sc.SetState(SCE_PL_DEFAULT);
1016 					styler.SetLineState(ln, SCE_PL_POD);
1017 					break;
1018 				}
1019 				int pod = podLineScan(styler, fw, endPos);	// classify POD line
1020 				styler.SetLineState(ln, pod);
1021 				if (pod == SCE_PL_DEFAULT) {
1022 					if (sc.state == SCE_PL_POD_VERB) {
1023 						Sci_PositionU fw2 = fw;
1024 						while (fw2 < (endPos - 1) && pod == SCE_PL_DEFAULT) {
1025 							fw = fw2++;	// penultimate line (last blank line)
1026 							pod = podLineScan(styler, fw2, endPos);
1027 							styler.SetLineState(styler.GetLine(fw2), pod);
1028 						}
1029 						if (pod == SCE_PL_POD) {	// truncate verbatim POD early
1030 							sc.SetState(SCE_PL_POD);
1031 						} else
1032 							fw = fw2;
1033 					}
1034 				} else {
1035 					if (pod == SCE_PL_POD_VERB	// still part of current paragraph
1036 					        && (styler.GetLineState(ln - 1) == SCE_PL_POD)) {
1037 						pod = SCE_PL_POD;
1038 						styler.SetLineState(ln, pod);
1039 					} else if (pod == SCE_PL_POD
1040 					        && (styler.GetLineState(ln - 1) == SCE_PL_POD_VERB)) {
1041 						pod = SCE_PL_POD_VERB;
1042 						styler.SetLineState(ln, pod);
1043 					}
1044 					sc.SetState(pod);
1045 				}
1046 				sc.ForwardBytes(fw - sc.currentPos);	// commit style
1047 			}
1048 			break;
1049 		case SCE_PL_REGEX:
1050 		case SCE_PL_STRING_QR:
1051 			if (Quote.Rep <= 0) {
1052 				if (!setModifiers.Contains(sc.ch))
1053 					sc.SetState(SCE_PL_DEFAULT);
1054 			} else if (!Quote.Up && !IsASpace(sc.ch)) {
1055 				Quote.Open(sc.ch);
1056 			} else {
1057 				int c, sLen = 0, endType = 0;
1058 				while ((c = sc.GetRelativeCharacter(sLen)) != 0) {
1059 					// scan to break string into segments
1060 					if (IsASpace(c)) {
1061 						break;
1062 					} else if (c == '\\' && Quote.Up != '\\') {
1063 						endType = 1; break;
1064 					} else if (c == Quote.Down) {
1065 						Quote.Count--;
1066 						if (Quote.Count == 0) {
1067 							Quote.Rep--;
1068 							break;
1069 						}
1070 					} else if (c == Quote.Up)
1071 						Quote.Count++;
1072 					sLen++;
1073 				}
1074 				if (sLen > 0) {	// process non-empty segments
1075 					if (Quote.Up != '\'') {
1076 						InterpolateSegment(sc, sLen, true);
1077 					} else		// non-interpolated path
1078 						sc.Forward(sLen);
1079 				}
1080 				if (endType == 1)
1081 					sc.Forward();
1082 			}
1083 			break;
1084 		case SCE_PL_REGSUBST:
1085 		case SCE_PL_XLAT:
1086 			if (Quote.Rep <= 0) {
1087 				if (!setModifiers.Contains(sc.ch))
1088 					sc.SetState(SCE_PL_DEFAULT);
1089 			} else if (!Quote.Up && !IsASpace(sc.ch)) {
1090 				Quote.Open(sc.ch);
1091 			} else {
1092 				int c, sLen = 0, endType = 0;
1093 				bool isPattern = (Quote.Rep == 2);
1094 				while ((c = sc.GetRelativeCharacter(sLen)) != 0) {
1095 					// scan to break string into segments
1096 					if (c == '\\' && Quote.Up != '\\') {
1097 						endType = 2; break;
1098 					} else if (Quote.Count == 0 && Quote.Rep == 1) {
1099 						// We matched something like s(...) or tr{...}, Perl 5.10
1100 						// appears to allow almost any character for use as the
1101 						// next delimiters. Whitespace and comments are accepted in
1102 						// between, but we'll limit to whitespace here.
1103 						// For '#', if no whitespace in between, it's a delimiter.
1104 						if (IsASpace(c)) {
1105 							// Keep going
1106 						} else if (c == '#' && IsASpaceOrTab(sc.GetRelativeCharacter(sLen - 1))) {
1107 							endType = 3;
1108 						} else
1109 							Quote.Open(c);
1110 						break;
1111 					} else if (c == Quote.Down) {
1112 						Quote.Count--;
1113 						if (Quote.Count == 0) {
1114 							Quote.Rep--;
1115 							endType = 1;
1116 						}
1117 						if (Quote.Up == Quote.Down)
1118 							Quote.Count++;
1119 						if (endType == 1)
1120 							break;
1121 					} else if (c == Quote.Up) {
1122 						Quote.Count++;
1123 					} else if (IsASpace(c))
1124 						break;
1125 					sLen++;
1126 				}
1127 				if (sLen > 0) {	// process non-empty segments
1128 					if (sc.state == SCE_PL_REGSUBST && Quote.Up != '\'') {
1129 						InterpolateSegment(sc, sLen, isPattern);
1130 					} else		// non-interpolated path
1131 						sc.Forward(sLen);
1132 				}
1133 				if (endType == 2) {
1134 					sc.Forward();
1135 				} else if (endType == 3)
1136 					sc.SetState(SCE_PL_DEFAULT);
1137 			}
1138 			break;
1139 		case SCE_PL_STRING_Q:
1140 		case SCE_PL_STRING_QQ:
1141 		case SCE_PL_STRING_QX:
1142 		case SCE_PL_STRING_QW:
1143 		case SCE_PL_STRING:
1144 		case SCE_PL_CHARACTER:
1145 		case SCE_PL_BACKTICKS:
1146 			if (!Quote.Down && !IsASpace(sc.ch)) {
1147 				Quote.Open(sc.ch);
1148 			} else {
1149 				int c, sLen = 0, endType = 0;
1150 				while ((c = sc.GetRelativeCharacter(sLen)) != 0) {
1151 					// scan to break string into segments
1152 					if (IsASpace(c)) {
1153 						break;
1154 					} else if (c == '\\' && Quote.Up != '\\') {
1155 						endType = 2; break;
1156 					} else if (c == Quote.Down) {
1157 						Quote.Count--;
1158 						if (Quote.Count == 0) {
1159 							endType = 3; break;
1160 						}
1161 					} else if (c == Quote.Up)
1162 						Quote.Count++;
1163 					sLen++;
1164 				}
1165 				if (sLen > 0) {	// process non-empty segments
1166 					switch (sc.state) {
1167 					case SCE_PL_STRING:
1168 					case SCE_PL_STRING_QQ:
1169 					case SCE_PL_BACKTICKS:
1170 						InterpolateSegment(sc, sLen);
1171 						break;
1172 					case SCE_PL_STRING_QX:
1173 						if (Quote.Up != '\'') {
1174 							InterpolateSegment(sc, sLen);
1175 							break;
1176 						}
1177 						// (continued for ' delim)
1178 						// Falls through.
1179 					default:	// non-interpolated path
1180 						sc.Forward(sLen);
1181 					}
1182 				}
1183 				if (endType == 2) {
1184 					sc.Forward();
1185 				} else if (endType == 3)
1186 					sc.ForwardSetState(SCE_PL_DEFAULT);
1187 			}
1188 			break;
1189 		case SCE_PL_SUB_PROTOTYPE: {
1190 				int i = 0;
1191 				// forward scan; must all be valid proto characters
1192 				while (setSubPrototype.Contains(sc.GetRelative(i)))
1193 					i++;
1194 				if (sc.GetRelative(i) == ')') {	// valid sub prototype
1195 					sc.ForwardBytes(i);
1196 					sc.ForwardSetState(SCE_PL_DEFAULT);
1197 				} else {
1198 					// abandon prototype, restart from '('
1199 					sc.ChangeState(SCE_PL_OPERATOR);
1200 					sc.SetState(SCE_PL_DEFAULT);
1201 				}
1202 			}
1203 			break;
1204 		case SCE_PL_FORMAT: {
1205 				sc.Complete();
1206 				if (sc.Match('.')) {
1207 					sc.Forward();
1208 					if (sc.atLineEnd || ((sc.ch == '\r' && sc.chNext == '\n')))
1209 						sc.SetState(SCE_PL_DEFAULT);
1210 				}
1211 				while (!sc.atLineEnd)
1212 					sc.Forward();
1213 			}
1214 			break;
1215 		case SCE_PL_ERROR:
1216 			break;
1217 		}
1218 		// Needed for specific continuation styles (one follows the other)
1219 		switch (sc.state) {
1220 			// continued from SCE_PL_WORD
1221 		case SCE_PL_FORMAT_IDENT:
1222 			// occupies HereDoc state 3 to avoid clashing with HERE docs
1223 			if (IsASpaceOrTab(sc.ch)) {		// skip whitespace
1224 				sc.ChangeState(SCE_PL_DEFAULT);
1225 				while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd)
1226 					sc.Forward();
1227 				sc.SetState(SCE_PL_FORMAT_IDENT);
1228 			}
1229 			if (setFormatStart.Contains(sc.ch)) {	// identifier or '='
1230 				if (sc.ch != '=') {
1231 					do {
1232 						sc.Forward();
1233 					} while (setFormat.Contains(sc.ch));
1234 				}
1235 				while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd)
1236 					sc.Forward();
1237 				if (sc.ch == '=') {
1238 					sc.ForwardSetState(SCE_PL_DEFAULT);
1239 					HereDoc.State = 3;
1240 				} else {
1241 					// invalid identifier; inexact fallback, but hey
1242 					sc.ChangeState(SCE_PL_IDENTIFIER);
1243 					sc.SetState(SCE_PL_DEFAULT);
1244 				}
1245 			} else {
1246 				sc.ChangeState(SCE_PL_DEFAULT);	// invalid identifier
1247 			}
1248 			backFlag = BACK_NONE;
1249 			break;
1250 		}
1251 
1252 		// Must check end of HereDoc states here before default state is handled
1253 		if (HereDoc.State == 1 && sc.atLineEnd) {
1254 			// Begin of here-doc (the line after the here-doc delimiter):
1255 			// Lexically, the here-doc starts from the next line after the <<, but the
1256 			// first line of the here-doc seems to follow the style of the last EOL sequence
1257 			int st_new = SCE_PL_HERE_QQ;
1258 			HereDoc.State = 2;
1259 			if (HereDoc.Quoted) {
1260 				if (sc.state == SCE_PL_HERE_DELIM) {
1261 					// Missing quote at end of string! We are stricter than perl.
1262 					// Colour here-doc anyway while marking this bit as an error.
1263 					sc.ChangeState(SCE_PL_ERROR);
1264 				}
1265 				switch (HereDoc.Quote) {
1266 				case '\'':
1267 					st_new = SCE_PL_HERE_Q;
1268 					break;
1269 				case '"' :
1270 					st_new = SCE_PL_HERE_QQ;
1271 					break;
1272 				case '`' :
1273 					st_new = SCE_PL_HERE_QX;
1274 					break;
1275 				}
1276 			} else {
1277 				if (HereDoc.Quote == '\\')
1278 					st_new = SCE_PL_HERE_Q;
1279 			}
1280 			sc.SetState(st_new);
1281 		}
1282 		if (HereDoc.State == 3 && sc.atLineEnd) {
1283 			// Start of format body.
1284 			HereDoc.State = 0;
1285 			sc.SetState(SCE_PL_FORMAT);
1286 		}
1287 
1288 		// Determine if a new state should be entered.
1289 		if (sc.state == SCE_PL_DEFAULT) {
1290 			if (IsADigit(sc.ch) ||
1291 			        (IsADigit(sc.chNext) && (sc.ch == '.' || sc.ch == 'v'))) {
1292 				sc.SetState(SCE_PL_NUMBER);
1293 				backFlag = BACK_NONE;
1294 				numState = PERLNUM_DECIMAL;
1295 				dotCount = 0;
1296 				if (sc.ch == '0') {		// hex,bin,octal
1297 					if (sc.chNext == 'x' || sc.chNext == 'X') {
1298 						numState = PERLNUM_HEX;
1299 					} else if (sc.chNext == 'b' || sc.chNext == 'B') {
1300 						numState = PERLNUM_BINARY;
1301 					} else if (IsADigit(sc.chNext)) {
1302 						numState = PERLNUM_OCTAL;
1303 					}
1304 					if (numState != PERLNUM_DECIMAL) {
1305 						sc.Forward();
1306 					}
1307 				} else if (sc.ch == 'v') {		// vector
1308 					numState = PERLNUM_V_VECTOR;
1309 				}
1310 			} else if (setWord.Contains(sc.ch)) {
1311 				// if immediately prefixed by '::', always a bareword
1312 				sc.SetState(SCE_PL_WORD);
1313 				if (sc.chPrev == ':' && sc.GetRelative(-2) == ':') {
1314 					sc.ChangeState(SCE_PL_IDENTIFIER);
1315 				}
1316 				Sci_PositionU bk = sc.currentPos;
1317 				Sci_PositionU fw = sc.currentPos + 1;
1318 				// first check for possible quote-like delimiter
1319 				if (sc.ch == 's' && !setWord.Contains(sc.chNext)) {
1320 					sc.ChangeState(SCE_PL_REGSUBST);
1321 					Quote.New(2);
1322 				} else if (sc.ch == 'm' && !setWord.Contains(sc.chNext)) {
1323 					sc.ChangeState(SCE_PL_REGEX);
1324 					Quote.New();
1325 				} else if (sc.ch == 'q' && !setWord.Contains(sc.chNext)) {
1326 					sc.ChangeState(SCE_PL_STRING_Q);
1327 					Quote.New();
1328 				} else if (sc.ch == 'y' && !setWord.Contains(sc.chNext)) {
1329 					sc.ChangeState(SCE_PL_XLAT);
1330 					Quote.New(2);
1331 				} else if (sc.Match('t', 'r') && !setWord.Contains(sc.GetRelative(2))) {
1332 					sc.ChangeState(SCE_PL_XLAT);
1333 					Quote.New(2);
1334 					sc.Forward();
1335 					fw++;
1336 				} else if (sc.ch == 'q' && setQDelim.Contains(sc.chNext)
1337 				        && !setWord.Contains(sc.GetRelative(2))) {
1338 					if (sc.chNext == 'q') sc.ChangeState(SCE_PL_STRING_QQ);
1339 					else if (sc.chNext == 'x') sc.ChangeState(SCE_PL_STRING_QX);
1340 					else if (sc.chNext == 'r') sc.ChangeState(SCE_PL_STRING_QR);
1341 					else sc.ChangeState(SCE_PL_STRING_QW);	// sc.chNext == 'w'
1342 					Quote.New();
1343 					sc.Forward();
1344 					fw++;
1345 				} else if (sc.ch == 'x' && (sc.chNext == '=' ||	// repetition
1346 				        !setWord.Contains(sc.chNext) ||
1347 				        (setRepetition.Contains(sc.chPrev) && IsADigit(sc.chNext)))) {
1348 					sc.ChangeState(SCE_PL_OPERATOR);
1349 				}
1350 				// if potentially a keyword, scan forward and grab word, then check
1351 				// if it's really one; if yes, disambiguation test is performed
1352 				// otherwise it is always a bareword and we skip a lot of scanning
1353 				if (sc.state == SCE_PL_WORD) {
1354 					while (setWord.Contains(static_cast<unsigned char>(styler.SafeGetCharAt(fw))))
1355 						fw++;
1356 					if (!isPerlKeyword(styler.GetStartSegment(), fw, keywords, styler)) {
1357 						sc.ChangeState(SCE_PL_IDENTIFIER);
1358 					}
1359 				}
1360 				// if already SCE_PL_IDENTIFIER, then no ambiguity, skip this
1361 				// for quote-like delimiters/keywords, attempt to disambiguate
1362 				// to select for bareword, change state -> SCE_PL_IDENTIFIER
1363 				if (sc.state != SCE_PL_IDENTIFIER && bk > 0) {
1364 					if (disambiguateBareword(styler, bk, fw, backFlag, backPos, endPos))
1365 						sc.ChangeState(SCE_PL_IDENTIFIER);
1366 				}
1367 				backFlag = BACK_NONE;
1368 			} else if (sc.ch == '#') {
1369 				sc.SetState(SCE_PL_COMMENTLINE);
1370 			} else if (sc.ch == '\"') {
1371 				sc.SetState(SCE_PL_STRING);
1372 				Quote.New();
1373 				Quote.Open(sc.ch);
1374 				backFlag = BACK_NONE;
1375 			} else if (sc.ch == '\'') {
1376 				if (sc.chPrev == '&' && setWordStart.Contains(sc.chNext)) {
1377 					// Archaic call
1378 					sc.SetState(SCE_PL_IDENTIFIER);
1379 				} else {
1380 					sc.SetState(SCE_PL_CHARACTER);
1381 					Quote.New();
1382 					Quote.Open(sc.ch);
1383 				}
1384 				backFlag = BACK_NONE;
1385 			} else if (sc.ch == '`') {
1386 				sc.SetState(SCE_PL_BACKTICKS);
1387 				Quote.New();
1388 				Quote.Open(sc.ch);
1389 				backFlag = BACK_NONE;
1390 			} else if (sc.ch == '$') {
1391 				sc.SetState(SCE_PL_SCALAR);
1392 				if (sc.chNext == '{') {
1393 					sc.ForwardSetState(SCE_PL_OPERATOR);
1394 				} else if (IsASpace(sc.chNext)) {
1395 					sc.ForwardSetState(SCE_PL_DEFAULT);
1396 				} else {
1397 					sc.Forward();
1398 					if (sc.Match('`', '`') || sc.Match(':', ':')) {
1399 						sc.Forward();
1400 					}
1401 				}
1402 				backFlag = BACK_NONE;
1403 			} else if (sc.ch == '@') {
1404 				sc.SetState(SCE_PL_ARRAY);
1405 				if (setArray.Contains(sc.chNext)) {
1406 					// no special treatment
1407 				} else if (sc.chNext == ':' && sc.GetRelative(2) == ':') {
1408 					sc.ForwardBytes(2);
1409 				} else if (sc.chNext == '{' || sc.chNext == '[') {
1410 					sc.ForwardSetState(SCE_PL_OPERATOR);
1411 				} else {
1412 					sc.ChangeState(SCE_PL_OPERATOR);
1413 				}
1414 				backFlag = BACK_NONE;
1415 			} else if (setPreferRE.Contains(sc.ch)) {
1416 				// Explicit backward peeking to set a consistent preferRE for
1417 				// any slash found, so no longer need to track preferRE state.
1418 				// Find first previous significant lexed element and interpret.
1419 				// A few symbols share this code for disambiguation.
1420 				bool preferRE = false;
1421 				bool isHereDoc = sc.Match('<', '<');
1422 				bool hereDocSpace = false;		// for: SCALAR [whitespace] '<<'
1423 				Sci_PositionU bk = (sc.currentPos > 0) ? sc.currentPos - 1: 0;
1424 				sc.Complete();
1425 				styler.Flush();
1426 				if (styler.StyleAt(bk) == SCE_PL_DEFAULT)
1427 					hereDocSpace = true;
1428 				skipWhitespaceComment(styler, bk);
1429 				if (bk == 0) {
1430 					// avoid backward scanning breakage
1431 					preferRE = true;
1432 				} else {
1433 					int bkstyle = styler.StyleAt(bk);
1434 					int bkch = static_cast<unsigned char>(styler.SafeGetCharAt(bk));
1435 					switch (bkstyle) {
1436 					case SCE_PL_OPERATOR:
1437 						preferRE = true;
1438 						if (bkch == ')' || bkch == ']') {
1439 							preferRE = false;
1440 						} else if (bkch == '}') {
1441 							// backtrack by counting balanced brace pairs
1442 							// needed to test for variables like ${}, @{} etc.
1443 							bkstyle = styleBeforeBracePair(styler, bk);
1444 							if (bkstyle == SCE_PL_SCALAR
1445 							        || bkstyle == SCE_PL_ARRAY
1446 							        || bkstyle == SCE_PL_HASH
1447 							        || bkstyle == SCE_PL_SYMBOLTABLE
1448 							        || bkstyle == SCE_PL_OPERATOR) {
1449 								preferRE = false;
1450 							}
1451 						} else if (bkch == '+' || bkch == '-') {
1452 							if (bkch == static_cast<unsigned char>(styler.SafeGetCharAt(bk - 1))
1453 							        && bkch != static_cast<unsigned char>(styler.SafeGetCharAt(bk - 2)))
1454 								// exceptions for operators: unary suffixes ++, --
1455 								preferRE = false;
1456 						}
1457 						break;
1458 					case SCE_PL_IDENTIFIER:
1459 						preferRE = true;
1460 						bkstyle = styleCheckIdentifier(styler, bk);
1461 						if ((bkstyle == 1) || (bkstyle == 2)) {
1462 							// inputsymbol or var with "->" or "::" before identifier
1463 							preferRE = false;
1464 						} else if (bkstyle == 3) {
1465 							// bare identifier, test cases follows:
1466 							if (sc.ch == '/') {
1467 								// if '/', /PATTERN/ unless digit/space immediately after '/'
1468 								// if '//', always expect defined-or operator to follow identifier
1469 								if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.chNext == '/')
1470 									preferRE = false;
1471 							} else if (sc.ch == '*' || sc.ch == '%') {
1472 								if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.Match('*', '*'))
1473 									preferRE = false;
1474 							} else if (sc.ch == '<') {
1475 								if (IsASpace(sc.chNext) || sc.chNext == '=')
1476 									preferRE = false;
1477 							}
1478 						}
1479 						break;
1480 					case SCE_PL_SCALAR:		// for $var<< case:
1481 						if (isHereDoc && hereDocSpace)	// if SCALAR whitespace '<<', *always* a HERE doc
1482 							preferRE = true;
1483 						break;
1484 					case SCE_PL_WORD:
1485 						preferRE = true;
1486 						// for HERE docs, always true
1487 						if (sc.ch == '/') {
1488 							// adopt heuristics similar to vim-style rules:
1489 							// keywords always forced as /PATTERN/: split, if, elsif, while
1490 							// everything else /PATTERN/ unless digit/space immediately after '/'
1491 							// for '//', defined-or favoured unless special keywords
1492 							Sci_PositionU bkend = bk + 1;
1493 							while (bk > 0 && styler.StyleAt(bk - 1) == SCE_PL_WORD) {
1494 								bk--;
1495 							}
1496 							if (isPerlKeyword(bk, bkend, reWords, styler))
1497 								break;
1498 							if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.chNext == '/')
1499 								preferRE = false;
1500 						} else if (sc.ch == '*' || sc.ch == '%') {
1501 							if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.Match('*', '*'))
1502 								preferRE = false;
1503 						} else if (sc.ch == '<') {
1504 							if (IsASpace(sc.chNext) || sc.chNext == '=')
1505 								preferRE = false;
1506 						}
1507 						break;
1508 
1509 						// other styles uses the default, preferRE=false
1510 					case SCE_PL_POD:
1511 					case SCE_PL_HERE_Q:
1512 					case SCE_PL_HERE_QQ:
1513 					case SCE_PL_HERE_QX:
1514 						preferRE = true;
1515 						break;
1516 					}
1517 				}
1518 				backFlag = BACK_NONE;
1519 				if (isHereDoc) {	// handle '<<', HERE doc
1520 					if (sc.Match("<<>>")) {		// double-diamond operator (5.22)
1521 						sc.SetState(SCE_PL_OPERATOR);
1522 						sc.Forward(3);
1523 					} else if (preferRE) {
1524 						sc.SetState(SCE_PL_HERE_DELIM);
1525 						HereDoc.State = 0;
1526 					} else {		// << operator
1527 						sc.SetState(SCE_PL_OPERATOR);
1528 						sc.Forward();
1529 					}
1530 				} else if (sc.ch == '*') {	// handle '*', typeglob
1531 					if (preferRE) {
1532 						sc.SetState(SCE_PL_SYMBOLTABLE);
1533 						if (sc.chNext == ':' && sc.GetRelative(2) == ':') {
1534 							sc.ForwardBytes(2);
1535 						} else if (sc.chNext == '{') {
1536 							sc.ForwardSetState(SCE_PL_OPERATOR);
1537 						} else {
1538 							sc.Forward();
1539 						}
1540 					} else {
1541 						sc.SetState(SCE_PL_OPERATOR);
1542 						if (sc.chNext == '*') 	// exponentiation
1543 							sc.Forward();
1544 					}
1545 				} else if (sc.ch == '%') {	// handle '%', hash
1546 					if (preferRE) {
1547 						sc.SetState(SCE_PL_HASH);
1548 						if (setHash.Contains(sc.chNext)) {
1549 							sc.Forward();
1550 						} else if (sc.chNext == ':' && sc.GetRelative(2) == ':') {
1551 							sc.ForwardBytes(2);
1552 						} else if (sc.chNext == '{') {
1553 							sc.ForwardSetState(SCE_PL_OPERATOR);
1554 						} else {
1555 							sc.ChangeState(SCE_PL_OPERATOR);
1556 						}
1557 					} else {
1558 						sc.SetState(SCE_PL_OPERATOR);
1559 					}
1560 				} else if (sc.ch == '<') {	// handle '<', inputsymbol
1561 					if (preferRE) {
1562 						// forward scan
1563 						int i = InputSymbolScan(sc);
1564 						if (i > 0) {
1565 							sc.SetState(SCE_PL_IDENTIFIER);
1566 							sc.Forward(i);
1567 						} else {
1568 							sc.SetState(SCE_PL_OPERATOR);
1569 						}
1570 					} else {
1571 						sc.SetState(SCE_PL_OPERATOR);
1572 					}
1573 				} else {			// handle '/', regexp
1574 					if (preferRE) {
1575 						sc.SetState(SCE_PL_REGEX);
1576 						Quote.New();
1577 						Quote.Open(sc.ch);
1578 					} else {		// / and // operators
1579 						sc.SetState(SCE_PL_OPERATOR);
1580 						if (sc.chNext == '/') {
1581 							sc.Forward();
1582 						}
1583 					}
1584 				}
1585 			} else if (sc.ch == '='		// POD
1586 			        && setPOD.Contains(sc.chNext)
1587 			        && sc.atLineStart) {
1588 				sc.SetState(SCE_PL_POD);
1589 				backFlag = BACK_NONE;
1590 			} else if (sc.ch == '-' && setWordStart.Contains(sc.chNext)) {	// extended '-' cases
1591 				Sci_PositionU bk = sc.currentPos;
1592 				Sci_PositionU fw = 2;
1593 				if (setSingleCharOp.Contains(sc.chNext) &&	// file test operators
1594 				        !setWord.Contains(sc.GetRelative(2))) {
1595 					sc.SetState(SCE_PL_WORD);
1596 				} else {
1597 					// nominally a minus and bareword; find extent of bareword
1598 					while (setWord.Contains(sc.GetRelative(fw)))
1599 						fw++;
1600 					sc.SetState(SCE_PL_OPERATOR);
1601 				}
1602 				// force to bareword for hash key => or {variable literal} cases
1603 				if (disambiguateBareword(styler, bk, bk + fw, backFlag, backPos, endPos) & 2) {
1604 					sc.ChangeState(SCE_PL_IDENTIFIER);
1605 				}
1606 				backFlag = BACK_NONE;
1607 			} else if (sc.ch == '(' && sc.currentPos > 0) {	// '(' or subroutine prototype
1608 				sc.Complete();
1609 				if (styleCheckSubPrototype(styler, sc.currentPos - 1)) {
1610 					sc.SetState(SCE_PL_SUB_PROTOTYPE);
1611 					backFlag = BACK_NONE;
1612 				} else {
1613 					sc.SetState(SCE_PL_OPERATOR);
1614 				}
1615 			} else if (setPerlOperator.Contains(sc.ch)) {	// operators
1616 				sc.SetState(SCE_PL_OPERATOR);
1617 				if (sc.Match('.', '.')) {	// .. and ...
1618 					sc.Forward();
1619 					if (sc.chNext == '.') sc.Forward();
1620 				}
1621 			} else if (sc.ch == 4 || sc.ch == 26) {		// ^D and ^Z ends valid perl source
1622 				sc.SetState(SCE_PL_DATASECTION);
1623 			} else {
1624 				// keep colouring defaults
1625 				sc.Complete();
1626 			}
1627 		}
1628 	}
1629 	sc.Complete();
1630 	if (sc.state == SCE_PL_HERE_Q
1631 	        || sc.state == SCE_PL_HERE_QQ
1632 	        || sc.state == SCE_PL_HERE_QX
1633 	        || sc.state == SCE_PL_FORMAT) {
1634 		styler.ChangeLexerState(sc.currentPos, styler.Length());
1635 	}
1636 	sc.Complete();
1637 }
1638 
// POD heading fold encoding used by LexerPerl::Fold(): the heading level of a
// "=head" line is stored in bits 7-4 of the running fold level.
// PERL_HEADFOLD_SHIFT moves the heading number into that bit field, and
// PERL_HEADFOLD_MASK selects the field so it can be cleared again (for
// example when a "=cut" line closes the POD section).
1639 #define PERL_HEADFOLD_SHIFT		4
1640 #define PERL_HEADFOLD_MASK		0xF0
1641 
Fold(Sci_PositionU startPos,Sci_Position length,int,IDocument * pAccess)1642 void SCI_METHOD LexerPerl::Fold(Sci_PositionU startPos, Sci_Position length, int /* initStyle */, IDocument *pAccess) {
1643 
1644 	if (!options.fold)
1645 		return;
1646 
1647 	LexAccessor styler(pAccess);
1648 
1649 	Sci_PositionU endPos = startPos + length;
1650 	int visibleChars = 0;
1651 	Sci_Position lineCurrent = styler.GetLine(startPos);
1652 
1653 	// Backtrack to previous line in case need to fix its fold status
1654 	if (startPos > 0) {
1655 		if (lineCurrent > 0) {
1656 			lineCurrent--;
1657 			startPos = styler.LineStart(lineCurrent);
1658 		}
1659 	}
1660 
1661 	int levelPrev = SC_FOLDLEVELBASE;
1662 	if (lineCurrent > 0)
1663 		levelPrev = styler.LevelAt(lineCurrent - 1) >> 16;
1664 	int levelCurrent = levelPrev;
1665 	char chNext = styler[startPos];
1666 	char chPrev = styler.SafeGetCharAt(startPos - 1);
1667 	int styleNext = styler.StyleAt(startPos);
1668 	// Used at end of line to determine if the line was a package definition
1669 	bool isPackageLine = false;
1670 	int podHeading = 0;
1671 	for (Sci_PositionU i = startPos; i < endPos; i++) {
1672 		char ch = chNext;
1673 		chNext = styler.SafeGetCharAt(i + 1);
1674 		int style = styleNext;
1675 		styleNext = styler.StyleAt(i + 1);
1676 		int stylePrevCh = (i) ? styler.StyleAt(i - 1):SCE_PL_DEFAULT;
1677 		bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
1678 		bool atLineStart = ((chPrev == '\r') || (chPrev == '\n')) || i == 0;
1679 		// Comment folding
1680 		if (options.foldComment && atEOL && IsCommentLine(lineCurrent, styler)) {
1681 			if (!IsCommentLine(lineCurrent - 1, styler)
1682 			        && IsCommentLine(lineCurrent + 1, styler))
1683 				levelCurrent++;
1684 			else if (IsCommentLine(lineCurrent - 1, styler)
1685 			        && !IsCommentLine(lineCurrent + 1, styler))
1686 				levelCurrent--;
1687 		}
1688 		// {} [] block folding
1689 		if (style == SCE_PL_OPERATOR) {
1690 			if (ch == '{') {
1691 				if (options.foldAtElse && levelCurrent < levelPrev)
1692 					--levelPrev;
1693 				levelCurrent++;
1694 			} else if (ch == '}') {
1695 				levelCurrent--;
1696 			}
1697 			if (ch == '[') {
1698 				if (options.foldAtElse && levelCurrent < levelPrev)
1699 					--levelPrev;
1700 				levelCurrent++;
1701 			} else if (ch == ']') {
1702 				levelCurrent--;
1703 			}
1704 		}
1705 		// POD folding
1706 		if (options.foldPOD && atLineStart) {
1707 			if (style == SCE_PL_POD) {
1708 				if (stylePrevCh != SCE_PL_POD && stylePrevCh != SCE_PL_POD_VERB)
1709 					levelCurrent++;
1710 				else if (styler.Match(i, "=cut"))
1711 					levelCurrent = (levelCurrent & ~PERL_HEADFOLD_MASK) - 1;
1712 				else if (styler.Match(i, "=head"))
1713 					podHeading = PodHeadingLevel(i, styler);
1714 			} else if (style == SCE_PL_DATASECTION) {
1715 				if (ch == '=' && IsASCII(chNext) && isalpha(chNext) && levelCurrent == SC_FOLDLEVELBASE)
1716 					levelCurrent++;
1717 				else if (styler.Match(i, "=cut") && levelCurrent > SC_FOLDLEVELBASE)
1718 					levelCurrent = (levelCurrent & ~PERL_HEADFOLD_MASK) - 1;
1719 				else if (styler.Match(i, "=head"))
1720 					podHeading = PodHeadingLevel(i, styler);
1721 				// if package used or unclosed brace, level > SC_FOLDLEVELBASE!
1722 				// reset needed as level test is vs. SC_FOLDLEVELBASE
1723 				else if (stylePrevCh != SCE_PL_DATASECTION)
1724 					levelCurrent = SC_FOLDLEVELBASE;
1725 			}
1726 		}
1727 		// package folding
1728 		if (options.foldPackage && atLineStart) {
1729 			if (IsPackageLine(lineCurrent, styler)
1730 			        && !IsPackageLine(lineCurrent + 1, styler))
1731 				isPackageLine = true;
1732 		}
1733 
1734 		//heredoc folding
1735 		switch (style) {
1736 		case SCE_PL_HERE_QQ :
1737 		case SCE_PL_HERE_Q :
1738 		case SCE_PL_HERE_QX :
1739 			switch (stylePrevCh) {
1740 			case SCE_PL_HERE_QQ :
1741 			case SCE_PL_HERE_Q :
1742 			case SCE_PL_HERE_QX :
1743 				//do nothing;
1744 				break;
1745 			default :
1746 				levelCurrent++;
1747 				break;
1748 			}
1749 			break;
1750 		default:
1751 			switch (stylePrevCh) {
1752 			case SCE_PL_HERE_QQ :
1753 			case SCE_PL_HERE_Q :
1754 			case SCE_PL_HERE_QX :
1755 				levelCurrent--;
1756 				break;
1757 			default :
1758 				//do nothing;
1759 				break;
1760 			}
1761 			break;
1762 		}
1763 
1764 		//explicit folding
1765 		if (options.foldCommentExplicit && style == SCE_PL_COMMENTLINE && ch == '#') {
1766 			if (chNext == '{') {
1767 				levelCurrent++;
1768 			} else if (levelCurrent > SC_FOLDLEVELBASE  && chNext == '}') {
1769 				levelCurrent--;
1770 			}
1771 		}
1772 
1773 		if (atEOL) {
1774 			int lev = levelPrev;
1775 			// POD headings occupy bits 7-4, leaving some breathing room for
1776 			// non-standard practice -- POD sections stuck in blocks, etc.
1777 			if (podHeading > 0) {
1778 				levelCurrent = (lev & ~PERL_HEADFOLD_MASK) | (podHeading << PERL_HEADFOLD_SHIFT);
1779 				lev = levelCurrent - 1;
1780 				lev |= SC_FOLDLEVELHEADERFLAG;
1781 				podHeading = 0;
1782 			}
1783 			// Check if line was a package declaration
1784 			// because packages need "special" treatment
1785 			if (isPackageLine) {
1786 				lev = SC_FOLDLEVELBASE | SC_FOLDLEVELHEADERFLAG;
1787 				levelCurrent = SC_FOLDLEVELBASE + 1;
1788 				isPackageLine = false;
1789 			}
1790 			lev |= levelCurrent << 16;
1791 			if (visibleChars == 0 && options.foldCompact)
1792 				lev |= SC_FOLDLEVELWHITEFLAG;
1793 			if ((levelCurrent > levelPrev) && (visibleChars > 0))
1794 				lev |= SC_FOLDLEVELHEADERFLAG;
1795 			if (lev != styler.LevelAt(lineCurrent)) {
1796 				styler.SetLevel(lineCurrent, lev);
1797 			}
1798 			lineCurrent++;
1799 			levelPrev = levelCurrent;
1800 			visibleChars = 0;
1801 		}
1802 		if (!isspacechar(ch))
1803 			visibleChars++;
1804 		chPrev = ch;
1805 	}
1806 	// Fill in the real level of the next line, keeping the current flags as they will be filled in later
1807 	int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
1808 	styler.SetLevel(lineCurrent, levelPrev | flagsNext);
1809 }
1810 
// File-scope LexerModule object for the Perl lexer implemented in this file.
// It is constructed from the SCLEX_PERL identifier, the
// LexerPerl::LexerFactoryPerl factory, the language name "perl", and
// perlWordListDesc.
// NOTE(review): presumably this object is what makes the lexer discoverable
// by Scintilla's lexer lookup -- confirm against LexerModule.h.
1811 LexerModule lmPerl(SCLEX_PERL, LexerPerl::LexerFactoryPerl, "perl", perlWordListDesc);
1812