1 // Scintilla source code edit control
2 /** @file LexPerl.cxx
3  ** Lexer for Perl.
4  ** Converted to lexer object by "Udo Lechner" <dlchnr(at)gmx(dot)net>
5  **/
6 // Copyright 1998-2008 by Neil Hodgson <neilh@scintilla.org>
7 // Lexical analysis fixes by Kein-Hong Man <mkh@pl.jaring.my>
8 // The License.txt file describes the conditions under which this software may be distributed.
9 
10 #include <stdlib.h>
11 #include <string.h>
12 #include <stdio.h>
13 #include <stdarg.h>
14 #include <assert.h>
15 #include <ctype.h>
16 
17 #include <string>
18 #include <map>
19 
20 #include "ILexer.h"
21 #include "Scintilla.h"
22 #include "SciLexer.h"
23 
24 #include "WordList.h"
25 #include "LexAccessor.h"
26 #include "StyleContext.h"
27 #include "CharacterSet.h"
28 #include "LexerModule.h"
29 #include "OptionSet.h"
30 
31 #ifdef SCI_NAMESPACE
32 using namespace Scintilla;
33 #endif
34 
35 // Info for HERE document handling from perldata.pod (reformatted):
36 // ----------------------------------------------------------------
37 // A line-oriented form of quoting is based on the shell ``here-doc'' syntax.
38 // Following a << you specify a string to terminate the quoted material, and
39 // all lines following the current line down to the terminating string are
40 // the value of the item.
41 // * The terminating string may be either an identifier (a word), or some
42 //   quoted text.
43 // * If quoted, the type of quotes you use determines the treatment of the
44 //   text, just as in regular quoting.
45 // * An unquoted identifier works like double quotes.
46 // * There must be no space between the << and the identifier.
47 //   (If you put a space it will be treated as a null identifier,
48 //    which is valid, and matches the first empty line.)
49 //   (This is deprecated, -w warns of this syntax)
50 // * The terminating string must appear by itself (unquoted and
51 //   with no surrounding whitespace) on the terminating line.
52 
53 #define HERE_DELIM_MAX 256		// maximum length of HERE doc delimiter
54 
55 #define PERLNUM_BINARY		1	// order is significant: 1-3 cannot have a dot
56 #define PERLNUM_OCTAL		2
57 #define PERLNUM_FLOAT_EXP	3	// exponent part only
58 #define PERLNUM_HEX			4	// may be a hex float
59 #define PERLNUM_DECIMAL		5	// 1-5 are numbers; 6-7 are strings
60 #define PERLNUM_VECTOR		6
61 #define PERLNUM_V_VECTOR	7
62 #define PERLNUM_BAD			8
63 
64 #define BACK_NONE		0	// lookback state for bareword disambiguation:
65 #define BACK_OPERATOR	1	// whitespace/comments are insignificant
66 #define BACK_KEYWORD	2	// operators/keywords are needed for disambiguation
67 
68 #define SUB_BEGIN		0	// states for subroutine prototype scan:
69 #define SUB_HAS_PROTO	1	// only 'prototype' attribute allows prototypes
70 #define SUB_HAS_ATTRIB	2	// other attributes can exist leftward
71 #define SUB_HAS_MODULE	3	// sub name can have a ::identifier part
72 #define SUB_HAS_SUB		4	// 'sub' keyword
73 
74 // all interpolated styles are different from their parent styles by a constant difference
75 // we also assume SCE_PL_STRING_VAR is the interpolated style with the smallest value
76 #define	INTERPOLATE_SHIFT	(SCE_PL_STRING_VAR - SCE_PL_STRING)
77 
isPerlKeyword(Sci_PositionU start,Sci_PositionU end,WordList & keywords,LexAccessor & styler)78 static bool isPerlKeyword(Sci_PositionU start, Sci_PositionU end, WordList &keywords, LexAccessor &styler) {
79 	// old-style keyword matcher; needed because GetCurrent() needs
80 	// current segment to be committed, but we may abandon early...
81 	char s[100];
82 	Sci_PositionU i, len = end - start;
83 	if (len > 30) { len = 30; }
84 	for (i = 0; i < len; i++, start++) s[i] = styler[start];
85 	s[i] = '\0';
86 	return keywords.InList(s);
87 }
88 
disambiguateBareword(LexAccessor & styler,Sci_PositionU bk,Sci_PositionU fw,int backFlag,Sci_PositionU backPos,Sci_PositionU endPos)89 static int disambiguateBareword(LexAccessor &styler, Sci_PositionU bk, Sci_PositionU fw,
90         int backFlag, Sci_PositionU backPos, Sci_PositionU endPos) {
91 	// identifiers are recognized by Perl as barewords under some
92 	// conditions, the following attempts to do the disambiguation
93 	// by looking backward and forward; result in 2 LSB
94 	int result = 0;
95 	bool moreback = false;		// true if passed newline/comments
96 	bool brace = false;			// true if opening brace found
97 	// if BACK_NONE, neither operator nor keyword, so skip test
98 	if (backFlag == BACK_NONE)
99 		return result;
100 	// first look backwards past whitespace/comments to set EOL flag
101 	// (some disambiguation patterns must be on a single line)
102 	if (backPos <= static_cast<Sci_PositionU>(styler.LineStart(styler.GetLine(bk))))
103 		moreback = true;
104 	// look backwards at last significant lexed item for disambiguation
105 	bk = backPos - 1;
106 	int ch = static_cast<unsigned char>(styler.SafeGetCharAt(bk));
107 	if (ch == '{' && !moreback) {
108 		// {bareword: possible variable spec
109 		brace = true;
110 	} else if ((ch == '&' && styler.SafeGetCharAt(bk - 1) != '&')
111 	        // &bareword: subroutine call
112 	        || styler.Match(bk - 1, "->")
113 	        // ->bareword: part of variable spec
114 	        || styler.Match(bk - 1, "::")
115 	        // ::bareword: part of module spec
116 	        || styler.Match(bk - 2, "sub")) {
117 	        // sub bareword: subroutine declaration
118 	        // (implied BACK_KEYWORD, no keywords end in 'sub'!)
119 		result |= 1;
120 	}
121 	// next, scan forward after word past tab/spaces only;
122 	// if ch isn't one of '[{(,' we can skip the test
123 	if ((ch == '{' || ch == '(' || ch == '['|| ch == ',')
124 	        && fw < endPos) {
125 		while (ch = static_cast<unsigned char>(styler.SafeGetCharAt(fw)),
126 		        IsASpaceOrTab(ch) && fw < endPos) {
127 			fw++;
128 		}
129 		if ((ch == '}' && brace)
130 		        // {bareword}: variable spec
131 		        || styler.Match(fw, "=>")) {
132 		        // [{(, bareword=>: hash literal
133 			result |= 2;
134 		}
135 	}
136 	return result;
137 }
138 
skipWhitespaceComment(LexAccessor & styler,Sci_PositionU & p)139 static void skipWhitespaceComment(LexAccessor &styler, Sci_PositionU &p) {
140 	// when backtracking, we need to skip whitespace and comments
141 	int style;
142 	while ((p > 0) && (style = styler.StyleAt(p),
143 	        style == SCE_PL_DEFAULT || style == SCE_PL_COMMENTLINE))
144 		p--;
145 }
146 
findPrevLexeme(LexAccessor & styler,Sci_PositionU & bk,int & style)147 static int findPrevLexeme(LexAccessor &styler, Sci_PositionU &bk, int &style) {
148 	// scan backward past whitespace and comments to find a lexeme
149 	skipWhitespaceComment(styler, bk);
150 	if (bk == 0)
151 		return 0;
152 	int sz = 1;
153 	style = styler.StyleAt(bk);
154 	while (bk > 0) {	// find extent of lexeme
155 		if (styler.StyleAt(bk - 1) == style) {
156 			bk--; sz++;
157 		} else
158 			break;
159 	}
160 	return sz;
161 }
162 
styleBeforeBracePair(LexAccessor & styler,Sci_PositionU bk)163 static int styleBeforeBracePair(LexAccessor &styler, Sci_PositionU bk) {
164 	// backtrack to find open '{' corresponding to a '}', balanced
165 	// return significant style to be tested for '/' disambiguation
166 	int braceCount = 1;
167 	if (bk == 0)
168 		return SCE_PL_DEFAULT;
169 	while (--bk > 0) {
170 		if (styler.StyleAt(bk) == SCE_PL_OPERATOR) {
171 			int bkch = static_cast<unsigned char>(styler.SafeGetCharAt(bk));
172 			if (bkch == ';') {	// early out
173 				break;
174 			} else if (bkch == '}') {
175 				braceCount++;
176 			} else if (bkch == '{') {
177 				if (--braceCount == 0) break;
178 			}
179 		}
180 	}
181 	if (bk > 0 && braceCount == 0) {
182 		// balanced { found, bk > 0, skip more whitespace/comments
183 		bk--;
184 		skipWhitespaceComment(styler, bk);
185 		return styler.StyleAt(bk);
186 	}
187 	return SCE_PL_DEFAULT;
188 }
189 
styleCheckIdentifier(LexAccessor & styler,Sci_PositionU bk)190 static int styleCheckIdentifier(LexAccessor &styler, Sci_PositionU bk) {
191 	// backtrack to classify sub-styles of identifier under test
192 	// return sub-style to be tested for '/' disambiguation
193 	if (styler.SafeGetCharAt(bk) == '>')	// inputsymbol, like <foo>
194 		return 1;
195 	// backtrack to check for possible "->" or "::" before identifier
196 	while (bk > 0 && styler.StyleAt(bk) == SCE_PL_IDENTIFIER) {
197 		bk--;
198 	}
199 	while (bk > 0) {
200 		int bkstyle = styler.StyleAt(bk);
201 		if (bkstyle == SCE_PL_DEFAULT
202 		        || bkstyle == SCE_PL_COMMENTLINE) {
203 			// skip whitespace, comments
204 		} else if (bkstyle == SCE_PL_OPERATOR) {
205 			// test for "->" and "::"
206 			if (styler.Match(bk - 1, "->") || styler.Match(bk - 1, "::"))
207 				return 2;
208 		} else
209 			return 3;	// bare identifier
210 		bk--;
211 	}
212 	return 0;
213 }
214 
podLineScan(LexAccessor & styler,Sci_PositionU & pos,Sci_PositionU endPos)215 static int podLineScan(LexAccessor &styler, Sci_PositionU &pos, Sci_PositionU endPos) {
216 	// forward scan the current line to classify line for POD style
217 	int state = -1;
218 	while (pos < endPos) {
219 		int ch = static_cast<unsigned char>(styler.SafeGetCharAt(pos));
220 		if (ch == '\n' || ch == '\r') {
221 			if (ch == '\r' && styler.SafeGetCharAt(pos + 1) == '\n') pos++;
222 			break;
223 		}
224 		if (IsASpaceOrTab(ch)) {	// whitespace, take note
225 			if (state == -1)
226 				state = SCE_PL_DEFAULT;
227 		} else if (state == SCE_PL_DEFAULT) {	// verbatim POD line
228 			state = SCE_PL_POD_VERB;
229 		} else if (state != SCE_PL_POD_VERB) {	// regular POD line
230 			state = SCE_PL_POD;
231 		}
232 		pos++;
233 	}
234 	if (state == -1)
235 		state = SCE_PL_DEFAULT;
236 	return state;
237 }
238 
styleCheckSubPrototype(LexAccessor & styler,Sci_PositionU bk)239 static bool styleCheckSubPrototype(LexAccessor &styler, Sci_PositionU bk) {
240 	// backtrack to identify if we're starting a subroutine prototype
241 	// we also need to ignore whitespace/comments, format is like:
242 	//     sub abc::pqr :const :prototype(...)
243 	// lexemes are tested in pairs, e.g. '::'+'pqr', ':'+'const', etc.
244 	// and a state machine generates legal subroutine syntax matches
245 	styler.Flush();
246 	int state = SUB_BEGIN;
247 	do {
248 		// find two lexemes, lexeme 2 follows lexeme 1
249 		int style2 = SCE_PL_DEFAULT;
250 		Sci_PositionU pos2 = bk;
251 		int len2 = findPrevLexeme(styler, pos2, style2);
252 		int style1 = SCE_PL_DEFAULT;
253 		Sci_PositionU pos1 = pos2;
254 		if (pos1 > 0) pos1--;
255 		int len1 = findPrevLexeme(styler, pos1, style1);
256 		if (len1 == 0 || len2 == 0)		// lexeme pair must exist
257 			break;
258 
259 		// match parts of syntax, if invalid subroutine syntax, break off
260 		if (style1 == SCE_PL_OPERATOR && len1 == 1 &&
261 		    styler.SafeGetCharAt(pos1) == ':') {	// ':'
262 			if (style2 == SCE_PL_IDENTIFIER || style2 == SCE_PL_WORD) {
263 				if (len2 == 9 && styler.Match(pos2, "prototype")) {	// ':' 'prototype'
264 					if (state == SUB_BEGIN) {
265 						state = SUB_HAS_PROTO;
266 					} else
267 						break;
268 				} else {	// ':' <attribute>
269 					if (state == SUB_HAS_PROTO || state == SUB_HAS_ATTRIB) {
270 						state = SUB_HAS_ATTRIB;
271 					} else
272 						break;
273 				}
274 			} else
275 				break;
276 		} else if (style1 == SCE_PL_OPERATOR && len1 == 2 &&
277 		           styler.Match(pos1, "::")) {	// '::'
278 			if (style2 == SCE_PL_IDENTIFIER) {	// '::' <identifier>
279 				state = SUB_HAS_MODULE;
280 			} else
281 				break;
282 		} else if (style1 == SCE_PL_WORD && len1 == 3 &&
283 		           styler.Match(pos1, "sub")) {	// 'sub'
284 			if (style2 == SCE_PL_IDENTIFIER) {	// 'sub' <identifier>
285 				state = SUB_HAS_SUB;
286 			} else
287 				break;
288 		} else
289 			break;
290 		bk = pos1;			// set position for finding next lexeme pair
291 		if (bk > 0) bk--;
292 	} while (state != SUB_HAS_SUB);
293 	return (state == SUB_HAS_SUB);
294 }
295 
actualNumStyle(int numberStyle)296 static int actualNumStyle(int numberStyle) {
297 	if (numberStyle == PERLNUM_VECTOR || numberStyle == PERLNUM_V_VECTOR) {
298 		return SCE_PL_STRING;
299 	} else if (numberStyle == PERLNUM_BAD) {
300 		return SCE_PL_ERROR;
301 	}
302 	return SCE_PL_NUMBER;
303 }
304 
// Returns the closing delimiter paired with an opening bracket character;
// any other character is its own counterpart and is returned unchanged.
static int opposite(int ch) {
	switch (ch) {
	case '(':
		return ')';
	case '[':
		return ']';
	case '{':
		return '}';
	case '<':
		return '>';
	default:
		return ch;
	}
}
312 
IsCommentLine(Sci_Position line,LexAccessor & styler)313 static bool IsCommentLine(Sci_Position line, LexAccessor &styler) {
314 	Sci_Position pos = styler.LineStart(line);
315 	Sci_Position eol_pos = styler.LineStart(line + 1) - 1;
316 	for (Sci_Position i = pos; i < eol_pos; i++) {
317 		char ch = styler[i];
318 		int style = styler.StyleAt(i);
319 		if (ch == '#' && style == SCE_PL_COMMENTLINE)
320 			return true;
321 		else if (!IsASpaceOrTab(ch))
322 			return false;
323 	}
324 	return false;
325 }
326 
IsPackageLine(Sci_Position line,LexAccessor & styler)327 static bool IsPackageLine(Sci_Position line, LexAccessor &styler) {
328 	Sci_Position pos = styler.LineStart(line);
329 	int style = styler.StyleAt(pos);
330 	if (style == SCE_PL_WORD && styler.Match(pos, "package")) {
331 		return true;
332 	}
333 	return false;
334 }
335 
PodHeadingLevel(Sci_Position pos,LexAccessor & styler)336 static int PodHeadingLevel(Sci_Position pos, LexAccessor &styler) {
337 	int lvl = static_cast<unsigned char>(styler.SafeGetCharAt(pos + 5));
338 	if (lvl >= '1' && lvl <= '4') {
339 		return lvl - '0';
340 	}
341 	return 0;
342 }
343 
344 // An individual named option for use in an OptionSet
345 
346 // Options used for LexerPerl
// Options used for LexerPerl; each flag is bound to a property name
// by OptionSetPerl.
struct OptionsPerl {
	bool fold;                  // fold
	bool foldComment;           // fold.comment
	bool foldCompact;           // fold.compact
	// Custom folding of POD and packages
	bool foldPOD;               // fold.perl.pod: fold Pod blocks
	bool foldPackage;           // fold.perl.package: fold packages
	bool foldCommentExplicit;   // fold.perl.comment.explicit
	bool foldAtElse;            // fold.perl.at.else

	// Defaults: the Perl-specific folds and compact folding start
	// enabled; everything else starts disabled.
	OptionsPerl() :
		fold(false),
		foldComment(false),
		foldCompact(true),
		foldPOD(true),
		foldPackage(true),
		foldCommentExplicit(true),
		foldAtElse(false) {
	}
};
371 
// Names of the word lists accepted by the lexer, in index order;
// the table ends with a null entry.
static const char *const perlWordListDesc[] = { "Keywords", 0 };
376 
377 struct OptionSetPerl : public OptionSet<OptionsPerl> {
OptionSetPerlOptionSetPerl378 	OptionSetPerl() {
379 		DefineProperty("fold", &OptionsPerl::fold);
380 
381 		DefineProperty("fold.comment", &OptionsPerl::foldComment);
382 
383 		DefineProperty("fold.compact", &OptionsPerl::foldCompact);
384 
385 		DefineProperty("fold.perl.pod", &OptionsPerl::foldPOD,
386 		        "Set to 0 to disable folding Pod blocks when using the Perl lexer.");
387 
388 		DefineProperty("fold.perl.package", &OptionsPerl::foldPackage,
389 		        "Set to 0 to disable folding packages when using the Perl lexer.");
390 
391 		DefineProperty("fold.perl.comment.explicit", &OptionsPerl::foldCommentExplicit,
392 		        "Set to 0 to disable explicit folding.");
393 
394 		DefineProperty("fold.perl.at.else", &OptionsPerl::foldAtElse,
395 		               "This option enables Perl folding on a \"} else {\" line of an if statement.");
396 
397 		DefineWordListSets(perlWordListDesc);
398 	}
399 };
400 
401 class LexerPerl : public ILexer {
402 	CharacterSet setWordStart;
403 	CharacterSet setWord;
404 	CharacterSet setSpecialVar;
405 	CharacterSet setControlVar;
406 	WordList keywords;
407 	OptionsPerl options;
408 	OptionSetPerl osPerl;
409 public:
LexerPerl()410 	LexerPerl() :
411 		setWordStart(CharacterSet::setAlpha, "_", 0x80, true),
412 		setWord(CharacterSet::setAlphaNum, "_", 0x80, true),
413 		setSpecialVar(CharacterSet::setNone, "\"$;<>&`'+,./\\%:=~!?@[]"),
414 		setControlVar(CharacterSet::setNone, "ACDEFHILMNOPRSTVWX") {
415 	}
~LexerPerl()416 	virtual ~LexerPerl() {
417 	}
Release()418 	void SCI_METHOD Release() {
419 		delete this;
420 	}
Version() const421 	int SCI_METHOD Version() const {
422 		return lvOriginal;
423 	}
PropertyNames()424 	const char *SCI_METHOD PropertyNames() {
425 		return osPerl.PropertyNames();
426 	}
PropertyType(const char * name)427 	int SCI_METHOD PropertyType(const char *name) {
428 		return osPerl.PropertyType(name);
429 	}
DescribeProperty(const char * name)430 	const char *SCI_METHOD DescribeProperty(const char *name) {
431 		return osPerl.DescribeProperty(name);
432 	}
433 	Sci_Position SCI_METHOD PropertySet(const char *key, const char *val);
DescribeWordListSets()434 	const char *SCI_METHOD DescribeWordListSets() {
435 		return osPerl.DescribeWordListSets();
436 	}
437 	Sci_Position SCI_METHOD WordListSet(int n, const char *wl);
438 	void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess);
439 	void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess);
440 
PrivateCall(int,void *)441 	void *SCI_METHOD PrivateCall(int, void *) {
442 		return 0;
443 	}
444 
LexerFactoryPerl()445 	static ILexer *LexerFactoryPerl() {
446 		return new LexerPerl();
447 	}
448 	int InputSymbolScan(StyleContext &sc);
449 	void InterpolateSegment(StyleContext &sc, int maxSeg, bool isPattern=false);
450 };
451 
PropertySet(const char * key,const char * val)452 Sci_Position SCI_METHOD LexerPerl::PropertySet(const char *key, const char *val) {
453 	if (osPerl.PropertySet(&options, key, val)) {
454 		return 0;
455 	}
456 	return -1;
457 }
458 
WordListSet(int n,const char * wl)459 Sci_Position SCI_METHOD LexerPerl::WordListSet(int n, const char *wl) {
460 	WordList *wordListN = 0;
461 	switch (n) {
462 	case 0:
463 		wordListN = &keywords;
464 		break;
465 	}
466 	Sci_Position firstModification = -1;
467 	if (wordListN) {
468 		WordList wlNew;
469 		wlNew.Set(wl);
470 		if (*wordListN != wlNew) {
471 			wordListN->Set(wl);
472 			firstModification = 0;
473 		}
474 	}
475 	return firstModification;
476 }
477 
InputSymbolScan(StyleContext & sc)478 int LexerPerl::InputSymbolScan(StyleContext &sc) {
479 	// forward scan for matching > on same line; file handles
480 	int c, sLen = 0;
481 	while ((c = sc.GetRelativeCharacter(++sLen)) != 0) {
482 		if (c == '\r' || c == '\n') {
483 			return 0;
484 		} else if (c == '>') {
485 			if (sc.Match("<=>"))	// '<=>' case
486 				return 0;
487 			return sLen;
488 		}
489 	}
490 	return 0;
491 }
492 
InterpolateSegment(StyleContext & sc,int maxSeg,bool isPattern)493 void LexerPerl::InterpolateSegment(StyleContext &sc, int maxSeg, bool isPattern) {
494 	// interpolate a segment (with no active backslashes or delimiters within)
495 	// switch in or out of an interpolation style or continue current style
496 	// commit variable patterns if found, trim segment, repeat until done
497 	while (maxSeg > 0) {
498 		bool isVar = false;
499 		int sLen = 0;
500 		if ((maxSeg > 1) && (sc.ch == '$' || sc.ch == '@')) {
501 			// $#[$]*word [$@][$]*word (where word or {word} is always present)
502 			bool braces = false;
503 			sLen = 1;
504 			if (sc.ch == '$' && sc.chNext == '#') {	// starts with $#
505 				sLen++;
506 			}
507 			while ((maxSeg > sLen) && (sc.GetRelativeCharacter(sLen) == '$'))	// >0 $ dereference within
508 				sLen++;
509 			if ((maxSeg > sLen) && (sc.GetRelativeCharacter(sLen) == '{')) {	// { start for {word}
510 				sLen++;
511 				braces = true;
512 			}
513 			if (maxSeg > sLen) {
514 				int c = sc.GetRelativeCharacter(sLen);
515 				if (setWordStart.Contains(c)) {	// word (various)
516 					sLen++;
517 					isVar = true;
518 					while (maxSeg > sLen) {
519 						if (!setWord.Contains(sc.GetRelativeCharacter(sLen)))
520 							break;
521 						sLen++;
522 					}
523 				} else if (braces && IsADigit(c) && (sLen == 2)) {	// digit for ${digit}
524 					sLen++;
525 					isVar = true;
526 				}
527 			}
528 			if (braces) {
529 				if ((maxSeg > sLen) && (sc.GetRelativeCharacter(sLen) == '}')) {	// } end for {word}
530 					sLen++;
531 				} else
532 					isVar = false;
533 			}
534 		}
535 		if (!isVar && (maxSeg > 1)) {	// $- or @-specific variable patterns
536 			int c = sc.chNext;
537 			if (sc.ch == '$') {
538 				sLen = 1;
539 				if (IsADigit(c)) {	// $[0-9] and slurp trailing digits
540 					sLen++;
541 					isVar = true;
542 					while ((maxSeg > sLen) && IsADigit(sc.GetRelativeCharacter(sLen)))
543 						sLen++;
544 				} else if (setSpecialVar.Contains(c)) {	// $ special variables
545 					sLen++;
546 					isVar = true;
547 				} else if (!isPattern && ((c == '(') || (c == ')') || (c == '|'))) {	// $ additional
548 					sLen++;
549 					isVar = true;
550 				} else if (c == '^') {	// $^A control-char style
551 					sLen++;
552 					if ((maxSeg > sLen) && setControlVar.Contains(sc.GetRelativeCharacter(sLen))) {
553 						sLen++;
554 						isVar = true;
555 					}
556 				}
557 			} else if (sc.ch == '@') {
558 				sLen = 1;
559 				if (!isPattern && ((c == '+') || (c == '-'))) {	// @ specials non-pattern
560 					sLen++;
561 					isVar = true;
562 				}
563 			}
564 		}
565 		if (isVar) {	// commit as interpolated variable or normal character
566 			if (sc.state < SCE_PL_STRING_VAR)
567 				sc.SetState(sc.state + INTERPOLATE_SHIFT);
568 			sc.Forward(sLen);
569 			maxSeg -= sLen;
570 		} else {
571 			if (sc.state >= SCE_PL_STRING_VAR)
572 				sc.SetState(sc.state - INTERPOLATE_SHIFT);
573 			sc.Forward();
574 			maxSeg--;
575 		}
576 	}
577 	if (sc.state >= SCE_PL_STRING_VAR)
578 		sc.SetState(sc.state - INTERPOLATE_SHIFT);
579 }
580 
Lex(Sci_PositionU startPos,Sci_Position length,int initStyle,IDocument * pAccess)581 void SCI_METHOD LexerPerl::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
582 	LexAccessor styler(pAccess);
583 
584 	// keywords that forces /PATTERN/ at all times; should track vim's behaviour
585 	WordList reWords;
586 	reWords.Set("elsif if split while");
587 
588 	// charset classes
589 	CharacterSet setSingleCharOp(CharacterSet::setNone, "rwxoRWXOezsfdlpSbctugkTBMAC");
590 	// lexing of "%*</" operators is non-trivial; these are missing in the set below
591 	CharacterSet setPerlOperator(CharacterSet::setNone, "^&\\()-+=|{}[]:;>,?!.~");
592 	CharacterSet setQDelim(CharacterSet::setNone, "qrwx");
593 	CharacterSet setModifiers(CharacterSet::setAlpha);
594 	CharacterSet setPreferRE(CharacterSet::setNone, "*/<%");
595 	// setArray and setHash also accepts chars for special vars like $_,
596 	// which are then truncated when the next char does not match setVar
597 	CharacterSet setVar(CharacterSet::setAlphaNum, "#$_'", 0x80, true);
598 	CharacterSet setArray(CharacterSet::setAlpha, "#$_+-", 0x80, true);
599 	CharacterSet setHash(CharacterSet::setAlpha, "#$_!^+-", 0x80, true);
600 	CharacterSet &setPOD = setModifiers;
601 	CharacterSet setNonHereDoc(CharacterSet::setDigits, "=$@");
602 	CharacterSet setHereDocDelim(CharacterSet::setAlphaNum, "_");
603 	CharacterSet setSubPrototype(CharacterSet::setNone, "\\[$@%&*+];_ \t");
604 	CharacterSet setRepetition(CharacterSet::setDigits, ")\"'");
605 	// for format identifiers
606 	CharacterSet setFormatStart(CharacterSet::setAlpha, "_=");
607 	CharacterSet &setFormat = setHereDocDelim;
608 
609 	// Lexer for perl often has to backtrack to start of current style to determine
610 	// which characters are being used as quotes, how deeply nested is the
611 	// start position and what the termination string is for HERE documents.
612 
613 	class HereDocCls {	// Class to manage HERE doc sequence
614 	public:
615 		int State;
616 		// 0: '<<' encountered
617 		// 1: collect the delimiter
618 		// 2: here doc text (lines after the delimiter)
619 		int Quote;		// the char after '<<'
620 		bool Quoted;		// true if Quote in ('\'','"','`')
621 		int DelimiterLength;	// strlen(Delimiter)
622 		char Delimiter[HERE_DELIM_MAX];	// the Delimiter
623 		HereDocCls() {
624 			State = 0;
625 			Quote = 0;
626 			Quoted = false;
627 			DelimiterLength = 0;
628 			Delimiter[0] = '\0';
629 		}
630 		void Append(int ch) {
631 			Delimiter[DelimiterLength++] = static_cast<char>(ch);
632 			Delimiter[DelimiterLength] = '\0';
633 		}
634 		~HereDocCls() {
635 		}
636 	};
637 	HereDocCls HereDoc;		// TODO: FIFO for stacked here-docs
638 
639 	class QuoteCls {	// Class to manage quote pairs
640 	public:
641 		int Rep;
642 		int Count;
643 		int Up, Down;
644 		QuoteCls() {
645 			New(1);
646 		}
647 		void New(int r = 1) {
648 			Rep   = r;
649 			Count = 0;
650 			Up    = '\0';
651 			Down  = '\0';
652 		}
653 		void Open(int u) {
654 			Count++;
655 			Up    = u;
656 			Down  = opposite(Up);
657 		}
658 	};
659 	QuoteCls Quote;
660 
661 	// additional state for number lexing
662 	int numState = PERLNUM_DECIMAL;
663 	int dotCount = 0;
664 
665 	Sci_PositionU endPos = startPos + length;
666 
667 	// Backtrack to beginning of style if required...
668 	// If in a long distance lexical state, backtrack to find quote characters.
669 	// Includes strings (may be multi-line), numbers (additional state), format
670 	// bodies, as well as POD sections.
671 	if (initStyle == SCE_PL_HERE_Q
672 	    || initStyle == SCE_PL_HERE_QQ
673 	    || initStyle == SCE_PL_HERE_QX
674 	    || initStyle == SCE_PL_FORMAT
675 	    || initStyle == SCE_PL_HERE_QQ_VAR
676 	    || initStyle == SCE_PL_HERE_QX_VAR
677 	   ) {
678 		// backtrack through multiple styles to reach the delimiter start
679 		int delim = (initStyle == SCE_PL_FORMAT) ? SCE_PL_FORMAT_IDENT:SCE_PL_HERE_DELIM;
680 		while ((startPos > 1) && (styler.StyleAt(startPos) != delim)) {
681 			startPos--;
682 		}
683 		startPos = styler.LineStart(styler.GetLine(startPos));
684 		initStyle = styler.StyleAt(startPos - 1);
685 	}
686 	if (initStyle == SCE_PL_STRING
687 	    || initStyle == SCE_PL_STRING_QQ
688 	    || initStyle == SCE_PL_BACKTICKS
689 	    || initStyle == SCE_PL_STRING_QX
690 	    || initStyle == SCE_PL_REGEX
691 	    || initStyle == SCE_PL_STRING_QR
692 	    || initStyle == SCE_PL_REGSUBST
693 	    || initStyle == SCE_PL_STRING_VAR
694 	    || initStyle == SCE_PL_STRING_QQ_VAR
695 	    || initStyle == SCE_PL_BACKTICKS_VAR
696 	    || initStyle == SCE_PL_STRING_QX_VAR
697 	    || initStyle == SCE_PL_REGEX_VAR
698 	    || initStyle == SCE_PL_STRING_QR_VAR
699 	    || initStyle == SCE_PL_REGSUBST_VAR
700 	   ) {
701 		// for interpolation, must backtrack through a mix of two different styles
702 		int otherStyle = (initStyle >= SCE_PL_STRING_VAR) ?
703 			initStyle - INTERPOLATE_SHIFT : initStyle + INTERPOLATE_SHIFT;
704 		while (startPos > 1) {
705 			int st = styler.StyleAt(startPos - 1);
706 			if ((st != initStyle) && (st != otherStyle))
707 				break;
708 			startPos--;
709 		}
710 		initStyle = SCE_PL_DEFAULT;
711 	} else if (initStyle == SCE_PL_STRING_Q
712 	        || initStyle == SCE_PL_STRING_QW
713 	        || initStyle == SCE_PL_XLAT
714 	        || initStyle == SCE_PL_CHARACTER
715 	        || initStyle == SCE_PL_NUMBER
716 	        || initStyle == SCE_PL_IDENTIFIER
717 	        || initStyle == SCE_PL_ERROR
718 	        || initStyle == SCE_PL_SUB_PROTOTYPE
719 	   ) {
720 		while ((startPos > 1) && (styler.StyleAt(startPos - 1) == initStyle)) {
721 			startPos--;
722 		}
723 		initStyle = SCE_PL_DEFAULT;
724 	} else if (initStyle == SCE_PL_POD
725 	        || initStyle == SCE_PL_POD_VERB
726 	          ) {
727 		// POD backtracking finds preceding blank lines and goes back past them
728 		Sci_Position ln = styler.GetLine(startPos);
729 		if (ln > 0) {
730 			initStyle = styler.StyleAt(styler.LineStart(--ln));
731 			if (initStyle == SCE_PL_POD || initStyle == SCE_PL_POD_VERB) {
732 				while (ln > 0 && styler.GetLineState(ln) == SCE_PL_DEFAULT)
733 					ln--;
734 			}
735 			startPos = styler.LineStart(++ln);
736 			initStyle = styler.StyleAt(startPos - 1);
737 		} else {
738 			startPos = 0;
739 			initStyle = SCE_PL_DEFAULT;
740 		}
741 	}
742 
743 	// backFlag, backPos are additional state to aid identifier corner cases.
744 	// Look backwards past whitespace and comments in order to detect either
745 	// operator or keyword. Later updated as we go along.
746 	int backFlag = BACK_NONE;
747 	Sci_PositionU backPos = startPos;
748 	if (backPos > 0) {
749 		backPos--;
750 		skipWhitespaceComment(styler, backPos);
751 		if (styler.StyleAt(backPos) == SCE_PL_OPERATOR)
752 			backFlag = BACK_OPERATOR;
753 		else if (styler.StyleAt(backPos) == SCE_PL_WORD)
754 			backFlag = BACK_KEYWORD;
755 		backPos++;
756 	}
757 
758 	StyleContext sc(startPos, endPos - startPos, initStyle, styler);
759 
760 	for (; sc.More(); sc.Forward()) {
761 
762 		// Determine if the current state should terminate.
763 		switch (sc.state) {
764 		case SCE_PL_OPERATOR:
765 			sc.SetState(SCE_PL_DEFAULT);
766 			backFlag = BACK_OPERATOR;
767 			backPos = sc.currentPos;
768 			break;
769 		case SCE_PL_IDENTIFIER:		// identifier, bareword, inputsymbol
770 			if ((!setWord.Contains(sc.ch) && sc.ch != '\'')
771 			        || sc.Match('.', '.')
772 			        || sc.chPrev == '>') {	// end of inputsymbol
773 				sc.SetState(SCE_PL_DEFAULT);
774 			}
775 			break;
776 		case SCE_PL_WORD:		// keyword, plus special cases
777 			if (!setWord.Contains(sc.ch)) {
778 				char s[100];
779 				sc.GetCurrent(s, sizeof(s));
780 				if ((strcmp(s, "__DATA__") == 0) || (strcmp(s, "__END__") == 0)) {
781 					sc.ChangeState(SCE_PL_DATASECTION);
782 				} else {
783 					if ((strcmp(s, "format") == 0)) {
784 						sc.SetState(SCE_PL_FORMAT_IDENT);
785 						HereDoc.State = 0;
786 					} else {
787 						sc.SetState(SCE_PL_DEFAULT);
788 					}
789 					backFlag = BACK_KEYWORD;
790 					backPos = sc.currentPos;
791 				}
792 			}
793 			break;
794 		case SCE_PL_SCALAR:
795 		case SCE_PL_ARRAY:
796 		case SCE_PL_HASH:
797 		case SCE_PL_SYMBOLTABLE:
798 			if (sc.Match(':', ':')) {	// skip ::
799 				sc.Forward();
800 			} else if (!setVar.Contains(sc.ch)) {
801 				if (sc.LengthCurrent() == 1) {
802 					// Special variable: $(, $_ etc.
803 					sc.Forward();
804 				}
805 				sc.SetState(SCE_PL_DEFAULT);
806 			}
807 			break;
808 		case SCE_PL_NUMBER:
809 			// if no early break, number style is terminated at "(go through)"
810 			if (sc.ch == '.') {
811 				if (sc.chNext == '.') {
812 					// double dot is always an operator (go through)
813 				} else if (numState <= PERLNUM_FLOAT_EXP) {
814 					// non-decimal number or float exponent, consume next dot
815 					sc.SetState(SCE_PL_OPERATOR);
816 					break;
817 				} else {	// decimal or vectors allows dots
818 					dotCount++;
819 					if (numState == PERLNUM_DECIMAL) {
820 						if (dotCount <= 1)	// number with one dot in it
821 							break;
822 						if (IsADigit(sc.chNext)) {	// really a vector
823 							numState = PERLNUM_VECTOR;
824 							break;
825 						}
826 						// number then dot (go through)
827 					} else if (numState == PERLNUM_HEX) {
828 						if (dotCount <= 1 && IsADigit(sc.chNext, 16)) {
829 							break;	// hex with one dot is a hex float
830 						} else {
831 							sc.SetState(SCE_PL_OPERATOR);
832 							break;
833 						}
834 						// hex then dot (go through)
835 					} else if (IsADigit(sc.chNext))	// vectors
836 						break;
837 					// vector then dot (go through)
838 				}
839 			} else if (sc.ch == '_') {
840 				// permissive underscoring for number and vector literals
841 				break;
842 			} else if (numState == PERLNUM_DECIMAL) {
843 				if (sc.ch == 'E' || sc.ch == 'e') {	// exponent, sign
844 					numState = PERLNUM_FLOAT_EXP;
845 					if (sc.chNext == '+' || sc.chNext == '-') {
846 						sc.Forward();
847 					}
848 					break;
849 				} else if (IsADigit(sc.ch))
850 					break;
851 				// number then word (go through)
852 			} else if (numState == PERLNUM_HEX) {
853 				if (sc.ch == 'P' || sc.ch == 'p') {	// hex float exponent, sign
854 					numState = PERLNUM_FLOAT_EXP;
855 					if (sc.chNext == '+' || sc.chNext == '-') {
856 						sc.Forward();
857 					}
858 					break;
859 				} else if (IsADigit(sc.ch, 16))
860 					break;
861 				// hex or hex float then word (go through)
862 			} else if (numState == PERLNUM_VECTOR || numState == PERLNUM_V_VECTOR) {
863 				if (IsADigit(sc.ch))	// vector
864 					break;
865 				if (setWord.Contains(sc.ch) && dotCount == 0) {	// change to word
866 					sc.ChangeState(SCE_PL_IDENTIFIER);
867 					break;
868 				}
869 				// vector then word (go through)
870 			} else if (IsADigit(sc.ch)) {
871 				if (numState == PERLNUM_FLOAT_EXP) {
872 					break;
873 				} else if (numState == PERLNUM_OCTAL) {
874 					if (sc.ch <= '7') break;
875 				} else if (numState == PERLNUM_BINARY) {
876 					if (sc.ch <= '1') break;
877 				}
878 				// mark invalid octal, binary numbers (go through)
879 				numState = PERLNUM_BAD;
880 				break;
881 			}
882 			// complete current number or vector
883 			sc.ChangeState(actualNumStyle(numState));
884 			sc.SetState(SCE_PL_DEFAULT);
885 			break;
886 		case SCE_PL_COMMENTLINE:
887 			if (sc.atLineEnd) {
888 				sc.SetState(SCE_PL_DEFAULT);
889 			}
890 			break;
891 		case SCE_PL_HERE_DELIM:
892 			if (HereDoc.State == 0) { // '<<' encountered
893 				int delim_ch = sc.chNext;
894 				Sci_Position ws_skip = 0;
895 				HereDoc.State = 1;	// pre-init HERE doc class
896 				HereDoc.Quote = sc.chNext;
897 				HereDoc.Quoted = false;
898 				HereDoc.DelimiterLength = 0;
899 				HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
900 				if (IsASpaceOrTab(delim_ch)) {
901 					// skip whitespace; legal only for quoted delimiters
902 					Sci_PositionU i = sc.currentPos + 1;
903 					while ((i < endPos) && IsASpaceOrTab(delim_ch)) {
904 						i++;
905 						delim_ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
906 					}
907 					ws_skip = i - sc.currentPos - 1;
908 				}
909 				if (delim_ch == '\'' || delim_ch == '"' || delim_ch == '`') {
910 					// a quoted here-doc delimiter; skip any whitespace
911 					sc.Forward(ws_skip + 1);
912 					HereDoc.Quote = delim_ch;
913 					HereDoc.Quoted = true;
914 				} else if ((ws_skip == 0 && setNonHereDoc.Contains(sc.chNext))
915 				        || ws_skip > 0) {
916 					// left shift << or <<= operator cases
917 					// restore position if operator
918 					sc.ChangeState(SCE_PL_OPERATOR);
919 					sc.ForwardSetState(SCE_PL_DEFAULT);
920 					backFlag = BACK_OPERATOR;
921 					backPos = sc.currentPos;
922 					HereDoc.State = 0;
923 				} else {
924 					// specially handle initial '\' for identifier
925 					if (ws_skip == 0 && HereDoc.Quote == '\\')
926 						sc.Forward();
927 					// an unquoted here-doc delimiter, no special handling
928 					// (cannot be prefixed by spaces/tabs), or
929 					// symbols terminates; deprecated zero-length delimiter
930 				}
931 			} else if (HereDoc.State == 1) { // collect the delimiter
932 				backFlag = BACK_NONE;
933 				if (HereDoc.Quoted) { // a quoted here-doc delimiter
934 					if (sc.ch == HereDoc.Quote) { // closing quote => end of delimiter
935 						sc.ForwardSetState(SCE_PL_DEFAULT);
936 					} else if (!sc.atLineEnd) {
937 						if (sc.Match('\\', static_cast<char>(HereDoc.Quote))) { // escaped quote
938 							sc.Forward();
939 						}
940 						if (sc.ch != '\r') {	// skip CR if CRLF
941 							int i = 0;			// else append char, possibly an extended char
942 							while (i < sc.width) {
943 								HereDoc.Append(static_cast<unsigned char>(styler.SafeGetCharAt(sc.currentPos + i)));
944 								i++;
945 							}
946 						}
947 					}
948 				} else { // an unquoted here-doc delimiter, no extended charsets
949 					if (setHereDocDelim.Contains(sc.ch)) {
950 						HereDoc.Append(sc.ch);
951 					} else {
952 						sc.SetState(SCE_PL_DEFAULT);
953 					}
954 				}
955 				if (HereDoc.DelimiterLength >= HERE_DELIM_MAX - 1) {
956 					sc.SetState(SCE_PL_ERROR);
957 					HereDoc.State = 0;
958 				}
959 			}
960 			break;
961 		case SCE_PL_HERE_Q:
962 		case SCE_PL_HERE_QQ:
963 		case SCE_PL_HERE_QX:
964 			// also implies HereDoc.State == 2
965 			sc.Complete();
966 			if (HereDoc.DelimiterLength == 0 || sc.Match(HereDoc.Delimiter)) {
967 				int c = sc.GetRelative(HereDoc.DelimiterLength);
968 				if (c == '\r' || c == '\n') {	// peek first, do not consume match
969 					sc.ForwardBytes(HereDoc.DelimiterLength);
970 					sc.SetState(SCE_PL_DEFAULT);
971 					backFlag = BACK_NONE;
972 					HereDoc.State = 0;
973 					if (!sc.atLineEnd)
974 						sc.Forward();
975 					break;
976 				}
977 			}
978 			if (sc.state == SCE_PL_HERE_Q) {	// \EOF and 'EOF' non-interpolated
979 				while (!sc.atLineEnd)
980 					sc.Forward();
981 				break;
982 			}
983 			while (!sc.atLineEnd) {		// "EOF" and `EOF` interpolated
984 				int c, sLen = 0, endType = 0;
985 				while ((c = sc.GetRelativeCharacter(sLen)) != 0) {
986 					// scan to break string into segments
987 					if (c == '\\') {
988 						endType = 1; break;
989 					} else if (c == '\r' || c == '\n') {
990 						endType = 2; break;
991 					}
992 					sLen++;
993 				}
994 				if (sLen > 0)	// process non-empty segments
995 					InterpolateSegment(sc, sLen);
996 				if (endType == 1) {
997 					sc.Forward();
998 					// \ at end-of-line does not appear to have any effect, skip
999 					if (sc.ch != '\r' && sc.ch != '\n')
1000 						sc.Forward();
1001 				} else if (endType == 2) {
1002 					if (!sc.atLineEnd)
1003 						sc.Forward();
1004 				}
1005 			}
1006 			break;
1007 		case SCE_PL_POD:
1008 		case SCE_PL_POD_VERB: {
1009 				Sci_PositionU fw = sc.currentPos;
1010 				Sci_Position ln = styler.GetLine(fw);
1011 				if (sc.atLineStart && sc.Match("=cut")) {	// end of POD
1012 					sc.SetState(SCE_PL_POD);
1013 					sc.Forward(4);
1014 					sc.SetState(SCE_PL_DEFAULT);
1015 					styler.SetLineState(ln, SCE_PL_POD);
1016 					break;
1017 				}
1018 				int pod = podLineScan(styler, fw, endPos);	// classify POD line
1019 				styler.SetLineState(ln, pod);
1020 				if (pod == SCE_PL_DEFAULT) {
1021 					if (sc.state == SCE_PL_POD_VERB) {
1022 						Sci_PositionU fw2 = fw;
1023 						while (fw2 < (endPos - 1) && pod == SCE_PL_DEFAULT) {
1024 							fw = fw2++;	// penultimate line (last blank line)
1025 							pod = podLineScan(styler, fw2, endPos);
1026 							styler.SetLineState(styler.GetLine(fw2), pod);
1027 						}
1028 						if (pod == SCE_PL_POD) {	// truncate verbatim POD early
1029 							sc.SetState(SCE_PL_POD);
1030 						} else
1031 							fw = fw2;
1032 					}
1033 				} else {
1034 					if (pod == SCE_PL_POD_VERB	// still part of current paragraph
1035 					        && (styler.GetLineState(ln - 1) == SCE_PL_POD)) {
1036 						pod = SCE_PL_POD;
1037 						styler.SetLineState(ln, pod);
1038 					} else if (pod == SCE_PL_POD
1039 					        && (styler.GetLineState(ln - 1) == SCE_PL_POD_VERB)) {
1040 						pod = SCE_PL_POD_VERB;
1041 						styler.SetLineState(ln, pod);
1042 					}
1043 					sc.SetState(pod);
1044 				}
1045 				sc.ForwardBytes(fw - sc.currentPos);	// commit style
1046 			}
1047 			break;
1048 		case SCE_PL_REGEX:
1049 		case SCE_PL_STRING_QR:
1050 			if (Quote.Rep <= 0) {
1051 				if (!setModifiers.Contains(sc.ch))
1052 					sc.SetState(SCE_PL_DEFAULT);
1053 			} else if (!Quote.Up && !IsASpace(sc.ch)) {
1054 				Quote.Open(sc.ch);
1055 			} else {
1056 				int c, sLen = 0, endType = 0;
1057 				while ((c = sc.GetRelativeCharacter(sLen)) != 0) {
1058 					// scan to break string into segments
1059 					if (IsASpace(c)) {
1060 						break;
1061 					} else if (c == '\\' && Quote.Up != '\\') {
1062 						endType = 1; break;
1063 					} else if (c == Quote.Down) {
1064 						Quote.Count--;
1065 						if (Quote.Count == 0) {
1066 							Quote.Rep--;
1067 							break;
1068 						}
1069 					} else if (c == Quote.Up)
1070 						Quote.Count++;
1071 					sLen++;
1072 				}
1073 				if (sLen > 0) {	// process non-empty segments
1074 					if (Quote.Up != '\'') {
1075 						InterpolateSegment(sc, sLen, true);
1076 					} else		// non-interpolated path
1077 						sc.Forward(sLen);
1078 				}
1079 				if (endType == 1)
1080 					sc.Forward();
1081 			}
1082 			break;
1083 		case SCE_PL_REGSUBST:
1084 		case SCE_PL_XLAT:
1085 			if (Quote.Rep <= 0) {
1086 				if (!setModifiers.Contains(sc.ch))
1087 					sc.SetState(SCE_PL_DEFAULT);
1088 			} else if (!Quote.Up && !IsASpace(sc.ch)) {
1089 				Quote.Open(sc.ch);
1090 			} else {
1091 				int c, sLen = 0, endType = 0;
1092 				bool isPattern = (Quote.Rep == 2);
1093 				while ((c = sc.GetRelativeCharacter(sLen)) != 0) {
1094 					// scan to break string into segments
1095 					if (c == '\\' && Quote.Up != '\\') {
1096 						endType = 2; break;
1097 					} else if (Quote.Count == 0 && Quote.Rep == 1) {
1098 						// We matched something like s(...) or tr{...}, Perl 5.10
1099 						// appears to allow almost any character for use as the
1100 						// next delimiters. Whitespace and comments are accepted in
1101 						// between, but we'll limit to whitespace here.
1102 						// For '#', if no whitespace in between, it's a delimiter.
1103 						if (IsASpace(c)) {
1104 							// Keep going
1105 						} else if (c == '#' && IsASpaceOrTab(sc.GetRelativeCharacter(sLen - 1))) {
1106 							endType = 3;
1107 						} else
1108 							Quote.Open(c);
1109 						break;
1110 					} else if (c == Quote.Down) {
1111 						Quote.Count--;
1112 						if (Quote.Count == 0) {
1113 							Quote.Rep--;
1114 							endType = 1;
1115 						}
1116 						if (Quote.Up == Quote.Down)
1117 							Quote.Count++;
1118 						if (endType == 1)
1119 							break;
1120 					} else if (c == Quote.Up) {
1121 						Quote.Count++;
1122 					} else if (IsASpace(c))
1123 						break;
1124 					sLen++;
1125 				}
1126 				if (sLen > 0) {	// process non-empty segments
1127 					if (sc.state == SCE_PL_REGSUBST && Quote.Up != '\'') {
1128 						InterpolateSegment(sc, sLen, isPattern);
1129 					} else		// non-interpolated path
1130 						sc.Forward(sLen);
1131 				}
1132 				if (endType == 2) {
1133 					sc.Forward();
1134 				} else if (endType == 3)
1135 					sc.SetState(SCE_PL_DEFAULT);
1136 			}
1137 			break;
1138 		case SCE_PL_STRING_Q:
1139 		case SCE_PL_STRING_QQ:
1140 		case SCE_PL_STRING_QX:
1141 		case SCE_PL_STRING_QW:
1142 		case SCE_PL_STRING:
1143 		case SCE_PL_CHARACTER:
1144 		case SCE_PL_BACKTICKS:
1145 			if (!Quote.Down && !IsASpace(sc.ch)) {
1146 				Quote.Open(sc.ch);
1147 			} else {
1148 				int c, sLen = 0, endType = 0;
1149 				while ((c = sc.GetRelativeCharacter(sLen)) != 0) {
1150 					// scan to break string into segments
1151 					if (IsASpace(c)) {
1152 						break;
1153 					} else if (c == '\\' && Quote.Up != '\\') {
1154 						endType = 2; break;
1155 					} else if (c == Quote.Down) {
1156 						Quote.Count--;
1157 						if (Quote.Count == 0) {
1158 							endType = 3; break;
1159 						}
1160 					} else if (c == Quote.Up)
1161 						Quote.Count++;
1162 					sLen++;
1163 				}
1164 				if (sLen > 0) {	// process non-empty segments
1165 					switch (sc.state) {
1166 					case SCE_PL_STRING:
1167 					case SCE_PL_STRING_QQ:
1168 					case SCE_PL_BACKTICKS:
1169 						InterpolateSegment(sc, sLen);
1170 						break;
1171 					case SCE_PL_STRING_QX:
1172 						if (Quote.Up != '\'') {
1173 							InterpolateSegment(sc, sLen);
1174 							break;
1175 						}
1176 						// (continued for ' delim)
1177 					default:	// non-interpolated path
1178 						sc.Forward(sLen);
1179 					}
1180 				}
1181 				if (endType == 2) {
1182 					sc.Forward();
1183 				} else if (endType == 3)
1184 					sc.ForwardSetState(SCE_PL_DEFAULT);
1185 			}
1186 			break;
1187 		case SCE_PL_SUB_PROTOTYPE: {
1188 				int i = 0;
1189 				// forward scan; must all be valid proto characters
1190 				while (setSubPrototype.Contains(sc.GetRelative(i)))
1191 					i++;
1192 				if (sc.GetRelative(i) == ')') {	// valid sub prototype
1193 					sc.ForwardBytes(i);
1194 					sc.ForwardSetState(SCE_PL_DEFAULT);
1195 				} else {
1196 					// abandon prototype, restart from '('
1197 					sc.ChangeState(SCE_PL_OPERATOR);
1198 					sc.SetState(SCE_PL_DEFAULT);
1199 				}
1200 			}
1201 			break;
1202 		case SCE_PL_FORMAT: {
1203 				sc.Complete();
1204 				if (sc.Match('.')) {
1205 					sc.Forward();
1206 					if (sc.atLineEnd || ((sc.ch == '\r' && sc.chNext == '\n')))
1207 						sc.SetState(SCE_PL_DEFAULT);
1208 				}
1209 				while (!sc.atLineEnd)
1210 					sc.Forward();
1211 			}
1212 			break;
1213 		case SCE_PL_ERROR:
1214 			break;
1215 		}
1216 		// Needed for specific continuation styles (one follows the other)
1217 		switch (sc.state) {
1218 			// continued from SCE_PL_WORD
1219 		case SCE_PL_FORMAT_IDENT:
1220 			// occupies HereDoc state 3 to avoid clashing with HERE docs
1221 			if (IsASpaceOrTab(sc.ch)) {		// skip whitespace
1222 				sc.ChangeState(SCE_PL_DEFAULT);
1223 				while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd)
1224 					sc.Forward();
1225 				sc.SetState(SCE_PL_FORMAT_IDENT);
1226 			}
1227 			if (setFormatStart.Contains(sc.ch)) {	// identifier or '='
1228 				if (sc.ch != '=') {
1229 					do {
1230 						sc.Forward();
1231 					} while (setFormat.Contains(sc.ch));
1232 				}
1233 				while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd)
1234 					sc.Forward();
1235 				if (sc.ch == '=') {
1236 					sc.ForwardSetState(SCE_PL_DEFAULT);
1237 					HereDoc.State = 3;
1238 				} else {
1239 					// invalid identifier; inexact fallback, but hey
1240 					sc.ChangeState(SCE_PL_IDENTIFIER);
1241 					sc.SetState(SCE_PL_DEFAULT);
1242 				}
1243 			} else {
1244 				sc.ChangeState(SCE_PL_DEFAULT);	// invalid identifier
1245 			}
1246 			backFlag = BACK_NONE;
1247 			break;
1248 		}
1249 
1250 		// Must check end of HereDoc states here before default state is handled
1251 		if (HereDoc.State == 1 && sc.atLineEnd) {
1252 			// Begin of here-doc (the line after the here-doc delimiter):
1253 			// Lexically, the here-doc starts from the next line after the <<, but the
1254 			// first line of here-doc seems to follow the style of the last EOL sequence
1255 			int st_new = SCE_PL_HERE_QQ;
1256 			HereDoc.State = 2;
1257 			if (HereDoc.Quoted) {
1258 				if (sc.state == SCE_PL_HERE_DELIM) {
1259 					// Missing quote at end of string! We are stricter than perl.
1260 					// Colour here-doc anyway while marking this bit as an error.
1261 					sc.ChangeState(SCE_PL_ERROR);
1262 				}
1263 				switch (HereDoc.Quote) {
1264 				case '\'':
1265 					st_new = SCE_PL_HERE_Q;
1266 					break;
1267 				case '"' :
1268 					st_new = SCE_PL_HERE_QQ;
1269 					break;
1270 				case '`' :
1271 					st_new = SCE_PL_HERE_QX;
1272 					break;
1273 				}
1274 			} else {
1275 				if (HereDoc.Quote == '\\')
1276 					st_new = SCE_PL_HERE_Q;
1277 			}
1278 			sc.SetState(st_new);
1279 		}
1280 		if (HereDoc.State == 3 && sc.atLineEnd) {
1281 			// Start of format body.
1282 			HereDoc.State = 0;
1283 			sc.SetState(SCE_PL_FORMAT);
1284 		}
1285 
1286 		// Determine if a new state should be entered.
1287 		if (sc.state == SCE_PL_DEFAULT) {
1288 			if (IsADigit(sc.ch) ||
1289 			        (IsADigit(sc.chNext) && (sc.ch == '.' || sc.ch == 'v'))) {
1290 				sc.SetState(SCE_PL_NUMBER);
1291 				backFlag = BACK_NONE;
1292 				numState = PERLNUM_DECIMAL;
1293 				dotCount = 0;
1294 				if (sc.ch == '0') {		// hex,bin,octal
1295 					if (sc.chNext == 'x' || sc.chNext == 'X') {
1296 						numState = PERLNUM_HEX;
1297 					} else if (sc.chNext == 'b' || sc.chNext == 'B') {
1298 						numState = PERLNUM_BINARY;
1299 					} else if (IsADigit(sc.chNext)) {
1300 						numState = PERLNUM_OCTAL;
1301 					}
1302 					if (numState != PERLNUM_DECIMAL) {
1303 						sc.Forward();
1304 					}
1305 				} else if (sc.ch == 'v') {		// vector
1306 					numState = PERLNUM_V_VECTOR;
1307 				}
1308 			} else if (setWord.Contains(sc.ch)) {
1309 				// if immediately prefixed by '::', always a bareword
1310 				sc.SetState(SCE_PL_WORD);
1311 				if (sc.chPrev == ':' && sc.GetRelative(-2) == ':') {
1312 					sc.ChangeState(SCE_PL_IDENTIFIER);
1313 				}
1314 				Sci_PositionU bk = sc.currentPos;
1315 				Sci_PositionU fw = sc.currentPos + 1;
1316 				// first check for possible quote-like delimiter
1317 				if (sc.ch == 's' && !setWord.Contains(sc.chNext)) {
1318 					sc.ChangeState(SCE_PL_REGSUBST);
1319 					Quote.New(2);
1320 				} else if (sc.ch == 'm' && !setWord.Contains(sc.chNext)) {
1321 					sc.ChangeState(SCE_PL_REGEX);
1322 					Quote.New();
1323 				} else if (sc.ch == 'q' && !setWord.Contains(sc.chNext)) {
1324 					sc.ChangeState(SCE_PL_STRING_Q);
1325 					Quote.New();
1326 				} else if (sc.ch == 'y' && !setWord.Contains(sc.chNext)) {
1327 					sc.ChangeState(SCE_PL_XLAT);
1328 					Quote.New(2);
1329 				} else if (sc.Match('t', 'r') && !setWord.Contains(sc.GetRelative(2))) {
1330 					sc.ChangeState(SCE_PL_XLAT);
1331 					Quote.New(2);
1332 					sc.Forward();
1333 					fw++;
1334 				} else if (sc.ch == 'q' && setQDelim.Contains(sc.chNext)
1335 				        && !setWord.Contains(sc.GetRelative(2))) {
1336 					if (sc.chNext == 'q') sc.ChangeState(SCE_PL_STRING_QQ);
1337 					else if (sc.chNext == 'x') sc.ChangeState(SCE_PL_STRING_QX);
1338 					else if (sc.chNext == 'r') sc.ChangeState(SCE_PL_STRING_QR);
1339 					else sc.ChangeState(SCE_PL_STRING_QW);	// sc.chNext == 'w'
1340 					Quote.New();
1341 					sc.Forward();
1342 					fw++;
1343 				} else if (sc.ch == 'x' && (sc.chNext == '=' ||	// repetition
1344 				        !setWord.Contains(sc.chNext) ||
1345 				        (setRepetition.Contains(sc.chPrev) && IsADigit(sc.chNext)))) {
1346 					sc.ChangeState(SCE_PL_OPERATOR);
1347 				}
1348 				// if potentially a keyword, scan forward and grab word, then check
1349 				// if it's really one; if yes, disambiguation test is performed
1350 				// otherwise it is always a bareword and we skip a lot of scanning
1351 				if (sc.state == SCE_PL_WORD) {
1352 					while (setWord.Contains(static_cast<unsigned char>(styler.SafeGetCharAt(fw))))
1353 						fw++;
1354 					if (!isPerlKeyword(styler.GetStartSegment(), fw, keywords, styler)) {
1355 						sc.ChangeState(SCE_PL_IDENTIFIER);
1356 					}
1357 				}
1358 				// if already SCE_PL_IDENTIFIER, then no ambiguity, skip this
1359 				// for quote-like delimiters/keywords, attempt to disambiguate
1360 				// to select for bareword, change state -> SCE_PL_IDENTIFIER
1361 				if (sc.state != SCE_PL_IDENTIFIER && bk > 0) {
1362 					if (disambiguateBareword(styler, bk, fw, backFlag, backPos, endPos))
1363 						sc.ChangeState(SCE_PL_IDENTIFIER);
1364 				}
1365 				backFlag = BACK_NONE;
1366 			} else if (sc.ch == '#') {
1367 				sc.SetState(SCE_PL_COMMENTLINE);
1368 			} else if (sc.ch == '\"') {
1369 				sc.SetState(SCE_PL_STRING);
1370 				Quote.New();
1371 				Quote.Open(sc.ch);
1372 				backFlag = BACK_NONE;
1373 			} else if (sc.ch == '\'') {
1374 				if (sc.chPrev == '&' && setWordStart.Contains(sc.chNext)) {
1375 					// Archaic call
1376 					sc.SetState(SCE_PL_IDENTIFIER);
1377 				} else {
1378 					sc.SetState(SCE_PL_CHARACTER);
1379 					Quote.New();
1380 					Quote.Open(sc.ch);
1381 				}
1382 				backFlag = BACK_NONE;
1383 			} else if (sc.ch == '`') {
1384 				sc.SetState(SCE_PL_BACKTICKS);
1385 				Quote.New();
1386 				Quote.Open(sc.ch);
1387 				backFlag = BACK_NONE;
1388 			} else if (sc.ch == '$') {
1389 				sc.SetState(SCE_PL_SCALAR);
1390 				if (sc.chNext == '{') {
1391 					sc.ForwardSetState(SCE_PL_OPERATOR);
1392 				} else if (IsASpace(sc.chNext)) {
1393 					sc.ForwardSetState(SCE_PL_DEFAULT);
1394 				} else {
1395 					sc.Forward();
1396 					if (sc.Match('`', '`') || sc.Match(':', ':')) {
1397 						sc.Forward();
1398 					}
1399 				}
1400 				backFlag = BACK_NONE;
1401 			} else if (sc.ch == '@') {
1402 				sc.SetState(SCE_PL_ARRAY);
1403 				if (setArray.Contains(sc.chNext)) {
1404 					// no special treatment
1405 				} else if (sc.chNext == ':' && sc.GetRelative(2) == ':') {
1406 					sc.ForwardBytes(2);
1407 				} else if (sc.chNext == '{' || sc.chNext == '[') {
1408 					sc.ForwardSetState(SCE_PL_OPERATOR);
1409 				} else {
1410 					sc.ChangeState(SCE_PL_OPERATOR);
1411 				}
1412 				backFlag = BACK_NONE;
1413 			} else if (setPreferRE.Contains(sc.ch)) {
1414 				// Explicit backward peeking to set a consistent preferRE for
1415 				// any slash found, so no longer need to track preferRE state.
1416 				// Find first previous significant lexed element and interpret.
1417 				// A few symbols share this code for disambiguation.
1418 				bool preferRE = false;
1419 				bool isHereDoc = sc.Match('<', '<');
1420 				bool hereDocSpace = false;		// for: SCALAR [whitespace] '<<'
1421 				Sci_PositionU bk = (sc.currentPos > 0) ? sc.currentPos - 1: 0;
1422 				sc.Complete();
1423 				styler.Flush();
1424 				if (styler.StyleAt(bk) == SCE_PL_DEFAULT)
1425 					hereDocSpace = true;
1426 				skipWhitespaceComment(styler, bk);
1427 				if (bk == 0) {
1428 					// avoid backward scanning breakage
1429 					preferRE = true;
1430 				} else {
1431 					int bkstyle = styler.StyleAt(bk);
1432 					int bkch = static_cast<unsigned char>(styler.SafeGetCharAt(bk));
1433 					switch (bkstyle) {
1434 					case SCE_PL_OPERATOR:
1435 						preferRE = true;
1436 						if (bkch == ')' || bkch == ']') {
1437 							preferRE = false;
1438 						} else if (bkch == '}') {
1439 							// backtrack by counting balanced brace pairs
1440 							// needed to test for variables like ${}, @{} etc.
1441 							bkstyle = styleBeforeBracePair(styler, bk);
1442 							if (bkstyle == SCE_PL_SCALAR
1443 							        || bkstyle == SCE_PL_ARRAY
1444 							        || bkstyle == SCE_PL_HASH
1445 							        || bkstyle == SCE_PL_SYMBOLTABLE
1446 							        || bkstyle == SCE_PL_OPERATOR) {
1447 								preferRE = false;
1448 							}
1449 						} else if (bkch == '+' || bkch == '-') {
1450 							if (bkch == static_cast<unsigned char>(styler.SafeGetCharAt(bk - 1))
1451 							        && bkch != static_cast<unsigned char>(styler.SafeGetCharAt(bk - 2)))
1452 								// exceptions for operators: unary suffixes ++, --
1453 								preferRE = false;
1454 						}
1455 						break;
1456 					case SCE_PL_IDENTIFIER:
1457 						preferRE = true;
1458 						bkstyle = styleCheckIdentifier(styler, bk);
1459 						if ((bkstyle == 1) || (bkstyle == 2)) {
1460 							// inputsymbol or var with "->" or "::" before identifier
1461 							preferRE = false;
1462 						} else if (bkstyle == 3) {
1463 							// bare identifier, test cases follows:
1464 							if (sc.ch == '/') {
1465 								// if '/', /PATTERN/ unless digit/space immediately after '/'
1466 								// if '//', always expect defined-or operator to follow identifier
1467 								if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.chNext == '/')
1468 									preferRE = false;
1469 							} else if (sc.ch == '*' || sc.ch == '%') {
1470 								if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.Match('*', '*'))
1471 									preferRE = false;
1472 							} else if (sc.ch == '<') {
1473 								if (IsASpace(sc.chNext) || sc.chNext == '=')
1474 									preferRE = false;
1475 							}
1476 						}
1477 						break;
1478 					case SCE_PL_SCALAR:		// for $var<< case:
1479 						if (isHereDoc && hereDocSpace)	// if SCALAR whitespace '<<', *always* a HERE doc
1480 							preferRE = true;
1481 						break;
1482 					case SCE_PL_WORD:
1483 						preferRE = true;
1484 						// for HERE docs, always true
1485 						if (sc.ch == '/') {
1486 							// adopt heuristics similar to vim-style rules:
1487 							// keywords always forced as /PATTERN/: split, if, elsif, while
1488 							// everything else /PATTERN/ unless digit/space immediately after '/'
1489 							// for '//', defined-or favoured unless special keywords
1490 							Sci_PositionU bkend = bk + 1;
1491 							while (bk > 0 && styler.StyleAt(bk - 1) == SCE_PL_WORD) {
1492 								bk--;
1493 							}
1494 							if (isPerlKeyword(bk, bkend, reWords, styler))
1495 								break;
1496 							if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.chNext == '/')
1497 								preferRE = false;
1498 						} else if (sc.ch == '*' || sc.ch == '%') {
1499 							if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.Match('*', '*'))
1500 								preferRE = false;
1501 						} else if (sc.ch == '<') {
1502 							if (IsASpace(sc.chNext) || sc.chNext == '=')
1503 								preferRE = false;
1504 						}
1505 						break;
1506 
1507 						// other styles uses the default, preferRE=false
1508 					case SCE_PL_POD:
1509 					case SCE_PL_HERE_Q:
1510 					case SCE_PL_HERE_QQ:
1511 					case SCE_PL_HERE_QX:
1512 						preferRE = true;
1513 						break;
1514 					}
1515 				}
1516 				backFlag = BACK_NONE;
1517 				if (isHereDoc) {	// handle '<<', HERE doc
1518 					if (sc.Match("<<>>")) {		// double-diamond operator (5.22)
1519 						sc.SetState(SCE_PL_OPERATOR);
1520 						sc.Forward(3);
1521 					} else if (preferRE) {
1522 						sc.SetState(SCE_PL_HERE_DELIM);
1523 						HereDoc.State = 0;
1524 					} else {		// << operator
1525 						sc.SetState(SCE_PL_OPERATOR);
1526 						sc.Forward();
1527 					}
1528 				} else if (sc.ch == '*') {	// handle '*', typeglob
1529 					if (preferRE) {
1530 						sc.SetState(SCE_PL_SYMBOLTABLE);
1531 						if (sc.chNext == ':' && sc.GetRelative(2) == ':') {
1532 							sc.ForwardBytes(2);
1533 						} else if (sc.chNext == '{') {
1534 							sc.ForwardSetState(SCE_PL_OPERATOR);
1535 						} else {
1536 							sc.Forward();
1537 						}
1538 					} else {
1539 						sc.SetState(SCE_PL_OPERATOR);
1540 						if (sc.chNext == '*') 	// exponentiation
1541 							sc.Forward();
1542 					}
1543 				} else if (sc.ch == '%') {	// handle '%', hash
1544 					if (preferRE) {
1545 						sc.SetState(SCE_PL_HASH);
1546 						if (setHash.Contains(sc.chNext)) {
1547 							sc.Forward();
1548 						} else if (sc.chNext == ':' && sc.GetRelative(2) == ':') {
1549 							sc.ForwardBytes(2);
1550 						} else if (sc.chNext == '{') {
1551 							sc.ForwardSetState(SCE_PL_OPERATOR);
1552 						} else {
1553 							sc.ChangeState(SCE_PL_OPERATOR);
1554 						}
1555 					} else {
1556 						sc.SetState(SCE_PL_OPERATOR);
1557 					}
1558 				} else if (sc.ch == '<') {	// handle '<', inputsymbol
1559 					if (preferRE) {
1560 						// forward scan
1561 						int i = InputSymbolScan(sc);
1562 						if (i > 0) {
1563 							sc.SetState(SCE_PL_IDENTIFIER);
1564 							sc.Forward(i);
1565 						} else {
1566 							sc.SetState(SCE_PL_OPERATOR);
1567 						}
1568 					} else {
1569 						sc.SetState(SCE_PL_OPERATOR);
1570 					}
1571 				} else {			// handle '/', regexp
1572 					if (preferRE) {
1573 						sc.SetState(SCE_PL_REGEX);
1574 						Quote.New();
1575 						Quote.Open(sc.ch);
1576 					} else {		// / and // operators
1577 						sc.SetState(SCE_PL_OPERATOR);
1578 						if (sc.chNext == '/') {
1579 							sc.Forward();
1580 						}
1581 					}
1582 				}
1583 			} else if (sc.ch == '='		// POD
1584 			        && setPOD.Contains(sc.chNext)
1585 			        && sc.atLineStart) {
1586 				sc.SetState(SCE_PL_POD);
1587 				backFlag = BACK_NONE;
1588 			} else if (sc.ch == '-' && setWordStart.Contains(sc.chNext)) {	// extended '-' cases
1589 				Sci_PositionU bk = sc.currentPos;
1590 				Sci_PositionU fw = 2;
1591 				if (setSingleCharOp.Contains(sc.chNext) &&	// file test operators
1592 				        !setWord.Contains(sc.GetRelative(2))) {
1593 					sc.SetState(SCE_PL_WORD);
1594 				} else {
1595 					// nominally a minus and bareword; find extent of bareword
1596 					while (setWord.Contains(sc.GetRelative(fw)))
1597 						fw++;
1598 					sc.SetState(SCE_PL_OPERATOR);
1599 				}
1600 				// force to bareword for hash key => or {variable literal} cases
1601 				if (disambiguateBareword(styler, bk, bk + fw, backFlag, backPos, endPos) & 2) {
1602 					sc.ChangeState(SCE_PL_IDENTIFIER);
1603 				}
1604 				backFlag = BACK_NONE;
1605 			} else if (sc.ch == '(' && sc.currentPos > 0) {	// '(' or subroutine prototype
1606 				sc.Complete();
1607 				if (styleCheckSubPrototype(styler, sc.currentPos - 1)) {
1608 					sc.SetState(SCE_PL_SUB_PROTOTYPE);
1609 					backFlag = BACK_NONE;
1610 				} else {
1611 					sc.SetState(SCE_PL_OPERATOR);
1612 				}
1613 			} else if (setPerlOperator.Contains(sc.ch)) {	// operators
1614 				sc.SetState(SCE_PL_OPERATOR);
1615 				if (sc.Match('.', '.')) {	// .. and ...
1616 					sc.Forward();
1617 					if (sc.chNext == '.') sc.Forward();
1618 				}
1619 			} else if (sc.ch == 4 || sc.ch == 26) {		// ^D and ^Z ends valid perl source
1620 				sc.SetState(SCE_PL_DATASECTION);
1621 			} else {
1622 				// keep colouring defaults
1623 				sc.Complete();
1624 			}
1625 		}
1626 	}
1627 	sc.Complete();
1628 	if (sc.state == SCE_PL_HERE_Q
1629 	        || sc.state == SCE_PL_HERE_QQ
1630 	        || sc.state == SCE_PL_HERE_QX
1631 	        || sc.state == SCE_PL_FORMAT) {
1632 		styler.ChangeLexerState(sc.currentPos, styler.Length());
1633 	}
1634 	sc.Complete();
1635 }
1636 
1637 #define PERL_HEADFOLD_SHIFT		4	// left shift that places a POD heading level into bits 7-4 of a fold level
1638 #define PERL_HEADFOLD_MASK		0xF0	// bits 7-4 of a fold level; masked out before a POD heading level is stored or at "=cut"
1639 
Fold(Sci_PositionU startPos,Sci_Position length,int,IDocument * pAccess)1640 void SCI_METHOD LexerPerl::Fold(Sci_PositionU startPos, Sci_Position length, int /* initStyle */, IDocument *pAccess) {
1641 
1642 	if (!options.fold)
1643 		return;
1644 
1645 	LexAccessor styler(pAccess);
1646 
1647 	Sci_PositionU endPos = startPos + length;
1648 	int visibleChars = 0;
1649 	Sci_Position lineCurrent = styler.GetLine(startPos);
1650 
1651 	// Backtrack to previous line in case need to fix its fold status
1652 	if (startPos > 0) {
1653 		if (lineCurrent > 0) {
1654 			lineCurrent--;
1655 			startPos = styler.LineStart(lineCurrent);
1656 		}
1657 	}
1658 
1659 	int levelPrev = SC_FOLDLEVELBASE;
1660 	if (lineCurrent > 0)
1661 		levelPrev = styler.LevelAt(lineCurrent - 1) >> 16;
1662 	int levelCurrent = levelPrev;
1663 	char chNext = styler[startPos];
1664 	char chPrev = styler.SafeGetCharAt(startPos - 1);
1665 	int styleNext = styler.StyleAt(startPos);
1666 	// Used at end of line to determine if the line was a package definition
1667 	bool isPackageLine = false;
1668 	int podHeading = 0;
1669 	for (Sci_PositionU i = startPos; i < endPos; i++) {
1670 		char ch = chNext;
1671 		chNext = styler.SafeGetCharAt(i + 1);
1672 		int style = styleNext;
1673 		styleNext = styler.StyleAt(i + 1);
1674 		int stylePrevCh = (i) ? styler.StyleAt(i - 1):SCE_PL_DEFAULT;
1675 		bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
1676 		bool atLineStart = ((chPrev == '\r') || (chPrev == '\n')) || i == 0;
1677 		// Comment folding
1678 		if (options.foldComment && atEOL && IsCommentLine(lineCurrent, styler)) {
1679 			if (!IsCommentLine(lineCurrent - 1, styler)
1680 			        && IsCommentLine(lineCurrent + 1, styler))
1681 				levelCurrent++;
1682 			else if (IsCommentLine(lineCurrent - 1, styler)
1683 			        && !IsCommentLine(lineCurrent + 1, styler))
1684 				levelCurrent--;
1685 		}
1686 		// {} [] block folding
1687 		if (style == SCE_PL_OPERATOR) {
1688 			if (ch == '{') {
1689 				if (options.foldAtElse && levelCurrent < levelPrev)
1690 					--levelPrev;
1691 				levelCurrent++;
1692 			} else if (ch == '}') {
1693 				levelCurrent--;
1694 			}
1695 			if (ch == '[') {
1696 				if (options.foldAtElse && levelCurrent < levelPrev)
1697 					--levelPrev;
1698 				levelCurrent++;
1699 			} else if (ch == ']') {
1700 				levelCurrent--;
1701 			}
1702 		}
1703 		// POD folding
1704 		if (options.foldPOD && atLineStart) {
1705 			if (style == SCE_PL_POD) {
1706 				if (stylePrevCh != SCE_PL_POD && stylePrevCh != SCE_PL_POD_VERB)
1707 					levelCurrent++;
1708 				else if (styler.Match(i, "=cut"))
1709 					levelCurrent = (levelCurrent & ~PERL_HEADFOLD_MASK) - 1;
1710 				else if (styler.Match(i, "=head"))
1711 					podHeading = PodHeadingLevel(i, styler);
1712 			} else if (style == SCE_PL_DATASECTION) {
1713 				if (ch == '=' && IsASCII(chNext) && isalpha(chNext) && levelCurrent == SC_FOLDLEVELBASE)
1714 					levelCurrent++;
1715 				else if (styler.Match(i, "=cut") && levelCurrent > SC_FOLDLEVELBASE)
1716 					levelCurrent = (levelCurrent & ~PERL_HEADFOLD_MASK) - 1;
1717 				else if (styler.Match(i, "=head"))
1718 					podHeading = PodHeadingLevel(i, styler);
1719 				// if package used or unclosed brace, level > SC_FOLDLEVELBASE!
1720 				// reset needed as level test is vs. SC_FOLDLEVELBASE
1721 				else if (stylePrevCh != SCE_PL_DATASECTION)
1722 					levelCurrent = SC_FOLDLEVELBASE;
1723 			}
1724 		}
1725 		// package folding
1726 		if (options.foldPackage && atLineStart) {
1727 			if (IsPackageLine(lineCurrent, styler)
1728 			        && !IsPackageLine(lineCurrent + 1, styler))
1729 				isPackageLine = true;
1730 		}
1731 
1732 		//heredoc folding
1733 		switch (style) {
1734 		case SCE_PL_HERE_QQ :
1735 		case SCE_PL_HERE_Q :
1736 		case SCE_PL_HERE_QX :
1737 			switch (stylePrevCh) {
1738 			case SCE_PL_HERE_QQ :
1739 			case SCE_PL_HERE_Q :
1740 			case SCE_PL_HERE_QX :
1741 				//do nothing;
1742 				break;
1743 			default :
1744 				levelCurrent++;
1745 				break;
1746 			}
1747 			break;
1748 		default:
1749 			switch (stylePrevCh) {
1750 			case SCE_PL_HERE_QQ :
1751 			case SCE_PL_HERE_Q :
1752 			case SCE_PL_HERE_QX :
1753 				levelCurrent--;
1754 				break;
1755 			default :
1756 				//do nothing;
1757 				break;
1758 			}
1759 			break;
1760 		}
1761 
1762 		//explicit folding
1763 		if (options.foldCommentExplicit && style == SCE_PL_COMMENTLINE && ch == '#') {
1764 			if (chNext == '{') {
1765 				levelCurrent++;
1766 			} else if (levelCurrent > SC_FOLDLEVELBASE  && chNext == '}') {
1767 				levelCurrent--;
1768 			}
1769 		}
1770 
1771 		if (atEOL) {
1772 			int lev = levelPrev;
1773 			// POD headings occupy bits 7-4, leaving some breathing room for
1774 			// non-standard practice -- POD sections stuck in blocks, etc.
1775 			if (podHeading > 0) {
1776 				levelCurrent = (lev & ~PERL_HEADFOLD_MASK) | (podHeading << PERL_HEADFOLD_SHIFT);
1777 				lev = levelCurrent - 1;
1778 				lev |= SC_FOLDLEVELHEADERFLAG;
1779 				podHeading = 0;
1780 			}
1781 			// Check if line was a package declaration
1782 			// because packages need "special" treatment
1783 			if (isPackageLine) {
1784 				lev = SC_FOLDLEVELBASE | SC_FOLDLEVELHEADERFLAG;
1785 				levelCurrent = SC_FOLDLEVELBASE + 1;
1786 				isPackageLine = false;
1787 			}
1788 			lev |= levelCurrent << 16;
1789 			if (visibleChars == 0 && options.foldCompact)
1790 				lev |= SC_FOLDLEVELWHITEFLAG;
1791 			if ((levelCurrent > levelPrev) && (visibleChars > 0))
1792 				lev |= SC_FOLDLEVELHEADERFLAG;
1793 			if (lev != styler.LevelAt(lineCurrent)) {
1794 				styler.SetLevel(lineCurrent, lev);
1795 			}
1796 			lineCurrent++;
1797 			levelPrev = levelCurrent;
1798 			visibleChars = 0;
1799 		}
1800 		if (!isspacechar(ch))
1801 			visibleChars++;
1802 		chPrev = ch;
1803 	}
1804 	// Fill in the real level of the next line, keeping the current flags as they will be filled in later
1805 	int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
1806 	styler.SetLevel(lineCurrent, levelPrev | flagsNext);
1807 }
1808 
// File-scope LexerModule object for this lexer. It is constructed from the
// SCLEX_PERL identifier, the LexerPerl::LexerFactoryPerl factory function,
// the name string "perl", and perlWordListDesc (presumably the descriptions
// of the Perl keyword lists -- defined elsewhere in this file).
LexerModule lmPerl(SCLEX_PERL, LexerPerl::LexerFactoryPerl, "perl", perlWordListDesc);
1810