1 // Scintilla source code edit control
2 /** @file LexPerl.cxx
3 ** Lexer for subset of Perl.
4 **/
5 // Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>
6 // Lexical analysis fixes by Kein-Hong Man <mkh@pl.jaring.my>
7 // The License.txt file describes the conditions under which this software may be distributed.
8
9 #include <stdlib.h>
10 #include <string.h>
11 #include <ctype.h>
12 #include <stdio.h>
13 #include <stdarg.h>
14
15 #include "Platform.h"
16
17 #include "PropSet.h"
18 #include "Accessor.h"
19 #include "KeyWords.h"
20 #include "Scintilla.h"
21 #include "SciLexer.h"
22
// Sub-states tracked while a numeric literal is being lexed.
// (Source-listing line numbers that were fused in front of each directive
// have been removed so that these are valid preprocessor lines again.)
#define PERLNUM_BINARY 1	// order is significant: 1-4 cannot have a dot
#define PERLNUM_HEX 2
#define PERLNUM_OCTAL 3
#define PERLNUM_FLOAT 4	// actually exponent part
#define PERLNUM_DECIMAL 5	// 1-5 are numbers; 6-7 are strings
#define PERLNUM_VECTOR 6
#define PERLNUM_V_VECTOR 7
#define PERLNUM_BAD 8

#define BACK_NONE 0	// lookback state for bareword disambiguation:
#define BACK_OPERATOR 1	// whitespace/comments are insignificant
#define BACK_KEYWORD 2	// operators/keywords are needed for disambiguation

// Capacity (including the terminating NUL) of the here-doc delimiter buffer.
#define HERE_DELIM_MAX 256
37
/**
 * Tell whether @a ch is one of the two end-of-line characters.
 * @param ch character to classify
 * @return true for carriage return or line feed, false otherwise
 */
static inline bool isEOLChar(char ch) {
	return ch == '\r' || ch == '\n';
}
41
/**
 * Tell whether @a ch is one of the letters accepted after '-' as a
 * single-character file test operator (e.g. -r, -w, -e).
 * @param ch candidate operator letter
 * @return true if @a ch is in the accepted letter set
 */
static bool isSingleCharOp(char ch) {
	// A NUL must be rejected explicitly: searching for it would match the
	// terminator of the letter set (the former strstr() call with an empty
	// needle always succeeded for the same reason).
	if (ch == '\0')
		return false;
	return NULL != strchr("rwxoRWXOezsfdlpSbctugkTBMAC", ch);
}
48
/**
 * Tell whether @a ch is one of the operator characters recognised here.
 * '%', '*', '<' and '/' are deliberately absent: the original comment notes
 * that they are already tested before this function is called.
 * @param ch character to classify
 * @return true if @a ch is an operator character
 */
static inline bool isPerlOperator(char ch) {
	switch (ch) {
	case '^': case '&': case '\\':
	case '(': case ')': case '-': case '+':
	case '=': case '|': case '{': case '}':
	case '[': case ']': case ':': case ';':
	case '>': case ',':
	case '?': case '!': case '.': case '~':
		return true;
	default:
		return false;
	}
}
61
isPerlKeyword(unsigned int start,unsigned int end,WordList & keywords,Accessor & styler)62 static bool isPerlKeyword(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
63 char s[100];
64 unsigned int i, len = end - start;
65 if (len > 30) { len = 30; }
66 for (i = 0; i < len; i++, start++) s[i] = styler[start];
67 s[i] = '\0';
68 return keywords.InList(s);
69 }
70
/**
 * Tell whether @a ch terminates a variable name.
 * Alphanumerics, '#', '$', '_' and the apostrophe continue a name; anything
 * else ends it.
 * @param ch character following the name so far
 * @return true if @a ch cannot be part of the variable name
 */
static inline bool isEndVar(char ch) {
	// <ctype.h> classifiers require a value representable as unsigned char;
	// a plain char holding a byte >= 0x80 may be negative.
	const bool alnum = isalnum(static_cast<unsigned char>(ch)) != 0;
	return !alnum && ch != '#' && ch != '$' &&
	       ch != '_' && ch != '\'';
}
75
76
/**
 * Tell whether @a ch is a word character (alphanumeric or underscore).
 * The lexer treats a quote-like prefix (s, m, q, y, tr, qq, ...) as such only
 * when the character after it is not one of these.
 * @param ch character following a potential quote-like operator
 * @return true if @a ch is alphanumeric or '_'
 */
static inline bool isNonQuote(char ch) {
	// <ctype.h> classifiers require a value representable as unsigned char;
	// a plain char holding a byte >= 0x80 may be negative.
	return isalnum(static_cast<unsigned char>(ch)) != 0 || ch == '_';
}
80
actualNumStyle(int numberStyle)81 static inline char actualNumStyle(int numberStyle) {
82 if (numberStyle == PERLNUM_VECTOR || numberStyle == PERLNUM_V_VECTOR) {
83 return SCE_PL_STRING;
84 } else if (numberStyle == PERLNUM_BAD) {
85 return SCE_PL_ERROR;
86 }
87 return SCE_PL_NUMBER;
88 }
89
isMatch(Accessor & styler,int lengthDoc,int pos,const char * val)90 static bool isMatch(Accessor &styler, int lengthDoc, int pos, const char *val) {
91 if ((pos + static_cast<int>(strlen(val))) >= lengthDoc) {
92 return false;
93 }
94 while (*val) {
95 if (*val != styler[pos++]) {
96 return false;
97 }
98 val++;
99 }
100 return true;
101 }
102
/**
 * Return the closing delimiter that pairs with an opening delimiter.
 * @param ch opening delimiter
 * @return ')' , ']' , '}' or '>' for the four bracket openers; any other
 *         character is returned unchanged (it closes itself)
 */
static char opposite(char ch) {
	switch (ch) {
	case '(':
		return ')';
	case '[':
		return ']';
	case '{':
		return '}';
	case '<':
		return '>';
	default:
		return ch;
	}
}
114
ColourisePerlDoc(unsigned int startPos,int length,int initStyle,WordList * keywordlists[],Accessor & styler)115 static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,
116 WordList *keywordlists[], Accessor &styler) {
117
118 // Lexer for perl often has to backtrack to start of current style to determine
119 // which characters are being used as quotes, how deeply nested is the
120 // start position and what the termination string is for here documents
121
122 WordList &keywords = *keywordlists[0];
123
124 class HereDocCls {
125 public:
126 int State; // 0: '<<' encountered
127 // 1: collect the delimiter
128 // 2: here doc text (lines after the delimiter)
129 char Quote; // the char after '<<'
130 bool Quoted; // true if Quote in ('\'','"','`')
131 int DelimiterLength; // strlen(Delimiter)
132 char *Delimiter; // the Delimiter, 256: sizeof PL_tokenbuf
133 HereDocCls() {
134 State = 0;
135 Quote = 0;
136 Quoted = false;
137 DelimiterLength = 0;
138 Delimiter = new char[HERE_DELIM_MAX];
139 Delimiter[0] = '\0';
140 }
141 ~HereDocCls() {
142 delete []Delimiter;
143 }
144 };
145 HereDocCls HereDoc; // TODO: FIFO for stacked here-docs
146
147 class QuoteCls {
148 public:
149 int Rep;
150 int Count;
151 char Up;
152 char Down;
153 QuoteCls() {
154 this->New(1);
155 }
156 void New(int r) {
157 Rep = r;
158 Count = 0;
159 Up = '\0';
160 Down = '\0';
161 }
162 void Open(char u) {
163 Count++;
164 Up = u;
165 Down = opposite(Up);
166 }
167 };
168 QuoteCls Quote;
169
170 int state = initStyle;
171 char numState = PERLNUM_DECIMAL;
172 int dotCount = 0;
173 unsigned int lengthDoc = startPos + length;
174 //int sookedpos = 0; // these have no apparent use, see POD state
175 //char sooked[100];
176 //sooked[sookedpos] = '\0';
177
178 // If in a long distance lexical state, seek to the beginning to find quote characters
179 // Perl strings can be multi-line with embedded newlines, so backtrack.
180 // Perl numbers have additional state during lexing, so backtrack too.
181 if (state == SCE_PL_HERE_Q || state == SCE_PL_HERE_QQ || state == SCE_PL_HERE_QX) {
182 while ((startPos > 1) && (styler.StyleAt(startPos) != SCE_PL_HERE_DELIM)) {
183 startPos--;
184 }
185 startPos = styler.LineStart(styler.GetLine(startPos));
186 state = styler.StyleAt(startPos - 1);
187 }
188 if ( state == SCE_PL_STRING_Q
189 || state == SCE_PL_STRING_QQ
190 || state == SCE_PL_STRING_QX
191 || state == SCE_PL_STRING_QR
192 || state == SCE_PL_STRING_QW
193 || state == SCE_PL_REGEX
194 || state == SCE_PL_REGSUBST
195 || state == SCE_PL_STRING
196 || state == SCE_PL_BACKTICKS
197 || state == SCE_PL_CHARACTER
198 || state == SCE_PL_NUMBER
199 || state == SCE_PL_IDENTIFIER
200 || state == SCE_PL_ERROR
201 ) {
202 while ((startPos > 1) && (styler.StyleAt(startPos - 1) == state)) {
203 startPos--;
204 }
205 state = SCE_PL_DEFAULT;
206 }
207
208 // lookback at start of lexing to set proper state for backflag
209 // after this, they are updated when elements are lexed
210 int backflag = BACK_NONE;
211 unsigned int backPos = startPos;
212 if (backPos > 0) {
213 backPos--;
214 int sty = SCE_PL_DEFAULT;
215 while ((backPos > 0) && (sty = styler.StyleAt(backPos),
216 sty == SCE_PL_DEFAULT || sty == SCE_PL_COMMENTLINE))
217 backPos--;
218 if (sty == SCE_PL_OPERATOR)
219 backflag = BACK_OPERATOR;
220 else if (sty == SCE_PL_WORD)
221 backflag = BACK_KEYWORD;
222 }
223
224 styler.StartAt(startPos);
225 char chPrev = styler.SafeGetCharAt(startPos - 1);
226 if (startPos == 0)
227 chPrev = '\n';
228 char chNext = styler[startPos];
229 styler.StartSegment(startPos);
230
231 for (unsigned int i = startPos; i < lengthDoc; i++) {
232 char ch = chNext;
233 // if the current character is not consumed due to the completion of an
234 // earlier style, lexing can be restarted via a simple goto
235 restartLexer:
236 chNext = styler.SafeGetCharAt(i + 1);
237 char chNext2 = styler.SafeGetCharAt(i + 2);
238
239 if (styler.IsLeadByte(ch)) {
240 chNext = styler.SafeGetCharAt(i + 2);
241 chPrev = ' ';
242 i += 1;
243 continue;
244 }
245 if ((chPrev == '\r' && ch == '\n')) { // skip on DOS/Windows
246 styler.ColourTo(i, state);
247 chPrev = ch;
248 continue;
249 }
250
251 if (HereDoc.State == 1 && isEOLChar(ch)) {
252 // Begin of here-doc (the line after the here-doc delimiter):
253 // Lexically, the here-doc starts from the next line after the >>, but the
254 // first line of here-doc seem to follow the style of the last EOL sequence
255 HereDoc.State = 2;
256 if (HereDoc.Quoted) {
257 if (state == SCE_PL_HERE_DELIM) {
258 // Missing quote at end of string! We are stricter than perl.
259 // Colour here-doc anyway while marking this bit as an error.
260 state = SCE_PL_ERROR;
261 }
262 styler.ColourTo(i - 1, state);
263 switch (HereDoc.Quote) {
264 case '\'':
265 state = SCE_PL_HERE_Q ;
266 break;
267 case '"':
268 state = SCE_PL_HERE_QQ;
269 break;
270 case '`':
271 state = SCE_PL_HERE_QX;
272 break;
273 }
274 } else {
275 styler.ColourTo(i - 1, state);
276 switch (HereDoc.Quote) {
277 case '\\':
278 state = SCE_PL_HERE_Q ;
279 break;
280 default :
281 state = SCE_PL_HERE_QQ;
282 }
283 }
284 }
285
286 if (state == SCE_PL_DEFAULT) {
287 if (isdigit(ch) || (isdigit(chNext) &&
288 (ch == '.' || ch == 'v'))) {
289 state = SCE_PL_NUMBER;
290 backflag = BACK_NONE;
291 numState = PERLNUM_DECIMAL;
292 dotCount = 0;
293 if (ch == '0') { // hex,bin,octal
294 if (chNext == 'x') {
295 numState = PERLNUM_HEX;
296 } else if (chNext == 'b') {
297 numState = PERLNUM_BINARY;
298 } else if (isdigit(chNext)) {
299 numState = PERLNUM_OCTAL;
300 }
301 if (numState != PERLNUM_DECIMAL) {
302 i++;
303 ch = chNext;
304 chNext = chNext2;
305 }
306 } else if (ch == 'v') { // vector
307 numState = PERLNUM_V_VECTOR;
308 }
309 } else if (iswordstart(ch)) {
310 // if immediately prefixed by '::', always a bareword
311 state = SCE_PL_WORD;
312 if (chPrev == ':' && styler.SafeGetCharAt(i - 2) == ':') {
313 state = SCE_PL_IDENTIFIER;
314 }
315 unsigned int kw = i + 1;
316 // first check for possible quote-like delimiter
317 if (ch == 's' && !isNonQuote(chNext)) {
318 state = SCE_PL_REGSUBST;
319 Quote.New(2);
320 } else if (ch == 'm' && !isNonQuote(chNext)) {
321 state = SCE_PL_REGEX;
322 Quote.New(1);
323 } else if (ch == 'q' && !isNonQuote(chNext)) {
324 state = SCE_PL_STRING_Q;
325 Quote.New(1);
326 } else if (ch == 'y' && !isNonQuote(chNext)) {
327 state = SCE_PL_REGSUBST;
328 Quote.New(2);
329 } else if (ch == 't' && chNext == 'r' && !isNonQuote(chNext2)) {
330 state = SCE_PL_REGSUBST;
331 Quote.New(2);
332 kw++;
333 } else if (ch == 'q' && (chNext == 'q' || chNext == 'r' || chNext == 'w' || chNext == 'x') && !isNonQuote(chNext2)) {
334 if (chNext == 'q') state = SCE_PL_STRING_QQ;
335 else if (chNext == 'x') state = SCE_PL_STRING_QX;
336 else if (chNext == 'r') state = SCE_PL_STRING_QR;
337 else if (chNext == 'w') state = SCE_PL_STRING_QW;
338 Quote.New(1);
339 kw++;
340 } else if (ch == 'x' && (chNext == '=' || // repetition
341 (chNext != '_' && !isalnum(chNext)) ||
342 (isdigit(chPrev) && isdigit(chNext)))) {
343 state = SCE_PL_OPERATOR;
344 }
345 // if potentially a keyword, scan forward and grab word, then check
346 // if it's really one; if yes, disambiguation test is performed
347 // otherwise it is always a bareword and we skip a lot of scanning
348 // note: keywords assumed to be limited to [_a-zA-Z] only
349 if (state == SCE_PL_WORD) {
350 while (iswordstart(styler.SafeGetCharAt(kw))) kw++;
351 if (!isPerlKeyword(styler.GetStartSegment(), kw, keywords, styler)) {
352 state = SCE_PL_IDENTIFIER;
353 }
354 }
355 // if already SCE_PL_IDENTIFIER, then no ambiguity, skip this
356 // for quote-like delimiters/keywords, attempt to disambiguate
357 // to select for bareword, change state -> SCE_PL_IDENTIFIER
358 if (state != SCE_PL_IDENTIFIER && i > 0) {
359 unsigned int j = i;
360 bool moreback = false; // true if passed newline/comments
361 bool brace = false; // true if opening brace found
362 char ch2;
363 // first look backwards past whitespace/comments for EOLs
364 // if BACK_NONE, neither operator nor keyword, so skip test
365 if (backflag != BACK_NONE) {
366 while (--j > backPos) {
367 if (isEOLChar(styler.SafeGetCharAt(j)))
368 moreback = true;
369 }
370 ch2 = styler.SafeGetCharAt(j);
371 if (ch2 == '{' && !moreback) {
372 // {bareword: possible variable spec
373 brace = true;
374 } else if ((ch2 == '&')
375 // &bareword: subroutine call
376 || (ch2 == '>' && styler.SafeGetCharAt(j - 1) == '-')
377 // ->bareword: part of variable spec
378 || (ch2 == 'b' && styler.Match(j - 2, "su"))) {
379 // sub bareword: subroutine declaration
380 // (implied BACK_KEYWORD, no keywords end in 'sub'!)
381 state = SCE_PL_IDENTIFIER;
382 }
383 // if status still ambiguous, look forward after word past
384 // tabs/spaces only; if ch2 isn't one of '[{(,' it can never
385 // match anything, so skip the whole thing
386 j = kw;
387 if (state != SCE_PL_IDENTIFIER
388 && (ch2 == '{' || ch2 == '(' || ch2 == '['|| ch2 == ',')
389 && kw < lengthDoc) {
390 while (ch2 = styler.SafeGetCharAt(j),
391 (ch2 == ' ' || ch2 == '\t') && j < lengthDoc) {
392 j++;
393 }
394 if ((ch2 == '}' && brace)
395 // {bareword}: variable spec
396 || (ch2 == '=' && styler.SafeGetCharAt(j + 1) == '>')) {
397 // [{(, bareword=>: hash literal
398 state = SCE_PL_IDENTIFIER;
399 }
400 }
401 }
402 }
403 backflag = BACK_NONE;
404 // an identifier or bareword
405 if (state == SCE_PL_IDENTIFIER) {
406 if ((!iswordchar(chNext) && chNext != '\'')
407 || (chNext == '.' && chNext2 == '.')) {
408 // We need that if length of word == 1!
409 // This test is copied from the SCE_PL_WORD handler.
410 styler.ColourTo(i, SCE_PL_IDENTIFIER);
411 state = SCE_PL_DEFAULT;
412 }
413 // a keyword
414 } else if (state == SCE_PL_WORD) {
415 i = kw - 1;
416 if (ch == '_' && chNext == '_' &&
417 (isMatch(styler, lengthDoc, styler.GetStartSegment(), "__DATA__")
418 || isMatch(styler, lengthDoc, styler.GetStartSegment(), "__END__"))) {
419 styler.ColourTo(i, SCE_PL_DATASECTION);
420 state = SCE_PL_DATASECTION;
421 } else {
422 styler.ColourTo(i, SCE_PL_WORD);
423 state = SCE_PL_DEFAULT;
424 backflag = BACK_KEYWORD;
425 backPos = i;
426 }
427 ch = styler.SafeGetCharAt(i);
428 chNext = styler.SafeGetCharAt(i + 1);
429 // a repetition operator 'x'
430 } else if (state == SCE_PL_OPERATOR) {
431 styler.ColourTo(i, SCE_PL_OPERATOR);
432 state = SCE_PL_DEFAULT;
433 // quote-like delimiter, skip one char if double-char delimiter
434 } else {
435 i = kw - 1;
436 chNext = styler.SafeGetCharAt(i + 1);
437 }
438 } else if (ch == '#') {
439 state = SCE_PL_COMMENTLINE;
440 } else if (ch == '\"') {
441 state = SCE_PL_STRING;
442 Quote.New(1);
443 Quote.Open(ch);
444 backflag = BACK_NONE;
445 } else if (ch == '\'') {
446 if (chPrev == '&') {
447 // Archaic call
448 styler.ColourTo(i, state);
449 } else {
450 state = SCE_PL_CHARACTER;
451 Quote.New(1);
452 Quote.Open(ch);
453 }
454 backflag = BACK_NONE;
455 } else if (ch == '`') {
456 state = SCE_PL_BACKTICKS;
457 Quote.New(1);
458 Quote.Open(ch);
459 backflag = BACK_NONE;
460 } else if (ch == '$') {
461 if ((chNext == '{') || isspacechar(chNext)) {
462 styler.ColourTo(i, SCE_PL_SCALAR);
463 } else {
464 state = SCE_PL_SCALAR;
465 if (chNext == '`' && chNext2 == '`') {
466 i += 2;
467 ch = styler.SafeGetCharAt(i);
468 chNext = styler.SafeGetCharAt(i + 1);
469 } else {
470 i++;
471 ch = chNext;
472 chNext = chNext2;
473 }
474 }
475 backflag = BACK_NONE;
476 } else if (ch == '@') {
477 if (isalpha(chNext) || chNext == '#' || chNext == '$'
478 || chNext == '_' || chNext == '+' || chNext == '-') {
479 state = SCE_PL_ARRAY;
480 } else if (chNext != '{' && chNext != '[') {
481 styler.ColourTo(i, SCE_PL_ARRAY);
482 } else {
483 styler.ColourTo(i, SCE_PL_ARRAY);
484 }
485 backflag = BACK_NONE;
486 } else if (ch == '%') {
487 if (isalpha(chNext) || chNext == '#' || chNext == '$'
488 || chNext == '_' || chNext == '!' || chNext == '^') {
489 state = SCE_PL_HASH;
490 i++;
491 ch = chNext;
492 chNext = chNext2;
493 } else if (chNext == '{') {
494 styler.ColourTo(i, SCE_PL_HASH);
495 } else {
496 styler.ColourTo(i, SCE_PL_OPERATOR);
497 }
498 backflag = BACK_NONE;
499 } else if (ch == '*') {
500 char strch[2];
501 strch[0] = chNext;
502 strch[1] = '\0';
503 if (isalpha(chNext) || chNext == '_' ||
504 NULL != strstr("^/|,\\\";#%^:?<>)[]", strch)) {
505 state = SCE_PL_SYMBOLTABLE;
506 i++;
507 ch = chNext;
508 chNext = chNext2;
509 } else if (chNext == '{') {
510 styler.ColourTo(i, SCE_PL_SYMBOLTABLE);
511 } else {
512 if (chNext == '*') { // exponentiation
513 i++;
514 ch = chNext;
515 chNext = chNext2;
516 }
517 styler.ColourTo(i, SCE_PL_OPERATOR);
518 }
519 backflag = BACK_NONE;
520 } else if (ch == '/' || (ch == '<' && chNext == '<')) {
521 // Explicit backward peeking to set a consistent preferRE for
522 // any slash found, so no longer need to track preferRE state.
523 // Find first previous significant lexed element and interpret.
524 // Test for HERE doc start '<<' shares this code, helps to
525 // determine if it should be an operator.
526 bool preferRE = false;
527 bool isHereDoc = (ch == '<');
528 bool hereDocSpace = false; // these are for corner case:
529 bool hereDocScalar = false; // SCALAR [whitespace] '<<'
530 unsigned int bk = (i > 0)? i - 1: 0;
531 char bkch;
532 styler.Flush();
533 if (styler.StyleAt(bk) == SCE_PL_DEFAULT)
534 hereDocSpace = true;
535 while ((bk > 0) && (styler.StyleAt(bk) == SCE_PL_DEFAULT ||
536 styler.StyleAt(bk) == SCE_PL_COMMENTLINE)) {
537 bk--;
538 }
539 if (bk == 0) {
540 // position 0 won't really be checked; rarely happens
541 // hard to fix due to an unsigned index i
542 preferRE = true;
543 } else {
544 int bkstyle = styler.StyleAt(bk);
545 bkch = styler.SafeGetCharAt(bk);
546 switch(bkstyle) {
547 case SCE_PL_OPERATOR:
548 preferRE = true;
549 if (bkch == ')' || bkch == ']') {
550 preferRE = false;
551 } else if (bkch == '}') {
552 // backtrack further, count balanced brace pairs
553 // if a brace pair found, see if it's a variable
554 int braceCount = 1;
555 while (--bk > 0) {
556 bkstyle = styler.StyleAt(bk);
557 if (bkstyle == SCE_PL_OPERATOR) {
558 bkch = styler.SafeGetCharAt(bk);
559 if (bkch == ';') { // early out
560 break;
561 } else if (bkch == '}') {
562 braceCount++;
563 } else if (bkch == '{') {
564 if (--braceCount == 0)
565 break;
566 }
567 }
568 }
569 if (bk == 0) {
570 // at beginning, true
571 } else if (braceCount == 0) {
572 // balanced { found, bk>0, skip more whitespace
573 if (styler.StyleAt(--bk) == SCE_PL_DEFAULT) {
574 while (bk > 0) {
575 bkstyle = styler.StyleAt(--bk);
576 if (bkstyle != SCE_PL_DEFAULT)
577 break;
578 }
579 }
580 bkstyle = styler.StyleAt(bk);
581 if (bkstyle == SCE_PL_SCALAR
582 || bkstyle == SCE_PL_ARRAY
583 || bkstyle == SCE_PL_HASH
584 || bkstyle == SCE_PL_SYMBOLTABLE
585 || bkstyle == SCE_PL_OPERATOR) {
586 preferRE = false;
587 }
588 }
589 }
590 break;
591 case SCE_PL_IDENTIFIER:
592 preferRE = true;
593 if (bkch == '>') { // inputsymbol
594 preferRE = false;
595 break;
596 }
597 // backtrack to find "->" or "::" before identifier
598 while (bk > 0 && styler.StyleAt(bk) == SCE_PL_IDENTIFIER) {
599 bk--;
600 }
601 while (bk > 0) {
602 bkstyle = styler.StyleAt(bk);
603 if (bkstyle == SCE_PL_DEFAULT ||
604 bkstyle == SCE_PL_COMMENTLINE) {
605 } else if (bkstyle == SCE_PL_OPERATOR) {
606 // gcc 3.2.3 bloats if more compact form used
607 bkch = styler.SafeGetCharAt(bk);
608 if (bkch == '>') { // "->"
609 if (styler.SafeGetCharAt(bk - 1) == '-') {
610 preferRE = false;
611 break;
612 }
613 } else if (bkch == ':') { // "::"
614 if (styler.SafeGetCharAt(bk - 1) == ':') {
615 preferRE = false;
616 break;
617 }
618 }
619 } else {// bare identifier, usually a function call but Perl
620 // optimizes them as pseudo-constants, then the next
621 // '/' will be a divide; favour divide over regex
622 // if there is a whitespace after the '/'
623 if (isspacechar(chNext)) {
624 preferRE = false;
625 }
626 break;
627 }
628 bk--;
629 }
630 break;
631 case SCE_PL_SCALAR: // for $var<< case
632 hereDocScalar = true;
633 break;
634 // other styles uses the default, preferRE=false
635 case SCE_PL_WORD:
636 case SCE_PL_POD:
637 case SCE_PL_POD_VERB:
638 case SCE_PL_HERE_Q:
639 case SCE_PL_HERE_QQ:
640 case SCE_PL_HERE_QX:
641 preferRE = true;
642 break;
643 }
644 }
645 if (isHereDoc) { // handle HERE doc
646 // if SCALAR whitespace '<<', *always* a HERE doc
647 if (preferRE || (hereDocSpace && hereDocScalar)) {
648 state = SCE_PL_HERE_DELIM;
649 HereDoc.State = 0;
650 } else { // << operator
651 i++;
652 ch = chNext;
653 chNext = chNext2;
654 styler.ColourTo(i, SCE_PL_OPERATOR);
655 }
656 } else { // handle regexp
657 if (preferRE) {
658 state = SCE_PL_REGEX;
659 Quote.New(1);
660 Quote.Open(ch);
661 } else { // / operator
662 styler.ColourTo(i, SCE_PL_OPERATOR);
663 }
664 }
665 backflag = BACK_NONE;
666 } else if (ch == '<') {
667 // looks forward for matching > on same line
668 unsigned int fw = i + 1;
669 while (fw < lengthDoc) {
670 char fwch = styler.SafeGetCharAt(fw);
671 if (fwch == ' ') {
672 if (styler.SafeGetCharAt(fw-1) != '\\' ||
673 styler.SafeGetCharAt(fw-2) != '\\')
674 break;
675 } else if (isEOLChar(fwch) || isspacechar(fwch)) {
676 break;
677 } else if (fwch == '>') {
678 if ((fw - i) == 2 && // '<=>' case
679 styler.SafeGetCharAt(fw-1) == '=') {
680 styler.ColourTo(fw, SCE_PL_OPERATOR);
681 } else {
682 styler.ColourTo(fw, SCE_PL_IDENTIFIER);
683 }
684 i = fw;
685 ch = fwch;
686 chNext = styler.SafeGetCharAt(i+1);
687 }
688 fw++;
689 }
690 styler.ColourTo(i, SCE_PL_OPERATOR);
691 backflag = BACK_NONE;
692 } else if (ch == '=' // POD
693 && isalpha(chNext)
694 && (isEOLChar(chPrev))) {
695 state = SCE_PL_POD;
696 backflag = BACK_NONE;
697 //sookedpos = 0;
698 //sooked[sookedpos] = '\0';
699 } else if (ch == '-' // file test operators
700 && isSingleCharOp(chNext)
701 && !isalnum((chNext2 = styler.SafeGetCharAt(i+2)))) {
702 styler.ColourTo(i + 1, SCE_PL_WORD);
703 state = SCE_PL_DEFAULT;
704 i++;
705 ch = chNext;
706 chNext = chNext2;
707 backflag = BACK_NONE;
708 } else if (isPerlOperator(ch)) {
709 if (ch == '.' && chNext == '.') { // .. and ...
710 i++;
711 if (chNext2 == '.') { i++; }
712 state = SCE_PL_DEFAULT;
713 ch = styler.SafeGetCharAt(i);
714 chNext = styler.SafeGetCharAt(i + 1);
715 }
716 styler.ColourTo(i, SCE_PL_OPERATOR);
717 backflag = BACK_OPERATOR;
718 backPos = i;
719 } else {
720 // keep colouring defaults to make restart easier
721 styler.ColourTo(i, SCE_PL_DEFAULT);
722 }
723 } else if (state == SCE_PL_NUMBER) {
724 if (ch == '.') {
725 if (chNext == '.') {
726 // double dot is always an operator
727 goto numAtEnd;
728 } else if (numState <= PERLNUM_FLOAT) {
729 // non-decimal number or float exponent, consume next dot
730 styler.ColourTo(i - 1, SCE_PL_NUMBER);
731 styler.ColourTo(i, SCE_PL_OPERATOR);
732 state = SCE_PL_DEFAULT;
733 } else { // decimal or vectors allows dots
734 dotCount++;
735 if (numState == PERLNUM_DECIMAL) {
736 if (dotCount > 1) {
737 if (isdigit(chNext)) { // really a vector
738 numState = PERLNUM_VECTOR;
739 } else // number then dot
740 goto numAtEnd;
741 }
742 } else { // vectors
743 if (!isdigit(chNext)) // vector then dot
744 goto numAtEnd;
745 }
746 }
747 } else if (ch == '_' && numState == PERLNUM_DECIMAL) {
748 if (!isdigit(chNext)) {
749 goto numAtEnd;
750 }
751 } else if (isalnum(ch)) {
752 if (numState == PERLNUM_VECTOR || numState == PERLNUM_V_VECTOR) {
753 if (isalpha(ch)) {
754 if (dotCount == 0) { // change to word
755 state = SCE_PL_IDENTIFIER;
756 } else { // vector then word
757 goto numAtEnd;
758 }
759 }
760 } else if (numState == PERLNUM_DECIMAL) {
761 if (ch == 'E' || ch == 'e') { // exponent
762 numState = PERLNUM_FLOAT;
763 if (chNext == '+' || chNext == '-') {
764 i++;
765 ch = chNext;
766 chNext = chNext2;
767 }
768 } else if (!isdigit(ch)) { // number then word
769 goto numAtEnd;
770 }
771 } else if (numState == PERLNUM_FLOAT) {
772 if (!isdigit(ch)) { // float then word
773 goto numAtEnd;
774 }
775 } else if (numState == PERLNUM_OCTAL) {
776 if (!isdigit(ch))
777 goto numAtEnd;
778 else if (ch > '7')
779 numState = PERLNUM_BAD;
780 } else if (numState == PERLNUM_BINARY) {
781 if (!isdigit(ch))
782 goto numAtEnd;
783 else if (ch > '1')
784 numState = PERLNUM_BAD;
785 } else if (numState == PERLNUM_HEX) {
786 int ch2 = toupper(ch);
787 if (!isdigit(ch) && !(ch2 >= 'A' && ch2 <= 'F'))
788 goto numAtEnd;
789 } else {//(numState == PERLNUM_BAD) {
790 if (!isdigit(ch))
791 goto numAtEnd;
792 }
793 } else {
794 // complete current number or vector
795 numAtEnd:
796 styler.ColourTo(i - 1, actualNumStyle(numState));
797 state = SCE_PL_DEFAULT;
798 goto restartLexer;
799 }
800 } else if (state == SCE_PL_IDENTIFIER) {
801 if (!iswordstart(chNext) && chNext != '\'') {
802 styler.ColourTo(i, SCE_PL_IDENTIFIER);
803 state = SCE_PL_DEFAULT;
804 ch = ' ';
805 }
806 } else {
807 if (state == SCE_PL_COMMENTLINE) {
808 if (isEOLChar(ch)) {
809 styler.ColourTo(i - 1, state);
810 state = SCE_PL_DEFAULT;
811 goto restartLexer;
812 } else if (isEOLChar(chNext)) {
813 styler.ColourTo(i, state);
814 state = SCE_PL_DEFAULT;
815 }
816 } else if (state == SCE_PL_HERE_DELIM) {
817 //
818 // From perldata.pod:
819 // ------------------
820 // A line-oriented form of quoting is based on the shell ``here-doc''
821 // syntax.
822 // Following a << you specify a string to terminate the quoted material,
823 // and all lines following the current line down to the terminating
824 // string are the value of the item.
825 // The terminating string may be either an identifier (a word),
826 // or some quoted text.
827 // If quoted, the type of quotes you use determines the treatment of
828 // the text, just as in regular quoting.
829 // An unquoted identifier works like double quotes.
830 // There must be no space between the << and the identifier.
831 // (If you put a space it will be treated as a null identifier,
832 // which is valid, and matches the first empty line.)
833 // (This is deprecated, -w warns of this syntax)
834 // The terminating string must appear by itself (unquoted and with no
835 // surrounding whitespace) on the terminating line.
836 //
837 // From Bash info:
838 // ---------------
839 // Specifier format is: <<[-]WORD
840 // Optional '-' is for removal of leading tabs from here-doc.
841 // Whitespace acceptable after <<[-] operator.
842 //
843 if (HereDoc.State == 0) { // '<<' encountered
844 bool gotspace = false;
845 unsigned int oldi = i;
846 if (chNext == ' ' || chNext == '\t') {
847 // skip whitespace; legal for quoted delimiters
848 gotspace = true;
849 do {
850 i++;
851 chNext = styler.SafeGetCharAt(i + 1);
852 } while ((i + 1 < lengthDoc) && (chNext == ' ' || chNext == '\t'));
853 chNext2 = styler.SafeGetCharAt(i + 2);
854 }
855 HereDoc.State = 1;
856 HereDoc.Quote = chNext;
857 HereDoc.Quoted = false;
858 HereDoc.DelimiterLength = 0;
859 HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
860 if (chNext == '\'' || chNext == '"' || chNext == '`') {
861 // a quoted here-doc delimiter
862 i++;
863 ch = chNext;
864 chNext = chNext2;
865 HereDoc.Quoted = true;
866 } else if (isspacechar(chNext) || isdigit(chNext) || chNext == '\\'
867 || chNext == '=' || chNext == '$' || chNext == '@'
868 || ((isalpha(chNext) || chNext == '_') && gotspace)) {
869 // left shift << or <<= operator cases
870 // restore position if operator
871 i = oldi;
872 styler.ColourTo(i, SCE_PL_OPERATOR);
873 state = SCE_PL_DEFAULT;
874 HereDoc.State = 0;
875 goto restartLexer;
876 } else {
877 // an unquoted here-doc delimiter, no special handling
878 // (cannot be prefixed by spaces/tabs), or
879 // symbols terminates; deprecated zero-length delimiter
880 }
881
882 } else if (HereDoc.State == 1) { // collect the delimiter
883 backflag = BACK_NONE;
884 if (HereDoc.Quoted) { // a quoted here-doc delimiter
885 if (ch == HereDoc.Quote) { // closing quote => end of delimiter
886 styler.ColourTo(i, state);
887 state = SCE_PL_DEFAULT;
888 } else {
889 if (ch == '\\' && chNext == HereDoc.Quote) { // escaped quote
890 i++;
891 ch = chNext;
892 chNext = chNext2;
893 }
894 HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;
895 HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
896 }
897 } else { // an unquoted here-doc delimiter
898 if (isalnum(ch) || ch == '_') {
899 HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;
900 HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
901 } else {
902 styler.ColourTo(i - 1, state);
903 state = SCE_PL_DEFAULT;
904 goto restartLexer;
905 }
906 }
907 if (HereDoc.DelimiterLength >= HERE_DELIM_MAX - 1) {
908 styler.ColourTo(i - 1, state);
909 state = SCE_PL_ERROR;
910 goto restartLexer;
911 }
912 }
913 } else if (HereDoc.State == 2) {
914 // state == SCE_PL_HERE_Q || state == SCE_PL_HERE_QQ || state == SCE_PL_HERE_QX
915 if (isEOLChar(chPrev) && isMatch(styler, lengthDoc, i, HereDoc.Delimiter)) {
916 i += HereDoc.DelimiterLength;
917 chPrev = styler.SafeGetCharAt(i - 1);
918 ch = styler.SafeGetCharAt(i);
919 if (isEOLChar(ch)) {
920 styler.ColourTo(i - 1, state);
921 state = SCE_PL_DEFAULT;
922 backflag = BACK_NONE;
923 HereDoc.State = 0;
924 goto restartLexer;
925 }
926 chNext = styler.SafeGetCharAt(i + 1);
927 }
928 } else if (state == SCE_PL_POD
929 || state == SCE_PL_POD_VERB) {
930 if (isEOLChar(chPrev)) {
931 if (ch == ' ' || ch == '\t') {
932 styler.ColourTo(i - 1, state);
933 state = SCE_PL_POD_VERB;
934 } else {
935 styler.ColourTo(i - 1, state);
936 state = SCE_PL_POD;
937 if (ch == '=') {
938 if (isMatch(styler, lengthDoc, i, "=cut")) {
939 styler.ColourTo(i - 1 + 4, state);
940 i += 4;
941 state = SCE_PL_DEFAULT;
942 ch = styler.SafeGetCharAt(i);
943 //chNext = styler.SafeGetCharAt(i + 1);
944 goto restartLexer;
945 }
946 }
947 }
948 }
949 } else if (state == SCE_PL_SCALAR // variable names
950 || state == SCE_PL_ARRAY
951 || state == SCE_PL_HASH
952 || state == SCE_PL_SYMBOLTABLE) {
953 if (ch == ':' && chNext == ':') { // skip ::
954 i++;
955 ch = chNext;
956 chNext = chNext2;
957 }
958 else if (isEndVar(ch)) {
959 if (i == (styler.GetStartSegment() + 1)) {
960 // Special variable: $(, $_ etc.
961 styler.ColourTo(i, state);
962 state = SCE_PL_DEFAULT;
963 } else {
964 styler.ColourTo(i - 1, state);
965 state = SCE_PL_DEFAULT;
966 goto restartLexer;
967 }
968 }
969 } else if (state == SCE_PL_REGEX
970 || state == SCE_PL_STRING_QR
971 ) {
972 if (!Quote.Up && !isspacechar(ch)) {
973 Quote.Open(ch);
974 } else if (ch == '\\' && Quote.Up != '\\') {
975 // SG: Is it save to skip *every* escaped char?
976 i++;
977 ch = chNext;
978 chNext = styler.SafeGetCharAt(i + 1);
979 } else {
980 if (ch == Quote.Down /*&& chPrev != '\\'*/) {
981 Quote.Count--;
982 if (Quote.Count == 0) {
983 Quote.Rep--;
984 if (Quote.Up == Quote.Down) {
985 Quote.Count++;
986 }
987 }
988 if (!isalpha(chNext)) {
989 if (Quote.Rep <= 0) {
990 styler.ColourTo(i, state);
991 state = SCE_PL_DEFAULT;
992 ch = ' ';
993 }
994 }
995 } else if (ch == Quote.Up /*&& chPrev != '\\'*/) {
996 Quote.Count++;
997 } else if (!isalpha(chNext)) {
998 if (Quote.Rep <= 0) {
999 styler.ColourTo(i, state);
1000 state = SCE_PL_DEFAULT;
1001 ch = ' ';
1002 }
1003 }
1004 }
1005 } else if (state == SCE_PL_REGSUBST) {
1006 if (!Quote.Up && !isspacechar(ch)) {
1007 Quote.Open(ch);
1008 } else if (ch == '\\' && Quote.Up != '\\') {
1009 // SG: Is it save to skip *every* escaped char?
1010 i++;
1011 ch = chNext;
1012 chNext = styler.SafeGetCharAt(i + 1);
1013 } else {
1014 if (Quote.Count == 0 && Quote.Rep == 1) {
1015 /* We matched something like s(...) or tr{...}
1016 * and are looking for the next matcher characters,
1017 * which could be either bracketed ({...}) or non-bracketed
1018 * (/.../).
1019 *
1020 * Number-signs are problematic. If they occur after
1021 * the close of the first part, treat them like
1022 * a Quote.Up char, even if they actually start comments.
1023 *
1024 * If we find an alnum, we end the regsubst, and punt.
1025 *
1026 * Eric Promislow ericp@activestate.com Aug 9,2000
1027 */
1028 if (isspacechar(ch)) {
1029 // Keep going
1030 }
1031 else if (isalnum(ch)) {
1032 styler.ColourTo(i, state);
1033 state = SCE_PL_DEFAULT;
1034 ch = ' ';
1035 } else {
1036 Quote.Open(ch);
1037 }
1038 } else if (ch == Quote.Down /*&& chPrev != '\\'*/) {
1039 Quote.Count--;
1040 if (Quote.Count == 0) {
1041 Quote.Rep--;
1042 }
1043 if (!isalpha(chNext)) {
1044 if (Quote.Rep <= 0) {
1045 styler.ColourTo(i, state);
1046 state = SCE_PL_DEFAULT;
1047 ch = ' ';
1048 }
1049 }
1050 if (Quote.Up == Quote.Down) {
1051 Quote.Count++;
1052 }
1053 } else if (ch == Quote.Up /*&& chPrev != '\\'*/) {
1054 Quote.Count++;
1055 } else if (!isalpha(chNext)) {
1056 if (Quote.Rep <= 0) {
1057 styler.ColourTo(i, state);
1058 state = SCE_PL_DEFAULT;
1059 ch = ' ';
1060 }
1061 }
1062 }
1063 } else if (state == SCE_PL_STRING_Q
1064 || state == SCE_PL_STRING_QQ
1065 || state == SCE_PL_STRING_QX
1066 || state == SCE_PL_STRING_QW
1067 || state == SCE_PL_STRING
1068 || state == SCE_PL_CHARACTER
1069 || state == SCE_PL_BACKTICKS
1070 ) {
1071 if (!Quote.Down && !isspacechar(ch)) {
1072 Quote.Open(ch);
1073 } else if (ch == '\\' && Quote.Up != '\\') {
1074 i++;
1075 ch = chNext;
1076 chNext = styler.SafeGetCharAt(i + 1);
1077 } else if (ch == Quote.Down) {
1078 Quote.Count--;
1079 if (Quote.Count == 0) {
1080 Quote.Rep--;
1081 if (Quote.Rep <= 0) {
1082 styler.ColourTo(i, state);
1083 state = SCE_PL_DEFAULT;
1084 ch = ' ';
1085 }
1086 if (Quote.Up == Quote.Down) {
1087 Quote.Count++;
1088 }
1089 }
1090 } else if (ch == Quote.Up) {
1091 Quote.Count++;
1092 }
1093 }
1094 }
1095 if (state == SCE_PL_ERROR) {
1096 break;
1097 }
1098 chPrev = ch;
1099 }
1100 styler.ColourTo(lengthDoc - 1, state);
1101 }
1102
IsCommentLine(int line,Accessor & styler)1103 static bool IsCommentLine(int line, Accessor &styler) {
1104 int pos = styler.LineStart(line);
1105 int eol_pos = styler.LineStart(line + 1) - 1;
1106 for (int i = pos; i < eol_pos; i++) {
1107 char ch = styler[i];
1108 int style = styler.StyleAt(i);
1109 if (ch == '#' && style == SCE_PL_COMMENTLINE)
1110 return true;
1111 else if (ch != ' ' && ch != '\t')
1112 return false;
1113 }
1114 return false;
1115 }
1116
FoldPerlDoc(unsigned int startPos,int length,int,WordList * [],Accessor & styler)1117 static void FoldPerlDoc(unsigned int startPos, int length, int, WordList *[],
1118 Accessor &styler) {
1119 bool foldComment = styler.GetPropertyInt("fold.comment") != 0;
1120 bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
1121 // Custom folding of POD and packages
1122 bool foldPOD = styler.GetPropertyInt("fold.perl.pod", 1) != 0;
1123 bool foldPackage = styler.GetPropertyInt("fold.perl.package", 1) != 0;
1124 unsigned int endPos = startPos + length;
1125 int visibleChars = 0;
1126 int lineCurrent = styler.GetLine(startPos);
1127 int levelPrev = SC_FOLDLEVELBASE;
1128 if (lineCurrent > 0)
1129 levelPrev = styler.LevelAt(lineCurrent - 1) >> 16;
1130 int levelCurrent = levelPrev;
1131 char chNext = styler[startPos];
1132 char chPrev = styler.SafeGetCharAt(startPos - 1);
1133 int styleNext = styler.StyleAt(startPos);
1134 // Used at end of line to determine if the line was a package definition
1135 bool isPackageLine = false;
1136 bool isPodHeading = false;
1137 for (unsigned int i = startPos; i < endPos; i++) {
1138 char ch = chNext;
1139 chNext = styler.SafeGetCharAt(i + 1);
1140 int style = styleNext;
1141 styleNext = styler.StyleAt(i + 1);
1142 bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
1143 bool atLineStart = isEOLChar(chPrev) || i == 0;
1144 // Comment folding
1145 if (foldComment && atEOL && IsCommentLine(lineCurrent, styler))
1146 {
1147 if (!IsCommentLine(lineCurrent - 1, styler)
1148 && IsCommentLine(lineCurrent + 1, styler))
1149 levelCurrent++;
1150 else if (IsCommentLine(lineCurrent - 1, styler)
1151 && !IsCommentLine(lineCurrent+1, styler))
1152 levelCurrent--;
1153 }
1154 if (style == SCE_C_OPERATOR) {
1155 if (ch == '{') {
1156 levelCurrent++;
1157 } else if (ch == '}') {
1158 levelCurrent--;
1159 }
1160 }
1161 // Custom POD folding
1162 if (foldPOD && atLineStart) {
1163 int stylePrevCh = (i) ? styler.StyleAt(i - 1):SCE_PL_DEFAULT;
1164 if (style == SCE_PL_POD) {
1165 if (stylePrevCh != SCE_PL_POD && stylePrevCh != SCE_PL_POD_VERB)
1166 levelCurrent++;
1167 else if (styler.Match(i, "=cut"))
1168 levelCurrent--;
1169 else if (styler.Match(i, "=head"))
1170 isPodHeading = true;
1171 } else if (style == SCE_PL_DATASECTION) {
1172 if (ch == '=' && isalpha(chNext) && levelCurrent == SC_FOLDLEVELBASE)
1173 levelCurrent++;
1174 else if (styler.Match(i, "=cut") && levelCurrent > SC_FOLDLEVELBASE)
1175 levelCurrent--;
1176 else if (styler.Match(i, "=head"))
1177 isPodHeading = true;
1178 // if package used or unclosed brace, level > SC_FOLDLEVELBASE!
1179 // reset needed as level test is vs. SC_FOLDLEVELBASE
1180 else if (styler.Match(i, "__END__"))
1181 levelCurrent = SC_FOLDLEVELBASE;
1182 }
1183 }
1184 // Custom package folding
1185 if (foldPackage && atLineStart) {
1186 if (style == SCE_PL_WORD && styler.Match(i, "package")) {
1187 isPackageLine = true;
1188 }
1189 }
1190
1191 if (atEOL) {
1192 int lev = levelPrev;
1193 if (isPodHeading) {
1194 lev = levelPrev - 1;
1195 lev |= SC_FOLDLEVELHEADERFLAG;
1196 isPodHeading = false;
1197 }
1198 // Check if line was a package declaration
1199 // because packages need "special" treatment
1200 if (isPackageLine) {
1201 lev = SC_FOLDLEVELBASE | SC_FOLDLEVELHEADERFLAG;
1202 levelCurrent = SC_FOLDLEVELBASE + 1;
1203 isPackageLine = false;
1204 }
1205 lev |= levelCurrent << 16;
1206 if (visibleChars == 0 && foldCompact)
1207 lev |= SC_FOLDLEVELWHITEFLAG;
1208 if ((levelCurrent > levelPrev) && (visibleChars > 0))
1209 lev |= SC_FOLDLEVELHEADERFLAG;
1210 if (lev != styler.LevelAt(lineCurrent)) {
1211 styler.SetLevel(lineCurrent, lev);
1212 }
1213 lineCurrent++;
1214 levelPrev = levelCurrent;
1215 visibleChars = 0;
1216 }
1217 if (!isspacechar(ch))
1218 visibleChars++;
1219 chPrev = ch;
1220 }
1221 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
1222 int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
1223 styler.SetLevel(lineCurrent, levelPrev | flagsNext);
1224 }
1225
// Descriptions of the word lists for the Perl lexer: a single "Keywords"
// entry followed by a terminating null pointer.
static const char *const perlWordListDesc[] = { "Keywords", 0 };
1230
1231 LexerModule lmPerl(SCLEX_PERL, ColourisePerlDoc, "perl", FoldPerlDoc, perlWordListDesc);
1232
1233