1 // Scintilla source code edit control
2 /** @file LexRuby.cxx
3 ** Lexer for Ruby.
4 **/
5 // Copyright 2001- by Clemens Wyss <wys@helbling.ch>
6 // The License.txt file describes the conditions under which this software may be distributed.
7
8 #include <stdlib.h>
9 #include <string.h>
10 #include <ctype.h>
11 #include <stdio.h>
12 #include <stdarg.h>
13
14 #include "Platform.h"
15
16 #include "PropSet.h"
17 #include "Accessor.h"
18 #include "KeyWords.h"
19 #include "Scintilla.h"
20 #include "SciLexer.h"
21
22 #ifdef SCI_NAMESPACE
23 using namespace Scintilla;
24 #endif
25
26 //XXX Identical to Perl, put in common area
// Reports whether `ch` is one of the two line-terminator characters
// (carriage return or line feed).
static inline bool isEOLChar(char ch) {
    switch (ch) {
    case '\r':
    case '\n':
        return true;
    default:
        return false;
    }
}
30
// Reports whether `ch` is one of the punctuation characters that can
// form (part of) a Ruby operator.
static inline bool isRubyOperatorChar(char ch) {
    // strchr() treats the terminating NUL as part of the searched string,
    // so a NUL byte must be rejected explicitly or it would be reported
    // as an operator character.
    if (ch == '\0') {
        return false;
    }
    return strchr("%^&*\\()-+=|{}[]:;<>,/?!.~", ch) != NULL;
}
34
35
// Alphabetic test that never hands a value outside the 7-bit ASCII range
// to isalpha(): anything above 127 (including negative `char` values,
// which become large once converted to unsigned) is rejected first.
static inline bool isSafeAlpha(char ch) {
    const unsigned int code = (unsigned int) ch;
    if (code > 127) {
        return false;
    }
    return isalpha(ch) != 0;
}
39
40 #define MAX_KEYWORD_LENGTH 200
41
// Low six bits of a style byte hold the lexical style; higher bits carry
// flags and are stripped by actual_style().
#define STYLE_MASK 63
// The argument is parenthesized so that expressions built from operators
// with lower precedence than '&' (such as '|' or '^') are masked as a whole.
#define actual_style(style) ((style) & STYLE_MASK)
44
followsDot(unsigned int pos,Accessor & styler)45 static bool followsDot(unsigned int pos, Accessor &styler) {
46 styler.Flush();
47 for (; pos >= 1; --pos) {
48 int style = actual_style(styler.StyleAt(pos));
49 char ch;
50 switch (style) {
51 case SCE_RB_DEFAULT:
52 ch = styler[pos];
53 if (ch == ' ' || ch == '\t') {
54 //continue
55 } else {
56 return false;
57 }
58 break;
59
60 case SCE_RB_OPERATOR:
61 return styler[pos] == '.';
62
63 default:
64 return false;
65 }
66 }
67 return false;
68 }
69
70 // Forward declarations
71 static bool keywordIsAmbiguous(const char *prevWord);
72 static bool keywordDoStartsLoop(int pos,
73 Accessor &styler);
74 static bool keywordIsModifier(const char *word,
75 int pos,
76 Accessor &styler);
77
ClassifyWordRb(unsigned int start,unsigned int end,WordList & keywords,Accessor & styler,char * prevWord)78 static int ClassifyWordRb(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, char *prevWord) {
79 char s[100];
80 unsigned int i, j;
81 unsigned int lim = end - start + 1; // num chars to copy
82 if (lim >= MAX_KEYWORD_LENGTH) {
83 lim = MAX_KEYWORD_LENGTH - 1;
84 }
85 for (i = start, j = 0; j < lim; i++, j++) {
86 s[j] = styler[i];
87 }
88 s[j] = '\0';
89 int chAttr;
90 if (0 == strcmp(prevWord, "class"))
91 chAttr = SCE_RB_CLASSNAME;
92 else if (0 == strcmp(prevWord, "module"))
93 chAttr = SCE_RB_MODULE_NAME;
94 else if (0 == strcmp(prevWord, "def"))
95 chAttr = SCE_RB_DEFNAME;
96 else if (keywords.InList(s) && !followsDot(start - 1, styler)) {
97 if (keywordIsAmbiguous(s)
98 && keywordIsModifier(s, start, styler)) {
99
100 // Demoted keywords are colored as keywords,
101 // but do not affect changes in indentation.
102 //
103 // Consider the word 'if':
104 // 1. <<if test ...>> : normal
105 // 2. <<stmt if test>> : demoted
106 // 3. <<lhs = if ...>> : normal: start a new indent level
107 // 4. <<obj.if = 10>> : color as identifer, since it follows '.'
108
109 chAttr = SCE_RB_WORD_DEMOTED;
110 } else {
111 chAttr = SCE_RB_WORD;
112 }
113 } else
114 chAttr = SCE_RB_IDENTIFIER;
115 styler.ColourTo(end, chAttr);
116 if (chAttr == SCE_RB_WORD) {
117 strcpy(prevWord, s);
118 } else {
119 prevWord[0] = 0;
120 }
121 return chAttr;
122 }
123
124
125 //XXX Identical to Perl, put in common area
isMatch(Accessor & styler,int lengthDoc,int pos,const char * val)126 static bool isMatch(Accessor &styler, int lengthDoc, int pos, const char *val) {
127 if ((pos + static_cast<int>(strlen(val))) >= lengthDoc) {
128 return false;
129 }
130 while (*val) {
131 if (*val != styler[pos++]) {
132 return false;
133 }
134 val++;
135 }
136 return true;
137 }
138
139 // Do Ruby better -- find the end of the line, work back,
140 // and then check for leading white space
141
142 // Precondition: the here-doc target can be indented
lookingAtHereDocDelim(Accessor & styler,int pos,int lengthDoc,const char * HereDocDelim)143 static bool lookingAtHereDocDelim(Accessor &styler,
144 int pos,
145 int lengthDoc,
146 const char *HereDocDelim)
147 {
148 if (!isMatch(styler, lengthDoc, pos, HereDocDelim)) {
149 return false;
150 }
151 while (--pos > 0) {
152 char ch = styler[pos];
153 if (isEOLChar(ch)) {
154 return true;
155 } else if (ch != ' ' && ch != '\t') {
156 return false;
157 }
158 }
159 return false;
160 }
161
162 //XXX Identical to Perl, put in common area
// Maps an opening bracket to its closing partner; every other character
// (including closing brackets) is returned unchanged.
static char opposite(char ch) {
    switch (ch) {
    case '(':
        return ')';
    case '[':
        return ']';
    case '{':
        return '}';
    case '<':
        return '>';
    default:
        return ch;
    }
}
174
175 // Null transitions when we see we've reached the end
176 // and need to relex the curr char.
177
redo_char(int & i,char & ch,char & chNext,char & chNext2,int & state)178 static void redo_char(int &i, char &ch, char &chNext, char &chNext2,
179 int &state) {
180 i--;
181 chNext2 = chNext;
182 chNext = ch;
183 state = SCE_RB_DEFAULT;
184 }
185
// Consumes one character: bumps the index and shifts the look-ahead
// window left by one. `chNext2` is read but not refreshed here.
static void advance_char(int &i, char &ch, char &chNext, char &chNext2) {
    ch = chNext;
    chNext = chNext2;
    i += 1;
}
191
192 // precondition: startPos points to one after the EOL char
currLineContainsHereDelims(int & startPos,Accessor & styler)193 static bool currLineContainsHereDelims(int& startPos,
194 Accessor &styler) {
195 if (startPos <= 1)
196 return false;
197
198 int pos;
199 for (pos = startPos - 1; pos > 0; pos--) {
200 char ch = styler.SafeGetCharAt(pos);
201 if (isEOLChar(ch)) {
202 // Leave the pointers where they are -- there are no
203 // here doc delims on the current line, even if
204 // the EOL isn't default style
205
206 return false;
207 } else {
208 styler.Flush();
209 if (actual_style(styler.StyleAt(pos)) == SCE_RB_HERE_DELIM) {
210 break;
211 }
212 }
213 }
214 if (pos == 0) {
215 return false;
216 }
217 // Update the pointers so we don't have to re-analyze the string
218 startPos = pos;
219 return true;
220 }
221
222
isEmptyLine(int pos,Accessor & styler)223 static bool isEmptyLine(int pos,
224 Accessor &styler) {
225 int spaceFlags = 0;
226 int lineCurrent = styler.GetLine(pos);
227 int indentCurrent = styler.IndentAmount(lineCurrent, &spaceFlags, NULL);
228 return (indentCurrent & SC_FOLDLEVELWHITEFLAG) != 0;
229 }
230
// Reports whether `keyword` is one of the keywords after which a '/'
// should be read as the start of a regular expression rather than as a
// division operator.
static bool RE_CanFollowKeyword(const char *keyword) {
    static const char *const reStarters[] = {
        "and", "begin", "break", "case", "do",
        "else", "elsif", "if", "next", "return",
        "when", "unless", "until", "not", "or"
    };
    const size_t count = sizeof(reStarters) / sizeof(reStarters[0]);
    for (size_t k = 0; k < count; ++k) {
        if (strcmp(keyword, reStarters[k]) == 0) {
            return true;
        }
    }
    return false;
}
251
252
253 //todo: if we aren't looking at a stdio character,
254 // move to the start of the first line that is not in a
255 // multi-line construct
256
synchronizeDocStart(unsigned int & startPos,int & length,int & initStyle,Accessor & styler,bool skipWhiteSpace=false)257 static void synchronizeDocStart(unsigned int& startPos,
258 int &length,
259 int &initStyle,
260 Accessor &styler,
261 bool skipWhiteSpace=false) {
262
263 styler.Flush();
264 int style = actual_style(styler.StyleAt(startPos));
265 switch (style) {
266 case SCE_RB_STDIN:
267 case SCE_RB_STDOUT:
268 case SCE_RB_STDERR:
269 // Don't do anything else with these.
270 return;
271 }
272
273 int pos = startPos;
274 // Quick way to characterize each line
275 int lineStart;
276 for (lineStart = styler.GetLine(pos); lineStart > 0; lineStart--) {
277 // Now look at the style before the previous line's EOL
278 pos = styler.LineStart(lineStart) - 1;
279 if (pos <= 10) {
280 lineStart = 0;
281 break;
282 }
283 char ch = styler.SafeGetCharAt(pos);
284 char chPrev = styler.SafeGetCharAt(pos - 1);
285 if (ch == '\n' && chPrev == '\r') {
286 pos--;
287 }
288 if (styler.SafeGetCharAt(pos - 1) == '\\') {
289 // Continuation line -- keep going
290 } else if (actual_style(styler.StyleAt(pos)) != SCE_RB_DEFAULT) {
291 // Part of multi-line construct -- keep going
292 } else if (currLineContainsHereDelims(pos, styler)) {
293 // Keep going, with pos and length now pointing
294 // at the end of the here-doc delimiter
295 } else if (skipWhiteSpace && isEmptyLine(pos, styler)) {
296 // Keep going
297 } else {
298 break;
299 }
300 }
301 pos = styler.LineStart(lineStart);
302 length += (startPos - pos);
303 startPos = pos;
304 initStyle = SCE_RB_DEFAULT;
305 }
306
ColouriseRbDoc(unsigned int startPos,int length,int initStyle,WordList * keywordlists[],Accessor & styler)307 static void ColouriseRbDoc(unsigned int startPos, int length, int initStyle,
308 WordList *keywordlists[], Accessor &styler) {
309
310 // Lexer for Ruby often has to backtrack to start of current style to determine
311 // which characters are being used as quotes, how deeply nested is the
312 // start position and what the termination string is for here documents
313
314 WordList &keywords = *keywordlists[0];
315
316 class HereDocCls {
317 public:
318 int State;
319 // States
320 // 0: '<<' encountered
321 // 1: collect the delimiter
322 // 1b: text between the end of the delimiter and the EOL
323 // 2: here doc text (lines after the delimiter)
324 char Quote; // the char after '<<'
325 bool Quoted; // true if Quote in ('\'','"','`')
326 int DelimiterLength; // strlen(Delimiter)
327 char Delimiter[256]; // the Delimiter, limit of 256: from Perl
328 bool CanBeIndented;
329 HereDocCls() {
330 State = 0;
331 DelimiterLength = 0;
332 Delimiter[0] = '\0';
333 CanBeIndented = false;
334 }
335 };
336 HereDocCls HereDoc;
337
338 class QuoteCls {
339 public:
340 int Count;
341 char Up;
342 char Down;
343 QuoteCls() {
344 this->New();
345 }
346 void New() {
347 Count = 0;
348 Up = '\0';
349 Down = '\0';
350 }
351 void Open(char u) {
352 Count++;
353 Up = u;
354 Down = opposite(Up);
355 }
356 };
357 QuoteCls Quote;
358
359 int numDots = 0; // For numbers --
360 // Don't start lexing in the middle of a num
361
362 synchronizeDocStart(startPos, length, initStyle, styler, // ref args
363 false);
364
365 bool preferRE = true;
366 int state = initStyle;
367 int lengthDoc = startPos + length;
368
369 char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero
370 prevWord[0] = '\0';
371 if (length == 0)
372 return;
373
374 char chPrev = styler.SafeGetCharAt(startPos - 1);
375 char chNext = styler.SafeGetCharAt(startPos);
376 // Ruby uses a different mask because bad indentation is marked by oring with 32
377 styler.StartAt(startPos, 127);
378 styler.StartSegment(startPos);
379
380 static int q_states[] = {SCE_RB_STRING_Q,
381 SCE_RB_STRING_QQ,
382 SCE_RB_STRING_QR,
383 SCE_RB_STRING_QW,
384 SCE_RB_STRING_QW,
385 SCE_RB_STRING_QX};
386 static const char* q_chars = "qQrwWx";
387
388 for (int i = startPos; i < lengthDoc; i++) {
389 char ch = chNext;
390 chNext = styler.SafeGetCharAt(i + 1);
391 char chNext2 = styler.SafeGetCharAt(i + 2);
392
393 if (styler.IsLeadByte(ch)) {
394 chNext = chNext2;
395 chPrev = ' ';
396 i += 1;
397 continue;
398 }
399
400 // skip on DOS/Windows
401 //No, don't, because some things will get tagged on,
402 // so we won't recognize keywords, for example
403 #if 0
404 if (ch == '\r' && chNext == '\n') {
405 continue;
406 }
407 #endif
408
409 if (HereDoc.State == 1 && isEOLChar(ch)) {
410 // Begin of here-doc (the line after the here-doc delimiter):
411 HereDoc.State = 2;
412 styler.ColourTo(i-1, state);
413 // Don't check for a missing quote, just jump into
414 // the here-doc state
415 state = SCE_RB_HERE_Q;
416 }
417
418 // Regular transitions
419 if (state == SCE_RB_DEFAULT) {
420 if (isdigit(ch)) {
421 styler.ColourTo(i - 1, state);
422 state = SCE_RB_NUMBER;
423 numDots = 0;
424 } else if (iswordstart(ch)) {
425 styler.ColourTo(i - 1, state);
426 state = SCE_RB_WORD;
427 } else if (ch == '#') {
428 styler.ColourTo(i - 1, state);
429 state = SCE_RB_COMMENTLINE;
430 } else if (ch == '=') {
431 // =begin indicates the start of a comment (doc) block
432 if (i == 0 || isEOLChar(chPrev)
433 && chNext == 'b'
434 && styler.SafeGetCharAt(i + 2) == 'e'
435 && styler.SafeGetCharAt(i + 3) == 'g'
436 && styler.SafeGetCharAt(i + 4) == 'i'
437 && styler.SafeGetCharAt(i + 5) == 'n'
438 && !iswordchar(styler.SafeGetCharAt(i + 6))) {
439 styler.ColourTo(i - 1, state);
440 state = SCE_RB_POD;
441 } else {
442 styler.ColourTo(i - 1, state);
443 styler.ColourTo(i, SCE_RB_OPERATOR);
444 preferRE = true;
445 }
446 } else if (ch == '"') {
447 styler.ColourTo(i - 1, state);
448 state = SCE_RB_STRING;
449 Quote.New();
450 Quote.Open(ch);
451 } else if (ch == '\'') {
452 styler.ColourTo(i - 1, state);
453 state = SCE_RB_CHARACTER;
454 Quote.New();
455 Quote.Open(ch);
456 } else if (ch == '`') {
457 styler.ColourTo(i - 1, state);
458 state = SCE_RB_BACKTICKS;
459 Quote.New();
460 Quote.Open(ch);
461 } else if (ch == '@') {
462 // Instance or class var
463 styler.ColourTo(i - 1, state);
464 if (chNext == '@') {
465 state = SCE_RB_CLASS_VAR;
466 advance_char(i, ch, chNext, chNext2); // pass by ref
467 } else {
468 state = SCE_RB_INSTANCE_VAR;
469 }
470 } else if (ch == '$') {
471 // Check for a builtin global
472 styler.ColourTo(i - 1, state);
473 // Recognize it bit by bit
474 state = SCE_RB_GLOBAL;
475 } else if (ch == '/' && preferRE) {
476 // Ambigous operator
477 styler.ColourTo(i - 1, state);
478 state = SCE_RB_REGEX;
479 Quote.New();
480 Quote.Open(ch);
481 } else if (ch == '<' && chNext == '<' && chNext2 != '=') {
482
483 // Recognise the '<<' symbol - either a here document or a binary op
484
485 styler.ColourTo(i - 1, state);
486 i++;
487 chNext = chNext2;
488 styler.ColourTo(i, SCE_RB_OPERATOR);
489
490 if (preferRE) {
491 state = SCE_RB_HERE_DELIM;
492 HereDoc.State = 0;
493 } else {
494 // leave state as default
495 // We don't have all the heuristics Perl has for indications
496 // of a here-doc, because '<<' is overloadable and used
497 // for so many other classes.
498 preferRE = true;
499 }
500 } else if (ch == ':') {
501 styler.ColourTo(i - 1, state);
502 if (chNext == ':') {
503 // Mark "::" as an operator, not symbol start
504 styler.ColourTo(i + 1, SCE_RB_OPERATOR);
505 advance_char(i, ch, chNext, chNext2); // pass by ref
506 state = SCE_RB_DEFAULT;
507 preferRE = false;
508 } else if (iswordchar(chNext)) {
509 state = SCE_RB_SYMBOL;
510 } else if (strchr("[*!~+-*/%=<>&^|", chNext)) {
511 // Do the operator analysis in-line, looking ahead
512 // Based on the table in pickaxe 2nd ed., page 339
513 bool doColoring = true;
514 switch (chNext) {
515 case '[':
516 if (chNext2 == ']' ) {
517 char ch_tmp = styler.SafeGetCharAt(i + 3);
518 if (ch_tmp == '=') {
519 i += 3;
520 ch = ch_tmp;
521 chNext = styler.SafeGetCharAt(i + 1);
522 } else {
523 i += 2;
524 ch = chNext2;
525 chNext = ch_tmp;
526 }
527 } else {
528 doColoring = false;
529 }
530 break;
531
532 case '*':
533 if (chNext2 == '*') {
534 i += 2;
535 ch = chNext2;
536 chNext = styler.SafeGetCharAt(i + 1);
537 } else {
538 advance_char(i, ch, chNext, chNext2);
539 }
540 break;
541
542 case '!':
543 if (chNext2 == '=' || chNext2 == '~') {
544 i += 2;
545 ch = chNext2;
546 chNext = styler.SafeGetCharAt(i + 1);
547 } else {
548 advance_char(i, ch, chNext, chNext2);
549 }
550 break;
551
552 case '<':
553 if (chNext2 == '<') {
554 i += 2;
555 ch = chNext2;
556 chNext = styler.SafeGetCharAt(i + 1);
557 } else if (chNext2 == '=') {
558 char ch_tmp = styler.SafeGetCharAt(i + 3);
559 if (ch_tmp == '>') { // <=> operator
560 i += 3;
561 ch = ch_tmp;
562 chNext = styler.SafeGetCharAt(i + 1);
563 } else {
564 i += 2;
565 ch = chNext2;
566 chNext = ch_tmp;
567 }
568 } else {
569 advance_char(i, ch, chNext, chNext2);
570 }
571 break;
572
573 default:
574 // Simple one-character operators
575 advance_char(i, ch, chNext, chNext2);
576 break;
577 }
578 if (doColoring) {
579 styler.ColourTo(i, SCE_RB_SYMBOL);
580 state = SCE_RB_DEFAULT;
581 }
582 } else if (!preferRE) {
583 // Don't color symbol strings (yet)
584 // Just color the ":" and color rest as string
585 styler.ColourTo(i, SCE_RB_SYMBOL);
586 state = SCE_RB_DEFAULT;
587 } else {
588 styler.ColourTo(i, SCE_RB_OPERATOR);
589 state = SCE_RB_DEFAULT;
590 preferRE = true;
591 }
592 } else if (ch == '%') {
593 styler.ColourTo(i - 1, state);
594 bool have_string = false;
595 if (strchr(q_chars, chNext) && !iswordchar(chNext2)) {
596 Quote.New();
597 const char *hit = strchr(q_chars, chNext);
598 if (hit != NULL) {
599 state = q_states[hit - q_chars];
600 Quote.Open(chNext2);
601 i += 2;
602 ch = chNext2;
603 chNext = styler.SafeGetCharAt(i + 1);
604 have_string = true;
605 }
606 } else if (!iswordchar(chNext)) {
607 state = SCE_RB_STRING_QQ;
608 Quote.Open(chNext);
609 advance_char(i, ch, chNext, chNext2); // pass by ref
610 have_string = true;
611 }
612 if (!have_string) {
613 styler.ColourTo(i, SCE_RB_OPERATOR);
614 // stay in default
615 preferRE = true;
616 }
617 } else if (isoperator(ch)) {
618 styler.ColourTo(i - 1, state);
619 styler.ColourTo(i, SCE_RB_OPERATOR);
620 // If we're ending an expression or block,
621 // assume it ends an object, and the ambivalent
622 // constructs are binary operators
623 //
624 // So if we don't have one of these chars,
625 // we aren't ending an object exp'n, and ops
626 // like : << / are unary operators.
627
628 preferRE = (strchr(")}]", ch) == NULL);
629 // Stay in default state
630 } else if (isEOLChar(ch)) {
631 // Make sure it's a true line-end, with no backslash
632 if ((ch == '\r' || (ch == '\n' && chPrev != '\r'))
633 && chPrev != '\\') {
634 // Assume we've hit the end of the statement.
635 preferRE = true;
636 }
637 }
638 } else if (state == SCE_RB_WORD) {
639 if (ch == '.' || !iswordchar(ch)) {
640 // Words include x? in all contexts,
641 // and <letters>= after either 'def' or a dot
642 // Move along until a complete word is on our left
643
644 // Default accessor treats '.' as word-chars,
645 // but we don't for now.
646
647 if (ch == '='
648 && iswordchar(chPrev)
649 && (chNext == '('
650 || strchr(" \t\n\r", chNext) != NULL)
651 && (!strcmp(prevWord, "def")
652 || followsDot(styler.GetStartSegment(), styler))) {
653 // <name>= is a name only when being def'd -- Get it the next time
654 // This means that <name>=<name> is always lexed as
655 // <name>, (op, =), <name>
656 } else if ((ch == '?' || ch == '!')
657 && iswordchar(chPrev)
658 && !iswordchar(chNext)) {
659 // <name>? is a name -- Get it the next time
660 // But <name>?<name> is always lexed as
661 // <name>, (op, ?), <name>
662 // Same with <name>! to indicate a method that
663 // modifies its target
664 } else if (isEOLChar(ch)
665 && isMatch(styler, lengthDoc, i - 7, "__END__")) {
666 styler.ColourTo(i, SCE_RB_DATASECTION);
667 state = SCE_RB_DATASECTION;
668 // No need to handle this state -- we'll just move to the end
669 preferRE = false;
670 } else {
671 int wordStartPos = styler.GetStartSegment();
672 int word_style = ClassifyWordRb(wordStartPos, i - 1, keywords, styler, prevWord);
673 switch (word_style) {
674 case SCE_RB_WORD:
675 preferRE = RE_CanFollowKeyword(prevWord);
676 break;
677
678 case SCE_RB_WORD_DEMOTED:
679 preferRE = true;
680 break;
681
682 case SCE_RB_IDENTIFIER:
683 if (isMatch(styler, lengthDoc, wordStartPos, "print")) {
684 preferRE = true;
685 } else if (isEOLChar(ch)) {
686 preferRE = true;
687 } else {
688 preferRE = false;
689 }
690 break;
691 default:
692 preferRE = false;
693 }
694 redo_char(i, ch, chNext, chNext2, state); // pass by ref
695 }
696 }
697 } else if (state == SCE_RB_NUMBER) {
698 if (isalnum(ch) || ch == '_') {
699 // Keep going
700 } else if (ch == '.' && ++numDots == 1) {
701 // Keep going
702 } else {
703 styler.ColourTo(i - 1, state);
704 redo_char(i, ch, chNext, chNext2, state); // pass by ref
705 preferRE = false;
706 }
707 } else if (state == SCE_RB_COMMENTLINE) {
708 if (isEOLChar(ch)) {
709 styler.ColourTo(i - 1, state);
710 state = SCE_RB_DEFAULT;
711 // Use whatever setting we had going into the comment
712 }
713 } else if (state == SCE_RB_HERE_DELIM) {
714 // See the comment for SCE_RB_HERE_DELIM in LexPerl.cxx
715 // Slightly different: if we find an immediate '-',
716 // the target can appear indented.
717
718 if (HereDoc.State == 0) { // '<<' encountered
719 HereDoc.State = 1;
720 HereDoc.DelimiterLength = 0;
721 if (ch == '-') {
722 HereDoc.CanBeIndented = true;
723 advance_char(i, ch, chNext, chNext2); // pass by ref
724 } else {
725 HereDoc.CanBeIndented = false;
726 }
727 if (isEOLChar(ch)) {
728 // Bail out of doing a here doc if there's no target
729 state = SCE_RB_DEFAULT;
730 preferRE = false;
731 } else {
732 HereDoc.Quote = ch;
733
734 if (ch == '\'' || ch == '"' || ch == '`') {
735 HereDoc.Quoted = true;
736 HereDoc.Delimiter[0] = '\0';
737 } else {
738 HereDoc.Quoted = false;
739 HereDoc.Delimiter[0] = ch;
740 HereDoc.Delimiter[1] = '\0';
741 HereDoc.DelimiterLength = 1;
742 }
743 }
744 } else if (HereDoc.State == 1) { // collect the delimiter
745 if (isEOLChar(ch)) {
746 // End the quote now, and go back for more
747 styler.ColourTo(i - 1, state);
748 state = SCE_RB_DEFAULT;
749 i--;
750 chNext = ch;
751 chNext2 = chNext;
752 preferRE = false;
753 } else if (HereDoc.Quoted) {
754 if (ch == HereDoc.Quote) { // closing quote => end of delimiter
755 styler.ColourTo(i, state);
756 state = SCE_RB_DEFAULT;
757 preferRE = false;
758 } else {
759 if (ch == '\\' && !isEOLChar(chNext)) {
760 advance_char(i, ch, chNext, chNext2);
761 }
762 HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;
763 HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
764 }
765 } else { // an unquoted here-doc delimiter
766 if (isalnum(ch) || ch == '_') {
767 HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;
768 HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
769 } else {
770 styler.ColourTo(i - 1, state);
771 redo_char(i, ch, chNext, chNext2, state);
772 preferRE = false;
773 }
774 }
775 if (HereDoc.DelimiterLength >= static_cast<int>(sizeof(HereDoc.Delimiter)) - 1) {
776 styler.ColourTo(i - 1, state);
777 state = SCE_RB_ERROR;
778 preferRE = false;
779 }
780 }
781 } else if (state == SCE_RB_HERE_Q) {
782 // Not needed: HereDoc.State == 2
783 // Indentable here docs: look backwards
784 // Non-indentable: look forwards, like in Perl
785 //
786 // Why: so we can quickly resolve things like <<-" abc"
787
788 if (!HereDoc.CanBeIndented) {
789 if (isEOLChar(chPrev)
790 && isMatch(styler, lengthDoc, i, HereDoc.Delimiter)) {
791 styler.ColourTo(i - 1, state);
792 i += HereDoc.DelimiterLength - 1;
793 chNext = styler.SafeGetCharAt(i + 1);
794 if (isEOLChar(chNext)) {
795 styler.ColourTo(i, SCE_RB_HERE_DELIM);
796 state = SCE_RB_DEFAULT;
797 HereDoc.State = 0;
798 preferRE = false;
799 }
800 // Otherwise we skipped through the here doc faster.
801 }
802 } else if (isEOLChar(chNext)
803 && lookingAtHereDocDelim(styler,
804 i - HereDoc.DelimiterLength + 1,
805 lengthDoc,
806 HereDoc.Delimiter)) {
807 styler.ColourTo(i - 1 - HereDoc.DelimiterLength, state);
808 styler.ColourTo(i, SCE_RB_HERE_DELIM);
809 state = SCE_RB_DEFAULT;
810 preferRE = false;
811 HereDoc.State = 0;
812 }
813 } else if (state == SCE_RB_CLASS_VAR
814 || state == SCE_RB_INSTANCE_VAR
815 || state == SCE_RB_SYMBOL) {
816 if (!iswordchar(ch)) {
817 styler.ColourTo(i - 1, state);
818 redo_char(i, ch, chNext, chNext2, state); // pass by ref
819 preferRE = false;
820 }
821 } else if (state == SCE_RB_GLOBAL) {
822 if (!iswordchar(ch)) {
823 // handle special globals here as well
824 if (chPrev == '$') {
825 if (ch == '-') {
826 // Include the next char, like $-a
827 advance_char(i, ch, chNext, chNext2);
828 }
829 styler.ColourTo(i, state);
830 state = SCE_RB_DEFAULT;
831 } else {
832 styler.ColourTo(i - 1, state);
833 redo_char(i, ch, chNext, chNext2, state); // pass by ref
834 }
835 preferRE = false;
836 }
837 } else if (state == SCE_RB_POD) {
838 // PODs end with ^=end\s, -- any whitespace can follow =end
839 if (strchr(" \t\n\r", ch) != NULL
840 && i > 5
841 && isEOLChar(styler[i - 5])
842 && isMatch(styler, lengthDoc, i - 4, "=end")) {
843 styler.ColourTo(i - 1, state);
844 state = SCE_RB_DEFAULT;
845 preferRE = false;
846 }
847 } else if (state == SCE_RB_REGEX || state == SCE_RB_STRING_QR) {
848 if (ch == '\\' && Quote.Up != '\\') {
849 // Skip one
850 advance_char(i, ch, chNext, chNext2);
851 } else if (ch == Quote.Down) {
852 Quote.Count--;
853 if (Quote.Count == 0) {
854 // Include the options
855 while (isSafeAlpha(chNext)) {
856 i++;
857 ch = chNext;
858 chNext = styler.SafeGetCharAt(i + 1);
859 }
860 styler.ColourTo(i, state);
861 state = SCE_RB_DEFAULT;
862 preferRE = false;
863 }
864 } else if (ch == Quote.Up) {
865 // Only if close quoter != open quoter
866 Quote.Count++;
867
868 } else if (ch == '#' ) {
869 //todo: distinguish comments from pound chars
870 // for now, handle as comment
871 styler.ColourTo(i - 1, state);
872 bool inEscape = false;
873 while (++i < lengthDoc) {
874 ch = styler.SafeGetCharAt(i);
875 if (ch == '\\') {
876 inEscape = true;
877 } else if (isEOLChar(ch)) {
878 // Comment inside a regex
879 styler.ColourTo(i - 1, SCE_RB_COMMENTLINE);
880 break;
881 } else if (inEscape) {
882 inEscape = false; // don't look at char
883 } else if (ch == Quote.Down) {
884 // Have the regular handler deal with this
885 // to get trailing modifiers.
886 i--;
887 ch = styler[i];
888 break;
889 }
890 }
891 chNext = styler.SafeGetCharAt(i + 1);
892 chNext2 = styler.SafeGetCharAt(i + 2);
893 }
894 // Quotes of all kinds...
895 } else if (state == SCE_RB_STRING_Q || state == SCE_RB_STRING_QQ ||
896 state == SCE_RB_STRING_QX || state == SCE_RB_STRING_QW ||
897 state == SCE_RB_STRING || state == SCE_RB_CHARACTER ||
898 state == SCE_RB_BACKTICKS) {
899 if (!Quote.Down && !isspacechar(ch)) {
900 Quote.Open(ch);
901 } else if (ch == '\\' && Quote.Up != '\\') {
902 //Riddle me this: Is it safe to skip *every* escaped char?
903 advance_char(i, ch, chNext, chNext2);
904 } else if (ch == Quote.Down) {
905 Quote.Count--;
906 if (Quote.Count == 0) {
907 styler.ColourTo(i, state);
908 state = SCE_RB_DEFAULT;
909 preferRE = false;
910 }
911 } else if (ch == Quote.Up) {
912 Quote.Count++;
913 }
914 }
915
916 if (state == SCE_RB_ERROR) {
917 break;
918 }
919 chPrev = ch;
920 }
921 if (state == SCE_RB_WORD) {
922 // We've ended on a word, possibly at EOF, and need to
923 // classify it.
924 (void) ClassifyWordRb(styler.GetStartSegment(), lengthDoc - 1, keywords, styler, prevWord);
925 } else {
926 styler.ColourTo(lengthDoc - 1, state);
927 }
928 }
929
930 // Helper functions for folding
931
getPrevWord(int pos,char * prevWord,Accessor & styler,int word_state)932 static void getPrevWord(int pos,
933 char *prevWord,
934 Accessor &styler,
935 int word_state)
936 {
937 int i;
938 styler.Flush();
939 for (i = pos - 1; i > 0; i--) {
940 if (actual_style(styler.StyleAt(i)) != word_state) {
941 i++;
942 break;
943 }
944 }
945 if (i < pos - MAX_KEYWORD_LENGTH) // overflow
946 i = pos - MAX_KEYWORD_LENGTH;
947 char *dst = prevWord;
948 for (; i <= pos; i++) {
949 *dst++ = styler[i];
950 }
951 *dst = 0;
952 }
953
// Reports whether `prevWord` is a keyword that can either open a block
// or act as a statement modifier / noise word: if, do, while, unless,
// until.
static bool keywordIsAmbiguous(const char *prevWord)
{
    // Ordered from most likely used to least likely.
    static const char *const ambiguous[] = {
        "if", "do", "while", "unless", "until"
    };
    const size_t count = sizeof(ambiguous) / sizeof(ambiguous[0]);
    for (size_t k = 0; k < count; ++k) {
        if (strcmp(prevWord, ambiguous[k]) == 0) {
            return true;
        }
    }
    return false;
}
968
// Reports whether `ch` is a blank or a tab (line terminators are not
// counted as white space here).
static bool inline iswhitespace(char ch) {
    switch (ch) {
    case ' ':
    case '\t':
        return true;
    default:
        return false;
    }
}
972
973 // Demote keywords in the following conditions:
974 // if, while, unless, until modify a statement
975 // do after a while or until, as a noise word (like then after if)
976
keywordIsModifier(const char * word,int pos,Accessor & styler)977 static bool keywordIsModifier(const char *word,
978 int pos,
979 Accessor &styler)
980 {
981 if (word[0] == 'd' && word[1] == 'o' && !word[2]) {
982 return keywordDoStartsLoop(pos, styler);
983 }
984 char ch;
985 int style = SCE_RB_DEFAULT;
986 int lineStart = styler.GetLine(pos);
987 int lineStartPosn = styler.LineStart(lineStart);
988 styler.Flush();
989 while (--pos >= lineStartPosn) {
990 style = actual_style(styler.StyleAt(pos));
991 if (style == SCE_RB_DEFAULT) {
992 if (iswhitespace(ch = styler[pos])) {
993 //continue
994 } else if (ch == '\r' || ch == '\n') {
995 // Scintilla's LineStart() and GetLine() routines aren't
996 // platform-independent, so if we have text prepared with
997 // a different system we can't rely on it.
998 return false;
999 }
1000 } else {
1001 break;
1002 }
1003 }
1004 if (pos < lineStartPosn) {
1005 return false; //XXX not quite right if the prev line is a continuation
1006 }
1007 // First things where the action is unambiguous
1008 switch (style) {
1009 case SCE_RB_DEFAULT:
1010 case SCE_RB_COMMENTLINE:
1011 case SCE_RB_POD:
1012 case SCE_RB_CLASSNAME:
1013 case SCE_RB_DEFNAME:
1014 case SCE_RB_MODULE_NAME:
1015 return false;
1016 case SCE_RB_OPERATOR:
1017 break;
1018 case SCE_RB_WORD:
1019 // Watch out for uses of 'else if'
1020 //XXX: Make a list of other keywords where 'if' isn't a modifier
1021 // and can appear legitimately
1022 // Formulate this to avoid warnings from most compilers
1023 if (strcmp(word, "if") == 0) {
1024 char prevWord[MAX_KEYWORD_LENGTH + 1];
1025 getPrevWord(pos, prevWord, styler, SCE_RB_WORD);
1026 return strcmp(prevWord, "else") != 0;
1027 }
1028 return true;
1029 default:
1030 return true;
1031 }
1032 // Assume that if the keyword follows an operator,
1033 // usually it's a block assignment, like
1034 // a << if x then y else z
1035
1036 ch = styler[pos];
1037 switch (ch) {
1038 case ')':
1039 case ']':
1040 case '}':
1041 return true;
1042 default:
1043 return false;
1044 }
1045 }
1046
1047 #define WHILE_BACKWARDS "elihw"
1048 #define UNTIL_BACKWARDS "litnu"
1049
1050 // Nothing fancy -- look to see if we follow a while/until somewhere
1051 // on the current line
1052
keywordDoStartsLoop(int pos,Accessor & styler)1053 static bool keywordDoStartsLoop(int pos,
1054 Accessor &styler)
1055 {
1056 char ch;
1057 int style;
1058 int lineStart = styler.GetLine(pos);
1059 int lineStartPosn = styler.LineStart(lineStart);
1060 styler.Flush();
1061 while (--pos >= lineStartPosn) {
1062 style = actual_style(styler.StyleAt(pos));
1063 if (style == SCE_RB_DEFAULT) {
1064 if ((ch = styler[pos]) == '\r' || ch == '\n') {
1065 // Scintilla's LineStart() and GetLine() routines aren't
1066 // platform-independent, so if we have text prepared with
1067 // a different system we can't rely on it.
1068 return false;
1069 }
1070 } else if (style == SCE_RB_WORD) {
1071 // Check for while or until, but write the word in backwards
1072 char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero
1073 char *dst = prevWord;
1074 int wordLen = 0;
1075 int start_word;
1076 for (start_word = pos;
1077 start_word >= lineStartPosn && actual_style(styler.StyleAt(start_word)) == SCE_RB_WORD;
1078 start_word--) {
1079 if (++wordLen < MAX_KEYWORD_LENGTH) {
1080 *dst++ = styler[start_word];
1081 }
1082 }
1083 *dst = 0;
1084 // Did we see our keyword?
1085 if (!strcmp(prevWord, WHILE_BACKWARDS)
1086 || !strcmp(prevWord, UNTIL_BACKWARDS)) {
1087 return true;
1088 }
1089 // We can move pos to the beginning of the keyword, and then
1090 // accept another decrement, as we can never have two contiguous
1091 // keywords:
1092 // word1 word2
1093 // ^
1094 // <- move to start_word
1095 // ^
1096 // <- loop decrement
1097 // ^ # pointing to end of word1 is fine
1098 pos = start_word;
1099 }
1100 }
1101 return false;
1102 }
1103
1104 /*
1105 * Folding Ruby
1106 *
1107 * The language is quite complex to analyze without a full parse.
1108 * For example, this line shouldn't affect fold level:
1109 *
1110 * print "hello" if feeling_friendly?
1111 *
1112 * Neither should this:
1113 *
1114 * print "hello" \
1115 * if feeling_friendly?
1116 *
1117 *
1118 * But this should:
1119 *
1120 * if feeling_friendly? #++
1121 * print "hello" \
1122 * print "goodbye"
1123 * end #--
1124 *
1125 * So we cheat, by actually looking at the existing indentation
1126 * levels for each line, and just echoing it back. Like Python.
1127 * Then if we get better at it, we'll take braces into consideration,
1128 * which always affect folding levels.
1129
1130 * How the keywords should work:
1131 * No effect:
1132 * __FILE__ __LINE__ BEGIN END alias and
1133 * defined? false in nil not or self super then
1134 * true undef
1135
1136 * Always increment:
1137 * begin class def do for module when {
1138 *
1139 * Always decrement:
1140 * end }
1141 *
1142 * Increment if these start a statement
1143 * if unless until while -- do nothing if they're modifiers
1144
1145 * These end a block if there's no modifier, but don't bother
1146 * break next redo retry return yield
1147 *
1148 * These temporarily de-indent, but re-indent
1149 * case else elsif ensure rescue
1150 *
 * This means that the folder reflects indentation rather
 * than setting it.  The language service updates indentation
 * when the user types Return and finishes entering de-denters.
1154 *
1155 * Later offer to fold POD, here-docs, strings, and blocks of comments
1156 */
1157
FoldRbDoc(unsigned int startPos,int length,int initStyle,WordList * [],Accessor & styler)1158 static void FoldRbDoc(unsigned int startPos, int length, int initStyle,
1159 WordList *[], Accessor &styler) {
1160 const bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
1161 bool foldComment = styler.GetPropertyInt("fold.comment") != 0;
1162
1163 synchronizeDocStart(startPos, length, initStyle, styler, // ref args
1164 false);
1165 unsigned int endPos = startPos + length;
1166 int visibleChars = 0;
1167 int lineCurrent = styler.GetLine(startPos);
1168 int levelPrev = startPos == 0 ? 0 : (styler.LevelAt(lineCurrent)
1169 & SC_FOLDLEVELNUMBERMASK
1170 & ~SC_FOLDLEVELBASE);
1171 int levelCurrent = levelPrev;
1172 char chNext = styler[startPos];
1173 int styleNext = styler.StyleAt(startPos);
1174 int stylePrev = startPos <= 1 ? SCE_RB_DEFAULT : styler.StyleAt(startPos - 1);
1175 bool buffer_ends_with_eol = false;
1176 for (unsigned int i = startPos; i < endPos; i++) {
1177 char ch = chNext;
1178 chNext = styler.SafeGetCharAt(i + 1);
1179 int style = styleNext;
1180 styleNext = styler.StyleAt(i + 1);
1181 bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
1182 if (style == SCE_RB_COMMENTLINE) {
1183 if (foldComment && stylePrev != SCE_RB_COMMENTLINE) {
1184 if (chNext == '{') {
1185 levelCurrent++;
1186 } else if (chNext == '}') {
1187 levelCurrent--;
1188 }
1189 }
1190 } else if (style == SCE_RB_OPERATOR) {
1191 if (strchr("[{(", ch)) {
1192 levelCurrent++;
1193 } else if (strchr(")}]", ch)) {
1194 // Don't decrement below 0
1195 if (levelCurrent > 0)
1196 levelCurrent--;
1197 }
1198 } else if (style == SCE_RB_WORD && styleNext != SCE_RB_WORD) {
1199 // Look at the keyword on the left and decide what to do
1200 char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero
1201 prevWord[0] = 0;
1202 getPrevWord(i, prevWord, styler, SCE_RB_WORD);
1203 if (!strcmp(prevWord, "end")) {
1204 // Don't decrement below 0
1205 if (levelCurrent > 0)
1206 levelCurrent--;
1207 } else if ( !strcmp(prevWord, "if")
1208 || !strcmp(prevWord, "def")
1209 || !strcmp(prevWord, "class")
1210 || !strcmp(prevWord, "module")
1211 || !strcmp(prevWord, "begin")
1212 || !strcmp(prevWord, "case")
1213 || !strcmp(prevWord, "do")
1214 || !strcmp(prevWord, "while")
1215 || !strcmp(prevWord, "unless")
1216 || !strcmp(prevWord, "until")
1217 || !strcmp(prevWord, "for")
1218 ) {
1219 levelCurrent++;
1220 }
1221 }
1222 if (atEOL) {
1223 int lev = levelPrev;
1224 if (visibleChars == 0 && foldCompact)
1225 lev |= SC_FOLDLEVELWHITEFLAG;
1226 if ((levelCurrent > levelPrev) && (visibleChars > 0))
1227 lev |= SC_FOLDLEVELHEADERFLAG;
1228 styler.SetLevel(lineCurrent, lev|SC_FOLDLEVELBASE);
1229 lineCurrent++;
1230 levelPrev = levelCurrent;
1231 visibleChars = 0;
1232 buffer_ends_with_eol = true;
1233 } else if (!isspacechar(ch)) {
1234 visibleChars++;
1235 buffer_ends_with_eol = false;
1236 }
1237 }
1238 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
1239 if (!buffer_ends_with_eol) {
1240 lineCurrent++;
1241 int new_lev = levelCurrent;
1242 if (visibleChars == 0 && foldCompact)
1243 new_lev |= SC_FOLDLEVELWHITEFLAG;
1244 if ((levelCurrent > levelPrev) && (visibleChars > 0))
1245 new_lev |= SC_FOLDLEVELHEADERFLAG;
1246 levelCurrent = new_lev;
1247 }
1248 styler.SetLevel(lineCurrent, levelCurrent|SC_FOLDLEVELBASE);
1249 }
1250
// Descriptions of the word lists for this lexer: a single entry,
// "Keywords", followed by a 0 terminator.  Passed to the lmRuby
// LexerModule below.
static const char * const rubyWordListDesc[] = {
    "Keywords",
    0
};
1255
// Lexer module object for SCLEX_RUBY, named "ruby": ColouriseRbDoc is the
// colouriser, FoldRbDoc the folder, rubyWordListDesc the word-list names.
// NOTE(review): presumably constructing this object is what makes the lexer
// known to Scintilla -- confirm against LexerModule's constructor.
LexerModule lmRuby(SCLEX_RUBY, ColouriseRbDoc, "ruby", FoldRbDoc, rubyWordListDesc);
1257