1 // Scintilla source code edit control
2 /** @file LexRuby.cxx
3 ** Lexer for Ruby.
4 **/
5 // Copyright 2001- by Clemens Wyss <wys@helbling.ch>
6 // The License.txt file describes the conditions under which this software may be distributed.
7
8 #include <stdlib.h>
9 #include <string.h>
10 #include <ctype.h>
11 #include <stdio.h>
12 #include <stdarg.h>
13
14 #include "Platform.h"
15
16 #include "PropSet.h"
17 #include "Accessor.h"
18 #include "KeyWords.h"
19 #include "Scintilla.h"
20 #include "SciLexer.h"
21
22 #ifdef SCI_NAMESPACE
23 using namespace Scintilla;
24 #endif
25
26 //XXX Identical to Perl, put in common area
// Reports whether ch is one of the two line-terminator characters,
// carriage return or line feed.
static inline bool isEOLChar(char ch) {
	switch (ch) {
	case '\n':
	case '\r':
		return true;
	default:
		return false;
	}
}
30
// True for values in the 7-bit range 0..127. The cast to unsigned makes a
// negative char compare as a very large number, so it is rejected.
#define isSafeASCII(ch) ((unsigned int)(ch) < 128U)
// Exact complement of isSafeASCII; kept as a separate macro because it
// makes the call sites easier to read.
#define isHighBitChar(ch) ((unsigned int)(ch) >= 128U)
34
isSafeAlpha(char ch)35 static inline bool isSafeAlpha(char ch) {
36 return (isSafeASCII(ch) && isalpha(ch)) || ch == '_';
37 }
38
isSafeAlnum(char ch)39 static inline bool isSafeAlnum(char ch) {
40 return (isSafeASCII(ch) && isalnum(ch)) || ch == '_';
41 }
42
isSafeAlnumOrHigh(char ch)43 static inline bool isSafeAlnumOrHigh(char ch) {
44 return isHighBitChar(ch) || isalnum(ch) || ch == '_';
45 }
46
isSafeDigit(char ch)47 static inline bool isSafeDigit(char ch) {
48 return isSafeASCII(ch) && isdigit(ch);
49 }
50
isSafeWordcharOrHigh(char ch)51 static inline bool isSafeWordcharOrHigh(char ch) {
52 return isHighBitChar(ch) || iswordchar(ch);
53 }
54
// True for the two in-line blank characters, tab and space. Line
// terminators are deliberately not counted as whitespace here.
static inline bool iswhitespace(char ch) {
	return ch == '\t' || ch == ' ';
}
58
// Size limit used for word buffers in this lexer: words are truncated to
// MAX_KEYWORD_LENGTH - 1 characters before being compared or stored.
#define MAX_KEYWORD_LENGTH 200

// actual_style() keeps only the low six bits of a style value.
// The argument is parenthesized so that an expression such as
// actual_style(a | b) masks the whole expression, not just its last operand.
#define STYLE_MASK 63
#define actual_style(style) ((style) & STYLE_MASK)
63
followsDot(unsigned int pos,Accessor & styler)64 static bool followsDot(unsigned int pos, Accessor &styler) {
65 styler.Flush();
66 for (; pos >= 1; --pos) {
67 int style = actual_style(styler.StyleAt(pos));
68 char ch;
69 switch (style) {
70 case SCE_RB_DEFAULT:
71 ch = styler[pos];
72 if (ch == ' ' || ch == '\t') {
73 //continue
74 } else {
75 return false;
76 }
77 break;
78
79 case SCE_RB_OPERATOR:
80 return styler[pos] == '.';
81
82 default:
83 return false;
84 }
85 }
86 return false;
87 }
88
89 // Forward declarations
90 static bool keywordIsAmbiguous(const char *prevWord);
91 static bool keywordDoStartsLoop(int pos,
92 Accessor &styler);
93 static bool keywordIsModifier(const char *word,
94 int pos,
95 Accessor &styler);
96
ClassifyWordRb(unsigned int start,unsigned int end,WordList & keywords,Accessor & styler,char * prevWord)97 static int ClassifyWordRb(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, char *prevWord) {
98 char s[100];
99 unsigned int i, j;
100 unsigned int lim = end - start + 1; // num chars to copy
101 if (lim >= MAX_KEYWORD_LENGTH) {
102 lim = MAX_KEYWORD_LENGTH - 1;
103 }
104 for (i = start, j = 0; j < lim; i++, j++) {
105 s[j] = styler[i];
106 }
107 s[j] = '\0';
108 int chAttr;
109 if (0 == strcmp(prevWord, "class"))
110 chAttr = SCE_RB_CLASSNAME;
111 else if (0 == strcmp(prevWord, "module"))
112 chAttr = SCE_RB_MODULE_NAME;
113 else if (0 == strcmp(prevWord, "def"))
114 chAttr = SCE_RB_DEFNAME;
115 else if (keywords.InList(s) && !followsDot(start - 1, styler)) {
116 if (keywordIsAmbiguous(s)
117 && keywordIsModifier(s, start, styler)) {
118
119 // Demoted keywords are colored as keywords,
120 // but do not affect changes in indentation.
121 //
122 // Consider the word 'if':
123 // 1. <<if test ...>> : normal
124 // 2. <<stmt if test>> : demoted
125 // 3. <<lhs = if ...>> : normal: start a new indent level
126 // 4. <<obj.if = 10>> : color as identifer, since it follows '.'
127
128 chAttr = SCE_RB_WORD_DEMOTED;
129 } else {
130 chAttr = SCE_RB_WORD;
131 }
132 } else
133 chAttr = SCE_RB_IDENTIFIER;
134 styler.ColourTo(end, chAttr);
135 if (chAttr == SCE_RB_WORD) {
136 strcpy(prevWord, s);
137 } else {
138 prevWord[0] = 0;
139 }
140 return chAttr;
141 }
142
143
144 //XXX Identical to Perl, put in common area
isMatch(Accessor & styler,int lengthDoc,int pos,const char * val)145 static bool isMatch(Accessor &styler, int lengthDoc, int pos, const char *val) {
146 if ((pos + static_cast<int>(strlen(val))) >= lengthDoc) {
147 return false;
148 }
149 while (*val) {
150 if (*val != styler[pos++]) {
151 return false;
152 }
153 val++;
154 }
155 return true;
156 }
157
158 // Do Ruby better -- find the end of the line, work back,
159 // and then check for leading white space
160
161 // Precondition: the here-doc target can be indented
lookingAtHereDocDelim(Accessor & styler,int pos,int lengthDoc,const char * HereDocDelim)162 static bool lookingAtHereDocDelim(Accessor &styler,
163 int pos,
164 int lengthDoc,
165 const char *HereDocDelim)
166 {
167 if (!isMatch(styler, lengthDoc, pos, HereDocDelim)) {
168 return false;
169 }
170 while (--pos > 0) {
171 char ch = styler[pos];
172 if (isEOLChar(ch)) {
173 return true;
174 } else if (ch != ' ' && ch != '\t') {
175 return false;
176 }
177 }
178 return false;
179 }
180
181 //XXX Identical to Perl, put in common area
// Maps an opening bracket to its closing partner: ( [ { < become ) ] } >.
// Every other character is returned unchanged, so it acts as its own closer.
static char opposite(char ch) {
	switch (ch) {
	case '(':
		return ')';
	case '[':
		return ']';
	case '{':
		return '}';
	case '<':
		return '>';
	default:
		return ch;
	}
}
193
194 // Null transitions when we see we've reached the end
195 // and need to relex the curr char.
196
redo_char(int & i,char & ch,char & chNext,char & chNext2,int & state)197 static void redo_char(int &i, char &ch, char &chNext, char &chNext2,
198 int &state) {
199 i--;
200 chNext2 = chNext;
201 chNext = ch;
202 state = SCE_RB_DEFAULT;
203 }
204
// Moves the scan forward by one character: ch takes chNext, chNext takes
// chNext2, and the index is incremented. chNext2 is not refreshed here.
static void advance_char(int &i, char &ch, char &chNext, char &chNext2) {
	ch = chNext;
	chNext = chNext2;
	++i;
}
210
211 // precondition: startPos points to one after the EOL char
currLineContainsHereDelims(int & startPos,Accessor & styler)212 static bool currLineContainsHereDelims(int& startPos,
213 Accessor &styler) {
214 if (startPos <= 1)
215 return false;
216
217 int pos;
218 for (pos = startPos - 1; pos > 0; pos--) {
219 char ch = styler.SafeGetCharAt(pos);
220 if (isEOLChar(ch)) {
221 // Leave the pointers where they are -- there are no
222 // here doc delims on the current line, even if
223 // the EOL isn't default style
224
225 return false;
226 } else {
227 styler.Flush();
228 if (actual_style(styler.StyleAt(pos)) == SCE_RB_HERE_DELIM) {
229 break;
230 }
231 }
232 }
233 if (pos == 0) {
234 return false;
235 }
236 // Update the pointers so we don't have to re-analyze the string
237 startPos = pos;
238 return true;
239 }
240
241
isEmptyLine(int pos,Accessor & styler)242 static bool isEmptyLine(int pos,
243 Accessor &styler) {
244 int spaceFlags = 0;
245 int lineCurrent = styler.GetLine(pos);
246 int indentCurrent = styler.IndentAmount(lineCurrent, &spaceFlags, NULL);
247 return (indentCurrent & SC_FOLDLEVELWHITEFLAG) != 0;
248 }
249
// Returns true when 'keyword' is one of the Ruby keywords after which a
// following '/' should be read as the start of a regular expression.
// The comparison is exact and case-sensitive.
static bool RE_CanFollowKeyword(const char *keyword) {
	static const char *const reKeywords[] = {
		"and", "begin", "break", "case", "do",
		"else", "elsif", "if", "next", "return",
		"when", "unless", "until", "not", "or"
	};
	const size_t count = sizeof(reKeywords) / sizeof(reKeywords[0]);
	for (size_t k = 0; k < count; ++k) {
		if (strcmp(keyword, reKeywords[k]) == 0) {
			return true;
		}
	}
	return false;
}
270
271 // Look at chars up to but not including endPos
272 // Don't look at styles in case we're looking forward
273
skipWhitespace(int startPos,int endPos,Accessor & styler)274 static int skipWhitespace(int startPos,
275 int endPos,
276 Accessor &styler) {
277 for (int i = startPos; i < endPos; i++) {
278 if (!iswhitespace(styler[i])) {
279 return i;
280 }
281 }
282 return endPos;
283 }
284
285 // This routine looks for false positives like
286 // undef foo, <<
287 // There aren't too many.
288 //
289 // iPrev points to the start of <<
290
sureThisIsHeredoc(int iPrev,Accessor & styler,char * prevWord)291 static bool sureThisIsHeredoc(int iPrev,
292 Accessor &styler,
293 char *prevWord) {
294
295 // Not so fast, since Ruby's so dynamic. Check the context
296 // to make sure we're OK.
297 int prevStyle;
298 int lineStart = styler.GetLine(iPrev);
299 int lineStartPosn = styler.LineStart(lineStart);
300 styler.Flush();
301
302 // Find the first word after some whitespace
303 int firstWordPosn = skipWhitespace(lineStartPosn, iPrev, styler);
304 if (firstWordPosn >= iPrev) {
305 // Have something like {^ <<}
306 //XXX Look at the first previous non-comment non-white line
307 // to establish the context. Not too likely though.
308 return true;
309 } else {
310 switch (prevStyle = styler.StyleAt(firstWordPosn)) {
311 case SCE_RB_WORD:
312 case SCE_RB_WORD_DEMOTED:
313 case SCE_RB_IDENTIFIER:
314 break;
315 default:
316 return true;
317 }
318 }
319 int firstWordEndPosn = firstWordPosn;
320 char *dst = prevWord;
321 for (;;) {
322 if (firstWordEndPosn >= iPrev ||
323 styler.StyleAt(firstWordEndPosn) != prevStyle) {
324 *dst = 0;
325 break;
326 }
327 *dst++ = styler[firstWordEndPosn];
328 firstWordEndPosn += 1;
329 }
330 //XXX Write a style-aware thing to regex scintilla buffer objects
331 if (!strcmp(prevWord, "undef")
332 || !strcmp(prevWord, "def")
333 || !strcmp(prevWord, "alias")) {
334 // These keywords are what we were looking for
335 return false;
336 }
337 return true;
338 }
339
340 // Routine that saves us from allocating a buffer for the here-doc target
341 // targetEndPos points one past the end of the current target
haveTargetMatch(int currPos,int lengthDoc,int targetStartPos,int targetEndPos,Accessor & styler)342 static bool haveTargetMatch(int currPos,
343 int lengthDoc,
344 int targetStartPos,
345 int targetEndPos,
346 Accessor &styler) {
347 if (lengthDoc - currPos < targetEndPos - targetStartPos) {
348 return false;
349 }
350 int i, j;
351 for (i = targetStartPos, j = currPos;
352 i < targetEndPos && j < lengthDoc;
353 i++, j++) {
354 if (styler[i] != styler[j]) {
355 return false;
356 }
357 }
358 return true;
359 }
360
361 // We need a check because the form
362 // [identifier] <<[target]
363 // is ambiguous. The Ruby lexer/parser resolves it by
364 // looking to see if [identifier] names a variable or a
// function. If it's a function, it's the start of a here-doc.
// If it's a var, it's an operator. This lexer doesn't
367 // maintain a symbol table, so it looks ahead to see what's
368 // going on, in cases where we have
369 // ^[white-space]*[identifier([.|::]identifier)*][white-space]*<<[target]
370 //
371 // If there's no occurrence of [target] on a line, assume we don't.
372
373 // return true == yes, we have no heredocs
374
sureThisIsNotHeredoc(int lt2StartPos,Accessor & styler)375 static bool sureThisIsNotHeredoc(int lt2StartPos,
376 Accessor &styler) {
377 int prevStyle;
378 // Use full document, not just part we're styling
379 int lengthDoc = styler.Length();
380 int lineStart = styler.GetLine(lt2StartPos);
381 int lineStartPosn = styler.LineStart(lineStart);
382 styler.Flush();
383 const bool definitely_not_a_here_doc = true;
384 const bool looks_like_a_here_doc = false;
385
386 // Find the first word after some whitespace
387 int firstWordPosn = skipWhitespace(lineStartPosn, lt2StartPos, styler);
388 if (firstWordPosn >= lt2StartPos) {
389 return definitely_not_a_here_doc;
390 }
391 prevStyle = styler.StyleAt(firstWordPosn);
392 // If we have '<<' following a keyword, it's not a heredoc
393 if (prevStyle != SCE_RB_IDENTIFIER) {
394 return definitely_not_a_here_doc;
395 }
396 int newStyle = prevStyle;
397 // Some compilers incorrectly warn about uninit newStyle
398 for (firstWordPosn += 1; firstWordPosn <= lt2StartPos; firstWordPosn += 1) {
399 // Inner loop looks at the name
400 for (; firstWordPosn <= lt2StartPos; firstWordPosn += 1) {
401 newStyle = styler.StyleAt(firstWordPosn);
402 if (newStyle != prevStyle) {
403 break;
404 }
405 }
406 // Do we have '::' or '.'?
407 if (firstWordPosn < lt2StartPos && newStyle == SCE_RB_OPERATOR) {
408 char ch = styler[firstWordPosn];
409 if (ch == '.') {
410 // yes
411 } else if (ch == ':') {
412 if (styler.StyleAt(++firstWordPosn) != SCE_RB_OPERATOR) {
413 return definitely_not_a_here_doc;
414 } else if (styler[firstWordPosn] != ':') {
415 return definitely_not_a_here_doc;
416 }
417 } else {
418 break;
419 }
420 } else {
421 break;
422 }
423 }
424 // Skip next batch of white-space
425 firstWordPosn = skipWhitespace(firstWordPosn, lt2StartPos, styler);
426 if (firstWordPosn != lt2StartPos) {
427 // Have [[^ws[identifier]ws[*something_else*]ws<<
428 return definitely_not_a_here_doc;
429 }
430 // OK, now 'j' will point to the current spot moving ahead
431 int j = firstWordPosn + 1;
432 if (styler.StyleAt(j) != SCE_RB_OPERATOR || styler[j] != '<') {
433 // This shouldn't happen
434 return definitely_not_a_here_doc;
435 }
436 int nextLineStartPosn = styler.LineStart(lineStart + 1);
437 if (nextLineStartPosn >= lengthDoc) {
438 return definitely_not_a_here_doc;
439 }
440 j = skipWhitespace(j + 1, nextLineStartPosn, styler);
441 if (j >= lengthDoc) {
442 return definitely_not_a_here_doc;
443 }
444 bool allow_indent;
445 int target_start, target_end;
446 // From this point on no more styling, since we're looking ahead
447 if (styler[j] == '-') {
448 allow_indent = true;
449 j++;
450 } else {
451 allow_indent = false;
452 }
453
454 // Allow for quoted targets.
455 char target_quote = 0;
456 switch (styler[j]) {
457 case '\'':
458 case '"':
459 case '`':
460 target_quote = styler[j];
461 j += 1;
462 }
463
464 if (isSafeAlnum(styler[j])) {
465 // Init target_end because some compilers think it won't
466 // be initialized by the time it's used
467 target_start = target_end = j;
468 j++;
469 } else {
470 return definitely_not_a_here_doc;
471 }
472 for (; j < lengthDoc; j++) {
473 if (!isSafeAlnum(styler[j])) {
474 if (target_quote && styler[j] != target_quote) {
475 // unquoted end
476 return definitely_not_a_here_doc;
477 }
478
479 // And for now make sure that it's a newline
480 // don't handle arbitrary expressions yet
481
482 target_end = j;
483 if (target_quote) {
484 // Now we can move to the character after the string delimiter.
485 j += 1;
486 }
487 j = skipWhitespace(j, lengthDoc, styler);
488 if (j >= lengthDoc) {
489 return definitely_not_a_here_doc;
490 } else {
491 char ch = styler[j];
492 if (ch == '#' || isEOLChar(ch)) {
493 // This is OK, so break and continue;
494 break;
495 } else {
496 return definitely_not_a_here_doc;
497 }
498 }
499 }
500 }
501
502 // Just look at the start of each line
503 int last_line = styler.GetLine(lengthDoc - 1);
504 // But don't go too far
505 if (last_line > lineStart + 50) {
506 last_line = lineStart + 50;
507 }
508 for (int line_num = lineStart + 1; line_num <= last_line; line_num++) {
509 if (allow_indent) {
510 j = skipWhitespace(styler.LineStart(line_num), lengthDoc, styler);
511 } else {
512 j = styler.LineStart(line_num);
513 }
514 // target_end is one past the end
515 if (haveTargetMatch(j, lengthDoc, target_start, target_end, styler)) {
516 // We got it
517 return looks_like_a_here_doc;
518 }
519 }
520 return definitely_not_a_here_doc;
521 }
522
523 //todo: if we aren't looking at a stdio character,
524 // move to the start of the first line that is not in a
525 // multi-line construct
526
synchronizeDocStart(unsigned int & startPos,int & length,int & initStyle,Accessor & styler,bool skipWhiteSpace=false)527 static void synchronizeDocStart(unsigned int& startPos,
528 int &length,
529 int &initStyle,
530 Accessor &styler,
531 bool skipWhiteSpace=false) {
532
533 styler.Flush();
534 int style = actual_style(styler.StyleAt(startPos));
535 switch (style) {
536 case SCE_RB_STDIN:
537 case SCE_RB_STDOUT:
538 case SCE_RB_STDERR:
539 // Don't do anything else with these.
540 return;
541 }
542
543 int pos = startPos;
544 // Quick way to characterize each line
545 int lineStart;
546 for (lineStart = styler.GetLine(pos); lineStart > 0; lineStart--) {
547 // Now look at the style before the previous line's EOL
548 pos = styler.LineStart(lineStart) - 1;
549 if (pos <= 10) {
550 lineStart = 0;
551 break;
552 }
553 char ch = styler.SafeGetCharAt(pos);
554 char chPrev = styler.SafeGetCharAt(pos - 1);
555 if (ch == '\n' && chPrev == '\r') {
556 pos--;
557 }
558 if (styler.SafeGetCharAt(pos - 1) == '\\') {
559 // Continuation line -- keep going
560 } else if (actual_style(styler.StyleAt(pos)) != SCE_RB_DEFAULT) {
561 // Part of multi-line construct -- keep going
562 } else if (currLineContainsHereDelims(pos, styler)) {
563 // Keep going, with pos and length now pointing
564 // at the end of the here-doc delimiter
565 } else if (skipWhiteSpace && isEmptyLine(pos, styler)) {
566 // Keep going
567 } else {
568 break;
569 }
570 }
571 pos = styler.LineStart(lineStart);
572 length += (startPos - pos);
573 startPos = pos;
574 initStyle = SCE_RB_DEFAULT;
575 }
576
ColouriseRbDoc(unsigned int startPos,int length,int initStyle,WordList * keywordlists[],Accessor & styler)577 static void ColouriseRbDoc(unsigned int startPos, int length, int initStyle,
578 WordList *keywordlists[], Accessor &styler) {
579
580 // Lexer for Ruby often has to backtrack to start of current style to determine
581 // which characters are being used as quotes, how deeply nested is the
582 // start position and what the termination string is for here documents
583
584 WordList &keywords = *keywordlists[0];
585
586 class HereDocCls {
587 public:
588 int State;
589 // States
590 // 0: '<<' encountered
591 // 1: collect the delimiter
592 // 1b: text between the end of the delimiter and the EOL
593 // 2: here doc text (lines after the delimiter)
594 char Quote; // the char after '<<'
595 bool Quoted; // true if Quote in ('\'','"','`')
596 int DelimiterLength; // strlen(Delimiter)
597 char Delimiter[256]; // the Delimiter, limit of 256: from Perl
598 bool CanBeIndented;
599 HereDocCls() {
600 State = 0;
601 DelimiterLength = 0;
602 Delimiter[0] = '\0';
603 CanBeIndented = false;
604 }
605 };
606 HereDocCls HereDoc;
607
608 class QuoteCls {
609 public:
610 int Count;
611 char Up;
612 char Down;
613 QuoteCls() {
614 this->New();
615 }
616 void New() {
617 Count = 0;
618 Up = '\0';
619 Down = '\0';
620 }
621 void Open(char u) {
622 Count++;
623 Up = u;
624 Down = opposite(Up);
625 }
626 };
627 QuoteCls Quote;
628
629 int numDots = 0; // For numbers --
630 // Don't start lexing in the middle of a num
631
632 synchronizeDocStart(startPos, length, initStyle, styler, // ref args
633 false);
634
635 bool preferRE = true;
636 int state = initStyle;
637 int lengthDoc = startPos + length;
638
639 char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero
640 prevWord[0] = '\0';
641 if (length == 0)
642 return;
643
644 char chPrev = styler.SafeGetCharAt(startPos - 1);
645 char chNext = styler.SafeGetCharAt(startPos);
646 // Ruby uses a different mask because bad indentation is marked by oring with 32
647 styler.StartAt(startPos, 127);
648 styler.StartSegment(startPos);
649
650 static int q_states[] = {SCE_RB_STRING_Q,
651 SCE_RB_STRING_QQ,
652 SCE_RB_STRING_QR,
653 SCE_RB_STRING_QW,
654 SCE_RB_STRING_QW,
655 SCE_RB_STRING_QX};
656 static const char* q_chars = "qQrwWx";
657
658 for (int i = startPos; i < lengthDoc; i++) {
659 char ch = chNext;
660 chNext = styler.SafeGetCharAt(i + 1);
661 char chNext2 = styler.SafeGetCharAt(i + 2);
662
663 if (styler.IsLeadByte(ch)) {
664 chNext = chNext2;
665 chPrev = ' ';
666 i += 1;
667 continue;
668 }
669
670 // skip on DOS/Windows
671 //No, don't, because some things will get tagged on,
672 // so we won't recognize keywords, for example
673 #if 0
674 if (ch == '\r' && chNext == '\n') {
675 continue;
676 }
677 #endif
678
679 if (HereDoc.State == 1 && isEOLChar(ch)) {
680 // Begin of here-doc (the line after the here-doc delimiter):
681 HereDoc.State = 2;
682 styler.ColourTo(i-1, state);
683 // Don't check for a missing quote, just jump into
684 // the here-doc state
685 state = SCE_RB_HERE_Q;
686 }
687
688 // Regular transitions
689 if (state == SCE_RB_DEFAULT) {
690 if (isSafeDigit(ch)) {
691 styler.ColourTo(i - 1, state);
692 state = SCE_RB_NUMBER;
693 numDots = 0;
694 } else if (isHighBitChar(ch) || iswordstart(ch)) {
695 styler.ColourTo(i - 1, state);
696 state = SCE_RB_WORD;
697 } else if (ch == '#') {
698 styler.ColourTo(i - 1, state);
699 state = SCE_RB_COMMENTLINE;
700 } else if (ch == '=') {
701 // =begin indicates the start of a comment (doc) block
702 if (i == 0 || isEOLChar(chPrev)
703 && chNext == 'b'
704 && styler.SafeGetCharAt(i + 2) == 'e'
705 && styler.SafeGetCharAt(i + 3) == 'g'
706 && styler.SafeGetCharAt(i + 4) == 'i'
707 && styler.SafeGetCharAt(i + 5) == 'n'
708 && !isSafeWordcharOrHigh(styler.SafeGetCharAt(i + 6))) {
709 styler.ColourTo(i - 1, state);
710 state = SCE_RB_POD;
711 } else {
712 styler.ColourTo(i - 1, state);
713 styler.ColourTo(i, SCE_RB_OPERATOR);
714 preferRE = true;
715 }
716 } else if (ch == '"') {
717 styler.ColourTo(i - 1, state);
718 state = SCE_RB_STRING;
719 Quote.New();
720 Quote.Open(ch);
721 } else if (ch == '\'') {
722 styler.ColourTo(i - 1, state);
723 state = SCE_RB_CHARACTER;
724 Quote.New();
725 Quote.Open(ch);
726 } else if (ch == '`') {
727 styler.ColourTo(i - 1, state);
728 state = SCE_RB_BACKTICKS;
729 Quote.New();
730 Quote.Open(ch);
731 } else if (ch == '@') {
732 // Instance or class var
733 styler.ColourTo(i - 1, state);
734 if (chNext == '@') {
735 state = SCE_RB_CLASS_VAR;
736 advance_char(i, ch, chNext, chNext2); // pass by ref
737 } else {
738 state = SCE_RB_INSTANCE_VAR;
739 }
740 } else if (ch == '$') {
741 // Check for a builtin global
742 styler.ColourTo(i - 1, state);
743 // Recognize it bit by bit
744 state = SCE_RB_GLOBAL;
745 } else if (ch == '/' && preferRE) {
746 // Ambigous operator
747 styler.ColourTo(i - 1, state);
748 state = SCE_RB_REGEX;
749 Quote.New();
750 Quote.Open(ch);
751 } else if (ch == '<' && chNext == '<' && chNext2 != '=') {
752
753 // Recognise the '<<' symbol - either a here document or a binary op
754 styler.ColourTo(i - 1, state);
755 i++;
756 chNext = chNext2;
757 styler.ColourTo(i, SCE_RB_OPERATOR);
758
759 if (! (strchr("\"\'`_-", chNext2) || isSafeAlpha(chNext2))) {
760 // It's definitely not a here-doc,
761 // based on Ruby's lexer/parser in the
762 // heredoc_identifier routine.
763 // Nothing else to do.
764 } else if (preferRE) {
765 if (sureThisIsHeredoc(i - 1, styler, prevWord)) {
766 state = SCE_RB_HERE_DELIM;
767 HereDoc.State = 0;
768 }
769 // else leave it in default state
770 } else {
771 if (sureThisIsNotHeredoc(i - 1, styler)) {
772 // leave state as default
773 // We don't have all the heuristics Perl has for indications
774 // of a here-doc, because '<<' is overloadable and used
775 // for so many other classes.
776 } else {
777 state = SCE_RB_HERE_DELIM;
778 HereDoc.State = 0;
779 }
780 }
781 preferRE = (state != SCE_RB_HERE_DELIM);
782 } else if (ch == ':') {
783 styler.ColourTo(i - 1, state);
784 if (chNext == ':') {
785 // Mark "::" as an operator, not symbol start
786 styler.ColourTo(i + 1, SCE_RB_OPERATOR);
787 advance_char(i, ch, chNext, chNext2); // pass by ref
788 state = SCE_RB_DEFAULT;
789 preferRE = false;
790 } else if (isSafeWordcharOrHigh(chNext)) {
791 state = SCE_RB_SYMBOL;
792 } else if (strchr("[*!~+-*/%=<>&^|", chNext)) {
793 // Do the operator analysis in-line, looking ahead
794 // Based on the table in pickaxe 2nd ed., page 339
795 bool doColoring = true;
796 switch (chNext) {
797 case '[':
798 if (chNext2 == ']' ) {
799 char ch_tmp = styler.SafeGetCharAt(i + 3);
800 if (ch_tmp == '=') {
801 i += 3;
802 ch = ch_tmp;
803 chNext = styler.SafeGetCharAt(i + 1);
804 } else {
805 i += 2;
806 ch = chNext2;
807 chNext = ch_tmp;
808 }
809 } else {
810 doColoring = false;
811 }
812 break;
813
814 case '*':
815 if (chNext2 == '*') {
816 i += 2;
817 ch = chNext2;
818 chNext = styler.SafeGetCharAt(i + 1);
819 } else {
820 advance_char(i, ch, chNext, chNext2);
821 }
822 break;
823
824 case '!':
825 if (chNext2 == '=' || chNext2 == '~') {
826 i += 2;
827 ch = chNext2;
828 chNext = styler.SafeGetCharAt(i + 1);
829 } else {
830 advance_char(i, ch, chNext, chNext2);
831 }
832 break;
833
834 case '<':
835 if (chNext2 == '<') {
836 i += 2;
837 ch = chNext2;
838 chNext = styler.SafeGetCharAt(i + 1);
839 } else if (chNext2 == '=') {
840 char ch_tmp = styler.SafeGetCharAt(i + 3);
841 if (ch_tmp == '>') { // <=> operator
842 i += 3;
843 ch = ch_tmp;
844 chNext = styler.SafeGetCharAt(i + 1);
845 } else {
846 i += 2;
847 ch = chNext2;
848 chNext = ch_tmp;
849 }
850 } else {
851 advance_char(i, ch, chNext, chNext2);
852 }
853 break;
854
855 default:
856 // Simple one-character operators
857 advance_char(i, ch, chNext, chNext2);
858 break;
859 }
860 if (doColoring) {
861 styler.ColourTo(i, SCE_RB_SYMBOL);
862 state = SCE_RB_DEFAULT;
863 }
864 } else if (!preferRE) {
865 // Don't color symbol strings (yet)
866 // Just color the ":" and color rest as string
867 styler.ColourTo(i, SCE_RB_SYMBOL);
868 state = SCE_RB_DEFAULT;
869 } else {
870 styler.ColourTo(i, SCE_RB_OPERATOR);
871 state = SCE_RB_DEFAULT;
872 preferRE = true;
873 }
874 } else if (ch == '%') {
875 styler.ColourTo(i - 1, state);
876 bool have_string = false;
877 if (strchr(q_chars, chNext) && !isSafeWordcharOrHigh(chNext2)) {
878 Quote.New();
879 const char *hit = strchr(q_chars, chNext);
880 if (hit != NULL) {
881 state = q_states[hit - q_chars];
882 Quote.Open(chNext2);
883 i += 2;
884 ch = chNext2;
885 chNext = styler.SafeGetCharAt(i + 1);
886 have_string = true;
887 }
888 } else if (!isSafeWordcharOrHigh(chNext)) {
889 // Ruby doesn't allow high bit chars here,
890 // but the editor host might
891 state = SCE_RB_STRING_QQ;
892 Quote.Open(chNext);
893 advance_char(i, ch, chNext, chNext2); // pass by ref
894 have_string = true;
895 }
896 if (!have_string) {
897 styler.ColourTo(i, SCE_RB_OPERATOR);
898 // stay in default
899 preferRE = true;
900 }
901 } else if (isoperator(ch) || ch == '.') {
902 styler.ColourTo(i - 1, state);
903 styler.ColourTo(i, SCE_RB_OPERATOR);
904 // If we're ending an expression or block,
905 // assume it ends an object, and the ambivalent
906 // constructs are binary operators
907 //
908 // So if we don't have one of these chars,
909 // we aren't ending an object exp'n, and ops
910 // like : << / are unary operators.
911
912 preferRE = (strchr(")}].", ch) == NULL);
913 // Stay in default state
914 } else if (isEOLChar(ch)) {
915 // Make sure it's a true line-end, with no backslash
916 if ((ch == '\r' || (ch == '\n' && chPrev != '\r'))
917 && chPrev != '\\') {
918 // Assume we've hit the end of the statement.
919 preferRE = true;
920 }
921 }
922 } else if (state == SCE_RB_WORD) {
923 if (ch == '.' || !isSafeWordcharOrHigh(ch)) {
924 // Words include x? in all contexts,
925 // and <letters>= after either 'def' or a dot
926 // Move along until a complete word is on our left
927
928 // Default accessor treats '.' as word-chars,
929 // but we don't for now.
930
931 if (ch == '='
932 && isSafeWordcharOrHigh(chPrev)
933 && (chNext == '('
934 || strchr(" \t\n\r", chNext) != NULL)
935 && (!strcmp(prevWord, "def")
936 || followsDot(styler.GetStartSegment(), styler))) {
937 // <name>= is a name only when being def'd -- Get it the next time
938 // This means that <name>=<name> is always lexed as
939 // <name>, (op, =), <name>
940 } else if ((ch == '?' || ch == '!')
941 && isSafeWordcharOrHigh(chPrev)
942 && !isSafeWordcharOrHigh(chNext)) {
943 // <name>? is a name -- Get it the next time
944 // But <name>?<name> is always lexed as
945 // <name>, (op, ?), <name>
946 // Same with <name>! to indicate a method that
947 // modifies its target
948 } else if (isEOLChar(ch)
949 && isMatch(styler, lengthDoc, i - 7, "__END__")) {
950 styler.ColourTo(i, SCE_RB_DATASECTION);
951 state = SCE_RB_DATASECTION;
952 // No need to handle this state -- we'll just move to the end
953 preferRE = false;
954 } else {
955 int wordStartPos = styler.GetStartSegment();
956 int word_style = ClassifyWordRb(wordStartPos, i - 1, keywords, styler, prevWord);
957 switch (word_style) {
958 case SCE_RB_WORD:
959 preferRE = RE_CanFollowKeyword(prevWord);
960 break;
961
962 case SCE_RB_WORD_DEMOTED:
963 preferRE = true;
964 break;
965
966 case SCE_RB_IDENTIFIER:
967 if (isMatch(styler, lengthDoc, wordStartPos, "print")) {
968 preferRE = true;
969 } else if (isEOLChar(ch)) {
970 preferRE = true;
971 } else {
972 preferRE = false;
973 }
974 break;
975 default:
976 preferRE = false;
977 }
978 if (ch == '.') {
979 // We might be redefining an operator-method
980 preferRE = false;
981 }
982 // And if it's the first
983 redo_char(i, ch, chNext, chNext2, state); // pass by ref
984 }
985 }
986 } else if (state == SCE_RB_NUMBER) {
987 if (isSafeAlnumOrHigh(ch) || ch == '_') {
988 // Keep going
989 } else if (ch == '.' && ++numDots == 1) {
990 // Keep going
991 } else {
992 styler.ColourTo(i - 1, state);
993 redo_char(i, ch, chNext, chNext2, state); // pass by ref
994 preferRE = false;
995 }
996 } else if (state == SCE_RB_COMMENTLINE) {
997 if (isEOLChar(ch)) {
998 styler.ColourTo(i - 1, state);
999 state = SCE_RB_DEFAULT;
1000 // Use whatever setting we had going into the comment
1001 }
1002 } else if (state == SCE_RB_HERE_DELIM) {
1003 // See the comment for SCE_RB_HERE_DELIM in LexPerl.cxx
1004 // Slightly different: if we find an immediate '-',
1005 // the target can appear indented.
1006
1007 if (HereDoc.State == 0) { // '<<' encountered
1008 HereDoc.State = 1;
1009 HereDoc.DelimiterLength = 0;
1010 if (ch == '-') {
1011 HereDoc.CanBeIndented = true;
1012 advance_char(i, ch, chNext, chNext2); // pass by ref
1013 } else {
1014 HereDoc.CanBeIndented = false;
1015 }
1016 if (isEOLChar(ch)) {
1017 // Bail out of doing a here doc if there's no target
1018 state = SCE_RB_DEFAULT;
1019 preferRE = false;
1020 } else {
1021 HereDoc.Quote = ch;
1022
1023 if (ch == '\'' || ch == '"' || ch == '`') {
1024 HereDoc.Quoted = true;
1025 HereDoc.Delimiter[0] = '\0';
1026 } else {
1027 HereDoc.Quoted = false;
1028 HereDoc.Delimiter[0] = ch;
1029 HereDoc.Delimiter[1] = '\0';
1030 HereDoc.DelimiterLength = 1;
1031 }
1032 }
1033 } else if (HereDoc.State == 1) { // collect the delimiter
1034 if (isEOLChar(ch)) {
1035 // End the quote now, and go back for more
1036 styler.ColourTo(i - 1, state);
1037 state = SCE_RB_DEFAULT;
1038 i--;
1039 chNext = ch;
1040 chNext2 = chNext;
1041 preferRE = false;
1042 } else if (HereDoc.Quoted) {
1043 if (ch == HereDoc.Quote) { // closing quote => end of delimiter
1044 styler.ColourTo(i, state);
1045 state = SCE_RB_DEFAULT;
1046 preferRE = false;
1047 } else {
1048 if (ch == '\\' && !isEOLChar(chNext)) {
1049 advance_char(i, ch, chNext, chNext2);
1050 }
1051 HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;
1052 HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
1053 }
1054 } else { // an unquoted here-doc delimiter
1055 if (isSafeAlnumOrHigh(ch) || ch == '_') {
1056 HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;
1057 HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
1058 } else {
1059 styler.ColourTo(i - 1, state);
1060 redo_char(i, ch, chNext, chNext2, state);
1061 preferRE = false;
1062 }
1063 }
1064 if (HereDoc.DelimiterLength >= static_cast<int>(sizeof(HereDoc.Delimiter)) - 1) {
1065 styler.ColourTo(i - 1, state);
1066 state = SCE_RB_ERROR;
1067 preferRE = false;
1068 }
1069 }
1070 } else if (state == SCE_RB_HERE_Q) {
1071 // Not needed: HereDoc.State == 2
1072 // Indentable here docs: look backwards
1073 // Non-indentable: look forwards, like in Perl
1074 //
1075 // Why: so we can quickly resolve things like <<-" abc"
1076
1077 if (!HereDoc.CanBeIndented) {
1078 if (isEOLChar(chPrev)
1079 && isMatch(styler, lengthDoc, i, HereDoc.Delimiter)) {
1080 styler.ColourTo(i - 1, state);
1081 i += HereDoc.DelimiterLength - 1;
1082 chNext = styler.SafeGetCharAt(i + 1);
1083 if (isEOLChar(chNext)) {
1084 styler.ColourTo(i, SCE_RB_HERE_DELIM);
1085 state = SCE_RB_DEFAULT;
1086 HereDoc.State = 0;
1087 preferRE = false;
1088 }
1089 // Otherwise we skipped through the here doc faster.
1090 }
1091 } else if (isEOLChar(chNext)
1092 && lookingAtHereDocDelim(styler,
1093 i - HereDoc.DelimiterLength + 1,
1094 lengthDoc,
1095 HereDoc.Delimiter)) {
1096 styler.ColourTo(i - 1 - HereDoc.DelimiterLength, state);
1097 styler.ColourTo(i, SCE_RB_HERE_DELIM);
1098 state = SCE_RB_DEFAULT;
1099 preferRE = false;
1100 HereDoc.State = 0;
1101 }
1102 } else if (state == SCE_RB_CLASS_VAR
1103 || state == SCE_RB_INSTANCE_VAR
1104 || state == SCE_RB_SYMBOL) {
1105 if (!isSafeWordcharOrHigh(ch)) {
1106 styler.ColourTo(i - 1, state);
1107 redo_char(i, ch, chNext, chNext2, state); // pass by ref
1108 preferRE = false;
1109 }
1110 } else if (state == SCE_RB_GLOBAL) {
1111 if (!isSafeWordcharOrHigh(ch)) {
1112 // handle special globals here as well
1113 if (chPrev == '$') {
1114 if (ch == '-') {
1115 // Include the next char, like $-a
1116 advance_char(i, ch, chNext, chNext2);
1117 }
1118 styler.ColourTo(i, state);
1119 state = SCE_RB_DEFAULT;
1120 } else {
1121 styler.ColourTo(i - 1, state);
1122 redo_char(i, ch, chNext, chNext2, state); // pass by ref
1123 }
1124 preferRE = false;
1125 }
1126 } else if (state == SCE_RB_POD) {
1127 // PODs end with ^=end\s, -- any whitespace can follow =end
1128 if (strchr(" \t\n\r", ch) != NULL
1129 && i > 5
1130 && isEOLChar(styler[i - 5])
1131 && isMatch(styler, lengthDoc, i - 4, "=end")) {
1132 styler.ColourTo(i - 1, state);
1133 state = SCE_RB_DEFAULT;
1134 preferRE = false;
1135 }
1136 } else if (state == SCE_RB_REGEX || state == SCE_RB_STRING_QR) {
1137 if (ch == '\\' && Quote.Up != '\\') {
1138 // Skip one
1139 advance_char(i, ch, chNext, chNext2);
1140 } else if (ch == Quote.Down) {
1141 Quote.Count--;
1142 if (Quote.Count == 0) {
1143 // Include the options
1144 while (isSafeAlpha(chNext)) {
1145 i++;
1146 ch = chNext;
1147 chNext = styler.SafeGetCharAt(i + 1);
1148 }
1149 styler.ColourTo(i, state);
1150 state = SCE_RB_DEFAULT;
1151 preferRE = false;
1152 }
1153 } else if (ch == Quote.Up) {
1154 // Only if close quoter != open quoter
1155 Quote.Count++;
1156
1157 } else if (ch == '#' ) {
1158 //todo: distinguish comments from pound chars
1159 // for now, handle as comment
1160 styler.ColourTo(i - 1, state);
1161 bool inEscape = false;
1162 while (++i < lengthDoc) {
1163 ch = styler.SafeGetCharAt(i);
1164 if (ch == '\\') {
1165 inEscape = true;
1166 } else if (isEOLChar(ch)) {
1167 // Comment inside a regex
1168 styler.ColourTo(i - 1, SCE_RB_COMMENTLINE);
1169 break;
1170 } else if (inEscape) {
1171 inEscape = false; // don't look at char
1172 } else if (ch == Quote.Down) {
1173 // Have the regular handler deal with this
1174 // to get trailing modifiers.
1175 i--;
1176 ch = styler[i];
1177 break;
1178 }
1179 }
1180 chNext = styler.SafeGetCharAt(i + 1);
1181 chNext2 = styler.SafeGetCharAt(i + 2);
1182 }
1183 // Quotes of all kinds...
1184 } else if (state == SCE_RB_STRING_Q || state == SCE_RB_STRING_QQ ||
1185 state == SCE_RB_STRING_QX || state == SCE_RB_STRING_QW ||
1186 state == SCE_RB_STRING || state == SCE_RB_CHARACTER ||
1187 state == SCE_RB_BACKTICKS) {
1188 if (!Quote.Down && !isspacechar(ch)) {
1189 Quote.Open(ch);
1190 } else if (ch == '\\' && Quote.Up != '\\') {
1191 //Riddle me this: Is it safe to skip *every* escaped char?
1192 advance_char(i, ch, chNext, chNext2);
1193 } else if (ch == Quote.Down) {
1194 Quote.Count--;
1195 if (Quote.Count == 0) {
1196 styler.ColourTo(i, state);
1197 state = SCE_RB_DEFAULT;
1198 preferRE = false;
1199 }
1200 } else if (ch == Quote.Up) {
1201 Quote.Count++;
1202 }
1203 }
1204
1205 if (state == SCE_RB_ERROR) {
1206 break;
1207 }
1208 chPrev = ch;
1209 }
1210 if (state == SCE_RB_WORD) {
1211 // We've ended on a word, possibly at EOF, and need to
1212 // classify it.
1213 (void) ClassifyWordRb(styler.GetStartSegment(), lengthDoc - 1, keywords, styler, prevWord);
1214 } else {
1215 styler.ColourTo(lengthDoc - 1, state);
1216 }
1217 }
1218
1219 // Helper functions for folding, disambiguation keywords
1220 // Assert that there are no high-bit chars
1221
getPrevWord(int pos,char * prevWord,Accessor & styler,int word_state)1222 static void getPrevWord(int pos,
1223 char *prevWord,
1224 Accessor &styler,
1225 int word_state)
1226 {
1227 int i;
1228 styler.Flush();
1229 for (i = pos - 1; i > 0; i--) {
1230 if (actual_style(styler.StyleAt(i)) != word_state) {
1231 i++;
1232 break;
1233 }
1234 }
1235 if (i < pos - MAX_KEYWORD_LENGTH) // overflow
1236 i = pos - MAX_KEYWORD_LENGTH;
1237 char *dst = prevWord;
1238 for (; i <= pos; i++) {
1239 *dst++ = styler[i];
1240 }
1241 *dst = 0;
1242 }
1243
// Report whether prevWord is exactly one of the keywords
// "if", "do", "while", "unless" or "until".
// The comparison is case-sensitive and matches whole words only.
static bool keywordIsAmbiguous(const char *prevWord)
{
    // Ordered from most likely used to least likely.
    // Lots of ways to do a loop in Ruby besides 'while/until'.
    static const char *const ambiguousWords[] = {
        "if", "do", "while", "unless", "until"
    };
    const unsigned int count =
        (unsigned int)(sizeof(ambiguousWords) / sizeof(ambiguousWords[0]));
    for (unsigned int k = 0; k < count; k++) {
        if (strcmp(prevWord, ambiguousWords[k]) == 0) {
            return true;
        }
    }
    return false;
}
1258
1259 // Demote keywords in the following conditions:
1260 // if, while, unless, until modify a statement
1261 // do after a while or until, as a noise word (like then after if)
1262
keywordIsModifier(const char * word,int pos,Accessor & styler)1263 static bool keywordIsModifier(const char *word,
1264 int pos,
1265 Accessor &styler)
1266 {
1267 if (word[0] == 'd' && word[1] == 'o' && !word[2]) {
1268 return keywordDoStartsLoop(pos, styler);
1269 }
1270 char ch;
1271 int style = SCE_RB_DEFAULT;
1272 int lineStart = styler.GetLine(pos);
1273 int lineStartPosn = styler.LineStart(lineStart);
1274 styler.Flush();
1275 while (--pos >= lineStartPosn) {
1276 style = actual_style(styler.StyleAt(pos));
1277 if (style == SCE_RB_DEFAULT) {
1278 if (iswhitespace(ch = styler[pos])) {
1279 //continue
1280 } else if (ch == '\r' || ch == '\n') {
1281 // Scintilla's LineStart() and GetLine() routines aren't
1282 // platform-independent, so if we have text prepared with
1283 // a different system we can't rely on it.
1284 return false;
1285 }
1286 } else {
1287 break;
1288 }
1289 }
1290 if (pos < lineStartPosn) {
1291 return false; //XXX not quite right if the prev line is a continuation
1292 }
1293 // First things where the action is unambiguous
1294 switch (style) {
1295 case SCE_RB_DEFAULT:
1296 case SCE_RB_COMMENTLINE:
1297 case SCE_RB_POD:
1298 case SCE_RB_CLASSNAME:
1299 case SCE_RB_DEFNAME:
1300 case SCE_RB_MODULE_NAME:
1301 return false;
1302 case SCE_RB_OPERATOR:
1303 break;
1304 case SCE_RB_WORD:
1305 // Watch out for uses of 'else if'
1306 //XXX: Make a list of other keywords where 'if' isn't a modifier
1307 // and can appear legitimately
1308 // Formulate this to avoid warnings from most compilers
1309 if (strcmp(word, "if") == 0) {
1310 char prevWord[MAX_KEYWORD_LENGTH + 1];
1311 getPrevWord(pos, prevWord, styler, SCE_RB_WORD);
1312 return strcmp(prevWord, "else") != 0;
1313 }
1314 return true;
1315 default:
1316 return true;
1317 }
1318 // Assume that if the keyword follows an operator,
1319 // usually it's a block assignment, like
1320 // a << if x then y else z
1321
1322 ch = styler[pos];
1323 switch (ch) {
1324 case ')':
1325 case ']':
1326 case '}':
1327 return true;
1328 default:
1329 return false;
1330 }
1331 }
1332
1333 #define WHILE_BACKWARDS "elihw"
1334 #define UNTIL_BACKWARDS "litnu"
1335
1336 // Nothing fancy -- look to see if we follow a while/until somewhere
1337 // on the current line
1338
keywordDoStartsLoop(int pos,Accessor & styler)1339 static bool keywordDoStartsLoop(int pos,
1340 Accessor &styler)
1341 {
1342 char ch;
1343 int style;
1344 int lineStart = styler.GetLine(pos);
1345 int lineStartPosn = styler.LineStart(lineStart);
1346 styler.Flush();
1347 while (--pos >= lineStartPosn) {
1348 style = actual_style(styler.StyleAt(pos));
1349 if (style == SCE_RB_DEFAULT) {
1350 if ((ch = styler[pos]) == '\r' || ch == '\n') {
1351 // Scintilla's LineStart() and GetLine() routines aren't
1352 // platform-independent, so if we have text prepared with
1353 // a different system we can't rely on it.
1354 return false;
1355 }
1356 } else if (style == SCE_RB_WORD) {
1357 // Check for while or until, but write the word in backwards
1358 char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero
1359 char *dst = prevWord;
1360 int wordLen = 0;
1361 int start_word;
1362 for (start_word = pos;
1363 start_word >= lineStartPosn && actual_style(styler.StyleAt(start_word)) == SCE_RB_WORD;
1364 start_word--) {
1365 if (++wordLen < MAX_KEYWORD_LENGTH) {
1366 *dst++ = styler[start_word];
1367 }
1368 }
1369 *dst = 0;
1370 // Did we see our keyword?
1371 if (!strcmp(prevWord, WHILE_BACKWARDS)
1372 || !strcmp(prevWord, UNTIL_BACKWARDS)) {
1373 return true;
1374 }
1375 // We can move pos to the beginning of the keyword, and then
1376 // accept another decrement, as we can never have two contiguous
1377 // keywords:
1378 // word1 word2
1379 // ^
1380 // <- move to start_word
1381 // ^
1382 // <- loop decrement
1383 // ^ # pointing to end of word1 is fine
1384 pos = start_word;
1385 }
1386 }
1387 return false;
1388 }
1389
1390 /*
1391 * Folding Ruby
1392 *
1393 * The language is quite complex to analyze without a full parse.
1394 * For example, this line shouldn't affect fold level:
1395 *
1396 * print "hello" if feeling_friendly?
1397 *
1398 * Neither should this:
1399 *
1400 * print "hello" \
1401 * if feeling_friendly?
1402 *
1403 *
1404 * But this should:
1405 *
1406 * if feeling_friendly? #++
1407 * print "hello" \
1408 * print "goodbye"
1409 * end #--
1410 *
1411 * So we cheat, by actually looking at the existing indentation
1412 * levels for each line, and just echoing it back. Like Python.
1413 * Then if we get better at it, we'll take braces into consideration,
1414 * which always affect folding levels.
1415
1416 * How the keywords should work:
1417 * No effect:
1418 * __FILE__ __LINE__ BEGIN END alias and
1419 * defined? false in nil not or self super then
1420 * true undef
1421
1422 * Always increment:
1423 * begin class def do for module when {
1424 *
1425 * Always decrement:
1426 * end }
1427 *
1428 * Increment if these start a statement
1429 * if unless until while -- do nothing if they're modifiers
1430
1431 * These end a block if there's no modifier, but don't bother
1432 * break next redo retry return yield
1433 *
1434 * These temporarily de-indent, but re-indent
1435 * case else elsif ensure rescue
1436 *
 * This means that the folder reflects indentation rather
 * than setting it.  The language service updates indentation
 * when the user types return or finishes entering a de-denter.
1440 *
1441 * Later offer to fold POD, here-docs, strings, and blocks of comments
1442 */
1443
FoldRbDoc(unsigned int startPos,int length,int initStyle,WordList * [],Accessor & styler)1444 static void FoldRbDoc(unsigned int startPos, int length, int initStyle,
1445 WordList *[], Accessor &styler) {
1446 const bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
1447 bool foldComment = styler.GetPropertyInt("fold.comment") != 0;
1448
1449 synchronizeDocStart(startPos, length, initStyle, styler, // ref args
1450 false);
1451 unsigned int endPos = startPos + length;
1452 int visibleChars = 0;
1453 int lineCurrent = styler.GetLine(startPos);
1454 int levelPrev = startPos == 0 ? 0 : (styler.LevelAt(lineCurrent)
1455 & SC_FOLDLEVELNUMBERMASK
1456 & ~SC_FOLDLEVELBASE);
1457 int levelCurrent = levelPrev;
1458 char chNext = styler[startPos];
1459 int styleNext = styler.StyleAt(startPos);
1460 int stylePrev = startPos <= 1 ? SCE_RB_DEFAULT : styler.StyleAt(startPos - 1);
1461 bool buffer_ends_with_eol = false;
1462 for (unsigned int i = startPos; i < endPos; i++) {
1463 char ch = chNext;
1464 chNext = styler.SafeGetCharAt(i + 1);
1465 int style = styleNext;
1466 styleNext = styler.StyleAt(i + 1);
1467 bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
1468 if (style == SCE_RB_COMMENTLINE) {
1469 if (foldComment && stylePrev != SCE_RB_COMMENTLINE) {
1470 if (chNext == '{') {
1471 levelCurrent++;
1472 } else if (chNext == '}') {
1473 levelCurrent--;
1474 }
1475 }
1476 } else if (style == SCE_RB_OPERATOR) {
1477 if (strchr("[{(", ch)) {
1478 levelCurrent++;
1479 } else if (strchr(")}]", ch)) {
1480 // Don't decrement below 0
1481 if (levelCurrent > 0)
1482 levelCurrent--;
1483 }
1484 } else if (style == SCE_RB_WORD && styleNext != SCE_RB_WORD) {
1485 // Look at the keyword on the left and decide what to do
1486 char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero
1487 prevWord[0] = 0;
1488 getPrevWord(i, prevWord, styler, SCE_RB_WORD);
1489 if (!strcmp(prevWord, "end")) {
1490 // Don't decrement below 0
1491 if (levelCurrent > 0)
1492 levelCurrent--;
1493 } else if ( !strcmp(prevWord, "if")
1494 || !strcmp(prevWord, "def")
1495 || !strcmp(prevWord, "class")
1496 || !strcmp(prevWord, "module")
1497 || !strcmp(prevWord, "begin")
1498 || !strcmp(prevWord, "case")
1499 || !strcmp(prevWord, "do")
1500 || !strcmp(prevWord, "while")
1501 || !strcmp(prevWord, "unless")
1502 || !strcmp(prevWord, "until")
1503 || !strcmp(prevWord, "for")
1504 ) {
1505 levelCurrent++;
1506 }
1507 }
1508 if (atEOL) {
1509 int lev = levelPrev;
1510 if (visibleChars == 0 && foldCompact)
1511 lev |= SC_FOLDLEVELWHITEFLAG;
1512 if ((levelCurrent > levelPrev) && (visibleChars > 0))
1513 lev |= SC_FOLDLEVELHEADERFLAG;
1514 styler.SetLevel(lineCurrent, lev|SC_FOLDLEVELBASE);
1515 lineCurrent++;
1516 levelPrev = levelCurrent;
1517 visibleChars = 0;
1518 buffer_ends_with_eol = true;
1519 } else if (!isspacechar(ch)) {
1520 visibleChars++;
1521 buffer_ends_with_eol = false;
1522 }
1523 }
1524 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
1525 if (!buffer_ends_with_eol) {
1526 lineCurrent++;
1527 int new_lev = levelCurrent;
1528 if (visibleChars == 0 && foldCompact)
1529 new_lev |= SC_FOLDLEVELWHITEFLAG;
1530 if ((levelCurrent > levelPrev) && (visibleChars > 0))
1531 new_lev |= SC_FOLDLEVELHEADERFLAG;
1532 levelCurrent = new_lev;
1533 }
1534 styler.SetLevel(lineCurrent, levelCurrent|SC_FOLDLEVELBASE);
1535 }
1536
// Descriptions of the keyword lists used by the Ruby lexer (there is
// only one, "Keywords").  The array is terminated by a null pointer.
static const char * const rubyWordListDesc[] = { "Keywords", NULL };
1541
// The Ruby lexer module object: ties the SCLEX_RUBY id and the name "ruby"
// to ColouriseRbDoc and FoldRbDoc, together with the word list
// descriptions.  NOTE(review): presumably constructing this object is what
// registers the lexer with Scintilla -- confirm against LexerModule.
LexerModule lmRuby(SCLEX_RUBY, ColouriseRbDoc, "ruby", FoldRbDoc, rubyWordListDesc);
1543