1 // Scintilla source code edit control
2 /** @file LexRuby.cxx
3  ** Lexer for Ruby.
4  **/
5 // Copyright 2001- by Clemens Wyss <wys@helbling.ch>
6 // The License.txt file describes the conditions under which this software may be distributed.
7 
8 #include <stdlib.h>
9 #include <string.h>
10 #include <ctype.h>
11 #include <stdio.h>
12 #include <stdarg.h>
13 
14 #include "Platform.h"
15 
16 #include "PropSet.h"
17 #include "Accessor.h"
18 #include "KeyWords.h"
19 #include "Scintilla.h"
20 #include "SciLexer.h"
21 
22 #ifdef SCI_NAMESPACE
23 using namespace Scintilla;
24 #endif
25 
26 //XXX Identical to Perl, put in common area
// Reports whether ch is one of the two end-of-line characters (CR or LF).
static inline bool isEOLChar(char ch) {
	switch (ch) {
	case '\r':
	case '\n':
		return true;
	default:
		return false;
	}
}
30 
// True when ch, converted to unsigned int, falls in the range 0..127.
#define isSafeASCII(ch) (static_cast<unsigned int>(ch) < 128u)
// Exact complement of isSafeASCII; a separate name keeps call sites readable.
#define isHighBitChar(ch) (static_cast<unsigned int>(ch) >= 128u)
34 
isSafeAlpha(char ch)35 static inline bool isSafeAlpha(char ch) {
36     return (isSafeASCII(ch) && isalpha(ch)) || ch == '_';
37 }
38 
isSafeAlnum(char ch)39 static inline bool isSafeAlnum(char ch) {
40     return (isSafeASCII(ch) && isalnum(ch)) || ch == '_';
41 }
42 
isSafeAlnumOrHigh(char ch)43 static inline bool isSafeAlnumOrHigh(char ch) {
44     return isHighBitChar(ch) || isalnum(ch) || ch == '_';
45 }
46 
isSafeDigit(char ch)47 static inline bool isSafeDigit(char ch) {
48     return isSafeASCII(ch) && isdigit(ch);
49 }
50 
isSafeWordcharOrHigh(char ch)51 static inline bool isSafeWordcharOrHigh(char ch) {
52     return isHighBitChar(ch) || iswordchar(ch);
53 }
54 
// Horizontal whitespace only: space or tab.  End-of-line characters do not count.
static bool inline iswhitespace(char ch) {
	switch (ch) {
	case ' ':
	case '\t':
		return true;
	default:
		return false;
	}
}
58 
// Upper bound on the length of a word copied out of the document for
// keyword classification (including the terminating NUL).
#define MAX_KEYWORD_LENGTH 200

// actual_style() keeps only the low six bits of a style value.
// The argument is parenthesized so that compound expressions such as
// actual_style(a | b) are masked as a whole.
#define STYLE_MASK 63
#define actual_style(style) ((style) & STYLE_MASK)
63 
followsDot(unsigned int pos,Accessor & styler)64 static bool followsDot(unsigned int pos, Accessor &styler) {
65     styler.Flush();
66     for (; pos >= 1; --pos) {
67         int style = actual_style(styler.StyleAt(pos));
68         char ch;
69         switch (style) {
70             case SCE_RB_DEFAULT:
71                 ch = styler[pos];
72                 if (ch == ' ' || ch == '\t') {
73                     //continue
74                 } else {
75                     return false;
76                 }
77                 break;
78 
79             case SCE_RB_OPERATOR:
80                 return styler[pos] == '.';
81 
82             default:
83                 return false;
84         }
85     }
86     return false;
87 }
88 
89 // Forward declarations
90 static bool keywordIsAmbiguous(const char *prevWord);
91 static bool keywordDoStartsLoop(int pos,
92                                 Accessor &styler);
93 static bool keywordIsModifier(const char *word,
94                               int pos,
95                               Accessor &styler);
96 
ClassifyWordRb(unsigned int start,unsigned int end,WordList & keywords,Accessor & styler,char * prevWord)97 static int ClassifyWordRb(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, char *prevWord) {
98 	char s[100];
99     unsigned int i, j;
100 	unsigned int lim = end - start + 1; // num chars to copy
101 	if (lim >= MAX_KEYWORD_LENGTH) {
102 		lim = MAX_KEYWORD_LENGTH - 1;
103 	}
104 	for (i = start, j = 0; j < lim; i++, j++) {
105 		s[j] = styler[i];
106 	}
107     s[j] = '\0';
108 	int chAttr;
109 	if (0 == strcmp(prevWord, "class"))
110 		chAttr = SCE_RB_CLASSNAME;
111 	else if (0 == strcmp(prevWord, "module"))
112 		chAttr = SCE_RB_MODULE_NAME;
113 	else if (0 == strcmp(prevWord, "def"))
114 		chAttr = SCE_RB_DEFNAME;
115     else if (keywords.InList(s) && !followsDot(start - 1, styler)) {
116         if (keywordIsAmbiguous(s)
117             && keywordIsModifier(s, start, styler)) {
118 
119             // Demoted keywords are colored as keywords,
120             // but do not affect changes in indentation.
121             //
122             // Consider the word 'if':
123             // 1. <<if test ...>> : normal
124             // 2. <<stmt if test>> : demoted
125             // 3. <<lhs = if ...>> : normal: start a new indent level
126             // 4. <<obj.if = 10>> : color as identifer, since it follows '.'
127 
128             chAttr = SCE_RB_WORD_DEMOTED;
129         } else {
130             chAttr = SCE_RB_WORD;
131         }
132 	} else
133         chAttr = SCE_RB_IDENTIFIER;
134 	styler.ColourTo(end, chAttr);
135 	if (chAttr == SCE_RB_WORD) {
136 		strcpy(prevWord, s);
137 	} else {
138 		prevWord[0] = 0;
139 	}
140     return chAttr;
141 }
142 
143 
144 //XXX Identical to Perl, put in common area
isMatch(Accessor & styler,int lengthDoc,int pos,const char * val)145 static bool isMatch(Accessor &styler, int lengthDoc, int pos, const char *val) {
146 	if ((pos + static_cast<int>(strlen(val))) >= lengthDoc) {
147 		return false;
148 	}
149 	while (*val) {
150 		if (*val != styler[pos++]) {
151 			return false;
152 		}
153 		val++;
154 	}
155 	return true;
156 }
157 
158 // Do Ruby better -- find the end of the line, work back,
159 // and then check for leading white space
160 
161 // Precondition: the here-doc target can be indented
lookingAtHereDocDelim(Accessor & styler,int pos,int lengthDoc,const char * HereDocDelim)162 static bool lookingAtHereDocDelim(Accessor	   &styler,
163                                   int 			pos,
164                                   int 			lengthDoc,
165                                   const char   *HereDocDelim)
166 {
167     if (!isMatch(styler, lengthDoc, pos, HereDocDelim)) {
168         return false;
169     }
170     while (--pos > 0) {
171         char ch = styler[pos];
172         if (isEOLChar(ch)) {
173             return true;
174         } else if (ch != ' ' && ch != '\t') {
175             return false;
176         }
177     }
178     return false;
179 }
180 
181 //XXX Identical to Perl, put in common area
// Maps an opening bracket to its closing partner: ( [ { < become ) ] } >.
// Every other character is returned unchanged.
static char opposite(char ch) {
	switch (ch) {
	case '(':
		return ')';
	case '[':
		return ']';
	case '{':
		return '}';
	case '<':
		return '>';
	default:
		return ch;
	}
}
193 
194 // Null transitions when we see we've reached the end
195 // and need to relex the curr char.
196 
redo_char(int & i,char & ch,char & chNext,char & chNext2,int & state)197 static void redo_char(int &i, char &ch, char &chNext, char &chNext2,
198                       int &state) {
199     i--;
200     chNext2 = chNext;
201     chNext = ch;
202     state = SCE_RB_DEFAULT;
203 }
204 
// Moves the scan forward by one character: ch takes chNext, chNext takes
// chNext2 and i is incremented.  chNext2 is read but not refreshed here.
static void advance_char(int &i, char &ch, char &chNext, char &chNext2) {
    ch = chNext;
    chNext = chNext2;
    ++i;
}
210 
211 // precondition: startPos points to one after the EOL char
currLineContainsHereDelims(int & startPos,Accessor & styler)212 static bool currLineContainsHereDelims(int& startPos,
213                                        Accessor &styler) {
214     if (startPos <= 1)
215         return false;
216 
217     int pos;
218     for (pos = startPos - 1; pos > 0; pos--) {
219         char ch = styler.SafeGetCharAt(pos);
220         if (isEOLChar(ch)) {
221             // Leave the pointers where they are -- there are no
222             // here doc delims on the current line, even if
223             // the EOL isn't default style
224 
225             return false;
226         } else {
227             styler.Flush();
228             if (actual_style(styler.StyleAt(pos)) == SCE_RB_HERE_DELIM) {
229                 break;
230             }
231         }
232     }
233     if (pos == 0) {
234         return false;
235     }
236     // Update the pointers so we don't have to re-analyze the string
237     startPos = pos;
238     return true;
239 }
240 
241 
isEmptyLine(int pos,Accessor & styler)242 static bool isEmptyLine(int pos,
243                         Accessor &styler) {
244 	int spaceFlags = 0;
245 	int lineCurrent = styler.GetLine(pos);
246 	int indentCurrent = styler.IndentAmount(lineCurrent, &spaceFlags, NULL);
247     return (indentCurrent & SC_FOLDLEVELWHITEFLAG) != 0;
248 }
249 
// Reports whether keyword is one of a fixed set of words (and, begin, break,
// case, do, else, elsif, if, next, return, when, unless, until, not, or).
// The comparison is exact and case-sensitive.
static bool RE_CanFollowKeyword(const char *keyword) {
    static const char *const candidates[] = {
        "and", "begin", "break", "case", "do",
        "else", "elsif", "if", "next", "return",
        "when", "unless", "until", "not", "or"
    };
    const size_t count = sizeof(candidates) / sizeof(candidates[0]);
    for (size_t k = 0; k < count; ++k) {
        if (strcmp(keyword, candidates[k]) == 0) {
            return true;
        }
    }
    return false;
}
270 
271 // Look at chars up to but not including endPos
272 // Don't look at styles in case we're looking forward
273 
skipWhitespace(int startPos,int endPos,Accessor & styler)274 static int skipWhitespace(int startPos,
275                            int endPos,
276                            Accessor &styler) {
277     for (int i = startPos; i < endPos; i++) {
278         if (!iswhitespace(styler[i])) {
279             return i;
280         }
281     }
282     return endPos;
283 }
284 
285 // This routine looks for false positives like
286 // undef foo, <<
287 // There aren't too many.
288 //
289 // iPrev points to the start of <<
290 
sureThisIsHeredoc(int iPrev,Accessor & styler,char * prevWord)291 static bool sureThisIsHeredoc(int iPrev,
292                               Accessor &styler,
293                               char *prevWord) {
294 
295     // Not so fast, since Ruby's so dynamic.  Check the context
296     // to make sure we're OK.
297     int prevStyle;
298     int lineStart = styler.GetLine(iPrev);
299     int lineStartPosn = styler.LineStart(lineStart);
300     styler.Flush();
301 
302     // Find the first word after some whitespace
303     int firstWordPosn = skipWhitespace(lineStartPosn, iPrev, styler);
304     if (firstWordPosn >= iPrev) {
305         // Have something like {^     <<}
306 		//XXX Look at the first previous non-comment non-white line
307 		// to establish the context.  Not too likely though.
308         return true;
309     } else {
310         switch (prevStyle = styler.StyleAt(firstWordPosn)) {
311         case SCE_RB_WORD:
312         case SCE_RB_WORD_DEMOTED:
313         case SCE_RB_IDENTIFIER:
314             break;
315         default:
316             return true;
317         }
318     }
319     int firstWordEndPosn = firstWordPosn;
320     char *dst = prevWord;
321     for (;;) {
322         if (firstWordEndPosn >= iPrev ||
323             styler.StyleAt(firstWordEndPosn) != prevStyle) {
324             *dst = 0;
325             break;
326         }
327         *dst++ = styler[firstWordEndPosn];
328         firstWordEndPosn += 1;
329     }
330     //XXX Write a style-aware thing to regex scintilla buffer objects
331     if (!strcmp(prevWord, "undef")
332         || !strcmp(prevWord, "def")
333         || !strcmp(prevWord, "alias")) {
334         // These keywords are what we were looking for
335         return false;
336     }
337     return true;
338 }
339 
340 // Routine that saves us from allocating a buffer for the here-doc target
341 // targetEndPos points one past the end of the current target
haveTargetMatch(int currPos,int lengthDoc,int targetStartPos,int targetEndPos,Accessor & styler)342 static bool haveTargetMatch(int currPos,
343                             int lengthDoc,
344                             int targetStartPos,
345                             int targetEndPos,
346                             Accessor &styler) {
347     if (lengthDoc - currPos < targetEndPos - targetStartPos) {
348         return false;
349     }
350     int i, j;
351     for (i = targetStartPos, j = currPos;
352          i < targetEndPos && j < lengthDoc;
353          i++, j++) {
354         if (styler[i] != styler[j]) {
355             return false;
356         }
357     }
358     return true;
359 }
360 
361 // We need a check because the form
362 // [identifier] <<[target]
363 // is ambiguous.  The Ruby lexer/parser resolves it by
364 // looking to see if [identifier] names a variable or a
// function.  If it's a function, it's the start of a here-doc.
366 // If it's a var, it's an operator.  This lexer doesn't
367 // maintain a symbol table, so it looks ahead to see what's
368 // going on, in cases where we have
369 // ^[white-space]*[identifier([.|::]identifier)*][white-space]*<<[target]
370 //
371 // If there's no occurrence of [target] on a line, assume we don't.
372 
373 // return true == yes, we have no heredocs
374 
// Look-ahead heuristic for a '<<' whose first '<' is at lt2StartPos.
// Returns true ("definitely not a here-doc") unless all of the following hold:
//   - the line starts (after spaces/tabs) with an identifier, optionally
//     extended by '.' or '::' separated names, followed only by spaces/tabs
//     up to the '<<';
//   - the '<<' is followed by an optional '-', an optional quote character
//     and a target made of ASCII alphanumerics/underscores, with nothing
//     after it on the line except spaces/tabs and an optional '#';
//   - the target text appears at the start of one of the next 50 lines
//     (after leading spaces/tabs when '-' was given).
// In that case it returns false ("looks like a here-doc").
static bool sureThisIsNotHeredoc(int lt2StartPos,
                                 Accessor &styler) {
    int prevStyle;
     // Use full document, not just part we're styling
    int lengthDoc = styler.Length();
    int lineStart = styler.GetLine(lt2StartPos);
    int lineStartPosn = styler.LineStart(lineStart);
    // Styles are read back below, so write out pending colouring first
    styler.Flush();
    // Named results, so the return statements read as verdicts
    const bool definitely_not_a_here_doc = true;
    const bool looks_like_a_here_doc = false;

    // Find the first word after some whitespace
    int firstWordPosn = skipWhitespace(lineStartPosn, lt2StartPos, styler);
    if (firstWordPosn >= lt2StartPos) {
        // Nothing but spaces/tabs before the '<<'
        return definitely_not_a_here_doc;
    }
    prevStyle = styler.StyleAt(firstWordPosn);
    // If we have '<<' following a keyword, it's not a heredoc
    if (prevStyle != SCE_RB_IDENTIFIER) {
        return definitely_not_a_here_doc;
    }
    int newStyle = prevStyle;
    // Some compilers incorrectly warn about uninit newStyle
    // Outer loop: one pass per name in a '.' or '::' separated chain
    for (firstWordPosn += 1; firstWordPosn <= lt2StartPos; firstWordPosn += 1) {
        // Inner loop looks at the name
        for (; firstWordPosn <= lt2StartPos; firstWordPosn += 1) {
            newStyle = styler.StyleAt(firstWordPosn);
            if (newStyle != prevStyle) {
                break;
            }
        }
        // Do we have '::' or '.'?
        if (firstWordPosn < lt2StartPos && newStyle == SCE_RB_OPERATOR) {
            char ch = styler[firstWordPosn];
            if (ch == '.') {
                // yes
            } else if (ch == ':') {
                // A single ':' is not a separator: the next character must
                // also be an operator-styled ':'
                if (styler.StyleAt(++firstWordPosn) != SCE_RB_OPERATOR) {
                    return definitely_not_a_here_doc;
                } else if (styler[firstWordPosn] != ':') {
                    return definitely_not_a_here_doc;
                }
            } else {
                break;
            }
        } else {
            break;
        }
    }
    // Skip next batch of white-space
    firstWordPosn = skipWhitespace(firstWordPosn, lt2StartPos, styler);
    if (firstWordPosn != lt2StartPos) {
        // Have [[^ws[identifier]ws[*something_else*]ws<<
        return definitely_not_a_here_doc;
    }
    // OK, now 'j' will point to the current spot moving ahead
    // (it starts on the character right after the first '<')
	int j = firstWordPosn + 1;
    if (styler.StyleAt(j) != SCE_RB_OPERATOR || styler[j] != '<') {
        // This shouldn't happen
        return definitely_not_a_here_doc;
    }
    int nextLineStartPosn = styler.LineStart(lineStart + 1);
    if (nextLineStartPosn >= lengthDoc) {
        // No following line in which a terminator could appear
        return definitely_not_a_here_doc;
    }
    j = skipWhitespace(j + 1, nextLineStartPosn, styler);
    if (j >= lengthDoc) {
        return definitely_not_a_here_doc;
    }
    bool allow_indent;
    int target_start, target_end;
    // From this point on no more styling, since we're looking ahead
    // A '-' after '<<' means the terminator may be indented
    if (styler[j] == '-') {
        allow_indent = true;
        j++;
    } else {
        allow_indent = false;
    }

    // Allow for quoted targets.
    char target_quote = 0;
    switch (styler[j]) {
    case '\'':
    case '"':
    case '`':
        target_quote = styler[j];
        j += 1;
    }

    if (isSafeAlnum(styler[j])) {
        // Init target_end because some compilers think it won't
        // be initialized by the time it's used
        target_start = target_end = j;
        j++;
    } else {
        return definitely_not_a_here_doc;
    }
    // Find the end of the target and check what follows it on the line
    for (; j < lengthDoc; j++) {
        if (!isSafeAlnum(styler[j])) {
            if (target_quote && styler[j] != target_quote) {
                // unquoted end
                return definitely_not_a_here_doc;
            }

            // And for now make sure that it's a newline
            // don't handle arbitrary expressions yet

            target_end = j;
			if (target_quote) {
				// Now we can move to the character after the string delimiter.
				j += 1;
			}
            j = skipWhitespace(j, lengthDoc, styler);
            if (j >= lengthDoc) {
                return definitely_not_a_here_doc;
            } else {
                char ch = styler[j];
                if (ch == '#' || isEOLChar(ch)) {
                    // This is OK, so break and continue;
                    break;
                } else {
                    return definitely_not_a_here_doc;
                }
            }
        }
    }

    // Just look at the start of each line
    int last_line = styler.GetLine(lengthDoc - 1);
    // But don't go too far
    if (last_line > lineStart + 50) {
        last_line = lineStart + 50;
    }
    for (int line_num = lineStart + 1; line_num <= last_line; line_num++) {
        if (allow_indent) {
            j = skipWhitespace(styler.LineStart(line_num), lengthDoc, styler);
        } else {
            j = styler.LineStart(line_num);
        }
        // target_end is one past the end
        if (haveTargetMatch(j, lengthDoc, target_start, target_end, styler)) {
            // We got it
            return looks_like_a_here_doc;
        }
    }
    return definitely_not_a_here_doc;
}
522 
523 //todo: if we aren't looking at a stdio character,
524 // move to the start of the first line that is not in a
525 // multi-line construct
526 
synchronizeDocStart(unsigned int & startPos,int & length,int & initStyle,Accessor & styler,bool skipWhiteSpace=false)527 static void synchronizeDocStart(unsigned int& startPos,
528                                 int &length,
529                                 int &initStyle,
530                                 Accessor &styler,
531                                 bool skipWhiteSpace=false) {
532 
533     styler.Flush();
534     int style = actual_style(styler.StyleAt(startPos));
535     switch (style) {
536         case SCE_RB_STDIN:
537         case SCE_RB_STDOUT:
538         case SCE_RB_STDERR:
539             // Don't do anything else with these.
540             return;
541     }
542 
543     int pos = startPos;
544     // Quick way to characterize each line
545     int lineStart;
546     for (lineStart = styler.GetLine(pos); lineStart > 0; lineStart--) {
547         // Now look at the style before the previous line's EOL
548         pos = styler.LineStart(lineStart) - 1;
549         if (pos <= 10) {
550             lineStart = 0;
551             break;
552         }
553         char ch = styler.SafeGetCharAt(pos);
554         char chPrev = styler.SafeGetCharAt(pos - 1);
555         if (ch == '\n' && chPrev == '\r') {
556             pos--;
557         }
558         if (styler.SafeGetCharAt(pos - 1) == '\\') {
559             // Continuation line -- keep going
560         } else if (actual_style(styler.StyleAt(pos)) != SCE_RB_DEFAULT) {
561             // Part of multi-line construct -- keep going
562         } else if (currLineContainsHereDelims(pos, styler)) {
563             // Keep going, with pos and length now pointing
564             // at the end of the here-doc delimiter
565         } else if (skipWhiteSpace && isEmptyLine(pos, styler)) {
566             // Keep going
567         } else {
568             break;
569         }
570     }
571     pos = styler.LineStart(lineStart);
572     length += (startPos - pos);
573     startPos = pos;
574     initStyle = SCE_RB_DEFAULT;
575 }
576 
ColouriseRbDoc(unsigned int startPos,int length,int initStyle,WordList * keywordlists[],Accessor & styler)577 static void ColouriseRbDoc(unsigned int startPos, int length, int initStyle,
578 						   WordList *keywordlists[], Accessor &styler) {
579 
580 	// Lexer for Ruby often has to backtrack to start of current style to determine
581 	// which characters are being used as quotes, how deeply nested is the
582 	// start position and what the termination string is for here documents
583 
584 	WordList &keywords = *keywordlists[0];
585 
586 	class HereDocCls {
587 	public:
588 		int State;
589         // States
590         // 0: '<<' encountered
591 		// 1: collect the delimiter
592         // 1b: text between the end of the delimiter and the EOL
593 		// 2: here doc text (lines after the delimiter)
594 		char Quote;		// the char after '<<'
595 		bool Quoted;		// true if Quote in ('\'','"','`')
596 		int DelimiterLength;	// strlen(Delimiter)
597 		char Delimiter[256];	// the Delimiter, limit of 256: from Perl
598         bool CanBeIndented;
599 		HereDocCls() {
600 			State = 0;
601 			DelimiterLength = 0;
602 			Delimiter[0] = '\0';
603             CanBeIndented = false;
604 		}
605 	};
606 	HereDocCls HereDoc;
607 
608 	class QuoteCls {
609 		public:
610 		int  Count;
611 		char Up;
612 		char Down;
613 		QuoteCls() {
614 			this->New();
615 		}
616 		void New() {
617 			Count = 0;
618 			Up    = '\0';
619 			Down  = '\0';
620 		}
621 		void Open(char u) {
622 			Count++;
623 			Up    = u;
624 			Down  = opposite(Up);
625 		}
626 	};
627 	QuoteCls Quote;
628 
629     int numDots = 0;  // For numbers --
630                       // Don't start lexing in the middle of a num
631 
632     synchronizeDocStart(startPos, length, initStyle, styler, // ref args
633                         false);
634 
635 	bool preferRE = true;
636     int state = initStyle;
637 	int lengthDoc = startPos + length;
638 
639 	char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero
640 	prevWord[0] = '\0';
641 	if (length == 0)
642 		return;
643 
644 	char chPrev = styler.SafeGetCharAt(startPos - 1);
645 	char chNext = styler.SafeGetCharAt(startPos);
646 	// Ruby uses a different mask because bad indentation is marked by oring with 32
647 	styler.StartAt(startPos, 127);
648 	styler.StartSegment(startPos);
649 
650     static int q_states[] = {SCE_RB_STRING_Q,
651                              SCE_RB_STRING_QQ,
652                              SCE_RB_STRING_QR,
653                              SCE_RB_STRING_QW,
654                              SCE_RB_STRING_QW,
655                              SCE_RB_STRING_QX};
656     static const char* q_chars = "qQrwWx";
657 
658 	for (int i = startPos; i < lengthDoc; i++) {
659 		char ch = chNext;
660 		chNext = styler.SafeGetCharAt(i + 1);
661 		char chNext2 = styler.SafeGetCharAt(i + 2);
662 
663         if (styler.IsLeadByte(ch)) {
664 			chNext = chNext2;
665 			chPrev = ' ';
666 			i += 1;
667 			continue;
668 		}
669 
670         // skip on DOS/Windows
671         //No, don't, because some things will get tagged on,
672         // so we won't recognize keywords, for example
673 #if 0
674 		if (ch == '\r' && chNext == '\n') {
675 	    	continue;
676         }
677 #endif
678 
679         if (HereDoc.State == 1 && isEOLChar(ch)) {
680 			// Begin of here-doc (the line after the here-doc delimiter):
681 			HereDoc.State = 2;
682 			styler.ColourTo(i-1, state);
683             // Don't check for a missing quote, just jump into
684             // the here-doc state
685             state = SCE_RB_HERE_Q;
686         }
687 
688         // Regular transitions
689 		if (state == SCE_RB_DEFAULT) {
690             if (isSafeDigit(ch)) {
691             	styler.ColourTo(i - 1, state);
692 				state = SCE_RB_NUMBER;
693                 numDots = 0;
694             } else if (isHighBitChar(ch) || iswordstart(ch)) {
695             	styler.ColourTo(i - 1, state);
696 				state = SCE_RB_WORD;
697 			} else if (ch == '#') {
698 				styler.ColourTo(i - 1, state);
699 				state = SCE_RB_COMMENTLINE;
700 			} else if (ch == '=') {
701 				// =begin indicates the start of a comment (doc) block
702                 if (i == 0 || isEOLChar(chPrev)
703                     && chNext == 'b'
704                     && styler.SafeGetCharAt(i + 2) == 'e'
705                     && styler.SafeGetCharAt(i + 3) == 'g'
706                     && styler.SafeGetCharAt(i + 4) == 'i'
707                     && styler.SafeGetCharAt(i + 5) == 'n'
708                     && !isSafeWordcharOrHigh(styler.SafeGetCharAt(i + 6))) {
709                     styler.ColourTo(i - 1, state);
710                     state = SCE_RB_POD;
711 				} else {
712 					styler.ColourTo(i - 1, state);
713 					styler.ColourTo(i, SCE_RB_OPERATOR);
714 					preferRE = true;
715 				}
716 			} else if (ch == '"') {
717 				styler.ColourTo(i - 1, state);
718 				state = SCE_RB_STRING;
719 				Quote.New();
720 				Quote.Open(ch);
721 			} else if (ch == '\'') {
722                 styler.ColourTo(i - 1, state);
723                 state = SCE_RB_CHARACTER;
724                 Quote.New();
725                 Quote.Open(ch);
726 			} else if (ch == '`') {
727 				styler.ColourTo(i - 1, state);
728 				state = SCE_RB_BACKTICKS;
729 				Quote.New();
730 				Quote.Open(ch);
731 			} else if (ch == '@') {
732                 // Instance or class var
733 				styler.ColourTo(i - 1, state);
734                 if (chNext == '@') {
735                     state = SCE_RB_CLASS_VAR;
736                     advance_char(i, ch, chNext, chNext2); // pass by ref
737                 } else {
738                     state = SCE_RB_INSTANCE_VAR;
739                 }
740 			} else if (ch == '$') {
741                 // Check for a builtin global
742 				styler.ColourTo(i - 1, state);
743                 // Recognize it bit by bit
744                 state = SCE_RB_GLOBAL;
745             } else if (ch == '/' && preferRE) {
746                 // Ambigous operator
747 				styler.ColourTo(i - 1, state);
748 				state = SCE_RB_REGEX;
749                 Quote.New();
750                 Quote.Open(ch);
751 			} else if (ch == '<' && chNext == '<' && chNext2 != '=') {
752 
753                 // Recognise the '<<' symbol - either a here document or a binary op
754 				styler.ColourTo(i - 1, state);
755                 i++;
756                 chNext = chNext2;
757 				styler.ColourTo(i, SCE_RB_OPERATOR);
758 
759                 if (! (strchr("\"\'`_-", chNext2) || isSafeAlpha(chNext2))) {
760                     // It's definitely not a here-doc,
761                     // based on Ruby's lexer/parser in the
762                     // heredoc_identifier routine.
763                     // Nothing else to do.
764                 } else if (preferRE) {
765                     if (sureThisIsHeredoc(i - 1, styler, prevWord)) {
766                         state = SCE_RB_HERE_DELIM;
767                         HereDoc.State = 0;
768                     }
769                     // else leave it in default state
770                 } else {
771                     if (sureThisIsNotHeredoc(i - 1, styler)) {
772                         // leave state as default
773                         // We don't have all the heuristics Perl has for indications
774                         // of a here-doc, because '<<' is overloadable and used
775                         // for so many other classes.
776                     } else {
777                         state = SCE_RB_HERE_DELIM;
778                         HereDoc.State = 0;
779                     }
780                 }
781                 preferRE = (state != SCE_RB_HERE_DELIM);
782             } else if (ch == ':') {
783 				styler.ColourTo(i - 1, state);
784                 if (chNext == ':') {
785                     // Mark "::" as an operator, not symbol start
786                     styler.ColourTo(i + 1, SCE_RB_OPERATOR);
787                     advance_char(i, ch, chNext, chNext2); // pass by ref
788                     state = SCE_RB_DEFAULT;
789 					preferRE = false;
790                 } else if (isSafeWordcharOrHigh(chNext)) {
791 					state = SCE_RB_SYMBOL;
792                 } else if (strchr("[*!~+-*/%=<>&^|", chNext)) {
793                     // Do the operator analysis in-line, looking ahead
794                     // Based on the table in pickaxe 2nd ed., page 339
795                     bool doColoring = true;
796                     switch (chNext) {
797                     case '[':
798                         if (chNext2 == ']' ) {
799                             char ch_tmp = styler.SafeGetCharAt(i + 3);
800                             if (ch_tmp == '=') {
801                                 i += 3;
802                                 ch = ch_tmp;
803                                 chNext = styler.SafeGetCharAt(i + 1);
804                             } else {
805                                 i += 2;
806                                 ch = chNext2;
807                                 chNext = ch_tmp;
808                             }
809                         } else {
810                             doColoring = false;
811                         }
812                         break;
813 
814                     case '*':
815                         if (chNext2 == '*') {
816                             i += 2;
817                             ch = chNext2;
818                             chNext = styler.SafeGetCharAt(i + 1);
819                         } else {
820                             advance_char(i, ch, chNext, chNext2);
821                         }
822                         break;
823 
824                     case '!':
825                         if (chNext2 == '=' || chNext2 == '~') {
826                             i += 2;
827                             ch = chNext2;
828                             chNext = styler.SafeGetCharAt(i + 1);
829                         } else {
830                             advance_char(i, ch, chNext, chNext2);
831                         }
832                         break;
833 
834                     case '<':
835                         if (chNext2 == '<') {
836                             i += 2;
837                             ch = chNext2;
838                             chNext = styler.SafeGetCharAt(i + 1);
839                         } else if (chNext2 == '=') {
840                             char ch_tmp = styler.SafeGetCharAt(i + 3);
841                             if (ch_tmp == '>') {  // <=> operator
842                                 i += 3;
843                                 ch = ch_tmp;
844                                 chNext = styler.SafeGetCharAt(i + 1);
845                             } else {
846                                 i += 2;
847                                 ch = chNext2;
848                                 chNext = ch_tmp;
849                             }
850                         } else {
851                             advance_char(i, ch, chNext, chNext2);
852                         }
853                         break;
854 
855                     default:
856                         // Simple one-character operators
857                         advance_char(i, ch, chNext, chNext2);
858                         break;
859                     }
860                     if (doColoring) {
861                         styler.ColourTo(i, SCE_RB_SYMBOL);
862                         state = SCE_RB_DEFAULT;
863                     }
864 				} else if (!preferRE) {
865 					// Don't color symbol strings (yet)
866 					// Just color the ":" and color rest as string
867 					styler.ColourTo(i, SCE_RB_SYMBOL);
868 					state = SCE_RB_DEFAULT;
869                 } else {
870                     styler.ColourTo(i, SCE_RB_OPERATOR);
871                     state = SCE_RB_DEFAULT;
872                     preferRE = true;
873                 }
874             } else if (ch == '%') {
875                 styler.ColourTo(i - 1, state);
876                 bool have_string = false;
877                 if (strchr(q_chars, chNext) && !isSafeWordcharOrHigh(chNext2)) {
878                     Quote.New();
879                     const char *hit = strchr(q_chars, chNext);
880                     if (hit != NULL) {
881                         state = q_states[hit - q_chars];
882                         Quote.Open(chNext2);
883                         i += 2;
884                         ch = chNext2;
885 						chNext = styler.SafeGetCharAt(i + 1);
886                         have_string = true;
887                     }
888                 } else if (!isSafeWordcharOrHigh(chNext)) {
889                     // Ruby doesn't allow high bit chars here,
890                     // but the editor host might
891                     state = SCE_RB_STRING_QQ;
892                     Quote.Open(chNext);
893                     advance_char(i, ch, chNext, chNext2); // pass by ref
894                     have_string = true;
895                 }
896                 if (!have_string) {
897                     styler.ColourTo(i, SCE_RB_OPERATOR);
898                     // stay in default
899                     preferRE = true;
900                 }
901             } else if (isoperator(ch) || ch == '.') {
902 				styler.ColourTo(i - 1, state);
903 				styler.ColourTo(i, SCE_RB_OPERATOR);
904                 // If we're ending an expression or block,
905                 // assume it ends an object, and the ambivalent
906                 // constructs are binary operators
907                 //
908                 // So if we don't have one of these chars,
909                 // we aren't ending an object exp'n, and ops
910                 // like : << / are unary operators.
911 
912                 preferRE = (strchr(")}].", ch) == NULL);
913                 // Stay in default state
914             } else if (isEOLChar(ch)) {
915                 // Make sure it's a true line-end, with no backslash
916                 if ((ch == '\r' || (ch == '\n' && chPrev != '\r'))
917                     && chPrev != '\\') {
918                     // Assume we've hit the end of the statement.
919                     preferRE = true;
920                 }
921             }
922         } else if (state == SCE_RB_WORD) {
923             if (ch == '.' || !isSafeWordcharOrHigh(ch)) {
924                 // Words include x? in all contexts,
925                 // and <letters>= after either 'def' or a dot
926                 // Move along until a complete word is on our left
927 
928                 // Default accessor treats '.' as word-chars,
929                 // but we don't for now.
930 
931                 if (ch == '='
932                     && isSafeWordcharOrHigh(chPrev)
933                     && (chNext == '('
934                         || strchr(" \t\n\r", chNext) != NULL)
935                     && (!strcmp(prevWord, "def")
936                         || followsDot(styler.GetStartSegment(), styler))) {
937                     // <name>= is a name only when being def'd -- Get it the next time
938                     // This means that <name>=<name> is always lexed as
939                     // <name>, (op, =), <name>
940                 } else if ((ch == '?' || ch == '!')
941                            && isSafeWordcharOrHigh(chPrev)
942                            && !isSafeWordcharOrHigh(chNext)) {
943                     // <name>? is a name -- Get it the next time
944                     // But <name>?<name> is always lexed as
945                     // <name>, (op, ?), <name>
946                     // Same with <name>! to indicate a method that
947                     // modifies its target
948                 } else if (isEOLChar(ch)
949                            && isMatch(styler, lengthDoc, i - 7, "__END__")) {
950                     styler.ColourTo(i, SCE_RB_DATASECTION);
951                     state = SCE_RB_DATASECTION;
952                     // No need to handle this state -- we'll just move to the end
953                     preferRE = false;
954                 } else {
955 					int wordStartPos = styler.GetStartSegment();
956                     int word_style = ClassifyWordRb(wordStartPos, i - 1, keywords, styler, prevWord);
957                     switch (word_style) {
958                         case SCE_RB_WORD:
959                             preferRE = RE_CanFollowKeyword(prevWord);
960 							break;
961 
962                         case SCE_RB_WORD_DEMOTED:
963                             preferRE = true;
964 							break;
965 
966                         case SCE_RB_IDENTIFIER:
967                             if (isMatch(styler, lengthDoc, wordStartPos, "print")) {
968                                 preferRE = true;
969                             } else if (isEOLChar(ch)) {
970                                 preferRE = true;
971                             } else {
972                                 preferRE = false;
973                             }
974 							break;
975                         default:
976                             preferRE = false;
977                     }
978                     if (ch == '.') {
979                         // We might be redefining an operator-method
980                         preferRE = false;
981                     }
982                     // And if it's the first
983                     redo_char(i, ch, chNext, chNext2, state); // pass by ref
984                 }
985             }
986         } else if (state == SCE_RB_NUMBER) {
987             if (isSafeAlnumOrHigh(ch) || ch == '_') {
988                 // Keep going
989             } else if (ch == '.' && ++numDots == 1) {
990                 // Keep going
991             } else {
992                 styler.ColourTo(i - 1, state);
993                 redo_char(i, ch, chNext, chNext2, state); // pass by ref
994                 preferRE = false;
995             }
996         } else if (state == SCE_RB_COMMENTLINE) {
997 			if (isEOLChar(ch)) {
998                 styler.ColourTo(i - 1, state);
999                 state = SCE_RB_DEFAULT;
1000                 // Use whatever setting we had going into the comment
1001             }
1002         } else if (state == SCE_RB_HERE_DELIM) {
1003             // See the comment for SCE_RB_HERE_DELIM in LexPerl.cxx
1004             // Slightly different: if we find an immediate '-',
1005             // the target can appear indented.
1006 
1007 			if (HereDoc.State == 0) { // '<<' encountered
1008 				HereDoc.State = 1;
1009                 HereDoc.DelimiterLength = 0;
1010                 if (ch == '-') {
1011                     HereDoc.CanBeIndented = true;
1012                     advance_char(i, ch, chNext, chNext2); // pass by ref
1013                 } else {
1014                     HereDoc.CanBeIndented = false;
1015                 }
1016                 if (isEOLChar(ch)) {
1017                     // Bail out of doing a here doc if there's no target
1018                     state = SCE_RB_DEFAULT;
1019                     preferRE = false;
1020                 } else {
1021                     HereDoc.Quote = ch;
1022 
1023                     if (ch == '\'' || ch == '"' || ch == '`') {
1024                         HereDoc.Quoted = true;
1025                         HereDoc.Delimiter[0] = '\0';
1026                     } else {
1027                         HereDoc.Quoted = false;
1028                         HereDoc.Delimiter[0] = ch;
1029                         HereDoc.Delimiter[1] = '\0';
1030                         HereDoc.DelimiterLength = 1;
1031                     }
1032                 }
1033 			} else if (HereDoc.State == 1) { // collect the delimiter
1034                 if (isEOLChar(ch)) {
1035                     // End the quote now, and go back for more
1036                     styler.ColourTo(i - 1, state);
1037                     state = SCE_RB_DEFAULT;
1038                     i--;
1039                     chNext = ch;
1040                     chNext2 = chNext;
1041                     preferRE = false;
1042                 } else if (HereDoc.Quoted) {
1043 					if (ch == HereDoc.Quote) { // closing quote => end of delimiter
1044 						styler.ColourTo(i, state);
1045 						state = SCE_RB_DEFAULT;
1046                         preferRE = false;
1047                     } else {
1048 						if (ch == '\\' && !isEOLChar(chNext)) {
1049                             advance_char(i, ch, chNext, chNext2);
1050 						}
1051 						HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;
1052 						HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
1053                     }
1054                 } else { // an unquoted here-doc delimiter
1055 					if (isSafeAlnumOrHigh(ch) || ch == '_') {
1056 						HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;
1057 						HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
1058 					} else {
1059 						styler.ColourTo(i - 1, state);
1060                         redo_char(i, ch, chNext, chNext2, state);
1061                         preferRE = false;
1062 					}
1063                 }
1064 				if (HereDoc.DelimiterLength >= static_cast<int>(sizeof(HereDoc.Delimiter)) - 1) {
1065 					styler.ColourTo(i - 1, state);
1066 					state = SCE_RB_ERROR;
1067                     preferRE = false;
1068 				}
1069             }
1070         } else if (state == SCE_RB_HERE_Q) {
1071             // Not needed: HereDoc.State == 2
1072             // Indentable here docs: look backwards
1073             // Non-indentable: look forwards, like in Perl
1074             //
1075             // Why: so we can quickly resolve things like <<-" abc"
1076 
1077             if (!HereDoc.CanBeIndented) {
1078                 if (isEOLChar(chPrev)
1079                     && isMatch(styler, lengthDoc, i, HereDoc.Delimiter)) {
1080                     styler.ColourTo(i - 1, state);
1081                     i += HereDoc.DelimiterLength - 1;
1082                     chNext = styler.SafeGetCharAt(i + 1);
1083                     if (isEOLChar(chNext)) {
1084                         styler.ColourTo(i, SCE_RB_HERE_DELIM);
1085                         state = SCE_RB_DEFAULT;
1086                         HereDoc.State = 0;
1087                         preferRE = false;
1088                     }
1089                     // Otherwise we skipped through the here doc faster.
1090                 }
1091             } else if (isEOLChar(chNext)
1092                        && lookingAtHereDocDelim(styler,
1093                                                 i - HereDoc.DelimiterLength + 1,
1094                                                 lengthDoc,
1095                                                 HereDoc.Delimiter)) {
1096                 styler.ColourTo(i - 1 - HereDoc.DelimiterLength, state);
1097                 styler.ColourTo(i, SCE_RB_HERE_DELIM);
1098                 state = SCE_RB_DEFAULT;
1099                 preferRE = false;
1100                 HereDoc.State = 0;
1101             }
1102         } else if (state == SCE_RB_CLASS_VAR
1103                    || state == SCE_RB_INSTANCE_VAR
1104                    || state == SCE_RB_SYMBOL) {
1105             if (!isSafeWordcharOrHigh(ch)) {
1106                 styler.ColourTo(i - 1, state);
1107                 redo_char(i, ch, chNext, chNext2, state); // pass by ref
1108                 preferRE = false;
1109             }
1110         } else if (state == SCE_RB_GLOBAL) {
1111             if (!isSafeWordcharOrHigh(ch)) {
1112                 // handle special globals here as well
1113                 if (chPrev == '$') {
1114                     if (ch == '-') {
1115                         // Include the next char, like $-a
1116                         advance_char(i, ch, chNext, chNext2);
1117                     }
1118                     styler.ColourTo(i, state);
1119                     state = SCE_RB_DEFAULT;
1120                 } else {
1121                     styler.ColourTo(i - 1, state);
1122                     redo_char(i, ch, chNext, chNext2, state); // pass by ref
1123                 }
1124                 preferRE = false;
1125             }
1126         } else if (state == SCE_RB_POD) {
1127             // PODs end with ^=end\s, -- any whitespace can follow =end
1128             if (strchr(" \t\n\r", ch) != NULL
1129                 && i > 5
1130                 && isEOLChar(styler[i - 5])
1131                 && isMatch(styler, lengthDoc, i - 4, "=end")) {
1132                 styler.ColourTo(i - 1, state);
1133                 state = SCE_RB_DEFAULT;
1134                 preferRE = false;
1135             }
1136         } else if (state == SCE_RB_REGEX || state == SCE_RB_STRING_QR) {
1137             if (ch == '\\' && Quote.Up != '\\') {
1138                 // Skip one
1139                 advance_char(i, ch, chNext, chNext2);
1140             } else if (ch == Quote.Down) {
1141                 Quote.Count--;
1142                 if (Quote.Count == 0) {
1143                     // Include the options
1144                     while (isSafeAlpha(chNext)) {
1145                         i++;
1146 						ch = chNext;
1147                         chNext = styler.SafeGetCharAt(i + 1);
1148                     }
1149                     styler.ColourTo(i, state);
1150                     state = SCE_RB_DEFAULT;
1151                     preferRE = false;
1152                 }
1153             } else if (ch == Quote.Up) {
1154                 // Only if close quoter != open quoter
1155                 Quote.Count++;
1156 
1157             } else if (ch == '#' ) {
1158                 //todo: distinguish comments from pound chars
1159                 // for now, handle as comment
1160                 styler.ColourTo(i - 1, state);
1161                 bool inEscape = false;
1162                 while (++i < lengthDoc) {
1163                     ch = styler.SafeGetCharAt(i);
1164                     if (ch == '\\') {
1165                         inEscape = true;
1166                     } else if (isEOLChar(ch)) {
1167                         // Comment inside a regex
1168                         styler.ColourTo(i - 1, SCE_RB_COMMENTLINE);
1169                         break;
1170                     } else if (inEscape) {
1171                         inEscape = false;  // don't look at char
1172                     } else if (ch == Quote.Down) {
1173                         // Have the regular handler deal with this
1174                         // to get trailing modifiers.
1175                         i--;
1176                         ch = styler[i];
1177 						break;
1178                     }
1179                 }
1180                 chNext = styler.SafeGetCharAt(i + 1);
1181                 chNext2 = styler.SafeGetCharAt(i + 2);
1182             }
1183         // Quotes of all kinds...
1184         } else if (state == SCE_RB_STRING_Q || state == SCE_RB_STRING_QQ ||
1185                    state == SCE_RB_STRING_QX || state == SCE_RB_STRING_QW ||
1186                    state == SCE_RB_STRING || state == SCE_RB_CHARACTER ||
1187                    state == SCE_RB_BACKTICKS) {
1188             if (!Quote.Down && !isspacechar(ch)) {
1189                 Quote.Open(ch);
1190             } else if (ch == '\\' && Quote.Up != '\\') {
1191                 //Riddle me this: Is it safe to skip *every* escaped char?
1192                 advance_char(i, ch, chNext, chNext2);
1193             } else if (ch == Quote.Down) {
1194                 Quote.Count--;
1195                 if (Quote.Count == 0) {
1196                     styler.ColourTo(i, state);
1197                     state = SCE_RB_DEFAULT;
1198                     preferRE = false;
1199                 }
1200             } else if (ch == Quote.Up) {
1201                 Quote.Count++;
1202             }
1203         }
1204 
1205         if (state == SCE_RB_ERROR) {
1206             break;
1207         }
1208         chPrev = ch;
1209     }
1210     if (state == SCE_RB_WORD) {
1211         // We've ended on a word, possibly at EOF, and need to
1212         // classify it.
1213         (void) ClassifyWordRb(styler.GetStartSegment(), lengthDoc - 1, keywords, styler, prevWord);
1214     } else {
1215         styler.ColourTo(lengthDoc - 1, state);
1216     }
1217 }
1218 
1219 // Helper functions for folding, disambiguation keywords
1220 // Assert that there are no high-bit chars
1221 
getPrevWord(int pos,char * prevWord,Accessor & styler,int word_state)1222 static void getPrevWord(int pos,
1223                         char *prevWord,
1224                         Accessor &styler,
1225                         int word_state)
1226 {
1227     int i;
1228     styler.Flush();
1229     for (i = pos - 1; i > 0; i--) {
1230         if (actual_style(styler.StyleAt(i)) != word_state) {
1231             i++;
1232             break;
1233         }
1234     }
1235     if (i < pos - MAX_KEYWORD_LENGTH) // overflow
1236         i = pos - MAX_KEYWORD_LENGTH;
1237     char *dst = prevWord;
1238     for (; i <= pos; i++) {
1239         *dst++ = styler[i];
1240     }
1241 	*dst = 0;
1242 }
1243 
// Return true when prevWord is exactly one of the keywords this lexer
// treats as ambiguous: "if", "do", "while", "unless" or "until".
// The comparison is case-sensitive and matches whole words only.
static bool keywordIsAmbiguous(const char *prevWord)
{
    // Ordered from most likely used to least likely, so the scan tends to
    // stop early.  Ruby has plenty of looping constructs besides
    // 'while'/'until', hence their position towards the end.
    static const char *const ambiguousWords[] = {
        "if",
        "do",
        "while",
        "unless",
        "until"
    };
    const size_t wordCount = sizeof(ambiguousWords) / sizeof(ambiguousWords[0]);
    for (size_t k = 0; k < wordCount; k++) {
        if (strcmp(prevWord, ambiguousWords[k]) == 0) {
            return true;
        }
    }
    return false;
}
1258 
1259 // Demote keywords in the following conditions:
1260 // if, while, unless, until modify a statement
1261 // do after a while or until, as a noise word (like then after if)
1262 
keywordIsModifier(const char * word,int pos,Accessor & styler)1263 static bool keywordIsModifier(const char *word,
1264                               int pos,
1265                               Accessor &styler)
1266 {
1267     if (word[0] == 'd' && word[1] == 'o' && !word[2]) {
1268         return keywordDoStartsLoop(pos, styler);
1269     }
1270     char ch;
1271     int style = SCE_RB_DEFAULT;
1272 	int lineStart = styler.GetLine(pos);
1273     int lineStartPosn = styler.LineStart(lineStart);
1274     styler.Flush();
1275     while (--pos >= lineStartPosn) {
1276         style = actual_style(styler.StyleAt(pos));
1277 		if (style == SCE_RB_DEFAULT) {
1278 			if (iswhitespace(ch = styler[pos])) {
1279 				//continue
1280 			} else if (ch == '\r' || ch == '\n') {
1281 				// Scintilla's LineStart() and GetLine() routines aren't
1282 				// platform-independent, so if we have text prepared with
1283 				// a different system we can't rely on it.
1284 				return false;
1285 			}
1286 		} else {
1287             break;
1288 		}
1289     }
1290     if (pos < lineStartPosn) {
1291         return false; //XXX not quite right if the prev line is a continuation
1292     }
1293     // First things where the action is unambiguous
1294     switch (style) {
1295         case SCE_RB_DEFAULT:
1296         case SCE_RB_COMMENTLINE:
1297         case SCE_RB_POD:
1298         case SCE_RB_CLASSNAME:
1299         case SCE_RB_DEFNAME:
1300         case SCE_RB_MODULE_NAME:
1301             return false;
1302         case SCE_RB_OPERATOR:
1303             break;
1304         case SCE_RB_WORD:
1305             // Watch out for uses of 'else if'
1306             //XXX: Make a list of other keywords where 'if' isn't a modifier
1307             //     and can appear legitimately
1308             // Formulate this to avoid warnings from most compilers
1309             if (strcmp(word, "if") == 0) {
1310                 char prevWord[MAX_KEYWORD_LENGTH + 1];
1311                 getPrevWord(pos, prevWord, styler, SCE_RB_WORD);
1312                 return strcmp(prevWord, "else") != 0;
1313             }
1314             return true;
1315         default:
1316             return true;
1317     }
1318     // Assume that if the keyword follows an operator,
1319     // usually it's a block assignment, like
1320     // a << if x then y else z
1321 
1322     ch = styler[pos];
1323     switch (ch) {
1324         case ')':
1325         case ']':
1326         case '}':
1327             return true;
1328         default:
1329             return false;
1330     }
1331 }
1332 
1333 #define WHILE_BACKWARDS "elihw"
1334 #define UNTIL_BACKWARDS "litnu"
1335 
1336 // Nothing fancy -- look to see if we follow a while/until somewhere
1337 // on the current line
1338 
keywordDoStartsLoop(int pos,Accessor & styler)1339 static bool keywordDoStartsLoop(int pos,
1340                                 Accessor &styler)
1341 {
1342     char ch;
1343     int style;
1344 	int lineStart = styler.GetLine(pos);
1345     int lineStartPosn = styler.LineStart(lineStart);
1346     styler.Flush();
1347     while (--pos >= lineStartPosn) {
1348         style = actual_style(styler.StyleAt(pos));
1349 		if (style == SCE_RB_DEFAULT) {
1350 			if ((ch = styler[pos]) == '\r' || ch == '\n') {
1351 				// Scintilla's LineStart() and GetLine() routines aren't
1352 				// platform-independent, so if we have text prepared with
1353 				// a different system we can't rely on it.
1354 				return false;
1355 			}
1356 		} else if (style == SCE_RB_WORD) {
1357             // Check for while or until, but write the word in backwards
1358             char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero
1359             char *dst = prevWord;
1360             int wordLen = 0;
1361             int start_word;
1362             for (start_word = pos;
1363                  start_word >= lineStartPosn && actual_style(styler.StyleAt(start_word)) == SCE_RB_WORD;
1364                  start_word--) {
1365                 if (++wordLen < MAX_KEYWORD_LENGTH) {
1366                     *dst++ = styler[start_word];
1367                 }
1368             }
1369             *dst = 0;
1370             // Did we see our keyword?
1371             if (!strcmp(prevWord, WHILE_BACKWARDS)
1372                 || !strcmp(prevWord, UNTIL_BACKWARDS)) {
1373                 return true;
1374             }
1375             // We can move pos to the beginning of the keyword, and then
1376             // accept another decrement, as we can never have two contiguous
1377             // keywords:
1378             // word1 word2
1379             //           ^
1380             //        <-  move to start_word
1381             //      ^
1382             //      <- loop decrement
1383             //     ^  # pointing to end of word1 is fine
1384             pos = start_word;
1385         }
1386     }
1387     return false;
1388 }
1389 
1390 /*
1391  *  Folding Ruby
1392  *
1393  *  The language is quite complex to analyze without a full parse.
1394  *  For example, this line shouldn't affect fold level:
1395  *
1396  *   print "hello" if feeling_friendly?
1397  *
1398  *  Neither should this:
1399  *
1400  *   print "hello" \
1401  *      if feeling_friendly?
1402  *
1403  *
1404  *  But this should:
1405  *
1406  *   if feeling_friendly?  #++
1407  *     print "hello" \
1408  *     print "goodbye"
1409  *   end                   #--
1410  *
1411  *  So we cheat, by actually looking at the existing indentation
1412  *  levels for each line, and just echoing it back.  Like Python.
1413  *  Then if we get better at it, we'll take braces into consideration,
1414  *  which always affect folding levels.
1415 
1416  *  How the keywords should work:
1417  *  No effect:
1418  *  __FILE__ __LINE__ BEGIN END alias and
1419  *  defined? false in nil not or self super then
1420  *  true undef
1421 
1422  *  Always increment:
1423  *  begin  class def do for module when {
1424  *
1425  *  Always decrement:
1426  *  end }
1427  *
1428  *  Increment if these start a statement
1429  *  if unless until while -- do nothing if they're modifiers
1430 
1431  *  These end a block if there's no modifier, but don't bother
1432  *  break next redo retry return yield
1433  *
1434  *  These temporarily de-indent, but re-indent
1435  *  case else elsif ensure rescue
1436  *
1437  *  This means that the folder reflects indentation rather
1438  *  than setting it.  The language-service updates indentation
1439  *  when users type return and finishes entering de-denters.
1440  *
1441  *  Later offer to fold POD, here-docs, strings, and blocks of comments
1442  */
1443 
FoldRbDoc(unsigned int startPos,int length,int initStyle,WordList * [],Accessor & styler)1444 static void FoldRbDoc(unsigned int startPos, int length, int initStyle,
1445                       WordList *[], Accessor &styler) {
1446 	const bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
1447 	bool foldComment = styler.GetPropertyInt("fold.comment") != 0;
1448 
1449     synchronizeDocStart(startPos, length, initStyle, styler, // ref args
1450                         false);
1451 	unsigned int endPos = startPos + length;
1452 	int visibleChars = 0;
1453 	int lineCurrent = styler.GetLine(startPos);
1454 	int levelPrev = startPos == 0 ? 0 : (styler.LevelAt(lineCurrent)
1455                                          & SC_FOLDLEVELNUMBERMASK
1456                                          & ~SC_FOLDLEVELBASE);
1457 	int levelCurrent = levelPrev;
1458 	char chNext = styler[startPos];
1459 	int styleNext = styler.StyleAt(startPos);
1460 	int stylePrev = startPos <= 1 ? SCE_RB_DEFAULT : styler.StyleAt(startPos - 1);
1461     bool buffer_ends_with_eol = false;
1462 	for (unsigned int i = startPos; i < endPos; i++) {
1463 		char ch = chNext;
1464 		chNext = styler.SafeGetCharAt(i + 1);
1465 		int style = styleNext;
1466 		styleNext = styler.StyleAt(i + 1);
1467 		bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
1468         if (style == SCE_RB_COMMENTLINE) {
1469             if (foldComment && stylePrev != SCE_RB_COMMENTLINE) {
1470                 if (chNext == '{') {
1471 					levelCurrent++;
1472 				} else if (chNext == '}') {
1473 					levelCurrent--;
1474 				}
1475             }
1476         } else if (style == SCE_RB_OPERATOR) {
1477 			if (strchr("[{(", ch)) {
1478 				levelCurrent++;
1479 			} else if (strchr(")}]", ch)) {
1480                 // Don't decrement below 0
1481                 if (levelCurrent > 0)
1482                     levelCurrent--;
1483 			}
1484         } else if (style == SCE_RB_WORD && styleNext != SCE_RB_WORD) {
1485             // Look at the keyword on the left and decide what to do
1486             char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero
1487             prevWord[0] = 0;
1488             getPrevWord(i, prevWord, styler, SCE_RB_WORD);
1489             if (!strcmp(prevWord, "end")) {
1490                 // Don't decrement below 0
1491                 if (levelCurrent > 0)
1492                     levelCurrent--;
1493             } else if (   !strcmp(prevWord, "if")
1494                        || !strcmp(prevWord, "def")
1495                        || !strcmp(prevWord, "class")
1496                        || !strcmp(prevWord, "module")
1497                        || !strcmp(prevWord, "begin")
1498                        || !strcmp(prevWord, "case")
1499                        || !strcmp(prevWord, "do")
1500                        || !strcmp(prevWord, "while")
1501                        || !strcmp(prevWord, "unless")
1502                        || !strcmp(prevWord, "until")
1503                        || !strcmp(prevWord, "for")
1504                           ) {
1505 				levelCurrent++;
1506             }
1507         }
1508 		if (atEOL) {
1509 			int lev = levelPrev;
1510 			if (visibleChars == 0 && foldCompact)
1511 				lev |= SC_FOLDLEVELWHITEFLAG;
1512 			if ((levelCurrent > levelPrev) && (visibleChars > 0))
1513 				lev |= SC_FOLDLEVELHEADERFLAG;
1514             styler.SetLevel(lineCurrent, lev|SC_FOLDLEVELBASE);
1515 			lineCurrent++;
1516 			levelPrev = levelCurrent;
1517 			visibleChars = 0;
1518             buffer_ends_with_eol = true;
1519 		} else if (!isspacechar(ch)) {
1520 			visibleChars++;
1521             buffer_ends_with_eol = false;
1522         }
1523     }
1524 	// Fill in the real level of the next line, keeping the current flags as they will be filled in later
1525     if (!buffer_ends_with_eol) {
1526         lineCurrent++;
1527         int new_lev = levelCurrent;
1528         if (visibleChars == 0 && foldCompact)
1529             new_lev |= SC_FOLDLEVELWHITEFLAG;
1530 			if ((levelCurrent > levelPrev) && (visibleChars > 0))
1531 				new_lev |= SC_FOLDLEVELHEADERFLAG;
1532             levelCurrent = new_lev;
1533     }
1534 	styler.SetLevel(lineCurrent, levelCurrent|SC_FOLDLEVELBASE);
1535 }
1536 
1537 static const char * const rubyWordListDesc[] = {
1538 	"Keywords",
1539 	0
1540 };
1541 
1542 LexerModule lmRuby(SCLEX_RUBY, ColouriseRbDoc, "ruby", FoldRbDoc, rubyWordListDesc);
1543