1 // Scintilla source code edit control
2 /** @file LexRuby.cxx
3  ** Lexer for Ruby.
4  **/
5 // Copyright 2001- by Clemens Wyss <wys@helbling.ch>
6 // The License.txt file describes the conditions under which this software may be distributed.
7 
8 #include <stdlib.h>
9 #include <string.h>
10 #include <ctype.h>
11 #include <stdio.h>
12 #include <stdarg.h>
13 
14 #include "Platform.h"
15 
16 #include "PropSet.h"
17 #include "Accessor.h"
18 #include "KeyWords.h"
19 #include "Scintilla.h"
20 #include "SciLexer.h"
21 
22 #ifdef SCI_NAMESPACE
23 using namespace Scintilla;
24 #endif
25 
26 //XXX Identical to Perl, put in common area
// Tells whether ch is one of the two end-of-line characters
// (carriage return or line feed).
static inline bool isEOLChar(char ch) {
	switch (ch) {
	case '\r':
	case '\n':
		return true;
	default:
		return false;
	}
}
30 
// Returns true when ch is one of the punctuation characters listed in the
// operator table below, false for every other character.
static inline bool isRubyOperatorChar(char ch) {
	// strchr() treats the terminating NUL of the table as part of the
	// string, so a NUL byte would otherwise be reported as an operator.
	if (ch == '\0') {
		return false;
	}
	return strchr("%^&*\\()-+=|{}[]:;<>,/?!.~", ch) != NULL;
}
34 
35 
// Alphabetic test restricted to 7-bit characters: anything whose value,
// converted to unsigned int, exceeds 127 is rejected before isalpha()
// is consulted.
static inline bool isSafeAlpha(char ch) {
    const unsigned int code = (unsigned int) ch;
    if (code > 127) {
        return false;
    }
    return isalpha(ch) != 0;
}
39 
// Size limit (terminator included) for the word buffers used while
// classifying identifiers and keywords.
#define MAX_KEYWORD_LENGTH 200

// Mask applied to a raw style byte before it is compared with the
// SCE_RB_* style values.
#define STYLE_MASK 63
// The argument is parenthesized so that compound expressions such as
// actual_style(a | b) are masked as a whole.
#define actual_style(style) ((style) & STYLE_MASK)
44 
followsDot(unsigned int pos,Accessor & styler)45 static bool followsDot(unsigned int pos, Accessor &styler) {
46     styler.Flush();
47     for (; pos >= 1; --pos) {
48         int style = actual_style(styler.StyleAt(pos));
49         char ch;
50         switch (style) {
51             case SCE_RB_DEFAULT:
52                 ch = styler[pos];
53                 if (ch == ' ' || ch == '\t') {
54                     //continue
55                 } else {
56                     return false;
57                 }
58                 break;
59 
60             case SCE_RB_OPERATOR:
61                 return styler[pos] == '.';
62 
63             default:
64                 return false;
65         }
66     }
67     return false;
68 }
69 
70 // Forward declarations
71 static bool keywordIsAmbiguous(const char *prevWord);
72 static bool keywordDoStartsLoop(int pos,
73                                 Accessor &styler);
74 static bool keywordIsModifier(const char *word,
75                               int pos,
76                               Accessor &styler);
77 
ClassifyWordRb(unsigned int start,unsigned int end,WordList & keywords,Accessor & styler,char * prevWord)78 static int ClassifyWordRb(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, char *prevWord) {
79 	char s[100];
80     unsigned int i, j;
81 	unsigned int lim = end - start + 1; // num chars to copy
82 	if (lim >= MAX_KEYWORD_LENGTH) {
83 		lim = MAX_KEYWORD_LENGTH - 1;
84 	}
85 	for (i = start, j = 0; j < lim; i++, j++) {
86 		s[j] = styler[i];
87 	}
88     s[j] = '\0';
89 	int chAttr;
90 	if (0 == strcmp(prevWord, "class"))
91 		chAttr = SCE_RB_CLASSNAME;
92 	else if (0 == strcmp(prevWord, "module"))
93 		chAttr = SCE_RB_MODULE_NAME;
94 	else if (0 == strcmp(prevWord, "def"))
95 		chAttr = SCE_RB_DEFNAME;
96     else if (keywords.InList(s) && !followsDot(start - 1, styler)) {
97         if (keywordIsAmbiguous(s)
98             && keywordIsModifier(s, start, styler)) {
99 
100             // Demoted keywords are colored as keywords,
101             // but do not affect changes in indentation.
102             //
103             // Consider the word 'if':
104             // 1. <<if test ...>> : normal
105             // 2. <<stmt if test>> : demoted
106             // 3. <<lhs = if ...>> : normal: start a new indent level
107             // 4. <<obj.if = 10>> : color as identifer, since it follows '.'
108 
109             chAttr = SCE_RB_WORD_DEMOTED;
110         } else {
111             chAttr = SCE_RB_WORD;
112         }
113 	} else
114         chAttr = SCE_RB_IDENTIFIER;
115 	styler.ColourTo(end, chAttr);
116 	if (chAttr == SCE_RB_WORD) {
117 		strcpy(prevWord, s);
118 	} else {
119 		prevWord[0] = 0;
120 	}
121     return chAttr;
122 }
123 
124 
125 //XXX Identical to Perl, put in common area
isMatch(Accessor & styler,int lengthDoc,int pos,const char * val)126 static bool isMatch(Accessor &styler, int lengthDoc, int pos, const char *val) {
127 	if ((pos + static_cast<int>(strlen(val))) >= lengthDoc) {
128 		return false;
129 	}
130 	while (*val) {
131 		if (*val != styler[pos++]) {
132 			return false;
133 		}
134 		val++;
135 	}
136 	return true;
137 }
138 
139 // Do Ruby better -- find the end of the line, work back,
140 // and then check for leading white space
141 
142 // Precondition: the here-doc target can be indented
lookingAtHereDocDelim(Accessor & styler,int pos,int lengthDoc,const char * HereDocDelim)143 static bool lookingAtHereDocDelim(Accessor	   &styler,
144                                   int 			pos,
145                                   int 			lengthDoc,
146                                   const char   *HereDocDelim)
147 {
148     if (!isMatch(styler, lengthDoc, pos, HereDocDelim)) {
149         return false;
150     }
151     while (--pos > 0) {
152         char ch = styler[pos];
153         if (isEOLChar(ch)) {
154             return true;
155         } else if (ch != ' ' && ch != '\t') {
156             return false;
157         }
158     }
159     return false;
160 }
161 
162 //XXX Identical to Perl, put in common area
// Maps an opening bracket character to its closing counterpart.
// Every other character is returned unchanged.
static char opposite(char ch) {
	switch (ch) {
	case '(':
		return ')';
	case '[':
		return ']';
	case '{':
		return '}';
	case '<':
		return '>';
	default:
		return ch;
	}
}
174 
175 // Null transitions when we see we've reached the end
176 // and need to relex the curr char.
177 
redo_char(int & i,char & ch,char & chNext,char & chNext2,int & state)178 static void redo_char(int &i, char &ch, char &chNext, char &chNext2,
179                       int &state) {
180     i--;
181     chNext2 = chNext;
182     chNext = ch;
183     state = SCE_RB_DEFAULT;
184 }
185 
// Moves the scan forward by one character: the index is incremented and
// the look-ahead characters are shifted one place to the left.  chNext2 is
// only read here; it is not refreshed.
static void advance_char(int &i,
                         char &ch,
                         char &chNext,
                         char &chNext2) {
    ++i;
    ch = chNext;
    chNext = chNext2;
}
191 
192 // precondition: startPos points to one after the EOL char
currLineContainsHereDelims(int & startPos,Accessor & styler)193 static bool currLineContainsHereDelims(int& startPos,
194                                        Accessor &styler) {
195     if (startPos <= 1)
196         return false;
197 
198     int pos;
199     for (pos = startPos - 1; pos > 0; pos--) {
200         char ch = styler.SafeGetCharAt(pos);
201         if (isEOLChar(ch)) {
202             // Leave the pointers where they are -- there are no
203             // here doc delims on the current line, even if
204             // the EOL isn't default style
205 
206             return false;
207         } else {
208             styler.Flush();
209             if (actual_style(styler.StyleAt(pos)) == SCE_RB_HERE_DELIM) {
210                 break;
211             }
212         }
213     }
214     if (pos == 0) {
215         return false;
216     }
217     // Update the pointers so we don't have to re-analyze the string
218     startPos = pos;
219     return true;
220 }
221 
222 
isEmptyLine(int pos,Accessor & styler)223 static bool isEmptyLine(int pos,
224                         Accessor &styler) {
225 	int spaceFlags = 0;
226 	int lineCurrent = styler.GetLine(pos);
227 	int indentCurrent = styler.IndentAmount(lineCurrent, &spaceFlags, NULL);
228     return (indentCurrent & SC_FOLDLEVELWHITEFLAG) != 0;
229 }
230 
// Returns true when keyword is one of the words in the table below.
// The caller uses the result to decide whether a regular expression
// is preferred after that keyword.  Matching is exact and case-sensitive.
static bool RE_CanFollowKeyword(const char *keyword) {
    static const char *const reFriendly[] = {
        "and", "begin", "break", "case", "do",
        "else", "elsif", "if", "next", "return",
        "when", "unless", "until", "not", "or"
    };
    const size_t count = sizeof(reFriendly) / sizeof(reFriendly[0]);
    for (size_t k = 0; k < count; k++) {
        if (strcmp(keyword, reFriendly[k]) == 0) {
            return true;
        }
    }
    return false;
}
251 
252 
253 //todo: if we aren't looking at a stdio character,
254 // move to the start of the first line that is not in a
255 // multi-line construct
256 
synchronizeDocStart(unsigned int & startPos,int & length,int & initStyle,Accessor & styler,bool skipWhiteSpace=false)257 static void synchronizeDocStart(unsigned int& startPos,
258                                 int &length,
259                                 int &initStyle,
260                                 Accessor &styler,
261                                 bool skipWhiteSpace=false) {
262 
263     styler.Flush();
264     int style = actual_style(styler.StyleAt(startPos));
265     switch (style) {
266         case SCE_RB_STDIN:
267         case SCE_RB_STDOUT:
268         case SCE_RB_STDERR:
269             // Don't do anything else with these.
270             return;
271     }
272 
273     int pos = startPos;
274     // Quick way to characterize each line
275     int lineStart;
276     for (lineStart = styler.GetLine(pos); lineStart > 0; lineStart--) {
277         // Now look at the style before the previous line's EOL
278         pos = styler.LineStart(lineStart) - 1;
279         if (pos <= 10) {
280             lineStart = 0;
281             break;
282         }
283         char ch = styler.SafeGetCharAt(pos);
284         char chPrev = styler.SafeGetCharAt(pos - 1);
285         if (ch == '\n' && chPrev == '\r') {
286             pos--;
287         }
288         if (styler.SafeGetCharAt(pos - 1) == '\\') {
289             // Continuation line -- keep going
290         } else if (actual_style(styler.StyleAt(pos)) != SCE_RB_DEFAULT) {
291             // Part of multi-line construct -- keep going
292         } else if (currLineContainsHereDelims(pos, styler)) {
293             // Keep going, with pos and length now pointing
294             // at the end of the here-doc delimiter
295         } else if (skipWhiteSpace && isEmptyLine(pos, styler)) {
296             // Keep going
297         } else {
298             break;
299         }
300     }
301     pos = styler.LineStart(lineStart);
302     length += (startPos - pos);
303     startPos = pos;
304     initStyle = SCE_RB_DEFAULT;
305 }
306 
ColouriseRbDoc(unsigned int startPos,int length,int initStyle,WordList * keywordlists[],Accessor & styler)307 static void ColouriseRbDoc(unsigned int startPos, int length, int initStyle,
308 						   WordList *keywordlists[], Accessor &styler) {
309 
310 	// Lexer for Ruby often has to backtrack to start of current style to determine
311 	// which characters are being used as quotes, how deeply nested is the
312 	// start position and what the termination string is for here documents
313 
314 	WordList &keywords = *keywordlists[0];
315 
316 	class HereDocCls {
317 	public:
318 		int State;
319         // States
320         // 0: '<<' encountered
321 		// 1: collect the delimiter
322         // 1b: text between the end of the delimiter and the EOL
323 		// 2: here doc text (lines after the delimiter)
324 		char Quote;		// the char after '<<'
325 		bool Quoted;		// true if Quote in ('\'','"','`')
326 		int DelimiterLength;	// strlen(Delimiter)
327 		char Delimiter[256];	// the Delimiter, limit of 256: from Perl
328         bool CanBeIndented;
329 		HereDocCls() {
330 			State = 0;
331 			DelimiterLength = 0;
332 			Delimiter[0] = '\0';
333             CanBeIndented = false;
334 		}
335 	};
336 	HereDocCls HereDoc;
337 
338 	class QuoteCls {
339 		public:
340 		int  Count;
341 		char Up;
342 		char Down;
343 		QuoteCls() {
344 			this->New();
345 		}
346 		void New() {
347 			Count = 0;
348 			Up    = '\0';
349 			Down  = '\0';
350 		}
351 		void Open(char u) {
352 			Count++;
353 			Up    = u;
354 			Down  = opposite(Up);
355 		}
356 	};
357 	QuoteCls Quote;
358 
359     int numDots = 0;  // For numbers --
360                       // Don't start lexing in the middle of a num
361 
362     synchronizeDocStart(startPos, length, initStyle, styler, // ref args
363                         false);
364 
365 	bool preferRE = true;
366     int state = initStyle;
367 	int lengthDoc = startPos + length;
368 
369 	char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero
370 	prevWord[0] = '\0';
371 	if (length == 0)
372 		return;
373 
374 	char chPrev = styler.SafeGetCharAt(startPos - 1);
375 	char chNext = styler.SafeGetCharAt(startPos);
376 	// Ruby uses a different mask because bad indentation is marked by oring with 32
377 	styler.StartAt(startPos, 127);
378 	styler.StartSegment(startPos);
379 
380     static int q_states[] = {SCE_RB_STRING_Q,
381                              SCE_RB_STRING_QQ,
382                              SCE_RB_STRING_QR,
383                              SCE_RB_STRING_QW,
384                              SCE_RB_STRING_QW,
385                              SCE_RB_STRING_QX};
386     static const char* q_chars = "qQrwWx";
387 
388 	for (int i = startPos; i < lengthDoc; i++) {
389 		char ch = chNext;
390 		chNext = styler.SafeGetCharAt(i + 1);
391 		char chNext2 = styler.SafeGetCharAt(i + 2);
392 
393         if (styler.IsLeadByte(ch)) {
394 			chNext = chNext2;
395 			chPrev = ' ';
396 			i += 1;
397 			continue;
398 		}
399 
400         // skip on DOS/Windows
401         //No, don't, because some things will get tagged on,
402         // so we won't recognize keywords, for example
403 #if 0
404 		if (ch == '\r' && chNext == '\n') {
405 	    	continue;
406         }
407 #endif
408 
409         if (HereDoc.State == 1 && isEOLChar(ch)) {
410 			// Begin of here-doc (the line after the here-doc delimiter):
411 			HereDoc.State = 2;
412 			styler.ColourTo(i-1, state);
413             // Don't check for a missing quote, just jump into
414             // the here-doc state
415             state = SCE_RB_HERE_Q;
416         }
417 
418         // Regular transitions
419 		if (state == SCE_RB_DEFAULT) {
420             if (isdigit(ch)) {
421             	styler.ColourTo(i - 1, state);
422 				state = SCE_RB_NUMBER;
423                 numDots = 0;
424             } else if (iswordstart(ch)) {
425             	styler.ColourTo(i - 1, state);
426 				state = SCE_RB_WORD;
427 			} else if (ch == '#') {
428 				styler.ColourTo(i - 1, state);
429 				state = SCE_RB_COMMENTLINE;
430 			} else if (ch == '=') {
431 				// =begin indicates the start of a comment (doc) block
432                 if (i == 0 || isEOLChar(chPrev)
433                     && chNext == 'b'
434                     && styler.SafeGetCharAt(i + 2) == 'e'
435                     && styler.SafeGetCharAt(i + 3) == 'g'
436                     && styler.SafeGetCharAt(i + 4) == 'i'
437                     && styler.SafeGetCharAt(i + 5) == 'n'
438                     && !iswordchar(styler.SafeGetCharAt(i + 6))) {
439                     styler.ColourTo(i - 1, state);
440                     state = SCE_RB_POD;
441 				} else {
442 					styler.ColourTo(i - 1, state);
443 					styler.ColourTo(i, SCE_RB_OPERATOR);
444 					preferRE = true;
445 				}
446 			} else if (ch == '"') {
447 				styler.ColourTo(i - 1, state);
448 				state = SCE_RB_STRING;
449 				Quote.New();
450 				Quote.Open(ch);
451 			} else if (ch == '\'') {
452                 styler.ColourTo(i - 1, state);
453                 state = SCE_RB_CHARACTER;
454                 Quote.New();
455                 Quote.Open(ch);
456 			} else if (ch == '`') {
457 				styler.ColourTo(i - 1, state);
458 				state = SCE_RB_BACKTICKS;
459 				Quote.New();
460 				Quote.Open(ch);
461 			} else if (ch == '@') {
462                 // Instance or class var
463 				styler.ColourTo(i - 1, state);
464                 if (chNext == '@') {
465                     state = SCE_RB_CLASS_VAR;
466                     advance_char(i, ch, chNext, chNext2); // pass by ref
467                 } else {
468                     state = SCE_RB_INSTANCE_VAR;
469                 }
470 			} else if (ch == '$') {
471                 // Check for a builtin global
472 				styler.ColourTo(i - 1, state);
473                 // Recognize it bit by bit
474                 state = SCE_RB_GLOBAL;
475             } else if (ch == '/' && preferRE) {
476                 // Ambigous operator
477 				styler.ColourTo(i - 1, state);
478 				state = SCE_RB_REGEX;
479                 Quote.New();
480                 Quote.Open(ch);
481 			} else if (ch == '<' && chNext == '<' && chNext2 != '=') {
482 
483             // Recognise the '<<' symbol - either a here document or a binary op
484 
485 				styler.ColourTo(i - 1, state);
486                 i++;
487                 chNext = chNext2;
488 				styler.ColourTo(i, SCE_RB_OPERATOR);
489 
490                 if (preferRE) {
491                     state = SCE_RB_HERE_DELIM;
492 				    HereDoc.State = 0;
493                 } else {
494                     // leave state as default
495                     // We don't have all the heuristics Perl has for indications
496                     // of a here-doc, because '<<' is overloadable and used
497                     // for so many other classes.
498 					preferRE = true;
499                 }
500             } else if (ch == ':') {
501 				styler.ColourTo(i - 1, state);
502                 if (chNext == ':') {
503                     // Mark "::" as an operator, not symbol start
504                     styler.ColourTo(i + 1, SCE_RB_OPERATOR);
505                     advance_char(i, ch, chNext, chNext2); // pass by ref
506                     state = SCE_RB_DEFAULT;
507 					preferRE = false;
508                 } else if (iswordchar(chNext)) {
509 					state = SCE_RB_SYMBOL;
510                 } else if (strchr("[*!~+-*/%=<>&^|", chNext)) {
511                     // Do the operator analysis in-line, looking ahead
512                     // Based on the table in pickaxe 2nd ed., page 339
513                     bool doColoring = true;
514                     switch (chNext) {
515                     case '[':
516                         if (chNext2 == ']' ) {
517                             char ch_tmp = styler.SafeGetCharAt(i + 3);
518                             if (ch_tmp == '=') {
519                                 i += 3;
520                                 ch = ch_tmp;
521                                 chNext = styler.SafeGetCharAt(i + 1);
522                             } else {
523                                 i += 2;
524                                 ch = chNext2;
525                                 chNext = ch_tmp;
526                             }
527                         } else {
528                             doColoring = false;
529                         }
530                         break;
531 
532                     case '*':
533                         if (chNext2 == '*') {
534                             i += 2;
535                             ch = chNext2;
536                             chNext = styler.SafeGetCharAt(i + 1);
537                         } else {
538                             advance_char(i, ch, chNext, chNext2);
539                         }
540                         break;
541 
542                     case '!':
543                         if (chNext2 == '=' || chNext2 == '~') {
544                             i += 2;
545                             ch = chNext2;
546                             chNext = styler.SafeGetCharAt(i + 1);
547                         } else {
548                             advance_char(i, ch, chNext, chNext2);
549                         }
550                         break;
551 
552                     case '<':
553                         if (chNext2 == '<') {
554                             i += 2;
555                             ch = chNext2;
556                             chNext = styler.SafeGetCharAt(i + 1);
557                         } else if (chNext2 == '=') {
558                             char ch_tmp = styler.SafeGetCharAt(i + 3);
559                             if (ch_tmp == '>') {  // <=> operator
560                                 i += 3;
561                                 ch = ch_tmp;
562                                 chNext = styler.SafeGetCharAt(i + 1);
563                             } else {
564                                 i += 2;
565                                 ch = chNext2;
566                                 chNext = ch_tmp;
567                             }
568                         } else {
569                             advance_char(i, ch, chNext, chNext2);
570                         }
571                         break;
572 
573                     default:
574                         // Simple one-character operators
575                         advance_char(i, ch, chNext, chNext2);
576                         break;
577                     }
578                     if (doColoring) {
579                         styler.ColourTo(i, SCE_RB_SYMBOL);
580                         state = SCE_RB_DEFAULT;
581                     }
582 				} else if (!preferRE) {
583 					// Don't color symbol strings (yet)
584 					// Just color the ":" and color rest as string
585 					styler.ColourTo(i, SCE_RB_SYMBOL);
586 					state = SCE_RB_DEFAULT;
587                 } else {
588                     styler.ColourTo(i, SCE_RB_OPERATOR);
589                     state = SCE_RB_DEFAULT;
590                     preferRE = true;
591                 }
592             } else if (ch == '%') {
593                 styler.ColourTo(i - 1, state);
594                 bool have_string = false;
595                 if (strchr(q_chars, chNext) && !iswordchar(chNext2)) {
596                     Quote.New();
597                     const char *hit = strchr(q_chars, chNext);
598                     if (hit != NULL) {
599                         state = q_states[hit - q_chars];
600                         Quote.Open(chNext2);
601                         i += 2;
602                         ch = chNext2;
603 						chNext = styler.SafeGetCharAt(i + 1);
604                         have_string = true;
605                     }
606                 } else if (!iswordchar(chNext)) {
607                     state = SCE_RB_STRING_QQ;
608                     Quote.Open(chNext);
609                     advance_char(i, ch, chNext, chNext2); // pass by ref
610                     have_string = true;
611                 }
612                 if (!have_string) {
613                     styler.ColourTo(i, SCE_RB_OPERATOR);
614                     // stay in default
615                     preferRE = true;
616                 }
617             } else if (isoperator(ch)) {
618 				styler.ColourTo(i - 1, state);
619 				styler.ColourTo(i, SCE_RB_OPERATOR);
620                 // If we're ending an expression or block,
621                 // assume it ends an object, and the ambivalent
622                 // constructs are binary operators
623                 //
624                 // So if we don't have one of these chars,
625                 // we aren't ending an object exp'n, and ops
626                 // like : << / are unary operators.
627 
628                 preferRE = (strchr(")}]", ch) == NULL);
629                 // Stay in default state
630             } else if (isEOLChar(ch)) {
631                 // Make sure it's a true line-end, with no backslash
632                 if ((ch == '\r' || (ch == '\n' && chPrev != '\r'))
633                     && chPrev != '\\') {
634                     // Assume we've hit the end of the statement.
635                     preferRE = true;
636                 }
637             }
638         } else if (state == SCE_RB_WORD) {
639             if (ch == '.' || !iswordchar(ch)) {
640                 // Words include x? in all contexts,
641                 // and <letters>= after either 'def' or a dot
642                 // Move along until a complete word is on our left
643 
644                 // Default accessor treats '.' as word-chars,
645                 // but we don't for now.
646 
647                 if (ch == '='
648                     && iswordchar(chPrev)
649                     && (chNext == '('
650                         || strchr(" \t\n\r", chNext) != NULL)
651                     && (!strcmp(prevWord, "def")
652                         || followsDot(styler.GetStartSegment(), styler))) {
653                     // <name>= is a name only when being def'd -- Get it the next time
654                     // This means that <name>=<name> is always lexed as
655                     // <name>, (op, =), <name>
656                 } else if ((ch == '?' || ch == '!')
657                            && iswordchar(chPrev)
658                            && !iswordchar(chNext)) {
659                     // <name>? is a name -- Get it the next time
660                     // But <name>?<name> is always lexed as
661                     // <name>, (op, ?), <name>
662                     // Same with <name>! to indicate a method that
663                     // modifies its target
664                 } else if (isEOLChar(ch)
665                            && isMatch(styler, lengthDoc, i - 7, "__END__")) {
666                     styler.ColourTo(i, SCE_RB_DATASECTION);
667                     state = SCE_RB_DATASECTION;
668                     // No need to handle this state -- we'll just move to the end
669                     preferRE = false;
670                 } else {
671 					int wordStartPos = styler.GetStartSegment();
672                     int word_style = ClassifyWordRb(wordStartPos, i - 1, keywords, styler, prevWord);
673                     switch (word_style) {
674                         case SCE_RB_WORD:
675                             preferRE = RE_CanFollowKeyword(prevWord);
676 							break;
677 
678                         case SCE_RB_WORD_DEMOTED:
679                             preferRE = true;
680 							break;
681 
682                         case SCE_RB_IDENTIFIER:
683                             if (isMatch(styler, lengthDoc, wordStartPos, "print")) {
684                                 preferRE = true;
685                             } else if (isEOLChar(ch)) {
686                                 preferRE = true;
687                             } else {
688                                 preferRE = false;
689                             }
690 							break;
691                         default:
692                             preferRE = false;
693                     }
694                     redo_char(i, ch, chNext, chNext2, state); // pass by ref
695                 }
696             }
697         } else if (state == SCE_RB_NUMBER) {
698             if (isalnum(ch) || ch == '_') {
699                 // Keep going
700             } else if (ch == '.' && ++numDots == 1) {
701                 // Keep going
702             } else {
703                 styler.ColourTo(i - 1, state);
704                 redo_char(i, ch, chNext, chNext2, state); // pass by ref
705                 preferRE = false;
706             }
707         } else if (state == SCE_RB_COMMENTLINE) {
708 			if (isEOLChar(ch)) {
709                 styler.ColourTo(i - 1, state);
710                 state = SCE_RB_DEFAULT;
711                 // Use whatever setting we had going into the comment
712             }
713         } else if (state == SCE_RB_HERE_DELIM) {
714             // See the comment for SCE_RB_HERE_DELIM in LexPerl.cxx
715             // Slightly different: if we find an immediate '-',
716             // the target can appear indented.
717 
718 			if (HereDoc.State == 0) { // '<<' encountered
719 				HereDoc.State = 1;
720                 HereDoc.DelimiterLength = 0;
721                 if (ch == '-') {
722                     HereDoc.CanBeIndented = true;
723                     advance_char(i, ch, chNext, chNext2); // pass by ref
724                 } else {
725                     HereDoc.CanBeIndented = false;
726                 }
727                 if (isEOLChar(ch)) {
728                     // Bail out of doing a here doc if there's no target
729                     state = SCE_RB_DEFAULT;
730                     preferRE = false;
731                 } else {
732                     HereDoc.Quote = ch;
733 
734                     if (ch == '\'' || ch == '"' || ch == '`') {
735                         HereDoc.Quoted = true;
736                         HereDoc.Delimiter[0] = '\0';
737                     } else {
738                         HereDoc.Quoted = false;
739                         HereDoc.Delimiter[0] = ch;
740                         HereDoc.Delimiter[1] = '\0';
741                         HereDoc.DelimiterLength = 1;
742                     }
743                 }
744 			} else if (HereDoc.State == 1) { // collect the delimiter
745                 if (isEOLChar(ch)) {
746                     // End the quote now, and go back for more
747                     styler.ColourTo(i - 1, state);
748                     state = SCE_RB_DEFAULT;
749                     i--;
750                     chNext = ch;
751                     chNext2 = chNext;
752                     preferRE = false;
753                 } else if (HereDoc.Quoted) {
754 					if (ch == HereDoc.Quote) { // closing quote => end of delimiter
755 						styler.ColourTo(i, state);
756 						state = SCE_RB_DEFAULT;
757                         preferRE = false;
758                     } else {
759 						if (ch == '\\' && !isEOLChar(chNext)) {
760                             advance_char(i, ch, chNext, chNext2);
761 						}
762 						HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;
763 						HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
764                     }
765                 } else { // an unquoted here-doc delimiter
766 					if (isalnum(ch) || ch == '_') {
767 						HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;
768 						HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
769 					} else {
770 						styler.ColourTo(i - 1, state);
771                         redo_char(i, ch, chNext, chNext2, state);
772                         preferRE = false;
773 					}
774                 }
775 				if (HereDoc.DelimiterLength >= static_cast<int>(sizeof(HereDoc.Delimiter)) - 1) {
776 					styler.ColourTo(i - 1, state);
777 					state = SCE_RB_ERROR;
778                     preferRE = false;
779 				}
780             }
781         } else if (state == SCE_RB_HERE_Q) {
782             // Not needed: HereDoc.State == 2
783             // Indentable here docs: look backwards
784             // Non-indentable: look forwards, like in Perl
785             //
786             // Why: so we can quickly resolve things like <<-" abc"
787 
788             if (!HereDoc.CanBeIndented) {
789                 if (isEOLChar(chPrev)
790                     && isMatch(styler, lengthDoc, i, HereDoc.Delimiter)) {
791                     styler.ColourTo(i - 1, state);
792                     i += HereDoc.DelimiterLength - 1;
793                     chNext = styler.SafeGetCharAt(i + 1);
794                     if (isEOLChar(chNext)) {
795                         styler.ColourTo(i, SCE_RB_HERE_DELIM);
796                         state = SCE_RB_DEFAULT;
797                         HereDoc.State = 0;
798                         preferRE = false;
799                     }
800                     // Otherwise we skipped through the here doc faster.
801                 }
802             } else if (isEOLChar(chNext)
803                        && lookingAtHereDocDelim(styler,
804                                                 i - HereDoc.DelimiterLength + 1,
805                                                 lengthDoc,
806                                                 HereDoc.Delimiter)) {
807                 styler.ColourTo(i - 1 - HereDoc.DelimiterLength, state);
808                 styler.ColourTo(i, SCE_RB_HERE_DELIM);
809                 state = SCE_RB_DEFAULT;
810                 preferRE = false;
811                 HereDoc.State = 0;
812             }
813         } else if (state == SCE_RB_CLASS_VAR
814                    || state == SCE_RB_INSTANCE_VAR
815                    || state == SCE_RB_SYMBOL) {
816             if (!iswordchar(ch)) {
817                 styler.ColourTo(i - 1, state);
818                 redo_char(i, ch, chNext, chNext2, state); // pass by ref
819                 preferRE = false;
820             }
821         } else if (state == SCE_RB_GLOBAL) {
822             if (!iswordchar(ch)) {
823                 // handle special globals here as well
824                 if (chPrev == '$') {
825                     if (ch == '-') {
826                         // Include the next char, like $-a
827                         advance_char(i, ch, chNext, chNext2);
828                     }
829                     styler.ColourTo(i, state);
830                     state = SCE_RB_DEFAULT;
831                 } else {
832                     styler.ColourTo(i - 1, state);
833                     redo_char(i, ch, chNext, chNext2, state); // pass by ref
834                 }
835                 preferRE = false;
836             }
837         } else if (state == SCE_RB_POD) {
838             // PODs end with ^=end\s, -- any whitespace can follow =end
839             if (strchr(" \t\n\r", ch) != NULL
840                 && i > 5
841                 && isEOLChar(styler[i - 5])
842                 && isMatch(styler, lengthDoc, i - 4, "=end")) {
843                 styler.ColourTo(i - 1, state);
844                 state = SCE_RB_DEFAULT;
845                 preferRE = false;
846             }
847         } else if (state == SCE_RB_REGEX || state == SCE_RB_STRING_QR) {
848             if (ch == '\\' && Quote.Up != '\\') {
849                 // Skip one
850                 advance_char(i, ch, chNext, chNext2);
851             } else if (ch == Quote.Down) {
852                 Quote.Count--;
853                 if (Quote.Count == 0) {
854                     // Include the options
855                     while (isSafeAlpha(chNext)) {
856                         i++;
857 						ch = chNext;
858                         chNext = styler.SafeGetCharAt(i + 1);
859                     }
860                     styler.ColourTo(i, state);
861                     state = SCE_RB_DEFAULT;
862                     preferRE = false;
863                 }
864             } else if (ch == Quote.Up) {
865                 // Only if close quoter != open quoter
866                 Quote.Count++;
867 
868             } else if (ch == '#' ) {
869                 //todo: distinguish comments from pound chars
870                 // for now, handle as comment
871                 styler.ColourTo(i - 1, state);
872                 bool inEscape = false;
873                 while (++i < lengthDoc) {
874                     ch = styler.SafeGetCharAt(i);
875                     if (ch == '\\') {
876                         inEscape = true;
877                     } else if (isEOLChar(ch)) {
878                         // Comment inside a regex
879                         styler.ColourTo(i - 1, SCE_RB_COMMENTLINE);
880                         break;
881                     } else if (inEscape) {
882                         inEscape = false;  // don't look at char
883                     } else if (ch == Quote.Down) {
884                         // Have the regular handler deal with this
885                         // to get trailing modifiers.
886                         i--;
887                         ch = styler[i];
888 						break;
889                     }
890                 }
891                 chNext = styler.SafeGetCharAt(i + 1);
892                 chNext2 = styler.SafeGetCharAt(i + 2);
893             }
894         // Quotes of all kinds...
895         } else if (state == SCE_RB_STRING_Q || state == SCE_RB_STRING_QQ ||
896                    state == SCE_RB_STRING_QX || state == SCE_RB_STRING_QW ||
897                    state == SCE_RB_STRING || state == SCE_RB_CHARACTER ||
898                    state == SCE_RB_BACKTICKS) {
899             if (!Quote.Down && !isspacechar(ch)) {
900                 Quote.Open(ch);
901             } else if (ch == '\\' && Quote.Up != '\\') {
902                 //Riddle me this: Is it safe to skip *every* escaped char?
903                 advance_char(i, ch, chNext, chNext2);
904             } else if (ch == Quote.Down) {
905                 Quote.Count--;
906                 if (Quote.Count == 0) {
907                     styler.ColourTo(i, state);
908                     state = SCE_RB_DEFAULT;
909                     preferRE = false;
910                 }
911             } else if (ch == Quote.Up) {
912                 Quote.Count++;
913             }
914         }
915 
916         if (state == SCE_RB_ERROR) {
917             break;
918         }
919         chPrev = ch;
920     }
921     if (state == SCE_RB_WORD) {
922         // We've ended on a word, possibly at EOF, and need to
923         // classify it.
924         (void) ClassifyWordRb(styler.GetStartSegment(), lengthDoc - 1, keywords, styler, prevWord);
925     } else {
926         styler.ColourTo(lengthDoc - 1, state);
927     }
928 }
929 
930 // Helper functions for folding
931 
getPrevWord(int pos,char * prevWord,Accessor & styler,int word_state)932 static void getPrevWord(int pos,
933                         char *prevWord,
934                         Accessor &styler,
935                         int word_state)
936 {
937     int i;
938     styler.Flush();
939     for (i = pos - 1; i > 0; i--) {
940         if (actual_style(styler.StyleAt(i)) != word_state) {
941             i++;
942             break;
943         }
944     }
945     if (i < pos - MAX_KEYWORD_LENGTH) // overflow
946         i = pos - MAX_KEYWORD_LENGTH;
947     char *dst = prevWord;
948     for (; i <= pos; i++) {
949         *dst++ = styler[i];
950     }
951 	*dst = 0;
952 }
953 
// Report whether 'prevWord' is one of the keywords whose role depends on
// context: "if", "do", "while", "unless" or "until".
// The comparison is exact and case-sensitive.
static bool keywordIsAmbiguous(const char *prevWord)
{
    // Kept in most-likely-first order.
    // Lots of ways to do a loop in Ruby besides 'while/until'
    static const char *const ambiguousWords[] = {
        "if", "do", "while", "unless", "until"
    };
    const size_t count = sizeof(ambiguousWords) / sizeof(ambiguousWords[0]);
    for (size_t k = 0; k < count; k++) {
        if (strcmp(prevWord, ambiguousWords[k]) == 0) {
            return true;
        }
    }
    return false;
}
968 
// True only for a space or a horizontal tab; end-of-line characters do
// not count as whitespace here.
static bool inline iswhitespace(char ch) {
    switch (ch) {
        case ' ':
        case '\t':
            return true;
        default:
            return false;
    }
}
972 
973 // Demote keywords in the following conditions:
974 // if, while, unless, until modify a statement
975 // do after a while or until, as a noise word (like then after if)
976 
keywordIsModifier(const char * word,int pos,Accessor & styler)977 static bool keywordIsModifier(const char *word,
978                               int pos,
979                               Accessor &styler)
980 {
981     if (word[0] == 'd' && word[1] == 'o' && !word[2]) {
982         return keywordDoStartsLoop(pos, styler);
983     }
984     char ch;
985     int style = SCE_RB_DEFAULT;
986 	int lineStart = styler.GetLine(pos);
987     int lineStartPosn = styler.LineStart(lineStart);
988     styler.Flush();
989     while (--pos >= lineStartPosn) {
990         style = actual_style(styler.StyleAt(pos));
991 		if (style == SCE_RB_DEFAULT) {
992 			if (iswhitespace(ch = styler[pos])) {
993 				//continue
994 			} else if (ch == '\r' || ch == '\n') {
995 				// Scintilla's LineStart() and GetLine() routines aren't
996 				// platform-independent, so if we have text prepared with
997 				// a different system we can't rely on it.
998 				return false;
999 			}
1000 		} else {
1001             break;
1002 		}
1003     }
1004     if (pos < lineStartPosn) {
1005         return false; //XXX not quite right if the prev line is a continuation
1006     }
1007     // First things where the action is unambiguous
1008     switch (style) {
1009         case SCE_RB_DEFAULT:
1010         case SCE_RB_COMMENTLINE:
1011         case SCE_RB_POD:
1012         case SCE_RB_CLASSNAME:
1013         case SCE_RB_DEFNAME:
1014         case SCE_RB_MODULE_NAME:
1015             return false;
1016         case SCE_RB_OPERATOR:
1017             break;
1018         case SCE_RB_WORD:
1019             // Watch out for uses of 'else if'
1020             //XXX: Make a list of other keywords where 'if' isn't a modifier
1021             //     and can appear legitimately
1022             // Formulate this to avoid warnings from most compilers
1023             if (strcmp(word, "if") == 0) {
1024                 char prevWord[MAX_KEYWORD_LENGTH + 1];
1025                 getPrevWord(pos, prevWord, styler, SCE_RB_WORD);
1026                 return strcmp(prevWord, "else") != 0;
1027             }
1028             return true;
1029         default:
1030             return true;
1031     }
1032     // Assume that if the keyword follows an operator,
1033     // usually it's a block assignment, like
1034     // a << if x then y else z
1035 
1036     ch = styler[pos];
1037     switch (ch) {
1038         case ')':
1039         case ']':
1040         case '}':
1041             return true;
1042         default:
1043             return false;
1044     }
1045 }
1046 
1047 #define WHILE_BACKWARDS "elihw"
1048 #define UNTIL_BACKWARDS "litnu"
1049 
1050 // Nothing fancy -- look to see if we follow a while/until somewhere
1051 // on the current line
1052 
keywordDoStartsLoop(int pos,Accessor & styler)1053 static bool keywordDoStartsLoop(int pos,
1054                                 Accessor &styler)
1055 {
1056     char ch;
1057     int style;
1058 	int lineStart = styler.GetLine(pos);
1059     int lineStartPosn = styler.LineStart(lineStart);
1060     styler.Flush();
1061     while (--pos >= lineStartPosn) {
1062         style = actual_style(styler.StyleAt(pos));
1063 		if (style == SCE_RB_DEFAULT) {
1064 			if ((ch = styler[pos]) == '\r' || ch == '\n') {
1065 				// Scintilla's LineStart() and GetLine() routines aren't
1066 				// platform-independent, so if we have text prepared with
1067 				// a different system we can't rely on it.
1068 				return false;
1069 			}
1070 		} else if (style == SCE_RB_WORD) {
1071             // Check for while or until, but write the word in backwards
1072             char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero
1073             char *dst = prevWord;
1074             int wordLen = 0;
1075             int start_word;
1076             for (start_word = pos;
1077                  start_word >= lineStartPosn && actual_style(styler.StyleAt(start_word)) == SCE_RB_WORD;
1078                  start_word--) {
1079                 if (++wordLen < MAX_KEYWORD_LENGTH) {
1080                     *dst++ = styler[start_word];
1081                 }
1082             }
1083             *dst = 0;
1084             // Did we see our keyword?
1085             if (!strcmp(prevWord, WHILE_BACKWARDS)
1086                 || !strcmp(prevWord, UNTIL_BACKWARDS)) {
1087                 return true;
1088             }
1089             // We can move pos to the beginning of the keyword, and then
1090             // accept another decrement, as we can never have two contiguous
1091             // keywords:
1092             // word1 word2
1093             //           ^
1094             //        <-  move to start_word
1095             //      ^
1096             //      <- loop decrement
1097             //     ^  # pointing to end of word1 is fine
1098             pos = start_word;
1099         }
1100     }
1101     return false;
1102 }
1103 
1104 /*
1105  *  Folding Ruby
1106  *
1107  *  The language is quite complex to analyze without a full parse.
1108  *  For example, this line shouldn't affect fold level:
1109  *
1110  *   print "hello" if feeling_friendly?
1111  *
1112  *  Neither should this:
1113  *
1114  *   print "hello" \
1115  *      if feeling_friendly?
1116  *
1117  *
1118  *  But this should:
1119  *
1120  *   if feeling_friendly?  #++
1121  *     print "hello" \
1122  *     print "goodbye"
1123  *   end                   #--
1124  *
1125  *  So we cheat, by actually looking at the existing indentation
1126  *  levels for each line, and just echoing it back.  Like Python.
1127  *  Then if we get better at it, we'll take braces into consideration,
1128  *  which always affect folding levels.
1129 
1130  *  How the keywords should work:
1131  *  No effect:
1132  *  __FILE__ __LINE__ BEGIN END alias and
1133  *  defined? false in nil not or self super then
1134  *  true undef
1135 
1136  *  Always increment:
1137  *  begin  class def do for module when {
1138  *
1139  *  Always decrement:
1140  *  end }
1141  *
1142  *  Increment if these start a statement
1143  *  if unless until while -- do nothing if they're modifiers
1144 
1145  *  These end a block if there's no modifier, but don't bother
1146  *  break next redo retry return yield
1147  *
1148  *  These temporarily de-indent, but re-indent
1149  *  case else elsif ensure rescue
1150  *
 *  This means that the folder reflects indentation rather
 *  than setting it.  The language service updates indentation
 *  when users type return and finish entering de-denters.
1154  *
1155  *  Later offer to fold POD, here-docs, strings, and blocks of comments
1156  */
1157 
FoldRbDoc(unsigned int startPos,int length,int initStyle,WordList * [],Accessor & styler)1158 static void FoldRbDoc(unsigned int startPos, int length, int initStyle,
1159                       WordList *[], Accessor &styler) {
1160 	const bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
1161 	bool foldComment = styler.GetPropertyInt("fold.comment") != 0;
1162 
1163     synchronizeDocStart(startPos, length, initStyle, styler, // ref args
1164                         false);
1165 	unsigned int endPos = startPos + length;
1166 	int visibleChars = 0;
1167 	int lineCurrent = styler.GetLine(startPos);
1168 	int levelPrev = startPos == 0 ? 0 : (styler.LevelAt(lineCurrent)
1169                                          & SC_FOLDLEVELNUMBERMASK
1170                                          & ~SC_FOLDLEVELBASE);
1171 	int levelCurrent = levelPrev;
1172 	char chNext = styler[startPos];
1173 	int styleNext = styler.StyleAt(startPos);
1174 	int stylePrev = startPos <= 1 ? SCE_RB_DEFAULT : styler.StyleAt(startPos - 1);
1175     bool buffer_ends_with_eol = false;
1176 	for (unsigned int i = startPos; i < endPos; i++) {
1177 		char ch = chNext;
1178 		chNext = styler.SafeGetCharAt(i + 1);
1179 		int style = styleNext;
1180 		styleNext = styler.StyleAt(i + 1);
1181 		bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
1182         if (style == SCE_RB_COMMENTLINE) {
1183             if (foldComment && stylePrev != SCE_RB_COMMENTLINE) {
1184                 if (chNext == '{') {
1185 					levelCurrent++;
1186 				} else if (chNext == '}') {
1187 					levelCurrent--;
1188 				}
1189             }
1190         } else if (style == SCE_RB_OPERATOR) {
1191 			if (strchr("[{(", ch)) {
1192 				levelCurrent++;
1193 			} else if (strchr(")}]", ch)) {
1194                 // Don't decrement below 0
1195                 if (levelCurrent > 0)
1196                     levelCurrent--;
1197 			}
1198         } else if (style == SCE_RB_WORD && styleNext != SCE_RB_WORD) {
1199             // Look at the keyword on the left and decide what to do
1200             char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero
1201             prevWord[0] = 0;
1202             getPrevWord(i, prevWord, styler, SCE_RB_WORD);
1203             if (!strcmp(prevWord, "end")) {
1204                 // Don't decrement below 0
1205                 if (levelCurrent > 0)
1206                     levelCurrent--;
1207             } else if (   !strcmp(prevWord, "if")
1208                        || !strcmp(prevWord, "def")
1209                        || !strcmp(prevWord, "class")
1210                        || !strcmp(prevWord, "module")
1211                        || !strcmp(prevWord, "begin")
1212                        || !strcmp(prevWord, "case")
1213                        || !strcmp(prevWord, "do")
1214                        || !strcmp(prevWord, "while")
1215                        || !strcmp(prevWord, "unless")
1216                        || !strcmp(prevWord, "until")
1217                        || !strcmp(prevWord, "for")
1218                           ) {
1219 				levelCurrent++;
1220             }
1221         }
1222 		if (atEOL) {
1223 			int lev = levelPrev;
1224 			if (visibleChars == 0 && foldCompact)
1225 				lev |= SC_FOLDLEVELWHITEFLAG;
1226 			if ((levelCurrent > levelPrev) && (visibleChars > 0))
1227 				lev |= SC_FOLDLEVELHEADERFLAG;
1228             styler.SetLevel(lineCurrent, lev|SC_FOLDLEVELBASE);
1229 			lineCurrent++;
1230 			levelPrev = levelCurrent;
1231 			visibleChars = 0;
1232             buffer_ends_with_eol = true;
1233 		} else if (!isspacechar(ch)) {
1234 			visibleChars++;
1235             buffer_ends_with_eol = false;
1236         }
1237     }
1238 	// Fill in the real level of the next line, keeping the current flags as they will be filled in later
1239     if (!buffer_ends_with_eol) {
1240         lineCurrent++;
1241         int new_lev = levelCurrent;
1242         if (visibleChars == 0 && foldCompact)
1243             new_lev |= SC_FOLDLEVELWHITEFLAG;
1244 			if ((levelCurrent > levelPrev) && (visibleChars > 0))
1245 				new_lev |= SC_FOLDLEVELHEADERFLAG;
1246             levelCurrent = new_lev;
1247     }
1248 	styler.SetLevel(lineCurrent, levelCurrent|SC_FOLDLEVELBASE);
1249 }
1250 
// Descriptions of the word lists handed to the Ruby lexer module below;
// the array is terminated by a 0 entry.
static const char * const rubyWordListDesc[] = {
	"Keywords",
	0
};

// Lexer module object for Ruby: ties the SCLEX_RUBY identifier and the
// name "ruby" to the colourising function ColouriseRbDoc, the folding
// function FoldRbDoc and the word list descriptions above.
LexerModule lmRuby(SCLEX_RUBY, ColouriseRbDoc, "ruby", FoldRbDoc, rubyWordListDesc);
1257