scintilla/src/LexRuby.cxx

// Scintilla source code edit control
/** @file LexRuby.cxx
 ** Lexer for Ruby.
 **/
// Copyright 2001- by Clemens Wyss <wys@helbling.ch>
// The License.txt file describes the conditions under which this software may be distributed.

#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <stdio.h>
#include <stdarg.h>

#include "Platform.h"

#include "PropSet.h"
#include "Accessor.h"
#include "KeyWords.h"
#include "Scintilla.h"
#include "SciLexer.h"

#ifdef SCI_NAMESPACE
using namespace Scintilla;
#endif

//XXX Identical to Perl, put in common area
static inline bool isEOLChar(char ch) {
	return (ch == '\r') || (ch == '\n');
}

static inline bool isRubyOperatorChar(char ch) {
	return strchr("%^&*\\()-+=|{}[]:;<>,/?!.~",ch) != NULL;
}


static inline bool isSafeAlpha(char ch) {
    return ((unsigned int) ch <= 127) && isalpha(ch);
}

#define MAX_KEYWORD_LENGTH 200

#define STYLE_MASK 63
#define actual_style(style) (style & STYLE_MASK)

static bool followsDot(unsigned int pos, Accessor &styler) {
    styler.Flush();
    for (; pos >= 1; --pos) {
        int style = actual_style(styler.StyleAt(pos));
        char ch;
        switch (style) {
            case SCE_RB_DEFAULT:
                ch = styler[pos];
                if (ch == ' ' || ch == '\t') {
                    //continue
                } else {
                    return false;
                }
                break;

            case SCE_RB_OPERATOR:
                return styler[pos] == '.';

            default:
                return false;
        }
    }
    return false;
}

// Forward declarations
static bool keywordIsAmbiguous(const char *prevWord);
static bool keywordDoStartsLoop(int pos,
                                Accessor &styler);
static bool keywordIsModifier(const char *word,
                              int pos,
                              Accessor &styler);

static int ClassifyWordRb(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, char *prevWord) {
	char s[100];
    unsigned int i, j;
	unsigned int lim = end - start + 1; // num chars to copy
	if (lim >= MAX_KEYWORD_LENGTH) {
		lim = MAX_KEYWORD_LENGTH - 1;
	}
	for (i = start, j = 0; j < lim; i++, j++) {
		s[j] = styler[i];
	}
    s[j] = '\0';
	int chAttr;
	if (0 == strcmp(prevWord, "class"))
		chAttr = SCE_RB_CLASSNAME;
	else if (0 == strcmp(prevWord, "module"))
		chAttr = SCE_RB_MODULE_NAME;
	else if (0 == strcmp(prevWord, "def"))
		chAttr = SCE_RB_DEFNAME;
    else if (keywords.InList(s) && !followsDot(start - 1, styler)) {
        if (keywordIsAmbiguous(s)
            && keywordIsModifier(s, start, styler)) {

            // Demoted keywords are colored as keywords,
            // but do not affect changes in indentation.
            //
            // Consider the word 'if':
            // 1. <<if test ...>> : normal
            // 2. <<stmt if test>> : demoted
            // 3. <<lhs = if ...>> : normal: start a new indent level
            // 4. <<obj.if = 10>> : color as identifer, since it follows '.'

            chAttr = SCE_RB_WORD_DEMOTED;
        } else {
            chAttr = SCE_RB_WORD;
        }
	} else
        chAttr = SCE_RB_IDENTIFIER;
	styler.ColourTo(end, chAttr);
	if (chAttr == SCE_RB_WORD) {
		strcpy(prevWord, s);
	} else {
		prevWord[0] = 0;
	}
    return chAttr;
}


//XXX Identical to Perl, put in common area
static bool isMatch(Accessor &styler, int lengthDoc, int pos, const char *val) {
	if ((pos + static_cast<int>(strlen(val))) >= lengthDoc) {
		return false;
	}
	while (*val) {
		if (*val != styler[pos++]) {
			return false;
		}
		val++;
	}
	return true;
}

// Do Ruby better -- find the end of the line, work back,
// and then check for leading white space

// Precondition: the here-doc target can be indented
static bool lookingAtHereDocDelim(Accessor	   &styler,
                                  int 			pos,
                                  int 			lengthDoc,
                                  const char   *HereDocDelim)
{
    if (!isMatch(styler, lengthDoc, pos, HereDocDelim)) {
        return false;
    }
    while (--pos > 0) {
        char ch = styler[pos];
        if (isEOLChar(ch)) {
            return true;
        } else if (ch != ' ' && ch != '\t') {
            return false;
        }
    }
    return false;
}

//XXX Identical to Perl, put in common area
static char opposite(char ch) {
	if (ch == '(')
		return ')';
	if (ch == '[')
		return ']';
	if (ch == '{')
		return '}';
	if (ch == '<')
		return '>';
	return ch;
}

// Null transitions when we see we've reached the end
// and need to relex the curr char.

static void redo_char(int &i, char &ch, char &chNext, char &chNext2,
                      int &state) {
    i--;
    chNext2 = chNext;
    chNext = ch;
    state = SCE_RB_DEFAULT;
}

static void advance_char(int &i, char &ch, char &chNext, char &chNext2) {
    i++;
    ch = chNext;
    chNext = chNext2;
}

// precondition: startPos points to one after the EOL char
static bool currLineContainsHereDelims(int& startPos,
                                       Accessor &styler) {
    if (startPos <= 1)
        return false;

    int pos;
    for (pos = startPos - 1; pos > 0; pos--) {
        char ch = styler.SafeGetCharAt(pos);
        if (isEOLChar(ch)) {
            // Leave the pointers where they are -- there are no
            // here doc delims on the current line, even if
            // the EOL isn't default style

            return false;
        } else {
            styler.Flush();
            if (actual_style(styler.StyleAt(pos)) == SCE_RB_HERE_DELIM) {
                break;
            }
        }
    }
    if (pos == 0) {
        return false;
    }
    // Update the pointers so we don't have to re-analyze the string
    startPos = pos;
    return true;
}


static bool isEmptyLine(int pos,
                        Accessor &styler) {
	int spaceFlags = 0;
	int lineCurrent = styler.GetLine(pos);
	int indentCurrent = styler.IndentAmount(lineCurrent, &spaceFlags, NULL);
    return (indentCurrent & SC_FOLDLEVELWHITEFLAG) != 0;
}

static bool RE_CanFollowKeyword(const char *keyword) {
    if (!strcmp(keyword, "and")
        || !strcmp(keyword, "begin")
        || !strcmp(keyword, "break")
        || !strcmp(keyword, "case")
        || !strcmp(keyword, "do")
        || !strcmp(keyword, "else")
        || !strcmp(keyword, "elsif")
        || !strcmp(keyword, "if")
        || !strcmp(keyword, "next")
        || !strcmp(keyword, "return")
        || !strcmp(keyword, "when")
        || !strcmp(keyword, "unless")
        || !strcmp(keyword, "until")
        || !strcmp(keyword, "not")
        || !strcmp(keyword, "or")) {
        return true;
    }
    return false;
}


//todo: if we aren't looking at a stdio character,
// move to the start of the first line that is not in a
// multi-line construct

static void synchronizeDocStart(unsigned int& startPos,
                                int &length,
                                int &initStyle,
                                Accessor &styler,
                                bool skipWhiteSpace=false) {

    styler.Flush();
    int style = actual_style(styler.StyleAt(startPos));
    switch (style) {
        case SCE_RB_STDIN:
        case SCE_RB_STDOUT:
        case SCE_RB_STDERR:
            // Don't do anything else with these.
            return;
    }

    int pos = startPos;
    // Quick way to characterize each line
    int lineStart;
    for (lineStart = styler.GetLine(pos); lineStart > 0; lineStart--) {
        // Now look at the style before the previous line's EOL
        pos = styler.LineStart(lineStart) - 1;
        if (pos <= 10) {
            lineStart = 0;
            break;
        }
        char ch = styler.SafeGetCharAt(pos);
        char chPrev = styler.SafeGetCharAt(pos - 1);
        if (ch == '\n' && chPrev == '\r') {
            pos--;
        }
        if (styler.SafeGetCharAt(pos - 1) == '\\') {
            // Continuation line -- keep going
        } else if (actual_style(styler.StyleAt(pos)) != SCE_RB_DEFAULT) {
            // Part of multi-line construct -- keep going
        } else if (currLineContainsHereDelims(pos, styler)) {
            // Keep going, with pos and length now pointing
            // at the end of the here-doc delimiter
        } else if (skipWhiteSpace && isEmptyLine(pos, styler)) {
            // Keep going
        } else {
            break;
        }
    }
    pos = styler.LineStart(lineStart);
    length += (startPos - pos);
    startPos = pos;
    initStyle = SCE_RB_DEFAULT;
}

static void ColouriseRbDoc(unsigned int startPos, int length, int initStyle,
						   WordList *keywordlists[], Accessor &styler) {

	// Lexer for Ruby often has to backtrack to start of current style to determine
	// which characters are being used as quotes, how deeply nested is the
	// start position and what the termination string is for here documents

	WordList &keywords = *keywordlists[0];

	class HereDocCls {
	public:
		int State;
        // States
        // 0: '<<' encountered
		// 1: collect the delimiter
        // 1b: text between the end of the delimiter and the EOL
		// 2: here doc text (lines after the delimiter)
		char Quote;		// the char after '<<'
		bool Quoted;		// true if Quote in ('\'','"','`')
		int DelimiterLength;	// strlen(Delimiter)
		char Delimiter[256];	// the Delimiter, limit of 256: from Perl
        bool CanBeIndented;
		HereDocCls() {
			State = 0;
			DelimiterLength = 0;
			Delimiter[0] = '\0';
            CanBeIndented = false;
		}
	};
	HereDocCls HereDoc;

	class QuoteCls {
		public:
		int  Count;
		char Up;
		char Down;
		QuoteCls() {
			this->New();
		}
		void New() {
			Count = 0;
			Up    = '\0';
			Down  = '\0';
		}
		void Open(char u) {
			Count++;
			Up    = u;
			Down  = opposite(Up);
		}
	};
	QuoteCls Quote;

    int numDots = 0;  // For numbers --
                      // Don't start lexing in the middle of a num

    synchronizeDocStart(startPos, length, initStyle, styler, // ref args
                        false);

	bool preferRE = true;
    int state = initStyle;
	int lengthDoc = startPos + length;

	char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero
	prevWord[0] = '\0';
	if (length == 0)
		return;

	char chPrev = styler.SafeGetCharAt(startPos - 1);
	char chNext = styler.SafeGetCharAt(startPos);
	// Ruby uses a different mask because bad indentation is marked by oring with 32
	styler.StartAt(startPos, 127);
	styler.StartSegment(startPos);

    static int q_states[] = {SCE_RB_STRING_Q,
                             SCE_RB_STRING_QQ,
                             SCE_RB_STRING_QR,
                             SCE_RB_STRING_QW,
                             SCE_RB_STRING_QW,
                             SCE_RB_STRING_QX};
    static const char* q_chars = "qQrwWx";

	for (int i = startPos; i < lengthDoc; i++) {
		char ch = chNext;
		chNext = styler.SafeGetCharAt(i + 1);
		char chNext2 = styler.SafeGetCharAt(i + 2);

        if (styler.IsLeadByte(ch)) {
			chNext = chNext2;
			chPrev = ' ';
			i += 1;
			continue;
		}

        // skip on DOS/Windows
        //No, don't, because some things will get tagged on,
        // so we won't recognize keywords, for example
#if 0
		if (ch == '\r' && chNext == '\n') {
	    	continue;
        }
#endif

        if (HereDoc.State == 1 && isEOLChar(ch)) {
			// Begin of here-doc (the line after the here-doc delimiter):
			HereDoc.State = 2;
			styler.ColourTo(i-1, state);
            // Don't check for a missing quote, just jump into
            // the here-doc state
            state = SCE_RB_HERE_Q;
        }

        // Regular transitions
		if (state == SCE_RB_DEFAULT) {
            if (isdigit(ch)) {
            	styler.ColourTo(i - 1, state);
				state = SCE_RB_NUMBER;
                numDots = 0;
            } else if (iswordstart(ch)) {
            	styler.ColourTo(i - 1, state);
				state = SCE_RB_WORD;
			} else if (ch == '#') {
				styler.ColourTo(i - 1, state);
				state = SCE_RB_COMMENTLINE;
			} else if (ch == '=') {
				// =begin indicates the start of a comment (doc) block
                if (i == 0 || isEOLChar(chPrev)
                    && chNext == 'b'
                    && styler.SafeGetCharAt(i + 2) == 'e'
                    && styler.SafeGetCharAt(i + 3) == 'g'
                    && styler.SafeGetCharAt(i + 4) == 'i'
                    && styler.SafeGetCharAt(i + 5) == 'n'
                    && !iswordchar(styler.SafeGetCharAt(i + 6))) {
                    styler.ColourTo(i - 1, state);
                    state = SCE_RB_POD;
				} else {
					styler.ColourTo(i - 1, state);
					styler.ColourTo(i, SCE_RB_OPERATOR);
					preferRE = true;
				}
			} else if (ch == '"') {
				styler.ColourTo(i - 1, state);
				state = SCE_RB_STRING;
				Quote.New();
				Quote.Open(ch);
			} else if (ch == '\'') {
                styler.ColourTo(i - 1, state);
                state = SCE_RB_CHARACTER;
                Quote.New();
                Quote.Open(ch);
			} else if (ch == '`') {
				styler.ColourTo(i - 1, state);
				state = SCE_RB_BACKTICKS;
				Quote.New();
				Quote.Open(ch);
			} else if (ch == '@') {
                // Instance or class var
				styler.ColourTo(i - 1, state);
                if (chNext == '@') {
                    state = SCE_RB_CLASS_VAR;
                    advance_char(i, ch, chNext, chNext2); // pass by ref
                } else {
                    state = SCE_RB_INSTANCE_VAR;
                }
			} else if (ch == '$') {
                // Check for a builtin global
				styler.ColourTo(i - 1, state);
                // Recognize it bit by bit
                state = SCE_RB_GLOBAL;
            } else if (ch == '/' && preferRE) {
                // Ambigous operator
				styler.ColourTo(i - 1, state);
				state = SCE_RB_REGEX;
                Quote.New();
                Quote.Open(ch);
			} else if (ch == '<' && chNext == '<' && chNext2 != '=') {

            // Recognise the '<<' symbol - either a here document or a binary op

				styler.ColourTo(i - 1, state);
                i++;
                chNext = chNext2;
				styler.ColourTo(i, SCE_RB_OPERATOR);

                if (preferRE) {
                    state = SCE_RB_HERE_DELIM;
				    HereDoc.State = 0;
                } else {
                    // leave state as default
                    // We don't have all the heuristics Perl has for indications
                    // of a here-doc, because '<<' is overloadable and used
                    // for so many other classes.
					preferRE = true;
                }
            } else if (ch == ':') {
				styler.ColourTo(i - 1, state);
                if (chNext == ':') {
                    // Mark "::" as an operator, not symbol start
                    styler.ColourTo(i + 1, SCE_RB_OPERATOR);
                    advance_char(i, ch, chNext, chNext2); // pass by ref
                    state = SCE_RB_DEFAULT;
					preferRE = false;
                } else if (iswordchar(chNext)) {
					state = SCE_RB_SYMBOL;
                } else if (strchr("[*!~+-*/%=<>&^|", chNext)) {
                    // Do the operator analysis in-line, looking ahead
                    // Based on the table in pickaxe 2nd ed., page 339
                    bool doColoring = true;
                    switch (chNext) {
                    case '[':
                        if (chNext2 == ']' ) {
                            char ch_tmp = styler.SafeGetCharAt(i + 3);
                            if (ch_tmp == '=') {
                                i += 3;
                                ch = ch_tmp;
                                chNext = styler.SafeGetCharAt(i + 1);
                            } else {
                                i += 2;
                                ch = chNext2;
                                chNext = ch_tmp;
                            }
                        } else {
                            doColoring = false;
                        }
                        break;

                    case '*':
                        if (chNext2 == '*') {
                            i += 2;
                            ch = chNext2;
                            chNext = styler.SafeGetCharAt(i + 1);
                        } else {
                            advance_char(i, ch, chNext, chNext2);
                        }
                        break;

                    case '!':
                        if (chNext2 == '=' || chNext2 == '~') {
                            i += 2;
                            ch = chNext2;
                            chNext = styler.SafeGetCharAt(i + 1);
                        } else {
                            advance_char(i, ch, chNext, chNext2);
                        }
                        break;

                    case '<':
                        if (chNext2 == '<') {
                            i += 2;
                            ch = chNext2;
                            chNext = styler.SafeGetCharAt(i + 1);
                        } else if (chNext2 == '=') {
                            char ch_tmp = styler.SafeGetCharAt(i + 3);
                            if (ch_tmp == '>') {  // <=> operator
                                i += 3;
                                ch = ch_tmp;
                                chNext = styler.SafeGetCharAt(i + 1);
                            } else {
                                i += 2;
                                ch = chNext2;
                                chNext = ch_tmp;
                            }
                        } else {
                            advance_char(i, ch, chNext, chNext2);
                        }
                        break;

                    default:
                        // Simple one-character operators
                        advance_char(i, ch, chNext, chNext2);
                        break;
                    }
                    if (doColoring) {
                        styler.ColourTo(i, SCE_RB_SYMBOL);
                        state = SCE_RB_DEFAULT;
                    }
				} else if (!preferRE) {
					// Don't color symbol strings (yet)
					// Just color the ":" and color rest as string
					styler.ColourTo(i, SCE_RB_SYMBOL);
					state = SCE_RB_DEFAULT;
                } else {
                    styler.ColourTo(i, SCE_RB_OPERATOR);
                    state = SCE_RB_DEFAULT;
                    preferRE = true;
                }
            } else if (ch == '%') {
                styler.ColourTo(i - 1, state);
                bool have_string = false;
                if (strchr(q_chars, chNext) && !iswordchar(chNext2)) {
                    Quote.New();
                    const char *hit = strchr(q_chars, chNext);
                    if (hit != NULL) {
                        state = q_states[hit - q_chars];
                        Quote.Open(chNext2);
                        i += 2;
                        ch = chNext2;
						chNext = styler.SafeGetCharAt(i + 1);
                        have_string = true;
                    }
                } else if (!iswordchar(chNext)) {
                    state = SCE_RB_STRING_QQ;
                    Quote.Open(chNext);
                    advance_char(i, ch, chNext, chNext2); // pass by ref
                    have_string = true;
                }
                if (!have_string) {
                    styler.ColourTo(i, SCE_RB_OPERATOR);
                    // stay in default
                    preferRE = true;
                }
            } else if (isoperator(ch)) {
				styler.ColourTo(i - 1, state);
				styler.ColourTo(i, SCE_RB_OPERATOR);
                // If we're ending an expression or block,
                // assume it ends an object, and the ambivalent
                // constructs are binary operators
                //
                // So if we don't have one of these chars,
                // we aren't ending an object exp'n, and ops
                // like : << / are unary operators.

                preferRE = (strchr(")}]", ch) == NULL);
                // Stay in default state
            } else if (isEOLChar(ch)) {
                // Make sure it's a true line-end, with no backslash
                if ((ch == '\r' || (ch == '\n' && chPrev != '\r'))
                    && chPrev != '\\') {
                    // Assume we've hit the end of the statement.
                    preferRE = true;
                }
            }
        } else if (state == SCE_RB_WORD) {
            if (ch == '.' || !iswordchar(ch)) {
                // Words include x? in all contexts,
                // and <letters>= after either 'def' or a dot
                // Move along until a complete word is on our left

                // Default accessor treats '.' as word-chars,
                // but we don't for now.

                if (ch == '='
                    && iswordchar(chPrev)
                    && (chNext == '('
                        || strchr(" \t\n\r", chNext) != NULL)
                    && (!strcmp(prevWord, "def")
                        || followsDot(styler.GetStartSegment(), styler))) {
                    // <name>= is a name only when being def'd -- Get it the next time
                    // This means that <name>=<name> is always lexed as
                    // <name>, (op, =), <name>
                } else if ((ch == '?' || ch == '!')
                           && iswordchar(chPrev)
                           && !iswordchar(chNext)) {
                    // <name>? is a name -- Get it the next time
                    // But <name>?<name> is always lexed as
                    // <name>, (op, ?), <name>
                    // Same with <name>! to indicate a method that
                    // modifies its target
                } else if (isEOLChar(ch)
                           && isMatch(styler, lengthDoc, i - 7, "__END__")) {
                    styler.ColourTo(i, SCE_RB_DATASECTION);
                    state = SCE_RB_DATASECTION;
                    // No need to handle this state -- we'll just move to the end
                    preferRE = false;
                } else {
					int wordStartPos = styler.GetStartSegment();
                    int word_style = ClassifyWordRb(wordStartPos, i - 1, keywords, styler, prevWord);
                    switch (word_style) {
                        case SCE_RB_WORD:
                            preferRE = RE_CanFollowKeyword(prevWord);
							break;

                        case SCE_RB_WORD_DEMOTED:
                            preferRE = true;
							break;

                        case SCE_RB_IDENTIFIER:
                            if (isMatch(styler, lengthDoc, wordStartPos, "print")) {
                                preferRE = true;
                            } else if (isEOLChar(ch)) {
                                preferRE = true;
                            } else {
                                preferRE = false;
                            }
							break;
                        default:
                            preferRE = false;
                    }
                    redo_char(i, ch, chNext, chNext2, state); // pass by ref
                }
            }
        } else if (state == SCE_RB_NUMBER) {
            if (isalnum(ch) || ch == '_') {
                // Keep going
            } else if (ch == '.' && ++numDots == 1) {
                // Keep going
            } else {
                styler.ColourTo(i - 1, state);
                redo_char(i, ch, chNext, chNext2, state); // pass by ref
                preferRE = false;
            }
        } else if (state == SCE_RB_COMMENTLINE) {
			if (isEOLChar(ch)) {
                styler.ColourTo(i - 1, state);
                state = SCE_RB_DEFAULT;
                // Use whatever setting we had going into the comment
            }
        } else if (state == SCE_RB_HERE_DELIM) {
            // See the comment for SCE_RB_HERE_DELIM in LexPerl.cxx
            // Slightly different: if we find an immediate '-',
            // the target can appear indented.

			if (HereDoc.State == 0) { // '<<' encountered
				HereDoc.State = 1;
                HereDoc.DelimiterLength = 0;
                if (ch == '-') {
                    HereDoc.CanBeIndented = true;
                    advance_char(i, ch, chNext, chNext2); // pass by ref
                } else {
                    HereDoc.CanBeIndented = false;
                }
                if (isEOLChar(ch)) {
                    // Bail out of doing a here doc if there's no target
                    state = SCE_RB_DEFAULT;
                    preferRE = false;
                } else {
                    HereDoc.Quote = ch;

                    if (ch == '\'' || ch == '"' || ch == '`') {
                        HereDoc.Quoted = true;
                        HereDoc.Delimiter[0] = '\0';
                    } else {
                        HereDoc.Quoted = false;
                        HereDoc.Delimiter[0] = ch;
                        HereDoc.Delimiter[1] = '\0';
                        HereDoc.DelimiterLength = 1;
                    }
                }
			} else if (HereDoc.State == 1) { // collect the delimiter
                if (isEOLChar(ch)) {
                    // End the quote now, and go back for more
                    styler.ColourTo(i - 1, state);
                    state = SCE_RB_DEFAULT;
                    i--;
                    chNext = ch;
                    chNext2 = chNext;
                    preferRE = false;
                } else if (HereDoc.Quoted) {
					if (ch == HereDoc.Quote) { // closing quote => end of delimiter
						styler.ColourTo(i, state);
						state = SCE_RB_DEFAULT;
                        preferRE = false;
                    } else {
						if (ch == '\\' && !isEOLChar(chNext)) {
                            advance_char(i, ch, chNext, chNext2);
						}
						HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;
						HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
                    }
                } else { // an unquoted here-doc delimiter
					if (isalnum(ch) || ch == '_') {
						HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;
						HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
					} else {
						styler.ColourTo(i - 1, state);
                        redo_char(i, ch, chNext, chNext2, state);
                        preferRE = false;
					}
                }
				if (HereDoc.DelimiterLength >= static_cast<int>(sizeof(HereDoc.Delimiter)) - 1) {
					styler.ColourTo(i - 1, state);
					state = SCE_RB_ERROR;
                    preferRE = false;
				}
            }
        } else if (state == SCE_RB_HERE_Q) {
            // Not needed: HereDoc.State == 2
            // Indentable here docs: look backwards
            // Non-indentable: look forwards, like in Perl
            //
            // Why: so we can quickly resolve things like <<-" abc"

            if (!HereDoc.CanBeIndented) {
                if (isEOLChar(chPrev)
                    && isMatch(styler, lengthDoc, i, HereDoc.Delimiter)) {
                    styler.ColourTo(i - 1, state);
                    i += HereDoc.DelimiterLength - 1;
                    chNext = styler.SafeGetCharAt(i + 1);
                    if (isEOLChar(chNext)) {
                        styler.ColourTo(i, SCE_RB_HERE_DELIM);
                        state = SCE_RB_DEFAULT;
                        HereDoc.State = 0;
                        preferRE = false;
                    }
                    // Otherwise we skipped through the here doc faster.
                }
            } else if (isEOLChar(chNext)
                       && lookingAtHereDocDelim(styler,
                                                i - HereDoc.DelimiterLength + 1,
                                                lengthDoc,
                                                HereDoc.Delimiter)) {
                styler.ColourTo(i - 1 - HereDoc.DelimiterLength, state);
                styler.ColourTo(i, SCE_RB_HERE_DELIM);
                state = SCE_RB_DEFAULT;
                preferRE = false;
                HereDoc.State = 0;
            }
        } else if (state == SCE_RB_CLASS_VAR
                   || state == SCE_RB_INSTANCE_VAR
                   || state == SCE_RB_SYMBOL) {
            if (!iswordchar(ch)) {
                styler.ColourTo(i - 1, state);
                redo_char(i, ch, chNext, chNext2, state); // pass by ref
                preferRE = false;
            }
        } else if (state == SCE_RB_GLOBAL) {
            if (!iswordchar(ch)) {
                // handle special globals here as well
                if (chPrev == '$') {
                    if (ch == '-') {
                        // Include the next char, like $-a
                        advance_char(i, ch, chNext, chNext2);
                    }
                    styler.ColourTo(i, state);
                    state = SCE_RB_DEFAULT;
                } else {
                    styler.ColourTo(i - 1, state);
                    redo_char(i, ch, chNext, chNext2, state); // pass by ref
                }
                preferRE = false;
            }
        } else if (state == SCE_RB_POD) {
            // PODs end with ^=end\s, -- any whitespace can follow =end
            if (strchr(" \t\n\r", ch) != NULL
                && i > 5
                && isEOLChar(styler[i - 5])
                && isMatch(styler, lengthDoc, i - 4, "=end")) {
                styler.ColourTo(i - 1, state);
                state = SCE_RB_DEFAULT;
                preferRE = false;
            }
        } else if (state == SCE_RB_REGEX || state == SCE_RB_STRING_QR) {
            if (ch == '\\' && Quote.Up != '\\') {
                // Skip one
                advance_char(i, ch, chNext, chNext2);
            } else if (ch == Quote.Down) {
                Quote.Count--;
                if (Quote.Count == 0) {
                    // Include the options
                    while (isSafeAlpha(chNext)) {
                        i++;
						ch = chNext;
                        chNext = styler.SafeGetCharAt(i + 1);
                    }
                    styler.ColourTo(i, state);
                    state = SCE_RB_DEFAULT;
                    preferRE = false;
                }
            } else if (ch == Quote.Up) {
                // Only if close quoter != open quoter
                Quote.Count++;

            } else if (ch == '#' ) {
                //todo: distinguish comments from pound chars
                // for now, handle as comment
                styler.ColourTo(i - 1, state);
                bool inEscape = false;
                while (++i < lengthDoc) {
                    ch = styler.SafeGetCharAt(i);
                    if (ch == '\\') {
                        inEscape = true;
                    } else if (isEOLChar(ch)) {
                        // Comment inside a regex
                        styler.ColourTo(i - 1, SCE_RB_COMMENTLINE);
                        break;
                    } else if (inEscape) {
                        inEscape = false;  // don't look at char
                    } else if (ch == Quote.Down) {
                        // Have the regular handler deal with this
                        // to get trailing modifiers.
                        i--;
                        ch = styler[i];
						break;
                    }
                }
                chNext = styler.SafeGetCharAt(i + 1);
                chNext2 = styler.SafeGetCharAt(i + 2);
            }
        // Quotes of all kinds...
        } else if (state == SCE_RB_STRING_Q || state == SCE_RB_STRING_QQ ||
                   state == SCE_RB_STRING_QX || state == SCE_RB_STRING_QW ||
                   state == SCE_RB_STRING || state == SCE_RB_CHARACTER ||
                   state == SCE_RB_BACKTICKS) {
            if (!Quote.Down && !isspacechar(ch)) {
                Quote.Open(ch);
            } else if (ch == '\\' && Quote.Up != '\\') {
                //Riddle me this: Is it safe to skip *every* escaped char?
                advance_char(i, ch, chNext, chNext2);
            } else if (ch == Quote.Down) {
                Quote.Count--;
                if (Quote.Count == 0) {
                    styler.ColourTo(i, state);
                    state = SCE_RB_DEFAULT;
                    preferRE = false;
                }
            } else if (ch == Quote.Up) {
                Quote.Count++;
            }
        }

        if (state == SCE_RB_ERROR) {
            break;
        }
        chPrev = ch;
    }
    if (state == SCE_RB_WORD) {
        // We've ended on a word, possibly at EOF, and need to
        // classify it.
        (void) ClassifyWordRb(styler.GetStartSegment(), lengthDoc - 1, keywords, styler, prevWord);
    } else {
        styler.ColourTo(lengthDoc - 1, state);
    }
}

// Helper functions for folding

static void getPrevWord(int pos,
                        char *prevWord,
                        Accessor &styler,
                        int word_state)
{
    int i;
    styler.Flush();
    for (i = pos - 1; i > 0; i--) {
        if (actual_style(styler.StyleAt(i)) != word_state) {
            i++;
            break;
        }
    }
    if (i < pos - MAX_KEYWORD_LENGTH) // overflow
        i = pos - MAX_KEYWORD_LENGTH;
    char *dst = prevWord;
    for (; i <= pos; i++) {
        *dst++ = styler[i];
    }
	*dst = 0;
}

static bool keywordIsAmbiguous(const char *prevWord)
{
    // Order from most likely used to least likely
    // Lots of ways to do a loop in Ruby besides 'while/until'
    if (!strcmp(prevWord, "if")
        || !strcmp(prevWord, "do")
        || !strcmp(prevWord, "while")
        || !strcmp(prevWord, "unless")
        || !strcmp(prevWord, "until")) {
        return true;
    } else {
        return false;
    }
}

static bool inline iswhitespace(char ch) {
	return ch == ' ' || ch == '\t';
}

// Demote keywords in the following conditions:
// if, while, unless, until modify a statement
// do after a while or until, as a noise word (like then after if)

static bool keywordIsModifier(const char *word,
                              int pos,
                              Accessor &styler)
{
    if (word[0] == 'd' && word[1] == 'o' && !word[2]) {
        return keywordDoStartsLoop(pos, styler);
    }
    char ch;
    int style = SCE_RB_DEFAULT;
	int lineStart = styler.GetLine(pos);
    int lineStartPosn = styler.LineStart(lineStart);
    styler.Flush();
    while (--pos >= lineStartPosn) {
        style = actual_style(styler.StyleAt(pos));
		if (style == SCE_RB_DEFAULT) {
			if (iswhitespace(ch = styler[pos])) {
				//continue
			} else if (ch == '\r' || ch == '\n') {
				// Scintilla's LineStart() and GetLine() routines aren't
				// platform-independent, so if we have text prepared with
				// a different system we can't rely on it.
				return false;
			}
		} else {
            break;
		}
    }
    if (pos < lineStartPosn) {
        return false; //XXX not quite right if the prev line is a continuation
    }
    // First things where the action is unambiguous
    switch (style) {
        case SCE_RB_DEFAULT:
        case SCE_RB_COMMENTLINE:
        case SCE_RB_POD:
        case SCE_RB_CLASSNAME:
        case SCE_RB_DEFNAME:
        case SCE_RB_MODULE_NAME:
            return false;
        case SCE_RB_OPERATOR:
            break;
        case SCE_RB_WORD:
            // Watch out for uses of 'else if'
            //XXX: Make a list of other keywords where 'if' isn't a modifier
            //     and can appear legitimately
            // Formulate this to avoid warnings from most compilers
            if (strcmp(word, "if") == 0) {
                char prevWord[MAX_KEYWORD_LENGTH + 1];
                getPrevWord(pos, prevWord, styler, SCE_RB_WORD);
                return strcmp(prevWord, "else") != 0;
            }
            return true;
        default:
            return true;
    }
    // Assume that if the keyword follows an operator,
    // usually it's a block assignment, like
    // a << if x then y else z

    ch = styler[pos];
    switch (ch) {
        case ')':
        case ']':
        case '}':
            return true;
        default:
            return false;
    }
}

#define WHILE_BACKWARDS "elihw"
#define UNTIL_BACKWARDS "litnu"

// Nothing fancy -- look to see if we follow a while/until somewhere
// on the current line

static bool keywordDoStartsLoop(int pos,
                                Accessor &styler)
{
    char ch;
    int style;
	int lineStart = styler.GetLine(pos);
    int lineStartPosn = styler.LineStart(lineStart);
    styler.Flush();
    while (--pos >= lineStartPosn) {
        style = actual_style(styler.StyleAt(pos));
		if (style == SCE_RB_DEFAULT) {
			if ((ch = styler[pos]) == '\r' || ch == '\n') {
				// Scintilla's LineStart() and GetLine() routines aren't
				// platform-independent, so if we have text prepared with
				// a different system we can't rely on it.
				return false;
			}
		} else if (style == SCE_RB_WORD) {
            // Check for while or until, but write the word in backwards
            char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero
            char *dst = prevWord;
            int wordLen = 0;
            int start_word;
            for (start_word = pos;
                 start_word >= lineStartPosn && actual_style(styler.StyleAt(start_word)) == SCE_RB_WORD;
                 start_word--) {
                if (++wordLen < MAX_KEYWORD_LENGTH) {
                    *dst++ = styler[start_word];
                }
            }
            *dst = 0;
            // Did we see our keyword?
            if (!strcmp(prevWord, WHILE_BACKWARDS)
                || !strcmp(prevWord, UNTIL_BACKWARDS)) {
                return true;
            }
            // We can move pos to the beginning of the keyword, and then
            // accept another decrement, as we can never have two contiguous
            // keywords:
            // word1 word2
            //           ^
            //        <-  move to start_word
            //      ^
            //      <- loop decrement
            //     ^  # pointing to end of word1 is fine
            pos = start_word;
        }
    }
    return false;
}

/*
 *  Folding Ruby
 *
 *  The language is quite complex to analyze without a full parse.
 *  For example, this line shouldn't affect fold level:
 *
 *   print "hello" if feeling_friendly?
 *
 *  Neither should this:
 *
 *   print "hello" \
 *      if feeling_friendly?
 *
 *
 *  But this should:
 *
 *   if feeling_friendly?  #++
 *     print "hello" \
 *     print "goodbye"
 *   end                   #--
 *
 *  So we cheat, by actually looking at the existing indentation
 *  levels for each line, and just echoing it back.  Like Python.
 *  Then if we get better at it, we'll take braces into consideration,
 *  which always affect folding levels.

 *  How the keywords should work:
 *  No effect:
 *  __FILE__ __LINE__ BEGIN END alias and
 *  defined? false in nil not or self super then
 *  true undef

 *  Always increment:
 *  begin  class def do for module when {
 *
 *  Always decrement:
 *  end }
 *
 *  Increment if these start a statement
 *  if unless until while -- do nothing if they're modifiers

 *  These end a block if there's no modifier, but don't bother
 *  break next redo retry return yield
 *
 *  These temporarily de-indent, but re-indent
 *  case else elsif ensure rescue
 *
 *  This means that the folder reflects indentation rather
 *  than setting it.  The language-service updates indentation
 *  when users type return and finishes entering de-denters.
 *
 *  Later offer to fold POD, here-docs, strings, and blocks of comments
 */

static void FoldRbDoc(unsigned int startPos, int length, int initStyle,
                      WordList *[], Accessor &styler) {
	const bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
	bool foldComment = styler.GetPropertyInt("fold.comment") != 0;

    synchronizeDocStart(startPos, length, initStyle, styler, // ref args
                        false);
	unsigned int endPos = startPos + length;
	int visibleChars = 0;
	int lineCurrent = styler.GetLine(startPos);
	int levelPrev = startPos == 0 ? 0 : (styler.LevelAt(lineCurrent)
                                         & SC_FOLDLEVELNUMBERMASK
                                         & ~SC_FOLDLEVELBASE);
	int levelCurrent = levelPrev;
	char chNext = styler[startPos];
	int styleNext = styler.StyleAt(startPos);
	int stylePrev = startPos <= 1 ? SCE_RB_DEFAULT : styler.StyleAt(startPos - 1);
    bool buffer_ends_with_eol = false;
	for (unsigned int i = startPos; i < endPos; i++) {
		char ch = chNext;
		chNext = styler.SafeGetCharAt(i + 1);
		int style = styleNext;
		styleNext = styler.StyleAt(i + 1);
		bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
        if (style == SCE_RB_COMMENTLINE) {
            if (foldComment && stylePrev != SCE_RB_COMMENTLINE) {
                if (chNext == '{') {
					levelCurrent++;
				} else if (chNext == '}') {
					levelCurrent--;
				}
            }
        } else if (style == SCE_RB_OPERATOR) {
			if (strchr("[{(", ch)) {
				levelCurrent++;
			} else if (strchr(")}]", ch)) {
                // Don't decrement below 0
                if (levelCurrent > 0)
                    levelCurrent--;
			}
        } else if (style == SCE_RB_WORD && styleNext != SCE_RB_WORD) {
            // Look at the keyword on the left and decide what to do
            char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero
            prevWord[0] = 0;
            getPrevWord(i, prevWord, styler, SCE_RB_WORD);
            if (!strcmp(prevWord, "end")) {
                // Don't decrement below 0
                if (levelCurrent > 0)
                    levelCurrent--;
            } else if (   !strcmp(prevWord, "if")
                       || !strcmp(prevWord, "def")
                       || !strcmp(prevWord, "class")
                       || !strcmp(prevWord, "module")
                       || !strcmp(prevWord, "begin")
                       || !strcmp(prevWord, "case")
                       || !strcmp(prevWord, "do")
                       || !strcmp(prevWord, "while")
                       || !strcmp(prevWord, "unless")
                       || !strcmp(prevWord, "until")
                       || !strcmp(prevWord, "for")
                          ) {
				levelCurrent++;
            }
        }
		if (atEOL) {
			int lev = levelPrev;
			if (visibleChars == 0 && foldCompact)
				lev |= SC_FOLDLEVELWHITEFLAG;
			if ((levelCurrent > levelPrev) && (visibleChars > 0))
				lev |= SC_FOLDLEVELHEADERFLAG;
            styler.SetLevel(lineCurrent, lev|SC_FOLDLEVELBASE);
			lineCurrent++;
			levelPrev = levelCurrent;
			visibleChars = 0;
            buffer_ends_with_eol = true;
		} else if (!isspacechar(ch)) {
			visibleChars++;
            buffer_ends_with_eol = false;
        }
    }
	// Fill in the real level of the next line, keeping the current flags as they will be filled in later
    if (!buffer_ends_with_eol) {
        lineCurrent++;
        int new_lev = levelCurrent;
        if (visibleChars == 0 && foldCompact)
            new_lev |= SC_FOLDLEVELWHITEFLAG;
			if ((levelCurrent > levelPrev) && (visibleChars > 0))
				new_lev |= SC_FOLDLEVELHEADERFLAG;
            levelCurrent = new_lev;
    }
	styler.SetLevel(lineCurrent, levelCurrent|SC_FOLDLEVELBASE);
}

static const char * const rubyWordListDesc[] = {
	"Keywords",
	0
};

LexerModule lmRuby(SCLEX_RUBY, ColouriseRbDoc, "ruby", FoldRbDoc, rubyWordListDesc);