1 /** @file LexRust.cxx
2  ** Lexer for Rust.
3  **
4  ** Copyright (c) 2013 by SiegeLord <slabode@aim.com>
5  ** Converted to lexer object and added further folding features/properties by "Udo Lechner" <dlchnr(at)gmx(dot)net>
6  **/
7 // Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>
8 // The License.txt file describes the conditions under which this software may be distributed.
9 
10 #include <stdlib.h>
11 #include <string.h>
12 #include <stdio.h>
13 #include <stdarg.h>
14 #include <assert.h>
15 #include <ctype.h>
16 
17 #include <string>
18 #include <map>
19 
20 #include "ILexer.h"
21 #include "Scintilla.h"
22 #include "SciLexer.h"
23 
24 #include "PropSetSimple.h"
25 #include "WordList.h"
26 #include "LexAccessor.h"
27 #include "Accessor.h"
28 #include "StyleContext.h"
29 #include "CharacterSet.h"
30 #include "LexerModule.h"
31 #include "OptionSet.h"
32 #include "DefaultLexer.h"
33 
34 using namespace Scintilla;
35 
// Number of keyword lists held by the lexer (size of LexerRust::keywords).
static constexpr int NUM_RUST_KEYWORD_LISTS = 7;
// Upper bound on the identifier characters copied out for keyword lookup.
static constexpr int MAX_RUST_IDENT_CHARS = 1023;
38 
IsStreamCommentStyle(int style)39 static bool IsStreamCommentStyle(int style) {
40 	return style == SCE_RUST_COMMENTBLOCK ||
41 		   style == SCE_RUST_COMMENTBLOCKDOC;
42 }
43 
// Option values used by LexerRust. The constructor establishes the defaults;
// OptionSetRust binds each member to a property name.
struct OptionsRust {
	bool fold;
	bool foldSyntaxBased;
	bool foldComment;
	bool foldCommentMultiline;
	bool foldCommentExplicit;
	std::string foldExplicitStart;
	std::string foldExplicitEnd;
	bool foldExplicitAnywhere;
	bool foldCompact;
	int  foldAtElseInt;
	bool foldAtElse;

	// Initialisers are listed in declaration order; both marker strings start empty.
	OptionsRust() :
		fold(false),
		foldSyntaxBased(true),
		foldComment(false),
		foldCommentMultiline(true),
		foldCommentExplicit(true),
		foldExplicitStart(),
		foldExplicitEnd(),
		foldExplicitAnywhere(false),
		foldCompact(true),
		foldAtElseInt(-1),
		foldAtElse(false) {
	}
};
71 
72 static const char * const rustWordLists[NUM_RUST_KEYWORD_LISTS + 1] = {
73 			"Primary keywords and identifiers",
74 			"Built in types",
75 			"Other keywords",
76 			"Keywords 4",
77 			"Keywords 5",
78 			"Keywords 6",
79 			"Keywords 7",
80 			0,
81 		};
82 
83 struct OptionSetRust : public OptionSet<OptionsRust> {
OptionSetRustOptionSetRust84 	OptionSetRust() {
85 		DefineProperty("fold", &OptionsRust::fold);
86 
87 		DefineProperty("fold.comment", &OptionsRust::foldComment);
88 
89 		DefineProperty("fold.compact", &OptionsRust::foldCompact);
90 
91 		DefineProperty("fold.at.else", &OptionsRust::foldAtElse);
92 
93 		DefineProperty("fold.rust.syntax.based", &OptionsRust::foldSyntaxBased,
94 			"Set this property to 0 to disable syntax based folding.");
95 
96 		DefineProperty("fold.rust.comment.multiline", &OptionsRust::foldCommentMultiline,
97 			"Set this property to 0 to disable folding multi-line comments when fold.comment=1.");
98 
99 		DefineProperty("fold.rust.comment.explicit", &OptionsRust::foldCommentExplicit,
100 			"Set this property to 0 to disable folding explicit fold points when fold.comment=1.");
101 
102 		DefineProperty("fold.rust.explicit.start", &OptionsRust::foldExplicitStart,
103 			"The string to use for explicit fold start points, replacing the standard //{.");
104 
105 		DefineProperty("fold.rust.explicit.end", &OptionsRust::foldExplicitEnd,
106 			"The string to use for explicit fold end points, replacing the standard //}.");
107 
108 		DefineProperty("fold.rust.explicit.anywhere", &OptionsRust::foldExplicitAnywhere,
109 			"Set this property to 1 to enable explicit fold points anywhere, not just in line comments.");
110 
111 		DefineProperty("lexer.rust.fold.at.else", &OptionsRust::foldAtElseInt,
112 			"This option enables Rust folding on a \"} else {\" line of an if statement.");
113 
114 		DefineWordListSets(rustWordLists);
115 	}
116 };
117 
118 class LexerRust : public DefaultLexer {
119 	WordList keywords[NUM_RUST_KEYWORD_LISTS];
120 	OptionsRust options;
121 	OptionSetRust osRust;
122 public:
~LexerRust()123 	virtual ~LexerRust() {
124 	}
Release()125 	void SCI_METHOD Release() override {
126 		delete this;
127 	}
Version() const128 	int SCI_METHOD Version() const override {
129 		return lvOriginal;
130 	}
PropertyNames()131 	const char * SCI_METHOD PropertyNames() override {
132 		return osRust.PropertyNames();
133 	}
PropertyType(const char * name)134 	int SCI_METHOD PropertyType(const char *name) override {
135 		return osRust.PropertyType(name);
136 	}
DescribeProperty(const char * name)137 	const char * SCI_METHOD DescribeProperty(const char *name) override {
138 		return osRust.DescribeProperty(name);
139 	}
140 	Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override;
DescribeWordListSets()141 	const char * SCI_METHOD DescribeWordListSets() override {
142 		return osRust.DescribeWordListSets();
143 	}
144 	Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;
145 	void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
146 	void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
PrivateCall(int,void *)147 	void * SCI_METHOD PrivateCall(int, void *) override {
148 		return 0;
149 	}
LexerFactoryRust()150 	static ILexer *LexerFactoryRust() {
151 		return new LexerRust();
152 	}
153 };
154 
PropertySet(const char * key,const char * val)155 Sci_Position SCI_METHOD LexerRust::PropertySet(const char *key, const char *val) {
156 	if (osRust.PropertySet(&options, key, val)) {
157 		return 0;
158 	}
159 	return -1;
160 }
161 
WordListSet(int n,const char * wl)162 Sci_Position SCI_METHOD LexerRust::WordListSet(int n, const char *wl) {
163 	Sci_Position firstModification = -1;
164 	if (n < NUM_RUST_KEYWORD_LISTS) {
165 		WordList *wordListN = &keywords[n];
166 		WordList wlNew;
167 		wlNew.Set(wl);
168 		if (*wordListN != wlNew) {
169 			wordListN->Set(wl);
170 			firstModification = 0;
171 		}
172 	}
173 	return firstModification;
174 }
175 
/* True for space, tab, carriage return and line feed only. */
static bool IsWhitespace(int c) {
	switch (c) {
	case ' ':
	case '\t':
	case '\r':
	case '\n':
		return true;
	default:
		return false;
	}
}
179 
180 /* This isn't quite right for Unicode identifiers */
IsIdentifierStart(int ch)181 static bool IsIdentifierStart(int ch) {
182 	return (IsASCII(ch) && (isalpha(ch) || ch == '_')) || !IsASCII(ch);
183 }
184 
185 /* This isn't quite right for Unicode identifiers */
IsIdentifierContinue(int ch)186 static bool IsIdentifierContinue(int ch) {
187 	return (IsASCII(ch) && (isalnum(ch) || ch == '_')) || !IsASCII(ch);
188 }
189 
ScanWhitespace(Accessor & styler,Sci_Position & pos,Sci_Position max)190 static void ScanWhitespace(Accessor& styler, Sci_Position& pos, Sci_Position max) {
191 	while (IsWhitespace(styler.SafeGetCharAt(pos, '\0')) && pos < max) {
192 		if (pos == styler.LineEnd(styler.GetLine(pos)))
193 			styler.SetLineState(styler.GetLine(pos), 0);
194 		pos++;
195 	}
196 	styler.ColourTo(pos-1, SCE_RUST_DEFAULT);
197 }
198 
GrabString(char * s,Accessor & styler,Sci_Position start,Sci_Position len)199 static void GrabString(char* s, Accessor& styler, Sci_Position start, Sci_Position len) {
200 	for (Sci_Position ii = 0; ii < len; ii++)
201 		s[ii] = styler[ii + start];
202 	s[len] = '\0';
203 }
204 
ScanIdentifier(Accessor & styler,Sci_Position & pos,WordList * keywords)205 static void ScanIdentifier(Accessor& styler, Sci_Position& pos, WordList *keywords) {
206 	Sci_Position start = pos;
207 	while (IsIdentifierContinue(styler.SafeGetCharAt(pos, '\0')))
208 		pos++;
209 
210 	if (styler.SafeGetCharAt(pos, '\0') == '!') {
211 		pos++;
212 		styler.ColourTo(pos - 1, SCE_RUST_MACRO);
213 	} else {
214 		char s[MAX_RUST_IDENT_CHARS + 1];
215 		Sci_Position len = pos - start;
216 		len = len > MAX_RUST_IDENT_CHARS ? MAX_RUST_IDENT_CHARS : len;
217 		GrabString(s, styler, start, len);
218 		bool keyword = false;
219 		for (int ii = 0; ii < NUM_RUST_KEYWORD_LISTS; ii++) {
220 			if (keywords[ii].InList(s)) {
221 				styler.ColourTo(pos - 1, SCE_RUST_WORD + ii);
222 				keyword = true;
223 				break;
224 			}
225 		}
226 		if (!keyword) {
227 			styler.ColourTo(pos - 1, SCE_RUST_IDENTIFIER);
228 		}
229 	}
230 }
231 
232 /* Scans a sequence of digits, returning true if it found any. */
ScanDigits(Accessor & styler,Sci_Position & pos,int base)233 static bool ScanDigits(Accessor& styler, Sci_Position& pos, int base) {
234 	Sci_Position old_pos = pos;
235 	for (;;) {
236 		int c = styler.SafeGetCharAt(pos, '\0');
237 		if (IsADigit(c, base) || c == '_')
238 			pos++;
239 		else
240 			break;
241 	}
242 	return old_pos != pos;
243 }
244 
245 /* Scans an integer and floating point literals. */
ScanNumber(Accessor & styler,Sci_Position & pos)246 static void ScanNumber(Accessor& styler, Sci_Position& pos) {
247 	int base = 10;
248 	int c = styler.SafeGetCharAt(pos, '\0');
249 	int n = styler.SafeGetCharAt(pos + 1, '\0');
250 	bool error = false;
251 	/* Scan the prefix, thus determining the base.
252 	 * 10 is default if there's no prefix. */
253 	if (c == '0' && n == 'x') {
254 		pos += 2;
255 		base = 16;
256 	} else if (c == '0' && n == 'b') {
257 		pos += 2;
258 		base = 2;
259 	} else if (c == '0' && n == 'o') {
260 		pos += 2;
261 		base = 8;
262 	}
263 
264 	/* Scan initial digits. The literal is malformed if there are none. */
265 	error |= !ScanDigits(styler, pos, base);
266 	/* See if there's an integer suffix. We mimic the Rust's lexer
267 	 * and munch it even if there was an error above. */
268 	c = styler.SafeGetCharAt(pos, '\0');
269 	if (c == 'u' || c == 'i') {
270 		pos++;
271 		c = styler.SafeGetCharAt(pos, '\0');
272 		n = styler.SafeGetCharAt(pos + 1, '\0');
273 		if (c == '8') {
274 			pos++;
275 		} else if (c == '1' && n == '6') {
276 			pos += 2;
277 		} else if (c == '3' && n == '2') {
278 			pos += 2;
279 		} else if (c == '6' && n == '4') {
280 			pos += 2;
281 		} else if (styler.Match(pos, "size")) {
282 			pos += 4;
283 		} else {
284 			error = true;
285 		}
286 	/* See if it's a floating point literal. These literals have to be base 10.
287 	 */
288 	} else if (!error) {
289 		/* If there's a period, it's a floating point literal unless it's
290 		 * followed by an identifier (meaning this is a method call, e.g.
291 		 * `1.foo()`) or another period, in which case it's a range (e.g. 1..2)
292 		 */
293 		n = styler.SafeGetCharAt(pos + 1, '\0');
294 		if (c == '.' && !(IsIdentifierStart(n) || n == '.')) {
295 			error |= base != 10;
296 			pos++;
297 			/* It's ok to have no digits after the period. */
298 			ScanDigits(styler, pos, 10);
299 		}
300 
301 		/* Look for the exponentiation. */
302 		c = styler.SafeGetCharAt(pos, '\0');
303 		if (c == 'e' || c == 'E') {
304 			error |= base != 10;
305 			pos++;
306 			c = styler.SafeGetCharAt(pos, '\0');
307 			if (c == '-' || c == '+')
308 				pos++;
309 			/* It is invalid to have no digits in the exponent. */
310 			error |= !ScanDigits(styler, pos, 10);
311 		}
312 
313 		/* Scan the floating point suffix. */
314 		c = styler.SafeGetCharAt(pos, '\0');
315 		if (c == 'f') {
316 			error |= base != 10;
317 			pos++;
318 			c = styler.SafeGetCharAt(pos, '\0');
319 			n = styler.SafeGetCharAt(pos + 1, '\0');
320 			if (c == '3' && n == '2') {
321 				pos += 2;
322 			} else if (c == '6' && n == '4') {
323 				pos += 2;
324 			} else {
325 				error = true;
326 			}
327 		}
328 	}
329 
330 	if (error)
331 		styler.ColourTo(pos - 1, SCE_RUST_LEXERROR);
332 	else
333 		styler.ColourTo(pos - 1, SCE_RUST_NUMBER);
334 }
335 
/* True when c is one of the single-character operator/punctuation symbols. */
static bool IsOneCharOperator(int c) {
	switch (c) {
	case ';': case ',': case '(': case ')':
	case '{': case '}': case '[': case ']':
	case '@': case '#': case '~': case '+':
	case '*': case '/': case '^': case '%':
	case '.': case ':': case '!': case '<':
	case '>': case '=': case '-': case '&':
	case '|': case '$': case '?':
		return true;
	default:
		return false;
	}
}
345 
/* True when the pair (c, n) forms a two-character operator. The pairs are
 * grouped by their first character. */
static bool IsTwoCharOperator(int c, int n) {
	switch (c) {
	case '.':
		return n == '.';
	case ':':
		return n == ':';
	case '!':
		return n == '=';
	case '<':
		return n == '<' || n == '=';
	case '>':
		return n == '>' || n == '=';
	case '=':
		return n == '=' || n == '>';
	case '-':
		return n == '>' || n == '=';
	case '&':
		return n == '&' || n == '=';
	case '|':
		return n == '|' || n == '=';
	case '+':
	case '*':
	case '/':
	case '^':
	case '%':
		return n == '=';
	default:
		return false;
	}
}
358 
/* True for the two three-character operators "<<=" and ">>=". */
static bool IsThreeCharOperator(int c, int n, int n2) {
	if (n2 != '=')
		return false;
	if (c != n)
		return false;
	return c == '<' || c == '>';
}
363 
/* True when c may follow a backslash as a simple (single-character) escape
 * in a character literal. */
static bool IsValidCharacterEscape(int c) {
	switch (c) {
	case 'n':
	case 'r':
	case 't':
	case '\\':
	case '\'':
	case '"':
	case '0':
		return true;
	default:
		return false;
	}
}
368 
IsValidStringEscape(int c)369 static bool IsValidStringEscape(int c) {
370 	return IsValidCharacterEscape(c) || c == '\n' || c == '\r';
371 }
372 
ScanNumericEscape(Accessor & styler,Sci_Position & pos,Sci_Position num_digits,bool stop_asap)373 static bool ScanNumericEscape(Accessor &styler, Sci_Position& pos, Sci_Position num_digits, bool stop_asap) {
374 	for (;;) {
375 		int c = styler.SafeGetCharAt(pos, '\0');
376 		if (!IsADigit(c, 16))
377 			break;
378 		num_digits--;
379 		pos++;
380 		if (num_digits == 0 && stop_asap)
381 			return true;
382 	}
383 	if (num_digits == 0) {
384 		return true;
385 	} else {
386 		return false;
387 	}
388 }
389 
390 /* This is overly permissive for character literals in order to accept UTF-8 encoded
391  * character literals. */
ScanCharacterLiteralOrLifetime(Accessor & styler,Sci_Position & pos,bool ascii_only)392 static void ScanCharacterLiteralOrLifetime(Accessor &styler, Sci_Position& pos, bool ascii_only) {
393 	pos++;
394 	int c = styler.SafeGetCharAt(pos, '\0');
395 	int n = styler.SafeGetCharAt(pos + 1, '\0');
396 	bool done = false;
397 	bool valid_lifetime = !ascii_only && IsIdentifierStart(c);
398 	bool valid_char = true;
399 	bool first = true;
400 	while (!done) {
401 		switch (c) {
402 			case '\\':
403 				done = true;
404 				if (IsValidCharacterEscape(n)) {
405 					pos += 2;
406 				} else if (n == 'x') {
407 					pos += 2;
408 					valid_char = ScanNumericEscape(styler, pos, 2, false);
409 				} else if (n == 'u' && !ascii_only) {
410 					pos += 2;
411 					if (styler.SafeGetCharAt(pos, '\0') != '{') {
412 						// old-style
413 						valid_char = ScanNumericEscape(styler, pos, 4, false);
414 					} else {
415 						int n_digits = 0;
416 						while (IsADigit(styler.SafeGetCharAt(++pos, '\0'), 16) && n_digits++ < 6) {
417 						}
418 						if (n_digits > 0 && styler.SafeGetCharAt(pos, '\0') == '}')
419 							pos++;
420 						else
421 							valid_char = false;
422 					}
423 				} else if (n == 'U' && !ascii_only) {
424 					pos += 2;
425 					valid_char = ScanNumericEscape(styler, pos, 8, false);
426 				} else {
427 					valid_char = false;
428 				}
429 				break;
430 			case '\'':
431 				valid_char = !first;
432 				done = true;
433 				break;
434 			case '\t':
435 			case '\n':
436 			case '\r':
437 			case '\0':
438 				valid_char = false;
439 				done = true;
440 				break;
441 			default:
442 				if (ascii_only && !IsASCII((char)c)) {
443 					done = true;
444 					valid_char = false;
445 				} else if (!IsIdentifierContinue(c) && !first) {
446 					done = true;
447 				} else {
448 					pos++;
449 				}
450 				break;
451 		}
452 		c = styler.SafeGetCharAt(pos, '\0');
453 		n = styler.SafeGetCharAt(pos + 1, '\0');
454 
455 		first = false;
456 	}
457 	if (styler.SafeGetCharAt(pos, '\0') == '\'') {
458 		valid_lifetime = false;
459 	} else {
460 		valid_char = false;
461 	}
462 	if (valid_lifetime) {
463 		styler.ColourTo(pos - 1, SCE_RUST_LIFETIME);
464 	} else if (valid_char) {
465 		pos++;
466 		styler.ColourTo(pos - 1, ascii_only ? SCE_RUST_BYTECHARACTER : SCE_RUST_CHARACTER);
467 	} else {
468 		styler.ColourTo(pos - 1, SCE_RUST_LEXERROR);
469 	}
470 }
471 
// What is known about a comment when scanning of it (re)starts.
enum CommentState {
	UnknownComment = 0,	// scanning from the comment's start: decide from its first characters
	DocComment = 1,		// already known to be a doc comment
	NotDocComment = 2	// already known to be a regular comment
};
477 
478 /*
479  * The rule for block-doc comments is as follows: /xxN and /x! (where x is an asterisk, N is a non-asterisk) start doc comments.
480  * Otherwise it's a regular comment.
481  */
ResumeBlockComment(Accessor & styler,Sci_Position & pos,Sci_Position max,CommentState state,int level)482 static void ResumeBlockComment(Accessor &styler, Sci_Position& pos, Sci_Position max, CommentState state, int level) {
483 	int c = styler.SafeGetCharAt(pos, '\0');
484 	bool maybe_doc_comment = false;
485 	if (c == '*') {
486 		int n = styler.SafeGetCharAt(pos + 1, '\0');
487 		if (n != '*' && n != '/') {
488 			maybe_doc_comment = true;
489 		}
490 	} else if (c == '!') {
491 		maybe_doc_comment = true;
492 	}
493 
494 	for (;;) {
495 		int n = styler.SafeGetCharAt(pos + 1, '\0');
496 		if (pos == styler.LineEnd(styler.GetLine(pos)))
497 			styler.SetLineState(styler.GetLine(pos), level);
498 		if (c == '*') {
499 			pos++;
500 			if (n == '/') {
501 				pos++;
502 				level--;
503 				if (level == 0) {
504 					styler.SetLineState(styler.GetLine(pos), 0);
505 					if (state == DocComment || (state == UnknownComment && maybe_doc_comment))
506 						styler.ColourTo(pos - 1, SCE_RUST_COMMENTBLOCKDOC);
507 					else
508 						styler.ColourTo(pos - 1, SCE_RUST_COMMENTBLOCK);
509 					break;
510 				}
511 			}
512 		} else if (c == '/') {
513 			pos++;
514 			if (n == '*') {
515 				pos++;
516 				level++;
517 			}
518 		}
519 		else {
520 			pos++;
521 		}
522 		if (pos >= max) {
523 			if (state == DocComment || (state == UnknownComment && maybe_doc_comment))
524 				styler.ColourTo(pos - 1, SCE_RUST_COMMENTBLOCKDOC);
525 			else
526 				styler.ColourTo(pos - 1, SCE_RUST_COMMENTBLOCK);
527 			break;
528 		}
529 		c = styler.SafeGetCharAt(pos, '\0');
530 	}
531 }
532 
533 /*
534  * The rule for line-doc comments is as follows... ///N and //! (where N is a non slash) start doc comments.
535  * Otherwise it's a normal line comment.
536  */
ResumeLineComment(Accessor & styler,Sci_Position & pos,Sci_Position max,CommentState state)537 static void ResumeLineComment(Accessor &styler, Sci_Position& pos, Sci_Position max, CommentState state) {
538 	bool maybe_doc_comment = false;
539 	int c = styler.SafeGetCharAt(pos, '\0');
540 	if (c == '/') {
541 		if (pos < max) {
542 			pos++;
543 			c = styler.SafeGetCharAt(pos, '\0');
544 			if (c != '/') {
545 				maybe_doc_comment = true;
546 			}
547 		}
548 	} else if (c == '!') {
549 		maybe_doc_comment = true;
550 	}
551 
552 	while (pos < max && c != '\n') {
553 		if (pos == styler.LineEnd(styler.GetLine(pos)))
554 			styler.SetLineState(styler.GetLine(pos), 0);
555 		pos++;
556 		c = styler.SafeGetCharAt(pos, '\0');
557 	}
558 
559 	if (state == DocComment || (state == UnknownComment && maybe_doc_comment))
560 		styler.ColourTo(pos - 1, SCE_RUST_COMMENTLINEDOC);
561 	else
562 		styler.ColourTo(pos - 1, SCE_RUST_COMMENTLINE);
563 }
564 
ScanComments(Accessor & styler,Sci_Position & pos,Sci_Position max)565 static void ScanComments(Accessor &styler, Sci_Position& pos, Sci_Position max) {
566 	pos++;
567 	int c = styler.SafeGetCharAt(pos, '\0');
568 	pos++;
569 	if (c == '/')
570 		ResumeLineComment(styler, pos, max, UnknownComment);
571 	else if (c == '*')
572 		ResumeBlockComment(styler, pos, max, UnknownComment, 1);
573 }
574 
ResumeString(Accessor & styler,Sci_Position & pos,Sci_Position max,bool ascii_only)575 static void ResumeString(Accessor &styler, Sci_Position& pos, Sci_Position max, bool ascii_only) {
576 	int c = styler.SafeGetCharAt(pos, '\0');
577 	bool error = false;
578 	while (c != '"' && !error) {
579 		if (pos >= max) {
580 			error = true;
581 			break;
582 		}
583 		if (pos == styler.LineEnd(styler.GetLine(pos)))
584 			styler.SetLineState(styler.GetLine(pos), 0);
585 		if (c == '\\') {
586 			int n = styler.SafeGetCharAt(pos + 1, '\0');
587 			if (IsValidStringEscape(n)) {
588 				pos += 2;
589 			} else if (n == 'x') {
590 				pos += 2;
591 				error = !ScanNumericEscape(styler, pos, 2, true);
592 			} else if (n == 'u' && !ascii_only) {
593 				pos += 2;
594 				if (styler.SafeGetCharAt(pos, '\0') != '{') {
595 					// old-style
596 					error = !ScanNumericEscape(styler, pos, 4, true);
597 				} else {
598 					int n_digits = 0;
599 					while (IsADigit(styler.SafeGetCharAt(++pos, '\0'), 16) && n_digits++ < 6) {
600 					}
601 					if (n_digits > 0 && styler.SafeGetCharAt(pos, '\0') == '}')
602 						pos++;
603 					else
604 						error = true;
605 				}
606 			} else if (n == 'U' && !ascii_only) {
607 				pos += 2;
608 				error = !ScanNumericEscape(styler, pos, 8, true);
609 			} else {
610 				pos += 1;
611 				error = true;
612 			}
613 		} else {
614 			if (ascii_only && !IsASCII((char)c))
615 				error = true;
616 			else
617 				pos++;
618 		}
619 		c = styler.SafeGetCharAt(pos, '\0');
620 	}
621 	if (!error)
622 		pos++;
623 	styler.ColourTo(pos - 1, ascii_only ? SCE_RUST_BYTESTRING : SCE_RUST_STRING);
624 }
625 
ResumeRawString(Accessor & styler,Sci_Position & pos,Sci_Position max,int num_hashes,bool ascii_only)626 static void ResumeRawString(Accessor &styler, Sci_Position& pos, Sci_Position max, int num_hashes, bool ascii_only) {
627 	for (;;) {
628 		if (pos == styler.LineEnd(styler.GetLine(pos)))
629 			styler.SetLineState(styler.GetLine(pos), num_hashes);
630 
631 		int c = styler.SafeGetCharAt(pos, '\0');
632 		if (c == '"') {
633 			pos++;
634 			int trailing_num_hashes = 0;
635 			while (styler.SafeGetCharAt(pos, '\0') == '#' && trailing_num_hashes < num_hashes) {
636 				trailing_num_hashes++;
637 				pos++;
638 			}
639 			if (trailing_num_hashes == num_hashes) {
640 				styler.SetLineState(styler.GetLine(pos), 0);
641 				break;
642 			}
643 		} else if (pos >= max) {
644 			break;
645 		} else {
646 			if (ascii_only && !IsASCII((char)c))
647 				break;
648 			pos++;
649 		}
650 	}
651 	styler.ColourTo(pos - 1, ascii_only ? SCE_RUST_BYTESTRINGR : SCE_RUST_STRINGR);
652 }
653 
ScanRawString(Accessor & styler,Sci_Position & pos,Sci_Position max,bool ascii_only)654 static void ScanRawString(Accessor &styler, Sci_Position& pos, Sci_Position max, bool ascii_only) {
655 	pos++;
656 	int num_hashes = 0;
657 	while (styler.SafeGetCharAt(pos, '\0') == '#') {
658 		num_hashes++;
659 		pos++;
660 	}
661 	if (styler.SafeGetCharAt(pos, '\0') != '"') {
662 		styler.ColourTo(pos - 1, SCE_RUST_LEXERROR);
663 	} else {
664 		pos++;
665 		ResumeRawString(styler, pos, max, num_hashes, ascii_only);
666 	}
667 }
668 
Lex(Sci_PositionU startPos,Sci_Position length,int initStyle,IDocument * pAccess)669 void SCI_METHOD LexerRust::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
670 	PropSetSimple props;
671 	Accessor styler(pAccess, &props);
672 	Sci_Position pos = startPos;
673 	Sci_Position max = pos + length;
674 
675 	styler.StartAt(pos);
676 	styler.StartSegment(pos);
677 
678 	if (initStyle == SCE_RUST_COMMENTBLOCK || initStyle == SCE_RUST_COMMENTBLOCKDOC) {
679 		ResumeBlockComment(styler, pos, max, initStyle == SCE_RUST_COMMENTBLOCKDOC ? DocComment : NotDocComment, styler.GetLineState(styler.GetLine(pos) - 1));
680 	} else if (initStyle == SCE_RUST_COMMENTLINE || initStyle == SCE_RUST_COMMENTLINEDOC) {
681 		ResumeLineComment(styler, pos, max, initStyle == SCE_RUST_COMMENTLINEDOC ? DocComment : NotDocComment);
682 	} else if (initStyle == SCE_RUST_STRING) {
683 		ResumeString(styler, pos, max, false);
684 	} else if (initStyle == SCE_RUST_BYTESTRING) {
685 		ResumeString(styler, pos, max, true);
686 	} else if (initStyle == SCE_RUST_STRINGR) {
687 		ResumeRawString(styler, pos, max, styler.GetLineState(styler.GetLine(pos) - 1), false);
688 	} else if (initStyle == SCE_RUST_BYTESTRINGR) {
689 		ResumeRawString(styler, pos, max, styler.GetLineState(styler.GetLine(pos) - 1), true);
690 	}
691 
692 	while (pos < max) {
693 		int c = styler.SafeGetCharAt(pos, '\0');
694 		int n = styler.SafeGetCharAt(pos + 1, '\0');
695 		int n2 = styler.SafeGetCharAt(pos + 2, '\0');
696 
697 		if (pos == 0 && c == '#' && n == '!' && n2 != '[') {
698 			pos += 2;
699 			ResumeLineComment(styler, pos, max, NotDocComment);
700 		} else if (IsWhitespace(c)) {
701 			ScanWhitespace(styler, pos, max);
702 		} else if (c == '/' && (n == '/' || n == '*')) {
703 			ScanComments(styler, pos, max);
704 		} else if (c == 'r' && (n == '#' || n == '"')) {
705 			ScanRawString(styler, pos, max, false);
706 		} else if (c == 'b' && n == 'r' && (n2 == '#' || n2 == '"')) {
707 			pos++;
708 			ScanRawString(styler, pos, max, true);
709 		} else if (c == 'b' && n == '"') {
710 			pos += 2;
711 			ResumeString(styler, pos, max, true);
712 		} else if (c == 'b' && n == '\'') {
713 			pos++;
714 			ScanCharacterLiteralOrLifetime(styler, pos, true);
715 		} else if (IsIdentifierStart(c)) {
716 			ScanIdentifier(styler, pos, keywords);
717 		} else if (IsADigit(c)) {
718 			ScanNumber(styler, pos);
719 		} else if (IsThreeCharOperator(c, n, n2)) {
720 			pos += 3;
721 			styler.ColourTo(pos - 1, SCE_RUST_OPERATOR);
722 		} else if (IsTwoCharOperator(c, n)) {
723 			pos += 2;
724 			styler.ColourTo(pos - 1, SCE_RUST_OPERATOR);
725 		} else if (IsOneCharOperator(c)) {
726 			pos++;
727 			styler.ColourTo(pos - 1, SCE_RUST_OPERATOR);
728 		} else if (c == '\'') {
729 			ScanCharacterLiteralOrLifetime(styler, pos, false);
730 		} else if (c == '"') {
731 			pos++;
732 			ResumeString(styler, pos, max, false);
733 		} else {
734 			pos++;
735 			styler.ColourTo(pos - 1, SCE_RUST_LEXERROR);
736 		}
737 	}
738 	styler.ColourTo(pos - 1, SCE_RUST_DEFAULT);
739 	styler.Flush();
740 }
741 
Fold(Sci_PositionU startPos,Sci_Position length,int initStyle,IDocument * pAccess)742 void SCI_METHOD LexerRust::Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
743 
744 	if (!options.fold)
745 		return;
746 
747 	LexAccessor styler(pAccess);
748 
749 	Sci_PositionU endPos = startPos + length;
750 	int visibleChars = 0;
751 	bool inLineComment = false;
752 	Sci_Position lineCurrent = styler.GetLine(startPos);
753 	int levelCurrent = SC_FOLDLEVELBASE;
754 	if (lineCurrent > 0)
755 		levelCurrent = styler.LevelAt(lineCurrent-1) >> 16;
756 	Sci_PositionU lineStartNext = styler.LineStart(lineCurrent+1);
757 	int levelMinCurrent = levelCurrent;
758 	int levelNext = levelCurrent;
759 	char chNext = styler[startPos];
760 	int styleNext = styler.StyleAt(startPos);
761 	int style = initStyle;
762 	const bool userDefinedFoldMarkers = !options.foldExplicitStart.empty() && !options.foldExplicitEnd.empty();
763 	for (Sci_PositionU i = startPos; i < endPos; i++) {
764 		char ch = chNext;
765 		chNext = styler.SafeGetCharAt(i + 1);
766 		int stylePrev = style;
767 		style = styleNext;
768 		styleNext = styler.StyleAt(i + 1);
769 		bool atEOL = i == (lineStartNext-1);
770 		if ((style == SCE_RUST_COMMENTLINE) || (style == SCE_RUST_COMMENTLINEDOC))
771 			inLineComment = true;
772 		if (options.foldComment && options.foldCommentMultiline && IsStreamCommentStyle(style) && !inLineComment) {
773 			if (!IsStreamCommentStyle(stylePrev)) {
774 				levelNext++;
775 			} else if (!IsStreamCommentStyle(styleNext) && !atEOL) {
776 				// Comments don't end at end of line and the next character may be unstyled.
777 				levelNext--;
778 			}
779 		}
780 		if (options.foldComment && options.foldCommentExplicit && ((style == SCE_RUST_COMMENTLINE) || options.foldExplicitAnywhere)) {
781 			if (userDefinedFoldMarkers) {
782 				if (styler.Match(i, options.foldExplicitStart.c_str())) {
783 					levelNext++;
784 				} else if (styler.Match(i, options.foldExplicitEnd.c_str())) {
785 					levelNext--;
786 				}
787 			} else {
788 				if ((ch == '/') && (chNext == '/')) {
789 					char chNext2 = styler.SafeGetCharAt(i + 2);
790 					if (chNext2 == '{') {
791 						levelNext++;
792 					} else if (chNext2 == '}') {
793 						levelNext--;
794 					}
795 				}
796 			}
797 		}
798 		if (options.foldSyntaxBased && (style == SCE_RUST_OPERATOR)) {
799 			if (ch == '{') {
800 				// Measure the minimum before a '{' to allow
801 				// folding on "} else {"
802 				if (levelMinCurrent > levelNext) {
803 					levelMinCurrent = levelNext;
804 				}
805 				levelNext++;
806 			} else if (ch == '}') {
807 				levelNext--;
808 			}
809 		}
810 		if (!IsASpace(ch))
811 			visibleChars++;
812 		if (atEOL || (i == endPos-1)) {
813 			int levelUse = levelCurrent;
814 			if (options.foldSyntaxBased && options.foldAtElse) {
815 				levelUse = levelMinCurrent;
816 			}
817 			int lev = levelUse | levelNext << 16;
818 			if (visibleChars == 0 && options.foldCompact)
819 				lev |= SC_FOLDLEVELWHITEFLAG;
820 			if (levelUse < levelNext)
821 				lev |= SC_FOLDLEVELHEADERFLAG;
822 			if (lev != styler.LevelAt(lineCurrent)) {
823 				styler.SetLevel(lineCurrent, lev);
824 			}
825 			lineCurrent++;
826 			lineStartNext = styler.LineStart(lineCurrent+1);
827 			levelCurrent = levelNext;
828 			levelMinCurrent = levelCurrent;
829 			if (atEOL && (i == static_cast<Sci_PositionU>(styler.Length()-1))) {
830 				// There is an empty line at end of file so give it same level and empty
831 				styler.SetLevel(lineCurrent, (levelCurrent | levelCurrent << 16) | SC_FOLDLEVELWHITEFLAG);
832 			}
833 			visibleChars = 0;
834 			inLineComment = false;
835 		}
836 	}
837 }
838 
839 LexerModule lmRust(SCLEX_RUST, LexerRust::LexerFactoryRust, "rust", rustWordLists);
840