1 /** @file LexRust.cxx
2  ** Lexer for Rust.
3  **
4  ** Copyright (c) 2013 by SiegeLord <slabode@aim.com>
5  ** Converted to lexer object and added further folding features/properties by "Udo Lechner" <dlchnr(at)gmx(dot)net>
6  **/
7 // Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>
8 // The License.txt file describes the conditions under which this software may be distributed.
9 
10 #include <stdlib.h>
11 #include <string.h>
12 #include <stdio.h>
13 #include <stdarg.h>
14 #include <assert.h>
15 #include <ctype.h>
16 
17 #include <string>
18 #include <map>
19 
20 #include "ILexer.h"
21 #include "Scintilla.h"
22 #include "SciLexer.h"
23 
24 #include "PropSetSimple.h"
25 #include "WordList.h"
26 #include "LexAccessor.h"
27 #include "Accessor.h"
28 #include "StyleContext.h"
29 #include "CharacterSet.h"
30 #include "LexerModule.h"
31 #include "OptionSet.h"
32 #include "DefaultLexer.h"
33 
34 using namespace Scintilla;
35 
// Number of keyword lists handled by the lexer (size of LexerRust::keywords).
static constexpr int NUM_RUST_KEYWORD_LISTS = 7;
// Maximum number of identifier bytes copied out for keyword lookup;
// longer identifiers are truncated to this length before the lookup.
static constexpr int MAX_RUST_IDENT_CHARS = 1023;
38 
IsStreamCommentStyle(int style)39 static bool IsStreamCommentStyle(int style) {
40 	return style == SCE_RUST_COMMENTBLOCK ||
41 		   style == SCE_RUST_COMMENTBLOCKDOC;
42 }
43 
// Options used for LexerRust.
// Each member is bound to a property name by OptionSetRust; the constructor
// supplies the value used until that property is set.
struct OptionsRust {
	bool fold;
	bool foldSyntaxBased;
	bool foldComment;
	bool foldCommentMultiline;
	bool foldCommentExplicit;
	std::string foldExplicitStart;
	std::string foldExplicitEnd;
	bool foldExplicitAnywhere;
	bool foldCompact;
	int  foldAtElseInt;
	bool foldAtElse;

	// Members are initialised in declaration order.
	OptionsRust() :
		fold(false),
		foldSyntaxBased(true),
		foldComment(false),
		foldCommentMultiline(true),
		foldCommentExplicit(true),
		foldExplicitStart(""),
		foldExplicitEnd(""),
		foldExplicitAnywhere(false),
		foldCompact(true),
		foldAtElseInt(-1),
		foldAtElse(false) {
	}
};
71 
72 static const char * const rustWordLists[NUM_RUST_KEYWORD_LISTS + 1] = {
73 			"Primary keywords and identifiers",
74 			"Built in types",
75 			"Other keywords",
76 			"Keywords 4",
77 			"Keywords 5",
78 			"Keywords 6",
79 			"Keywords 7",
80 			0,
81 		};
82 
83 struct OptionSetRust : public OptionSet<OptionsRust> {
OptionSetRustOptionSetRust84 	OptionSetRust() {
85 		DefineProperty("fold", &OptionsRust::fold);
86 
87 		DefineProperty("fold.comment", &OptionsRust::foldComment);
88 
89 		DefineProperty("fold.compact", &OptionsRust::foldCompact);
90 
91 		DefineProperty("fold.at.else", &OptionsRust::foldAtElse);
92 
93 		DefineProperty("fold.rust.syntax.based", &OptionsRust::foldSyntaxBased,
94 			"Set this property to 0 to disable syntax based folding.");
95 
96 		DefineProperty("fold.rust.comment.multiline", &OptionsRust::foldCommentMultiline,
97 			"Set this property to 0 to disable folding multi-line comments when fold.comment=1.");
98 
99 		DefineProperty("fold.rust.comment.explicit", &OptionsRust::foldCommentExplicit,
100 			"Set this property to 0 to disable folding explicit fold points when fold.comment=1.");
101 
102 		DefineProperty("fold.rust.explicit.start", &OptionsRust::foldExplicitStart,
103 			"The string to use for explicit fold start points, replacing the standard //{.");
104 
105 		DefineProperty("fold.rust.explicit.end", &OptionsRust::foldExplicitEnd,
106 			"The string to use for explicit fold end points, replacing the standard //}.");
107 
108 		DefineProperty("fold.rust.explicit.anywhere", &OptionsRust::foldExplicitAnywhere,
109 			"Set this property to 1 to enable explicit fold points anywhere, not just in line comments.");
110 
111 		DefineProperty("lexer.rust.fold.at.else", &OptionsRust::foldAtElseInt,
112 			"This option enables Rust folding on a \"} else {\" line of an if statement.");
113 
114 		DefineWordListSets(rustWordLists);
115 	}
116 };
117 
118 class LexerRust : public DefaultLexer {
119 	WordList keywords[NUM_RUST_KEYWORD_LISTS];
120 	OptionsRust options;
121 	OptionSetRust osRust;
122 public:
LexerRust()123 	LexerRust() : DefaultLexer("rust", SCLEX_RUST) {
124 	}
~LexerRust()125 	virtual ~LexerRust() {
126 	}
Release()127 	void SCI_METHOD Release() override {
128 		delete this;
129 	}
Version() const130 	int SCI_METHOD Version() const override {
131 		return lvRelease5;
132 	}
PropertyNames()133 	const char * SCI_METHOD PropertyNames() override {
134 		return osRust.PropertyNames();
135 	}
PropertyType(const char * name)136 	int SCI_METHOD PropertyType(const char *name) override {
137 		return osRust.PropertyType(name);
138 	}
DescribeProperty(const char * name)139 	const char * SCI_METHOD DescribeProperty(const char *name) override {
140 		return osRust.DescribeProperty(name);
141 	}
142 	Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override;
PropertyGet(const char * key)143 	const char * SCI_METHOD PropertyGet(const char *key) override {
144 		return osRust.PropertyGet(key);
145 	}
DescribeWordListSets()146 	const char * SCI_METHOD DescribeWordListSets() override {
147 		return osRust.DescribeWordListSets();
148 	}
149 	Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;
150 	void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
151 	void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
PrivateCall(int,void *)152 	void * SCI_METHOD PrivateCall(int, void *) override {
153 		return 0;
154 	}
LexerFactoryRust()155 	static ILexer5 *LexerFactoryRust() {
156 		return new LexerRust();
157 	}
158 };
159 
PropertySet(const char * key,const char * val)160 Sci_Position SCI_METHOD LexerRust::PropertySet(const char *key, const char *val) {
161 	if (osRust.PropertySet(&options, key, val)) {
162 		return 0;
163 	}
164 	return -1;
165 }
166 
WordListSet(int n,const char * wl)167 Sci_Position SCI_METHOD LexerRust::WordListSet(int n, const char *wl) {
168 	Sci_Position firstModification = -1;
169 	if (n < NUM_RUST_KEYWORD_LISTS) {
170 		WordList *wordListN = &keywords[n];
171 		WordList wlNew;
172 		wlNew.Set(wl);
173 		if (*wordListN != wlNew) {
174 			wordListN->Set(wl);
175 			firstModification = 0;
176 		}
177 	}
178 	return firstModification;
179 }
180 
/* Returns true for the characters the lexer treats as whitespace:
 * space, tab, carriage return and line feed. */
static bool IsWhitespace(int c) {
	switch (c) {
	case ' ':
	case '\t':
	case '\r':
	case '\n':
		return true;
	default:
		return false;
	}
}
184 
185 /* This isn't quite right for Unicode identifiers */
IsIdentifierStart(int ch)186 static bool IsIdentifierStart(int ch) {
187 	return (IsASCII(ch) && (isalpha(ch) || ch == '_')) || !IsASCII(ch);
188 }
189 
190 /* This isn't quite right for Unicode identifiers */
IsIdentifierContinue(int ch)191 static bool IsIdentifierContinue(int ch) {
192 	return (IsASCII(ch) && (isalnum(ch) || ch == '_')) || !IsASCII(ch);
193 }
194 
ScanWhitespace(Accessor & styler,Sci_Position & pos,Sci_Position max)195 static void ScanWhitespace(Accessor& styler, Sci_Position& pos, Sci_Position max) {
196 	while (IsWhitespace(styler.SafeGetCharAt(pos, '\0')) && pos < max) {
197 		if (pos == styler.LineEnd(styler.GetLine(pos)))
198 			styler.SetLineState(styler.GetLine(pos), 0);
199 		pos++;
200 	}
201 	styler.ColourTo(pos-1, SCE_RUST_DEFAULT);
202 }
203 
GrabString(char * s,Accessor & styler,Sci_Position start,Sci_Position len)204 static void GrabString(char* s, Accessor& styler, Sci_Position start, Sci_Position len) {
205 	for (Sci_Position ii = 0; ii < len; ii++)
206 		s[ii] = styler[ii + start];
207 	s[len] = '\0';
208 }
209 
ScanIdentifier(Accessor & styler,Sci_Position & pos,WordList * keywords)210 static void ScanIdentifier(Accessor& styler, Sci_Position& pos, WordList *keywords) {
211 	Sci_Position start = pos;
212 	while (IsIdentifierContinue(styler.SafeGetCharAt(pos, '\0')))
213 		pos++;
214 
215 	if (styler.SafeGetCharAt(pos, '\0') == '!') {
216 		pos++;
217 		styler.ColourTo(pos - 1, SCE_RUST_MACRO);
218 	} else {
219 		char s[MAX_RUST_IDENT_CHARS + 1];
220 		Sci_Position len = pos - start;
221 		len = len > MAX_RUST_IDENT_CHARS ? MAX_RUST_IDENT_CHARS : len;
222 		GrabString(s, styler, start, len);
223 		bool keyword = false;
224 		for (int ii = 0; ii < NUM_RUST_KEYWORD_LISTS; ii++) {
225 			if (keywords[ii].InList(s)) {
226 				styler.ColourTo(pos - 1, SCE_RUST_WORD + ii);
227 				keyword = true;
228 				break;
229 			}
230 		}
231 		if (!keyword) {
232 			styler.ColourTo(pos - 1, SCE_RUST_IDENTIFIER);
233 		}
234 	}
235 }
236 
237 /* Scans a sequence of digits, returning true if it found any. */
ScanDigits(Accessor & styler,Sci_Position & pos,int base)238 static bool ScanDigits(Accessor& styler, Sci_Position& pos, int base) {
239 	Sci_Position old_pos = pos;
240 	for (;;) {
241 		int c = styler.SafeGetCharAt(pos, '\0');
242 		if (IsADigit(c, base) || c == '_')
243 			pos++;
244 		else
245 			break;
246 	}
247 	return old_pos != pos;
248 }
249 
250 /* Scans an integer and floating point literals. */
ScanNumber(Accessor & styler,Sci_Position & pos)251 static void ScanNumber(Accessor& styler, Sci_Position& pos) {
252 	int base = 10;
253 	int c = styler.SafeGetCharAt(pos, '\0');
254 	int n = styler.SafeGetCharAt(pos + 1, '\0');
255 	bool error = false;
256 	/* Scan the prefix, thus determining the base.
257 	 * 10 is default if there's no prefix. */
258 	if (c == '0' && n == 'x') {
259 		pos += 2;
260 		base = 16;
261 	} else if (c == '0' && n == 'b') {
262 		pos += 2;
263 		base = 2;
264 	} else if (c == '0' && n == 'o') {
265 		pos += 2;
266 		base = 8;
267 	}
268 
269 	/* Scan initial digits. The literal is malformed if there are none. */
270 	error |= !ScanDigits(styler, pos, base);
271 	/* See if there's an integer suffix. We mimic the Rust's lexer
272 	 * and munch it even if there was an error above. */
273 	c = styler.SafeGetCharAt(pos, '\0');
274 	if (c == 'u' || c == 'i') {
275 		pos++;
276 		c = styler.SafeGetCharAt(pos, '\0');
277 		n = styler.SafeGetCharAt(pos + 1, '\0');
278 		if (c == '8') {
279 			pos++;
280 		} else if (c == '1' && n == '6') {
281 			pos += 2;
282 		} else if (c == '3' && n == '2') {
283 			pos += 2;
284 		} else if (c == '6' && n == '4') {
285 			pos += 2;
286 		} else if (styler.Match(pos, "size")) {
287 			pos += 4;
288 		} else {
289 			error = true;
290 		}
291 	/* See if it's a floating point literal. These literals have to be base 10.
292 	 */
293 	} else if (!error) {
294 		/* If there's a period, it's a floating point literal unless it's
295 		 * followed by an identifier (meaning this is a method call, e.g.
296 		 * `1.foo()`) or another period, in which case it's a range (e.g. 1..2)
297 		 */
298 		n = styler.SafeGetCharAt(pos + 1, '\0');
299 		if (c == '.' && !(IsIdentifierStart(n) || n == '.')) {
300 			error |= base != 10;
301 			pos++;
302 			/* It's ok to have no digits after the period. */
303 			ScanDigits(styler, pos, 10);
304 		}
305 
306 		/* Look for the exponentiation. */
307 		c = styler.SafeGetCharAt(pos, '\0');
308 		if (c == 'e' || c == 'E') {
309 			error |= base != 10;
310 			pos++;
311 			c = styler.SafeGetCharAt(pos, '\0');
312 			if (c == '-' || c == '+')
313 				pos++;
314 			/* It is invalid to have no digits in the exponent. */
315 			error |= !ScanDigits(styler, pos, 10);
316 		}
317 
318 		/* Scan the floating point suffix. */
319 		c = styler.SafeGetCharAt(pos, '\0');
320 		if (c == 'f') {
321 			error |= base != 10;
322 			pos++;
323 			c = styler.SafeGetCharAt(pos, '\0');
324 			n = styler.SafeGetCharAt(pos + 1, '\0');
325 			if (c == '3' && n == '2') {
326 				pos += 2;
327 			} else if (c == '6' && n == '4') {
328 				pos += 2;
329 			} else {
330 				error = true;
331 			}
332 		}
333 	}
334 
335 	if (error)
336 		styler.ColourTo(pos - 1, SCE_RUST_LEXERROR);
337 	else
338 		styler.ColourTo(pos - 1, SCE_RUST_NUMBER);
339 }
340 
/* Returns true when c is a character that is an operator or punctuation
 * token on its own. */
static bool IsOneCharOperator(int c) {
	switch (c) {
	case ';': case ',': case '(': case ')':
	case '{': case '}': case '[': case ']':
	case '@': case '#': case '~': case '+':
	case '*': case '/': case '^': case '%':
	case '.': case ':': case '!': case '<':
	case '>': case '=': case '-': case '&':
	case '|': case '$': case '?':
		return true;
	default:
		return false;
	}
}
350 
/* Returns true when the characters c and n, in that order, form a
 * two-character operator. */
static bool IsTwoCharOperator(int c, int n) {
	switch (c) {
	case '.':
		return n == '.';
	case ':':
		return n == ':';
	case '<':
		return n == '<' || n == '=';
	case '>':
		return n == '>' || n == '=';
	case '=':
		return n == '=' || n == '>';
	case '-':
		return n == '>' || n == '=';
	case '&':
		return n == '&' || n == '=';
	case '|':
		return n == '|' || n == '=';
	case '!': case '+': case '*':
	case '/': case '^': case '%':
		/* These only combine with '=' (comparison or compound assignment). */
		return n == '=';
	default:
		return false;
	}
}
363 
/* Returns true when c, n and n2, in that order, spell one of the shift
 * assignment operators `<<=` or `>>=`. */
static bool IsThreeCharOperator(int c, int n, int n2) {
	if (n2 != '=')
		return false;
	const bool shift_left = (c == '<' && n == '<');
	const bool shift_right = (c == '>' && n == '>');
	return shift_left || shift_right;
}
368 
/* Returns true when c, the character following a backslash, forms one of the
 * single-character escapes accepted in character literals. */
static bool IsValidCharacterEscape(int c) {
	switch (c) {
	case 'n':
	case 'r':
	case 't':
	case '\\':
	case '\'':
	case '"':
	case '0':
		return true;
	default:
		return false;
	}
}
373 
IsValidStringEscape(int c)374 static bool IsValidStringEscape(int c) {
375 	return IsValidCharacterEscape(c) || c == '\n' || c == '\r';
376 }
377 
ScanNumericEscape(Accessor & styler,Sci_Position & pos,Sci_Position num_digits,bool stop_asap)378 static bool ScanNumericEscape(Accessor &styler, Sci_Position& pos, Sci_Position num_digits, bool stop_asap) {
379 	for (;;) {
380 		int c = styler.SafeGetCharAt(pos, '\0');
381 		if (!IsADigit(c, 16))
382 			break;
383 		num_digits--;
384 		pos++;
385 		if (num_digits == 0 && stop_asap)
386 			return true;
387 	}
388 	if (num_digits == 0) {
389 		return true;
390 	} else {
391 		return false;
392 	}
393 }
394 
395 /* This is overly permissive for character literals in order to accept UTF-8 encoded
396  * character literals. */
ScanCharacterLiteralOrLifetime(Accessor & styler,Sci_Position & pos,bool ascii_only)397 static void ScanCharacterLiteralOrLifetime(Accessor &styler, Sci_Position& pos, bool ascii_only) {
398 	pos++;
399 	int c = styler.SafeGetCharAt(pos, '\0');
400 	int n = styler.SafeGetCharAt(pos + 1, '\0');
401 	bool done = false;
402 	bool valid_lifetime = !ascii_only && IsIdentifierStart(c);
403 	bool valid_char = true;
404 	bool first = true;
405 	while (!done) {
406 		switch (c) {
407 			case '\\':
408 				done = true;
409 				if (IsValidCharacterEscape(n)) {
410 					pos += 2;
411 				} else if (n == 'x') {
412 					pos += 2;
413 					valid_char = ScanNumericEscape(styler, pos, 2, false);
414 				} else if (n == 'u' && !ascii_only) {
415 					pos += 2;
416 					if (styler.SafeGetCharAt(pos, '\0') != '{') {
417 						// old-style
418 						valid_char = ScanNumericEscape(styler, pos, 4, false);
419 					} else {
420 						int n_digits = 0;
421 						while (IsADigit(styler.SafeGetCharAt(++pos, '\0'), 16) && n_digits++ < 6) {
422 						}
423 						if (n_digits > 0 && styler.SafeGetCharAt(pos, '\0') == '}')
424 							pos++;
425 						else
426 							valid_char = false;
427 					}
428 				} else if (n == 'U' && !ascii_only) {
429 					pos += 2;
430 					valid_char = ScanNumericEscape(styler, pos, 8, false);
431 				} else {
432 					valid_char = false;
433 				}
434 				break;
435 			case '\'':
436 				valid_char = !first;
437 				done = true;
438 				break;
439 			case '\t':
440 			case '\n':
441 			case '\r':
442 			case '\0':
443 				valid_char = false;
444 				done = true;
445 				break;
446 			default:
447 				if (ascii_only && !IsASCII((char)c)) {
448 					done = true;
449 					valid_char = false;
450 				} else if (!IsIdentifierContinue(c) && !first) {
451 					done = true;
452 				} else {
453 					pos++;
454 				}
455 				break;
456 		}
457 		c = styler.SafeGetCharAt(pos, '\0');
458 		n = styler.SafeGetCharAt(pos + 1, '\0');
459 
460 		first = false;
461 	}
462 	if (styler.SafeGetCharAt(pos, '\0') == '\'') {
463 		valid_lifetime = false;
464 	} else {
465 		valid_char = false;
466 	}
467 	if (valid_lifetime) {
468 		styler.ColourTo(pos - 1, SCE_RUST_LIFETIME);
469 	} else if (valid_char) {
470 		pos++;
471 		styler.ColourTo(pos - 1, ascii_only ? SCE_RUST_BYTECHARACTER : SCE_RUST_CHARACTER);
472 	} else {
473 		styler.ColourTo(pos - 1, SCE_RUST_LEXERROR);
474 	}
475 }
476 
/* Tells the comment scanners whether the comment is already known to be a
 * doc comment or whether that still has to be worked out from its text. */
enum CommentState {
	UnknownComment = 0,	/* decide from the comment's first characters */
	DocComment = 1,		/* known to be a doc comment */
	NotDocComment = 2	/* known to be a regular comment */
};
482 
483 /*
484  * The rule for block-doc comments is as follows: /xxN and /x! (where x is an asterisk, N is a non-asterisk) start doc comments.
485  * Otherwise it's a regular comment.
486  */
ResumeBlockComment(Accessor & styler,Sci_Position & pos,Sci_Position max,CommentState state,int level)487 static void ResumeBlockComment(Accessor &styler, Sci_Position& pos, Sci_Position max, CommentState state, int level) {
488 	int c = styler.SafeGetCharAt(pos, '\0');
489 	bool maybe_doc_comment = false;
490 	if (c == '*') {
491 		int n = styler.SafeGetCharAt(pos + 1, '\0');
492 		if (n != '*' && n != '/') {
493 			maybe_doc_comment = true;
494 		}
495 	} else if (c == '!') {
496 		maybe_doc_comment = true;
497 	}
498 
499 	for (;;) {
500 		int n = styler.SafeGetCharAt(pos + 1, '\0');
501 		if (pos == styler.LineEnd(styler.GetLine(pos)))
502 			styler.SetLineState(styler.GetLine(pos), level);
503 		if (c == '*') {
504 			pos++;
505 			if (n == '/') {
506 				pos++;
507 				level--;
508 				if (level == 0) {
509 					styler.SetLineState(styler.GetLine(pos), 0);
510 					if (state == DocComment || (state == UnknownComment && maybe_doc_comment))
511 						styler.ColourTo(pos - 1, SCE_RUST_COMMENTBLOCKDOC);
512 					else
513 						styler.ColourTo(pos - 1, SCE_RUST_COMMENTBLOCK);
514 					break;
515 				}
516 			}
517 		} else if (c == '/') {
518 			pos++;
519 			if (n == '*') {
520 				pos++;
521 				level++;
522 			}
523 		}
524 		else {
525 			pos++;
526 		}
527 		if (pos >= max) {
528 			if (state == DocComment || (state == UnknownComment && maybe_doc_comment))
529 				styler.ColourTo(pos - 1, SCE_RUST_COMMENTBLOCKDOC);
530 			else
531 				styler.ColourTo(pos - 1, SCE_RUST_COMMENTBLOCK);
532 			break;
533 		}
534 		c = styler.SafeGetCharAt(pos, '\0');
535 	}
536 }
537 
538 /*
539  * The rule for line-doc comments is as follows... ///N and //! (where N is a non slash) start doc comments.
540  * Otherwise it's a normal line comment.
541  */
ResumeLineComment(Accessor & styler,Sci_Position & pos,Sci_Position max,CommentState state)542 static void ResumeLineComment(Accessor &styler, Sci_Position& pos, Sci_Position max, CommentState state) {
543 	bool maybe_doc_comment = false;
544 	int c = styler.SafeGetCharAt(pos, '\0');
545 	if (c == '/') {
546 		if (pos < max) {
547 			pos++;
548 			c = styler.SafeGetCharAt(pos, '\0');
549 			if (c != '/') {
550 				maybe_doc_comment = true;
551 			}
552 		}
553 	} else if (c == '!') {
554 		maybe_doc_comment = true;
555 	}
556 
557 	while (pos < max && c != '\n') {
558 		if (pos == styler.LineEnd(styler.GetLine(pos)))
559 			styler.SetLineState(styler.GetLine(pos), 0);
560 		pos++;
561 		c = styler.SafeGetCharAt(pos, '\0');
562 	}
563 
564 	if (state == DocComment || (state == UnknownComment && maybe_doc_comment))
565 		styler.ColourTo(pos - 1, SCE_RUST_COMMENTLINEDOC);
566 	else
567 		styler.ColourTo(pos - 1, SCE_RUST_COMMENTLINE);
568 }
569 
ScanComments(Accessor & styler,Sci_Position & pos,Sci_Position max)570 static void ScanComments(Accessor &styler, Sci_Position& pos, Sci_Position max) {
571 	pos++;
572 	int c = styler.SafeGetCharAt(pos, '\0');
573 	pos++;
574 	if (c == '/')
575 		ResumeLineComment(styler, pos, max, UnknownComment);
576 	else if (c == '*')
577 		ResumeBlockComment(styler, pos, max, UnknownComment, 1);
578 }
579 
ResumeString(Accessor & styler,Sci_Position & pos,Sci_Position max,bool ascii_only)580 static void ResumeString(Accessor &styler, Sci_Position& pos, Sci_Position max, bool ascii_only) {
581 	int c = styler.SafeGetCharAt(pos, '\0');
582 	bool error = false;
583 	while (c != '"' && !error) {
584 		if (pos >= max) {
585 			error = true;
586 			break;
587 		}
588 		if (pos == styler.LineEnd(styler.GetLine(pos)))
589 			styler.SetLineState(styler.GetLine(pos), 0);
590 		if (c == '\\') {
591 			int n = styler.SafeGetCharAt(pos + 1, '\0');
592 			if (IsValidStringEscape(n)) {
593 				pos += 2;
594 			} else if (n == 'x') {
595 				pos += 2;
596 				error = !ScanNumericEscape(styler, pos, 2, true);
597 			} else if (n == 'u' && !ascii_only) {
598 				pos += 2;
599 				if (styler.SafeGetCharAt(pos, '\0') != '{') {
600 					// old-style
601 					error = !ScanNumericEscape(styler, pos, 4, true);
602 				} else {
603 					int n_digits = 0;
604 					while (IsADigit(styler.SafeGetCharAt(++pos, '\0'), 16) && n_digits++ < 6) {
605 					}
606 					if (n_digits > 0 && styler.SafeGetCharAt(pos, '\0') == '}')
607 						pos++;
608 					else
609 						error = true;
610 				}
611 			} else if (n == 'U' && !ascii_only) {
612 				pos += 2;
613 				error = !ScanNumericEscape(styler, pos, 8, true);
614 			} else {
615 				pos += 1;
616 				error = true;
617 			}
618 		} else {
619 			if (ascii_only && !IsASCII((char)c))
620 				error = true;
621 			else
622 				pos++;
623 		}
624 		c = styler.SafeGetCharAt(pos, '\0');
625 	}
626 	if (!error)
627 		pos++;
628 	styler.ColourTo(pos - 1, ascii_only ? SCE_RUST_BYTESTRING : SCE_RUST_STRING);
629 }
630 
ResumeRawString(Accessor & styler,Sci_Position & pos,Sci_Position max,int num_hashes,bool ascii_only)631 static void ResumeRawString(Accessor &styler, Sci_Position& pos, Sci_Position max, int num_hashes, bool ascii_only) {
632 	for (;;) {
633 		if (pos == styler.LineEnd(styler.GetLine(pos)))
634 			styler.SetLineState(styler.GetLine(pos), num_hashes);
635 
636 		int c = styler.SafeGetCharAt(pos, '\0');
637 		if (c == '"') {
638 			pos++;
639 			int trailing_num_hashes = 0;
640 			while (styler.SafeGetCharAt(pos, '\0') == '#' && trailing_num_hashes < num_hashes) {
641 				trailing_num_hashes++;
642 				pos++;
643 			}
644 			if (trailing_num_hashes == num_hashes) {
645 				styler.SetLineState(styler.GetLine(pos), 0);
646 				break;
647 			}
648 		} else if (pos >= max) {
649 			break;
650 		} else {
651 			if (ascii_only && !IsASCII((char)c))
652 				break;
653 			pos++;
654 		}
655 	}
656 	styler.ColourTo(pos - 1, ascii_only ? SCE_RUST_BYTESTRINGR : SCE_RUST_STRINGR);
657 }
658 
ScanRawString(Accessor & styler,Sci_Position & pos,Sci_Position max,bool ascii_only)659 static void ScanRawString(Accessor &styler, Sci_Position& pos, Sci_Position max, bool ascii_only) {
660 	pos++;
661 	int num_hashes = 0;
662 	while (styler.SafeGetCharAt(pos, '\0') == '#') {
663 		num_hashes++;
664 		pos++;
665 	}
666 	if (styler.SafeGetCharAt(pos, '\0') != '"') {
667 		styler.ColourTo(pos - 1, SCE_RUST_LEXERROR);
668 	} else {
669 		pos++;
670 		ResumeRawString(styler, pos, max, num_hashes, ascii_only);
671 	}
672 }
673 
Lex(Sci_PositionU startPos,Sci_Position length,int initStyle,IDocument * pAccess)674 void SCI_METHOD LexerRust::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
675 	PropSetSimple props;
676 	Accessor styler(pAccess, &props);
677 	Sci_Position pos = startPos;
678 	Sci_Position max = pos + length;
679 
680 	styler.StartAt(pos);
681 	styler.StartSegment(pos);
682 
683 	if (initStyle == SCE_RUST_COMMENTBLOCK || initStyle == SCE_RUST_COMMENTBLOCKDOC) {
684 		ResumeBlockComment(styler, pos, max, initStyle == SCE_RUST_COMMENTBLOCKDOC ? DocComment : NotDocComment, styler.GetLineState(styler.GetLine(pos) - 1));
685 	} else if (initStyle == SCE_RUST_COMMENTLINE || initStyle == SCE_RUST_COMMENTLINEDOC) {
686 		ResumeLineComment(styler, pos, max, initStyle == SCE_RUST_COMMENTLINEDOC ? DocComment : NotDocComment);
687 	} else if (initStyle == SCE_RUST_STRING) {
688 		ResumeString(styler, pos, max, false);
689 	} else if (initStyle == SCE_RUST_BYTESTRING) {
690 		ResumeString(styler, pos, max, true);
691 	} else if (initStyle == SCE_RUST_STRINGR) {
692 		ResumeRawString(styler, pos, max, styler.GetLineState(styler.GetLine(pos) - 1), false);
693 	} else if (initStyle == SCE_RUST_BYTESTRINGR) {
694 		ResumeRawString(styler, pos, max, styler.GetLineState(styler.GetLine(pos) - 1), true);
695 	}
696 
697 	while (pos < max) {
698 		int c = styler.SafeGetCharAt(pos, '\0');
699 		int n = styler.SafeGetCharAt(pos + 1, '\0');
700 		int n2 = styler.SafeGetCharAt(pos + 2, '\0');
701 
702 		if (pos == 0 && c == '#' && n == '!' && n2 != '[') {
703 			pos += 2;
704 			ResumeLineComment(styler, pos, max, NotDocComment);
705 		} else if (IsWhitespace(c)) {
706 			ScanWhitespace(styler, pos, max);
707 		} else if (c == '/' && (n == '/' || n == '*')) {
708 			ScanComments(styler, pos, max);
709 		} else if (c == 'r' && (n == '#' || n == '"')) {
710 			ScanRawString(styler, pos, max, false);
711 		} else if (c == 'b' && n == 'r' && (n2 == '#' || n2 == '"')) {
712 			pos++;
713 			ScanRawString(styler, pos, max, true);
714 		} else if (c == 'b' && n == '"') {
715 			pos += 2;
716 			ResumeString(styler, pos, max, true);
717 		} else if (c == 'b' && n == '\'') {
718 			pos++;
719 			ScanCharacterLiteralOrLifetime(styler, pos, true);
720 		} else if (IsIdentifierStart(c)) {
721 			ScanIdentifier(styler, pos, keywords);
722 		} else if (IsADigit(c)) {
723 			ScanNumber(styler, pos);
724 		} else if (IsThreeCharOperator(c, n, n2)) {
725 			pos += 3;
726 			styler.ColourTo(pos - 1, SCE_RUST_OPERATOR);
727 		} else if (IsTwoCharOperator(c, n)) {
728 			pos += 2;
729 			styler.ColourTo(pos - 1, SCE_RUST_OPERATOR);
730 		} else if (IsOneCharOperator(c)) {
731 			pos++;
732 			styler.ColourTo(pos - 1, SCE_RUST_OPERATOR);
733 		} else if (c == '\'') {
734 			ScanCharacterLiteralOrLifetime(styler, pos, false);
735 		} else if (c == '"') {
736 			pos++;
737 			ResumeString(styler, pos, max, false);
738 		} else {
739 			pos++;
740 			styler.ColourTo(pos - 1, SCE_RUST_LEXERROR);
741 		}
742 	}
743 	styler.ColourTo(pos - 1, SCE_RUST_DEFAULT);
744 	styler.Flush();
745 }
746 
Fold(Sci_PositionU startPos,Sci_Position length,int initStyle,IDocument * pAccess)747 void SCI_METHOD LexerRust::Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
748 
749 	if (!options.fold)
750 		return;
751 
752 	LexAccessor styler(pAccess);
753 
754 	Sci_PositionU endPos = startPos + length;
755 	int visibleChars = 0;
756 	bool inLineComment = false;
757 	Sci_Position lineCurrent = styler.GetLine(startPos);
758 	int levelCurrent = SC_FOLDLEVELBASE;
759 	if (lineCurrent > 0)
760 		levelCurrent = styler.LevelAt(lineCurrent-1) >> 16;
761 	Sci_PositionU lineStartNext = styler.LineStart(lineCurrent+1);
762 	int levelMinCurrent = levelCurrent;
763 	int levelNext = levelCurrent;
764 	char chNext = styler[startPos];
765 	int styleNext = styler.StyleAt(startPos);
766 	int style = initStyle;
767 	const bool userDefinedFoldMarkers = !options.foldExplicitStart.empty() && !options.foldExplicitEnd.empty();
768 	for (Sci_PositionU i = startPos; i < endPos; i++) {
769 		char ch = chNext;
770 		chNext = styler.SafeGetCharAt(i + 1);
771 		int stylePrev = style;
772 		style = styleNext;
773 		styleNext = styler.StyleAt(i + 1);
774 		bool atEOL = i == (lineStartNext-1);
775 		if ((style == SCE_RUST_COMMENTLINE) || (style == SCE_RUST_COMMENTLINEDOC))
776 			inLineComment = true;
777 		if (options.foldComment && options.foldCommentMultiline && IsStreamCommentStyle(style) && !inLineComment) {
778 			if (!IsStreamCommentStyle(stylePrev)) {
779 				levelNext++;
780 			} else if (!IsStreamCommentStyle(styleNext) && !atEOL) {
781 				// Comments don't end at end of line and the next character may be unstyled.
782 				levelNext--;
783 			}
784 		}
785 		if (options.foldComment && options.foldCommentExplicit && ((style == SCE_RUST_COMMENTLINE) || options.foldExplicitAnywhere)) {
786 			if (userDefinedFoldMarkers) {
787 				if (styler.Match(i, options.foldExplicitStart.c_str())) {
788 					levelNext++;
789 				} else if (styler.Match(i, options.foldExplicitEnd.c_str())) {
790 					levelNext--;
791 				}
792 			} else {
793 				if ((ch == '/') && (chNext == '/')) {
794 					char chNext2 = styler.SafeGetCharAt(i + 2);
795 					if (chNext2 == '{') {
796 						levelNext++;
797 					} else if (chNext2 == '}') {
798 						levelNext--;
799 					}
800 				}
801 			}
802 		}
803 		if (options.foldSyntaxBased && (style == SCE_RUST_OPERATOR)) {
804 			if (ch == '{') {
805 				// Measure the minimum before a '{' to allow
806 				// folding on "} else {"
807 				if (levelMinCurrent > levelNext) {
808 					levelMinCurrent = levelNext;
809 				}
810 				levelNext++;
811 			} else if (ch == '}') {
812 				levelNext--;
813 			}
814 		}
815 		if (!IsASpace(ch))
816 			visibleChars++;
817 		if (atEOL || (i == endPos-1)) {
818 			int levelUse = levelCurrent;
819 			if (options.foldSyntaxBased && options.foldAtElse) {
820 				levelUse = levelMinCurrent;
821 			}
822 			int lev = levelUse | levelNext << 16;
823 			if (visibleChars == 0 && options.foldCompact)
824 				lev |= SC_FOLDLEVELWHITEFLAG;
825 			if (levelUse < levelNext)
826 				lev |= SC_FOLDLEVELHEADERFLAG;
827 			if (lev != styler.LevelAt(lineCurrent)) {
828 				styler.SetLevel(lineCurrent, lev);
829 			}
830 			lineCurrent++;
831 			lineStartNext = styler.LineStart(lineCurrent+1);
832 			levelCurrent = levelNext;
833 			levelMinCurrent = levelCurrent;
834 			if (atEOL && (i == static_cast<Sci_PositionU>(styler.Length()-1))) {
835 				// There is an empty line at end of file so give it same level and empty
836 				styler.SetLevel(lineCurrent, (levelCurrent | levelCurrent << 16) | SC_FOLDLEVELWHITEFLAG);
837 			}
838 			visibleChars = 0;
839 			inLineComment = false;
840 		}
841 	}
842 }
843 
844 LexerModule lmRust(SCLEX_RUST, LexerRust::LexerFactoryRust, "rust", rustWordLists);
845