1 /** @file LexRust.cxx
2  ** Lexer for Rust.
3  **
4  ** Copyright (c) 2013 by SiegeLord <slabode@aim.com>
5  ** Converted to lexer object and added further folding features/properties by "Udo Lechner" <dlchnr(at)gmx(dot)net>
6  **/
7 // Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>
8 // The License.txt file describes the conditions under which this software may be distributed.
9 
10 #include <stdlib.h>
11 #include <string.h>
12 #include <stdio.h>
13 #include <stdarg.h>
14 #include <assert.h>
15 #include <ctype.h>
16 
17 #include <string>
18 #include <map>
19 
20 #include "ILexer.h"
21 #include "Scintilla.h"
22 #include "SciLexer.h"
23 
24 #include "PropSetSimple.h"
25 #include "WordList.h"
26 #include "LexAccessor.h"
27 #include "Accessor.h"
28 #include "StyleContext.h"
29 #include "CharacterSet.h"
30 #include "LexerModule.h"
31 #include "OptionSet.h"
32 
33 #ifdef SCI_NAMESPACE
34 using namespace Scintilla;
35 #endif
36 
// Number of keyword lists; sizes the description table and the lexer's WordList array.
static const int NUM_RUST_KEYWORD_LISTS = 7;
// Maximum number of identifier characters copied into the keyword lookup buffer.
static const int MAX_RUST_IDENT_CHARS = 1023;
39 
IsStreamCommentStyle(int style)40 static bool IsStreamCommentStyle(int style) {
41 	return style == SCE_RUST_COMMENTBLOCK ||
42 		   style == SCE_RUST_COMMENTBLOCKDOC;
43 }
44 
// Settings consulted by LexerRust; every field starts at the default chosen
// by the constructor below.
struct OptionsRust {
	bool fold;                      // master switch checked first by Fold()
	bool foldSyntaxBased;           // fold on '{' / '}' operators
	bool foldComment;               // enables the comment-related fold rules
	bool foldCommentMultiline;      // fold block comments
	bool foldCommentExplicit;       // fold on explicit fold markers
	std::string foldExplicitStart;  // user-defined start marker (empty: use //{)
	std::string foldExplicitEnd;    // user-defined end marker (empty: use //})
	bool foldExplicitAnywhere;      // accept explicit markers outside line comments
	bool foldCompact;               // flag blank lines as white space in fold levels
	int  foldAtElseInt;             // integer-valued fold-at-else setting, -1 by default
	bool foldAtElse;                // boolean fold-at-else setting
	OptionsRust() :
		fold(false),
		foldSyntaxBased(true),
		foldComment(false),
		foldCommentMultiline(true),
		foldCommentExplicit(true),
		foldExplicitStart(""),
		foldExplicitEnd(""),
		foldExplicitAnywhere(false),
		foldCompact(true),
		foldAtElseInt(-1),
		foldAtElse(false) {
	}
};
72 
// Human-readable descriptions of the keyword lists, one per list index,
// followed by a terminating null entry.
static const char * const rustWordLists[NUM_RUST_KEYWORD_LISTS + 1] = {
			"Primary keywords and identifiers",
			"Built in types",
			"Other keywords",
			"Keywords 4",
			"Keywords 5",
			"Keywords 6",
			"Keywords 7",
			0,
		};
83 
// Maps property names onto the OptionsRust fields they control and registers
// the keyword list descriptions.
struct OptionSetRust : public OptionSet<OptionsRust> {
	OptionSetRust() {
		// Generic fold properties; these are defined without a description string.
		DefineProperty("fold", &OptionsRust::fold);

		DefineProperty("fold.comment", &OptionsRust::foldComment);

		DefineProperty("fold.compact", &OptionsRust::foldCompact);

		DefineProperty("fold.at.else", &OptionsRust::foldAtElse);

		// Rust-specific fold properties, each with a description.
		DefineProperty("fold.rust.syntax.based", &OptionsRust::foldSyntaxBased,
			"Set this property to 0 to disable syntax based folding.");

		DefineProperty("fold.rust.comment.multiline", &OptionsRust::foldCommentMultiline,
			"Set this property to 0 to disable folding multi-line comments when fold.comment=1.");

		DefineProperty("fold.rust.comment.explicit", &OptionsRust::foldCommentExplicit,
			"Set this property to 0 to disable folding explicit fold points when fold.comment=1.");

		DefineProperty("fold.rust.explicit.start", &OptionsRust::foldExplicitStart,
			"The string to use for explicit fold start points, replacing the standard //{.");

		DefineProperty("fold.rust.explicit.end", &OptionsRust::foldExplicitEnd,
			"The string to use for explicit fold end points, replacing the standard //}.");

		DefineProperty("fold.rust.explicit.anywhere", &OptionsRust::foldExplicitAnywhere,
			"Set this property to 1 to enable explicit fold points anywhere, not just in line comments.");

		// Stored in the integer field foldAtElseInt, separate from the boolean fold.at.else.
		DefineProperty("lexer.rust.fold.at.else", &OptionsRust::foldAtElseInt,
			"This option enables Rust folding on a \"} else {\" line of an if statement.");

		DefineWordListSets(rustWordLists);
	}
};
118 
119 class LexerRust : public ILexer {
120 	WordList keywords[NUM_RUST_KEYWORD_LISTS];
121 	OptionsRust options;
122 	OptionSetRust osRust;
123 public:
~LexerRust()124 	virtual ~LexerRust() {
125 	}
Release()126 	void SCI_METHOD Release() {
127 		delete this;
128 	}
Version() const129 	int SCI_METHOD Version() const {
130 		return lvOriginal;
131 	}
PropertyNames()132 	const char * SCI_METHOD PropertyNames() {
133 		return osRust.PropertyNames();
134 	}
PropertyType(const char * name)135 	int SCI_METHOD PropertyType(const char *name) {
136 		return osRust.PropertyType(name);
137 	}
DescribeProperty(const char * name)138 	const char * SCI_METHOD DescribeProperty(const char *name) {
139 		return osRust.DescribeProperty(name);
140 	}
141 	int SCI_METHOD PropertySet(const char *key, const char *val);
DescribeWordListSets()142 	const char * SCI_METHOD DescribeWordListSets() {
143 		return osRust.DescribeWordListSets();
144 	}
145 	int SCI_METHOD WordListSet(int n, const char *wl);
146 	void SCI_METHOD Lex(unsigned int startPos, int length, int initStyle, IDocument *pAccess);
147 	void SCI_METHOD Fold(unsigned int startPos, int length, int initStyle, IDocument *pAccess);
PrivateCall(int,void *)148 	void * SCI_METHOD PrivateCall(int, void *) {
149 		return 0;
150 	}
LexerFactoryRust()151 	static ILexer *LexerFactoryRust() {
152 		return new LexerRust();
153 	}
154 };
155 
PropertySet(const char * key,const char * val)156 int SCI_METHOD LexerRust::PropertySet(const char *key, const char *val) {
157 	if (osRust.PropertySet(&options, key, val)) {
158 		return 0;
159 	}
160 	return -1;
161 }
162 
WordListSet(int n,const char * wl)163 int SCI_METHOD LexerRust::WordListSet(int n, const char *wl) {
164 	int firstModification = -1;
165 	if (n < NUM_RUST_KEYWORD_LISTS) {
166 		WordList *wordListN = &keywords[n];
167 		WordList wlNew;
168 		wlNew.Set(wl);
169 		if (*wordListN != wlNew) {
170 			wordListN->Set(wl);
171 			firstModification = 0;
172 		}
173 	}
174 	return firstModification;
175 }
176 
/* True for the four characters the lexer treats as white space:
 * space, tab, carriage return and line feed. */
static bool IsWhitespace(int c) {
	switch (c) {
		case ' ':
		case '\t':
		case '\r':
		case '\n':
			return true;
		default:
			return false;
	}
}
180 
181 /* This isn't quite right for Unicode identifiers */
IsIdentifierStart(int ch)182 static bool IsIdentifierStart(int ch) {
183 	return (IsASCII(ch) && (isalpha(ch) || ch == '_')) || !IsASCII(ch);
184 }
185 
186 /* This isn't quite right for Unicode identifiers */
IsIdentifierContinue(int ch)187 static bool IsIdentifierContinue(int ch) {
188 	return (IsASCII(ch) && (isalnum(ch) || ch == '_')) || !IsASCII(ch);
189 }
190 
ScanWhitespace(Accessor & styler,int & pos,int max)191 static void ScanWhitespace(Accessor& styler, int& pos, int max) {
192 	while (IsWhitespace(styler.SafeGetCharAt(pos, '\0')) && pos < max) {
193 		if (pos == styler.LineEnd(styler.GetLine(pos)))
194 			styler.SetLineState(styler.GetLine(pos), 0);
195 		pos++;
196 	}
197 	styler.ColourTo(pos-1, SCE_RUST_DEFAULT);
198 }
199 
GrabString(char * s,Accessor & styler,int start,int len)200 static void GrabString(char* s, Accessor& styler, int start, int len) {
201 	for (int ii = 0; ii < len; ii++)
202 		s[ii] = styler[ii + start];
203 	s[len] = '\0';
204 }
205 
ScanIdentifier(Accessor & styler,int & pos,WordList * keywords)206 static void ScanIdentifier(Accessor& styler, int& pos, WordList *keywords) {
207 	int start = pos;
208 	while (IsIdentifierContinue(styler.SafeGetCharAt(pos, '\0')))
209 		pos++;
210 
211 	if (styler.SafeGetCharAt(pos, '\0') == '!') {
212 		pos++;
213 		styler.ColourTo(pos - 1, SCE_RUST_MACRO);
214 	} else {
215 		char s[MAX_RUST_IDENT_CHARS + 1];
216 		int len = pos - start;
217 		len = len > MAX_RUST_IDENT_CHARS ? MAX_RUST_IDENT_CHARS : len;
218 		GrabString(s, styler, start, len);
219 		bool keyword = false;
220 		for (int ii = 0; ii < NUM_RUST_KEYWORD_LISTS; ii++) {
221 			if (keywords[ii].InList(s)) {
222 				styler.ColourTo(pos - 1, SCE_RUST_WORD + ii);
223 				keyword = true;
224 				break;
225 			}
226 		}
227 		if (!keyword) {
228 			styler.ColourTo(pos - 1, SCE_RUST_IDENTIFIER);
229 		}
230 	}
231 }
232 
233 /* Scans a sequence of digits, returning true if it found any. */
ScanDigits(Accessor & styler,int & pos,int base)234 static bool ScanDigits(Accessor& styler, int& pos, int base) {
235 	int old_pos = pos;
236 	for (;;) {
237 		int c = styler.SafeGetCharAt(pos, '\0');
238 		if (IsADigit(c, base) || c == '_')
239 			pos++;
240 		else
241 			break;
242 	}
243 	return old_pos != pos;
244 }
245 
246 /* Scans an integer and floating point literals. */
ScanNumber(Accessor & styler,int & pos)247 static void ScanNumber(Accessor& styler, int& pos) {
248 	int base = 10;
249 	int c = styler.SafeGetCharAt(pos, '\0');
250 	int n = styler.SafeGetCharAt(pos + 1, '\0');
251 	bool error = false;
252 	/* Scan the prefix, thus determining the base.
253 	 * 10 is default if there's no prefix. */
254 	if (c == '0' && n == 'x') {
255 		pos += 2;
256 		base = 16;
257 	} else if (c == '0' && n == 'b') {
258 		pos += 2;
259 		base = 2;
260 	} else if (c == '0' && n == 'o') {
261 		pos += 2;
262 		base = 8;
263 	}
264 
265 	/* Scan initial digits. The literal is malformed if there are none. */
266 	error |= !ScanDigits(styler, pos, base);
267 	/* See if there's an integer suffix. We mimic the Rust's lexer
268 	 * and munch it even if there was an error above. */
269 	c = styler.SafeGetCharAt(pos, '\0');
270 	if (c == 'u' || c == 'i') {
271 		pos++;
272 		c = styler.SafeGetCharAt(pos, '\0');
273 		n = styler.SafeGetCharAt(pos + 1, '\0');
274 		if (c == '8') {
275 			pos++;
276 		} else if (c == '1' && n == '6') {
277 			pos += 2;
278 		} else if (c == '3' && n == '2') {
279 			pos += 2;
280 		} else if (c == '6' && n == '4') {
281 			pos += 2;
282 		}
283 	/* See if it's a floating point literal. These literals have to be base 10.
284 	 */
285 	} else if (!error) {
286 		/* If there's a period, it's a floating point literal unless it's
287 		 * followed by an identifier (meaning this is a method call, e.g.
288 		 * `1.foo()`) or another period, in which case it's a range (e.g. 1..2)
289 		 */
290 		n = styler.SafeGetCharAt(pos + 1, '\0');
291 		if (c == '.' && !(IsIdentifierStart(n) || n == '.')) {
292 			error |= base != 10;
293 			pos++;
294 			/* It's ok to have no digits after the period. */
295 			ScanDigits(styler, pos, 10);
296 		}
297 
298 		/* Look for the exponentiation. */
299 		c = styler.SafeGetCharAt(pos, '\0');
300 		if (c == 'e' || c == 'E') {
301 			error |= base != 10;
302 			pos++;
303 			c = styler.SafeGetCharAt(pos, '\0');
304 			if (c == '-' || c == '+')
305 				pos++;
306 			/* It is invalid to have no digits in the exponent. */
307 			error |= !ScanDigits(styler, pos, 10);
308 		}
309 
310 		/* Scan the floating point suffix. */
311 		c = styler.SafeGetCharAt(pos, '\0');
312 		if (c == 'f') {
313 			error |= base != 10;
314 			pos++;
315 			c = styler.SafeGetCharAt(pos, '\0');
316 			n = styler.SafeGetCharAt(pos + 1, '\0');
317 			if (c == '3' && n == '2') {
318 				pos += 2;
319 			} else if (c == '6' && n == '4') {
320 				pos += 2;
321 			} else {
322 				error = true;
323 			}
324 		}
325 	}
326 
327 	if (error)
328 		styler.ColourTo(pos - 1, SCE_RUST_LEXERROR);
329 	else
330 		styler.ColourTo(pos - 1, SCE_RUST_NUMBER);
331 }
332 
/* True when c is a punctuation character that is styled as an operator on its
 * own. Includes '?', which Rust uses for error propagation and in `?Sized`
 * bounds; without it the character would be styled as a lexer error. */
static bool IsOneCharOperator(int c) {
	switch (c) {
		case ';': case ',': case '(': case ')':
		case '{': case '}': case '[': case ']':
		case '@': case '#': case '~': case '+':
		case '*': case '/': case '^': case '%':
		case '.': case ':': case '!': case '<':
		case '>': case '=': case '-': case '&':
		case '|': case '$': case '?':
			return true;
		default:
			return false;
	}
}
342 
/* True when the character pair (c, n) forms a two-character operator.
 * The switch selects on the first character; each case lists the second
 * characters that complete an operator. */
static bool IsTwoCharOperator(int c, int n) {
	switch (c) {
		case '.':
			return n == '.';
		case ':':
			return n == ':';
		case '!':
			return n == '=';
		case '<':
			return n == '<' || n == '=';
		case '>':
			return n == '>' || n == '=';
		case '=':
			return n == '=' || n == '>';
		case '-':
			return n == '>' || n == '=';
		case '&':
			return n == '&' || n == '=';
		case '|':
			return n == '|' || n == '=';
		case '+':
		case '*':
		case '/':
		case '^':
		case '%':
			return n == '=';
		default:
			return false;
	}
}
355 
/* True for the two three-character operators, the shift assignments
 * `<<=` and `>>=`. */
static bool IsThreeCharOperator(int c, int n, int n2) {
	if (n2 != '=')
		return false;
	const bool shiftLeft = (c == '<' && n == '<');
	const bool shiftRight = (c == '>' && n == '>');
	return shiftLeft || shiftRight;
}
360 
/* True when c, following a backslash, forms one of the simple escapes
 * accepted in character literals: \n \r \t \\ \' \" \0. */
static bool IsValidCharacterEscape(int c) {
	switch (c) {
		case 'n':
		case 'r':
		case 't':
		case '\\':
		case '\'':
		case '"':
		case '0':
			return true;
		default:
			return false;
	}
}
365 
IsValidStringEscape(int c)366 static bool IsValidStringEscape(int c) {
367 	return IsValidCharacterEscape(c) || c == '\n' || c == '\r';
368 }
369 
ScanNumericEscape(Accessor & styler,int & pos,int num_digits,bool stop_asap)370 static bool ScanNumericEscape(Accessor &styler, int& pos, int num_digits, bool stop_asap) {
371 	for (;;) {
372 		int c = styler.SafeGetCharAt(pos, '\0');
373 		if (!IsADigit(c, 16))
374 			break;
375 		num_digits--;
376 		pos++;
377 		if (num_digits == 0 && stop_asap)
378 			return true;
379 	}
380 	if (num_digits == 0) {
381 		return true;
382 	} else {
383 		return false;
384 	}
385 }
386 
/* Scans the token that starts with the single quote at pos and styles it as a
 * character literal (SCE_RUST_CHARACTER, or SCE_RUST_BYTECHARACTER when
 * ascii_only is set), a lifetime (SCE_RUST_LIFETIME) or SCE_RUST_LEXERROR.
 *
 * The two readings are tracked side by side: the text after the quote is
 * consumed as long as it could be either, and whether a closing quote follows
 * decides between them. Lifetimes are never produced when ascii_only is set.
 *
 * This is overly permissive for character literals in order to accept UTF-8 encoded
 * character literals. */
static void ScanCharacterLiteralOrLifetime(Accessor &styler, int& pos, bool ascii_only) {
	/* Skip the opening quote. */
	pos++;
	int c = styler.SafeGetCharAt(pos, '\0');
	int n = styler.SafeGetCharAt(pos + 1, '\0');
	bool done = false;
	bool valid_lifetime = !ascii_only && IsIdentifierStart(c);
	bool valid_char = true;
	bool first = true;
	while (!done) {
		switch (c) {
			case '\\':
				/* An escape sequence always ends the scan. Numeric escapes
				 * must have exactly the expected number of hex digits. */
				done = true;
				if (IsValidCharacterEscape(n)) {
					pos += 2;
				} else if (n == 'x') {
					pos += 2;
					valid_char = ScanNumericEscape(styler, pos, 2, false);
				} else if (n == 'u' && !ascii_only) {
					pos += 2;
					valid_char = ScanNumericEscape(styler, pos, 4, false);
				} else if (n == 'U' && !ascii_only) {
					pos += 2;
					valid_char = ScanNumericEscape(styler, pos, 8, false);
				} else {
					valid_char = false;
				}
				break;
			case '\'':
				/* A quote right after the opening quote is an empty literal. */
				valid_char = !first;
				done = true;
				break;
			case '\t':
			case '\n':
			case '\r':
			case '\0':
				/* These can't appear unescaped inside a character literal. */
				valid_char = false;
				done = true;
				break;
			default:
				if (ascii_only && !IsASCII((char)c)) {
					done = true;
					valid_char = false;
				} else if (!IsIdentifierContinue(c) && !first) {
					/* Past the first character only identifier characters
					 * are consumed. */
					done = true;
				} else {
					pos++;
				}
				break;
		}
		c = styler.SafeGetCharAt(pos, '\0');
		n = styler.SafeGetCharAt(pos + 1, '\0');

		first = false;
	}
	/* A closing quote rules out a lifetime; its absence rules out a
	 * character literal. */
	if (styler.SafeGetCharAt(pos, '\0') == '\'') {
		valid_lifetime = false;
	} else {
		valid_char = false;
	}
	if (valid_lifetime) {
		styler.ColourTo(pos - 1, SCE_RUST_LIFETIME);
	} else if (valid_char) {
		/* Include the closing quote in the literal. */
		pos++;
		styler.ColourTo(pos - 1, ascii_only ? SCE_RUST_BYTECHARACTER : SCE_RUST_CHARACTER);
	} else {
		styler.ColourTo(pos - 1, SCE_RUST_LEXERROR);
	}
}
457 
/* What is already known about a comment when scanning of it starts or resumes. */
enum CommentState {
	UnknownComment,	/* Decide doc / non-doc from the characters at the scan position. */
	DocComment,	/* Already known to be a doc comment. */
	NotDocComment	/* Already known to be a regular comment. */
};
463 
464 /*
465  * The rule for block-doc comments is as follows: /xxN and /x! (where x is an asterisk, N is a non-asterisk) start doc comments.
466  * Otherwise it's a regular comment.
467  */
ResumeBlockComment(Accessor & styler,int & pos,int max,CommentState state,int level)468 static void ResumeBlockComment(Accessor &styler, int& pos, int max, CommentState state, int level) {
469 	int c = styler.SafeGetCharAt(pos, '\0');
470 	bool maybe_doc_comment = false;
471 	if (c == '*') {
472 		int n = styler.SafeGetCharAt(pos + 1, '\0');
473 		if (n != '*' && n != '/') {
474 			maybe_doc_comment = true;
475 		}
476 	} else if (c == '!') {
477 		maybe_doc_comment = true;
478 	}
479 
480 	for (;;) {
481 		int n = styler.SafeGetCharAt(pos + 1, '\0');
482 		if (pos == styler.LineEnd(styler.GetLine(pos)))
483 			styler.SetLineState(styler.GetLine(pos), level);
484 		if (c == '*') {
485 			pos++;
486 			if (n == '/') {
487 				pos++;
488 				level--;
489 				if (level == 0) {
490 					styler.SetLineState(styler.GetLine(pos), 0);
491 					if (state == DocComment || (state == UnknownComment && maybe_doc_comment))
492 						styler.ColourTo(pos - 1, SCE_RUST_COMMENTBLOCKDOC);
493 					else
494 						styler.ColourTo(pos - 1, SCE_RUST_COMMENTBLOCK);
495 					break;
496 				}
497 			}
498 		} else if (c == '/') {
499 			pos++;
500 			if (n == '*') {
501 				pos++;
502 				level++;
503 			}
504 		}
505 		else {
506 			pos++;
507 		}
508 		if (pos >= max) {
509 			if (state == DocComment || (state == UnknownComment && maybe_doc_comment))
510 				styler.ColourTo(pos - 1, SCE_RUST_COMMENTBLOCKDOC);
511 			else
512 				styler.ColourTo(pos - 1, SCE_RUST_COMMENTBLOCK);
513 			break;
514 		}
515 		c = styler.SafeGetCharAt(pos, '\0');
516 	}
517 }
518 
519 /*
520  * The rule for line-doc comments is as follows... ///N and //! (where N is a non slash) start doc comments.
521  * Otherwise it's a normal line comment.
522  */
ResumeLineComment(Accessor & styler,int & pos,int max,CommentState state)523 static void ResumeLineComment(Accessor &styler, int& pos, int max, CommentState state) {
524 	bool maybe_doc_comment = false;
525 	int c = styler.SafeGetCharAt(pos, '\0');
526 	if (c == '/') {
527 		if (pos < max) {
528 			pos++;
529 			c = styler.SafeGetCharAt(pos, '\0');
530 			if (c != '/') {
531 				maybe_doc_comment = true;
532 			}
533 		}
534 	} else if (c == '!') {
535 		maybe_doc_comment = true;
536 	}
537 
538 	while (pos < max && c != '\n') {
539 		if (pos == styler.LineEnd(styler.GetLine(pos)))
540 			styler.SetLineState(styler.GetLine(pos), 0);
541 		pos++;
542 		c = styler.SafeGetCharAt(pos, '\0');
543 	}
544 
545 	if (state == DocComment || (state == UnknownComment && maybe_doc_comment))
546 		styler.ColourTo(pos - 1, SCE_RUST_COMMENTLINEDOC);
547 	else
548 		styler.ColourTo(pos - 1, SCE_RUST_COMMENTLINE);
549 }
550 
ScanComments(Accessor & styler,int & pos,int max)551 static void ScanComments(Accessor &styler, int& pos, int max) {
552 	pos++;
553 	int c = styler.SafeGetCharAt(pos, '\0');
554 	pos++;
555 	if (c == '/')
556 		ResumeLineComment(styler, pos, max, UnknownComment);
557 	else if (c == '*')
558 		ResumeBlockComment(styler, pos, max, UnknownComment, 1);
559 }
560 
ResumeString(Accessor & styler,int & pos,int max,bool ascii_only)561 static void ResumeString(Accessor &styler, int& pos, int max, bool ascii_only) {
562 	int c = styler.SafeGetCharAt(pos, '\0');
563 	bool error = false;
564 	while (c != '"' && !error) {
565 		if (pos >= max) {
566 			error = true;
567 			break;
568 		}
569 		if (pos == styler.LineEnd(styler.GetLine(pos)))
570 			styler.SetLineState(styler.GetLine(pos), 0);
571 		if (c == '\\') {
572 			int n = styler.SafeGetCharAt(pos + 1, '\0');
573 			if (IsValidStringEscape(n)) {
574 				pos += 2;
575 			} else if (n == 'x') {
576 				pos += 2;
577 				error = !ScanNumericEscape(styler, pos, 2, true);
578 			} else if (n == 'u' && !ascii_only) {
579 				pos += 2;
580 				error = !ScanNumericEscape(styler, pos, 4, true);
581 			} else if (n == 'U' && !ascii_only) {
582 				pos += 2;
583 				error = !ScanNumericEscape(styler, pos, 8, true);
584 			} else {
585 				pos += 1;
586 				error = true;
587 			}
588 		} else {
589 			if (ascii_only && !IsASCII((char)c))
590 				error = true;
591 			else
592 				pos++;
593 		}
594 		c = styler.SafeGetCharAt(pos, '\0');
595 	}
596 	if (!error)
597 		pos++;
598 	styler.ColourTo(pos - 1, ascii_only ? SCE_RUST_BYTESTRING : SCE_RUST_STRING);
599 }
600 
ResumeRawString(Accessor & styler,int & pos,int max,int num_hashes,bool ascii_only)601 static void ResumeRawString(Accessor &styler, int& pos, int max, int num_hashes, bool ascii_only) {
602 	for (;;) {
603 		if (pos == styler.LineEnd(styler.GetLine(pos)))
604 			styler.SetLineState(styler.GetLine(pos), num_hashes);
605 
606 		int c = styler.SafeGetCharAt(pos, '\0');
607 		if (c == '"') {
608 			pos++;
609 			int trailing_num_hashes = 0;
610 			while (styler.SafeGetCharAt(pos, '\0') == '#' && trailing_num_hashes < num_hashes) {
611 				trailing_num_hashes++;
612 				pos++;
613 			}
614 			if (trailing_num_hashes == num_hashes) {
615 				styler.SetLineState(styler.GetLine(pos), 0);
616 				break;
617 			}
618 		} else if (pos >= max) {
619 			break;
620 		} else {
621 			if (ascii_only && !IsASCII((char)c))
622 				break;
623 			pos++;
624 		}
625 	}
626 	styler.ColourTo(pos - 1, ascii_only ? SCE_RUST_BYTESTRINGR : SCE_RUST_STRINGR);
627 }
628 
ScanRawString(Accessor & styler,int & pos,int max,bool ascii_only)629 static void ScanRawString(Accessor &styler, int& pos, int max, bool ascii_only) {
630 	pos++;
631 	int num_hashes = 0;
632 	while (styler.SafeGetCharAt(pos, '\0') == '#') {
633 		num_hashes++;
634 		pos++;
635 	}
636 	if (styler.SafeGetCharAt(pos, '\0') != '"') {
637 		styler.ColourTo(pos - 1, SCE_RUST_LEXERROR);
638 	} else {
639 		pos++;
640 		ResumeRawString(styler, pos, max, num_hashes, ascii_only);
641 	}
642 }
643 
/* Styles the document range [startPos, startPos + length).
 *
 * initStyle is the style in effect at startPos. If it is a comment or string
 * style, that construct is resumed first; nesting depth (block comments) and
 * hash count (raw strings) are taken from the line state of the previous line.
 * After that the range is tokenised one token at a time. The order of the
 * tests in the main loop matters: the raw string and byte literal prefixes are
 * tested before identifiers, and longer operators before shorter ones. */
void SCI_METHOD LexerRust::Lex(unsigned int startPos, int length, int initStyle, IDocument *pAccess) {
	PropSetSimple props;
	Accessor styler(pAccess, &props);
	int pos = startPos;
	int max = pos + length;

	styler.StartAt(pos);
	styler.StartSegment(pos);

	/* Finish any multi-line construct the range starts inside of. */
	if (initStyle == SCE_RUST_COMMENTBLOCK || initStyle == SCE_RUST_COMMENTBLOCKDOC) {
		ResumeBlockComment(styler, pos, max, initStyle == SCE_RUST_COMMENTBLOCKDOC ? DocComment : NotDocComment, styler.GetLineState(styler.GetLine(pos) - 1));
	} else if (initStyle == SCE_RUST_COMMENTLINE || initStyle == SCE_RUST_COMMENTLINEDOC) {
		ResumeLineComment(styler, pos, max, initStyle == SCE_RUST_COMMENTLINEDOC ? DocComment : NotDocComment);
	} else if (initStyle == SCE_RUST_STRING) {
		ResumeString(styler, pos, max, false);
	} else if (initStyle == SCE_RUST_BYTESTRING) {
		ResumeString(styler, pos, max, true);
	} else if (initStyle == SCE_RUST_STRINGR) {
		ResumeRawString(styler, pos, max, styler.GetLineState(styler.GetLine(pos) - 1), false);
	} else if (initStyle == SCE_RUST_BYTESTRINGR) {
		ResumeRawString(styler, pos, max, styler.GetLineState(styler.GetLine(pos) - 1), true);
	}

	while (pos < max) {
		/* Up to three characters of lookahead select the token kind. */
		int c = styler.SafeGetCharAt(pos, '\0');
		int n = styler.SafeGetCharAt(pos + 1, '\0');
		int n2 = styler.SafeGetCharAt(pos + 2, '\0');

		if (pos == 0 && c == '#' && n == '!' && n2 != '[') {
			/* "#!" at the very start of the document that does not begin
			 * "#![" is styled as a line comment. */
			pos += 2;
			ResumeLineComment(styler, pos, max, NotDocComment);
		} else if (IsWhitespace(c)) {
			ScanWhitespace(styler, pos, max);
		} else if (c == '/' && (n == '/' || n == '*')) {
			ScanComments(styler, pos, max);
		} else if (c == 'r' && (n == '#' || n == '"')) {
			ScanRawString(styler, pos, max, false);
		} else if (c == 'b' && n == 'r' && (n2 == '#' || n2 == '"')) {
			/* Skip the 'b' so that the raw string scanner starts at the 'r'. */
			pos++;
			ScanRawString(styler, pos, max, true);
		} else if (c == 'b' && n == '"') {
			pos += 2;
			ResumeString(styler, pos, max, true);
		} else if (c == 'b' && n == '\'') {
			/* Skip the 'b' so that the scanner starts at the quote. */
			pos++;
			ScanCharacterLiteralOrLifetime(styler, pos, true);
		} else if (IsIdentifierStart(c)) {
			ScanIdentifier(styler, pos, keywords);
		} else if (IsADigit(c)) {
			ScanNumber(styler, pos);
		} else if (IsThreeCharOperator(c, n, n2)) {
			pos += 3;
			styler.ColourTo(pos - 1, SCE_RUST_OPERATOR);
		} else if (IsTwoCharOperator(c, n)) {
			pos += 2;
			styler.ColourTo(pos - 1, SCE_RUST_OPERATOR);
		} else if (IsOneCharOperator(c)) {
			pos++;
			styler.ColourTo(pos - 1, SCE_RUST_OPERATOR);
		} else if (c == '\'') {
			ScanCharacterLiteralOrLifetime(styler, pos, false);
		} else if (c == '"') {
			pos++;
			ResumeString(styler, pos, max, false);
		} else {
			/* Nothing recognised this character. */
			pos++;
			styler.ColourTo(pos - 1, SCE_RUST_LEXERROR);
		}
	}
	styler.ColourTo(pos - 1, SCE_RUST_DEFAULT);
	styler.Flush();
}
716 
Fold(unsigned int startPos,int length,int initStyle,IDocument * pAccess)717 void SCI_METHOD LexerRust::Fold(unsigned int startPos, int length, int initStyle, IDocument *pAccess) {
718 
719 	if (!options.fold)
720 		return;
721 
722 	LexAccessor styler(pAccess);
723 
724 	unsigned int endPos = startPos + length;
725 	int visibleChars = 0;
726 	bool inLineComment = false;
727 	int lineCurrent = styler.GetLine(startPos);
728 	int levelCurrent = SC_FOLDLEVELBASE;
729 	if (lineCurrent > 0)
730 		levelCurrent = styler.LevelAt(lineCurrent-1) >> 16;
731 	unsigned int lineStartNext = styler.LineStart(lineCurrent+1);
732 	int levelMinCurrent = levelCurrent;
733 	int levelNext = levelCurrent;
734 	char chNext = styler[startPos];
735 	int styleNext = styler.StyleAt(startPos);
736 	int style = initStyle;
737 	const bool userDefinedFoldMarkers = !options.foldExplicitStart.empty() && !options.foldExplicitEnd.empty();
738 	for (unsigned int i = startPos; i < endPos; i++) {
739 		char ch = chNext;
740 		chNext = styler.SafeGetCharAt(i + 1);
741 		int stylePrev = style;
742 		style = styleNext;
743 		styleNext = styler.StyleAt(i + 1);
744 		bool atEOL = i == (lineStartNext-1);
745 		if ((style == SCE_RUST_COMMENTLINE) || (style == SCE_RUST_COMMENTLINEDOC))
746 			inLineComment = true;
747 		if (options.foldComment && options.foldCommentMultiline && IsStreamCommentStyle(style) && !inLineComment) {
748 			if (!IsStreamCommentStyle(stylePrev)) {
749 				levelNext++;
750 			} else if (!IsStreamCommentStyle(styleNext) && !atEOL) {
751 				// Comments don't end at end of line and the next character may be unstyled.
752 				levelNext--;
753 			}
754 		}
755 		if (options.foldComment && options.foldCommentExplicit && ((style == SCE_RUST_COMMENTLINE) || options.foldExplicitAnywhere)) {
756 			if (userDefinedFoldMarkers) {
757 				if (styler.Match(i, options.foldExplicitStart.c_str())) {
758 					levelNext++;
759 				} else if (styler.Match(i, options.foldExplicitEnd.c_str())) {
760 					levelNext--;
761 				}
762 			} else {
763 				if ((ch == '/') && (chNext == '/')) {
764 					char chNext2 = styler.SafeGetCharAt(i + 2);
765 					if (chNext2 == '{') {
766 						levelNext++;
767 					} else if (chNext2 == '}') {
768 						levelNext--;
769 					}
770 				}
771 			}
772 		}
773 		if (options.foldSyntaxBased && (style == SCE_RUST_OPERATOR)) {
774 			if (ch == '{') {
775 				// Measure the minimum before a '{' to allow
776 				// folding on "} else {"
777 				if (levelMinCurrent > levelNext) {
778 					levelMinCurrent = levelNext;
779 				}
780 				levelNext++;
781 			} else if (ch == '}') {
782 				levelNext--;
783 			}
784 		}
785 		if (!IsASpace(ch))
786 			visibleChars++;
787 		if (atEOL || (i == endPos-1)) {
788 			int levelUse = levelCurrent;
789 			if (options.foldSyntaxBased && options.foldAtElse) {
790 				levelUse = levelMinCurrent;
791 			}
792 			int lev = levelUse | levelNext << 16;
793 			if (visibleChars == 0 && options.foldCompact)
794 				lev |= SC_FOLDLEVELWHITEFLAG;
795 			if (levelUse < levelNext)
796 				lev |= SC_FOLDLEVELHEADERFLAG;
797 			if (lev != styler.LevelAt(lineCurrent)) {
798 				styler.SetLevel(lineCurrent, lev);
799 			}
800 			lineCurrent++;
801 			lineStartNext = styler.LineStart(lineCurrent+1);
802 			levelCurrent = levelNext;
803 			levelMinCurrent = levelCurrent;
804 			if (atEOL && (i == static_cast<unsigned int>(styler.Length()-1))) {
805 				// There is an empty line at end of file so give it same level and empty
806 				styler.SetLevel(lineCurrent, (levelCurrent | levelCurrent << 16) | SC_FOLDLEVELWHITEFLAG);
807 			}
808 			visibleChars = 0;
809 			inLineComment = false;
810 		}
811 	}
812 }
813 
// Module object for the Rust lexer: language id SCLEX_RUST, the factory that
// creates LexerRust objects, the name "rust" and the keyword list descriptions.
LexerModule lmRust(SCLEX_RUST, LexerRust::LexerFactoryRust, "rust", rustWordLists);
815