1 /** @file LexRust.cxx
2 ** Lexer for Rust.
3 **
4 ** Copyright (c) 2013 by SiegeLord <slabode@aim.com>
5 ** Converted to lexer object and added further folding features/properties by "Udo Lechner" <dlchnr(at)gmx(dot)net>
6 **/
7 // Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>
8 // The License.txt file describes the conditions under which this software may be distributed.
9
10 #include <stdlib.h>
11 #include <string.h>
12 #include <stdio.h>
13 #include <stdarg.h>
14 #include <assert.h>
15 #include <ctype.h>
16
17 #include <string>
18 #include <map>
19
20 #include "ILexer.h"
21 #include "Scintilla.h"
22 #include "SciLexer.h"
23
24 #include "PropSetSimple.h"
25 #include "WordList.h"
26 #include "LexAccessor.h"
27 #include "Accessor.h"
28 #include "StyleContext.h"
29 #include "CharacterSet.h"
30 #include "LexerModule.h"
31 #include "OptionSet.h"
32 #include "DefaultLexer.h"
33
34 using namespace Scintilla;
35
// Number of keyword lists handled by the lexer; sizes LexerRust::keywords and rustWordLists.
static const int NUM_RUST_KEYWORD_LISTS = 7;
// Maximum number of identifier bytes copied for keyword lookup; longer identifiers are truncated.
static const int MAX_RUST_IDENT_CHARS = 1023;
38
IsStreamCommentStyle(int style)39 static bool IsStreamCommentStyle(int style) {
40 return style == SCE_RUST_COMMENTBLOCK ||
41 style == SCE_RUST_COMMENTBLOCKDOC;
42 }
43
// Options used for LexerRust.
// Each member is bound to a property name by OptionSetRust; the constructor
// supplies the value used until that property is set.
struct OptionsRust {
	bool fold;
	bool foldSyntaxBased;
	bool foldComment;
	bool foldCommentMultiline;
	bool foldCommentExplicit;
	std::string foldExplicitStart;
	std::string foldExplicitEnd;
	bool foldExplicitAnywhere;
	bool foldCompact;
	int foldAtElseInt;
	bool foldAtElse;

	// Initialisers are listed in declaration order; the two explicit fold
	// marker strings start out empty.
	OptionsRust() :
		fold(false),
		foldSyntaxBased(true),
		foldComment(false),
		foldCommentMultiline(true),
		foldCommentExplicit(true),
		foldExplicitStart(),
		foldExplicitEnd(),
		foldExplicitAnywhere(false),
		foldCompact(true),
		foldAtElseInt(-1),
		foldAtElse(false) {
	}
};
71
72 static const char * const rustWordLists[NUM_RUST_KEYWORD_LISTS + 1] = {
73 "Primary keywords and identifiers",
74 "Built in types",
75 "Other keywords",
76 "Keywords 4",
77 "Keywords 5",
78 "Keywords 6",
79 "Keywords 7",
80 0,
81 };
82
// Binds property names (and, where given, their descriptions) to the members
// of OptionsRust, and registers the keyword list descriptions.
struct OptionSetRust : public OptionSet<OptionsRust> {
	OptionSetRust() {
		// Generic folding properties; registered without a description.
		DefineProperty("fold", &OptionsRust::fold);

		DefineProperty("fold.comment", &OptionsRust::foldComment);

		DefineProperty("fold.compact", &OptionsRust::foldCompact);

		DefineProperty("fold.at.else", &OptionsRust::foldAtElse);

		// Rust-specific folding properties.
		DefineProperty("fold.rust.syntax.based", &OptionsRust::foldSyntaxBased,
			"Set this property to 0 to disable syntax based folding.");

		DefineProperty("fold.rust.comment.multiline", &OptionsRust::foldCommentMultiline,
			"Set this property to 0 to disable folding multi-line comments when fold.comment=1.");

		DefineProperty("fold.rust.comment.explicit", &OptionsRust::foldCommentExplicit,
			"Set this property to 0 to disable folding explicit fold points when fold.comment=1.");

		// Custom explicit fold markers (string-valued properties).
		DefineProperty("fold.rust.explicit.start", &OptionsRust::foldExplicitStart,
			"The string to use for explicit fold start points, replacing the standard //{.");

		DefineProperty("fold.rust.explicit.end", &OptionsRust::foldExplicitEnd,
			"The string to use for explicit fold end points, replacing the standard //}.");

		DefineProperty("fold.rust.explicit.anywhere", &OptionsRust::foldExplicitAnywhere,
			"Set this property to 1 to enable explicit fold points anywhere, not just in line comments.");

		// Integer-valued property bound to foldAtElseInt (initialised to -1 by OptionsRust).
		DefineProperty("lexer.rust.fold.at.else", &OptionsRust::foldAtElseInt,
			"This option enables Rust folding on a \"} else {\" line of an if statement.");

		DefineWordListSets(rustWordLists);
	}
};
117
118 class LexerRust : public DefaultLexer {
119 WordList keywords[NUM_RUST_KEYWORD_LISTS];
120 OptionsRust options;
121 OptionSetRust osRust;
122 public:
LexerRust()123 LexerRust() : DefaultLexer("rust", SCLEX_RUST) {
124 }
~LexerRust()125 virtual ~LexerRust() {
126 }
Release()127 void SCI_METHOD Release() override {
128 delete this;
129 }
Version() const130 int SCI_METHOD Version() const override {
131 return lvRelease5;
132 }
PropertyNames()133 const char * SCI_METHOD PropertyNames() override {
134 return osRust.PropertyNames();
135 }
PropertyType(const char * name)136 int SCI_METHOD PropertyType(const char *name) override {
137 return osRust.PropertyType(name);
138 }
DescribeProperty(const char * name)139 const char * SCI_METHOD DescribeProperty(const char *name) override {
140 return osRust.DescribeProperty(name);
141 }
142 Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override;
PropertyGet(const char * key)143 const char * SCI_METHOD PropertyGet(const char *key) override {
144 return osRust.PropertyGet(key);
145 }
DescribeWordListSets()146 const char * SCI_METHOD DescribeWordListSets() override {
147 return osRust.DescribeWordListSets();
148 }
149 Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;
150 void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
151 void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
PrivateCall(int,void *)152 void * SCI_METHOD PrivateCall(int, void *) override {
153 return 0;
154 }
LexerFactoryRust()155 static ILexer5 *LexerFactoryRust() {
156 return new LexerRust();
157 }
158 };
159
PropertySet(const char * key,const char * val)160 Sci_Position SCI_METHOD LexerRust::PropertySet(const char *key, const char *val) {
161 if (osRust.PropertySet(&options, key, val)) {
162 return 0;
163 }
164 return -1;
165 }
166
WordListSet(int n,const char * wl)167 Sci_Position SCI_METHOD LexerRust::WordListSet(int n, const char *wl) {
168 Sci_Position firstModification = -1;
169 if (n < NUM_RUST_KEYWORD_LISTS) {
170 WordList *wordListN = &keywords[n];
171 WordList wlNew;
172 wlNew.Set(wl);
173 if (*wordListN != wlNew) {
174 wordListN->Set(wl);
175 firstModification = 0;
176 }
177 }
178 return firstModification;
179 }
180
/* True for the four characters the lexer treats as whitespace:
 * space, tab, carriage return and line feed. */
static bool IsWhitespace(int c) {
	switch (c) {
	case ' ':
	case '\t':
	case '\r':
	case '\n':
		return true;
	default:
		return false;
	}
}
184
185 /* This isn't quite right for Unicode identifiers */
IsIdentifierStart(int ch)186 static bool IsIdentifierStart(int ch) {
187 return (IsASCII(ch) && (isalpha(ch) || ch == '_')) || !IsASCII(ch);
188 }
189
190 /* This isn't quite right for Unicode identifiers */
IsIdentifierContinue(int ch)191 static bool IsIdentifierContinue(int ch) {
192 return (IsASCII(ch) && (isalnum(ch) || ch == '_')) || !IsASCII(ch);
193 }
194
ScanWhitespace(Accessor & styler,Sci_Position & pos,Sci_Position max)195 static void ScanWhitespace(Accessor& styler, Sci_Position& pos, Sci_Position max) {
196 while (IsWhitespace(styler.SafeGetCharAt(pos, '\0')) && pos < max) {
197 if (pos == styler.LineEnd(styler.GetLine(pos)))
198 styler.SetLineState(styler.GetLine(pos), 0);
199 pos++;
200 }
201 styler.ColourTo(pos-1, SCE_RUST_DEFAULT);
202 }
203
GrabString(char * s,Accessor & styler,Sci_Position start,Sci_Position len)204 static void GrabString(char* s, Accessor& styler, Sci_Position start, Sci_Position len) {
205 for (Sci_Position ii = 0; ii < len; ii++)
206 s[ii] = styler[ii + start];
207 s[len] = '\0';
208 }
209
ScanIdentifier(Accessor & styler,Sci_Position & pos,WordList * keywords)210 static void ScanIdentifier(Accessor& styler, Sci_Position& pos, WordList *keywords) {
211 Sci_Position start = pos;
212 while (IsIdentifierContinue(styler.SafeGetCharAt(pos, '\0')))
213 pos++;
214
215 if (styler.SafeGetCharAt(pos, '\0') == '!') {
216 pos++;
217 styler.ColourTo(pos - 1, SCE_RUST_MACRO);
218 } else {
219 char s[MAX_RUST_IDENT_CHARS + 1];
220 Sci_Position len = pos - start;
221 len = len > MAX_RUST_IDENT_CHARS ? MAX_RUST_IDENT_CHARS : len;
222 GrabString(s, styler, start, len);
223 bool keyword = false;
224 for (int ii = 0; ii < NUM_RUST_KEYWORD_LISTS; ii++) {
225 if (keywords[ii].InList(s)) {
226 styler.ColourTo(pos - 1, SCE_RUST_WORD + ii);
227 keyword = true;
228 break;
229 }
230 }
231 if (!keyword) {
232 styler.ColourTo(pos - 1, SCE_RUST_IDENTIFIER);
233 }
234 }
235 }
236
237 /* Scans a sequence of digits, returning true if it found any. */
ScanDigits(Accessor & styler,Sci_Position & pos,int base)238 static bool ScanDigits(Accessor& styler, Sci_Position& pos, int base) {
239 Sci_Position old_pos = pos;
240 for (;;) {
241 int c = styler.SafeGetCharAt(pos, '\0');
242 if (IsADigit(c, base) || c == '_')
243 pos++;
244 else
245 break;
246 }
247 return old_pos != pos;
248 }
249
250 /* Scans an integer and floating point literals. */
ScanNumber(Accessor & styler,Sci_Position & pos)251 static void ScanNumber(Accessor& styler, Sci_Position& pos) {
252 int base = 10;
253 int c = styler.SafeGetCharAt(pos, '\0');
254 int n = styler.SafeGetCharAt(pos + 1, '\0');
255 bool error = false;
256 /* Scan the prefix, thus determining the base.
257 * 10 is default if there's no prefix. */
258 if (c == '0' && n == 'x') {
259 pos += 2;
260 base = 16;
261 } else if (c == '0' && n == 'b') {
262 pos += 2;
263 base = 2;
264 } else if (c == '0' && n == 'o') {
265 pos += 2;
266 base = 8;
267 }
268
269 /* Scan initial digits. The literal is malformed if there are none. */
270 error |= !ScanDigits(styler, pos, base);
271 /* See if there's an integer suffix. We mimic the Rust's lexer
272 * and munch it even if there was an error above. */
273 c = styler.SafeGetCharAt(pos, '\0');
274 if (c == 'u' || c == 'i') {
275 pos++;
276 c = styler.SafeGetCharAt(pos, '\0');
277 n = styler.SafeGetCharAt(pos + 1, '\0');
278 if (c == '8') {
279 pos++;
280 } else if (c == '1' && n == '6') {
281 pos += 2;
282 } else if (c == '3' && n == '2') {
283 pos += 2;
284 } else if (c == '6' && n == '4') {
285 pos += 2;
286 } else if (styler.Match(pos, "size")) {
287 pos += 4;
288 } else {
289 error = true;
290 }
291 /* See if it's a floating point literal. These literals have to be base 10.
292 */
293 } else if (!error) {
294 /* If there's a period, it's a floating point literal unless it's
295 * followed by an identifier (meaning this is a method call, e.g.
296 * `1.foo()`) or another period, in which case it's a range (e.g. 1..2)
297 */
298 n = styler.SafeGetCharAt(pos + 1, '\0');
299 if (c == '.' && !(IsIdentifierStart(n) || n == '.')) {
300 error |= base != 10;
301 pos++;
302 /* It's ok to have no digits after the period. */
303 ScanDigits(styler, pos, 10);
304 }
305
306 /* Look for the exponentiation. */
307 c = styler.SafeGetCharAt(pos, '\0');
308 if (c == 'e' || c == 'E') {
309 error |= base != 10;
310 pos++;
311 c = styler.SafeGetCharAt(pos, '\0');
312 if (c == '-' || c == '+')
313 pos++;
314 /* It is invalid to have no digits in the exponent. */
315 error |= !ScanDigits(styler, pos, 10);
316 }
317
318 /* Scan the floating point suffix. */
319 c = styler.SafeGetCharAt(pos, '\0');
320 if (c == 'f') {
321 error |= base != 10;
322 pos++;
323 c = styler.SafeGetCharAt(pos, '\0');
324 n = styler.SafeGetCharAt(pos + 1, '\0');
325 if (c == '3' && n == '2') {
326 pos += 2;
327 } else if (c == '6' && n == '4') {
328 pos += 2;
329 } else {
330 error = true;
331 }
332 }
333 }
334
335 if (error)
336 styler.ColourTo(pos - 1, SCE_RUST_LEXERROR);
337 else
338 styler.ColourTo(pos - 1, SCE_RUST_NUMBER);
339 }
340
/* True when `c` on its own is styled as an operator / punctuation character. */
static bool IsOneCharOperator(int c) {
	switch (c) {
	case ';': case ',': case '(': case ')':
	case '{': case '}': case '[': case ']':
	case '@': case '#': case '~': case '+':
	case '*': case '/': case '^': case '%':
	case '.': case ':': case '!': case '<':
	case '>': case '=': case '-': case '&':
	case '|': case '$': case '?':
		return true;
	default:
		return false;
	}
}
350
/* True when the character pair (c, n) forms one of the two-character
 * operators recognised by the lexer. */
static bool IsTwoCharOperator(int c, int n) {
	switch (c) {
	case '.':
		return n == '.';
	case ':':
		return n == ':';
	case '<':
		return n == '<' || n == '=';
	case '>':
		return n == '>' || n == '=';
	case '=':
		return n == '=' || n == '>';
	case '-':
		return n == '>' || n == '=';
	case '&':
		return n == '&' || n == '=';
	case '|':
		return n == '|' || n == '=';
	case '!':
	case '+':
	case '*':
	case '/':
	case '^':
	case '%':
		return n == '=';
	default:
		return false;
	}
}
363
/* True for the two three-character operators: "<<=" and ">>=". */
static bool IsThreeCharOperator(int c, int n, int n2) {
	const bool isShift = (c == '<' || c == '>') && n == c;
	return isShift && n2 == '=';
}
368
/* True when `c` may follow a backslash as a simple (single character)
 * escape: n, r, t, backslash, single quote, double quote or 0. */
static bool IsValidCharacterEscape(int c) {
	switch (c) {
	case 'n':
	case 'r':
	case 't':
	case '\\':
	case '\'':
	case '"':
	case '0':
		return true;
	default:
		return false;
	}
}
373
IsValidStringEscape(int c)374 static bool IsValidStringEscape(int c) {
375 return IsValidCharacterEscape(c) || c == '\n' || c == '\r';
376 }
377
ScanNumericEscape(Accessor & styler,Sci_Position & pos,Sci_Position num_digits,bool stop_asap)378 static bool ScanNumericEscape(Accessor &styler, Sci_Position& pos, Sci_Position num_digits, bool stop_asap) {
379 for (;;) {
380 int c = styler.SafeGetCharAt(pos, '\0');
381 if (!IsADigit(c, 16))
382 break;
383 num_digits--;
384 pos++;
385 if (num_digits == 0 && stop_asap)
386 return true;
387 }
388 if (num_digits == 0) {
389 return true;
390 } else {
391 return false;
392 }
393 }
394
/* Scans a character literal ('x'), a byte character literal (when `ascii_only`
 * is set) or a lifetime ('a), starting with `pos` on the opening quote.
 * The text is coloured as SCE_RUST_LIFETIME, SCE_RUST_CHARACTER /
 * SCE_RUST_BYTECHARACTER, or SCE_RUST_LEXERROR, and `pos` is left just past it.
 *
 * This is overly permissive for character literals in order to accept UTF-8 encoded
 * character literals. */
static void ScanCharacterLiteralOrLifetime(Accessor &styler, Sci_Position& pos, bool ascii_only) {
	// Step over the opening quote.
	pos++;
	int c = styler.SafeGetCharAt(pos, '\0');
	int n = styler.SafeGetCharAt(pos + 1, '\0');
	bool done = false;
	// A lifetime is only possible outside byte literals and must begin like an identifier.
	bool valid_lifetime = !ascii_only && IsIdentifierStart(c);
	bool valid_char = true;
	// True only while examining the first character after the opening quote.
	bool first = true;
	while (!done) {
		switch (c) {
			case '\\':
				// An escape sequence always ends the scan of the literal body.
				done = true;
				if (IsValidCharacterEscape(n)) {
					pos += 2;
				} else if (n == 'x') {
					// \xHH: exactly two hex digits.
					pos += 2;
					valid_char = ScanNumericEscape(styler, pos, 2, false);
				} else if (n == 'u' && !ascii_only) {
					pos += 2;
					if (styler.SafeGetCharAt(pos, '\0') != '{') {
						// old-style
						valid_char = ScanNumericEscape(styler, pos, 4, false);
					} else {
						// \u{...}: hex digits followed by '}'. The loop stops on the
						// first non-digit, or on the seventh digit, which then fails
						// the '}' test below.
						int n_digits = 0;
						while (IsADigit(styler.SafeGetCharAt(++pos, '\0'), 16) && n_digits++ < 6) {
						}
						if (n_digits > 0 && styler.SafeGetCharAt(pos, '\0') == '}')
							pos++;
						else
							valid_char = false;
					}
				} else if (n == 'U' && !ascii_only) {
					// \UHHHHHHHH: exactly eight hex digits.
					pos += 2;
					valid_char = ScanNumericEscape(styler, pos, 8, false);
				} else {
					valid_char = false;
				}
				break;
			case '\'':
				// A quote directly after the opening quote ('') is not a valid character.
				valid_char = !first;
				done = true;
				break;
			case '\t':
			case '\n':
			case '\r':
			case '\0':
				// Tabs, line breaks and the '\0' fallback value cannot appear in the literal.
				valid_char = false;
				done = true;
				break;
			default:
				if (ascii_only && !IsASCII((char)c)) {
					done = true;
					valid_char = false;
				} else if (!IsIdentifierContinue(c) && !first) {
					// After the first character, only identifier characters keep the scan going.
					done = true;
				} else {
					pos++;
				}
				break;
		}
		c = styler.SafeGetCharAt(pos, '\0');
		n = styler.SafeGetCharAt(pos + 1, '\0');

		first = false;
	}
	// A closing quote here means this is a character literal rather than a
	// lifetime; without one it cannot be a valid character literal.
	if (styler.SafeGetCharAt(pos, '\0') == '\'') {
		valid_lifetime = false;
	} else {
		valid_char = false;
	}
	if (valid_lifetime) {
		styler.ColourTo(pos - 1, SCE_RUST_LIFETIME);
	} else if (valid_char) {
		// Include the closing quote in the coloured range.
		pos++;
		styler.ColourTo(pos - 1, ascii_only ? SCE_RUST_BYTECHARACTER : SCE_RUST_CHARACTER);
	} else {
		styler.ColourTo(pos - 1, SCE_RUST_LEXERROR);
	}
}
476
/* Tells the comment scanners whether the doc / non-doc nature of a comment
 * is already decided or must be worked out from the comment text. */
enum CommentState {
	UnknownComment,	// decide from the characters following the comment opener
	DocComment,	// always style as a doc comment
	NotDocComment	// always style as a regular comment
};
482
/*
 * Scans a (possibly nested) block comment from `pos` up to its end or `max`,
 * colouring it as SCE_RUST_COMMENTBLOCK or SCE_RUST_COMMENTBLOCKDOC.
 * `level` is the current nesting depth; it is saved as the line state at each
 * line end passed, and the line state is set to 0 where the comment closes.
 * `state` forces doc / non-doc styling unless it is UnknownComment.
 *
 * The rule for block-doc comments is as follows: /xxN and /x! (where x is an asterisk, N is a non-asterisk) start doc comments.
 * Otherwise it's a regular comment.
 */
static void ResumeBlockComment(Accessor &styler, Sci_Position& pos, Sci_Position max, CommentState state, int level) {
	int c = styler.SafeGetCharAt(pos, '\0');
	// Only consulted when state == UnknownComment.
	bool maybe_doc_comment = false;
	if (c == '*') {
		int n = styler.SafeGetCharAt(pos + 1, '\0');
		// A '*' followed by another '*' or by '/' does not start a doc comment.
		if (n != '*' && n != '/') {
			maybe_doc_comment = true;
		}
	} else if (c == '!') {
		maybe_doc_comment = true;
	}

	for (;;) {
		int n = styler.SafeGetCharAt(pos + 1, '\0');
		// Record the nesting depth at each line end.
		if (pos == styler.LineEnd(styler.GetLine(pos)))
			styler.SetLineState(styler.GetLine(pos), level);
		if (c == '*') {
			pos++;
			if (n == '/') {
				// "*/" closes one nesting level.
				pos++;
				level--;
				if (level == 0) {
					// Outermost comment closed: clear the line state and colour the comment.
					styler.SetLineState(styler.GetLine(pos), 0);
					if (state == DocComment || (state == UnknownComment && maybe_doc_comment))
						styler.ColourTo(pos - 1, SCE_RUST_COMMENTBLOCKDOC);
					else
						styler.ColourTo(pos - 1, SCE_RUST_COMMENTBLOCK);
					break;
				}
			}
		} else if (c == '/') {
			pos++;
			if (n == '*') {
				// "/*" opens a nested comment.
				pos++;
				level++;
			}
		}
		else {
			pos++;
		}
		// Reached the end of the range with the comment still open: colour what was scanned.
		if (pos >= max) {
			if (state == DocComment || (state == UnknownComment && maybe_doc_comment))
				styler.ColourTo(pos - 1, SCE_RUST_COMMENTBLOCKDOC);
			else
				styler.ColourTo(pos - 1, SCE_RUST_COMMENTBLOCK);
			break;
		}
		c = styler.SafeGetCharAt(pos, '\0');
	}
}
537
538 /*
539 * The rule for line-doc comments is as follows... ///N and //! (where N is a non slash) start doc comments.
540 * Otherwise it's a normal line comment.
541 */
ResumeLineComment(Accessor & styler,Sci_Position & pos,Sci_Position max,CommentState state)542 static void ResumeLineComment(Accessor &styler, Sci_Position& pos, Sci_Position max, CommentState state) {
543 bool maybe_doc_comment = false;
544 int c = styler.SafeGetCharAt(pos, '\0');
545 if (c == '/') {
546 if (pos < max) {
547 pos++;
548 c = styler.SafeGetCharAt(pos, '\0');
549 if (c != '/') {
550 maybe_doc_comment = true;
551 }
552 }
553 } else if (c == '!') {
554 maybe_doc_comment = true;
555 }
556
557 while (pos < max && c != '\n') {
558 if (pos == styler.LineEnd(styler.GetLine(pos)))
559 styler.SetLineState(styler.GetLine(pos), 0);
560 pos++;
561 c = styler.SafeGetCharAt(pos, '\0');
562 }
563
564 if (state == DocComment || (state == UnknownComment && maybe_doc_comment))
565 styler.ColourTo(pos - 1, SCE_RUST_COMMENTLINEDOC);
566 else
567 styler.ColourTo(pos - 1, SCE_RUST_COMMENTLINE);
568 }
569
ScanComments(Accessor & styler,Sci_Position & pos,Sci_Position max)570 static void ScanComments(Accessor &styler, Sci_Position& pos, Sci_Position max) {
571 pos++;
572 int c = styler.SafeGetCharAt(pos, '\0');
573 pos++;
574 if (c == '/')
575 ResumeLineComment(styler, pos, max, UnknownComment);
576 else if (c == '*')
577 ResumeBlockComment(styler, pos, max, UnknownComment, 1);
578 }
579
/* Scans the body of a string (or byte string when `ascii_only` is set) from
 * `pos` until the closing '"', an invalid escape / character, or `max`.
 * The scanned text is coloured SCE_RUST_STRING or SCE_RUST_BYTESTRING in
 * every case; on success the closing quote is included. */
static void ResumeString(Accessor &styler, Sci_Position& pos, Sci_Position max, bool ascii_only) {
	int c = styler.SafeGetCharAt(pos, '\0');
	bool error = false;
	while (c != '"' && !error) {
		// Running into the end of the range counts as an (unterminated) error.
		if (pos >= max) {
			error = true;
			break;
		}
		if (pos == styler.LineEnd(styler.GetLine(pos)))
			styler.SetLineState(styler.GetLine(pos), 0);
		if (c == '\\') {
			int n = styler.SafeGetCharAt(pos + 1, '\0');
			if (IsValidStringEscape(n)) {
				pos += 2;
			} else if (n == 'x') {
				// \xHH: two hex digits; scanning stops as soon as they are read.
				pos += 2;
				error = !ScanNumericEscape(styler, pos, 2, true);
			} else if (n == 'u' && !ascii_only) {
				pos += 2;
				if (styler.SafeGetCharAt(pos, '\0') != '{') {
					// old-style
					error = !ScanNumericEscape(styler, pos, 4, true);
				} else {
					// \u{...}: hex digits followed by '}'. The loop stops on the
					// first non-digit, or on the seventh digit, which then fails
					// the '}' test below.
					int n_digits = 0;
					while (IsADigit(styler.SafeGetCharAt(++pos, '\0'), 16) && n_digits++ < 6) {
					}
					if (n_digits > 0 && styler.SafeGetCharAt(pos, '\0') == '}')
						pos++;
					else
						error = true;
				}
			} else if (n == 'U' && !ascii_only) {
				pos += 2;
				error = !ScanNumericEscape(styler, pos, 8, true);
			} else {
				// Unknown escape: step over the backslash only and stop.
				pos += 1;
				error = true;
			}
		} else {
			// Byte strings may only contain ASCII characters.
			if (ascii_only && !IsASCII((char)c))
				error = true;
			else
				pos++;
		}
		c = styler.SafeGetCharAt(pos, '\0');
	}
	// Include the closing quote when the string terminated normally.
	if (!error)
		pos++;
	styler.ColourTo(pos - 1, ascii_only ? SCE_RUST_BYTESTRING : SCE_RUST_STRING);
}
630
ResumeRawString(Accessor & styler,Sci_Position & pos,Sci_Position max,int num_hashes,bool ascii_only)631 static void ResumeRawString(Accessor &styler, Sci_Position& pos, Sci_Position max, int num_hashes, bool ascii_only) {
632 for (;;) {
633 if (pos == styler.LineEnd(styler.GetLine(pos)))
634 styler.SetLineState(styler.GetLine(pos), num_hashes);
635
636 int c = styler.SafeGetCharAt(pos, '\0');
637 if (c == '"') {
638 pos++;
639 int trailing_num_hashes = 0;
640 while (styler.SafeGetCharAt(pos, '\0') == '#' && trailing_num_hashes < num_hashes) {
641 trailing_num_hashes++;
642 pos++;
643 }
644 if (trailing_num_hashes == num_hashes) {
645 styler.SetLineState(styler.GetLine(pos), 0);
646 break;
647 }
648 } else if (pos >= max) {
649 break;
650 } else {
651 if (ascii_only && !IsASCII((char)c))
652 break;
653 pos++;
654 }
655 }
656 styler.ColourTo(pos - 1, ascii_only ? SCE_RUST_BYTESTRINGR : SCE_RUST_STRINGR);
657 }
658
ScanRawString(Accessor & styler,Sci_Position & pos,Sci_Position max,bool ascii_only)659 static void ScanRawString(Accessor &styler, Sci_Position& pos, Sci_Position max, bool ascii_only) {
660 pos++;
661 int num_hashes = 0;
662 while (styler.SafeGetCharAt(pos, '\0') == '#') {
663 num_hashes++;
664 pos++;
665 }
666 if (styler.SafeGetCharAt(pos, '\0') != '"') {
667 styler.ColourTo(pos - 1, SCE_RUST_LEXERROR);
668 } else {
669 pos++;
670 ResumeRawString(styler, pos, max, num_hashes, ascii_only);
671 }
672 }
673
Lex(Sci_PositionU startPos,Sci_Position length,int initStyle,IDocument * pAccess)674 void SCI_METHOD LexerRust::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
675 PropSetSimple props;
676 Accessor styler(pAccess, &props);
677 Sci_Position pos = startPos;
678 Sci_Position max = pos + length;
679
680 styler.StartAt(pos);
681 styler.StartSegment(pos);
682
683 if (initStyle == SCE_RUST_COMMENTBLOCK || initStyle == SCE_RUST_COMMENTBLOCKDOC) {
684 ResumeBlockComment(styler, pos, max, initStyle == SCE_RUST_COMMENTBLOCKDOC ? DocComment : NotDocComment, styler.GetLineState(styler.GetLine(pos) - 1));
685 } else if (initStyle == SCE_RUST_COMMENTLINE || initStyle == SCE_RUST_COMMENTLINEDOC) {
686 ResumeLineComment(styler, pos, max, initStyle == SCE_RUST_COMMENTLINEDOC ? DocComment : NotDocComment);
687 } else if (initStyle == SCE_RUST_STRING) {
688 ResumeString(styler, pos, max, false);
689 } else if (initStyle == SCE_RUST_BYTESTRING) {
690 ResumeString(styler, pos, max, true);
691 } else if (initStyle == SCE_RUST_STRINGR) {
692 ResumeRawString(styler, pos, max, styler.GetLineState(styler.GetLine(pos) - 1), false);
693 } else if (initStyle == SCE_RUST_BYTESTRINGR) {
694 ResumeRawString(styler, pos, max, styler.GetLineState(styler.GetLine(pos) - 1), true);
695 }
696
697 while (pos < max) {
698 int c = styler.SafeGetCharAt(pos, '\0');
699 int n = styler.SafeGetCharAt(pos + 1, '\0');
700 int n2 = styler.SafeGetCharAt(pos + 2, '\0');
701
702 if (pos == 0 && c == '#' && n == '!' && n2 != '[') {
703 pos += 2;
704 ResumeLineComment(styler, pos, max, NotDocComment);
705 } else if (IsWhitespace(c)) {
706 ScanWhitespace(styler, pos, max);
707 } else if (c == '/' && (n == '/' || n == '*')) {
708 ScanComments(styler, pos, max);
709 } else if (c == 'r' && (n == '#' || n == '"')) {
710 ScanRawString(styler, pos, max, false);
711 } else if (c == 'b' && n == 'r' && (n2 == '#' || n2 == '"')) {
712 pos++;
713 ScanRawString(styler, pos, max, true);
714 } else if (c == 'b' && n == '"') {
715 pos += 2;
716 ResumeString(styler, pos, max, true);
717 } else if (c == 'b' && n == '\'') {
718 pos++;
719 ScanCharacterLiteralOrLifetime(styler, pos, true);
720 } else if (IsIdentifierStart(c)) {
721 ScanIdentifier(styler, pos, keywords);
722 } else if (IsADigit(c)) {
723 ScanNumber(styler, pos);
724 } else if (IsThreeCharOperator(c, n, n2)) {
725 pos += 3;
726 styler.ColourTo(pos - 1, SCE_RUST_OPERATOR);
727 } else if (IsTwoCharOperator(c, n)) {
728 pos += 2;
729 styler.ColourTo(pos - 1, SCE_RUST_OPERATOR);
730 } else if (IsOneCharOperator(c)) {
731 pos++;
732 styler.ColourTo(pos - 1, SCE_RUST_OPERATOR);
733 } else if (c == '\'') {
734 ScanCharacterLiteralOrLifetime(styler, pos, false);
735 } else if (c == '"') {
736 pos++;
737 ResumeString(styler, pos, max, false);
738 } else {
739 pos++;
740 styler.ColourTo(pos - 1, SCE_RUST_LEXERROR);
741 }
742 }
743 styler.ColourTo(pos - 1, SCE_RUST_DEFAULT);
744 styler.Flush();
745 }
746
Fold(Sci_PositionU startPos,Sci_Position length,int initStyle,IDocument * pAccess)747 void SCI_METHOD LexerRust::Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
748
749 if (!options.fold)
750 return;
751
752 LexAccessor styler(pAccess);
753
754 Sci_PositionU endPos = startPos + length;
755 int visibleChars = 0;
756 bool inLineComment = false;
757 Sci_Position lineCurrent = styler.GetLine(startPos);
758 int levelCurrent = SC_FOLDLEVELBASE;
759 if (lineCurrent > 0)
760 levelCurrent = styler.LevelAt(lineCurrent-1) >> 16;
761 Sci_PositionU lineStartNext = styler.LineStart(lineCurrent+1);
762 int levelMinCurrent = levelCurrent;
763 int levelNext = levelCurrent;
764 char chNext = styler[startPos];
765 int styleNext = styler.StyleAt(startPos);
766 int style = initStyle;
767 const bool userDefinedFoldMarkers = !options.foldExplicitStart.empty() && !options.foldExplicitEnd.empty();
768 for (Sci_PositionU i = startPos; i < endPos; i++) {
769 char ch = chNext;
770 chNext = styler.SafeGetCharAt(i + 1);
771 int stylePrev = style;
772 style = styleNext;
773 styleNext = styler.StyleAt(i + 1);
774 bool atEOL = i == (lineStartNext-1);
775 if ((style == SCE_RUST_COMMENTLINE) || (style == SCE_RUST_COMMENTLINEDOC))
776 inLineComment = true;
777 if (options.foldComment && options.foldCommentMultiline && IsStreamCommentStyle(style) && !inLineComment) {
778 if (!IsStreamCommentStyle(stylePrev)) {
779 levelNext++;
780 } else if (!IsStreamCommentStyle(styleNext) && !atEOL) {
781 // Comments don't end at end of line and the next character may be unstyled.
782 levelNext--;
783 }
784 }
785 if (options.foldComment && options.foldCommentExplicit && ((style == SCE_RUST_COMMENTLINE) || options.foldExplicitAnywhere)) {
786 if (userDefinedFoldMarkers) {
787 if (styler.Match(i, options.foldExplicitStart.c_str())) {
788 levelNext++;
789 } else if (styler.Match(i, options.foldExplicitEnd.c_str())) {
790 levelNext--;
791 }
792 } else {
793 if ((ch == '/') && (chNext == '/')) {
794 char chNext2 = styler.SafeGetCharAt(i + 2);
795 if (chNext2 == '{') {
796 levelNext++;
797 } else if (chNext2 == '}') {
798 levelNext--;
799 }
800 }
801 }
802 }
803 if (options.foldSyntaxBased && (style == SCE_RUST_OPERATOR)) {
804 if (ch == '{') {
805 // Measure the minimum before a '{' to allow
806 // folding on "} else {"
807 if (levelMinCurrent > levelNext) {
808 levelMinCurrent = levelNext;
809 }
810 levelNext++;
811 } else if (ch == '}') {
812 levelNext--;
813 }
814 }
815 if (!IsASpace(ch))
816 visibleChars++;
817 if (atEOL || (i == endPos-1)) {
818 int levelUse = levelCurrent;
819 if (options.foldSyntaxBased && options.foldAtElse) {
820 levelUse = levelMinCurrent;
821 }
822 int lev = levelUse | levelNext << 16;
823 if (visibleChars == 0 && options.foldCompact)
824 lev |= SC_FOLDLEVELWHITEFLAG;
825 if (levelUse < levelNext)
826 lev |= SC_FOLDLEVELHEADERFLAG;
827 if (lev != styler.LevelAt(lineCurrent)) {
828 styler.SetLevel(lineCurrent, lev);
829 }
830 lineCurrent++;
831 lineStartNext = styler.LineStart(lineCurrent+1);
832 levelCurrent = levelNext;
833 levelMinCurrent = levelCurrent;
834 if (atEOL && (i == static_cast<Sci_PositionU>(styler.Length()-1))) {
835 // There is an empty line at end of file so give it same level and empty
836 styler.SetLevel(lineCurrent, (levelCurrent | levelCurrent << 16) | SC_FOLDLEVELWHITEFLAG);
837 }
838 visibleChars = 0;
839 inLineComment = false;
840 }
841 }
842 }
843
// Lexer module object for Rust: ties SCLEX_RUST and the name "rust" to the
// LexerRust factory and the keyword list descriptions.
LexerModule lmRust(SCLEX_RUST, LexerRust::LexerFactoryRust, "rust", rustWordLists);
845