1 /** @file LexRust.cxx
2 ** Lexer for Rust.
3 **
4 ** Copyright (c) 2013 by SiegeLord <slabode@aim.com>
5 ** Converted to lexer object and added further folding features/properties by "Udo Lechner" <dlchnr(at)gmx(dot)net>
6 **/
7 // Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>
8 // The License.txt file describes the conditions under which this software may be distributed.
9
10 #include <stdlib.h>
11 #include <string.h>
12 #include <stdio.h>
13 #include <stdarg.h>
14 #include <assert.h>
15 #include <ctype.h>
16
17 #include <string>
18 #include <map>
19
20 #include "ILexer.h"
21 #include "Scintilla.h"
22 #include "SciLexer.h"
23
24 #include "PropSetSimple.h"
25 #include "WordList.h"
26 #include "LexAccessor.h"
27 #include "Accessor.h"
28 #include "StyleContext.h"
29 #include "CharacterSet.h"
30 #include "LexerModule.h"
31 #include "OptionSet.h"
32 #include "DefaultLexer.h"
33
34 using namespace Scintilla;
35
// Number of keyword lists the lexer keeps; also the number of named entries in rustWordLists.
static constexpr int NUM_RUST_KEYWORD_LISTS = 7;
// Upper bound on the identifier bytes copied for keyword lookup; longer identifiers are truncated.
static constexpr int MAX_RUST_IDENT_CHARS = 1023;
38
IsStreamCommentStyle(int style)39 static bool IsStreamCommentStyle(int style) {
40 return style == SCE_RUST_COMMENTBLOCK ||
41 style == SCE_RUST_COMMENTBLOCKDOC;
42 }
43
// Settings consumed by LexerRust. Each member is bound to a property name
// by OptionSetRust; the constructor supplies the defaults.
struct OptionsRust {
	bool fold;                      // "fold"
	bool foldSyntaxBased;           // "fold.rust.syntax.based"
	bool foldComment;               // "fold.comment"
	bool foldCommentMultiline;      // "fold.rust.comment.multiline"
	bool foldCommentExplicit;       // "fold.rust.comment.explicit"
	std::string foldExplicitStart;  // "fold.rust.explicit.start"
	std::string foldExplicitEnd;    // "fold.rust.explicit.end"
	bool foldExplicitAnywhere;      // "fold.rust.explicit.anywhere"
	bool foldCompact;               // "fold.compact"
	int foldAtElseInt;              // "lexer.rust.fold.at.else"; starts at -1
	bool foldAtElse;                // "fold.at.else"

	// Initialises every option to its default value.
	OptionsRust() :
		fold(false),
		foldSyntaxBased(true),
		foldComment(false),
		foldCommentMultiline(true),
		foldCommentExplicit(true),
		foldExplicitStart(""),
		foldExplicitEnd(""),
		foldExplicitAnywhere(false),
		foldCompact(true),
		foldAtElseInt(-1),
		foldAtElse(false) {
	}
};
71
72 static const char * const rustWordLists[NUM_RUST_KEYWORD_LISTS + 1] = {
73 "Primary keywords and identifiers",
74 "Built in types",
75 "Other keywords",
76 "Keywords 4",
77 "Keywords 5",
78 "Keywords 6",
79 "Keywords 7",
80 0,
81 };
82
83 struct OptionSetRust : public OptionSet<OptionsRust> {
OptionSetRustOptionSetRust84 OptionSetRust() {
85 DefineProperty("fold", &OptionsRust::fold);
86
87 DefineProperty("fold.comment", &OptionsRust::foldComment);
88
89 DefineProperty("fold.compact", &OptionsRust::foldCompact);
90
91 DefineProperty("fold.at.else", &OptionsRust::foldAtElse);
92
93 DefineProperty("fold.rust.syntax.based", &OptionsRust::foldSyntaxBased,
94 "Set this property to 0 to disable syntax based folding.");
95
96 DefineProperty("fold.rust.comment.multiline", &OptionsRust::foldCommentMultiline,
97 "Set this property to 0 to disable folding multi-line comments when fold.comment=1.");
98
99 DefineProperty("fold.rust.comment.explicit", &OptionsRust::foldCommentExplicit,
100 "Set this property to 0 to disable folding explicit fold points when fold.comment=1.");
101
102 DefineProperty("fold.rust.explicit.start", &OptionsRust::foldExplicitStart,
103 "The string to use for explicit fold start points, replacing the standard //{.");
104
105 DefineProperty("fold.rust.explicit.end", &OptionsRust::foldExplicitEnd,
106 "The string to use for explicit fold end points, replacing the standard //}.");
107
108 DefineProperty("fold.rust.explicit.anywhere", &OptionsRust::foldExplicitAnywhere,
109 "Set this property to 1 to enable explicit fold points anywhere, not just in line comments.");
110
111 DefineProperty("lexer.rust.fold.at.else", &OptionsRust::foldAtElseInt,
112 "This option enables Rust folding on a \"} else {\" line of an if statement.");
113
114 DefineWordListSets(rustWordLists);
115 }
116 };
117
118 class LexerRust : public DefaultLexer {
119 WordList keywords[NUM_RUST_KEYWORD_LISTS];
120 OptionsRust options;
121 OptionSetRust osRust;
122 public:
~LexerRust()123 virtual ~LexerRust() {
124 }
Release()125 void SCI_METHOD Release() override {
126 delete this;
127 }
Version() const128 int SCI_METHOD Version() const override {
129 return lvOriginal;
130 }
PropertyNames()131 const char * SCI_METHOD PropertyNames() override {
132 return osRust.PropertyNames();
133 }
PropertyType(const char * name)134 int SCI_METHOD PropertyType(const char *name) override {
135 return osRust.PropertyType(name);
136 }
DescribeProperty(const char * name)137 const char * SCI_METHOD DescribeProperty(const char *name) override {
138 return osRust.DescribeProperty(name);
139 }
140 Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override;
DescribeWordListSets()141 const char * SCI_METHOD DescribeWordListSets() override {
142 return osRust.DescribeWordListSets();
143 }
144 Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;
145 void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
146 void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
PrivateCall(int,void *)147 void * SCI_METHOD PrivateCall(int, void *) override {
148 return 0;
149 }
LexerFactoryRust()150 static ILexer *LexerFactoryRust() {
151 return new LexerRust();
152 }
153 };
154
PropertySet(const char * key,const char * val)155 Sci_Position SCI_METHOD LexerRust::PropertySet(const char *key, const char *val) {
156 if (osRust.PropertySet(&options, key, val)) {
157 return 0;
158 }
159 return -1;
160 }
161
WordListSet(int n,const char * wl)162 Sci_Position SCI_METHOD LexerRust::WordListSet(int n, const char *wl) {
163 Sci_Position firstModification = -1;
164 if (n < NUM_RUST_KEYWORD_LISTS) {
165 WordList *wordListN = &keywords[n];
166 WordList wlNew;
167 wlNew.Set(wl);
168 if (*wordListN != wlNew) {
169 wordListN->Set(wl);
170 firstModification = 0;
171 }
172 }
173 return firstModification;
174 }
175
// True for the four characters the lexer treats as whitespace:
// space, tab, carriage return and line feed.
static bool IsWhitespace(int c) {
	switch (c) {
	case ' ':
	case '\t':
	case '\r':
	case '\n':
		return true;
	default:
		return false;
	}
}
179
180 /* This isn't quite right for Unicode identifiers */
IsIdentifierStart(int ch)181 static bool IsIdentifierStart(int ch) {
182 return (IsASCII(ch) && (isalpha(ch) || ch == '_')) || !IsASCII(ch);
183 }
184
185 /* This isn't quite right for Unicode identifiers */
IsIdentifierContinue(int ch)186 static bool IsIdentifierContinue(int ch) {
187 return (IsASCII(ch) && (isalnum(ch) || ch == '_')) || !IsASCII(ch);
188 }
189
ScanWhitespace(Accessor & styler,Sci_Position & pos,Sci_Position max)190 static void ScanWhitespace(Accessor& styler, Sci_Position& pos, Sci_Position max) {
191 while (IsWhitespace(styler.SafeGetCharAt(pos, '\0')) && pos < max) {
192 if (pos == styler.LineEnd(styler.GetLine(pos)))
193 styler.SetLineState(styler.GetLine(pos), 0);
194 pos++;
195 }
196 styler.ColourTo(pos-1, SCE_RUST_DEFAULT);
197 }
198
GrabString(char * s,Accessor & styler,Sci_Position start,Sci_Position len)199 static void GrabString(char* s, Accessor& styler, Sci_Position start, Sci_Position len) {
200 for (Sci_Position ii = 0; ii < len; ii++)
201 s[ii] = styler[ii + start];
202 s[len] = '\0';
203 }
204
ScanIdentifier(Accessor & styler,Sci_Position & pos,WordList * keywords)205 static void ScanIdentifier(Accessor& styler, Sci_Position& pos, WordList *keywords) {
206 Sci_Position start = pos;
207 while (IsIdentifierContinue(styler.SafeGetCharAt(pos, '\0')))
208 pos++;
209
210 if (styler.SafeGetCharAt(pos, '\0') == '!') {
211 pos++;
212 styler.ColourTo(pos - 1, SCE_RUST_MACRO);
213 } else {
214 char s[MAX_RUST_IDENT_CHARS + 1];
215 Sci_Position len = pos - start;
216 len = len > MAX_RUST_IDENT_CHARS ? MAX_RUST_IDENT_CHARS : len;
217 GrabString(s, styler, start, len);
218 bool keyword = false;
219 for (int ii = 0; ii < NUM_RUST_KEYWORD_LISTS; ii++) {
220 if (keywords[ii].InList(s)) {
221 styler.ColourTo(pos - 1, SCE_RUST_WORD + ii);
222 keyword = true;
223 break;
224 }
225 }
226 if (!keyword) {
227 styler.ColourTo(pos - 1, SCE_RUST_IDENTIFIER);
228 }
229 }
230 }
231
232 /* Scans a sequence of digits, returning true if it found any. */
ScanDigits(Accessor & styler,Sci_Position & pos,int base)233 static bool ScanDigits(Accessor& styler, Sci_Position& pos, int base) {
234 Sci_Position old_pos = pos;
235 for (;;) {
236 int c = styler.SafeGetCharAt(pos, '\0');
237 if (IsADigit(c, base) || c == '_')
238 pos++;
239 else
240 break;
241 }
242 return old_pos != pos;
243 }
244
245 /* Scans an integer and floating point literals. */
ScanNumber(Accessor & styler,Sci_Position & pos)246 static void ScanNumber(Accessor& styler, Sci_Position& pos) {
247 int base = 10;
248 int c = styler.SafeGetCharAt(pos, '\0');
249 int n = styler.SafeGetCharAt(pos + 1, '\0');
250 bool error = false;
251 /* Scan the prefix, thus determining the base.
252 * 10 is default if there's no prefix. */
253 if (c == '0' && n == 'x') {
254 pos += 2;
255 base = 16;
256 } else if (c == '0' && n == 'b') {
257 pos += 2;
258 base = 2;
259 } else if (c == '0' && n == 'o') {
260 pos += 2;
261 base = 8;
262 }
263
264 /* Scan initial digits. The literal is malformed if there are none. */
265 error |= !ScanDigits(styler, pos, base);
266 /* See if there's an integer suffix. We mimic the Rust's lexer
267 * and munch it even if there was an error above. */
268 c = styler.SafeGetCharAt(pos, '\0');
269 if (c == 'u' || c == 'i') {
270 pos++;
271 c = styler.SafeGetCharAt(pos, '\0');
272 n = styler.SafeGetCharAt(pos + 1, '\0');
273 if (c == '8') {
274 pos++;
275 } else if (c == '1' && n == '6') {
276 pos += 2;
277 } else if (c == '3' && n == '2') {
278 pos += 2;
279 } else if (c == '6' && n == '4') {
280 pos += 2;
281 } else if (styler.Match(pos, "size")) {
282 pos += 4;
283 } else {
284 error = true;
285 }
286 /* See if it's a floating point literal. These literals have to be base 10.
287 */
288 } else if (!error) {
289 /* If there's a period, it's a floating point literal unless it's
290 * followed by an identifier (meaning this is a method call, e.g.
291 * `1.foo()`) or another period, in which case it's a range (e.g. 1..2)
292 */
293 n = styler.SafeGetCharAt(pos + 1, '\0');
294 if (c == '.' && !(IsIdentifierStart(n) || n == '.')) {
295 error |= base != 10;
296 pos++;
297 /* It's ok to have no digits after the period. */
298 ScanDigits(styler, pos, 10);
299 }
300
301 /* Look for the exponentiation. */
302 c = styler.SafeGetCharAt(pos, '\0');
303 if (c == 'e' || c == 'E') {
304 error |= base != 10;
305 pos++;
306 c = styler.SafeGetCharAt(pos, '\0');
307 if (c == '-' || c == '+')
308 pos++;
309 /* It is invalid to have no digits in the exponent. */
310 error |= !ScanDigits(styler, pos, 10);
311 }
312
313 /* Scan the floating point suffix. */
314 c = styler.SafeGetCharAt(pos, '\0');
315 if (c == 'f') {
316 error |= base != 10;
317 pos++;
318 c = styler.SafeGetCharAt(pos, '\0');
319 n = styler.SafeGetCharAt(pos + 1, '\0');
320 if (c == '3' && n == '2') {
321 pos += 2;
322 } else if (c == '6' && n == '4') {
323 pos += 2;
324 } else {
325 error = true;
326 }
327 }
328 }
329
330 if (error)
331 styler.ColourTo(pos - 1, SCE_RUST_LEXERROR);
332 else
333 styler.ColourTo(pos - 1, SCE_RUST_NUMBER);
334 }
335
// True when `c` on its own is styled as an operator or punctuation character.
static bool IsOneCharOperator(int c) {
	switch (c) {
	case ';': case ',': case '(': case ')':
	case '{': case '}': case '[': case ']':
	case '@': case '#': case '~': case '+':
	case '*': case '/': case '^': case '%':
	case '.': case ':': case '!': case '<':
	case '>': case '=': case '-': case '&':
	case '|': case '$': case '?':
		return true;
	default:
		return false;
	}
}
345
// True when the character pair (c, n) forms one of the recognised
// two-character operators.
static bool IsTwoCharOperator(int c, int n) {
	switch (c) {
	case '.':
		return n == '.';
	case ':':
		return n == ':';
	case '!':
		return n == '=';
	case '<':
		return n == '<' || n == '=';
	case '>':
		return n == '>' || n == '=';
	case '=':
		return n == '=' || n == '>';
	case '-':
		return n == '>' || n == '=';
	case '&':
		return n == '&' || n == '=';
	case '|':
		return n == '|' || n == '=';
	case '+':
	case '*':
	case '/':
	case '^':
	case '%':
		return n == '=';
	default:
		return false;
	}
}
358
// True for the two three-character operators: "<<=" and ">>=".
static bool IsThreeCharOperator(int c, int n, int n2) {
	const bool isShift = (c == '<' || c == '>') && n == c;
	return isShift && n2 == '=';
}
363
// True when `c` may follow a backslash as a simple (single-character)
// escape in a character literal: n, r, t, backslash, ', " or 0.
static bool IsValidCharacterEscape(int c) {
	switch (c) {
	case 'n':
	case 'r':
	case 't':
	case '\\':
	case '\'':
	case '"':
	case '0':
		return true;
	default:
		return false;
	}
}
368
IsValidStringEscape(int c)369 static bool IsValidStringEscape(int c) {
370 return IsValidCharacterEscape(c) || c == '\n' || c == '\r';
371 }
372
ScanNumericEscape(Accessor & styler,Sci_Position & pos,Sci_Position num_digits,bool stop_asap)373 static bool ScanNumericEscape(Accessor &styler, Sci_Position& pos, Sci_Position num_digits, bool stop_asap) {
374 for (;;) {
375 int c = styler.SafeGetCharAt(pos, '\0');
376 if (!IsADigit(c, 16))
377 break;
378 num_digits--;
379 pos++;
380 if (num_digits == 0 && stop_asap)
381 return true;
382 }
383 if (num_digits == 0) {
384 return true;
385 } else {
386 return false;
387 }
388 }
389
390 /* This is overly permissive for character literals in order to accept UTF-8 encoded
391 * character literals. */
ScanCharacterLiteralOrLifetime(Accessor & styler,Sci_Position & pos,bool ascii_only)392 static void ScanCharacterLiteralOrLifetime(Accessor &styler, Sci_Position& pos, bool ascii_only) {
393 pos++;
394 int c = styler.SafeGetCharAt(pos, '\0');
395 int n = styler.SafeGetCharAt(pos + 1, '\0');
396 bool done = false;
397 bool valid_lifetime = !ascii_only && IsIdentifierStart(c);
398 bool valid_char = true;
399 bool first = true;
400 while (!done) {
401 switch (c) {
402 case '\\':
403 done = true;
404 if (IsValidCharacterEscape(n)) {
405 pos += 2;
406 } else if (n == 'x') {
407 pos += 2;
408 valid_char = ScanNumericEscape(styler, pos, 2, false);
409 } else if (n == 'u' && !ascii_only) {
410 pos += 2;
411 if (styler.SafeGetCharAt(pos, '\0') != '{') {
412 // old-style
413 valid_char = ScanNumericEscape(styler, pos, 4, false);
414 } else {
415 int n_digits = 0;
416 while (IsADigit(styler.SafeGetCharAt(++pos, '\0'), 16) && n_digits++ < 6) {
417 }
418 if (n_digits > 0 && styler.SafeGetCharAt(pos, '\0') == '}')
419 pos++;
420 else
421 valid_char = false;
422 }
423 } else if (n == 'U' && !ascii_only) {
424 pos += 2;
425 valid_char = ScanNumericEscape(styler, pos, 8, false);
426 } else {
427 valid_char = false;
428 }
429 break;
430 case '\'':
431 valid_char = !first;
432 done = true;
433 break;
434 case '\t':
435 case '\n':
436 case '\r':
437 case '\0':
438 valid_char = false;
439 done = true;
440 break;
441 default:
442 if (ascii_only && !IsASCII((char)c)) {
443 done = true;
444 valid_char = false;
445 } else if (!IsIdentifierContinue(c) && !first) {
446 done = true;
447 } else {
448 pos++;
449 }
450 break;
451 }
452 c = styler.SafeGetCharAt(pos, '\0');
453 n = styler.SafeGetCharAt(pos + 1, '\0');
454
455 first = false;
456 }
457 if (styler.SafeGetCharAt(pos, '\0') == '\'') {
458 valid_lifetime = false;
459 } else {
460 valid_char = false;
461 }
462 if (valid_lifetime) {
463 styler.ColourTo(pos - 1, SCE_RUST_LIFETIME);
464 } else if (valid_char) {
465 pos++;
466 styler.ColourTo(pos - 1, ascii_only ? SCE_RUST_BYTECHARACTER : SCE_RUST_CHARACTER);
467 } else {
468 styler.ColourTo(pos - 1, SCE_RUST_LEXERROR);
469 }
470 }
471
// What is already known about a comment when scanning of it (re)starts.
enum CommentState {
	UnknownComment,	// decide from the characters that follow the opener
	DocComment,	// already known to be a doc comment
	NotDocComment	// already known to be an ordinary comment
};
477
478 /*
479 * The rule for block-doc comments is as follows: /xxN and /x! (where x is an asterisk, N is a non-asterisk) start doc comments.
480 * Otherwise it's a regular comment.
481 */
ResumeBlockComment(Accessor & styler,Sci_Position & pos,Sci_Position max,CommentState state,int level)482 static void ResumeBlockComment(Accessor &styler, Sci_Position& pos, Sci_Position max, CommentState state, int level) {
483 int c = styler.SafeGetCharAt(pos, '\0');
484 bool maybe_doc_comment = false;
485 if (c == '*') {
486 int n = styler.SafeGetCharAt(pos + 1, '\0');
487 if (n != '*' && n != '/') {
488 maybe_doc_comment = true;
489 }
490 } else if (c == '!') {
491 maybe_doc_comment = true;
492 }
493
494 for (;;) {
495 int n = styler.SafeGetCharAt(pos + 1, '\0');
496 if (pos == styler.LineEnd(styler.GetLine(pos)))
497 styler.SetLineState(styler.GetLine(pos), level);
498 if (c == '*') {
499 pos++;
500 if (n == '/') {
501 pos++;
502 level--;
503 if (level == 0) {
504 styler.SetLineState(styler.GetLine(pos), 0);
505 if (state == DocComment || (state == UnknownComment && maybe_doc_comment))
506 styler.ColourTo(pos - 1, SCE_RUST_COMMENTBLOCKDOC);
507 else
508 styler.ColourTo(pos - 1, SCE_RUST_COMMENTBLOCK);
509 break;
510 }
511 }
512 } else if (c == '/') {
513 pos++;
514 if (n == '*') {
515 pos++;
516 level++;
517 }
518 }
519 else {
520 pos++;
521 }
522 if (pos >= max) {
523 if (state == DocComment || (state == UnknownComment && maybe_doc_comment))
524 styler.ColourTo(pos - 1, SCE_RUST_COMMENTBLOCKDOC);
525 else
526 styler.ColourTo(pos - 1, SCE_RUST_COMMENTBLOCK);
527 break;
528 }
529 c = styler.SafeGetCharAt(pos, '\0');
530 }
531 }
532
533 /*
534 * The rule for line-doc comments is as follows... ///N and //! (where N is a non slash) start doc comments.
535 * Otherwise it's a normal line comment.
536 */
ResumeLineComment(Accessor & styler,Sci_Position & pos,Sci_Position max,CommentState state)537 static void ResumeLineComment(Accessor &styler, Sci_Position& pos, Sci_Position max, CommentState state) {
538 bool maybe_doc_comment = false;
539 int c = styler.SafeGetCharAt(pos, '\0');
540 if (c == '/') {
541 if (pos < max) {
542 pos++;
543 c = styler.SafeGetCharAt(pos, '\0');
544 if (c != '/') {
545 maybe_doc_comment = true;
546 }
547 }
548 } else if (c == '!') {
549 maybe_doc_comment = true;
550 }
551
552 while (pos < max && c != '\n') {
553 if (pos == styler.LineEnd(styler.GetLine(pos)))
554 styler.SetLineState(styler.GetLine(pos), 0);
555 pos++;
556 c = styler.SafeGetCharAt(pos, '\0');
557 }
558
559 if (state == DocComment || (state == UnknownComment && maybe_doc_comment))
560 styler.ColourTo(pos - 1, SCE_RUST_COMMENTLINEDOC);
561 else
562 styler.ColourTo(pos - 1, SCE_RUST_COMMENTLINE);
563 }
564
ScanComments(Accessor & styler,Sci_Position & pos,Sci_Position max)565 static void ScanComments(Accessor &styler, Sci_Position& pos, Sci_Position max) {
566 pos++;
567 int c = styler.SafeGetCharAt(pos, '\0');
568 pos++;
569 if (c == '/')
570 ResumeLineComment(styler, pos, max, UnknownComment);
571 else if (c == '*')
572 ResumeBlockComment(styler, pos, max, UnknownComment, 1);
573 }
574
ResumeString(Accessor & styler,Sci_Position & pos,Sci_Position max,bool ascii_only)575 static void ResumeString(Accessor &styler, Sci_Position& pos, Sci_Position max, bool ascii_only) {
576 int c = styler.SafeGetCharAt(pos, '\0');
577 bool error = false;
578 while (c != '"' && !error) {
579 if (pos >= max) {
580 error = true;
581 break;
582 }
583 if (pos == styler.LineEnd(styler.GetLine(pos)))
584 styler.SetLineState(styler.GetLine(pos), 0);
585 if (c == '\\') {
586 int n = styler.SafeGetCharAt(pos + 1, '\0');
587 if (IsValidStringEscape(n)) {
588 pos += 2;
589 } else if (n == 'x') {
590 pos += 2;
591 error = !ScanNumericEscape(styler, pos, 2, true);
592 } else if (n == 'u' && !ascii_only) {
593 pos += 2;
594 if (styler.SafeGetCharAt(pos, '\0') != '{') {
595 // old-style
596 error = !ScanNumericEscape(styler, pos, 4, true);
597 } else {
598 int n_digits = 0;
599 while (IsADigit(styler.SafeGetCharAt(++pos, '\0'), 16) && n_digits++ < 6) {
600 }
601 if (n_digits > 0 && styler.SafeGetCharAt(pos, '\0') == '}')
602 pos++;
603 else
604 error = true;
605 }
606 } else if (n == 'U' && !ascii_only) {
607 pos += 2;
608 error = !ScanNumericEscape(styler, pos, 8, true);
609 } else {
610 pos += 1;
611 error = true;
612 }
613 } else {
614 if (ascii_only && !IsASCII((char)c))
615 error = true;
616 else
617 pos++;
618 }
619 c = styler.SafeGetCharAt(pos, '\0');
620 }
621 if (!error)
622 pos++;
623 styler.ColourTo(pos - 1, ascii_only ? SCE_RUST_BYTESTRING : SCE_RUST_STRING);
624 }
625
ResumeRawString(Accessor & styler,Sci_Position & pos,Sci_Position max,int num_hashes,bool ascii_only)626 static void ResumeRawString(Accessor &styler, Sci_Position& pos, Sci_Position max, int num_hashes, bool ascii_only) {
627 for (;;) {
628 if (pos == styler.LineEnd(styler.GetLine(pos)))
629 styler.SetLineState(styler.GetLine(pos), num_hashes);
630
631 int c = styler.SafeGetCharAt(pos, '\0');
632 if (c == '"') {
633 pos++;
634 int trailing_num_hashes = 0;
635 while (styler.SafeGetCharAt(pos, '\0') == '#' && trailing_num_hashes < num_hashes) {
636 trailing_num_hashes++;
637 pos++;
638 }
639 if (trailing_num_hashes == num_hashes) {
640 styler.SetLineState(styler.GetLine(pos), 0);
641 break;
642 }
643 } else if (pos >= max) {
644 break;
645 } else {
646 if (ascii_only && !IsASCII((char)c))
647 break;
648 pos++;
649 }
650 }
651 styler.ColourTo(pos - 1, ascii_only ? SCE_RUST_BYTESTRINGR : SCE_RUST_STRINGR);
652 }
653
ScanRawString(Accessor & styler,Sci_Position & pos,Sci_Position max,bool ascii_only)654 static void ScanRawString(Accessor &styler, Sci_Position& pos, Sci_Position max, bool ascii_only) {
655 pos++;
656 int num_hashes = 0;
657 while (styler.SafeGetCharAt(pos, '\0') == '#') {
658 num_hashes++;
659 pos++;
660 }
661 if (styler.SafeGetCharAt(pos, '\0') != '"') {
662 styler.ColourTo(pos - 1, SCE_RUST_LEXERROR);
663 } else {
664 pos++;
665 ResumeRawString(styler, pos, max, num_hashes, ascii_only);
666 }
667 }
668
Lex(Sci_PositionU startPos,Sci_Position length,int initStyle,IDocument * pAccess)669 void SCI_METHOD LexerRust::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
670 PropSetSimple props;
671 Accessor styler(pAccess, &props);
672 Sci_Position pos = startPos;
673 Sci_Position max = pos + length;
674
675 styler.StartAt(pos);
676 styler.StartSegment(pos);
677
678 if (initStyle == SCE_RUST_COMMENTBLOCK || initStyle == SCE_RUST_COMMENTBLOCKDOC) {
679 ResumeBlockComment(styler, pos, max, initStyle == SCE_RUST_COMMENTBLOCKDOC ? DocComment : NotDocComment, styler.GetLineState(styler.GetLine(pos) - 1));
680 } else if (initStyle == SCE_RUST_COMMENTLINE || initStyle == SCE_RUST_COMMENTLINEDOC) {
681 ResumeLineComment(styler, pos, max, initStyle == SCE_RUST_COMMENTLINEDOC ? DocComment : NotDocComment);
682 } else if (initStyle == SCE_RUST_STRING) {
683 ResumeString(styler, pos, max, false);
684 } else if (initStyle == SCE_RUST_BYTESTRING) {
685 ResumeString(styler, pos, max, true);
686 } else if (initStyle == SCE_RUST_STRINGR) {
687 ResumeRawString(styler, pos, max, styler.GetLineState(styler.GetLine(pos) - 1), false);
688 } else if (initStyle == SCE_RUST_BYTESTRINGR) {
689 ResumeRawString(styler, pos, max, styler.GetLineState(styler.GetLine(pos) - 1), true);
690 }
691
692 while (pos < max) {
693 int c = styler.SafeGetCharAt(pos, '\0');
694 int n = styler.SafeGetCharAt(pos + 1, '\0');
695 int n2 = styler.SafeGetCharAt(pos + 2, '\0');
696
697 if (pos == 0 && c == '#' && n == '!' && n2 != '[') {
698 pos += 2;
699 ResumeLineComment(styler, pos, max, NotDocComment);
700 } else if (IsWhitespace(c)) {
701 ScanWhitespace(styler, pos, max);
702 } else if (c == '/' && (n == '/' || n == '*')) {
703 ScanComments(styler, pos, max);
704 } else if (c == 'r' && (n == '#' || n == '"')) {
705 ScanRawString(styler, pos, max, false);
706 } else if (c == 'b' && n == 'r' && (n2 == '#' || n2 == '"')) {
707 pos++;
708 ScanRawString(styler, pos, max, true);
709 } else if (c == 'b' && n == '"') {
710 pos += 2;
711 ResumeString(styler, pos, max, true);
712 } else if (c == 'b' && n == '\'') {
713 pos++;
714 ScanCharacterLiteralOrLifetime(styler, pos, true);
715 } else if (IsIdentifierStart(c)) {
716 ScanIdentifier(styler, pos, keywords);
717 } else if (IsADigit(c)) {
718 ScanNumber(styler, pos);
719 } else if (IsThreeCharOperator(c, n, n2)) {
720 pos += 3;
721 styler.ColourTo(pos - 1, SCE_RUST_OPERATOR);
722 } else if (IsTwoCharOperator(c, n)) {
723 pos += 2;
724 styler.ColourTo(pos - 1, SCE_RUST_OPERATOR);
725 } else if (IsOneCharOperator(c)) {
726 pos++;
727 styler.ColourTo(pos - 1, SCE_RUST_OPERATOR);
728 } else if (c == '\'') {
729 ScanCharacterLiteralOrLifetime(styler, pos, false);
730 } else if (c == '"') {
731 pos++;
732 ResumeString(styler, pos, max, false);
733 } else {
734 pos++;
735 styler.ColourTo(pos - 1, SCE_RUST_LEXERROR);
736 }
737 }
738 styler.ColourTo(pos - 1, SCE_RUST_DEFAULT);
739 styler.Flush();
740 }
741
Fold(Sci_PositionU startPos,Sci_Position length,int initStyle,IDocument * pAccess)742 void SCI_METHOD LexerRust::Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
743
744 if (!options.fold)
745 return;
746
747 LexAccessor styler(pAccess);
748
749 Sci_PositionU endPos = startPos + length;
750 int visibleChars = 0;
751 bool inLineComment = false;
752 Sci_Position lineCurrent = styler.GetLine(startPos);
753 int levelCurrent = SC_FOLDLEVELBASE;
754 if (lineCurrent > 0)
755 levelCurrent = styler.LevelAt(lineCurrent-1) >> 16;
756 Sci_PositionU lineStartNext = styler.LineStart(lineCurrent+1);
757 int levelMinCurrent = levelCurrent;
758 int levelNext = levelCurrent;
759 char chNext = styler[startPos];
760 int styleNext = styler.StyleAt(startPos);
761 int style = initStyle;
762 const bool userDefinedFoldMarkers = !options.foldExplicitStart.empty() && !options.foldExplicitEnd.empty();
763 for (Sci_PositionU i = startPos; i < endPos; i++) {
764 char ch = chNext;
765 chNext = styler.SafeGetCharAt(i + 1);
766 int stylePrev = style;
767 style = styleNext;
768 styleNext = styler.StyleAt(i + 1);
769 bool atEOL = i == (lineStartNext-1);
770 if ((style == SCE_RUST_COMMENTLINE) || (style == SCE_RUST_COMMENTLINEDOC))
771 inLineComment = true;
772 if (options.foldComment && options.foldCommentMultiline && IsStreamCommentStyle(style) && !inLineComment) {
773 if (!IsStreamCommentStyle(stylePrev)) {
774 levelNext++;
775 } else if (!IsStreamCommentStyle(styleNext) && !atEOL) {
776 // Comments don't end at end of line and the next character may be unstyled.
777 levelNext--;
778 }
779 }
780 if (options.foldComment && options.foldCommentExplicit && ((style == SCE_RUST_COMMENTLINE) || options.foldExplicitAnywhere)) {
781 if (userDefinedFoldMarkers) {
782 if (styler.Match(i, options.foldExplicitStart.c_str())) {
783 levelNext++;
784 } else if (styler.Match(i, options.foldExplicitEnd.c_str())) {
785 levelNext--;
786 }
787 } else {
788 if ((ch == '/') && (chNext == '/')) {
789 char chNext2 = styler.SafeGetCharAt(i + 2);
790 if (chNext2 == '{') {
791 levelNext++;
792 } else if (chNext2 == '}') {
793 levelNext--;
794 }
795 }
796 }
797 }
798 if (options.foldSyntaxBased && (style == SCE_RUST_OPERATOR)) {
799 if (ch == '{') {
800 // Measure the minimum before a '{' to allow
801 // folding on "} else {"
802 if (levelMinCurrent > levelNext) {
803 levelMinCurrent = levelNext;
804 }
805 levelNext++;
806 } else if (ch == '}') {
807 levelNext--;
808 }
809 }
810 if (!IsASpace(ch))
811 visibleChars++;
812 if (atEOL || (i == endPos-1)) {
813 int levelUse = levelCurrent;
814 if (options.foldSyntaxBased && options.foldAtElse) {
815 levelUse = levelMinCurrent;
816 }
817 int lev = levelUse | levelNext << 16;
818 if (visibleChars == 0 && options.foldCompact)
819 lev |= SC_FOLDLEVELWHITEFLAG;
820 if (levelUse < levelNext)
821 lev |= SC_FOLDLEVELHEADERFLAG;
822 if (lev != styler.LevelAt(lineCurrent)) {
823 styler.SetLevel(lineCurrent, lev);
824 }
825 lineCurrent++;
826 lineStartNext = styler.LineStart(lineCurrent+1);
827 levelCurrent = levelNext;
828 levelMinCurrent = levelCurrent;
829 if (atEOL && (i == static_cast<Sci_PositionU>(styler.Length()-1))) {
830 // There is an empty line at end of file so give it same level and empty
831 styler.SetLevel(lineCurrent, (levelCurrent | levelCurrent << 16) | SC_FOLDLEVELWHITEFLAG);
832 }
833 visibleChars = 0;
834 inLineComment = false;
835 }
836 }
837 }
838
839 LexerModule lmRust(SCLEX_RUST, LexerRust::LexerFactoryRust, "rust", rustWordLists);
840