1 use crate::fallback::{
2     is_ident_continue, is_ident_start, Group, LexError, Literal, Span, TokenStream,
3 };
4 use crate::{Delimiter, Punct, Spacing, TokenTree};
5 use std::char;
6 use std::str::{Bytes, CharIndices, Chars};
7 
/// A cheap, copyable view of the input text that has not been lexed yet.
#[derive(Copy, Clone, Eq, PartialEq)]
pub(crate) struct Cursor<'a> {
    /// The remaining, not yet consumed input.
    pub rest: &'a str,
    /// Running offset of `rest`; `advance` bumps it by the number of `char`s
    /// it consumes. Only present when span locations are compiled in.
    #[cfg(span_locations)]
    pub off: u32,
}
14 
impl<'a> Cursor<'a> {
    /// Returns a new cursor with the first `bytes` bytes of `rest` consumed.
    ///
    /// `bytes` is passed straight to `str::split_at`, so it must lie on a
    /// char boundary within `rest`.
    fn advance(&self, bytes: usize) -> Cursor<'a> {
        let (_front, rest) = self.rest.split_at(bytes);
        Cursor {
            rest,
            // The offset is tracked in chars, not bytes, so the consumed
            // prefix is measured with `chars().count()`.
            #[cfg(span_locations)]
            off: self.off + _front.chars().count() as u32,
        }
    }

    /// Reports whether the remaining input begins with `s`.
    fn starts_with(&self, s: &str) -> bool {
        self.rest.starts_with(s)
    }

    /// Reports whether all input has been consumed.
    fn is_empty(&self) -> bool {
        self.rest.is_empty()
    }

    /// Length of the remaining input in bytes.
    fn len(&self) -> usize {
        self.rest.len()
    }

    /// The remaining input as raw bytes.
    fn as_bytes(&self) -> &'a [u8] {
        self.rest.as_bytes()
    }

    /// Iterator over the bytes of the remaining input.
    fn bytes(&self) -> Bytes<'a> {
        self.rest.bytes()
    }

    /// Iterator over the chars of the remaining input.
    fn chars(&self) -> Chars<'a> {
        self.rest.chars()
    }

    /// Iterator over `(byte offset, char)` pairs of the remaining input.
    fn char_indices(&self) -> CharIndices<'a> {
        self.rest.char_indices()
    }

    /// Consumes the literal text `tag` if the input starts with it.
    ///
    /// Returns the cursor positioned after `tag`, or `Reject` if the input
    /// does not start with `tag`.
    fn parse(&self, tag: &str) -> Result<Cursor<'a>, Reject> {
        if self.starts_with(tag) {
            Ok(self.advance(tag.len()))
        } else {
            Err(Reject)
        }
    }
}
61 
/// Marker error: the parser did not match at the given cursor.
pub(crate) struct Reject;
/// Parser result: on success, the cursor after the match together with the
/// parsed value; on failure, `Reject`.
type PResult<'a, O> = Result<(Cursor<'a>, O), Reject>;
64 
skip_whitespace(input: Cursor) -> Cursor65 fn skip_whitespace(input: Cursor) -> Cursor {
66     let mut s = input;
67 
68     while !s.is_empty() {
69         let byte = s.as_bytes()[0];
70         if byte == b'/' {
71             if s.starts_with("//")
72                 && (!s.starts_with("///") || s.starts_with("////"))
73                 && !s.starts_with("//!")
74             {
75                 let (cursor, _) = take_until_newline_or_eof(s);
76                 s = cursor;
77                 continue;
78             } else if s.starts_with("/**/") {
79                 s = s.advance(4);
80                 continue;
81             } else if s.starts_with("/*")
82                 && (!s.starts_with("/**") || s.starts_with("/***"))
83                 && !s.starts_with("/*!")
84             {
85                 match block_comment(s) {
86                     Ok((rest, _)) => {
87                         s = rest;
88                         continue;
89                     }
90                     Err(Reject) => return s,
91                 }
92             }
93         }
94         match byte {
95             b' ' | 0x09..=0x0d => {
96                 s = s.advance(1);
97                 continue;
98             }
99             b if b <= 0x7f => {}
100             _ => {
101                 let ch = s.chars().next().unwrap();
102                 if is_whitespace(ch) {
103                     s = s.advance(ch.len_utf8());
104                     continue;
105                 }
106             }
107         }
108         return s;
109     }
110     s
111 }
112 
block_comment(input: Cursor) -> PResult<&str>113 fn block_comment(input: Cursor) -> PResult<&str> {
114     if !input.starts_with("/*") {
115         return Err(Reject);
116     }
117 
118     let mut depth = 0;
119     let bytes = input.as_bytes();
120     let mut i = 0;
121     let upper = bytes.len() - 1;
122 
123     while i < upper {
124         if bytes[i] == b'/' && bytes[i + 1] == b'*' {
125             depth += 1;
126             i += 1; // eat '*'
127         } else if bytes[i] == b'*' && bytes[i + 1] == b'/' {
128             depth -= 1;
129             if depth == 0 {
130                 return Ok((input.advance(i + 2), &input.rest[..i + 2]));
131             }
132             i += 1; // eat '/'
133         }
134         i += 1;
135     }
136 
137     Err(Reject)
138 }
139 
/// Reports whether `ch` counts as whitespace for the lexer.
fn is_whitespace(ch: char) -> bool {
    match ch {
        // Rust additionally treats the left-to-right and right-to-left marks
        // as whitespace.
        '\u{200e}' | '\u{200f}' => true,
        _ => ch.is_whitespace(),
    }
}
144 
word_break(input: Cursor) -> Result<Cursor, Reject>145 fn word_break(input: Cursor) -> Result<Cursor, Reject> {
146     match input.chars().next() {
147         Some(ch) if is_ident_continue(ch) => Err(Reject),
148         Some(_) | None => Ok(input),
149     }
150 }
151 
/// Lexes the whole of `input` into a `TokenStream`.
///
/// Delimited groups are handled with an explicit stack rather than recursion:
/// an opening delimiter pushes the trees collected so far, and the matching
/// closing delimiter pops them back and appends the finished group.
///
/// Returns a `LexError` on an unmatched or mismatched closing delimiter, on an
/// opening delimiter that is never closed, or when no leaf token can be parsed
/// at the current position.
pub(crate) fn token_stream(mut input: Cursor) -> Result<TokenStream, LexError> {
    // Token trees of the group currently being filled (or of the top level).
    let mut trees = Vec::new();
    // One frame per open delimiter: the delimiter and the enclosing group's
    // trees, plus the opening offset when span locations are enabled.
    let mut stack = Vec::new();

    loop {
        input = skip_whitespace(input);

        // Doc comments expand to several token trees at once.
        if let Ok((rest, tt)) = doc_comment(input) {
            trees.extend(tt);
            input = rest;
            continue;
        }

        #[cfg(span_locations)]
        let lo = input.off;

        let first = match input.bytes().next() {
            Some(first) => first,
            // End of input: fine at top level, an error if a group is still
            // open. The error points at the innermost unclosed delimiter.
            None => match stack.last() {
                None => return Ok(TokenStream { inner: trees }),
                #[cfg(span_locations)]
                Some((lo, _frame)) => {
                    return Err(LexError {
                        span: Span { lo: *lo, hi: *lo },
                    })
                }
                #[cfg(not(span_locations))]
                Some(_frame) => return Err(LexError { span: Span {} }),
            },
        };

        if let Some(open_delimiter) = match first {
            b'(' => Some(Delimiter::Parenthesis),
            b'[' => Some(Delimiter::Bracket),
            b'{' => Some(Delimiter::Brace),
            _ => None,
        } {
            // Opening delimiter: stash the current trees and start a fresh
            // list for the group's contents.
            input = input.advance(1);
            let frame = (open_delimiter, trees);
            #[cfg(span_locations)]
            let frame = (lo, frame);
            stack.push(frame);
            trees = Vec::new();
        } else if let Some(close_delimiter) = match first {
            b')' => Some(Delimiter::Parenthesis),
            b']' => Some(Delimiter::Bracket),
            b'}' => Some(Delimiter::Brace),
            _ => None,
        } {
            // Closing delimiter: it must match the innermost open one.
            let frame = match stack.pop() {
                Some(frame) => frame,
                None => return Err(lex_error(input)),
            };
            #[cfg(span_locations)]
            let (lo, frame) = frame;
            let (open_delimiter, outer) = frame;
            if open_delimiter != close_delimiter {
                return Err(lex_error(input));
            }
            input = input.advance(1);
            let mut g = Group::new(open_delimiter, TokenStream { inner: trees });
            // The group's span runs from its opening delimiter to just past
            // the closing one.
            g.set_span(Span {
                #[cfg(span_locations)]
                lo,
                #[cfg(span_locations)]
                hi: input.off,
            });
            // Resume filling the enclosing group.
            trees = outer;
            trees.push(TokenTree::Group(crate::Group::_new_stable(g)));
        } else {
            // Anything else must be a single leaf token.
            let (rest, mut tt) = match leaf_token(input) {
                Ok((rest, tt)) => (rest, tt),
                Err(Reject) => return Err(lex_error(input)),
            };
            tt.set_span(crate::Span::_new_stable(Span {
                #[cfg(span_locations)]
                lo,
                #[cfg(span_locations)]
                hi: rest.off,
            }));
            trees.push(tt);
            input = rest;
        }
    }
}
237 
lex_error(cursor: Cursor) -> LexError238 fn lex_error(cursor: Cursor) -> LexError {
239     #[cfg(not(span_locations))]
240     let _ = cursor;
241     LexError {
242         span: Span {
243             #[cfg(span_locations)]
244             lo: cursor.off,
245             #[cfg(span_locations)]
246             hi: cursor.off,
247         },
248     }
249 }
250 
leaf_token(input: Cursor) -> PResult<TokenTree>251 fn leaf_token(input: Cursor) -> PResult<TokenTree> {
252     if let Ok((input, l)) = literal(input) {
253         // must be parsed before ident
254         Ok((input, TokenTree::Literal(crate::Literal::_new_stable(l))))
255     } else if let Ok((input, p)) = punct(input) {
256         Ok((input, TokenTree::Punct(p)))
257     } else if let Ok((input, i)) = ident(input) {
258         Ok((input, TokenTree::Ident(i)))
259     } else {
260         Err(Reject)
261     }
262 }
263 
ident(input: Cursor) -> PResult<crate::Ident>264 fn ident(input: Cursor) -> PResult<crate::Ident> {
265     if ["r\"", "r#\"", "r##", "b\"", "b\'", "br\"", "br#"]
266         .iter()
267         .any(|prefix| input.starts_with(prefix))
268     {
269         Err(Reject)
270     } else {
271         ident_any(input)
272     }
273 }
274 
ident_any(input: Cursor) -> PResult<crate::Ident>275 fn ident_any(input: Cursor) -> PResult<crate::Ident> {
276     let raw = input.starts_with("r#");
277     let rest = input.advance((raw as usize) << 1);
278 
279     let (rest, sym) = ident_not_raw(rest)?;
280 
281     if !raw {
282         let ident = crate::Ident::new(sym, crate::Span::call_site());
283         return Ok((rest, ident));
284     }
285 
286     if sym == "_" {
287         return Err(Reject);
288     }
289 
290     let ident = crate::Ident::_new_raw(sym, crate::Span::call_site());
291     Ok((rest, ident))
292 }
293 
ident_not_raw(input: Cursor) -> PResult<&str>294 fn ident_not_raw(input: Cursor) -> PResult<&str> {
295     let mut chars = input.char_indices();
296 
297     match chars.next() {
298         Some((_, ch)) if is_ident_start(ch) => {}
299         _ => return Err(Reject),
300     }
301 
302     let mut end = input.len();
303     for (i, ch) in chars {
304         if !is_ident_continue(ch) {
305             end = i;
306             break;
307         }
308     }
309 
310     Ok((input.advance(end), &input.rest[..end]))
311 }
312 
literal(input: Cursor) -> PResult<Literal>313 pub(crate) fn literal(input: Cursor) -> PResult<Literal> {
314     let rest = literal_nocapture(input)?;
315     let end = input.len() - rest.len();
316     Ok((rest, Literal::_new(input.rest[..end].to_string())))
317 }
318 
literal_nocapture(input: Cursor) -> Result<Cursor, Reject>319 fn literal_nocapture(input: Cursor) -> Result<Cursor, Reject> {
320     if let Ok(ok) = string(input) {
321         Ok(ok)
322     } else if let Ok(ok) = byte_string(input) {
323         Ok(ok)
324     } else if let Ok(ok) = byte(input) {
325         Ok(ok)
326     } else if let Ok(ok) = character(input) {
327         Ok(ok)
328     } else if let Ok(ok) = float(input) {
329         Ok(ok)
330     } else if let Ok(ok) = int(input) {
331         Ok(ok)
332     } else {
333         Err(Reject)
334     }
335 }
336 
literal_suffix(input: Cursor) -> Cursor337 fn literal_suffix(input: Cursor) -> Cursor {
338     match ident_not_raw(input) {
339         Ok((input, _)) => input,
340         Err(Reject) => input,
341     }
342 }
343 
string(input: Cursor) -> Result<Cursor, Reject>344 fn string(input: Cursor) -> Result<Cursor, Reject> {
345     if let Ok(input) = input.parse("\"") {
346         cooked_string(input)
347     } else if let Ok(input) = input.parse("r") {
348         raw_string(input)
349     } else {
350         Err(Reject)
351     }
352 }
353 
/// Recognizes the remainder of a cooked string literal; `input` starts just
/// after the opening `"`.
///
/// Returns the cursor after the closing quote and any literal suffix.
/// Rejects on an invalid escape, a carriage return not followed by a line
/// feed, or a missing closing quote.
fn cooked_string(input: Cursor) -> Result<Cursor, Reject> {
    let mut chars = input.char_indices().peekable();

    while let Some((i, ch)) = chars.next() {
        match ch {
            '"' => {
                // Closing quote: consume it, then an optional suffix.
                let input = input.advance(i + 1);
                return Ok(literal_suffix(input));
            }
            // A carriage return is only accepted as part of `\r\n`.
            '\r' => match chars.next() {
                Some((_, '\n')) => {}
                _ => break,
            },
            '\\' => match chars.next() {
                Some((_, 'x')) => {
                    if !backslash_x_char(&mut chars) {
                        break;
                    }
                }
                Some((_, 'n')) | Some((_, 'r')) | Some((_, 't')) | Some((_, '\\'))
                | Some((_, '\'')) | Some((_, '"')) | Some((_, '0')) => {}
                Some((_, 'u')) => {
                    if !backslash_u(&mut chars) {
                        break;
                    }
                }
                // Backslash followed by a line break: skip the line break and
                // all whitespace that follows it.
                Some((_, ch @ '\n')) | Some((_, ch @ '\r')) => {
                    let mut last = ch;
                    loop {
                        // Every `\r` met along the way must be directly
                        // followed by `\n`.
                        if last == '\r' && chars.next().map_or(true, |(_, ch)| ch != '\n') {
                            return Err(Reject);
                        }
                        // Peek so the first non-whitespace character stays
                        // available to the outer loop.
                        match chars.peek() {
                            Some((_, ch)) if ch.is_whitespace() => {
                                last = *ch;
                                chars.next();
                            }
                            _ => break,
                        }
                    }
                }
                _ => break,
            },
            _ch => {}
        }
    }
    // Ran out of input or hit an invalid sequence before the closing quote.
    Err(Reject)
}
402 
byte_string(input: Cursor) -> Result<Cursor, Reject>403 fn byte_string(input: Cursor) -> Result<Cursor, Reject> {
404     if let Ok(input) = input.parse("b\"") {
405         cooked_byte_string(input)
406     } else if let Ok(input) = input.parse("br") {
407         raw_string(input)
408     } else {
409         Err(Reject)
410     }
411 }
412 
cooked_byte_string(mut input: Cursor) -> Result<Cursor, Reject>413 fn cooked_byte_string(mut input: Cursor) -> Result<Cursor, Reject> {
414     let mut bytes = input.bytes().enumerate();
415     while let Some((offset, b)) = bytes.next() {
416         match b {
417             b'"' => {
418                 let input = input.advance(offset + 1);
419                 return Ok(literal_suffix(input));
420             }
421             b'\r' => match bytes.next() {
422                 Some((_, b'\n')) => {}
423                 _ => break,
424             },
425             b'\\' => match bytes.next() {
426                 Some((_, b'x')) => {
427                     if !backslash_x_byte(&mut bytes) {
428                         break;
429                     }
430                 }
431                 Some((_, b'n')) | Some((_, b'r')) | Some((_, b't')) | Some((_, b'\\'))
432                 | Some((_, b'0')) | Some((_, b'\'')) | Some((_, b'"')) => {}
433                 Some((newline, b @ b'\n')) | Some((newline, b @ b'\r')) => {
434                     let mut last = b as char;
435                     let rest = input.advance(newline + 1);
436                     let mut chars = rest.char_indices();
437                     loop {
438                         if last == '\r' && chars.next().map_or(true, |(_, ch)| ch != '\n') {
439                             return Err(Reject);
440                         }
441                         match chars.next() {
442                             Some((_, ch)) if ch.is_whitespace() => last = ch,
443                             Some((offset, _)) => {
444                                 input = rest.advance(offset);
445                                 bytes = input.bytes().enumerate();
446                                 break;
447                             }
448                             None => return Err(Reject),
449                         }
450                     }
451                 }
452                 _ => break,
453             },
454             b if b < 0x80 => {}
455             _ => break,
456         }
457     }
458     Err(Reject)
459 }
460 
raw_string(input: Cursor) -> Result<Cursor, Reject>461 fn raw_string(input: Cursor) -> Result<Cursor, Reject> {
462     let mut chars = input.char_indices();
463     let mut n = 0;
464     for (i, ch) in &mut chars {
465         match ch {
466             '"' => {
467                 n = i;
468                 break;
469             }
470             '#' => {}
471             _ => return Err(Reject),
472         }
473     }
474     while let Some((i, ch)) = chars.next() {
475         match ch {
476             '"' if input.rest[i + 1..].starts_with(&input.rest[..n]) => {
477                 let rest = input.advance(i + 1 + n);
478                 return Ok(literal_suffix(rest));
479             }
480             '\r' => match chars.next() {
481                 Some((_, '\n')) => {}
482                 _ => break,
483             },
484             _ => {}
485         }
486     }
487     Err(Reject)
488 }
489 
byte(input: Cursor) -> Result<Cursor, Reject>490 fn byte(input: Cursor) -> Result<Cursor, Reject> {
491     let input = input.parse("b'")?;
492     let mut bytes = input.bytes().enumerate();
493     let ok = match bytes.next().map(|(_, b)| b) {
494         Some(b'\\') => match bytes.next().map(|(_, b)| b) {
495             Some(b'x') => backslash_x_byte(&mut bytes),
496             Some(b'n') | Some(b'r') | Some(b't') | Some(b'\\') | Some(b'0') | Some(b'\'')
497             | Some(b'"') => true,
498             _ => false,
499         },
500         b => b.is_some(),
501     };
502     if !ok {
503         return Err(Reject);
504     }
505     let (offset, _) = bytes.next().ok_or(Reject)?;
506     if !input.chars().as_str().is_char_boundary(offset) {
507         return Err(Reject);
508     }
509     let input = input.advance(offset).parse("'")?;
510     Ok(literal_suffix(input))
511 }
512 
character(input: Cursor) -> Result<Cursor, Reject>513 fn character(input: Cursor) -> Result<Cursor, Reject> {
514     let input = input.parse("'")?;
515     let mut chars = input.char_indices();
516     let ok = match chars.next().map(|(_, ch)| ch) {
517         Some('\\') => match chars.next().map(|(_, ch)| ch) {
518             Some('x') => backslash_x_char(&mut chars),
519             Some('u') => backslash_u(&mut chars),
520             Some('n') | Some('r') | Some('t') | Some('\\') | Some('0') | Some('\'') | Some('"') => {
521                 true
522             }
523             _ => false,
524         },
525         ch => ch.is_some(),
526     };
527     if !ok {
528         return Err(Reject);
529     }
530     let (idx, _) = chars.next().ok_or(Reject)?;
531     let input = input.advance(idx).parse("'")?;
532     Ok(literal_suffix(input))
533 }
534 
// Pulls the next `(index, item)` pair from `$chars` and evaluates to the item
// if it matches one of the given patterns. Otherwise (no match, or iterator
// exhausted) it makes the *enclosing function* `return false`, so it can only
// be used inside functions returning `bool`.
macro_rules! next_ch {
    ($chars:ident @ $pat:pat $(| $rest:pat)*) => {
        match $chars.next() {
            Some((_, ch)) => match ch {
                $pat $(| $rest)* => ch,
                _ => return false,
            },
            None => return false,
        }
    };
}
546 
/// Validates the two digits after `\x` in a char or string literal.
///
/// The first digit must be `0`-`7` and the second any hex digit. Consumes the
/// digits from `chars`; stops at the first one that is invalid.
fn backslash_x_char<I>(chars: &mut I) -> bool
where
    I: Iterator<Item = (usize, char)>,
{
    let high_ok = match chars.next() {
        Some((_, '0'..='7')) => true,
        _ => false,
    };
    if !high_ok {
        return false;
    }
    match chars.next() {
        Some((_, low)) => low.is_ascii_hexdigit(),
        None => false,
    }
}
555 
/// Validates the two hex digits after `\x` in a byte or byte string literal.
///
/// Consumes the digits from `chars`; stops at the first one that is invalid.
fn backslash_x_byte<I>(chars: &mut I) -> bool
where
    I: Iterator<Item = (usize, u8)>,
{
    for _ in 0..2 {
        match chars.next() {
            Some((_, b)) if b.is_ascii_hexdigit() => {}
            _ => return false,
        }
    }
    true
}
564 
backslash_u<I>(chars: &mut I) -> bool where I: Iterator<Item = (usize, char)>,565 fn backslash_u<I>(chars: &mut I) -> bool
566 where
567     I: Iterator<Item = (usize, char)>,
568 {
569     next_ch!(chars @ '{');
570     let mut value = 0;
571     let mut len = 0;
572     for (_, ch) in chars {
573         let digit = match ch {
574             '0'..='9' => ch as u8 - b'0',
575             'a'..='f' => 10 + ch as u8 - b'a',
576             'A'..='F' => 10 + ch as u8 - b'A',
577             '_' if len > 0 => continue,
578             '}' if len > 0 => return char::from_u32(value).is_some(),
579             _ => return false,
580         };
581         if len == 6 {
582             return false;
583         }
584         value *= 0x10;
585         value += u32::from(digit);
586         len += 1;
587     }
588     false
589 }
590 
float(input: Cursor) -> Result<Cursor, Reject>591 fn float(input: Cursor) -> Result<Cursor, Reject> {
592     let mut rest = float_digits(input)?;
593     if let Some(ch) = rest.chars().next() {
594         if is_ident_start(ch) {
595             rest = ident_not_raw(rest)?.0;
596         }
597     }
598     word_break(rest)
599 }
600 
/// Recognizes the numeric part of a float literal (no suffix): decimal digits
/// with a `.` and/or an exponent.
///
/// Rejects plain integers (neither `.` nor exponent), and a `.` that is
/// followed by another `.` or by an identifier start. If an exponent marker
/// is present but no valid exponent follows, the result falls back to the
/// text before the marker when that text contained a `.`, and rejects
/// otherwise.
fn float_digits(input: Cursor) -> Result<Cursor, Reject> {
    let mut chars = input.chars().peekable();
    // A float must begin with a decimal digit.
    match chars.next() {
        Some(ch) if ch >= '0' && ch <= '9' => {}
        _ => return Err(Reject),
    }

    // Number of characters accepted so far. Everything accepted is ASCII, so
    // this is also the byte length handed to `advance`.
    let mut len = 1;
    let mut has_dot = false;
    let mut has_exp = false;
    while let Some(&ch) = chars.peek() {
        match ch {
            '0'..='9' | '_' => {
                chars.next();
                len += 1;
            }
            '.' => {
                // A second dot ends the literal.
                if has_dot {
                    break;
                }
                chars.next();
                // Reject when the dot is followed by another dot or by an
                // identifier start.
                if chars
                    .peek()
                    .map_or(false, |&ch| ch == '.' || is_ident_start(ch))
                {
                    return Err(Reject);
                }
                len += 1;
                has_dot = true;
            }
            'e' | 'E' => {
                chars.next();
                len += 1;
                has_exp = true;
                break;
            }
            _ => break,
        }
    }

    // Without a dot or an exponent this is not a float.
    if !(has_dot || has_exp) {
        return Err(Reject);
    }

    if has_exp {
        // Fallback result if the exponent turns out to be malformed. `len`
        // currently includes the exponent marker, so `len - 1` stops right
        // before it. Without a dot there is no float to fall back to.
        let token_before_exp = if has_dot {
            Ok(input.advance(len - 1))
        } else {
            Err(Reject)
        };
        let mut has_sign = false;
        let mut has_exp_value = false;
        while let Some(&ch) = chars.peek() {
            match ch {
                '+' | '-' => {
                    // A sign after exponent digits belongs to whatever
                    // follows the literal.
                    if has_exp_value {
                        break;
                    }
                    // Two signs in a row: the exponent is malformed.
                    if has_sign {
                        return token_before_exp;
                    }
                    chars.next();
                    len += 1;
                    has_sign = true;
                }
                '0'..='9' => {
                    chars.next();
                    len += 1;
                    has_exp_value = true;
                }
                '_' => {
                    chars.next();
                    len += 1;
                }
                _ => break,
            }
        }
        // An exponent needs at least one digit.
        if !has_exp_value {
            return token_before_exp;
        }
    }

    Ok(input.advance(len))
}
685 
int(input: Cursor) -> Result<Cursor, Reject>686 fn int(input: Cursor) -> Result<Cursor, Reject> {
687     let mut rest = digits(input)?;
688     if let Some(ch) = rest.chars().next() {
689         if is_ident_start(ch) {
690             rest = ident_not_raw(rest)?.0;
691         }
692     }
693     word_break(rest)
694 }
695 
digits(mut input: Cursor) -> Result<Cursor, Reject>696 fn digits(mut input: Cursor) -> Result<Cursor, Reject> {
697     let base = if input.starts_with("0x") {
698         input = input.advance(2);
699         16
700     } else if input.starts_with("0o") {
701         input = input.advance(2);
702         8
703     } else if input.starts_with("0b") {
704         input = input.advance(2);
705         2
706     } else {
707         10
708     };
709 
710     let mut len = 0;
711     let mut empty = true;
712     for b in input.bytes() {
713         match b {
714             b'0'..=b'9' => {
715                 let digit = (b - b'0') as u64;
716                 if digit >= base {
717                     return Err(Reject);
718                 }
719             }
720             b'a'..=b'f' => {
721                 let digit = 10 + (b - b'a') as u64;
722                 if digit >= base {
723                     break;
724                 }
725             }
726             b'A'..=b'F' => {
727                 let digit = 10 + (b - b'A') as u64;
728                 if digit >= base {
729                     break;
730                 }
731             }
732             b'_' => {
733                 if empty && base == 10 {
734                     return Err(Reject);
735                 }
736                 len += 1;
737                 continue;
738             }
739             _ => break,
740         };
741         len += 1;
742         empty = false;
743     }
744     if empty {
745         Err(Reject)
746     } else {
747         Ok(input.advance(len))
748     }
749 }
750 
punct(input: Cursor) -> PResult<Punct>751 fn punct(input: Cursor) -> PResult<Punct> {
752     let (rest, ch) = punct_char(input)?;
753     if ch == '\'' {
754         if ident_any(rest)?.0.starts_with("'") {
755             Err(Reject)
756         } else {
757             Ok((rest, Punct::new('\'', Spacing::Joint)))
758         }
759     } else {
760         let kind = match punct_char(rest) {
761             Ok(_) => Spacing::Joint,
762             Err(Reject) => Spacing::Alone,
763         };
764         Ok((rest, Punct::new(ch, kind)))
765     }
766 }
767 
punct_char(input: Cursor) -> PResult<char>768 fn punct_char(input: Cursor) -> PResult<char> {
769     if input.starts_with("//") || input.starts_with("/*") {
770         // Do not accept `/` of a comment as a punct.
771         return Err(Reject);
772     }
773 
774     let mut chars = input.chars();
775     let first = match chars.next() {
776         Some(ch) => ch,
777         None => {
778             return Err(Reject);
779         }
780     };
781     let recognized = "~!@#$%^&*-=+|;:,<.>/?'";
782     if recognized.contains(first) {
783         Ok((input.advance(first.len_utf8()), first))
784     } else {
785         Err(Reject)
786     }
787 }
788 
doc_comment(input: Cursor) -> PResult<Vec<TokenTree>>789 fn doc_comment(input: Cursor) -> PResult<Vec<TokenTree>> {
790     #[cfg(span_locations)]
791     let lo = input.off;
792     let (rest, (comment, inner)) = doc_comment_contents(input)?;
793     let span = crate::Span::_new_stable(Span {
794         #[cfg(span_locations)]
795         lo,
796         #[cfg(span_locations)]
797         hi: rest.off,
798     });
799 
800     let mut scan_for_bare_cr = comment;
801     while let Some(cr) = scan_for_bare_cr.find('\r') {
802         let rest = &scan_for_bare_cr[cr + 1..];
803         if !rest.starts_with('\n') {
804             return Err(Reject);
805         }
806         scan_for_bare_cr = rest;
807     }
808 
809     let mut trees = Vec::new();
810     trees.push(TokenTree::Punct(Punct::new('#', Spacing::Alone)));
811     if inner {
812         trees.push(Punct::new('!', Spacing::Alone).into());
813     }
814     let mut stream = vec![
815         TokenTree::Ident(crate::Ident::new("doc", span)),
816         TokenTree::Punct(Punct::new('=', Spacing::Alone)),
817         TokenTree::Literal(crate::Literal::string(comment)),
818     ];
819     for tt in &mut stream {
820         tt.set_span(span);
821     }
822     let group = Group::new(Delimiter::Bracket, stream.into_iter().collect());
823     trees.push(crate::Group::_new_stable(group).into());
824     for tt in &mut trees {
825         tt.set_span(span);
826     }
827     Ok((rest, trees))
828 }
829 
doc_comment_contents(input: Cursor) -> PResult<(&str, bool)>830 fn doc_comment_contents(input: Cursor) -> PResult<(&str, bool)> {
831     if input.starts_with("//!") {
832         let input = input.advance(3);
833         let (input, s) = take_until_newline_or_eof(input);
834         Ok((input, (s, true)))
835     } else if input.starts_with("/*!") {
836         let (input, s) = block_comment(input)?;
837         Ok((input, (&s[3..s.len() - 2], true)))
838     } else if input.starts_with("///") {
839         let input = input.advance(3);
840         if input.starts_with("/") {
841             return Err(Reject);
842         }
843         let (input, s) = take_until_newline_or_eof(input);
844         Ok((input, (s, false)))
845     } else if input.starts_with("/**") && !input.rest[3..].starts_with('*') {
846         let (input, s) = block_comment(input)?;
847         Ok((input, (&s[3..s.len() - 2], false)))
848     } else {
849         Err(Reject)
850     }
851 }
852 
take_until_newline_or_eof(input: Cursor) -> (Cursor, &str)853 fn take_until_newline_or_eof(input: Cursor) -> (Cursor, &str) {
854     let chars = input.char_indices();
855 
856     for (i, ch) in chars {
857         if ch == '\n' {
858             return (input.advance(i), &input.rest[..i]);
859         } else if ch == '\r' && input.rest[i + 1..].starts_with('\n') {
860             return (input.advance(i + 1), &input.rest[..i]);
861         }
862     }
863 
864     (input.advance(input.len()), input.rest)
865 }
866