1 use std::ascii;
2 use std::borrow::Borrow;
3 use std::cell::RefCell;
4 #[cfg(procmacro2_semver_exempt)]
5 use std::cmp;
6 use std::collections::HashMap;
7 use std::fmt;
8 use std::iter;
9 use std::marker::PhantomData;
10 use std::rc::Rc;
11 use std::str::FromStr;
12 use std::vec;
13 
14 use proc_macro;
15 use unicode_xid::UnicodeXID;
16 use strnom::{Cursor, PResult, skip_whitespace, block_comment, whitespace, word_break};
17 
18 use {TokenTree, TokenNode, Delimiter, Spacing};
19 
/// A sequence of token trees backed by a plain vector.
#[derive(Clone, Debug)]
pub struct TokenStream {
    // Token trees in stream order; this is the order in which they are
    // displayed and iterated.
    inner: Vec<TokenTree>,
}
24 
/// Error returned when a string cannot be lexed into a token stream.
///
/// It is a unit struct and carries no information about the failure.
#[derive(Debug)]
pub struct LexError;
27 
28 impl TokenStream {
empty() -> TokenStream29     pub fn empty() -> TokenStream {
30         TokenStream { inner: Vec::new() }
31     }
32 
is_empty(&self) -> bool33     pub fn is_empty(&self) -> bool {
34         self.inner.len() == 0
35     }
36 }
37 
/// Registers `src` in the thread-local codemap as a new dummy file and
/// returns a cursor over it whose offset starts at that file's `lo`.
#[cfg(procmacro2_semver_exempt)]
fn get_cursor(src: &str) -> Cursor {
    CODEMAP.with(|codemap| {
        let mut codemap = codemap.borrow_mut();
        // The generated name embeds how many files were registered before
        // this one.
        let file_name = format!("<parsed string {}>", codemap.files.len());
        let file_span = codemap.add_file(&file_name, src);
        Cursor {
            rest: src,
            off: file_span.lo,
        }
    })
}
51 
/// Builds a cursor over `src`. In this configuration the cursor has no
/// offset field, so nothing is registered anywhere.
#[cfg(not(procmacro2_semver_exempt))]
fn get_cursor(src: &str) -> Cursor {
    Cursor {
        rest: src,
    }
}
58 
59 impl FromStr for TokenStream {
60     type Err = LexError;
61 
from_str(src: &str) -> Result<TokenStream, LexError>62     fn from_str(src: &str) -> Result<TokenStream, LexError> {
63         // Create a dummy file & add it to the codemap
64         let cursor = get_cursor(src);
65 
66         match token_stream(cursor) {
67             Ok((input, output)) => {
68                 if skip_whitespace(input).len() != 0 {
69                     Err(LexError)
70                 } else {
71                     Ok(output.0)
72                 }
73             }
74             Err(LexError) => Err(LexError),
75         }
76     }
77 }
78 
79 impl fmt::Display for TokenStream {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result80     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
81         let mut joint = false;
82         for (i, tt) in self.inner.iter().enumerate() {
83             if i != 0 && !joint {
84                 write!(f, " ")?;
85             }
86             joint = false;
87             match tt.kind {
88                 TokenNode::Group(delim, ref stream) => {
89                     let (start, end) = match delim {
90                         Delimiter::Parenthesis => ("(", ")"),
91                         Delimiter::Brace => ("{", "}"),
92                         Delimiter::Bracket => ("[", "]"),
93                         Delimiter::None => ("", ""),
94                     };
95                     if stream.0.inner.len() == 0 {
96                         write!(f, "{} {}", start, end)?
97                     } else {
98                         write!(f, "{} {} {}", start, stream, end)?
99                     }
100                 }
101                 TokenNode::Term(ref sym) => write!(f, "{}", sym.as_str())?,
102                 TokenNode::Op(ch, ref op) => {
103                     write!(f, "{}", ch)?;
104                     match *op {
105                         Spacing::Alone => {}
106                         Spacing::Joint => joint = true,
107                     }
108                 }
109                 TokenNode::Literal(ref literal) => {
110                     write!(f, "{}", literal)?;
111                     // handle comments
112                     if (literal.0).0.starts_with("/") {
113                         write!(f, "\n")?;
114                     }
115                 }
116             }
117         }
118 
119         Ok(())
120     }
121 }
122 
123 impl From<proc_macro::TokenStream> for TokenStream {
from(inner: proc_macro::TokenStream) -> TokenStream124     fn from(inner: proc_macro::TokenStream) -> TokenStream {
125         inner.to_string().parse().expect("compiler token stream parse failed")
126     }
127 }
128 
129 impl From<TokenStream> for proc_macro::TokenStream {
from(inner: TokenStream) -> proc_macro::TokenStream130     fn from(inner: TokenStream) -> proc_macro::TokenStream {
131         inner.to_string().parse().expect("failed to parse to compiler tokens")
132     }
133 }
134 
135 
136 impl From<TokenTree> for TokenStream {
from(tree: TokenTree) -> TokenStream137     fn from(tree: TokenTree) -> TokenStream {
138         TokenStream { inner: vec![tree] }
139     }
140 }
141 
142 impl iter::FromIterator<TokenStream> for TokenStream {
from_iter<I: IntoIterator<Item=TokenStream>>(streams: I) -> Self143     fn from_iter<I: IntoIterator<Item=TokenStream>>(streams: I) -> Self {
144         let mut v = Vec::new();
145 
146         for stream in streams.into_iter() {
147             v.extend(stream.inner);
148         }
149 
150         TokenStream { inner: v }
151     }
152 }
153 
/// Owning iterator over the token trees of a `TokenStream`.
pub type TokenTreeIter = vec::IntoIter<TokenTree>;
155 
156 impl IntoIterator for TokenStream {
157     type Item = TokenTree;
158     type IntoIter = TokenTreeIter;
159 
into_iter(self) -> TokenTreeIter160     fn into_iter(self) -> TokenTreeIter {
161         self.inner.into_iter()
162     }
163 }
164 
/// Name of a source file, stored as an owned string.
#[cfg(procmacro2_semver_exempt)]
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct FileName(String);
168 
#[cfg(procmacro2_semver_exempt)]
impl fmt::Display for FileName {
    /// Displays the wrapped string, forwarding the formatter unchanged.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        fmt::Display::fmt(&self.0, f)
    }
}
175 
/// Handle describing the file a span belongs to; it only records the name.
#[cfg(procmacro2_semver_exempt)]
#[derive(Clone, PartialEq, Eq)]
pub struct SourceFile {
    // Name under which the file was registered in the codemap.
    name: FileName,
}
181 
#[cfg(procmacro2_semver_exempt)]
impl SourceFile {
    /// Get the path to this source file, as the `FileName` it was
    /// registered under.
    pub fn path(&self) -> &FileName {
        &self.name
    }

    /// Reports whether this is a real file. This implementation always
    /// returns `false`.
    pub fn is_real(&self) -> bool {
        // XXX(nika): Support real files in the future?
        false
    }
}
194 
#[cfg(procmacro2_semver_exempt)]
impl AsRef<FileName> for SourceFile {
    /// Borrows the file name; same value as `path()`.
    fn as_ref(&self) -> &FileName {
        &self.name
    }
}
201 
#[cfg(procmacro2_semver_exempt)]
impl fmt::Debug for SourceFile {
    /// Formats as a `SourceFile` struct with `path` and `is_real` fields.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let mut out = f.debug_struct("SourceFile");
        out.field("path", &self.path());
        out.field("is_real", &self.is_real());
        out.finish()
    }
}
211 
/// A line/column position inside a source file.
#[cfg(procmacro2_semver_exempt)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct LineColumn {
    // As produced by `FileInfo::offset_line_column`, the first line is 1.
    pub line: usize,
    // As produced by `FileInfo::offset_line_column`, this is the distance
    // from the start of the line, beginning at 0.
    pub column: usize,
}
218 
// Thread-local registry of every source string that has been given a span
// range, wrapped in a `RefCell` so files can be added through `with`.
#[cfg(procmacro2_semver_exempt)]
thread_local! {
    static CODEMAP: RefCell<Codemap> = RefCell::new(Codemap {
        // NOTE: We start with a single dummy file which all call_site() and
        // def_site() spans reference.
        files: vec![FileInfo {
            name: "<unspecified>".to_owned(),
            span: Span { lo: 0, hi: 0 },
            lines: vec![0],
        }],
    });
}
231 
// One registered source file in the codemap.
#[cfg(procmacro2_semver_exempt)]
struct FileInfo {
    // Name the file was registered under.
    name: String,
    // Offset range assigned to this file within the codemap.
    span: Span,
    // Offsets of line starts relative to the start of the file, as computed
    // by `lines_offsets`; the first entry is 0.
    lines: Vec<usize>,
}
238 
#[cfg(procmacro2_semver_exempt)]
impl FileInfo {
    /// Translates a codemap `offset` into a line/column pair for this file.
    ///
    /// Lines are numbered from 1; the column is the distance from the start
    /// of that line, beginning at 0.
    ///
    /// # Panics
    ///
    /// Panics if `offset` does not fall inside this file's span.
    fn offset_line_column(&self, offset: usize) -> LineColumn {
        assert!(self.span_within(Span { lo: offset as u32, hi: offset as u32 }));
        let relative = offset - self.span.lo as usize;
        let (line, column) = match self.lines.binary_search(&relative) {
            // Exactly on a line start: entry `i` of `lines` is line `i + 1`.
            Ok(exact) => (exact + 1, 0),
            // Otherwise the insertion point is one past the entry of the
            // line that contains the offset, i.e. that line's 1-based number.
            Err(insert_at) => (insert_at, relative - self.lines[insert_at - 1]),
        };
        LineColumn { line: line, column: column }
    }

    /// Returns `true` when `span` lies entirely inside this file's span
    /// (both ends inclusive).
    fn span_within(&self, span: Span) -> bool {
        self.span.lo <= span.lo && span.hi <= self.span.hi
    }
}
260 
/// Computes the byte offset at which each line of `s` starts.
///
/// The result always begins with `0`; one further entry, pointing just past
/// the newline, is added for every `'\n'` in `s`.
#[cfg(procmacro2_semver_exempt)]
fn lines_offsets(s: &str) -> Vec<usize> {
    let mut starts = vec![0];
    // `\n` is a single byte that never occurs inside a multi-byte UTF-8
    // sequence, so scanning bytes finds exactly the newline characters.
    starts.extend(
        s.bytes()
            .enumerate()
            .filter(|&(_, byte)| byte == b'\n')
            .map(|(idx, _)| idx + 1)
    );
    starts
}
272 
// Collection of all registered source files; each file owns a distinct
// range of span offsets.
#[cfg(procmacro2_semver_exempt)]
struct Codemap {
    // Files in registration order.
    files: Vec<FileInfo>,
}
277 
#[cfg(procmacro2_semver_exempt)]
impl Codemap {
    /// Returns the offset at which the next registered file will start.
    fn next_start_pos(&self) -> u32 {
        // The file list is initialized with a dummy file, so there is always
        // a last entry to look at.
        let previous = self.files.last().unwrap();
        // Add 1 so there's always space between files.
        previous.span.hi + 1
    }

    /// Registers `src` under `name` and returns the span assigned to it.
    fn add_file(&mut self, name: &str, src: &str) -> Span {
        let lo = self.next_start_pos();
        // XXX(nika): Should we bother doing a checked cast or checked add here?
        let hi = lo + (src.len() as u32);
        let span = Span { lo: lo, hi: hi };

        self.files.push(FileInfo {
            name: name.to_owned(),
            span: span,
            lines: lines_offsets(src),
        });

        span
    }

    /// Finds the first registered file whose span contains `span`.
    ///
    /// # Panics
    ///
    /// Panics if no registered file contains `span`.
    fn fileinfo(&self, span: Span) -> &FileInfo {
        match self.files.iter().find(|file| file.span_within(span)) {
            Some(file) => file,
            None => panic!("Invalid span with no related FileInfo!"),
        }
    }
}
312 
/// A region of source code.
///
/// With `procmacro2_semver_exempt` it is a pair of `lo`/`hi` offsets into the
/// thread-local codemap; otherwise it has no fields at all.
#[derive(Clone, Copy, Debug)]
pub struct Span {
    #[cfg(procmacro2_semver_exempt)]
    lo: u32,
    #[cfg(procmacro2_semver_exempt)]
    hi: u32,
}
320 
321 impl Span {
322     #[cfg(not(procmacro2_semver_exempt))]
call_site() -> Span323     pub fn call_site() -> Span {
324         Span {}
325     }
326 
327     #[cfg(procmacro2_semver_exempt)]
call_site() -> Span328     pub fn call_site() -> Span {
329         Span { lo: 0, hi: 0 }
330     }
331 
def_site() -> Span332     pub fn def_site() -> Span {
333         Span::call_site()
334     }
335 
resolved_at(&self, _other: Span) -> Span336     pub fn resolved_at(&self, _other: Span) -> Span {
337         // Stable spans consist only of line/column information, so
338         // `resolved_at` and `located_at` only select which span the
339         // caller wants line/column information from.
340         *self
341     }
342 
located_at(&self, other: Span) -> Span343     pub fn located_at(&self, other: Span) -> Span {
344         other
345     }
346 
347     #[cfg(procmacro2_semver_exempt)]
source_file(&self) -> SourceFile348     pub fn source_file(&self) -> SourceFile {
349         CODEMAP.with(|cm| {
350             let cm = cm.borrow();
351             let fi = cm.fileinfo(*self);
352             SourceFile {
353                 name: FileName(fi.name.clone()),
354             }
355         })
356     }
357 
358     #[cfg(procmacro2_semver_exempt)]
start(&self) -> LineColumn359     pub fn start(&self) -> LineColumn {
360         CODEMAP.with(|cm| {
361             let cm = cm.borrow();
362             let fi = cm.fileinfo(*self);
363             fi.offset_line_column(self.lo as usize)
364         })
365     }
366 
367     #[cfg(procmacro2_semver_exempt)]
end(&self) -> LineColumn368     pub fn end(&self) -> LineColumn {
369         CODEMAP.with(|cm| {
370             let cm = cm.borrow();
371             let fi = cm.fileinfo(*self);
372             fi.offset_line_column(self.hi as usize)
373         })
374     }
375 
376     #[cfg(procmacro2_semver_exempt)]
join(&self, other: Span) -> Option<Span>377     pub fn join(&self, other: Span) -> Option<Span> {
378         CODEMAP.with(|cm| {
379             let cm = cm.borrow();
380             // If `other` is not within the same FileInfo as us, return None.
381             if !cm.fileinfo(*self).span_within(other) {
382                 return None;
383             }
384             Some(Span {
385                 lo: cmp::min(self.lo, other.lo),
386                 hi: cmp::max(self.hi, other.hi),
387             })
388         })
389     }
390 }
391 
/// An interned string, represented by its index in the thread-local
/// `SYMBOLS` interner.
#[derive(Copy, Clone)]
pub struct Term {
    // Index returned by the interner for this term's text.
    intern: usize,
    // A raw-pointer marker makes `Term` neither `Send` nor `Sync`; the index
    // refers to an interner that is local to one thread.
    not_send_sync: PhantomData<*const ()>,
}
397 
// Per-thread string interner that backs `Term`.
thread_local!(static SYMBOLS: RefCell<Interner> = RefCell::new(Interner::new()));
399 
400 impl Term {
intern(string: &str) -> Term401     pub fn intern(string: &str) -> Term {
402         Term {
403             intern: SYMBOLS.with(|s| s.borrow_mut().intern(string)),
404             not_send_sync: PhantomData,
405         }
406     }
407 
as_str(&self) -> &str408     pub fn as_str(&self) -> &str {
409         SYMBOLS.with(|interner| {
410             let interner = interner.borrow();
411             let s = interner.get(self.intern);
412             unsafe {
413                 &*(s as *const str)
414             }
415         })
416     }
417 }
418 
419 impl fmt::Debug for Term {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result420     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
421         f.debug_tuple("Term").field(&self.as_str()).finish()
422     }
423 }
424 
// Two-way table between strings and the indices handed out for them.
struct Interner {
    // Looks up the index of an already interned string.
    string_to_index: HashMap<MyRc, usize>,
    // Entry `i` is the string that was given index `i`.
    index_to_string: Vec<Rc<String>>,
}

// Map key wrapper around the shared string. It implements `Borrow<str>` so
// that the map can be queried with a plain `&str`.
#[derive(Hash, Eq, PartialEq)]
struct MyRc(Rc<String>);

impl Borrow<str> for MyRc {
    fn borrow(&self) -> &str {
        self.0.as_str()
    }
}

impl Interner {
    // Creates an interner with no entries.
    fn new() -> Interner {
        Interner {
            index_to_string: Vec::new(),
            string_to_index: HashMap::new(),
        }
    }

    // Returns the index for `s`, assigning the next free index the first
    // time a given string is seen.
    fn intern(&mut self, s: &str) -> usize {
        if let Some(existing) = self.string_to_index.get(s).cloned() {
            return existing;
        }
        // The new entry goes at the end of the vector, so its index is the
        // current length.
        let index = self.index_to_string.len();
        let shared = Rc::new(s.to_owned());
        self.string_to_index.insert(MyRc(shared.clone()), index);
        self.index_to_string.push(shared);
        index
    }

    // Returns the string stored at `idx`; panics if `idx` was never handed
    // out by `intern`.
    fn get(&self, idx: usize) -> &str {
        self.index_to_string[idx].as_str()
    }
}
461 
/// The source text of a literal token, stored verbatim as a string.
#[derive(Clone, Debug)]
pub struct Literal(String);

impl Literal {
    /// Builds a byte-character literal (`b'…'`) for `byte`.
    ///
    /// `0` is written as `\0`, a double quote is written unescaped, and
    /// every other byte uses `std::ascii::escape_default`.
    pub fn byte_char(byte: u8) -> Literal {
        let body: String = match byte {
            0 => "\\0".to_string(),
            b'"' => "\"".to_string(),
            other => ascii::escape_default(other).map(|b| b as char).collect(),
        };
        Literal(format!("b'{}'", body))
    }

    /// Builds a byte-string literal (`b"…"`) for `bytes`.
    ///
    /// Printable ASCII is copied as is, common control bytes, `"` and `\`
    /// get their short escapes, and everything else becomes `\xNN` with
    /// upper-case hex digits.
    pub fn byte_string(bytes: &[u8]) -> Literal {
        let mut text = String::from("b\"");
        for &byte in bytes {
            match byte {
                b'\0' => text.push_str("\\0"),
                b'\t' => text.push_str("\\t"),
                b'\n' => text.push_str("\\n"),
                b'\r' => text.push_str("\\r"),
                b'"' => text.push_str("\\\""),
                b'\\' => text.push_str("\\\\"),
                printable if printable >= 0x20 && printable <= 0x7E => {
                    text.push(printable as char)
                }
                other => text.push_str(&format!("\\x{:02X}", other)),
            }
        }
        text.push('"');
        Literal(text)
    }

    /// Wraps the text of a doc comment, unchanged, as a literal.
    pub fn doccomment(s: &str) -> Literal {
        Literal(s.to_owned())
    }

    /// Builds an unsuffixed float literal; `.0` is appended when the
    /// default formatting of `n` contains no decimal point.
    ///
    /// # Panics
    ///
    /// Panics if `n` is NaN or infinite.
    pub fn float(n: f64) -> Literal {
        if !n.is_finite() {
            panic!("Invalid float literal {}", n);
        }
        let digits = n.to_string();
        if digits.contains('.') {
            Literal(digits)
        } else {
            Literal(digits + ".0")
        }
    }

    /// Builds an unsuffixed integer literal.
    pub fn integer(s: i64) -> Literal {
        Literal(format!("{}", s))
    }

    /// Builds a raw string literal: `r`, `pounds` hashes, the quoted text,
    /// then `pounds` hashes again. The text is inserted without checks.
    pub fn raw_string(s: &str, pounds: usize) -> Literal {
        let hashes: String = (0..pounds).map(|_| '#').collect();
        Literal(format!("r{}\"{}\"{}", hashes, s, hashes))
    }

    /// Same as `raw_string` but with the `br` prefix of a raw byte string.
    pub fn raw_byte_string(s: &str, pounds: usize) -> Literal {
        let hashes: String = (0..pounds).map(|_| '#').collect();
        Literal(format!("br{}\"{}\"{}", hashes, s, hashes))
    }
}
536 
537 impl fmt::Display for Literal {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result538     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
539         self.0.fmt(f)
540     }
541 }
542 
543 macro_rules! ints {
544     ($($t:ty,)*) => {$(
545         impl From<$t> for Literal {
546             fn from(t: $t) -> Literal {
547                 Literal(format!(concat!("{}", stringify!($t)), t))
548             }
549         }
550     )*}
551 }
552 
553 ints! {
554     u8, u16, u32, u64, usize,
555     i8, i16, i32, i64, isize,
556 }
557 
558 macro_rules! floats {
559     ($($t:ty,)*) => {$(
560         impl From<$t> for Literal {
561             fn from(t: $t) -> Literal {
562                 assert!(!t.is_nan());
563                 assert!(!t.is_infinite());
564                 Literal(format!(concat!("{}", stringify!($t)), t))
565             }
566         }
567     )*}
568 }
569 
570 floats! {
571     f32, f64,
572 }
573 
574 impl<'a> From<&'a str> for Literal {
from(t: &'a str) -> Literal575     fn from(t: &'a str) -> Literal {
576         let mut s = t.chars().flat_map(|c| c.escape_default()).collect::<String>();
577         s.push('"');
578         s.insert(0, '"');
579         Literal(s)
580     }
581 }
582 
583 impl From<char> for Literal {
from(t: char) -> Literal584     fn from(t: char) -> Literal {
585         Literal(format!("'{}'", t.escape_default().collect::<String>()))
586     }
587 }
588 
// Parser for a whole token stream: gathers the results of repeatedly
// applying `token_tree` (via `many0!`, presumably zero or more times —
// confirm against strnom) and wraps the collected vector in the crate-level
// `::TokenStream`.
named!(token_stream -> ::TokenStream, map!(
    many0!(token_tree),
    |trees| ::TokenStream(TokenStream { inner: trees })
));
593 
594 #[cfg(not(procmacro2_semver_exempt))]
token_tree(input: Cursor) -> PResult<TokenTree>595 fn token_tree(input: Cursor) -> PResult<TokenTree> {
596     let (input, kind) = token_kind(input)?;
597     Ok((input, TokenTree {
598         span: ::Span(Span {}),
599         kind: kind,
600     }))
601 }
602 
/// Parses one token and wraps its kind in a `TokenTree` whose span runs
/// from the cursor offset after leading whitespace to the cursor offset
/// after the token.
#[cfg(procmacro2_semver_exempt)]
fn token_tree(input: Cursor) -> PResult<TokenTree> {
    // Skip whitespace first so it is not counted as part of the span.
    let start = skip_whitespace(input);
    let (rest, kind) = token_kind(start)?;
    let span = Span {
        lo: start.off,
        hi: rest.off,
    };
    Ok((rest, TokenTree {
        span: ::Span(span),
        kind: kind,
    }))
}
617 
// Parser for the kind of a single token. The alternatives are listed as:
// delimited group, literal, symbol, operator. The literal alternative is
// deliberately placed ahead of `symbol` (see the inline note), which implies
// that earlier alternatives take precedence.
named!(token_kind -> TokenNode, alt!(
    map!(delimited, |(d, s)| TokenNode::Group(d, s))
    |
    map!(literal, TokenNode::Literal) // must be before symbol
    |
    symbol
    |
    map!(op, |(op, kind)| TokenNode::Op(op, kind))
));
627 
// Parser for a delimited group: a nested `token_stream` enclosed in `(…)`,
// `[…]` or `{…}`. Yields the matching `Delimiter` together with the inner
// stream.
named!(delimited -> (Delimiter, ::TokenStream), alt!(
    delimited!(
        punct!("("),
        token_stream,
        punct!(")")
    ) => { |ts| (Delimiter::Parenthesis, ts) }
    |
    delimited!(
        punct!("["),
        token_stream,
        punct!("]")
    ) => { |ts| (Delimiter::Bracket, ts) }
    |
    delimited!(
        punct!("{"),
        token_stream,
        punct!("}")
    ) => { |ts| (Delimiter::Brace, ts) }
));
647 
symbol(mut input: Cursor) -> PResult<TokenNode>648 fn symbol(mut input: Cursor) -> PResult<TokenNode> {
649     input = skip_whitespace(input);
650 
651     let mut chars = input.char_indices();
652 
653     let lifetime = input.starts_with("'");
654     if lifetime {
655         chars.next();
656     }
657 
658     match chars.next() {
659         Some((_, ch)) if UnicodeXID::is_xid_start(ch) || ch == '_' => {}
660         _ => return Err(LexError),
661     }
662 
663     let mut end = input.len();
664     for (i, ch) in chars {
665         if !UnicodeXID::is_xid_continue(ch) {
666             end = i;
667             break;
668         }
669     }
670 
671     if lifetime && &input.rest[..end] != "'static" && KEYWORDS.contains(&&input.rest[1..end]) {
672         Err(LexError)
673     } else {
674         let a = &input.rest[..end];
675         if a == "_" {
676             Ok((input.advance(end), TokenNode::Op('_', Spacing::Alone)))
677         } else {
678             Ok((input.advance(end), TokenNode::Term(::Term::intern(a))))
679         }
680     }
681 }
682 
// Keyword list used by `symbol` to reject lifetimes that are named after a
// keyword.
// From https://github.com/rust-lang/rust/blob/master/src/libsyntax_pos/symbol.rs
static KEYWORDS: &'static [&'static str] = &[
    "abstract", "alignof", "as", "become", "box", "break", "const", "continue",
    "crate", "do", "else", "enum", "extern", "false", "final", "fn", "for",
    "if", "impl", "in", "let", "loop", "macro", "match", "mod", "move", "mut",
    "offsetof", "override", "priv", "proc", "pub", "pure", "ref", "return",
    "self", "Self", "sizeof", "static", "struct", "super", "trait", "true",
    "type", "typeof", "unsafe", "unsized", "use", "virtual", "where", "while",
    "yield",
];
693 
literal(input: Cursor) -> PResult<::Literal>694 fn literal(input: Cursor) -> PResult<::Literal> {
695     let input_no_ws = skip_whitespace(input);
696 
697     match literal_nocapture(input_no_ws) {
698         Ok((a, ())) => {
699             let start = input.len() - input_no_ws.len();
700             let len = input_no_ws.len() - a.len();
701             let end = start + len;
702             Ok((a, ::Literal(Literal(input.rest[start..end].to_string()))))
703         }
704         Err(LexError) => Err(LexError),
705     }
706 }
707 
// Recognizer for any literal form: string, byte string, byte, character,
// float, integer, or doc comment. It yields `()`; the caller (`literal`)
// reconstructs the text from the amount of input consumed.
named!(literal_nocapture -> (), alt!(
    string
    |
    byte_string
    |
    byte
    |
    character
    |
    float
    |
    int
    |
    doc_comment
));
723 
// Recognizer for a string literal: either a quoted string, or `r` followed
// by the body of a raw string.
named!(string -> (), alt!(
    quoted_string
    |
    preceded!(
        punct!("r"),
        raw_string
    ) => { |_| () }
));
732 
// Recognizer for `"…"`. `cooked_string` stops with the cursor on the closing
// quote, which the trailing `tag!` then consumes.
named!(quoted_string -> (), delimited!(
    punct!("\""),
    cooked_string,
    tag!("\"")
));
738 
cooked_string(input: Cursor) -> PResult<()>739 fn cooked_string(input: Cursor) -> PResult<()> {
740     let mut chars = input.char_indices().peekable();
741     while let Some((byte_offset, ch)) = chars.next() {
742         match ch {
743             '"' => {
744                 return Ok((input.advance(byte_offset), ()));
745             }
746             '\r' => {
747                 if let Some((_, '\n')) = chars.next() {
748                     // ...
749                 } else {
750                     break;
751                 }
752             }
753             '\\' => {
754                 match chars.next() {
755                     Some((_, 'x')) => {
756                         if !backslash_x_char(&mut chars) {
757                             break
758                         }
759                     }
760                     Some((_, 'n')) |
761                     Some((_, 'r')) |
762                     Some((_, 't')) |
763                     Some((_, '\\')) |
764                     Some((_, '\'')) |
765                     Some((_, '"')) |
766                     Some((_, '0')) => {}
767                     Some((_, 'u')) => {
768                         if !backslash_u(&mut chars) {
769                             break
770                         }
771                     }
772                     Some((_, '\n')) | Some((_, '\r')) => {
773                         while let Some(&(_, ch)) = chars.peek() {
774                             if ch.is_whitespace() {
775                                 chars.next();
776                             } else {
777                                 break;
778                             }
779                         }
780                     }
781                     _ => break,
782                 }
783             }
784             _ch => {}
785         }
786     }
787     Err(LexError)
788 }
789 
// Recognizer for a byte string literal: either `b"…"` with a cooked body, or
// `br` followed by the body of a raw string.
named!(byte_string -> (), alt!(
    delimited!(
        punct!("b\""),
        cooked_byte_string,
        tag!("\"")
    ) => { |_| () }
    |
    preceded!(
        punct!("br"),
        raw_string
    ) => { |_| () }
));
802 
// Scans the body of a `b"…"` literal, starting just after the opening quote.
//
// On success the returned cursor points at the closing `"` (not consumed).
// Fails on a carriage return not followed by `\n`, an invalid escape, a
// non-ASCII byte, or input that ends before a closing quote.
fn cooked_byte_string(mut input: Cursor) -> PResult<()> {
    let mut bytes = input.bytes().enumerate();
    'outer: while let Some((offset, b)) = bytes.next() {
        match b {
            b'"' => {
                // `offset` is relative to the current `input`, which may have
                // been re-seated by a line continuation below.
                return Ok((input.advance(offset), ()));
            }
            b'\r' => {
                if let Some((_, b'\n')) = bytes.next() {
                    // ...
                } else {
                    break;
                }
            }
            b'\\' => {
                match bytes.next() {
                    Some((_, b'x')) => {
                        if !backslash_x_byte(&mut bytes) {
                            break
                        }
                    }
                    Some((_, b'n')) |
                    Some((_, b'r')) |
                    Some((_, b't')) |
                    Some((_, b'\\')) |
                    Some((_, b'0')) |
                    Some((_, b'\'')) |
                    Some((_, b'"'))  => {}
                    Some((newline, b'\n')) |
                    Some((newline, b'\r')) => {
                        // Backslash at end of line: skip the following
                        // whitespace, then restart scanning from the first
                        // non-whitespace character with a fresh iterator.
                        let rest = input.advance(newline + 1);
                        for (offset, ch) in rest.char_indices() {
                            if !ch.is_whitespace() {
                                input = rest.advance(offset);
                                bytes = input.bytes().enumerate();
                                continue 'outer;
                            }
                        }
                        // Only whitespace up to the end of input.
                        break;
                    }
                    _ => break,
                }
            }
            // Any other ASCII byte is allowed verbatim.
            b if b < 0x80 => {}
            _ => break,
        }
    }
    // Every `break` above and running out of input are failures.
    Err(LexError)
}
852 
raw_string(input: Cursor) -> PResult<()>853 fn raw_string(input: Cursor) -> PResult<()> {
854     let mut chars = input.char_indices();
855     let mut n = 0;
856     while let Some((byte_offset, ch)) = chars.next() {
857         match ch {
858             '"' => {
859                 n = byte_offset;
860                 break;
861             }
862             '#' => {}
863             _ => return Err(LexError),
864         }
865     }
866     for (byte_offset, ch) in chars {
867         match ch {
868             '"' if input.advance(byte_offset + 1).starts_with(&input.rest[..n]) => {
869                 let rest = input.advance(byte_offset + 1 + n);
870                 return Ok((rest, ()))
871             }
872             '\r' => {}
873             _ => {}
874         }
875     }
876     Err(LexError)
877 }
878 
// Recognizer for a byte literal: `b`, `'`, one cooked byte, `'`.
named!(byte -> (), do_parse!(
    punct!("b") >>
    tag!("'") >>
    cooked_byte >>
    tag!("'") >>
    (())
));
886 
cooked_byte(input: Cursor) -> PResult<()>887 fn cooked_byte(input: Cursor) -> PResult<()> {
888     let mut bytes = input.bytes().enumerate();
889     let ok = match bytes.next().map(|(_, b)| b) {
890         Some(b'\\') => {
891             match bytes.next().map(|(_, b)| b) {
892                 Some(b'x') => backslash_x_byte(&mut bytes),
893                 Some(b'n') |
894                 Some(b'r') |
895                 Some(b't') |
896                 Some(b'\\') |
897                 Some(b'0') |
898                 Some(b'\'') |
899                 Some(b'"') => true,
900                 _ => false,
901             }
902         }
903         b => b.is_some(),
904     };
905     if ok {
906         match bytes.next() {
907             Some((offset, _)) => {
908                 if input.chars().as_str().is_char_boundary(offset) {
909                     Ok((input.advance(offset), ()))
910                 } else {
911                     Err(LexError)
912                 }
913             }
914             None => Ok((input.advance(input.len()), ())),
915         }
916     } else {
917         Err(LexError)
918     }
919 }
920 
// Recognizer for a character literal: `'`, one cooked character, `'`.
named!(character -> (), do_parse!(
    punct!("'") >>
    cooked_char >>
    tag!("'") >>
    (())
));
927 
cooked_char(input: Cursor) -> PResult<()>928 fn cooked_char(input: Cursor) -> PResult<()> {
929     let mut chars = input.char_indices();
930     let ok = match chars.next().map(|(_, ch)| ch) {
931         Some('\\') => {
932             match chars.next().map(|(_, ch)| ch) {
933                 Some('x') => backslash_x_char(&mut chars),
934                 Some('u') => backslash_u(&mut chars),
935                 Some('n') |
936                 Some('r') |
937                 Some('t') |
938                 Some('\\') |
939                 Some('0') |
940                 Some('\'') |
941                 Some('"') => true,
942                 _ => false,
943             }
944         }
945         ch => ch.is_some(),
946     };
947     if ok {
948         match chars.next() {
949             Some((idx, _)) => Ok((input.advance(idx), ())),
950             None => Ok((input.advance(input.len()), ())),
951         }
952     } else {
953         Err(LexError)
954     }
955 }
956 
// Pulls the next `(index, item)` pair from `$chars` and evaluates to the
// item when it matches one of the given patterns. When the item does not
// match, or the iterator is exhausted, it executes `return false` in the
// enclosing function, so it can only be used inside functions that return
// `bool`.
macro_rules! next_ch {
    ($chars:ident @ $pat:pat $(| $rest:pat)*) => {
        match $chars.next() {
            Some((_, ch)) => match ch {
                $pat $(| $rest)*  => ch,
                _ => return false,
            },
            None => return false
        }
    };
}
968 
/// Checks the two digits of a `\x` escape inside a character or string
/// literal, assuming `\x` itself has already been consumed.
///
/// The first digit must be `0`-`7`; the second may be any hexadecimal
/// digit. Consumes one item per digit checked and stops at the first
/// mismatch.
fn backslash_x_char<I>(chars: &mut I) -> bool
    where I: Iterator<Item = (usize, char)>
{
    let high_ok = match chars.next() {
        Some((_, ch)) => ch >= '0' && ch <= '7',
        None => false,
    };
    if !high_ok {
        return false;
    }
    match chars.next() {
        Some((_, ch)) => ch.is_digit(16),
        None => false,
    }
}
976 
/// Checks the two digits of a `\x` escape inside a byte or byte-string
/// literal, assuming `\x` itself has already been consumed.
///
/// Both digits may be any ASCII hexadecimal digit. Consumes one item per
/// digit checked and stops at the first mismatch.
fn backslash_x_byte<I>(chars: &mut I) -> bool
    where I: Iterator<Item = (usize, u8)>
{
    for _ in 0..2 {
        match chars.next() {
            Some((_, b)) if (b as char).is_digit(16) => {}
            _ => return false,
        }
    }
    true
}
984 
/// Checks the remainder of a `\u{…}` escape, assuming `\u` itself has
/// already been consumed.
///
/// Accepts `{`, then one to six hexadecimal digits (underscores are allowed
/// after the first digit), then `}`. The digits must also denote a Unicode
/// scalar value, so surrogates and values above `10FFFF` are rejected.
///
/// Returns `true` right after consuming the closing `}`; returns `false` on
/// the first offending item or when the input ends early.
fn backslash_u<I>(chars: &mut I) -> bool
    where I: Iterator<Item = (usize, char)>
{
    match chars.next() {
        Some((_, '{')) => {}
        _ => return false,
    }

    let mut value: u32 = 0;
    let mut digits = 0;
    for (_, ch) in chars {
        match ch {
            // An empty `\u{}` is not a valid escape, and the value has to
            // be a real scalar value.
            '}' => return digits > 0 && ::std::char::from_u32(value).is_some(),
            // Separators may not come before the first digit.
            '_' if digits > 0 => {}
            _ => match ch.to_digit(16) {
                // At most six digits, which also keeps `value` from
                // overflowing.
                Some(digit) if digits < 6 => {
                    value = value * 16 + digit;
                    digits += 1;
                }
                _ => return false,
            },
        }
    }
    // Input ended before the closing brace.
    false
}
997 
float(input: Cursor) -> PResult<()>998 fn float(input: Cursor) -> PResult<()> {
999     let (rest, ()) = float_digits(input)?;
1000     for suffix in &["f32", "f64"] {
1001         if rest.starts_with(suffix) {
1002             return word_break(rest.advance(suffix.len()));
1003         }
1004     }
1005     word_break(rest)
1006 }
1007 
float_digits(input: Cursor) -> PResult<()>1008 fn float_digits(input: Cursor) -> PResult<()> {
1009     let mut chars = input.chars().peekable();
1010     match chars.next() {
1011         Some(ch) if ch >= '0' && ch <= '9' => {}
1012         _ => return Err(LexError),
1013     }
1014 
1015     let mut len = 1;
1016     let mut has_dot = false;
1017     let mut has_exp = false;
1018     while let Some(&ch) = chars.peek() {
1019         match ch {
1020             '0'...'9' | '_' => {
1021                 chars.next();
1022                 len += 1;
1023             }
1024             '.' => {
1025                 if has_dot {
1026                     break;
1027                 }
1028                 chars.next();
1029                 if chars.peek()
1030                        .map(|&ch| ch == '.' || UnicodeXID::is_xid_start(ch))
1031                        .unwrap_or(false) {
1032                     return Err(LexError);
1033                 }
1034                 len += 1;
1035                 has_dot = true;
1036             }
1037             'e' | 'E' => {
1038                 chars.next();
1039                 len += 1;
1040                 has_exp = true;
1041                 break;
1042             }
1043             _ => break,
1044         }
1045     }
1046 
1047     let rest = input.advance(len);
1048     if !(has_dot || has_exp || rest.starts_with("f32") || rest.starts_with("f64")) {
1049         return Err(LexError);
1050     }
1051 
1052     if has_exp {
1053         let mut has_exp_value = false;
1054         while let Some(&ch) = chars.peek() {
1055             match ch {
1056                 '+' | '-' => {
1057                     if has_exp_value {
1058                         break;
1059                     }
1060                     chars.next();
1061                     len += 1;
1062                 }
1063                 '0'...'9' => {
1064                     chars.next();
1065                     len += 1;
1066                     has_exp_value = true;
1067                 }
1068                 '_' => {
1069                     chars.next();
1070                     len += 1;
1071                 }
1072                 _ => break,
1073             }
1074         }
1075         if !has_exp_value {
1076             return Err(LexError);
1077         }
1078     }
1079 
1080     Ok((input.advance(len), ()))
1081 }
1082 
int(input: Cursor) -> PResult<()>1083 fn int(input: Cursor) -> PResult<()> {
1084     let (rest, ()) = digits(input)?;
1085     for suffix in &[
1086         "isize",
1087         "i8",
1088         "i16",
1089         "i32",
1090         "i64",
1091         "i128",
1092         "usize",
1093         "u8",
1094         "u16",
1095         "u32",
1096         "u64",
1097         "u128",
1098     ] {
1099         if rest.starts_with(suffix) {
1100             return word_break(rest.advance(suffix.len()));
1101         }
1102     }
1103     word_break(rest)
1104 }
1105 
digits(mut input: Cursor) -> PResult<()>1106 fn digits(mut input: Cursor) -> PResult<()> {
1107     let base = if input.starts_with("0x") {
1108         input = input.advance(2);
1109         16
1110     } else if input.starts_with("0o") {
1111         input = input.advance(2);
1112         8
1113     } else if input.starts_with("0b") {
1114         input = input.advance(2);
1115         2
1116     } else {
1117         10
1118     };
1119 
1120     let mut len = 0;
1121     let mut empty = true;
1122     for b in input.bytes() {
1123         let digit = match b {
1124             b'0'...b'9' => (b - b'0') as u64,
1125             b'a'...b'f' => 10 + (b - b'a') as u64,
1126             b'A'...b'F' => 10 + (b - b'A') as u64,
1127             b'_' => {
1128                 if empty && base == 10 {
1129                     return Err(LexError);
1130                 }
1131                 len += 1;
1132                 continue;
1133             }
1134             _ => break,
1135         };
1136         if digit >= base {
1137             return Err(LexError);
1138         }
1139         len += 1;
1140         empty = false;
1141     }
1142     if empty {
1143         Err(LexError)
1144     } else {
1145         Ok((input.advance(len), ()))
1146     }
1147 }
1148 
op(input: Cursor) -> PResult<(char, Spacing)>1149 fn op(input: Cursor) -> PResult<(char, Spacing)> {
1150     let input = skip_whitespace(input);
1151     match op_char(input) {
1152         Ok((rest, ch)) => {
1153             let kind = match op_char(rest) {
1154                 Ok(_) => Spacing::Joint,
1155                 Err(LexError) => Spacing::Alone,
1156             };
1157             Ok((rest, (ch, kind)))
1158         }
1159         Err(LexError) => Err(LexError),
1160     }
1161 }
1162 
op_char(input: Cursor) -> PResult<char>1163 fn op_char(input: Cursor) -> PResult<char> {
1164     let mut chars = input.chars();
1165     let first = match chars.next() {
1166         Some(ch) => ch,
1167         None => {
1168             return Err(LexError);
1169         }
1170     };
1171     let recognized = "~!@#$%^&*-=+|;:,<.>/?";
1172     if recognized.contains(first) {
1173         Ok((input.advance(first.len_utf8()), first))
1174     } else {
1175         Err(LexError)
1176     }
1177 }
1178 
// Parser for doc comments. It produces `()` and tries four alternatives in
// the order written below: `//!` line comments, `/*!` block comments, `///`
// line comments and `/**` block comments.
//
// NOTE(review): `named!`, `alt!`, `do_parse!`, `punct!`, `tag!`, `peek!`,
// `not!`, `option!`, `tuple!` and `take_until!` are defined elsewhere; the
// per-alternative notes assume the usual nom-style meaning of those names.
// Confirm against their definitions.
named!(doc_comment -> (), alt!(
    // Inner line doc comment: `//!` followed by everything up to "\n".
    // NOTE(review): this looks like it needs a newline to be present, so a
    // comment on the last line of input without a trailing newline may not
    // match. Verify against `take_until!`.
    do_parse!(
        punct!("//!") >>
        take_until!("\n") >>
        (())
    )
    |
    // Inner block doc comment: optional whitespace, then a block comment
    // that starts with `/*!`. The `peek!` only inspects the opener; the
    // comment itself is consumed by `block_comment`.
    do_parse!(
        option!(whitespace) >>
        peek!(tag!("/*!")) >>
        block_comment >>
        (())
    )
    |
    // Outer line doc comment: `///` that is not followed by a fourth `/`,
    // then everything up to "\n".
    do_parse!(
        punct!("///") >>
        not!(tag!("/")) >>
        take_until!("\n") >>
        (())
    )
    |
    // Outer block doc comment: optional whitespace, then a block comment
    // that starts with `/**` not followed by a third `*`.
    do_parse!(
        option!(whitespace) >>
        peek!(tuple!(tag!("/**"), not!(tag!("*")))) >>
        block_comment >>
        (())
    )
));
1207