1 use std::ascii;
2 use std::borrow::Borrow;
3 use std::cell::RefCell;
4 #[cfg(procmacro2_semver_exempt)]
5 use std::cmp;
6 use std::collections::HashMap;
7 use std::fmt;
8 use std::iter;
9 use std::marker::PhantomData;
10 use std::rc::Rc;
11 use std::str::FromStr;
12 use std::vec;
13
14 use proc_macro;
15 use unicode_xid::UnicodeXID;
16 use strnom::{Cursor, PResult, skip_whitespace, block_comment, whitespace, word_break};
17
18 use {TokenTree, TokenNode, Delimiter, Spacing};
19
20 #[derive(Clone, Debug)]
21 pub struct TokenStream {
22 inner: Vec<TokenTree>,
23 }
24
25 #[derive(Debug)]
26 pub struct LexError;
27
28 impl TokenStream {
empty() -> TokenStream29 pub fn empty() -> TokenStream {
30 TokenStream { inner: Vec::new() }
31 }
32
is_empty(&self) -> bool33 pub fn is_empty(&self) -> bool {
34 self.inner.len() == 0
35 }
36 }
37
38 #[cfg(procmacro2_semver_exempt)]
get_cursor(src: &str) -> Cursor39 fn get_cursor(src: &str) -> Cursor {
40 // Create a dummy file & add it to the codemap
41 CODEMAP.with(|cm| {
42 let mut cm = cm.borrow_mut();
43 let name = format!("<parsed string {}>", cm.files.len());
44 let span = cm.add_file(&name, src);
45 Cursor {
46 rest: src,
47 off: span.lo,
48 }
49 })
50 }
51
52 #[cfg(not(procmacro2_semver_exempt))]
get_cursor(src: &str) -> Cursor53 fn get_cursor(src: &str) -> Cursor {
54 Cursor {
55 rest: src,
56 }
57 }
58
59 impl FromStr for TokenStream {
60 type Err = LexError;
61
from_str(src: &str) -> Result<TokenStream, LexError>62 fn from_str(src: &str) -> Result<TokenStream, LexError> {
63 // Create a dummy file & add it to the codemap
64 let cursor = get_cursor(src);
65
66 match token_stream(cursor) {
67 Ok((input, output)) => {
68 if skip_whitespace(input).len() != 0 {
69 Err(LexError)
70 } else {
71 Ok(output.0)
72 }
73 }
74 Err(LexError) => Err(LexError),
75 }
76 }
77 }
78
79 impl fmt::Display for TokenStream {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result80 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
81 let mut joint = false;
82 for (i, tt) in self.inner.iter().enumerate() {
83 if i != 0 && !joint {
84 write!(f, " ")?;
85 }
86 joint = false;
87 match tt.kind {
88 TokenNode::Group(delim, ref stream) => {
89 let (start, end) = match delim {
90 Delimiter::Parenthesis => ("(", ")"),
91 Delimiter::Brace => ("{", "}"),
92 Delimiter::Bracket => ("[", "]"),
93 Delimiter::None => ("", ""),
94 };
95 if stream.0.inner.len() == 0 {
96 write!(f, "{} {}", start, end)?
97 } else {
98 write!(f, "{} {} {}", start, stream, end)?
99 }
100 }
101 TokenNode::Term(ref sym) => write!(f, "{}", sym.as_str())?,
102 TokenNode::Op(ch, ref op) => {
103 write!(f, "{}", ch)?;
104 match *op {
105 Spacing::Alone => {}
106 Spacing::Joint => joint = true,
107 }
108 }
109 TokenNode::Literal(ref literal) => {
110 write!(f, "{}", literal)?;
111 // handle comments
112 if (literal.0).0.starts_with("/") {
113 write!(f, "\n")?;
114 }
115 }
116 }
117 }
118
119 Ok(())
120 }
121 }
122
123 impl From<proc_macro::TokenStream> for TokenStream {
from(inner: proc_macro::TokenStream) -> TokenStream124 fn from(inner: proc_macro::TokenStream) -> TokenStream {
125 inner.to_string().parse().expect("compiler token stream parse failed")
126 }
127 }
128
129 impl From<TokenStream> for proc_macro::TokenStream {
from(inner: TokenStream) -> proc_macro::TokenStream130 fn from(inner: TokenStream) -> proc_macro::TokenStream {
131 inner.to_string().parse().expect("failed to parse to compiler tokens")
132 }
133 }
134
135
136 impl From<TokenTree> for TokenStream {
from(tree: TokenTree) -> TokenStream137 fn from(tree: TokenTree) -> TokenStream {
138 TokenStream { inner: vec![tree] }
139 }
140 }
141
142 impl iter::FromIterator<TokenStream> for TokenStream {
from_iter<I: IntoIterator<Item=TokenStream>>(streams: I) -> Self143 fn from_iter<I: IntoIterator<Item=TokenStream>>(streams: I) -> Self {
144 let mut v = Vec::new();
145
146 for stream in streams.into_iter() {
147 v.extend(stream.inner);
148 }
149
150 TokenStream { inner: v }
151 }
152 }
153
154 pub type TokenTreeIter = vec::IntoIter<TokenTree>;
155
156 impl IntoIterator for TokenStream {
157 type Item = TokenTree;
158 type IntoIter = TokenTreeIter;
159
into_iter(self) -> TokenTreeIter160 fn into_iter(self) -> TokenTreeIter {
161 self.inner.into_iter()
162 }
163 }
164
/// Name of a source file known to the codemap.
#[cfg(procmacro2_semver_exempt)]
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct FileName(String);

#[cfg(procmacro2_semver_exempt)]
impl fmt::Display for FileName {
    /// Prints the wrapped name unchanged.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        fmt::Display::fmt(&self.0, f)
    }
}

/// Handle describing the file a span belongs to.
#[cfg(procmacro2_semver_exempt)]
#[derive(Clone, PartialEq, Eq)]
pub struct SourceFile {
    name: FileName,
}

#[cfg(procmacro2_semver_exempt)]
impl SourceFile {
    /// Get the path to this source file as a string.
    pub fn path(&self) -> &FileName {
        let SourceFile { ref name } = *self;
        name
    }

    /// Always `false`: this implementation only knows about dummy files.
    pub fn is_real(&self) -> bool {
        // XXX(nika): Support real files in the future?
        false
    }
}

#[cfg(procmacro2_semver_exempt)]
impl AsRef<FileName> for SourceFile {
    /// Same as [`SourceFile::path`].
    fn as_ref(&self) -> &FileName {
        self.path()
    }
}

#[cfg(procmacro2_semver_exempt)]
impl fmt::Debug for SourceFile {
    /// Shows the `path` and `is_real` values rather than the raw field.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let mut out = f.debug_struct("SourceFile");
        out.field("path", &self.path());
        out.field("is_real", &self.is_real());
        out.finish()
    }
}
211
/// A line/column position inside a source file.
#[cfg(procmacro2_semver_exempt)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct LineColumn {
    pub line: usize,
    pub column: usize,
}

#[cfg(procmacro2_semver_exempt)]
thread_local! {
    /// Per-thread registry of every piece of source text that has been lexed.
    static CODEMAP: RefCell<Codemap> = RefCell::new(Codemap {
        // NOTE: We start with a single dummy file which all call_site() and
        // def_site() spans reference.
        files: vec![FileInfo {
            name: String::from("<unspecified>"),
            span: Span { lo: 0, hi: 0 },
            lines: vec![0],
        }],
    });
}

/// Bookkeeping for one registered file: its name, the span of global offsets
/// it occupies, and the file-relative offset at which each line starts.
#[cfg(procmacro2_semver_exempt)]
struct FileInfo {
    name: String,
    span: Span,
    lines: Vec<usize>,
}

#[cfg(procmacro2_semver_exempt)]
impl FileInfo {
    /// Translates a global `offset` into a line/column pair for this file.
    ///
    /// Panics if `offset` does not fall inside this file's span.
    fn offset_line_column(&self, offset: usize) -> LineColumn {
        assert!(self.span_within(Span { lo: offset as u32, hi: offset as u32 }));
        let relative = offset - self.span.lo as usize;
        let (line, column) = match self.lines.binary_search(&relative) {
            // Exactly at a line start: index `i` is the (i + 1)-th line.
            Ok(index) => (index + 1, 0),
            // Otherwise the insertion point equals the 1-based line number,
            // and the line itself starts at the entry just before it.
            Err(index) => (index, relative - self.lines[index - 1]),
        };
        LineColumn {
            line: line,
            column: column,
        }
    }

    /// Returns `true` when `span` lies entirely inside this file's span.
    fn span_within(&self, span: Span) -> bool {
        self.span.lo <= span.lo && span.hi <= self.span.hi
    }
}
260
261 /// Computes the offsets of each line in the given source string.
262 #[cfg(procmacro2_semver_exempt)]
fn lines_offsets(s: &str) -> Vec<usize> {
    // The first line always starts at offset 0; every further line starts one
    // byte past a `\n`.
    let mut starts = vec![0];
    starts.extend(s.match_indices('\n').map(|(newline_at, _)| newline_at + 1));
    starts
}
272
/// Registry of all files whose text has been lexed on this thread.
#[cfg(procmacro2_semver_exempt)]
struct Codemap {
    files: Vec<FileInfo>,
}

#[cfg(procmacro2_semver_exempt)]
impl Codemap {
    /// Returns the global offset at which the next registered file starts.
    fn next_start_pos(&self) -> u32 {
        // The files list is initialised with a dummy file, so there is always
        // a last entry to look at.
        let last = self.files.last().unwrap();
        // Add 1 so there's always space between files.
        last.span.hi + 1
    }

    /// Registers `src` under `name` and returns the span reserved for it.
    fn add_file(&mut self, name: &str, src: &str) -> Span {
        let start = self.next_start_pos();
        // XXX(nika): Should we bother doing a checked cast or checked add here?
        let end = start + (src.len() as u32);
        let span = Span { lo: start, hi: end };

        let info = FileInfo {
            name: name.to_owned(),
            span: span,
            lines: lines_offsets(src),
        };
        self.files.push(info);

        span
    }

    /// Finds the first registered file whose span contains `span`.
    ///
    /// Panics when no registered file contains it.
    fn fileinfo(&self, span: Span) -> &FileInfo {
        self.files
            .iter()
            .find(|file| file.span_within(span))
            .unwrap_or_else(|| panic!("Invalid span with no related FileInfo!"))
    }
}
312
/// A region of source text.
///
/// Without span tracking this is a zero-field marker; with it, `lo`/`hi` are
/// offsets into the thread-local codemap.
#[derive(Clone, Copy, Debug)]
pub struct Span {
    #[cfg(procmacro2_semver_exempt)]
    lo: u32,
    #[cfg(procmacro2_semver_exempt)]
    hi: u32,
}

impl Span {
    /// Span used for tokens with no better location (field-less variant).
    #[cfg(not(procmacro2_semver_exempt))]
    pub fn call_site() -> Span {
        Span {}
    }

    /// Span used for tokens with no better location; it is the `0..0` span
    /// that the initial dummy codemap file is created with.
    #[cfg(procmacro2_semver_exempt)]
    pub fn call_site() -> Span {
        Span { lo: 0, hi: 0 }
    }

    /// Identical to [`Span::call_site`] in this implementation.
    pub fn def_site() -> Span {
        Span::call_site()
    }

    /// Returns `self` unchanged; the argument is ignored.
    pub fn resolved_at(&self, _other: Span) -> Span {
        // Stable spans consist only of line/column information, so
        // `resolved_at` and `located_at` only select which span the
        // caller wants line/column information from.
        *self
    }

    /// Returns `other`, i.e. takes the location information from it.
    pub fn located_at(&self, other: Span) -> Span {
        other
    }

    /// Looks up the file this span belongs to.
    ///
    /// Panics if no registered file contains the span.
    #[cfg(procmacro2_semver_exempt)]
    pub fn source_file(&self) -> SourceFile {
        CODEMAP.with(|codemap| {
            let codemap = codemap.borrow();
            let file_name = codemap.fileinfo(*self).name.clone();
            SourceFile {
                name: FileName(file_name),
            }
        })
    }

    /// Line/column of the first byte of the span.
    #[cfg(procmacro2_semver_exempt)]
    pub fn start(&self) -> LineColumn {
        CODEMAP.with(|codemap| {
            let codemap = codemap.borrow();
            codemap.fileinfo(*self).offset_line_column(self.lo as usize)
        })
    }

    /// Line/column of the end offset of the span.
    #[cfg(procmacro2_semver_exempt)]
    pub fn end(&self) -> LineColumn {
        CODEMAP.with(|codemap| {
            let codemap = codemap.borrow();
            codemap.fileinfo(*self).offset_line_column(self.hi as usize)
        })
    }

    /// Smallest span covering both `self` and `other`, or `None` when
    /// `other` is not inside the same registered file as `self`.
    #[cfg(procmacro2_semver_exempt)]
    pub fn join(&self, other: Span) -> Option<Span> {
        CODEMAP.with(|codemap| {
            let codemap = codemap.borrow();
            let same_file = codemap.fileinfo(*self).span_within(other);
            if same_file {
                Some(Span {
                    lo: cmp::min(self.lo, other.lo),
                    hi: cmp::max(self.hi, other.hi),
                })
            } else {
                None
            }
        })
    }
}
391
/// An interned identifier-like string.
///
/// Only the index into the thread-local interner is stored. The raw-pointer
/// `PhantomData` keeps the type from being `Send`/`Sync`, since that index is
/// meaningful only on the thread that created it.
#[derive(Copy, Clone)]
pub struct Term {
    intern: usize,
    not_send_sync: PhantomData<*const ()>,
}

thread_local!(static SYMBOLS: RefCell<Interner> = RefCell::new(Interner::new()));

impl Term {
    /// Interns `string` on the current thread and returns a handle to it.
    pub fn intern(string: &str) -> Term {
        let index = SYMBOLS.with(|symbols| symbols.borrow_mut().intern(string));
        Term {
            intern: index,
            not_send_sync: PhantomData,
        }
    }

    /// Returns the text this term was interned from.
    pub fn as_str(&self) -> &str {
        SYMBOLS.with(|symbols| {
            let table = symbols.borrow();
            let text: *const str = table.get(self.intern);
            // SAFETY: the interner only ever appends strings, and each one
            // lives in its own `Rc<String>` allocation, so the bytes do not
            // move when the index vector grows.
            // NOTE(review): the returned reference is not tied to the
            // thread-local's lifetime; it must not be used after the
            // interner has been destroyed at thread exit.
            unsafe { &*text }
        })
    }
}

impl fmt::Debug for Term {
    /// Formats as `Term("text")`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let text = self.as_str();
        f.debug_tuple("Term").field(&text).finish()
    }
}

/// Bidirectional string <-> index table backing `Term`.
struct Interner {
    string_to_index: HashMap<MyRc, usize>,
    index_to_string: Vec<Rc<String>>,
}

/// Map key wrapper that lets the table be queried with a plain `&str`.
#[derive(Hash, Eq, PartialEq)]
struct MyRc(Rc<String>);

impl Borrow<str> for MyRc {
    fn borrow(&self) -> &str {
        self.0.as_str()
    }
}

impl Interner {
    /// Creates an empty table.
    fn new() -> Interner {
        Interner {
            string_to_index: HashMap::new(),
            index_to_string: Vec::new(),
        }
    }

    /// Returns the index of `s`, inserting it first if it is not yet known.
    fn intern(&mut self, s: &str) -> usize {
        if let Some(&known) = self.string_to_index.get(s) {
            return known;
        }
        // A new string gets the next free slot in the index vector.
        let index = self.index_to_string.len();
        let shared = Rc::new(s.to_string());
        self.index_to_string.push(shared.clone());
        self.string_to_index.insert(MyRc(shared), index);
        index
    }

    /// Returns the string stored at `idx`; panics if `idx` is out of range.
    fn get(&self, idx: usize) -> &str {
        self.index_to_string[idx].as_str()
    }
}
461
/// A literal token, stored as the exact source text it prints as.
#[derive(Clone, Debug)]
pub struct Literal(String);

impl Literal {
    /// Builds a byte literal such as `b'a'`.
    ///
    /// NUL is written as `\0`, a double quote is left unescaped, and every
    /// other byte is escaped with `std::ascii::escape_default`.
    pub fn byte_char(byte: u8) -> Literal {
        let body: String = match byte {
            0 => "\\0".to_string(),
            b'"' => "\"".to_string(),
            other => ascii::escape_default(other).map(|b| b as char).collect(),
        };
        Literal(format!("b'{}'", body))
    }

    /// Builds a byte-string literal such as `b"abc"`.
    ///
    /// Printable ASCII is copied through; NUL, tab, newline, carriage return,
    /// `"` and `\` get their short escapes; anything else becomes `\xNN`
    /// with upper-case hex digits.
    pub fn byte_string(bytes: &[u8]) -> Literal {
        let mut out = String::from("b\"");
        for &byte in bytes {
            match byte {
                b'\0' => out.push_str("\\0"),
                b'\t' => out.push_str("\\t"),
                b'\n' => out.push_str("\\n"),
                b'\r' => out.push_str("\\r"),
                b'"' => out.push_str("\\\""),
                b'\\' => out.push_str("\\\\"),
                printable if printable >= 0x20 && printable <= 0x7E => {
                    out.push(printable as char)
                }
                other => out.push_str(&format!("\\x{:02X}", other)),
            }
        }
        out.push('"');
        Literal(out)
    }

    /// Stores the given comment text verbatim as a literal.
    pub fn doccomment(s: &str) -> Literal {
        Literal(String::from(s))
    }

    /// Builds an unsuffixed float literal, appending `.0` when the printed
    /// number has no decimal point.
    ///
    /// Panics if `n` is NaN or infinite.
    pub fn float(n: f64) -> Literal {
        if n.is_nan() || n.is_infinite() {
            panic!("Invalid float literal {}", n);
        }
        let mut text = n.to_string();
        if text.find('.').is_none() {
            text.push_str(".0");
        }
        Literal(text)
    }

    /// Builds an unsuffixed integer literal.
    pub fn integer(s: i64) -> Literal {
        let text = s.to_string();
        Literal(text)
    }

    /// Builds a raw string literal `r#"…"#` with `pounds` hash marks on each
    /// side. The content is not checked against the chosen delimiter.
    pub fn raw_string(s: &str, pounds: usize) -> Literal {
        Literal::raw_with_prefix("r", s, pounds)
    }

    /// Builds a raw byte-string literal `br#"…"#` with `pounds` hash marks on
    /// each side. The content is not checked against the chosen delimiter.
    pub fn raw_byte_string(s: &str, pounds: usize) -> Literal {
        Literal::raw_with_prefix("br", s, pounds)
    }

    /// Shared layout for raw literals: prefix, hashes, quote, body, quote,
    /// hashes.
    fn raw_with_prefix(prefix: &str, s: &str, pounds: usize) -> Literal {
        let mut text = String::from(prefix);
        for _ in 0..pounds {
            text.push('#');
        }
        text.push('"');
        text.push_str(s);
        text.push('"');
        for _ in 0..pounds {
            text.push('#');
        }
        Literal(text)
    }
}

impl fmt::Display for Literal {
    /// Prints the stored source text.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        fmt::Display::fmt(&self.0, f)
    }
}

// Generates `From<int>` impls producing suffixed literals such as `5u8`.
macro_rules! ints {
    ($($int:ty,)*) => {$(
        impl From<$int> for Literal {
            fn from(t: $int) -> Literal {
                Literal(format!(concat!("{}", stringify!($int)), t))
            }
        }
    )*}
}

ints! {
    u8, u16, u32, u64, usize,
    i8, i16, i32, i64, isize,
}

// Generates `From<float>` impls producing suffixed literals such as
// `1.5f32`; NaN and infinities are rejected with a panic.
macro_rules! floats {
    ($($float:ty,)*) => {$(
        impl From<$float> for Literal {
            fn from(t: $float) -> Literal {
                assert!(!t.is_nan());
                assert!(!t.is_infinite());
                Literal(format!(concat!("{}", stringify!($float)), t))
            }
        }
    )*}
}

floats! {
    f32, f64,
}

/// Builds a double-quoted string literal, escaping every character with
/// `char::escape_default`.
impl<'a> From<&'a str> for Literal {
    fn from(t: &'a str) -> Literal {
        let mut quoted = String::with_capacity(t.len() + 2);
        quoted.push('"');
        for c in t.chars() {
            quoted.extend(c.escape_default());
        }
        quoted.push('"');
        Literal(quoted)
    }
}

/// Builds a single-quoted character literal using `char::escape_default`.
impl From<char> for Literal {
    fn from(t: char) -> Literal {
        let mut quoted = String::from("'");
        quoted.extend(t.escape_default());
        quoted.push('\'');
        Literal(quoted)
    }
}
588
// Parser for a whole token stream: the token trees gathered by
// `many0!(token_tree)` become the `inner` vector of a `TokenStream`, which is
// then wrapped in the crate-level `::TokenStream`.
// NOTE(review): `named!`, `map!` and `many0!` are defined outside this file;
// presumably `many0!` applies `token_tree` zero or more times — confirm.
named!(token_stream -> ::TokenStream, map!(
    many0!(token_tree),
    |trees| ::TokenStream(TokenStream { inner: trees })
));
593
594 #[cfg(not(procmacro2_semver_exempt))]
token_tree(input: Cursor) -> PResult<TokenTree>595 fn token_tree(input: Cursor) -> PResult<TokenTree> {
596 let (input, kind) = token_kind(input)?;
597 Ok((input, TokenTree {
598 span: ::Span(Span {}),
599 kind: kind,
600 }))
601 }
602
603 #[cfg(procmacro2_semver_exempt)]
token_tree(input: Cursor) -> PResult<TokenTree>604 fn token_tree(input: Cursor) -> PResult<TokenTree> {
605 let input = skip_whitespace(input);
606 let lo = input.off;
607 let (input, kind) = token_kind(input)?;
608 let hi = input.off;
609 Ok((input, TokenTree {
610 span: ::Span(Span {
611 lo: lo,
612 hi: hi,
613 }),
614 kind: kind,
615 }))
616 }
617
// Parser for the node of a single token. The alternatives are listed in the
// order: delimited group, literal, symbol, operator. The literal alternative
// is deliberately placed ahead of `symbol` (see the inline note).
// NOTE(review): `alt!` is defined outside this file; presumably it tries the
// alternatives in order and takes the first that succeeds — confirm.
named!(token_kind -> TokenNode, alt!(
    map!(delimited, |(d, s)| TokenNode::Group(d, s))
    |
    map!(literal, TokenNode::Literal) // must be before symbol
    |
    symbol
    |
    map!(op, |(op, kind)| TokenNode::Op(op, kind))
));
627
// Parser for a delimited group: a nested token stream enclosed in `( )`,
// `[ ]` or `{ }`, paired with the matching `Delimiter` variant.
// `Delimiter::None` is never produced here.
named!(delimited -> (Delimiter, ::TokenStream), alt!(
    delimited!(
        punct!("("),
        token_stream,
        punct!(")")
    ) => { |ts| (Delimiter::Parenthesis, ts) }
    |
    delimited!(
        punct!("["),
        token_stream,
        punct!("]")
    ) => { |ts| (Delimiter::Bracket, ts) }
    |
    delimited!(
        punct!("{"),
        token_stream,
        punct!("}")
    ) => { |ts| (Delimiter::Brace, ts) }
));
647
symbol(mut input: Cursor) -> PResult<TokenNode>648 fn symbol(mut input: Cursor) -> PResult<TokenNode> {
649 input = skip_whitespace(input);
650
651 let mut chars = input.char_indices();
652
653 let lifetime = input.starts_with("'");
654 if lifetime {
655 chars.next();
656 }
657
658 match chars.next() {
659 Some((_, ch)) if UnicodeXID::is_xid_start(ch) || ch == '_' => {}
660 _ => return Err(LexError),
661 }
662
663 let mut end = input.len();
664 for (i, ch) in chars {
665 if !UnicodeXID::is_xid_continue(ch) {
666 end = i;
667 break;
668 }
669 }
670
671 if lifetime && &input.rest[..end] != "'static" && KEYWORDS.contains(&&input.rest[1..end]) {
672 Err(LexError)
673 } else {
674 let a = &input.rest[..end];
675 if a == "_" {
676 Ok((input.advance(end), TokenNode::Op('_', Spacing::Alone)))
677 } else {
678 Ok((input.advance(end), TokenNode::Term(::Term::intern(a))))
679 }
680 }
681 }
682
683 // From https://github.com/rust-lang/rust/blob/master/src/libsyntax_pos/symbol.rs
684 static KEYWORDS: &'static [&'static str] = &[
685 "abstract", "alignof", "as", "become", "box", "break", "const", "continue",
686 "crate", "do", "else", "enum", "extern", "false", "final", "fn", "for",
687 "if", "impl", "in", "let", "loop", "macro", "match", "mod", "move", "mut",
688 "offsetof", "override", "priv", "proc", "pub", "pure", "ref", "return",
689 "self", "Self", "sizeof", "static", "struct", "super", "trait", "true",
690 "type", "typeof", "unsafe", "unsized", "use", "virtual", "where", "while",
691 "yield",
692 ];
693
literal(input: Cursor) -> PResult<::Literal>694 fn literal(input: Cursor) -> PResult<::Literal> {
695 let input_no_ws = skip_whitespace(input);
696
697 match literal_nocapture(input_no_ws) {
698 Ok((a, ())) => {
699 let start = input.len() - input_no_ws.len();
700 let len = input_no_ws.len() - a.len();
701 let end = start + len;
702 Ok((a, ::Literal(Literal(input.rest[start..end].to_string()))))
703 }
704 Err(LexError) => Err(LexError),
705 }
706 }
707
// Recogniser for any literal form; it yields `()` because the caller
// (`literal`) recovers the text from the cursor positions. Alternatives, in
// listed order: string, byte string, byte, character, float, integer, doc
// comment.
// NOTE(review): presumably `alt!` takes the first alternative that succeeds,
// which would make the float-before-int order significant — confirm.
named!(literal_nocapture -> (), alt!(
    string
    |
    byte_string
    |
    byte
    |
    character
    |
    float
    |
    int
    |
    doc_comment
));
723
// Recogniser for string literals: either a quoted string, or an `r` prefix
// followed by whatever `raw_string` accepts (optional `#`s, `"`, body, `"`,
// the same `#`s).
named!(string -> (), alt!(
    quoted_string
    |
    preceded!(
        punct!("r"),
        raw_string
    ) => { |_| () }
));
732
// Recogniser for a quoted string: an opening `"`, a body validated by
// `cooked_string` (which stops in front of the closing quote), then the
// closing `"`.
named!(quoted_string -> (), delimited!(
    punct!("\""),
    cooked_string,
    tag!("\"")
));
738
cooked_string(input: Cursor) -> PResult<()>739 fn cooked_string(input: Cursor) -> PResult<()> {
740 let mut chars = input.char_indices().peekable();
741 while let Some((byte_offset, ch)) = chars.next() {
742 match ch {
743 '"' => {
744 return Ok((input.advance(byte_offset), ()));
745 }
746 '\r' => {
747 if let Some((_, '\n')) = chars.next() {
748 // ...
749 } else {
750 break;
751 }
752 }
753 '\\' => {
754 match chars.next() {
755 Some((_, 'x')) => {
756 if !backslash_x_char(&mut chars) {
757 break
758 }
759 }
760 Some((_, 'n')) |
761 Some((_, 'r')) |
762 Some((_, 't')) |
763 Some((_, '\\')) |
764 Some((_, '\'')) |
765 Some((_, '"')) |
766 Some((_, '0')) => {}
767 Some((_, 'u')) => {
768 if !backslash_u(&mut chars) {
769 break
770 }
771 }
772 Some((_, '\n')) | Some((_, '\r')) => {
773 while let Some(&(_, ch)) = chars.peek() {
774 if ch.is_whitespace() {
775 chars.next();
776 } else {
777 break;
778 }
779 }
780 }
781 _ => break,
782 }
783 }
784 _ch => {}
785 }
786 }
787 Err(LexError)
788 }
789
// Recogniser for byte-string literals: either `b"` + a body validated by
// `cooked_byte_string` + the closing `"`, or a `br` prefix followed by
// whatever `raw_string` accepts.
named!(byte_string -> (), alt!(
    delimited!(
        punct!("b\""),
        cooked_byte_string,
        tag!("\"")
    ) => { |_| () }
    |
    preceded!(
        punct!("br"),
        raw_string
    ) => { |_| () }
));
802
cooked_byte_string(mut input: Cursor) -> PResult<()>803 fn cooked_byte_string(mut input: Cursor) -> PResult<()> {
804 let mut bytes = input.bytes().enumerate();
805 'outer: while let Some((offset, b)) = bytes.next() {
806 match b {
807 b'"' => {
808 return Ok((input.advance(offset), ()));
809 }
810 b'\r' => {
811 if let Some((_, b'\n')) = bytes.next() {
812 // ...
813 } else {
814 break;
815 }
816 }
817 b'\\' => {
818 match bytes.next() {
819 Some((_, b'x')) => {
820 if !backslash_x_byte(&mut bytes) {
821 break
822 }
823 }
824 Some((_, b'n')) |
825 Some((_, b'r')) |
826 Some((_, b't')) |
827 Some((_, b'\\')) |
828 Some((_, b'0')) |
829 Some((_, b'\'')) |
830 Some((_, b'"')) => {}
831 Some((newline, b'\n')) |
832 Some((newline, b'\r')) => {
833 let rest = input.advance(newline + 1);
834 for (offset, ch) in rest.char_indices() {
835 if !ch.is_whitespace() {
836 input = rest.advance(offset);
837 bytes = input.bytes().enumerate();
838 continue 'outer;
839 }
840 }
841 break;
842 }
843 _ => break,
844 }
845 }
846 b if b < 0x80 => {}
847 _ => break,
848 }
849 }
850 Err(LexError)
851 }
852
raw_string(input: Cursor) -> PResult<()>853 fn raw_string(input: Cursor) -> PResult<()> {
854 let mut chars = input.char_indices();
855 let mut n = 0;
856 while let Some((byte_offset, ch)) = chars.next() {
857 match ch {
858 '"' => {
859 n = byte_offset;
860 break;
861 }
862 '#' => {}
863 _ => return Err(LexError),
864 }
865 }
866 for (byte_offset, ch) in chars {
867 match ch {
868 '"' if input.advance(byte_offset + 1).starts_with(&input.rest[..n]) => {
869 let rest = input.advance(byte_offset + 1 + n);
870 return Ok((rest, ()))
871 }
872 '\r' => {}
873 _ => {}
874 }
875 }
876 Err(LexError)
877 }
878
// Recogniser for a byte literal: `b`, an opening `'`, one byte or escape
// validated by `cooked_byte`, and a closing `'`.
named!(byte -> (), do_parse!(
    punct!("b") >>
    tag!("'") >>
    cooked_byte >>
    tag!("'") >>
    (())
));
886
cooked_byte(input: Cursor) -> PResult<()>887 fn cooked_byte(input: Cursor) -> PResult<()> {
888 let mut bytes = input.bytes().enumerate();
889 let ok = match bytes.next().map(|(_, b)| b) {
890 Some(b'\\') => {
891 match bytes.next().map(|(_, b)| b) {
892 Some(b'x') => backslash_x_byte(&mut bytes),
893 Some(b'n') |
894 Some(b'r') |
895 Some(b't') |
896 Some(b'\\') |
897 Some(b'0') |
898 Some(b'\'') |
899 Some(b'"') => true,
900 _ => false,
901 }
902 }
903 b => b.is_some(),
904 };
905 if ok {
906 match bytes.next() {
907 Some((offset, _)) => {
908 if input.chars().as_str().is_char_boundary(offset) {
909 Ok((input.advance(offset), ()))
910 } else {
911 Err(LexError)
912 }
913 }
914 None => Ok((input.advance(input.len()), ())),
915 }
916 } else {
917 Err(LexError)
918 }
919 }
920
// Recogniser for a character literal: an opening `'`, one character or
// escape validated by `cooked_char`, and a closing `'`.
named!(character -> (), do_parse!(
    punct!("'") >>
    cooked_char >>
    tag!("'") >>
    (())
));
927
cooked_char(input: Cursor) -> PResult<()>928 fn cooked_char(input: Cursor) -> PResult<()> {
929 let mut chars = input.char_indices();
930 let ok = match chars.next().map(|(_, ch)| ch) {
931 Some('\\') => {
932 match chars.next().map(|(_, ch)| ch) {
933 Some('x') => backslash_x_char(&mut chars),
934 Some('u') => backslash_u(&mut chars),
935 Some('n') |
936 Some('r') |
937 Some('t') |
938 Some('\\') |
939 Some('0') |
940 Some('\'') |
941 Some('"') => true,
942 _ => false,
943 }
944 }
945 ch => ch.is_some(),
946 };
947 if ok {
948 match chars.next() {
949 Some((idx, _)) => Ok((input.advance(idx), ())),
950 None => Ok((input.advance(input.len()), ())),
951 }
952 } else {
953 Err(LexError)
954 }
955 }
956
957 macro_rules! next_ch {
958 ($chars:ident @ $pat:pat $(| $rest:pat)*) => {
959 match $chars.next() {
960 Some((_, ch)) => match ch {
961 $pat $(| $rest)* => ch,
962 _ => return false,
963 },
964 None => return false
965 }
966 };
967 }
968
backslash_x_char<I>(chars: &mut I) -> bool where I: Iterator<Item = (usize, char)>969 fn backslash_x_char<I>(chars: &mut I) -> bool
970 where I: Iterator<Item = (usize, char)>
971 {
972 next_ch!(chars @ '0'...'7');
973 next_ch!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F');
974 true
975 }
976
backslash_x_byte<I>(chars: &mut I) -> bool where I: Iterator<Item = (usize, u8)>977 fn backslash_x_byte<I>(chars: &mut I) -> bool
978 where I: Iterator<Item = (usize, u8)>
979 {
980 next_ch!(chars @ b'0'...b'9' | b'a'...b'f' | b'A'...b'F');
981 next_ch!(chars @ b'0'...b'9' | b'a'...b'f' | b'A'...b'F');
982 true
983 }
984
backslash_u<I>(chars: &mut I) -> bool where I: Iterator<Item = (usize, char)>985 fn backslash_u<I>(chars: &mut I) -> bool
986 where I: Iterator<Item = (usize, char)>
987 {
988 next_ch!(chars @ '{');
989 next_ch!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F');
990 loop {
991 let c = next_ch!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F' | '_' | '}');
992 if c == '}' {
993 return true;
994 }
995 }
996 }
997
float(input: Cursor) -> PResult<()>998 fn float(input: Cursor) -> PResult<()> {
999 let (rest, ()) = float_digits(input)?;
1000 for suffix in &["f32", "f64"] {
1001 if rest.starts_with(suffix) {
1002 return word_break(rest.advance(suffix.len()));
1003 }
1004 }
1005 word_break(rest)
1006 }
1007
float_digits(input: Cursor) -> PResult<()>1008 fn float_digits(input: Cursor) -> PResult<()> {
1009 let mut chars = input.chars().peekable();
1010 match chars.next() {
1011 Some(ch) if ch >= '0' && ch <= '9' => {}
1012 _ => return Err(LexError),
1013 }
1014
1015 let mut len = 1;
1016 let mut has_dot = false;
1017 let mut has_exp = false;
1018 while let Some(&ch) = chars.peek() {
1019 match ch {
1020 '0'...'9' | '_' => {
1021 chars.next();
1022 len += 1;
1023 }
1024 '.' => {
1025 if has_dot {
1026 break;
1027 }
1028 chars.next();
1029 if chars.peek()
1030 .map(|&ch| ch == '.' || UnicodeXID::is_xid_start(ch))
1031 .unwrap_or(false) {
1032 return Err(LexError);
1033 }
1034 len += 1;
1035 has_dot = true;
1036 }
1037 'e' | 'E' => {
1038 chars.next();
1039 len += 1;
1040 has_exp = true;
1041 break;
1042 }
1043 _ => break,
1044 }
1045 }
1046
1047 let rest = input.advance(len);
1048 if !(has_dot || has_exp || rest.starts_with("f32") || rest.starts_with("f64")) {
1049 return Err(LexError);
1050 }
1051
1052 if has_exp {
1053 let mut has_exp_value = false;
1054 while let Some(&ch) = chars.peek() {
1055 match ch {
1056 '+' | '-' => {
1057 if has_exp_value {
1058 break;
1059 }
1060 chars.next();
1061 len += 1;
1062 }
1063 '0'...'9' => {
1064 chars.next();
1065 len += 1;
1066 has_exp_value = true;
1067 }
1068 '_' => {
1069 chars.next();
1070 len += 1;
1071 }
1072 _ => break,
1073 }
1074 }
1075 if !has_exp_value {
1076 return Err(LexError);
1077 }
1078 }
1079
1080 Ok((input.advance(len), ()))
1081 }
1082
int(input: Cursor) -> PResult<()>1083 fn int(input: Cursor) -> PResult<()> {
1084 let (rest, ()) = digits(input)?;
1085 for suffix in &[
1086 "isize",
1087 "i8",
1088 "i16",
1089 "i32",
1090 "i64",
1091 "i128",
1092 "usize",
1093 "u8",
1094 "u16",
1095 "u32",
1096 "u64",
1097 "u128",
1098 ] {
1099 if rest.starts_with(suffix) {
1100 return word_break(rest.advance(suffix.len()));
1101 }
1102 }
1103 word_break(rest)
1104 }
1105
digits(mut input: Cursor) -> PResult<()>1106 fn digits(mut input: Cursor) -> PResult<()> {
1107 let base = if input.starts_with("0x") {
1108 input = input.advance(2);
1109 16
1110 } else if input.starts_with("0o") {
1111 input = input.advance(2);
1112 8
1113 } else if input.starts_with("0b") {
1114 input = input.advance(2);
1115 2
1116 } else {
1117 10
1118 };
1119
1120 let mut len = 0;
1121 let mut empty = true;
1122 for b in input.bytes() {
1123 let digit = match b {
1124 b'0'...b'9' => (b - b'0') as u64,
1125 b'a'...b'f' => 10 + (b - b'a') as u64,
1126 b'A'...b'F' => 10 + (b - b'A') as u64,
1127 b'_' => {
1128 if empty && base == 10 {
1129 return Err(LexError);
1130 }
1131 len += 1;
1132 continue;
1133 }
1134 _ => break,
1135 };
1136 if digit >= base {
1137 return Err(LexError);
1138 }
1139 len += 1;
1140 empty = false;
1141 }
1142 if empty {
1143 Err(LexError)
1144 } else {
1145 Ok((input.advance(len), ()))
1146 }
1147 }
1148
op(input: Cursor) -> PResult<(char, Spacing)>1149 fn op(input: Cursor) -> PResult<(char, Spacing)> {
1150 let input = skip_whitespace(input);
1151 match op_char(input) {
1152 Ok((rest, ch)) => {
1153 let kind = match op_char(rest) {
1154 Ok(_) => Spacing::Joint,
1155 Err(LexError) => Spacing::Alone,
1156 };
1157 Ok((rest, (ch, kind)))
1158 }
1159 Err(LexError) => Err(LexError),
1160 }
1161 }
1162
op_char(input: Cursor) -> PResult<char>1163 fn op_char(input: Cursor) -> PResult<char> {
1164 let mut chars = input.chars();
1165 let first = match chars.next() {
1166 Some(ch) => ch,
1167 None => {
1168 return Err(LexError);
1169 }
1170 };
1171 let recognized = "~!@#$%^&*-=+|;:,<.>/?";
1172 if recognized.contains(first) {
1173 Ok((input.advance(first.len_utf8()), first))
1174 } else {
1175 Err(LexError)
1176 }
1177 }
1178
// Recogniser for doc comments, which the lexer treats as literals. Four
// forms are listed, in this order:
//   1. `//!` line comment
//   2. `/*!` block comment
//   3. `///` line comment whose next character is not another `/`
//   4. `/**` block comment whose next character is not another `*`
// The block forms only peek at the opening tag and leave consuming the
// comment to `block_comment`.
// NOTE(review): `take_until!("\n")` presumably consumes up to (not including)
// the next newline, so a line doc comment at end of input with no trailing
// newline may fail to match — confirm against the macro definition.
named!(doc_comment -> (), alt!(
    do_parse!(
        punct!("//!") >>
        take_until!("\n") >>
        (())
    )
    |
    do_parse!(
        option!(whitespace) >>
        peek!(tag!("/*!")) >>
        block_comment >>
        (())
    )
    |
    do_parse!(
        punct!("///") >>
        not!(tag!("/")) >>
        take_until!("\n") >>
        (())
    )
    |
    do_parse!(
        option!(whitespace) >>
        peek!(tuple!(tag!("/**"), not!(tag!("*")))) >>
        block_comment >>
        (())
    )
));
1207