1 use std::borrow::Cow;
2 use std::char;
3 use std::str;
4 use std::string;
5 use std::string::String as StdString;
6
7 use self::Token::*;
8
/// A half-open range of byte offsets identifying where a token sits in the
/// tokenizer's input.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Span {
    /// Byte offset at which the range begins.
    pub start: usize,
    /// Byte offset at which the range stops; this offset itself is excluded.
    pub end: usize,
}
17
18 impl From<Span> for (usize, usize) {
19 fn from(Span { start, end }: Span) -> (usize, usize) {
20 (start, end)
21 }
22 }
23
/// A lexical token produced by [`Tokenizer`].
///
/// Variants carrying a `&'a str` borrow their text from the tokenizer's input.
#[derive(Debug, PartialEq, Eq)]
pub enum Token<'a> {
    /// A run of spaces and/or tabs.
    Whitespace(&'a str),
    /// A line ending; a `\r\n` pair is reported as this single token too.
    Newline,
    /// A comment: the `#` and the text that follows it on the same line.
    Comment(&'a str),

    /// `=`
    Equals,
    /// `.`
    Period,
    /// `,`
    Comma,
    /// `:`
    Colon,
    /// `+`
    Plus,
    /// `{`
    LeftBrace,
    /// `}`
    RightBrace,
    /// `[`
    LeftBracket,
    /// `]`
    RightBracket,

    /// A run of ASCII letters, ASCII digits, `-` and `_`.
    Keylike(&'a str),
    /// A quoted string, either literal (`'...'`) or basic (`"..."`).
    String {
        /// The raw source text of the string, delimiters included.
        src: &'a str,
        /// The decoded value; owned only when decoding had to alter the text.
        val: Cow<'a, str>,
        /// Whether the string used the triple-delimiter (multi-line) form.
        multiline: bool,
    },
}
47
/// Errors reported while tokenizing.
///
/// Every `usize` payload is a byte offset into the tokenizer's input.
#[derive(Debug, PartialEq, Eq)]
pub enum Error {
    /// A character that is not allowed inside a string, and its offset.
    InvalidCharInString(usize, char),
    /// A backslash followed by a character that does not form a known
    /// escape; carries that character and its offset.
    InvalidEscape(usize, char),
    /// A non-hexadecimal character inside a `\u`/`\U` escape, and its offset.
    InvalidHexEscape(usize, char),
    /// A `\u`/`\U` escape whose numeric value is not a valid `char`.
    InvalidEscapeValue(usize, u32),
    /// A line ending inside a single-line string.
    NewlineInString(usize),
    /// A character that cannot start any token.
    Unexpected(usize, char),
    /// The input ended before the string that starts at this offset was
    /// closed.
    UnterminatedString(usize),
    /// A quoted table key whose source text contains a line feed.
    NewlineInTableKey(usize),
    /// A multi-line string used as a table key.
    MultilineStringKey(usize),
    /// A quoted table key whose value is empty.
    EmptyTableKey(usize),
    /// A specific token was required but something else was found.
    Wanted {
        /// Offset at which the required token was looked for.
        at: usize,
        /// Description of what was required.
        expected: &'static str,
        /// Description of what was found instead.
        found: &'static str,
    },
}
66
67 #[derive(Clone)]
68 pub struct Tokenizer<'a> {
69 input: &'a str,
70 chars: CrlfFold<'a>,
71 }
72
/// Wrapper around a `(byte offset, char)` cursor over a string slice.
#[derive(Clone)]
struct CrlfFold<'a> {
    /// The underlying character cursor.
    chars: str::CharIndices<'a>,
}
77
/// The value of a string while it is being read: either still a plain slice
/// of the input, or a separately built buffer.
#[derive(Debug)]
enum MaybeString {
    /// The value is still a verbatim slice of the input that begins at this
    /// byte offset.
    NotEscaped(usize),
    /// The value has diverged from the input and is accumulated here.
    Owned(string::String),
}
83
84 impl<'a> Tokenizer<'a> {
new(input: &'a str) -> Tokenizer<'a>85 pub fn new(input: &'a str) -> Tokenizer<'a> {
86 let mut t = Tokenizer {
87 input,
88 chars: CrlfFold {
89 chars: input.char_indices(),
90 },
91 };
92 // Eat utf-8 BOM
93 t.eatc('\u{feff}');
94 t
95 }
96
next(&mut self) -> Result<Option<(Span, Token<'a>)>, Error>97 pub fn next(&mut self) -> Result<Option<(Span, Token<'a>)>, Error> {
98 let (start, token) = match self.one() {
99 Some((start, '\n')) => (start, Newline),
100 Some((start, ' ')) => (start, self.whitespace_token(start)),
101 Some((start, '\t')) => (start, self.whitespace_token(start)),
102 Some((start, '#')) => (start, self.comment_token(start)),
103 Some((start, '=')) => (start, Equals),
104 Some((start, '.')) => (start, Period),
105 Some((start, ',')) => (start, Comma),
106 Some((start, ':')) => (start, Colon),
107 Some((start, '+')) => (start, Plus),
108 Some((start, '{')) => (start, LeftBrace),
109 Some((start, '}')) => (start, RightBrace),
110 Some((start, '[')) => (start, LeftBracket),
111 Some((start, ']')) => (start, RightBracket),
112 Some((start, '\'')) => {
113 return self
114 .literal_string(start)
115 .map(|t| Some((self.step_span(start), t)))
116 }
117 Some((start, '"')) => {
118 return self
119 .basic_string(start)
120 .map(|t| Some((self.step_span(start), t)))
121 }
122 Some((start, ch)) if is_keylike(ch) => (start, self.keylike(start)),
123
124 Some((start, ch)) => return Err(Error::Unexpected(start, ch)),
125 None => return Ok(None),
126 };
127
128 let span = self.step_span(start);
129 Ok(Some((span, token)))
130 }
131
peek(&mut self) -> Result<Option<(Span, Token<'a>)>, Error>132 pub fn peek(&mut self) -> Result<Option<(Span, Token<'a>)>, Error> {
133 self.clone().next()
134 }
135
eat(&mut self, expected: Token<'a>) -> Result<bool, Error>136 pub fn eat(&mut self, expected: Token<'a>) -> Result<bool, Error> {
137 self.eat_spanned(expected).map(|s| s.is_some())
138 }
139
140 /// Eat a value, returning it's span if it was consumed.
eat_spanned(&mut self, expected: Token<'a>) -> Result<Option<Span>, Error>141 pub fn eat_spanned(&mut self, expected: Token<'a>) -> Result<Option<Span>, Error> {
142 let span = match self.peek()? {
143 Some((span, ref found)) if expected == *found => span,
144 Some(_) => return Ok(None),
145 None => return Ok(None),
146 };
147
148 drop(self.next());
149 Ok(Some(span))
150 }
151
expect(&mut self, expected: Token<'a>) -> Result<(), Error>152 pub fn expect(&mut self, expected: Token<'a>) -> Result<(), Error> {
153 // ignore span
154 let _ = self.expect_spanned(expected)?;
155 Ok(())
156 }
157
158 /// Expect the given token returning its span.
expect_spanned(&mut self, expected: Token<'a>) -> Result<Span, Error>159 pub fn expect_spanned(&mut self, expected: Token<'a>) -> Result<Span, Error> {
160 let current = self.current();
161 match self.next()? {
162 Some((span, found)) => {
163 if expected == found {
164 Ok(span)
165 } else {
166 Err(Error::Wanted {
167 at: current,
168 expected: expected.describe(),
169 found: found.describe(),
170 })
171 }
172 }
173 None => Err(Error::Wanted {
174 at: self.input.len(),
175 expected: expected.describe(),
176 found: "eof",
177 }),
178 }
179 }
180
table_key(&mut self) -> Result<(Span, Cow<'a, str>), Error>181 pub fn table_key(&mut self) -> Result<(Span, Cow<'a, str>), Error> {
182 let current = self.current();
183 match self.next()? {
184 Some((span, Token::Keylike(k))) => Ok((span, k.into())),
185 Some((
186 span,
187 Token::String {
188 src,
189 val,
190 multiline,
191 },
192 )) => {
193 let offset = self.substr_offset(src);
194 if multiline {
195 return Err(Error::MultilineStringKey(offset));
196 }
197 if val == "" {
198 return Err(Error::EmptyTableKey(offset));
199 }
200 match src.find('\n') {
201 None => Ok((span, val)),
202 Some(i) => Err(Error::NewlineInTableKey(offset + i)),
203 }
204 }
205 Some((_, other)) => Err(Error::Wanted {
206 at: current,
207 expected: "a table key",
208 found: other.describe(),
209 }),
210 None => Err(Error::Wanted {
211 at: self.input.len(),
212 expected: "a table key",
213 found: "eof",
214 }),
215 }
216 }
217
eat_whitespace(&mut self) -> Result<(), Error>218 pub fn eat_whitespace(&mut self) -> Result<(), Error> {
219 while self.eatc(' ') || self.eatc('\t') {
220 // ...
221 }
222 Ok(())
223 }
224
eat_comment(&mut self) -> Result<bool, Error>225 pub fn eat_comment(&mut self) -> Result<bool, Error> {
226 if !self.eatc('#') {
227 return Ok(false);
228 }
229 drop(self.comment_token(0));
230 self.eat_newline_or_eof().map(|()| true)
231 }
232
eat_newline_or_eof(&mut self) -> Result<(), Error>233 pub fn eat_newline_or_eof(&mut self) -> Result<(), Error> {
234 let current = self.current();
235 match self.next()? {
236 None | Some((_, Token::Newline)) => Ok(()),
237 Some((_, other)) => Err(Error::Wanted {
238 at: current,
239 expected: "newline",
240 found: other.describe(),
241 }),
242 }
243 }
244
skip_to_newline(&mut self)245 pub fn skip_to_newline(&mut self) {
246 loop {
247 match self.one() {
248 Some((_, '\n')) | None => break,
249 _ => {}
250 }
251 }
252 }
253
eatc(&mut self, ch: char) -> bool254 fn eatc(&mut self, ch: char) -> bool {
255 match self.chars.clone().next() {
256 Some((_, ch2)) if ch == ch2 => {
257 self.one();
258 true
259 }
260 _ => false,
261 }
262 }
263
current(&mut self) -> usize264 pub fn current(&mut self) -> usize {
265 self.chars
266 .clone()
267 .next()
268 .map(|i| i.0)
269 .unwrap_or_else(|| self.input.len())
270 }
271
input(&self) -> &'a str272 pub fn input(&self) -> &'a str {
273 self.input
274 }
275
whitespace_token(&mut self, start: usize) -> Token<'a>276 fn whitespace_token(&mut self, start: usize) -> Token<'a> {
277 while self.eatc(' ') || self.eatc('\t') {
278 // ...
279 }
280 Whitespace(&self.input[start..self.current()])
281 }
282
comment_token(&mut self, start: usize) -> Token<'a>283 fn comment_token(&mut self, start: usize) -> Token<'a> {
284 while let Some((_, ch)) = self.chars.clone().next() {
285 if ch != '\t' && (ch < '\u{20}' || ch > '\u{10ffff}') {
286 break;
287 }
288 self.one();
289 }
290 Comment(&self.input[start..self.current()])
291 }
292
read_string( &mut self, delim: char, start: usize, new_ch: &mut dyn FnMut( &mut Tokenizer<'_>, &mut MaybeString, bool, usize, char, ) -> Result<(), Error>, ) -> Result<Token<'a>, Error>293 fn read_string(
294 &mut self,
295 delim: char,
296 start: usize,
297 new_ch: &mut dyn FnMut(
298 &mut Tokenizer<'_>,
299 &mut MaybeString,
300 bool,
301 usize,
302 char,
303 ) -> Result<(), Error>,
304 ) -> Result<Token<'a>, Error> {
305 let mut multiline = false;
306 if self.eatc(delim) {
307 if self.eatc(delim) {
308 multiline = true;
309 } else {
310 return Ok(String {
311 src: &self.input[start..start + 2],
312 val: Cow::Borrowed(""),
313 multiline: false,
314 });
315 }
316 }
317 let mut val = MaybeString::NotEscaped(self.current());
318 let mut n = 0;
319 'outer: loop {
320 n += 1;
321 match self.one() {
322 Some((i, '\n')) => {
323 if multiline {
324 if self.input.as_bytes()[i] == b'\r' {
325 val.to_owned(&self.input[..i]);
326 }
327 if n == 1 {
328 val = MaybeString::NotEscaped(self.current());
329 } else {
330 val.push('\n');
331 }
332 continue;
333 } else {
334 return Err(Error::NewlineInString(i));
335 }
336 }
337 Some((mut i, ch)) if ch == delim => {
338 if multiline {
339 if !self.eatc(delim) {
340 val.push(delim);
341 continue 'outer;
342 }
343 if !self.eatc(delim) {
344 val.push(delim);
345 val.push(delim);
346 continue 'outer;
347 }
348 if self.eatc(delim) {
349 val.push(delim);
350 i += 1;
351 }
352 if self.eatc(delim) {
353 val.push(delim);
354 i += 1;
355 }
356 }
357 return Ok(String {
358 src: &self.input[start..self.current()],
359 val: val.into_cow(&self.input[..i]),
360 multiline,
361 });
362 }
363 Some((i, c)) => new_ch(self, &mut val, multiline, i, c)?,
364 None => return Err(Error::UnterminatedString(start)),
365 }
366 }
367 }
368
literal_string(&mut self, start: usize) -> Result<Token<'a>, Error>369 fn literal_string(&mut self, start: usize) -> Result<Token<'a>, Error> {
370 self.read_string('\'', start, &mut |_me, val, _multi, i, ch| {
371 if ch == '\u{09}' || ('\u{20}' <= ch && ch <= '\u{10ffff}' && ch != '\u{7f}') {
372 val.push(ch);
373 Ok(())
374 } else {
375 Err(Error::InvalidCharInString(i, ch))
376 }
377 })
378 }
379
basic_string(&mut self, start: usize) -> Result<Token<'a>, Error>380 fn basic_string(&mut self, start: usize) -> Result<Token<'a>, Error> {
381 self.read_string('"', start, &mut |me, val, multi, i, ch| match ch {
382 '\\' => {
383 val.to_owned(&me.input[..i]);
384 match me.chars.next() {
385 Some((_, '"')) => val.push('"'),
386 Some((_, '\\')) => val.push('\\'),
387 Some((_, 'b')) => val.push('\u{8}'),
388 Some((_, 'f')) => val.push('\u{c}'),
389 Some((_, 'n')) => val.push('\n'),
390 Some((_, 'r')) => val.push('\r'),
391 Some((_, 't')) => val.push('\t'),
392 Some((i, c @ 'u')) | Some((i, c @ 'U')) => {
393 let len = if c == 'u' { 4 } else { 8 };
394 val.push(me.hex(start, i, len)?);
395 }
396 Some((i, c @ ' ')) | Some((i, c @ '\t')) | Some((i, c @ '\n')) if multi => {
397 if c != '\n' {
398 while let Some((_, ch)) = me.chars.clone().next() {
399 match ch {
400 ' ' | '\t' => {
401 me.chars.next();
402 continue;
403 }
404 '\n' => {
405 me.chars.next();
406 break;
407 }
408 _ => return Err(Error::InvalidEscape(i, c)),
409 }
410 }
411 }
412 while let Some((_, ch)) = me.chars.clone().next() {
413 match ch {
414 ' ' | '\t' | '\n' => {
415 me.chars.next();
416 }
417 _ => break,
418 }
419 }
420 }
421 Some((i, c)) => return Err(Error::InvalidEscape(i, c)),
422 None => return Err(Error::UnterminatedString(start)),
423 }
424 Ok(())
425 }
426 ch if ch == '\u{09}' || ('\u{20}' <= ch && ch <= '\u{10ffff}' && ch != '\u{7f}') => {
427 val.push(ch);
428 Ok(())
429 }
430 _ => Err(Error::InvalidCharInString(i, ch)),
431 })
432 }
433
hex(&mut self, start: usize, i: usize, len: usize) -> Result<char, Error>434 fn hex(&mut self, start: usize, i: usize, len: usize) -> Result<char, Error> {
435 let mut buf = StdString::with_capacity(len);
436 for _ in 0..len {
437 match self.one() {
438 Some((_, ch)) if ch as u32 <= 0x7F && ch.is_digit(16) => buf.push(ch),
439 Some((i, ch)) => return Err(Error::InvalidHexEscape(i, ch)),
440 None => return Err(Error::UnterminatedString(start)),
441 }
442 }
443 let val = u32::from_str_radix(&buf, 16).unwrap();
444 match char::from_u32(val) {
445 Some(ch) => Ok(ch),
446 None => Err(Error::InvalidEscapeValue(i, val)),
447 }
448 }
449
keylike(&mut self, start: usize) -> Token<'a>450 fn keylike(&mut self, start: usize) -> Token<'a> {
451 while let Some((_, ch)) = self.peek_one() {
452 if !is_keylike(ch) {
453 break;
454 }
455 self.one();
456 }
457 Keylike(&self.input[start..self.current()])
458 }
459
substr_offset(&self, s: &'a str) -> usize460 pub fn substr_offset(&self, s: &'a str) -> usize {
461 assert!(s.len() <= self.input.len());
462 let a = self.input.as_ptr() as usize;
463 let b = s.as_ptr() as usize;
464 assert!(a <= b);
465 b - a
466 }
467
468 /// Calculate the span of a single character.
step_span(&mut self, start: usize) -> Span469 fn step_span(&mut self, start: usize) -> Span {
470 let end = self
471 .peek_one()
472 .map(|t| t.0)
473 .unwrap_or_else(|| self.input.len());
474 Span { start, end }
475 }
476
477 /// Peek one char without consuming it.
peek_one(&mut self) -> Option<(usize, char)>478 fn peek_one(&mut self) -> Option<(usize, char)> {
479 self.chars.clone().next()
480 }
481
482 /// Take one char.
one(&mut self) -> Option<(usize, char)>483 pub fn one(&mut self) -> Option<(usize, char)> {
484 self.chars.next()
485 }
486 }
487
488 impl<'a> Iterator for CrlfFold<'a> {
489 type Item = (usize, char);
490
next(&mut self) -> Option<(usize, char)>491 fn next(&mut self) -> Option<(usize, char)> {
492 self.chars.next().map(|(i, c)| {
493 if c == '\r' {
494 let mut attempt = self.chars.clone();
495 if let Some((_, '\n')) = attempt.next() {
496 self.chars = attempt;
497 return (i, '\n');
498 }
499 }
500 (i, c)
501 })
502 }
503 }
504
505 impl MaybeString {
push(&mut self, ch: char)506 fn push(&mut self, ch: char) {
507 match *self {
508 MaybeString::NotEscaped(..) => {}
509 MaybeString::Owned(ref mut s) => s.push(ch),
510 }
511 }
512
to_owned(&mut self, input: &str)513 fn to_owned(&mut self, input: &str) {
514 match *self {
515 MaybeString::NotEscaped(start) => {
516 *self = MaybeString::Owned(input[start..].to_owned());
517 }
518 MaybeString::Owned(..) => {}
519 }
520 }
521
into_cow(self, input: &str) -> Cow<'_, str>522 fn into_cow(self, input: &str) -> Cow<'_, str> {
523 match self {
524 MaybeString::NotEscaped(start) => Cow::Borrowed(&input[start..]),
525 MaybeString::Owned(s) => Cow::Owned(s),
526 }
527 }
528 }
529
/// Returns `true` for the characters a key-like token may contain: ASCII
/// letters, ASCII digits, `-` and `_`.
fn is_keylike(ch: char) -> bool {
    ch.is_ascii_alphanumeric() || ch == '-' || ch == '_'
}
537
538 impl<'a> Token<'a> {
describe(&self) -> &'static str539 pub fn describe(&self) -> &'static str {
540 match *self {
541 Token::Keylike(_) => "an identifier",
542 Token::Equals => "an equals",
543 Token::Period => "a period",
544 Token::Comment(_) => "a comment",
545 Token::Newline => "a newline",
546 Token::Whitespace(_) => "whitespace",
547 Token::Comma => "a comma",
548 Token::RightBrace => "a right brace",
549 Token::LeftBrace => "a left brace",
550 Token::RightBracket => "a right bracket",
551 Token::LeftBracket => "a left bracket",
552 Token::String { multiline, .. } => {
553 if multiline {
554 "a multiline string"
555 } else {
556 "a string"
557 }
558 }
559 Token::Colon => "a colon",
560 Token::Plus => "a plus",
561 }
562 }
563 }
564
#[cfg(test)]
mod tests {
    use super::{Error, Token, Tokenizer};
    use std::borrow::Cow;

    /// Asserts that the first token of `input` fails with `err` and that the
    /// tokenizer reports end of input afterwards.
    fn err(input: &str, err: Error) {
        let mut tokenizer = Tokenizer::new(input);
        let first = tokenizer.next().unwrap_err();
        assert_eq!(first, err);
        assert!(tokenizer.next().unwrap().is_none());
    }

    /// Asserts that `input` is exactly one string token covering the whole
    /// input, with decoded value `val` and the given `multiline` flag.
    fn check_string(input: &str, val: &str, multiline: bool) {
        let mut tokenizer = Tokenizer::new(input);
        let (_, token) = tokenizer.next().unwrap().unwrap();
        assert_eq!(
            token,
            Token::String {
                src: input,
                val: Cow::Borrowed(val),
                multiline,
            }
        );
        assert!(tokenizer.next().unwrap().is_none());
    }

    #[test]
    fn literal_strings() {
        check_string("''", "", false);
        check_string("''''''", "", true);
        check_string("'''\n'''", "", true);
        check_string("'a'", "a", false);
        check_string("'\"a'", "\"a", false);
        check_string("''''a'''", "'a", true);
        check_string("'''\n'a\n'''", "'a\n", true);
        check_string("'''a\n'a\r\n'''", "a\n'a\n", true);
    }

    #[test]
    fn basic_strings() {
        check_string(r#""""#, "", false);
        check_string(r#""""""""#, "", true);
        check_string(r#""a""#, "a", false);
        check_string(r#""""a""""#, "a", true);
        check_string(r#""\t""#, "\t", false);
        check_string(r#""\u0000""#, "\0", false);
        check_string(r#""\U00000000""#, "\0", false);
        check_string(r#""\U000A0000""#, "\u{A0000}", false);
        check_string(r#""\\t""#, "\\t", false);
        check_string("\"\t\"", "\t", false);
        check_string("\"\"\"\n\t\"\"\"", "\t", true);
        check_string("\"\"\"\\\n\"\"\"", "", true);
        check_string(
            "\"\"\"\\\n \t \t \\\r\n \t \n \t \r\n\"\"\"",
            "",
            true,
        );
        check_string(r#""\r""#, "\r", false);
        check_string(r#""\n""#, "\n", false);
        check_string(r#""\b""#, "\u{8}", false);
        check_string(r#""a\fa""#, "a\u{c}a", false);
        check_string(r#""\"a""#, "\"a", false);
        check_string("\"\"\"\na\"\"\"", "a", true);
        check_string("\"\"\"\n\"\"\"", "", true);
        check_string(r#""""a\"""b""""#, "a\"\"\"b", true);
        err(r#""\a"#, Error::InvalidEscape(2, 'a'));
        err("\"\\\n", Error::InvalidEscape(2, '\n'));
        err("\"\\\r\n", Error::InvalidEscape(2, '\n'));
        err("\"\\", Error::UnterminatedString(0));
        err("\"\u{0}", Error::InvalidCharInString(1, '\u{0}'));
        err(r#""\U00""#, Error::InvalidHexEscape(5, '"'));
        err(r#""\U00"#, Error::UnterminatedString(0));
        err(r#""\uD800"#, Error::InvalidEscapeValue(2, 0xd800));
        err(r#""\UFFFFFFFF"#, Error::InvalidEscapeValue(2, 0xffff_ffff));
    }

    #[test]
    fn keylike() {
        // Each input must come back as a single key-like token.
        let inputs = ["foo", "0bar", "bar0", "1234", "a-b", "a_B", "-_-", "___"];
        for &input in inputs.iter() {
            let mut tokenizer = Tokenizer::new(input);
            let (_, token) = tokenizer.next().unwrap().unwrap();
            assert_eq!(token, Token::Keylike(input));
            assert!(tokenizer.next().unwrap().is_none());
        }
    }

    #[test]
    fn all() {
        /// Tokenizes `input` completely and compares every
        /// `(span, token, spanned text)` triple against `expected`.
        fn t(input: &str, expected: &[((usize, usize), Token<'_>, &str)]) {
            let mut tokenizer = Tokenizer::new(input);
            let mut actual: Vec<((usize, usize), Token<'_>, &str)> = Vec::new();
            while let Some((span, token)) = tokenizer.next().unwrap() {
                let text = &input[span.start..span.end];
                actual.push((span.into(), token, text));
            }
            for (got, want) in actual.iter().zip(expected) {
                assert_eq!(got, want);
            }
            assert_eq!(actual.len(), expected.len());
        }

        t(
            " a ",
            &[
                ((0, 1), Token::Whitespace(" "), " "),
                ((1, 2), Token::Keylike("a"), "a"),
                ((2, 3), Token::Whitespace(" "), " "),
            ],
        );

        t(
            " a\t [[]] \t [] {} , . =\n# foo \r\n#foo \n ",
            &[
                ((0, 1), Token::Whitespace(" "), " "),
                ((1, 2), Token::Keylike("a"), "a"),
                ((2, 4), Token::Whitespace("\t "), "\t "),
                ((4, 5), Token::LeftBracket, "["),
                ((5, 6), Token::LeftBracket, "["),
                ((6, 7), Token::RightBracket, "]"),
                ((7, 8), Token::RightBracket, "]"),
                ((8, 11), Token::Whitespace(" \t "), " \t "),
                ((11, 12), Token::LeftBracket, "["),
                ((12, 13), Token::RightBracket, "]"),
                ((13, 14), Token::Whitespace(" "), " "),
                ((14, 15), Token::LeftBrace, "{"),
                ((15, 16), Token::RightBrace, "}"),
                ((16, 17), Token::Whitespace(" "), " "),
                ((17, 18), Token::Comma, ","),
                ((18, 19), Token::Whitespace(" "), " "),
                ((19, 20), Token::Period, "."),
                ((20, 21), Token::Whitespace(" "), " "),
                ((21, 22), Token::Equals, "="),
                ((22, 23), Token::Newline, "\n"),
                ((23, 29), Token::Comment("# foo "), "# foo "),
                ((29, 31), Token::Newline, "\r\n"),
                ((31, 36), Token::Comment("#foo "), "#foo "),
                ((36, 37), Token::Newline, "\n"),
                ((37, 38), Token::Whitespace(" "), " "),
            ],
        );
    }

    #[test]
    fn bare_cr_bad() {
        err("\r", Error::Unexpected(0, '\r'));
        err("'\n", Error::NewlineInString(1));
        err("'\u{0}", Error::InvalidCharInString(1, '\u{0}'));
        err("'", Error::UnterminatedString(0));
        err("\u{0}", Error::Unexpected(0, '\u{0}'));
    }

    #[test]
    fn bad_comment() {
        let mut tokenizer = Tokenizer::new("#\u{0}");
        // The comment token itself is fine; the NUL after it is not.
        tokenizer.next().unwrap().unwrap();
        assert_eq!(tokenizer.next(), Err(Error::Unexpected(1, '\u{0}')));
        assert!(tokenizer.next().unwrap().is_none());
    }
}
745