1 // pest. The Elegant Parser
2 // Copyright (c) 2018 Dragoș Tiselice
3 //
4 // Licensed under the Apache License, Version 2.0
5 // <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
6 // license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7 // option. All files in the project carrying such notice may not be copied,
8 // modified, or distributed except according to those terms.
9 
10 use std::char;
11 use std::iter::Peekable;
12 
13 use pest::error::{Error, ErrorVariant};
14 use pest::iterators::{Pair, Pairs};
15 use pest::prec_climber::{Assoc, Operator, PrecClimber};
16 use pest::{Parser, Span};
17 
18 use ast::{Expr, Rule as AstRule, RuleType};
19 use validator;
20 
21 include!("grammar.rs");
22 
/// Parses `data` starting from `rule` by delegating to `PestParser::parse`.
///
/// Returns the token pairs produced for the input, or the parser's error if the input
/// does not match `rule`.
pub fn parse(rule: Rule, data: &str) -> Result<Pairs<Rule>, Error<Rule>> {
    PestParser::parse(rule, data)
}
26 
/// A grammar rule as read from the source, before conversion to the span-less AST rule.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ParserRule<'i> {
    /// Name of the rule, taken from the text of its leading identifier.
    pub name: String,
    /// Span of the rule's name identifier in the grammar source.
    pub span: Span<'i>,
    /// Rule kind selected by the optional modifier in front of the opening brace.
    pub ty: RuleType,
    /// Root node of the rule's expression tree.
    pub node: ParserNode<'i>,
}
34 
/// An expression together with the span of grammar source it was built from.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ParserNode<'i> {
    /// The expression held by this node; child nodes are nested inside it.
    pub expr: ParserExpr<'i>,
    /// Span of the grammar source covered by this node.
    pub span: Span<'i>,
}
40 
41 impl<'i> ParserNode<'i> {
filter_map_top_down<F, T>(self, mut f: F) -> Vec<T> where F: FnMut(ParserNode<'i>) -> Option<T>,42     pub fn filter_map_top_down<F, T>(self, mut f: F) -> Vec<T>
43     where
44         F: FnMut(ParserNode<'i>) -> Option<T>,
45     {
46         pub fn filter_internal<'i, F, T>(node: ParserNode<'i>, f: &mut F, result: &mut Vec<T>)
47         where
48             F: FnMut(ParserNode<'i>) -> Option<T>,
49         {
50             if let Some(value) = f(node.clone()) {
51                 result.push(value);
52             }
53 
54             match node.expr {
55                 // TODO: Use box syntax when it gets stabilized.
56                 ParserExpr::PosPred(node) => {
57                     filter_internal(*node, f, result);
58                 }
59                 ParserExpr::NegPred(node) => {
60                     filter_internal(*node, f, result);
61                 }
62                 ParserExpr::Seq(lhs, rhs) => {
63                     filter_internal(*lhs, f, result);
64                     filter_internal(*rhs, f, result);
65                 }
66                 ParserExpr::Choice(lhs, rhs) => {
67                     filter_internal(*lhs, f, result);
68                     filter_internal(*rhs, f, result);
69                 }
70                 ParserExpr::Rep(node) => {
71                     filter_internal(*node, f, result);
72                 }
73                 ParserExpr::RepOnce(node) => {
74                     filter_internal(*node, f, result);
75                 }
76                 ParserExpr::RepExact(node, _) => {
77                     filter_internal(*node, f, result);
78                 }
79                 ParserExpr::RepMin(node, _) => {
80                     filter_internal(*node, f, result);
81                 }
82                 ParserExpr::RepMax(node, _) => {
83                     filter_internal(*node, f, result);
84                 }
85                 ParserExpr::RepMinMax(node, ..) => {
86                     filter_internal(*node, f, result);
87                 }
88                 ParserExpr::Opt(node) => {
89                     filter_internal(*node, f, result);
90                 }
91                 ParserExpr::Push(node) => {
92                     filter_internal(*node, f, result);
93                 }
94                 _ => (),
95             }
96         }
97 
98         let mut result = vec![];
99 
100         filter_internal(self, &mut f, &mut result);
101 
102         result
103     }
104 }
105 
106 #[derive(Clone, Debug, Eq, PartialEq)]
107 pub enum ParserExpr<'i> {
108     Str(String),
109     Insens(String),
110     Range(String, String),
111     Ident(String),
112     PeekSlice(i32, Option<i32>),
113     PosPred(Box<ParserNode<'i>>),
114     NegPred(Box<ParserNode<'i>>),
115     Seq(Box<ParserNode<'i>>, Box<ParserNode<'i>>),
116     Choice(Box<ParserNode<'i>>, Box<ParserNode<'i>>),
117     Opt(Box<ParserNode<'i>>),
118     Rep(Box<ParserNode<'i>>),
119     RepOnce(Box<ParserNode<'i>>),
120     RepExact(Box<ParserNode<'i>>, u32),
121     RepMin(Box<ParserNode<'i>>, u32),
122     RepMax(Box<ParserNode<'i>>, u32),
123     RepMinMax(Box<ParserNode<'i>>, u32, u32),
124     Push(Box<ParserNode<'i>>),
125 }
126 
convert_rule(rule: ParserRule) -> AstRule127 fn convert_rule(rule: ParserRule) -> AstRule {
128     match rule {
129         ParserRule { name, ty, node, .. } => {
130             let expr = convert_node(node);
131 
132             AstRule { name, ty, expr }
133         }
134     }
135 }
136 
convert_node(node: ParserNode) -> Expr137 fn convert_node(node: ParserNode) -> Expr {
138     match node.expr {
139         ParserExpr::Str(string) => Expr::Str(string),
140         ParserExpr::Insens(string) => Expr::Insens(string),
141         ParserExpr::Range(start, end) => Expr::Range(start, end),
142         ParserExpr::Ident(ident) => Expr::Ident(ident),
143         ParserExpr::PeekSlice(start, end) => Expr::PeekSlice(start, end),
144         ParserExpr::PosPred(node) => Expr::PosPred(Box::new(convert_node(*node))),
145         ParserExpr::NegPred(node) => Expr::NegPred(Box::new(convert_node(*node))),
146         ParserExpr::Seq(node1, node2) => Expr::Seq(
147             Box::new(convert_node(*node1)),
148             Box::new(convert_node(*node2)),
149         ),
150         ParserExpr::Choice(node1, node2) => Expr::Choice(
151             Box::new(convert_node(*node1)),
152             Box::new(convert_node(*node2)),
153         ),
154         ParserExpr::Opt(node) => Expr::Opt(Box::new(convert_node(*node))),
155         ParserExpr::Rep(node) => Expr::Rep(Box::new(convert_node(*node))),
156         ParserExpr::RepOnce(node) => Expr::RepOnce(Box::new(convert_node(*node))),
157         ParserExpr::RepExact(node, num) => Expr::RepExact(Box::new(convert_node(*node)), num),
158         ParserExpr::RepMin(node, max) => Expr::RepMin(Box::new(convert_node(*node)), max),
159         ParserExpr::RepMax(node, max) => Expr::RepMax(Box::new(convert_node(*node)), max),
160         ParserExpr::RepMinMax(node, min, max) => {
161             Expr::RepMinMax(Box::new(convert_node(*node)), min, max)
162         }
163         ParserExpr::Push(node) => Expr::Push(Box::new(convert_node(*node))),
164     }
165 }
166 
consume_rules(pairs: Pairs<Rule>) -> Result<Vec<AstRule>, Vec<Error<Rule>>>167 pub fn consume_rules(pairs: Pairs<Rule>) -> Result<Vec<AstRule>, Vec<Error<Rule>>> {
168     let rules = consume_rules_with_spans(pairs)?;
169     let errors = validator::validate_ast(&rules);
170     if errors.is_empty() {
171         Ok(rules.into_iter().map(convert_rule).collect())
172     } else {
173         Err(errors)
174     }
175 }
176 
consume_rules_with_spans<'i>( pairs: Pairs<'i, Rule>, ) -> Result<Vec<ParserRule<'i>>, Vec<Error<Rule>>>177 fn consume_rules_with_spans<'i>(
178     pairs: Pairs<'i, Rule>,
179 ) -> Result<Vec<ParserRule<'i>>, Vec<Error<Rule>>> {
180     let climber = PrecClimber::new(vec![
181         Operator::new(Rule::choice_operator, Assoc::Left),
182         Operator::new(Rule::sequence_operator, Assoc::Left),
183     ]);
184 
185     pairs
186         .filter(|pair| pair.as_rule() == Rule::grammar_rule)
187         .map(|pair| {
188             let mut pairs = pair.into_inner().peekable();
189 
190             let span = pairs.next().unwrap().as_span();
191             let name = span.as_str().to_owned();
192 
193             pairs.next().unwrap(); // assignment_operator
194 
195             let ty = if pairs.peek().unwrap().as_rule() != Rule::opening_brace {
196                 match pairs.next().unwrap().as_rule() {
197                     Rule::silent_modifier => RuleType::Silent,
198                     Rule::atomic_modifier => RuleType::Atomic,
199                     Rule::compound_atomic_modifier => RuleType::CompoundAtomic,
200                     Rule::non_atomic_modifier => RuleType::NonAtomic,
201                     _ => unreachable!(),
202                 }
203             } else {
204                 RuleType::Normal
205             };
206 
207             pairs.next().unwrap(); // opening_brace
208 
209             let node = consume_expr(pairs.next().unwrap().into_inner().peekable(), &climber)?;
210 
211             Ok(ParserRule {
212                 name,
213                 span,
214                 ty,
215                 node,
216             })
217         })
218         .collect()
219 }
220 
consume_expr<'i>( pairs: Peekable<Pairs<'i, Rule>>, climber: &PrecClimber<Rule>, ) -> Result<ParserNode<'i>, Vec<Error<Rule>>>221 fn consume_expr<'i>(
222     pairs: Peekable<Pairs<'i, Rule>>,
223     climber: &PrecClimber<Rule>,
224 ) -> Result<ParserNode<'i>, Vec<Error<Rule>>> {
225     fn unaries<'i>(
226         mut pairs: Peekable<Pairs<'i, Rule>>,
227         climber: &PrecClimber<Rule>,
228     ) -> Result<ParserNode<'i>, Vec<Error<Rule>>> {
229         let pair = pairs.next().unwrap();
230 
231         let node = match pair.as_rule() {
232             Rule::opening_paren => {
233                 let node = unaries(pairs, climber)?;
234                 let end = node.span.end_pos();
235 
236                 ParserNode {
237                     expr: node.expr,
238                     span: pair.as_span().start_pos().span(&end),
239                 }
240             }
241             Rule::positive_predicate_operator => {
242                 let node = unaries(pairs, climber)?;
243                 let end = node.span.end_pos();
244 
245                 ParserNode {
246                     expr: ParserExpr::PosPred(Box::new(node)),
247                     span: pair.as_span().start_pos().span(&end),
248                 }
249             }
250             Rule::negative_predicate_operator => {
251                 let node = unaries(pairs, climber)?;
252                 let end = node.span.end_pos();
253 
254                 ParserNode {
255                     expr: ParserExpr::NegPred(Box::new(node)),
256                     span: pair.as_span().start_pos().span(&end),
257                 }
258             }
259             other_rule => {
260                 let node = match other_rule {
261                     Rule::expression => consume_expr(pair.into_inner().peekable(), climber)?,
262                     Rule::_push => {
263                         let start = pair.clone().as_span().start_pos();
264                         let mut pairs = pair.into_inner();
265                         pairs.next().unwrap(); // opening_paren
266                         let pair = pairs.next().unwrap();
267 
268                         let node = consume_expr(pair.into_inner().peekable(), climber)?;
269                         let end = node.span.end_pos();
270 
271                         ParserNode {
272                             expr: ParserExpr::Push(Box::new(node)),
273                             span: start.span(&end),
274                         }
275                     }
276                     Rule::peek_slice => {
277                         let mut pairs = pair.clone().into_inner();
278                         pairs.next().unwrap(); // opening_brack
279                         let pair_start = pairs.next().unwrap(); // .. or integer
280                         let start: i32 = match pair_start.as_rule() {
281                             Rule::range_operator => 0,
282                             Rule::integer => {
283                                 pairs.next().unwrap(); // ..
284                                 pair_start.as_str().parse().unwrap()
285                             }
286                             _ => unreachable!(),
287                         };
288                         let pair_end = pairs.next().unwrap(); // integer or }
289                         let end: Option<i32> = match pair_end.as_rule() {
290                             Rule::closing_brack => None,
291                             Rule::integer => {
292                                 pairs.next().unwrap(); // }
293                                 Some(pair_end.as_str().parse().unwrap())
294                             }
295                             _ => unreachable!(),
296                         };
297                         ParserNode {
298                             expr: ParserExpr::PeekSlice(start, end),
299                             span: pair.as_span(),
300                         }
301                     }
302                     Rule::identifier => ParserNode {
303                         expr: ParserExpr::Ident(pair.as_str().to_owned()),
304                         span: pair.clone().as_span(),
305                     },
306                     Rule::string => {
307                         let string = unescape(pair.as_str()).expect("incorrect string literal");
308                         ParserNode {
309                             expr: ParserExpr::Str(string[1..string.len() - 1].to_owned()),
310                             span: pair.clone().as_span(),
311                         }
312                     }
313                     Rule::insensitive_string => {
314                         let string = unescape(pair.as_str()).expect("incorrect string literal");
315                         ParserNode {
316                             expr: ParserExpr::Insens(string[2..string.len() - 1].to_owned()),
317                             span: pair.clone().as_span(),
318                         }
319                     }
320                     Rule::range => {
321                         let mut pairs = pair.into_inner();
322                         let pair = pairs.next().unwrap();
323                         let start = unescape(pair.as_str()).expect("incorrect char literal");
324                         let start_pos = pair.clone().as_span().start_pos();
325                         pairs.next();
326                         let pair = pairs.next().unwrap();
327                         let end = unescape(pair.as_str()).expect("incorrect char literal");
328                         let end_pos = pair.clone().as_span().end_pos();
329 
330                         ParserNode {
331                             expr: ParserExpr::Range(
332                                 start[1..start.len() - 1].to_owned(),
333                                 end[1..end.len() - 1].to_owned(),
334                             ),
335                             span: start_pos.span(&end_pos),
336                         }
337                     }
338                     _ => unreachable!(),
339                 };
340 
341                 pairs.fold(
342                     Ok(node),
343                     |node: Result<ParserNode<'i>, Vec<Error<Rule>>>, pair| {
344                         let node = node?;
345 
346                         let node = match pair.as_rule() {
347                             Rule::optional_operator => {
348                                 let start = node.span.start_pos();
349                                 ParserNode {
350                                     expr: ParserExpr::Opt(Box::new(node)),
351                                     span: start.span(&pair.as_span().end_pos()),
352                                 }
353                             }
354                             Rule::repeat_operator => {
355                                 let start = node.span.start_pos();
356                                 ParserNode {
357                                     expr: ParserExpr::Rep(Box::new(node)),
358                                     span: start.span(&pair.as_span().end_pos()),
359                                 }
360                             }
361                             Rule::repeat_once_operator => {
362                                 let start = node.span.start_pos();
363                                 ParserNode {
364                                     expr: ParserExpr::RepOnce(Box::new(node)),
365                                     span: start.span(&pair.as_span().end_pos()),
366                                 }
367                             }
368                             Rule::repeat_exact => {
369                                 let mut inner = pair.clone().into_inner();
370 
371                                 inner.next().unwrap(); // opening_brace
372 
373                                 let number = inner.next().unwrap();
374                                 let num = if let Ok(num) = number.as_str().parse::<u32>() {
375                                     num
376                                 } else {
377                                     return Err(vec![Error::new_from_span(
378                                         ErrorVariant::CustomError {
379                                             message: "number cannot overflow u32".to_owned(),
380                                         },
381                                         number.as_span(),
382                                     )]);
383                                 };
384 
385                                 if num == 0 {
386                                     let error: Error<Rule> = Error::new_from_span(
387                                         ErrorVariant::CustomError {
388                                             message: "cannot repeat 0 times".to_owned(),
389                                         },
390                                         number.as_span(),
391                                     );
392 
393                                     return Err(vec![error]);
394                                 }
395 
396                                 let start = node.span.start_pos();
397                                 ParserNode {
398                                     expr: ParserExpr::RepExact(Box::new(node), num),
399                                     span: start.span(&pair.as_span().end_pos()),
400                                 }
401                             }
402                             Rule::repeat_min => {
403                                 let mut inner = pair.clone().into_inner();
404 
405                                 inner.next().unwrap(); // opening_brace
406 
407                                 let min_number = inner.next().unwrap();
408                                 let min = if let Ok(min) = min_number.as_str().parse::<u32>() {
409                                     min
410                                 } else {
411                                     return Err(vec![Error::new_from_span(
412                                         ErrorVariant::CustomError {
413                                             message: "number cannot overflow u32".to_owned(),
414                                         },
415                                         min_number.as_span(),
416                                     )]);
417                                 };
418 
419                                 let start = node.span.start_pos();
420                                 ParserNode {
421                                     expr: ParserExpr::RepMin(Box::new(node), min),
422                                     span: start.span(&pair.as_span().end_pos()),
423                                 }
424                             }
425                             Rule::repeat_max => {
426                                 let mut inner = pair.clone().into_inner();
427 
428                                 inner.next().unwrap(); // opening_brace
429                                 inner.next().unwrap(); // comma
430 
431                                 let max_number = inner.next().unwrap();
432                                 let max = if let Ok(max) = max_number.as_str().parse::<u32>() {
433                                     max
434                                 } else {
435                                     return Err(vec![Error::new_from_span(
436                                         ErrorVariant::CustomError {
437                                             message: "number cannot overflow u32".to_owned(),
438                                         },
439                                         max_number.as_span(),
440                                     )]);
441                                 };
442 
443                                 if max == 0 {
444                                     let error: Error<Rule> = Error::new_from_span(
445                                         ErrorVariant::CustomError {
446                                             message: "cannot repeat 0 times".to_owned(),
447                                         },
448                                         max_number.as_span(),
449                                     );
450 
451                                     return Err(vec![error]);
452                                 }
453 
454                                 let start = node.span.start_pos();
455                                 ParserNode {
456                                     expr: ParserExpr::RepMax(Box::new(node), max),
457                                     span: start.span(&pair.as_span().end_pos()),
458                                 }
459                             }
460                             Rule::repeat_min_max => {
461                                 let mut inner = pair.clone().into_inner();
462 
463                                 inner.next().unwrap(); // opening_brace
464 
465                                 let min_number = inner.next().unwrap();
466                                 let min = if let Ok(min) = min_number.as_str().parse::<u32>() {
467                                     min
468                                 } else {
469                                     return Err(vec![Error::new_from_span(
470                                         ErrorVariant::CustomError {
471                                             message: "number cannot overflow u32".to_owned(),
472                                         },
473                                         min_number.as_span(),
474                                     )]);
475                                 };
476 
477                                 inner.next().unwrap(); // comma
478 
479                                 let max_number = inner.next().unwrap();
480                                 let max = if let Ok(max) = max_number.as_str().parse::<u32>() {
481                                     max
482                                 } else {
483                                     return Err(vec![Error::new_from_span(
484                                         ErrorVariant::CustomError {
485                                             message: "number cannot overflow u32".to_owned(),
486                                         },
487                                         max_number.as_span(),
488                                     )]);
489                                 };
490 
491                                 if max == 0 {
492                                     let error: Error<Rule> = Error::new_from_span(
493                                         ErrorVariant::CustomError {
494                                             message: "cannot repeat 0 times".to_owned(),
495                                         },
496                                         max_number.as_span(),
497                                     );
498 
499                                     return Err(vec![error]);
500                                 }
501 
502                                 let start = node.span.start_pos();
503                                 ParserNode {
504                                     expr: ParserExpr::RepMinMax(Box::new(node), min, max),
505                                     span: start.span(&pair.as_span().end_pos()),
506                                 }
507                             }
508                             Rule::closing_paren => {
509                                 let start = node.span.start_pos();
510 
511                                 ParserNode {
512                                     expr: node.expr,
513                                     span: start.span(&pair.as_span().end_pos()),
514                                 }
515                             }
516                             _ => unreachable!(),
517                         };
518 
519                         Ok(node)
520                     },
521                 )?
522             }
523         };
524 
525         Ok(node)
526     }
527 
528     let term = |pair: Pair<'i, Rule>| unaries(pair.into_inner().peekable(), climber);
529     let infix = |lhs: Result<ParserNode<'i>, Vec<Error<Rule>>>,
530                  op: Pair<'i, Rule>,
531                  rhs: Result<ParserNode<'i>, Vec<Error<Rule>>>| match op.as_rule() {
532         Rule::sequence_operator => {
533             let lhs = lhs?;
534             let rhs = rhs?;
535 
536             let start = lhs.span.start_pos();
537             let end = rhs.span.end_pos();
538 
539             Ok(ParserNode {
540                 expr: ParserExpr::Seq(Box::new(lhs), Box::new(rhs)),
541                 span: start.span(&end),
542             })
543         }
544         Rule::choice_operator => {
545             let lhs = lhs?;
546             let rhs = rhs?;
547 
548             let start = lhs.span.start_pos();
549             let end = rhs.span.end_pos();
550 
551             Ok(ParserNode {
552                 expr: ParserExpr::Choice(Box::new(lhs), Box::new(rhs)),
553                 span: start.span(&end),
554             })
555         }
556         _ => unreachable!(),
557     };
558 
559     climber.climb(pairs, term, infix)
560 }
561 
/// Resolves the escape sequences in `string`, returning `None` if any of them is malformed.
///
/// Supported escapes are `\"`, `\\`, `\r`, `\n`, `\t`, `\0`, `\'`, `\xHH` (exactly two
/// hexadecimal digits) and `\u{H..}` (two to six hexadecimal digits naming a valid `char`).
/// Every character outside an escape sequence is copied through unchanged, so surrounding
/// quotes are kept.
fn unescape(string: &str) -> Option<String> {
    let mut result = String::new();
    let mut chars = string.chars();

    loop {
        match chars.next() {
            Some('\\') => match chars.next()? {
                '"' => result.push('"'),
                '\\' => result.push('\\'),
                'r' => result.push('\r'),
                'n' => result.push('\n'),
                't' => result.push('\t'),
                '0' => result.push('\0'),
                '\'' => result.push('\''),
                'x' => {
                    // Look ahead without consuming, so the digits can be validated first.
                    let digits: String = chars.clone().take(2).collect();

                    // `from_str_radix` tolerates a leading `+`, so `\x+f` would otherwise
                    // be accepted; require two genuine hexadecimal digits.
                    if digits.len() != 2 || !digits.chars().all(|c| c.is_digit(16)) {
                        return None;
                    }

                    for _ in 0..digits.len() {
                        chars.next()?;
                    }

                    let value = u8::from_str_radix(&digits, 16).ok()?;

                    result.push(char::from(value));
                }
                'u' => {
                    if chars.next()? != '{' {
                        return None;
                    }

                    let digits: String = chars.clone().take_while(|c| *c != '}').collect();

                    // Same `+` caveat as above; checking for hexadecimal digits also
                    // guarantees that the byte length equals the character count.
                    if digits.len() < 2
                        || 6 < digits.len()
                        || !digits.chars().all(|c| c.is_digit(16))
                    {
                        return None;
                    }

                    // Skip the digits plus the closing brace; a missing brace runs the
                    // iterator dry and is rejected here.
                    for _ in 0..digits.len() + 1 {
                        chars.next()?;
                    }

                    let value = u32::from_str_radix(&digits, 16).ok()?;

                    result.push(char::from_u32(value)?);
                }
                _ => return None,
            },
            Some(c) => result.push(c),
            None => return Some(result),
        };
    }
}
617 
618 #[cfg(test)]
619 mod tests {
620     use super::super::unwrap_or_report;
621     use super::*;
622 
    // Two consecutive rules are tokenized as two separate `grammar_rule` pairs.
    #[test]
    fn rules() {
        parses_to! {
            parser: PestParser,
            input: "a = { b } c = { d }",
            rule: Rule::grammar_rules,
            tokens: [
                grammar_rule(0, 9, [
                    identifier(0, 1),
                    assignment_operator(2, 3),
                    opening_brace(4, 5),
                    expression(6, 8, [
                        term(6, 8, [
                            identifier(6, 7)
                        ])
                    ]),
                    closing_brace(8, 9)
                ]),
                grammar_rule(10, 19, [
                    identifier(10, 11),
                    assignment_operator(12, 13),
                    opening_brace(14, 15),
                    expression(16, 18, [
                        term(16, 18, [
                            identifier(16, 17)
                        ])
                    ]),
                    closing_brace(18, 19)
                ])
            ]
        };
    }
655 
    // A single rule with a `!` modifier before the brace and a two-term sequence as body.
    #[test]
    fn rule() {
        parses_to! {
            parser: PestParser,
            input: "a = ! { b ~ c }",
            rule: Rule::grammar_rule,
            tokens: [
                grammar_rule(0, 15, [
                    identifier(0, 1),
                    assignment_operator(2, 3),
                    non_atomic_modifier(4, 5),
                    opening_brace(6, 7),
                    expression(8, 14, [
                        term(8, 10, [
                            identifier(8, 9)
                        ]),
                        sequence_operator(10, 11),
                        term(12, 14, [
                            identifier(12, 13)
                        ])
                    ]),
                    closing_brace(14, 15)
                ])
            ]
        };
    }
682 
    // One expression mixing choice and sequence operators, a character range, a negated
    // insensitive string and a parenthesized sub-expression with two postfix operators.
    #[test]
    fn expression() {
        parses_to! {
            parser: PestParser,
            input: "_a | 'a'..'b' ~ !^\"abc\" ~ (d | e)*?",
            rule: Rule::expression,
            tokens: [
                expression(0, 35, [
                    term(0, 3, [
                        identifier(0, 2)
                    ]),
                    choice_operator(3, 4),
                    term(5, 14, [
                        range(5, 13, [
                            character(5, 8, [
                                single_quote(5, 6),
                                inner_chr(6, 7),
                                single_quote(7, 8)
                            ]),
                            range_operator(8, 10),
                            character(10, 13, [
                                single_quote(10, 11),
                                inner_chr(11, 12),
                                single_quote(12, 13)
                            ])
                        ])
                    ]),
                    sequence_operator(14, 15),
                    term(16, 24, [
                        negative_predicate_operator(16, 17),
                        insensitive_string(17, 23, [
                            string(18, 23, [
                                quote(18, 19),
                                inner_str(19, 22),
                                quote(22, 23)
                            ])
                        ])
                    ]),
                    sequence_operator(24, 25),
                    term(26, 35, [
                        opening_paren(26, 27),
                        expression(27, 32, [
                            term(27, 29, [
                                identifier(27, 28)
                            ]),
                            choice_operator(29, 30),
                            term(31, 32, [
                                identifier(31, 32)
                            ])
                        ]),
                        closing_paren(32, 33),
                        repeat_operator(33, 34),
                        optional_operator(34, 35)
                    ])
                ])
            ]
        };
    }
741 
    // `{1}` parses as `repeat_exact`: opening brace, number, closing brace.
    #[test]
    fn repeat_exact() {
        parses_to! {
            parser: PestParser,
            input: "{1}",
            rule: Rule::repeat_exact,
            tokens: [
                repeat_exact(0, 3, [
                    opening_brace(0, 1),
                    number(1, 2),
                    closing_brace(2, 3)
                ])
            ]
        };
    }
757 
758     #[test]
repeat_min()759     fn repeat_min() {
760         parses_to! {
761             parser: PestParser,
762             input: "{2,}",
763             rule: Rule::repeat_min,
764             tokens: [
765                 repeat_min(0, 4, [
766                     opening_brace(0,1),
767                     number(1,2),
768                     comma(2,3),
769                     closing_brace(3,4)
770                 ])
771             ]
772         }
773     }
774 
775     #[test]
repeat_max()776     fn repeat_max() {
777         parses_to! {
778             parser: PestParser,
779             input: "{, 3}",
780             rule: Rule::repeat_max,
781             tokens: [
782                 repeat_max(0, 5, [
783                     opening_brace(0,1),
784                     comma(1,2),
785                     number(3,4),
786                     closing_brace(4,5)
787                 ])
788             ]
789         }
790     }
791 
    // `{1, 2}` parses as `repeat_min_max`: brace, number, comma, number, brace.
    // The space after the comma (offset 3) is not covered by any token.
    #[test]
    fn repeat_min_max() {
        parses_to! {
            parser: PestParser,
            input: "{1, 2}",
            rule: Rule::repeat_min_max,
            tokens: [
                repeat_min_max(0, 6, [
                    opening_brace(0, 1),
                    number(1, 2),
                    comma(2, 3),
                    number(4, 5),
                    closing_brace(5, 6)
                ])
            ]
        };
    }
809 
    // `PUSH ( a )` parses as `_push` with whitespace allowed around the parentheses.
    // The `PUSH` keyword (offsets 0..4) yields no token of its own; the first child
    // is the opening parenthesis.
    #[test]
    fn push() {
        parses_to! {
            parser: PestParser,
            input: "PUSH ( a )",
            rule: Rule::_push,
            tokens: [
                _push(0, 10, [
                    opening_paren(5, 6),
                    expression(7, 9, [
                        term(7, 9, [
                            identifier(7, 8)
                        ])
                    ]),
                    closing_paren(9, 10)
                ])
            ]
        };
    }
829 
    // `PEEK[..]` (no bounds) parses as `peek_slice` containing only the brackets and
    // the range operator; the `PEEK` keyword yields no token.
    #[test]
    fn peek_slice_all() {
        parses_to! {
            parser: PestParser,
            input: "PEEK[..]",
            rule: Rule::peek_slice,
            tokens: [
                peek_slice(0, 8, [
                    opening_brack(4, 5),
                    range_operator(5, 7),
                    closing_brack(7, 8)
                ])
            ]
        };
    }
845 
    // `PEEK[1..]` (start bound only) yields an `integer` token before the range operator.
    #[test]
    fn peek_slice_start() {
        parses_to! {
            parser: PestParser,
            input: "PEEK[1..]",
            rule: Rule::peek_slice,
            tokens: [
                peek_slice(0, 9, [
                    opening_brack(4, 5),
                    integer(5, 6),
                    range_operator(6, 8),
                    closing_brack(8, 9)
                ])
            ]
        };
    }
862 
    // `PEEK[ ..-1]` (end bound only) yields an `integer` token after the range operator.
    // The space after `[` is not covered by any token, and the integer span (8..10)
    // includes the minus sign.
    #[test]
    fn peek_slice_end() {
        parses_to! {
            parser: PestParser,
            input: "PEEK[ ..-1]",
            rule: Rule::peek_slice,
            tokens: [
                peek_slice(0, 11, [
                    opening_brack(4, 5),
                    range_operator(6, 8),
                    integer(8, 10),
                    closing_brack(10, 11)
                ])
            ]
        };
    }
879 
    // `PEEK[-5..10]` (both bounds) yields an `integer` on each side of the range
    // operator; the first integer span (5..7) includes the minus sign.
    #[test]
    fn peek_slice_start_end() {
        parses_to! {
            parser: PestParser,
            input: "PEEK[-5..10]",
            rule: Rule::peek_slice,
            tokens: [
                peek_slice(0, 12, [
                    opening_brack(4, 5),
                    integer(5, 7),
                    range_operator(7, 9),
                    integer(9, 11),
                    closing_brack(11, 12)
                ])
            ]
        };
    }
897 
    // An identifier may start with an underscore and contain digits: `_a8943` is a
    // single `identifier` token covering the whole input.
    #[test]
    fn identifier() {
        parses_to! {
            parser: PestParser,
            input: "_a8943",
            rule: Rule::identifier,
            tokens: [
                identifier(0, 6)
            ]
        };
    }
909 
    // A string literal containing the escapes `\n`, `\r`, `\t`, `\\`, `\0`, `\'`, `\"`,
    // `\x0F`, `\u{123abC}` and `\u{12}` parses as one `inner_str` between two `quote`
    // tokens; the escaped quote does not terminate the string.
    #[test]
    fn string() {
        parses_to! {
            parser: PestParser,
            input: "\"aaaaa\\n\\r\\t\\\\\\0\\'\\\"\\x0F\\u{123abC}\\u{12}aaaaa\"",
            rule: Rule::string,
            tokens: [
                string(0, 46, [
                    quote(0, 1),
                    inner_str(1, 45),
                    quote(45, 46)
                ])
            ]
        };
    }
925 
    // `^` followed by whitespace and a string parses as `insensitive_string`. The
    // outer span starts at the `^` (offset 0) while the nested `string` starts at the
    // opening quote (offset 3); the escaped quote stays inside `inner_str`.
    #[test]
    fn insensitive_string() {
        parses_to! {
            parser: PestParser,
            input: "^  \"\\\"hi\"",
            rule: Rule::insensitive_string,
            tokens: [
                insensitive_string(0, 9, [
                    string(3, 9, [
                        quote(3, 4),
                        inner_str(4, 8),
                        quote(8, 9)
                    ])
                ])
            ]
        };
    }
943 
    // A character range whose bounds are escaped characters (`'\n' .. '\x1a'`) parses
    // as `range`, with whitespace allowed on both sides of the `..` operator.
    #[test]
    fn range() {
        parses_to! {
            parser: PestParser,
            input: "'\\n' .. '\\x1a'",
            rule: Rule::range,
            tokens: [
                range(0, 14, [
                    character(0, 4, [
                        single_quote(0, 1),
                        inner_chr(1, 3),
                        single_quote(3, 4)
                    ]),
                    range_operator(5, 7),
                    character(8, 14, [
                        single_quote(8, 9),
                        inner_chr(9, 13),
                        single_quote(13, 14)
                    ])
                ])
            ]
        };
    }
967 
    // A character literal holding a six-digit, mixed-case unicode escape
    // (`'\u{123abC}'`) parses as one `inner_chr` between two `single_quote` tokens.
    #[test]
    fn character() {
        parses_to! {
            parser: PestParser,
            input: "'\\u{123abC}'",
            rule: Rule::character,
            tokens: [
                character(0, 12, [
                    single_quote(0, 1),
                    inner_chr(1, 11),
                    single_quote(11, 12)
                ])
            ]
        };
    }
983 
    // A number with a leading zero (`0123`) is accepted as a single `number` token.
    #[test]
    fn number() {
        parses_to! {
            parser: PestParser,
            input: "0123",
            rule: Rule::number,
            tokens: [
                number(0, 4)
            ]
        };
    }
995 
    // A `//` line comment between the sequence operator and the next term produces
    // no token: after `~` at offset 2 the next token starts at offset 16, past the
    // comment and the newline.
    #[test]
    fn comment() {
        parses_to! {
            parser: PestParser,
            input: "a ~    // asda\n b",
            rule: Rule::expression,
            tokens: [
                expression(0, 17, [
                    term(0, 2, [
                        identifier(0, 1)
                    ]),
                    sequence_operator(2, 3),
                    term(16, 17, [
                        identifier(16, 17)
                    ])
                ])
            ]
        };
    }
1015 
    // A grammar that starts with a digit fails at offset 0, expecting an identifier.
    #[test]
    fn wrong_identifier() {
        fails_with! {
            parser: PestParser,
            input: "0",
            rule: Rule::grammar_rules,
            positives: vec![Rule::identifier],
            negatives: vec![],
            pos: 0
        };
    }
1027 
    // A rule name followed directly by a body (`a {}`) fails at offset 2, expecting
    // the assignment operator.
    #[test]
    fn missing_assignment_operator() {
        fails_with! {
            parser: PestParser,
            input: "a {}",
            rule: Rule::grammar_rules,
            positives: vec![Rule::assignment_operator],
            negatives: vec![],
            pos: 2
        };
    }
1039 
    // An unknown modifier character (`*`) after `=` fails at offset 4. The error
    // lists the opening brace and each of the four modifier rules as acceptable.
    #[test]
    fn wrong_modifier() {
        fails_with! {
            parser: PestParser,
            input: "a = *{}",
            rule: Rule::grammar_rules,
            positives: vec![
                Rule::opening_brace,
                Rule::silent_modifier,
                Rule::atomic_modifier,
                Rule::compound_atomic_modifier,
                Rule::non_atomic_modifier
            ],
            negatives: vec![],
            pos: 4
        };
    }
1057 
    // Input that ends right after the `_` modifier fails at offset 5 (end of input),
    // expecting the opening brace of the rule body.
    #[test]
    fn missing_opening_brace() {
        fails_with! {
            parser: PestParser,
            input: "a = _",
            rule: Rule::grammar_rules,
            positives: vec![Rule::opening_brace],
            negatives: vec![],
            pos: 5
        };
    }
1069 
    // An empty rule body (`{}`) fails at offset 5, the closing brace, expecting a term.
    #[test]
    fn empty_rule() {
        fails_with! {
            parser: PestParser,
            input: "a = {}",
            rule: Rule::grammar_rules,
            positives: vec![Rule::term],
            negatives: vec![],
            pos: 5
        };
    }
1081 
    // A sequence operator with nothing after it (`b ~ }`) fails at offset 10, the
    // closing brace, expecting a term.
    #[test]
    fn missing_rhs() {
        fails_with! {
            parser: PestParser,
            input: "a = { b ~ }",
            rule: Rule::grammar_rules,
            positives: vec![Rule::term],
            negatives: vec![],
            pos: 10
        };
    }
1093 
    // An unknown infix character (`%`) after a term fails at offset 8. The error
    // lists what may follow a term inside a rule body: a brace, an infix operator
    // or a postfix operator.
    #[test]
    fn wrong_op() {
        fails_with! {
            parser: PestParser,
            input: "a = { b % }",
            rule: Rule::grammar_rules,
            positives: vec![
                Rule::opening_brace,
                Rule::closing_brace,
                Rule::sequence_operator,
                Rule::choice_operator,
                Rule::optional_operator,
                Rule::repeat_operator,
                Rule::repeat_once_operator
            ],
            negatives: vec![],
            pos: 8
        };
    }
1113 
    // An unclosed parenthesis (`(b }`) fails at offset 9, the closing brace. Inside
    // the parentheses the error expects `closing_paren` rather than `closing_brace`,
    // plus the usual infix and postfix operators.
    #[test]
    fn missing_closing_paren() {
        fails_with! {
            parser: PestParser,
            input: "a = { (b }",
            rule: Rule::grammar_rules,
            positives: vec![
                Rule::opening_brace,
                Rule::closing_paren,
                Rule::sequence_operator,
                Rule::choice_operator,
                Rule::optional_operator,
                Rule::repeat_operator,
                Rule::repeat_once_operator
            ],
            negatives: vec![],
            pos: 9
        };
    }
1133 
    // A prefix operator with no operand (`! }`) fails at offset 8, the closing brace.
    // The error lists every rule that can begin the operand.
    #[test]
    fn missing_term() {
        fails_with! {
            parser: PestParser,
            input: "a = { ! }",
            rule: Rule::grammar_rules,
            positives: vec![
                Rule::opening_paren,
                Rule::positive_predicate_operator,
                Rule::negative_predicate_operator,
                Rule::_push,
                Rule::peek_slice,
                Rule::identifier,
                Rule::insensitive_string,
                Rule::quote,
                Rule::single_quote
            ],
            negatives: vec![],
            pos: 8
        };
    }
1155 
    // An unterminated string literal fails at offset 9 (end of input), expecting the
    // closing quote.
    #[test]
    fn string_missing_ending_quote() {
        fails_with! {
            parser: PestParser,
            input: "a = { \" }",
            rule: Rule::grammar_rules,
            positives: vec![Rule::quote],
            negatives: vec![],
            pos: 9
        };
    }
1167 
    // A `^` that is not followed by a string literal fails at offset 8, the closing
    // brace, expecting an opening quote.
    #[test]
    fn insensitive_missing_string() {
        fails_with! {
            parser: PestParser,
            input: "a = { ^ }",
            rule: Rule::grammar_rules,
            positives: vec![Rule::quote],
            negatives: vec![],
            pos: 8
        };
    }
1179 
    // An unterminated character literal fails at offset 8, the closing brace,
    // expecting a single quote.
    #[test]
    fn char_missing_ending_single_quote() {
        fails_with! {
            parser: PestParser,
            input: "a = { \' }",
            rule: Rule::grammar_rules,
            positives: vec![Rule::single_quote],
            negatives: vec![],
            pos: 8
        };
    }
1191 
    // A lone character literal is not accepted as a term: after `'a'` the parser
    // fails at offset 10, the closing brace, expecting the range operator.
    #[test]
    fn range_missing_range_operator() {
        fails_with! {
            parser: PestParser,
            input: "a = { \'a\' }",
            rule: Rule::grammar_rules,
            positives: vec![Rule::range_operator],
            negatives: vec![],
            pos: 10
        };
    }
1203 
    // An unknown postfix character directly after a term (`a&`) fails at offset 7.
    // The expected set is the same as in `wrong_op`.
    #[test]
    fn wrong_postfix() {
        fails_with! {
            parser: PestParser,
            input: "a = { a& }",
            rule: Rule::grammar_rules,
            positives: vec![
                Rule::opening_brace,
                Rule::closing_brace,
                Rule::sequence_operator,
                Rule::choice_operator,
                Rule::optional_operator,
                Rule::repeat_operator,
                Rule::repeat_once_operator
            ],
            negatives: vec![],
            pos: 7
        };
    }
1223 
1224     #[test]
ast()1225     fn ast() {
1226         let input =
1227             "rule = _{ a{1} ~ \"a\"{3,} ~ b{, 2} ~ \"b\"{1, 2} | !(^\"c\" | PUSH('d'..'e'))?* }";
1228 
1229         let pairs = PestParser::parse(Rule::grammar_rules, input).unwrap();
1230         let ast = consume_rules_with_spans(pairs).unwrap();
1231         let ast: Vec<_> = ast.into_iter().map(|rule| convert_rule(rule)).collect();
1232 
1233         assert_eq!(
1234             ast,
1235             vec![AstRule {
1236                 name: "rule".to_owned(),
1237                 ty: RuleType::Silent,
1238                 expr: Expr::Choice(
1239                     Box::new(Expr::Seq(
1240                         Box::new(Expr::Seq(
1241                             Box::new(Expr::Seq(
1242                                 Box::new(Expr::RepExact(Box::new(Expr::Ident("a".to_owned())), 1)),
1243                                 Box::new(Expr::RepMin(Box::new(Expr::Str("a".to_owned())), 3))
1244                             )),
1245                             Box::new(Expr::RepMax(Box::new(Expr::Ident("b".to_owned())), 2))
1246                         )),
1247                         Box::new(Expr::RepMinMax(Box::new(Expr::Str("b".to_owned())), 1, 2))
1248                     )),
1249                     Box::new(Expr::NegPred(Box::new(Expr::Rep(Box::new(Expr::Opt(
1250                         Box::new(Expr::Choice(
1251                             Box::new(Expr::Insens("c".to_owned())),
1252                             Box::new(Expr::Push(Box::new(Expr::Range(
1253                                 "d".to_owned(),
1254                                 "e".to_owned()
1255                             ))))
1256                         ))
1257                     ))))))
1258                 )
1259             },]
1260         );
1261     }
1262 
1263     #[test]
ast_peek_slice()1264     fn ast_peek_slice() {
1265         let input = "rule = _{ PEEK[-04..] ~ PEEK[..3] }";
1266 
1267         let pairs = PestParser::parse(Rule::grammar_rules, input).unwrap();
1268         let ast = consume_rules_with_spans(pairs).unwrap();
1269         let ast: Vec<_> = ast.into_iter().map(|rule| convert_rule(rule)).collect();
1270 
1271         assert_eq!(
1272             ast,
1273             vec![AstRule {
1274                 name: "rule".to_owned(),
1275                 ty: RuleType::Silent,
1276                 expr: Expr::Seq(
1277                     Box::new(Expr::PeekSlice(-4, None)),
1278                     Box::new(Expr::PeekSlice(0, Some(3))),
1279                 )
1280             }],
1281         );
1282     }
1283 
1284     #[test]
1285     #[should_panic(expected = "grammar error
1286 
1287  --> 1:13
1288   |
1289 1 | rule = { \"\"{4294967297} }
1290   |             ^--------^
1291   |
1292   = number cannot overflow u32")]
repeat_exact_overflow()1293     fn repeat_exact_overflow() {
1294         let input = "rule = { \"\"{4294967297} }";
1295 
1296         let pairs = PestParser::parse(Rule::grammar_rules, input).unwrap();
1297         unwrap_or_report(consume_rules_with_spans(pairs));
1298     }
1299 
1300     #[test]
1301     #[should_panic(expected = "grammar error
1302 
1303  --> 1:13
1304   |
1305 1 | rule = { \"\"{0} }
1306   |             ^
1307   |
1308   = cannot repeat 0 times")]
repeat_exact_zero()1309     fn repeat_exact_zero() {
1310         let input = "rule = { \"\"{0} }";
1311 
1312         let pairs = PestParser::parse(Rule::grammar_rules, input).unwrap();
1313         unwrap_or_report(consume_rules_with_spans(pairs));
1314     }
1315 
1316     #[test]
1317     #[should_panic(expected = "grammar error
1318 
1319  --> 1:13
1320   |
1321 1 | rule = { \"\"{4294967297,} }
1322   |             ^--------^
1323   |
1324   = number cannot overflow u32")]
repeat_min_overflow()1325     fn repeat_min_overflow() {
1326         let input = "rule = { \"\"{4294967297,} }";
1327 
1328         let pairs = PestParser::parse(Rule::grammar_rules, input).unwrap();
1329         unwrap_or_report(consume_rules_with_spans(pairs));
1330     }
1331 
1332     #[test]
1333     #[should_panic(expected = "grammar error
1334 
1335  --> 1:14
1336   |
1337 1 | rule = { \"\"{,4294967297} }
1338   |              ^--------^
1339   |
1340   = number cannot overflow u32")]
repeat_max_overflow()1341     fn repeat_max_overflow() {
1342         let input = "rule = { \"\"{,4294967297} }";
1343 
1344         let pairs = PestParser::parse(Rule::grammar_rules, input).unwrap();
1345         unwrap_or_report(consume_rules_with_spans(pairs));
1346     }
1347 
1348     #[test]
1349     #[should_panic(expected = "grammar error
1350 
1351  --> 1:14
1352   |
1353 1 | rule = { \"\"{,0} }
1354   |              ^
1355   |
1356   = cannot repeat 0 times")]
repeat_max_zero()1357     fn repeat_max_zero() {
1358         let input = "rule = { \"\"{,0} }";
1359 
1360         let pairs = PestParser::parse(Rule::grammar_rules, input).unwrap();
1361         unwrap_or_report(consume_rules_with_spans(pairs));
1362     }
1363 
1364     #[test]
1365     #[should_panic(expected = "grammar error
1366 
1367  --> 1:13
1368   |
1369 1 | rule = { \"\"{4294967297,4294967298} }
1370   |             ^--------^
1371   |
1372   = number cannot overflow u32")]
repeat_min_max_overflow()1373     fn repeat_min_max_overflow() {
1374         let input = "rule = { \"\"{4294967297,4294967298} }";
1375 
1376         let pairs = PestParser::parse(Rule::grammar_rules, input).unwrap();
1377         unwrap_or_report(consume_rules_with_spans(pairs));
1378     }
1379 
1380     #[test]
1381     #[should_panic(expected = "grammar error
1382 
1383  --> 1:15
1384   |
1385 1 | rule = { \"\"{0,0} }
1386   |               ^
1387   |
1388   = cannot repeat 0 times")]
repeat_min_max_zero()1389     fn repeat_min_max_zero() {
1390         let input = "rule = { \"\"{0,0} }";
1391 
1392         let pairs = PestParser::parse(Rule::grammar_rules, input).unwrap();
1393         unwrap_or_report(consume_rules_with_spans(pairs));
1394     }
1395 
1396     #[test]
unescape_all()1397     fn unescape_all() {
1398         let string = r"a\nb\x55c\u{111}d";
1399 
1400         assert_eq!(unescape(string), Some("a\nb\x55c\u{111}d".to_owned()));
1401     }
1402 
1403     #[test]
unescape_empty_escape()1404     fn unescape_empty_escape() {
1405         let string = r"\";
1406 
1407         assert_eq!(unescape(string), None);
1408     }
1409 
1410     #[test]
unescape_wrong_escape()1411     fn unescape_wrong_escape() {
1412         let string = r"\w";
1413 
1414         assert_eq!(unescape(string), None);
1415     }
1416 
1417     #[test]
unescape_backslash()1418     fn unescape_backslash() {
1419         let string = "\\\\";
1420         assert_eq!(unescape(string), Some("\\".to_owned()));
1421     }
1422 
1423     #[test]
1424     fn unescape_return() {
1425         let string = "\\r";
1426         assert_eq!(unescape(string), Some("\r".to_owned()));
1427     }
1428 
1429     #[test]
1430     fn unescape_tab() {
1431         let string = "\\t";
1432         assert_eq!(unescape(string), Some("\t".to_owned()));
1433     }
1434 
1435     #[test]
1436     fn unescape_null() {
1437         let string = "\\0";
1438         assert_eq!(unescape(string), Some("\0".to_owned()));
1439     }
1440 
1441     #[test]
1442     fn unescape_single_quote() {
1443         let string = "\\'";
1444         assert_eq!(unescape(string), Some("\'".to_owned()));
1445     }
1446 
1447     #[test]
1448     fn unescape_wrong_byte() {
1449         let string = r"\xfg";
1450 
1451         assert_eq!(unescape(string), None);
1452     }
1453 
1454     #[test]
1455     fn unescape_short_byte() {
1456         let string = r"\xf";
1457 
1458         assert_eq!(unescape(string), None);
1459     }
1460 
1461     #[test]
1462     fn unescape_no_open_brace_unicode() {
1463         let string = r"\u11";
1464 
1465         assert_eq!(unescape(string), None);
1466     }
1467 
1468     #[test]
1469     fn unescape_no_close_brace_unicode() {
1470         let string = r"\u{11";
1471 
1472         assert_eq!(unescape(string), None);
1473     }
1474 
1475     #[test]
1476     fn unescape_short_unicode() {
1477         let string = r"\u{1}";
1478 
1479         assert_eq!(unescape(string), None);
1480     }
1481 
1482     #[test]
1483     fn unescape_long_unicode() {
1484         let string = r"\u{1111111}";
1485 
1486         assert_eq!(unescape(string), None);
1487     }
1488 }
1489