1 /*!
2 This module provides a regular expression printer for `Ast`.
3 */
4 
5 use std::fmt;
6 
7 use crate::ast::visitor::{self, Visitor};
8 use crate::ast::{self, Ast};
9 
/// Internal builder used to construct a [`Printer`].
///
/// The printer has no configuration knobs at the moment, so this builder is
/// intentionally not exported from the module.
#[derive(Debug, Clone)]
struct PrinterBuilder {
    /// Zero-sized placeholder; the builder carries no settings.
    _priv: (),
}
18 
19 impl Default for PrinterBuilder {
default() -> PrinterBuilder20     fn default() -> PrinterBuilder {
21         PrinterBuilder::new()
22     }
23 }
24 
25 impl PrinterBuilder {
new() -> PrinterBuilder26     fn new() -> PrinterBuilder {
27         PrinterBuilder { _priv: () }
28     }
29 
build(&self) -> Printer30     fn build(&self) -> Printer {
31         Printer { _priv: () }
32     }
33 }
34 
/// A printer for a regular expression abstract syntax tree.
///
/// A printer converts an abstract syntax tree (AST) to a regular expression
/// pattern string. This particular printer uses constant stack space and heap
/// space proportional to the size of the AST.
///
/// This printer will not necessarily preserve the original formatting of the
/// regular expression pattern string. For example, all whitespace and comments
/// are ignored.
///
/// The type implements `Default`; the default value carries no state, just
/// like the value returned by `Printer::new`.
#[derive(Debug, Default)]
pub struct Printer {
    /// Zero-sized private field that prevents construction with a struct
    /// literal outside this module.
    _priv: (),
}
48 
49 impl Printer {
50     /// Create a new printer.
new() -> Printer51     pub fn new() -> Printer {
52         PrinterBuilder::new().build()
53     }
54 
55     /// Print the given `Ast` to the given writer. The writer must implement
56     /// `fmt::Write`. Typical implementations of `fmt::Write` that can be used
57     /// here are a `fmt::Formatter` (which is available in `fmt::Display`
58     /// implementations) or a `&mut String`.
print<W: fmt::Write>(&mut self, ast: &Ast, wtr: W) -> fmt::Result59     pub fn print<W: fmt::Write>(&mut self, ast: &Ast, wtr: W) -> fmt::Result {
60         visitor::visit(ast, Writer { printer: self, wtr: wtr })
61     }
62 }
63 
64 #[derive(Debug)]
65 struct Writer<'p, W> {
66     printer: &'p mut Printer,
67     wtr: W,
68 }
69 
70 impl<'p, W: fmt::Write> Visitor for Writer<'p, W> {
71     type Output = ();
72     type Err = fmt::Error;
73 
finish(self) -> fmt::Result74     fn finish(self) -> fmt::Result {
75         Ok(())
76     }
77 
visit_pre(&mut self, ast: &Ast) -> fmt::Result78     fn visit_pre(&mut self, ast: &Ast) -> fmt::Result {
79         match *ast {
80             Ast::Group(ref x) => self.fmt_group_pre(x),
81             Ast::Class(ast::Class::Bracketed(ref x)) => {
82                 self.fmt_class_bracketed_pre(x)
83             }
84             _ => Ok(()),
85         }
86     }
87 
visit_post(&mut self, ast: &Ast) -> fmt::Result88     fn visit_post(&mut self, ast: &Ast) -> fmt::Result {
89         use crate::ast::Class;
90 
91         match *ast {
92             Ast::Empty(_) => Ok(()),
93             Ast::Flags(ref x) => self.fmt_set_flags(x),
94             Ast::Literal(ref x) => self.fmt_literal(x),
95             Ast::Dot(_) => self.wtr.write_str("."),
96             Ast::Assertion(ref x) => self.fmt_assertion(x),
97             Ast::Class(Class::Perl(ref x)) => self.fmt_class_perl(x),
98             Ast::Class(Class::Unicode(ref x)) => self.fmt_class_unicode(x),
99             Ast::Class(Class::Bracketed(ref x)) => {
100                 self.fmt_class_bracketed_post(x)
101             }
102             Ast::Repetition(ref x) => self.fmt_repetition(x),
103             Ast::Group(ref x) => self.fmt_group_post(x),
104             Ast::Alternation(_) => Ok(()),
105             Ast::Concat(_) => Ok(()),
106         }
107     }
108 
visit_alternation_in(&mut self) -> fmt::Result109     fn visit_alternation_in(&mut self) -> fmt::Result {
110         self.wtr.write_str("|")
111     }
112 
visit_class_set_item_pre( &mut self, ast: &ast::ClassSetItem, ) -> Result<(), Self::Err>113     fn visit_class_set_item_pre(
114         &mut self,
115         ast: &ast::ClassSetItem,
116     ) -> Result<(), Self::Err> {
117         match *ast {
118             ast::ClassSetItem::Bracketed(ref x) => {
119                 self.fmt_class_bracketed_pre(x)
120             }
121             _ => Ok(()),
122         }
123     }
124 
visit_class_set_item_post( &mut self, ast: &ast::ClassSetItem, ) -> Result<(), Self::Err>125     fn visit_class_set_item_post(
126         &mut self,
127         ast: &ast::ClassSetItem,
128     ) -> Result<(), Self::Err> {
129         use crate::ast::ClassSetItem::*;
130 
131         match *ast {
132             Empty(_) => Ok(()),
133             Literal(ref x) => self.fmt_literal(x),
134             Range(ref x) => {
135                 self.fmt_literal(&x.start)?;
136                 self.wtr.write_str("-")?;
137                 self.fmt_literal(&x.end)?;
138                 Ok(())
139             }
140             Ascii(ref x) => self.fmt_class_ascii(x),
141             Unicode(ref x) => self.fmt_class_unicode(x),
142             Perl(ref x) => self.fmt_class_perl(x),
143             Bracketed(ref x) => self.fmt_class_bracketed_post(x),
144             Union(_) => Ok(()),
145         }
146     }
147 
visit_class_set_binary_op_in( &mut self, ast: &ast::ClassSetBinaryOp, ) -> Result<(), Self::Err>148     fn visit_class_set_binary_op_in(
149         &mut self,
150         ast: &ast::ClassSetBinaryOp,
151     ) -> Result<(), Self::Err> {
152         self.fmt_class_set_binary_op_kind(&ast.kind)
153     }
154 }
155 
156 impl<'p, W: fmt::Write> Writer<'p, W> {
fmt_group_pre(&mut self, ast: &ast::Group) -> fmt::Result157     fn fmt_group_pre(&mut self, ast: &ast::Group) -> fmt::Result {
158         use crate::ast::GroupKind::*;
159         match ast.kind {
160             CaptureIndex(_) => self.wtr.write_str("("),
161             CaptureName(ref x) => {
162                 self.wtr.write_str("(?P<")?;
163                 self.wtr.write_str(&x.name)?;
164                 self.wtr.write_str(">")?;
165                 Ok(())
166             }
167             NonCapturing(ref flags) => {
168                 self.wtr.write_str("(?")?;
169                 self.fmt_flags(flags)?;
170                 self.wtr.write_str(":")?;
171                 Ok(())
172             }
173         }
174     }
175 
fmt_group_post(&mut self, _ast: &ast::Group) -> fmt::Result176     fn fmt_group_post(&mut self, _ast: &ast::Group) -> fmt::Result {
177         self.wtr.write_str(")")
178     }
179 
fmt_repetition(&mut self, ast: &ast::Repetition) -> fmt::Result180     fn fmt_repetition(&mut self, ast: &ast::Repetition) -> fmt::Result {
181         use crate::ast::RepetitionKind::*;
182         match ast.op.kind {
183             ZeroOrOne if ast.greedy => self.wtr.write_str("?"),
184             ZeroOrOne => self.wtr.write_str("??"),
185             ZeroOrMore if ast.greedy => self.wtr.write_str("*"),
186             ZeroOrMore => self.wtr.write_str("*?"),
187             OneOrMore if ast.greedy => self.wtr.write_str("+"),
188             OneOrMore => self.wtr.write_str("+?"),
189             Range(ref x) => {
190                 self.fmt_repetition_range(x)?;
191                 if !ast.greedy {
192                     self.wtr.write_str("?")?;
193                 }
194                 Ok(())
195             }
196         }
197     }
198 
fmt_repetition_range( &mut self, ast: &ast::RepetitionRange, ) -> fmt::Result199     fn fmt_repetition_range(
200         &mut self,
201         ast: &ast::RepetitionRange,
202     ) -> fmt::Result {
203         use crate::ast::RepetitionRange::*;
204         match *ast {
205             Exactly(x) => write!(self.wtr, "{{{}}}", x),
206             AtLeast(x) => write!(self.wtr, "{{{},}}", x),
207             Bounded(x, y) => write!(self.wtr, "{{{},{}}}", x, y),
208         }
209     }
210 
fmt_literal(&mut self, ast: &ast::Literal) -> fmt::Result211     fn fmt_literal(&mut self, ast: &ast::Literal) -> fmt::Result {
212         use crate::ast::LiteralKind::*;
213 
214         match ast.kind {
215             Verbatim => self.wtr.write_char(ast.c),
216             Punctuation => write!(self.wtr, r"\{}", ast.c),
217             Octal => write!(self.wtr, r"\{:o}", ast.c as u32),
218             HexFixed(ast::HexLiteralKind::X) => {
219                 write!(self.wtr, r"\x{:02X}", ast.c as u32)
220             }
221             HexFixed(ast::HexLiteralKind::UnicodeShort) => {
222                 write!(self.wtr, r"\u{:04X}", ast.c as u32)
223             }
224             HexFixed(ast::HexLiteralKind::UnicodeLong) => {
225                 write!(self.wtr, r"\U{:08X}", ast.c as u32)
226             }
227             HexBrace(ast::HexLiteralKind::X) => {
228                 write!(self.wtr, r"\x{{{:X}}}", ast.c as u32)
229             }
230             HexBrace(ast::HexLiteralKind::UnicodeShort) => {
231                 write!(self.wtr, r"\u{{{:X}}}", ast.c as u32)
232             }
233             HexBrace(ast::HexLiteralKind::UnicodeLong) => {
234                 write!(self.wtr, r"\U{{{:X}}}", ast.c as u32)
235             }
236             Special(ast::SpecialLiteralKind::Bell) => {
237                 self.wtr.write_str(r"\a")
238             }
239             Special(ast::SpecialLiteralKind::FormFeed) => {
240                 self.wtr.write_str(r"\f")
241             }
242             Special(ast::SpecialLiteralKind::Tab) => self.wtr.write_str(r"\t"),
243             Special(ast::SpecialLiteralKind::LineFeed) => {
244                 self.wtr.write_str(r"\n")
245             }
246             Special(ast::SpecialLiteralKind::CarriageReturn) => {
247                 self.wtr.write_str(r"\r")
248             }
249             Special(ast::SpecialLiteralKind::VerticalTab) => {
250                 self.wtr.write_str(r"\v")
251             }
252             Special(ast::SpecialLiteralKind::Space) => {
253                 self.wtr.write_str(r"\ ")
254             }
255         }
256     }
257 
fmt_assertion(&mut self, ast: &ast::Assertion) -> fmt::Result258     fn fmt_assertion(&mut self, ast: &ast::Assertion) -> fmt::Result {
259         use crate::ast::AssertionKind::*;
260         match ast.kind {
261             StartLine => self.wtr.write_str("^"),
262             EndLine => self.wtr.write_str("$"),
263             StartText => self.wtr.write_str(r"\A"),
264             EndText => self.wtr.write_str(r"\z"),
265             WordBoundary => self.wtr.write_str(r"\b"),
266             NotWordBoundary => self.wtr.write_str(r"\B"),
267         }
268     }
269 
fmt_set_flags(&mut self, ast: &ast::SetFlags) -> fmt::Result270     fn fmt_set_flags(&mut self, ast: &ast::SetFlags) -> fmt::Result {
271         self.wtr.write_str("(?")?;
272         self.fmt_flags(&ast.flags)?;
273         self.wtr.write_str(")")?;
274         Ok(())
275     }
276 
fmt_flags(&mut self, ast: &ast::Flags) -> fmt::Result277     fn fmt_flags(&mut self, ast: &ast::Flags) -> fmt::Result {
278         use crate::ast::{Flag, FlagsItemKind};
279 
280         for item in &ast.items {
281             match item.kind {
282                 FlagsItemKind::Negation => self.wtr.write_str("-"),
283                 FlagsItemKind::Flag(ref flag) => match *flag {
284                     Flag::CaseInsensitive => self.wtr.write_str("i"),
285                     Flag::MultiLine => self.wtr.write_str("m"),
286                     Flag::DotMatchesNewLine => self.wtr.write_str("s"),
287                     Flag::SwapGreed => self.wtr.write_str("U"),
288                     Flag::Unicode => self.wtr.write_str("u"),
289                     Flag::IgnoreWhitespace => self.wtr.write_str("x"),
290                 },
291             }?;
292         }
293         Ok(())
294     }
295 
fmt_class_bracketed_pre( &mut self, ast: &ast::ClassBracketed, ) -> fmt::Result296     fn fmt_class_bracketed_pre(
297         &mut self,
298         ast: &ast::ClassBracketed,
299     ) -> fmt::Result {
300         if ast.negated {
301             self.wtr.write_str("[^")
302         } else {
303             self.wtr.write_str("[")
304         }
305     }
306 
fmt_class_bracketed_post( &mut self, _ast: &ast::ClassBracketed, ) -> fmt::Result307     fn fmt_class_bracketed_post(
308         &mut self,
309         _ast: &ast::ClassBracketed,
310     ) -> fmt::Result {
311         self.wtr.write_str("]")
312     }
313 
fmt_class_set_binary_op_kind( &mut self, ast: &ast::ClassSetBinaryOpKind, ) -> fmt::Result314     fn fmt_class_set_binary_op_kind(
315         &mut self,
316         ast: &ast::ClassSetBinaryOpKind,
317     ) -> fmt::Result {
318         use crate::ast::ClassSetBinaryOpKind::*;
319         match *ast {
320             Intersection => self.wtr.write_str("&&"),
321             Difference => self.wtr.write_str("--"),
322             SymmetricDifference => self.wtr.write_str("~~"),
323         }
324     }
325 
fmt_class_perl(&mut self, ast: &ast::ClassPerl) -> fmt::Result326     fn fmt_class_perl(&mut self, ast: &ast::ClassPerl) -> fmt::Result {
327         use crate::ast::ClassPerlKind::*;
328         match ast.kind {
329             Digit if ast.negated => self.wtr.write_str(r"\D"),
330             Digit => self.wtr.write_str(r"\d"),
331             Space if ast.negated => self.wtr.write_str(r"\S"),
332             Space => self.wtr.write_str(r"\s"),
333             Word if ast.negated => self.wtr.write_str(r"\W"),
334             Word => self.wtr.write_str(r"\w"),
335         }
336     }
337 
fmt_class_ascii(&mut self, ast: &ast::ClassAscii) -> fmt::Result338     fn fmt_class_ascii(&mut self, ast: &ast::ClassAscii) -> fmt::Result {
339         use crate::ast::ClassAsciiKind::*;
340         match ast.kind {
341             Alnum if ast.negated => self.wtr.write_str("[:^alnum:]"),
342             Alnum => self.wtr.write_str("[:alnum:]"),
343             Alpha if ast.negated => self.wtr.write_str("[:^alpha:]"),
344             Alpha => self.wtr.write_str("[:alpha:]"),
345             Ascii if ast.negated => self.wtr.write_str("[:^ascii:]"),
346             Ascii => self.wtr.write_str("[:ascii:]"),
347             Blank if ast.negated => self.wtr.write_str("[:^blank:]"),
348             Blank => self.wtr.write_str("[:blank:]"),
349             Cntrl if ast.negated => self.wtr.write_str("[:^cntrl:]"),
350             Cntrl => self.wtr.write_str("[:cntrl:]"),
351             Digit if ast.negated => self.wtr.write_str("[:^digit:]"),
352             Digit => self.wtr.write_str("[:digit:]"),
353             Graph if ast.negated => self.wtr.write_str("[:^graph:]"),
354             Graph => self.wtr.write_str("[:graph:]"),
355             Lower if ast.negated => self.wtr.write_str("[:^lower:]"),
356             Lower => self.wtr.write_str("[:lower:]"),
357             Print if ast.negated => self.wtr.write_str("[:^print:]"),
358             Print => self.wtr.write_str("[:print:]"),
359             Punct if ast.negated => self.wtr.write_str("[:^punct:]"),
360             Punct => self.wtr.write_str("[:punct:]"),
361             Space if ast.negated => self.wtr.write_str("[:^space:]"),
362             Space => self.wtr.write_str("[:space:]"),
363             Upper if ast.negated => self.wtr.write_str("[:^upper:]"),
364             Upper => self.wtr.write_str("[:upper:]"),
365             Word if ast.negated => self.wtr.write_str("[:^word:]"),
366             Word => self.wtr.write_str("[:word:]"),
367             Xdigit if ast.negated => self.wtr.write_str("[:^xdigit:]"),
368             Xdigit => self.wtr.write_str("[:xdigit:]"),
369         }
370     }
371 
fmt_class_unicode(&mut self, ast: &ast::ClassUnicode) -> fmt::Result372     fn fmt_class_unicode(&mut self, ast: &ast::ClassUnicode) -> fmt::Result {
373         use crate::ast::ClassUnicodeKind::*;
374         use crate::ast::ClassUnicodeOpKind::*;
375 
376         if ast.negated {
377             self.wtr.write_str(r"\P")?;
378         } else {
379             self.wtr.write_str(r"\p")?;
380         }
381         match ast.kind {
382             OneLetter(c) => self.wtr.write_char(c),
383             Named(ref x) => write!(self.wtr, "{{{}}}", x),
384             NamedValue { op: Equal, ref name, ref value } => {
385                 write!(self.wtr, "{{{}={}}}", name, value)
386             }
387             NamedValue { op: Colon, ref name, ref value } => {
388                 write!(self.wtr, "{{{}:{}}}", name, value)
389             }
390             NamedValue { op: NotEqual, ref name, ref value } => {
391                 write!(self.wtr, "{{{}!={}}}", name, value)
392             }
393         }
394     }
395 }
396 
#[cfg(test)]
mod tests {
    use super::Printer;
    use crate::ast::parse::ParserBuilder;

    /// Parses `given` with a default parser, prints the resulting AST and
    /// asserts that the printed text equals `given`.
    fn roundtrip(given: &str) {
        roundtrip_with(|builder| builder, given);
    }

    /// Same check as `roundtrip`, but `f` may configure the parser builder
    /// before the pattern is parsed.
    fn roundtrip_with<F>(mut f: F, given: &str)
    where
        F: FnMut(&mut ParserBuilder) -> &mut ParserBuilder,
    {
        let mut builder = ParserBuilder::new();
        f(&mut builder);
        let ast = builder.build().parse(given).unwrap();

        let mut printed = String::new();
        Printer::new().print(&ast, &mut printed).unwrap();
        assert_eq!(given, printed);
    }

    /// Round-trips every pattern, in order, with the default parser.
    fn roundtrip_all(patterns: &[&str]) {
        for pattern in patterns {
            roundtrip(pattern);
        }
    }

    #[test]
    fn print_literal() {
        roundtrip_all(&["a", r"\["]);
        // Octal escapes are only accepted when enabled on the builder.
        roundtrip_with(|b| b.octal(true), r"\141");
        roundtrip_all(&[
            r"\x61",
            r"\x7F",
            r"\u0061",
            r"\U00000061",
            r"\x{61}",
            r"\x{7F}",
            r"\u{61}",
            r"\U{61}",
            r"\a",
            r"\f",
            r"\t",
            r"\n",
            r"\r",
            r"\v",
            r"(?x)\ ",
        ]);
    }

    #[test]
    fn print_dot() {
        roundtrip(".");
    }

    #[test]
    fn print_concat() {
        roundtrip_all(&["ab", "abcde", "a(bcd)ef"]);
    }

    #[test]
    fn print_alternation() {
        roundtrip_all(&[
            "a|b",
            "a|b|c|d|e",
            "|a|b|c|d|e",
            "|a|b|c|d|e|",
            "a(b|c|d)|e|f",
        ]);
    }

    #[test]
    fn print_assertion() {
        roundtrip_all(&[r"^", r"$", r"\A", r"\z", r"\b", r"\B"]);
    }

    #[test]
    fn print_repetition() {
        roundtrip_all(&[
            "a?",
            "a??",
            "a*",
            "a*?",
            "a+",
            "a+?",
            "a{5}",
            "a{5}?",
            "a{5,}",
            "a{5,}?",
            "a{5,10}",
            "a{5,10}?",
        ]);
    }

    #[test]
    fn print_flags() {
        roundtrip_all(&["(?i)", "(?-i)", "(?s-i)", "(?-si)", "(?siUmux)"]);
    }

    #[test]
    fn print_group() {
        roundtrip_all(&["(?i:a)", "(?P<foo>a)", "(a)"]);
    }

    #[test]
    fn print_class() {
        roundtrip_all(&[
            r"[abc]",
            r"[a-z]",
            r"[^a-z]",
            r"[a-z0-9]",
            r"[-a-z0-9]",
            // NOTE(review): this pattern appears twice in a row; possibly a
            // different pattern was intended. Kept to preserve coverage.
            r"[-a-z0-9]",
            r"[a-z0-9---]",
            r"[a-z&&m-n]",
            r"[[a-z&&m-n]]",
            r"[a-z--m-n]",
            r"[a-z~~m-n]",
            r"[a-z[0-9]]",
            r"[a-z[^0-9]]",
        ]);

        roundtrip_all(&[r"\d", r"\D", r"\s", r"\S", r"\w", r"\W"]);

        // Each ASCII class is checked in its plain and negated form.
        let ascii_names = [
            "alnum", "alpha", "ascii", "blank", "cntrl", "digit", "graph",
            "lower", "print", "punct", "space", "upper", "word", "xdigit",
        ];
        for name in ascii_names.iter() {
            roundtrip(&format!("[[:{}:]]", name));
            roundtrip(&format!("[[:^{}:]]", name));
        }

        roundtrip_all(&[
            r"\pL",
            r"\PL",
            r"\p{L}",
            r"\P{L}",
            r"\p{X=Y}",
            r"\P{X=Y}",
            r"\p{X:Y}",
            r"\P{X:Y}",
            r"\p{X!=Y}",
            r"\P{X!=Y}",
        ]);
    }
}
570