1 /*! 2 This module provides a regular expression printer for `Ast`. 3 */ 4 5 use std::fmt; 6 7 use ast::visitor::{self, Visitor}; 8 use ast::{self, Ast}; 9 10 /// A builder for constructing a printer. 11 /// 12 /// Note that since a printer doesn't have any configuration knobs, this type 13 /// remains unexported. 14 #[derive(Clone, Debug)] 15 struct PrinterBuilder { 16 _priv: (), 17 } 18 19 impl Default for PrinterBuilder { default() -> PrinterBuilder20 fn default() -> PrinterBuilder { 21 PrinterBuilder::new() 22 } 23 } 24 25 impl PrinterBuilder { new() -> PrinterBuilder26 fn new() -> PrinterBuilder { 27 PrinterBuilder { _priv: () } 28 } 29 build(&self) -> Printer30 fn build(&self) -> Printer { 31 Printer { _priv: () } 32 } 33 } 34 35 /// A printer for a regular expression abstract syntax tree. 36 /// 37 /// A printer converts an abstract syntax tree (AST) to a regular expression 38 /// pattern string. This particular printer uses constant stack space and heap 39 /// space proportional to the size of the AST. 40 /// 41 /// This printer will not necessarily preserve the original formatting of the 42 /// regular expression pattern string. For example, all whitespace and comments 43 /// are ignored. 44 #[derive(Debug)] 45 pub struct Printer { 46 _priv: (), 47 } 48 49 impl Printer { 50 /// Create a new printer. new() -> Printer51 pub fn new() -> Printer { 52 PrinterBuilder::new().build() 53 } 54 55 /// Print the given `Ast` to the given writer. The writer must implement 56 /// `fmt::Write`. Typical implementations of `fmt::Write` that can be used 57 /// here are a `fmt::Formatter` (which is available in `fmt::Display` 58 /// implementations) or a `&mut String`. print<W: fmt::Write>(&mut self, ast: &Ast, wtr: W) -> fmt::Result59 pub fn print<W: fmt::Write>(&mut self, ast: &Ast, wtr: W) -> fmt::Result { 60 visitor::visit(ast, Writer { printer: self, wtr: wtr }) 61 } 62 } 63 64 #[derive(Debug)] 65 struct Writer<'p, W> { 66 printer: &'p mut Printer, 67 wtr: W, 68 } 69 70 impl<'p, W: fmt::Write> Visitor for Writer<'p, W> { 71 type Output = (); 72 type Err = fmt::Error; 73 finish(self) -> fmt::Result74 fn finish(self) -> fmt::Result { 75 Ok(()) 76 } 77 visit_pre(&mut self, ast: &Ast) -> fmt::Result78 fn visit_pre(&mut self, ast: &Ast) -> fmt::Result { 79 match *ast { 80 Ast::Group(ref x) => self.fmt_group_pre(x), 81 Ast::Class(ast::Class::Bracketed(ref x)) => { 82 self.fmt_class_bracketed_pre(x) 83 } 84 _ => Ok(()), 85 } 86 } 87 visit_post(&mut self, ast: &Ast) -> fmt::Result88 fn visit_post(&mut self, ast: &Ast) -> fmt::Result { 89 use ast::Class; 90 91 match *ast { 92 Ast::Empty(_) => Ok(()), 93 Ast::Flags(ref x) => self.fmt_set_flags(x), 94 Ast::Literal(ref x) => self.fmt_literal(x), 95 Ast::Dot(_) => self.wtr.write_str("."), 96 Ast::Assertion(ref x) => self.fmt_assertion(x), 97 Ast::Class(Class::Perl(ref x)) => self.fmt_class_perl(x), 98 Ast::Class(Class::Unicode(ref x)) => self.fmt_class_unicode(x), 99 Ast::Class(Class::Bracketed(ref x)) => { 100 self.fmt_class_bracketed_post(x) 101 } 102 Ast::Repetition(ref x) => self.fmt_repetition(x), 103 Ast::Group(ref x) => self.fmt_group_post(x), 104 Ast::Alternation(_) => Ok(()), 105 Ast::Concat(_) => Ok(()), 106 } 107 } 108 visit_alternation_in(&mut self) -> fmt::Result109 fn visit_alternation_in(&mut self) -> fmt::Result { 110 self.wtr.write_str("|") 111 } 112 visit_class_set_item_pre( &mut self, ast: &ast::ClassSetItem, ) -> Result<(), Self::Err>113 fn visit_class_set_item_pre( 114 &mut self, 115 ast: &ast::ClassSetItem, 116 ) -> Result<(), Self::Err> { 117 match *ast { 118 ast::ClassSetItem::Bracketed(ref x) => { 119 self.fmt_class_bracketed_pre(x) 120 } 121 _ => Ok(()), 122 } 123 } 124 visit_class_set_item_post( &mut self, ast: &ast::ClassSetItem, ) -> Result<(), Self::Err>125 fn visit_class_set_item_post( 126 &mut self, 127 ast: &ast::ClassSetItem, 128 ) -> Result<(), Self::Err> { 129 use ast::ClassSetItem::*; 130 131 match *ast { 132 Empty(_) => Ok(()), 133 Literal(ref x) => self.fmt_literal(x), 134 Range(ref x) => { 135 self.fmt_literal(&x.start)?; 136 self.wtr.write_str("-")?; 137 self.fmt_literal(&x.end)?; 138 Ok(()) 139 } 140 Ascii(ref x) => self.fmt_class_ascii(x), 141 Unicode(ref x) => self.fmt_class_unicode(x), 142 Perl(ref x) => self.fmt_class_perl(x), 143 Bracketed(ref x) => self.fmt_class_bracketed_post(x), 144 Union(_) => Ok(()), 145 } 146 } 147 visit_class_set_binary_op_in( &mut self, ast: &ast::ClassSetBinaryOp, ) -> Result<(), Self::Err>148 fn visit_class_set_binary_op_in( 149 &mut self, 150 ast: &ast::ClassSetBinaryOp, 151 ) -> Result<(), Self::Err> { 152 self.fmt_class_set_binary_op_kind(&ast.kind) 153 } 154 } 155 156 impl<'p, W: fmt::Write> Writer<'p, W> { fmt_group_pre(&mut self, ast: &ast::Group) -> fmt::Result157 fn fmt_group_pre(&mut self, ast: &ast::Group) -> fmt::Result { 158 use ast::GroupKind::*; 159 match ast.kind { 160 CaptureIndex(_) => self.wtr.write_str("("), 161 CaptureName(ref x) => { 162 self.wtr.write_str("(?P<")?; 163 self.wtr.write_str(&x.name)?; 164 self.wtr.write_str(">")?; 165 Ok(()) 166 } 167 NonCapturing(ref flags) => { 168 self.wtr.write_str("(?")?; 169 self.fmt_flags(flags)?; 170 self.wtr.write_str(":")?; 171 Ok(()) 172 } 173 } 174 } 175 fmt_group_post(&mut self, _ast: &ast::Group) -> fmt::Result176 fn fmt_group_post(&mut self, _ast: &ast::Group) -> fmt::Result { 177 self.wtr.write_str(")") 178 } 179 fmt_repetition(&mut self, ast: &ast::Repetition) -> fmt::Result180 fn fmt_repetition(&mut self, ast: &ast::Repetition) -> fmt::Result { 181 use ast::RepetitionKind::*; 182 match ast.op.kind { 183 ZeroOrOne if ast.greedy => self.wtr.write_str("?"), 184 ZeroOrOne => self.wtr.write_str("??"), 185 ZeroOrMore if ast.greedy => self.wtr.write_str("*"), 186 ZeroOrMore => self.wtr.write_str("*?"), 187 OneOrMore if ast.greedy => self.wtr.write_str("+"), 188 OneOrMore => self.wtr.write_str("+?"), 189 Range(ref x) => { 190 self.fmt_repetition_range(x)?; 191 if !ast.greedy { 192 self.wtr.write_str("?")?; 193 } 194 Ok(()) 195 } 196 } 197 } 198 fmt_repetition_range( &mut self, ast: &ast::RepetitionRange, ) -> fmt::Result199 fn fmt_repetition_range( 200 &mut self, 201 ast: &ast::RepetitionRange, 202 ) -> fmt::Result { 203 use ast::RepetitionRange::*; 204 match *ast { 205 Exactly(x) => write!(self.wtr, "{{{}}}", x), 206 AtLeast(x) => write!(self.wtr, "{{{},}}", x), 207 Bounded(x, y) => write!(self.wtr, "{{{},{}}}", x, y), 208 } 209 } 210 fmt_literal(&mut self, ast: &ast::Literal) -> fmt::Result211 fn fmt_literal(&mut self, ast: &ast::Literal) -> fmt::Result { 212 use ast::LiteralKind::*; 213 214 match ast.kind { 215 Verbatim => self.wtr.write_char(ast.c), 216 Punctuation => write!(self.wtr, r"\{}", ast.c), 217 Octal => write!(self.wtr, r"\{:o}", ast.c as u32), 218 HexFixed(ast::HexLiteralKind::X) => { 219 write!(self.wtr, r"\x{:02X}", ast.c as u32) 220 } 221 HexFixed(ast::HexLiteralKind::UnicodeShort) => { 222 write!(self.wtr, r"\u{:04X}", ast.c as u32) 223 } 224 HexFixed(ast::HexLiteralKind::UnicodeLong) => { 225 write!(self.wtr, r"\U{:08X}", ast.c as u32) 226 } 227 HexBrace(ast::HexLiteralKind::X) => { 228 write!(self.wtr, r"\x{{{:X}}}", ast.c as u32) 229 } 230 HexBrace(ast::HexLiteralKind::UnicodeShort) => { 231 write!(self.wtr, r"\u{{{:X}}}", ast.c as u32) 232 } 233 HexBrace(ast::HexLiteralKind::UnicodeLong) => { 234 write!(self.wtr, r"\U{{{:X}}}", ast.c as u32) 235 } 236 Special(ast::SpecialLiteralKind::Bell) => { 237 self.wtr.write_str(r"\a") 238 } 239 Special(ast::SpecialLiteralKind::FormFeed) => { 240 self.wtr.write_str(r"\f") 241 } 242 Special(ast::SpecialLiteralKind::Tab) => self.wtr.write_str(r"\t"), 243 Special(ast::SpecialLiteralKind::LineFeed) => { 244 self.wtr.write_str(r"\n") 245 } 246 Special(ast::SpecialLiteralKind::CarriageReturn) => { 247 self.wtr.write_str(r"\r") 248 } 249 Special(ast::SpecialLiteralKind::VerticalTab) => { 250 self.wtr.write_str(r"\v") 251 } 252 Special(ast::SpecialLiteralKind::Space) => { 253 self.wtr.write_str(r"\ ") 254 } 255 } 256 } 257 fmt_assertion(&mut self, ast: &ast::Assertion) -> fmt::Result258 fn fmt_assertion(&mut self, ast: &ast::Assertion) -> fmt::Result { 259 use ast::AssertionKind::*; 260 match ast.kind { 261 StartLine => self.wtr.write_str("^"), 262 EndLine => self.wtr.write_str("$"), 263 StartText => self.wtr.write_str(r"\A"), 264 EndText => self.wtr.write_str(r"\z"), 265 WordBoundary => self.wtr.write_str(r"\b"), 266 NotWordBoundary => self.wtr.write_str(r"\B"), 267 } 268 } 269 fmt_set_flags(&mut self, ast: &ast::SetFlags) -> fmt::Result270 fn fmt_set_flags(&mut self, ast: &ast::SetFlags) -> fmt::Result { 271 self.wtr.write_str("(?")?; 272 self.fmt_flags(&ast.flags)?; 273 self.wtr.write_str(")")?; 274 Ok(()) 275 } 276 fmt_flags(&mut self, ast: &ast::Flags) -> fmt::Result277 fn fmt_flags(&mut self, ast: &ast::Flags) -> fmt::Result { 278 use ast::{Flag, FlagsItemKind}; 279 280 for item in &ast.items { 281 match item.kind { 282 FlagsItemKind::Negation => self.wtr.write_str("-"), 283 FlagsItemKind::Flag(ref flag) => match *flag { 284 Flag::CaseInsensitive => self.wtr.write_str("i"), 285 Flag::MultiLine => self.wtr.write_str("m"), 286 Flag::DotMatchesNewLine => self.wtr.write_str("s"), 287 Flag::SwapGreed => self.wtr.write_str("U"), 288 Flag::Unicode => self.wtr.write_str("u"), 289 Flag::IgnoreWhitespace => self.wtr.write_str("x"), 290 }, 291 }?; 292 } 293 Ok(()) 294 } 295 fmt_class_bracketed_pre( &mut self, ast: &ast::ClassBracketed, ) -> fmt::Result296 fn fmt_class_bracketed_pre( 297 &mut self, 298 ast: &ast::ClassBracketed, 299 ) -> fmt::Result { 300 if ast.negated { 301 self.wtr.write_str("[^") 302 } else { 303 self.wtr.write_str("[") 304 } 305 } 306 fmt_class_bracketed_post( &mut self, _ast: &ast::ClassBracketed, ) -> fmt::Result307 fn fmt_class_bracketed_post( 308 &mut self, 309 _ast: &ast::ClassBracketed, 310 ) -> fmt::Result { 311 self.wtr.write_str("]") 312 } 313 fmt_class_set_binary_op_kind( &mut self, ast: &ast::ClassSetBinaryOpKind, ) -> fmt::Result314 fn fmt_class_set_binary_op_kind( 315 &mut self, 316 ast: &ast::ClassSetBinaryOpKind, 317 ) -> fmt::Result { 318 use ast::ClassSetBinaryOpKind::*; 319 match *ast { 320 Intersection => self.wtr.write_str("&&"), 321 Difference => self.wtr.write_str("--"), 322 SymmetricDifference => self.wtr.write_str("~~"), 323 } 324 } 325 fmt_class_perl(&mut self, ast: &ast::ClassPerl) -> fmt::Result326 fn fmt_class_perl(&mut self, ast: &ast::ClassPerl) -> fmt::Result { 327 use ast::ClassPerlKind::*; 328 match ast.kind { 329 Digit if ast.negated => self.wtr.write_str(r"\D"), 330 Digit => self.wtr.write_str(r"\d"), 331 Space if ast.negated => self.wtr.write_str(r"\S"), 332 Space => self.wtr.write_str(r"\s"), 333 Word if ast.negated => self.wtr.write_str(r"\W"), 334 Word => self.wtr.write_str(r"\w"), 335 } 336 } 337 fmt_class_ascii(&mut self, ast: &ast::ClassAscii) -> fmt::Result338 fn fmt_class_ascii(&mut self, ast: &ast::ClassAscii) -> fmt::Result { 339 use ast::ClassAsciiKind::*; 340 match ast.kind { 341 Alnum if ast.negated => self.wtr.write_str("[:^alnum:]"), 342 Alnum => self.wtr.write_str("[:alnum:]"), 343 Alpha if ast.negated => self.wtr.write_str("[:^alpha:]"), 344 Alpha => self.wtr.write_str("[:alpha:]"), 345 Ascii if ast.negated => self.wtr.write_str("[:^ascii:]"), 346 Ascii => self.wtr.write_str("[:ascii:]"), 347 Blank if ast.negated => self.wtr.write_str("[:^blank:]"), 348 Blank => self.wtr.write_str("[:blank:]"), 349 Cntrl if ast.negated => self.wtr.write_str("[:^cntrl:]"), 350 Cntrl => self.wtr.write_str("[:cntrl:]"), 351 Digit if ast.negated => self.wtr.write_str("[:^digit:]"), 352 Digit => self.wtr.write_str("[:digit:]"), 353 Graph if ast.negated => self.wtr.write_str("[:^graph:]"), 354 Graph => self.wtr.write_str("[:graph:]"), 355 Lower if ast.negated => self.wtr.write_str("[:^lower:]"), 356 Lower => self.wtr.write_str("[:lower:]"), 357 Print if ast.negated => self.wtr.write_str("[:^print:]"), 358 Print => self.wtr.write_str("[:print:]"), 359 Punct if ast.negated => self.wtr.write_str("[:^punct:]"), 360 Punct => self.wtr.write_str("[:punct:]"), 361 Space if ast.negated => self.wtr.write_str("[:^space:]"), 362 Space => self.wtr.write_str("[:space:]"), 363 Upper if ast.negated => self.wtr.write_str("[:^upper:]"), 364 Upper => self.wtr.write_str("[:upper:]"), 365 Word if ast.negated => self.wtr.write_str("[:^word:]"), 366 Word => self.wtr.write_str("[:word:]"), 367 Xdigit if ast.negated => self.wtr.write_str("[:^xdigit:]"), 368 Xdigit => self.wtr.write_str("[:xdigit:]"), 369 } 370 } 371 fmt_class_unicode(&mut self, ast: &ast::ClassUnicode) -> fmt::Result372 fn fmt_class_unicode(&mut self, ast: &ast::ClassUnicode) -> fmt::Result { 373 use ast::ClassUnicodeKind::*; 374 use ast::ClassUnicodeOpKind::*; 375 376 if ast.negated { 377 self.wtr.write_str(r"\P")?; 378 } else { 379 self.wtr.write_str(r"\p")?; 380 } 381 match ast.kind { 382 OneLetter(c) => self.wtr.write_char(c), 383 Named(ref x) => write!(self.wtr, "{{{}}}", x), 384 NamedValue { op: Equal, ref name, ref value } => { 385 write!(self.wtr, "{{{}={}}}", name, value) 386 } 387 NamedValue { op: Colon, ref name, ref value } => { 388 write!(self.wtr, "{{{}:{}}}", name, value) 389 } 390 NamedValue { op: NotEqual, ref name, ref value } => { 391 write!(self.wtr, "{{{}!={}}}", name, value) 392 } 393 } 394 } 395 } 396 397 #[cfg(test)] 398 mod tests { 399 use super::Printer; 400 use ast::parse::ParserBuilder; 401 roundtrip(given: &str)402 fn roundtrip(given: &str) { 403 roundtrip_with(|b| b, given); 404 } 405 roundtrip_with<F>(mut f: F, given: &str) where F: FnMut(&mut ParserBuilder) -> &mut ParserBuilder,406 fn roundtrip_with<F>(mut f: F, given: &str) 407 where 408 F: FnMut(&mut ParserBuilder) -> &mut ParserBuilder, 409 { 410 let mut builder = ParserBuilder::new(); 411 f(&mut builder); 412 let ast = builder.build().parse(given).unwrap(); 413 414 let mut printer = Printer::new(); 415 let mut dst = String::new(); 416 printer.print(&ast, &mut dst).unwrap(); 417 assert_eq!(given, dst); 418 } 419 420 #[test] print_literal()421 fn print_literal() { 422 roundtrip("a"); 423 roundtrip(r"\["); 424 roundtrip_with(|b| b.octal(true), r"\141"); 425 roundtrip(r"\x61"); 426 roundtrip(r"\x7F"); 427 roundtrip(r"\u0061"); 428 roundtrip(r"\U00000061"); 429 roundtrip(r"\x{61}"); 430 roundtrip(r"\x{7F}"); 431 roundtrip(r"\u{61}"); 432 roundtrip(r"\U{61}"); 433 434 roundtrip(r"\a"); 435 roundtrip(r"\f"); 436 roundtrip(r"\t"); 437 roundtrip(r"\n"); 438 roundtrip(r"\r"); 439 roundtrip(r"\v"); 440 roundtrip(r"(?x)\ "); 441 } 442 443 #[test] print_dot()444 fn print_dot() { 445 roundtrip("."); 446 } 447 448 #[test] print_concat()449 fn print_concat() { 450 roundtrip("ab"); 451 roundtrip("abcde"); 452 roundtrip("a(bcd)ef"); 453 } 454 455 #[test] print_alternation()456 fn print_alternation() { 457 roundtrip("a|b"); 458 roundtrip("a|b|c|d|e"); 459 roundtrip("|a|b|c|d|e"); 460 roundtrip("|a|b|c|d|e|"); 461 roundtrip("a(b|c|d)|e|f"); 462 } 463 464 #[test] print_assertion()465 fn print_assertion() { 466 roundtrip(r"^"); 467 roundtrip(r"$"); 468 roundtrip(r"\A"); 469 roundtrip(r"\z"); 470 roundtrip(r"\b"); 471 roundtrip(r"\B"); 472 } 473 474 #[test] print_repetition()475 fn print_repetition() { 476 roundtrip("a?"); 477 roundtrip("a??"); 478 roundtrip("a*"); 479 roundtrip("a*?"); 480 roundtrip("a+"); 481 roundtrip("a+?"); 482 roundtrip("a{5}"); 483 roundtrip("a{5}?"); 484 roundtrip("a{5,}"); 485 roundtrip("a{5,}?"); 486 roundtrip("a{5,10}"); 487 roundtrip("a{5,10}?"); 488 } 489 490 #[test] print_flags()491 fn print_flags() { 492 roundtrip("(?i)"); 493 roundtrip("(?-i)"); 494 roundtrip("(?s-i)"); 495 roundtrip("(?-si)"); 496 roundtrip("(?siUmux)"); 497 } 498 499 #[test] print_group()500 fn print_group() { 501 roundtrip("(?i:a)"); 502 roundtrip("(?P<foo>a)"); 503 roundtrip("(a)"); 504 } 505 506 #[test] print_class()507 fn print_class() { 508 roundtrip(r"[abc]"); 509 roundtrip(r"[a-z]"); 510 roundtrip(r"[^a-z]"); 511 roundtrip(r"[a-z0-9]"); 512 roundtrip(r"[-a-z0-9]"); 513 roundtrip(r"[-a-z0-9]"); 514 roundtrip(r"[a-z0-9---]"); 515 roundtrip(r"[a-z&&m-n]"); 516 roundtrip(r"[[a-z&&m-n]]"); 517 roundtrip(r"[a-z--m-n]"); 518 roundtrip(r"[a-z~~m-n]"); 519 roundtrip(r"[a-z[0-9]]"); 520 roundtrip(r"[a-z[^0-9]]"); 521 522 roundtrip(r"\d"); 523 roundtrip(r"\D"); 524 roundtrip(r"\s"); 525 roundtrip(r"\S"); 526 roundtrip(r"\w"); 527 roundtrip(r"\W"); 528 529 roundtrip(r"[[:alnum:]]"); 530 roundtrip(r"[[:^alnum:]]"); 531 roundtrip(r"[[:alpha:]]"); 532 roundtrip(r"[[:^alpha:]]"); 533 roundtrip(r"[[:ascii:]]"); 534 roundtrip(r"[[:^ascii:]]"); 535 roundtrip(r"[[:blank:]]"); 536 roundtrip(r"[[:^blank:]]"); 537 roundtrip(r"[[:cntrl:]]"); 538 roundtrip(r"[[:^cntrl:]]"); 539 roundtrip(r"[[:digit:]]"); 540 roundtrip(r"[[:^digit:]]"); 541 roundtrip(r"[[:graph:]]"); 542 roundtrip(r"[[:^graph:]]"); 543 roundtrip(r"[[:lower:]]"); 544 roundtrip(r"[[:^lower:]]"); 545 roundtrip(r"[[:print:]]"); 546 roundtrip(r"[[:^print:]]"); 547 roundtrip(r"[[:punct:]]"); 548 roundtrip(r"[[:^punct:]]"); 549 roundtrip(r"[[:space:]]"); 550 roundtrip(r"[[:^space:]]"); 551 roundtrip(r"[[:upper:]]"); 552 roundtrip(r"[[:^upper:]]"); 553 roundtrip(r"[[:word:]]"); 554 roundtrip(r"[[:^word:]]"); 555 roundtrip(r"[[:xdigit:]]"); 556 roundtrip(r"[[:^xdigit:]]"); 557 558 roundtrip(r"\pL"); 559 roundtrip(r"\PL"); 560 roundtrip(r"\p{L}"); 561 roundtrip(r"\P{L}"); 562 roundtrip(r"\p{X=Y}"); 563 roundtrip(r"\P{X=Y}"); 564 roundtrip(r"\p{X:Y}"); 565 roundtrip(r"\P{X:Y}"); 566 roundtrip(r"\p{X!=Y}"); 567 roundtrip(r"\P{X!=Y}"); 568 } 569 } 570