# -*- coding: utf-8 -*-

"""Deroff.py, ported to Python from the venerable deroff.c.

Strips roff/man markup (requests, macros, escapes, tbl/eqn/pic blocks) from
text and leaves the readable content.  Written to run on Python 2 and 3.
"""

import gzip
import re
import string
import sys

IS_PY3 = sys.version_info[0] >= 3


class Deroffer:
    """Line-oriented roff stripper.

    The text still to be processed for the current line lives in ``self.s``;
    the parsing methods consume from its front and append plain text to
    ``self.output`` through ``condputs``.  Methods named ``*2`` are older
    alternative implementations kept for reference and compatibility.
    """

    # Special characters (``\(xx``) that count as letters inside a word.
    g_specs_specletter = {
        # Output composed latin1 letters
        "-D": "\320",
        "Sd": "\360",
        "Tp": "\376",
        "TP": "\336",
        "AE": "\306",
        "ae": "\346",
        "OE": "OE",
        "oe": "oe",
        ":a": "\344",
        ":A": "\304",
        ":e": "\353",
        ":E": "\313",
        ":i": "\357",
        ":I": "\317",
        ":o": "\366",
        ":O": "\326",
        ":u": "\374",
        ":U": "\334",
        ":y": "\377",
        "ss": "\337",
        "'A": "\301",
        "'E": "\311",
        "'I": "\315",
        "'O": "\323",
        "'U": "\332",
        "'Y": "\335",
        "'a": "\341",
        "'e": "\351",
        "'i": "\355",
        "'o": "\363",
        "'u": "\372",
        "'y": "\375",
        "^A": "\302",
        "^E": "\312",
        "^I": "\316",
        "^O": "\324",
        "^U": "\333",
        "^a": "\342",
        "^e": "\352",
        "^i": "\356",
        "^o": "\364",
        "^u": "\373",
        "`A": "\300",
        "`E": "\310",
        "`I": "\314",
        "`O": "\322",
        "`U": "\331",
        "`a": "\340",
        "`e": "\350",
        "`i": "\354",
        "`o": "\362",
        "`u": "\371",
        "~A": "\303",
        "~N": "\321",
        "~O": "\325",
        "~a": "\343",
        "~n": "\361",
        "~o": "\365",
        ",C": "\307",
        ",c": "\347",
        "/l": "/l",
        "/L": "/L",
        "/o": "\370",
        "/O": "\330",
        "oA": "\305",
        "oa": "\345",
        # Ligatures
        "fi": "fi",
        "ff": "ff",
        "fl": "fl",
        "Fi": "ffi",
        "Ff": "fff",
        "Fl": "ffl",
    }

    # Remaining special characters (``\(xx``) and their plain replacements.
    g_specs = {
        "mi": "-",
        "en": "-",
        "hy": "-",
        "em": "--",
        "lq": "“",
        "rq": "”",
        "Bq": ",,",
        "oq": "`",
        "cq": "'",
        "aq": "'",
        "dq": '"',
        "or": "|",
        "at": "@",
        "sh": "#",
        "Eu": "\244",
        "eu": "\244",
        "Do": "$",
        "ct": "\242",
        "Fo": "\253",
        "Fc": "\273",
        "fo": "<",
        "fc": ">",
        "r!": "\241",
        "r?": "\277",
        "Of": "\252",
        "Om": "\272",
        "pc": "\267",
        "S1": "\271",
        "S2": "\262",
        "S3": "\263",
        "<-": "<-",
        "->": "->",
        "<>": "<->",
        "ua": "^",
        "da": "v",
        "lA": "<=",
        "rA": "=>",
        "hA": "<=>",
        "uA": "^^",
        "dA": "vv",
        "ba": "|",
        "bb": "|",
        "br": "|",
        "bv": "|",
        "ru": "_",
        "ul": "_",
        "ci": "O",
        "bu": "o",
        "co": "\251",
        "rg": "\256",
        "tm": "(TM)",
        "dd": "||",
        "dg": "|",
        "ps": "\266",
        "sc": "\247",
        "de": "\260",
        "%0": "0/00",
        "14": "\274",
        "12": "\275",
        "34": "\276",
        "f/": "/",
        "sl": "/",
        "rs": "\\",
        "sq": "[]",
        "fm": "'",
        "ha": "^",
        "ti": "~",
        "lB": "[",
        "rB": "]",
        "lC": "{",
        "rC": "}",
        "la": "<",
        "ra": ">",
        "lh": "<=",
        "rh": "=>",
        "tf": "therefore",
        "~~": "~~",
        "~=": "~=",
        "!=": "!=",
        "**": "*",
        "+-": "\261",
        "<=": "<=",
        "==": "==",
        "=~": "=~",
        ">=": ">=",
        "AN": "\\/",
        "OR": "/\\",
        "no": "\254",
        "te": "there exists",
        "fa": "for all",
        "Ah": "aleph",
        "Im": "imaginary",
        "Re": "real",
        "if": "infinity",
        "md": "\267",
        "mo": "member of",
        "mu": "\327",
        "nm": "not member of",
        "pl": "+",
        "eq": "=",
        "pt": "oc",
        "pp": "perpendicular",
        "sb": "(=",
        "sp": "=)",
        "ib": "(-",
        "ip": "-)",
        "ap": "~",
        "is": "I",
        "sr": "root",
        "pd": "d",
        "c*": "(x)",
        "c+": "(+)",
        "ca": "cap",
        "cu": "U",
        "di": "\367",
        "gr": "V",
        "es": "{}",
        "CR": "_|",
        "st": "such that",
        "/_": "/_",
        "lz": "<>",
        "an": "-",
        # Output Greek
        "*A": "Alpha",
        "*B": "Beta",
        "*C": "Xi",
        "*D": "Delta",
        "*E": "Epsilon",
        "*F": "Phi",
        "*G": "Gamma",
        "*H": "Theta",
        "*I": "Iota",
        "*K": "Kappa",
        "*L": "Lambda",
        "*M": "Mu",
        "*N": "Nu",
        "*O": "Omicron",
        "*P": "Pi",
        "*Q": "Psi",
        "*R": "Rho",
        "*S": "Sigma",
        "*T": "Tau",
        "*U": "Upsilon",
        "*W": "Omega",
        "*X": "Chi",
        "*Y": "Eta",
        "*Z": "Zeta",
        "*a": "alpha",
        "*b": "beta",
        "*c": "xi",
        "*d": "delta",
        "*e": "epsilon",
        "*f": "phi",
        "+f": "phi",
        "*g": "gamma",
        "*h": "theta",
        "+h": "theta",
        "*i": "iota",
        "*k": "kappa",
        "*l": "lambda",
        "*m": "\265",
        "*n": "nu",
        "*o": "omicron",
        "*p": "pi",
        "+p": "omega",
        "*q": "psi",
        "*r": "rho",
        "*s": "sigma",
        "*t": "tau",
        "*u": "upsilon",
        "*w": "omega",
        "*x": "chi",
        "*y": "eta",
        "*z": "zeta",
        "ts": "sigma",
    }

    g_re_word = re.compile(r"[a-zA-Z_]+")  # equivalent to the word() method
    g_re_number = re.compile(r"[+-]?\d+")  # equivalent to the number() method
    g_re_esc_char = re.compile(
        r"""([a-zA-Z_]) |   # Word
            ([+-]?\d)   |   # Number
            \\              # Backslash (for escape seq)
        """,
        re.VERBOSE,
    )

    g_re_not_backslash_or_whitespace = re.compile(
        r"[^ \t\n\r\f\v\\]+"
    )  # Match a sequence of not backslash or whitespace

    g_re_newline_collapse = re.compile(r"\n{3,}")

    g_re_font = re.compile(
        r"""\\f(         # Starts with backslash f
            (\(\S{2})  | # Open paren, then two printable chars
            (\[\S*?\]) | # Open bracket, zero or more printable characters, then close bracket
            \S)          # Any printable character
        """,
        re.VERBOSE,
    )

    # This gets filled in in __init__ below (the macro_* methods it refers to
    # are not defined yet at this point in the class body).
    g_macro_dict = False

    # Requests that start a macro definition terminated by a ".." line.
    _MACRO_DEFINITION_REQUESTS = ("de", "de1", "dei", "dei1")

    def __init__(self):
        """Reset all parser state and build the shared macro dispatch table."""
        self.reg_table = {}  # strings defined with .ds, by name
        self.tr_from = ""  # accumulated .tr source characters
        self.tr_to = ""  # accumulated .tr replacement characters
        self.tr = ""  # translation table built from the two above
        self.nls = 2
        self.specletter = False  # last spec() matched a "letter" special
        self.refer = False  # inside a .[ ... .] block
        self.refer2 = False  # inside a .R1 ... .R2 block (recorded only)
        self.macro = 0  # truthy while output is suppressed for macro text
        self.nobody = False
        self.inlist = False
        self.inheader = False
        self.pic = False  # inside .PS ... .PE
        self.tbl = False  # inside .TS ... .TE
        self.tblstate = 0
        self.tblTab = ""  # tbl "tab(x)" separator character
        self.eqn = False  # inside .EQ ... .EN
        self.skipheaders = False
        self.skiplists = False
        self.ignore_sonx = False
        self.output = []  # emitted text fragments
        self.name = ""  # last name given to .Nm

        # tbl parsing states
        self.OPTIONS = 0
        self.FORMAT = 1
        self.DATA = 2

        # words is uninteresting and should be treated as false

        if not Deroffer.g_macro_dict:
            Deroffer.g_macro_dict = {
                "SH": Deroffer.macro_sh,
                "SS": Deroffer.macro_ss_ip,
                "IP": Deroffer.macro_ss_ip,
                "H ": Deroffer.macro_ss_ip,
                "I ": Deroffer.macro_i_ir,
                "IR": Deroffer.macro_i_ir,
                "IB": Deroffer.macro_i_ir,
                "B ": Deroffer.macro_i_ir,
                "BR": Deroffer.macro_i_ir,
                "BI": Deroffer.macro_i_ir,
                "R ": Deroffer.macro_i_ir,
                "RB": Deroffer.macro_i_ir,
                "RI": Deroffer.macro_i_ir,
                "AB": Deroffer.macro_i_ir,
                "Nm": Deroffer.macro_Nm,
                "] ": Deroffer.macro_close_bracket,
                "PS": Deroffer.macro_ps,
                "PE": Deroffer.macro_pe,
                "TS": Deroffer.macro_ts,
                "T&": Deroffer.macro_t_and,
                "TE": Deroffer.macro_te,
                "EQ": Deroffer.macro_eq,
                "EN": Deroffer.macro_en,
                "R1": Deroffer.macro_r1,
                "R2": Deroffer.macro_r2,
                "de": Deroffer.macro_de,
                "BL": Deroffer.macro_bl_vl,
                "VL": Deroffer.macro_bl_vl,
                "AL": Deroffer.macro_bl_vl,
                "LB": Deroffer.macro_bl_vl,
                "RL": Deroffer.macro_bl_vl,
                "ML": Deroffer.macro_bl_vl,
                "DL": Deroffer.macro_bl_vl,
                "BV": Deroffer.macro_bv,
                "LE": Deroffer.macro_le,
                "LP": Deroffer.macro_lp_pp,
                "PP": Deroffer.macro_lp_pp,
                "P\n": Deroffer.macro_lp_pp,
                "ds": Deroffer.macro_ds,
                "so": Deroffer.macro_so_nx,
                "nx": Deroffer.macro_so_nx,
                "tr": Deroffer.macro_tr,
                "sp": Deroffer.macro_sp,
            }

    def flush_output(self, where):
        """Write the collected output to *where* (if truthy) and clear it."""
        if where:
            where.write(self.get_output())
        self.output[:] = []

    def get_output(self):
        """Return the collected output with runs of 3+ newlines collapsed to one."""
        res = "".join(self.output)
        return Deroffer.g_re_newline_collapse.sub("\n", res)

    def putchar(self, c):
        """Append *c* to the output unconditionally and return it."""
        self.output.append(c)
        return c

    def _is_suppressed(self):
        """Return a truthy value while output must be discarded."""
        return (
            self.pic
            or self.eqn
            or self.refer
            or self.macro
            or (self.skiplists and self.inlist)
            or (self.skipheaders and self.inheader)
        )

    # This gets swapped in in place of condputs the first time tr gets modified
    def condputs_tr(self, str):
        """Like condputs, but apply the .tr translation table first."""
        if not self._is_suppressed():
            self.output.append(str.translate(self.tr))

    def condputs(self, str):
        """Append *str* to the output unless we are in a suppressed region."""
        if not self._is_suppressed():
            self.output.append(str)

    def str_at(self, idx):
        """Return the character at *idx* of the pending input, or "" past the end."""
        return self.s[idx : idx + 1]

    def skip_char(self, amt=1):
        """Drop *amt* characters from the front of the pending input."""
        self.s = self.s[amt:]

    def skip_leading_whitespace(self):
        """Drop leading whitespace (including newlines) from the pending input."""
        self.s = self.s.lstrip()

    def is_white(self, idx):
        """Return True if the character at *idx* is whitespace."""
        # Note this returns false for empty strings (idx >= len(self.s))
        return self.s[idx : idx + 1].isspace()

    def str_eq(self, offset, other, len):
        """Return True if *len* chars of the input at *offset* equal *other*'s prefix.

        The original definition lacked ``self`` and could not be called as a
        method.
        """
        return self.s[offset : offset + len] == other[:len]

    def prch(self, idx):
        """Return True if there is a character at *idx* other than space, tab or newline."""
        # Note that this return False for the empty string (idx >= len(self.s))
        ch = self.s[idx : idx + 1]
        return ch not in " \t\n"

    def font(self):
        """Consume a font escape (``\\fX``, ``\\f(XX``, ``\\f[...]``); return True if found."""
        match = Deroffer.g_re_font.match(self.s)
        if not match:
            return False
        self.skip_char(match.end())
        return True

    def font2(self):
        """Hand-written alternative to font(); return True if a font escape was consumed."""
        if self.s[0:2] == "\\f":
            c = self.str_at(2)
            if c == "(" and self.prch(3) and self.prch(4):
                self.skip_char(5)
                return True
            elif c == "[":
                self.skip_char(2)
                while self.prch(0) and self.str_at(0) != "]":
                    self.skip_char()
                if self.str_at(0) == "]":
                    self.skip_char()
                # The escape was consumed, so report success like font() does.
                return True
            elif self.prch(2):
                self.skip_char(3)
                return True
        return False

    def comment(self):
        """Discard the pending input up to (not including) the newline."""
        # Here we require that the string start with \"
        while self.str_at(0) and self.str_at(0) != "\n":
            self.skip_char()
        return True

    def numreq(self):
        """Consume a quoted numeric escape such as ``\\h'...'``; return True if found."""
        # We require that the string starts with backslash
        if self.str_at(1) in "hvwud" and self.str_at(2) == "'":
            # Suppress output while the argument is being parsed.
            self.macro += 1
            self.skip_char(3)
            while self.str_at(0) != "'" and self.esc_char():
                pass  # Weird
            if self.str_at(0) == "'":
                self.skip_char()
            self.macro -= 1
            return True
        return False

    def var(self):
        """Consume a register (``\\n``) or string (``\\*``) reference.

        Register references are dropped.  A string reference is replaced by
        the value stored by ``.ds``; an undefined string expands to nothing.
        Returns True if a reference was consumed.
        """
        reg = ""
        s0s1 = self.s[0:2]
        if s0s1 == "\\n":
            if self.s[3:5] == "dy":
                self.skip_char(5)
                return True
            elif self.str_at(2) == "(" and self.prch(3) and self.prch(4):
                self.skip_char(5)
                return True
            elif self.str_at(2) == "[" and self.prch(3):
                self.skip_char(3)
                while self.str_at(0) and self.str_at(0) != "]":
                    self.skip_char()
                # Also drop the closing bracket, which used to leak into the output.
                if self.str_at(0) == "]":
                    self.skip_char()
                return True
            elif self.prch(2):
                self.skip_char(3)
                return True
        elif s0s1 == "\\*":
            if self.str_at(2) == "(" and self.prch(3) and self.prch(4):
                reg = self.s[3:5]
                self.skip_char(5)
            elif self.str_at(2) == "[" and self.prch(3):
                self.skip_char(3)
                while self.str_at(0) and self.str_at(0) != "]":
                    reg = reg + self.str_at(0)
                    self.skip_char()
                if self.s[0:1] == "]":
                    self.skip_char()
                else:
                    return False
            elif self.prch(2):
                reg = self.str_at(2)
                self.skip_char(3)
            else:
                return False

            if reg in self.reg_table:
                # Expand the whole stored value, then resume the current line
                # where the reference ended (the remainder used to be lost).
                old_s = self.s
                self.s = self.reg_table[reg]
                self.text()
                self.s = old_s
            # The reference has been consumed either way; returning False here
            # made callers emit the next character raw, breaking any escape
            # that directly followed an undefined string.
            return True
        return False

    def size(self):
        """Consume a point-size escape (``\\sN``, ``\\s+N``, ``\\s-N``); return True if found."""
        # We require that the string starts with \s
        if self.digit(2) or (self.str_at(2) in "-+" and self.digit(3)):
            self.skip_char(3)
            while self.digit(0):
                self.skip_char()
            return True
        return False

    def spec(self):
        """Consume a special character (``\\(xx``) or ``\\%``; return True if found.

        Sets ``self.specletter`` when the special counts as part of a word.
        Unknown two-character names are consumed and produce no output.
        """
        self.specletter = False
        if self.s[0:2] == "\\(" and self.prch(2) and self.prch(3):
            key = self.s[2:4]
            if key in Deroffer.g_specs_specletter:
                self.condputs(Deroffer.g_specs_specletter[key])
                self.specletter = True
            elif key in Deroffer.g_specs:
                self.condputs(Deroffer.g_specs[key])
            self.skip_char(4)
            return True
        elif self.s.startswith("\\%"):
            self.specletter = True
            self.skip_char(2)
            return True
        else:
            return False

    def esc(self):
        """Consume a generic two-character escape; return False at end of input."""
        # We require that the string start with backslash
        c = self.s[1:2]
        if not c:
            return False
        if c in "eE":
            self.condputs("\\")
        elif c in "t":
            self.condputs("\t")
        elif c in "0~":
            self.condputs(" ")
        elif c in "|^&:":
            pass  # zero-width or spacing escapes produce nothing
        else:
            self.condputs(c)
        self.skip_char(2)
        return True

    def word(self):
        """Emit a run of word characters and letter-like specials; return True if any."""
        got_something = False
        while True:
            match = Deroffer.g_re_word.match(self.s)
            if not match:
                break
            got_something = True
            self.condputs(match.group(0))
            self.skip_char(match.end(0))

            # Consume all specials
            while self.spec():
                if not self.specletter:
                    break

        return got_something

    def text(self):
        """Process the whole pending input as text, interpreting escapes."""
        while True:
            idx = self.s.find("\\")
            if idx == -1:
                self.condputs(self.s)
                self.s = ""
                break
            else:
                self.condputs(self.s[:idx])
                self.skip_char(idx)
                if not self.esc_char_backslash():
                    # Not a recognised escape: emit the character as-is.
                    self.condputs(self.str_at(0))
                    self.skip_char()
        return True

    def letter(self, idx):
        """Return True if the character at *idx* is alphabetic or an underscore."""
        ch = self.str_at(idx)
        return ch.isalpha() or ch == "_"  # underscore is used in C identifiers

    def digit(self, idx):
        """Return True if the character at *idx* is a digit."""
        ch = self.str_at(idx)
        return ch.isdigit()

    def number(self):
        """Emit an optionally signed integer at the front of the input; return True if found."""
        match = Deroffer.g_re_number.match(self.s)
        if not match:
            return False
        else:
            self.condputs(match.group(0))
            self.skip_char(match.end())
            return True

    def esc_char_backslash(self):
        """Dispatch on the character after a leading backslash."""
        # Like esc_char, but we know the string starts with a backslash
        c = self.s[1:2]
        if c == '"':
            return self.comment()
        elif c == "f":
            return self.font()
        elif c == "s":
            return self.size()
        elif c in "hvwud":
            return self.numreq()
        elif c in "n*":
            return self.var()
        elif c == "(":
            return self.spec()
        else:
            return self.esc()

    def esc_char(self):
        """Consume one escape, word or number; return True if something matched."""
        if self.s[0:1] == "\\":
            return self.esc_char_backslash()
        return self.word() or self.number()

    def quoted_arg(self):
        """Process a macro argument that starts with a double quote.

        Stops at the next double quote (left in the input) or at end of input.
        Returns False if the input does not start with a double quote.
        """
        if self.str_at(0) == '"':
            self.skip_char()
            while self.s and self.str_at(0) != '"':
                if not self.esc_char():
                    if self.s:
                        self.condputs(self.str_at(0))
                        self.skip_char()
            return True
        else:
            return False

    def text_arg(self):
        """Process one whitespace-delimited argument; return True if anything was consumed."""
        # PCA: The deroff.c textArg() disallowed quotes at the start of an argument
        # I'm not sure if this was a bug or not
        got_something = False
        while True:
            match = Deroffer.g_re_not_backslash_or_whitespace.match(self.s)
            if match:
                # Output the characters in the match
                self.condputs(match.group(0))
                self.skip_char(match.end(0))
                got_something = True

            # Next is either an escape, or whitespace, or the end
            # If it's the whitespace or the end, we're done
            if not self.s or self.is_white(0):
                return got_something

            # Try an escape
            if not self.esc_char():
                # Some busted escape? Just output it
                self.condputs(self.str_at(0))
                self.skip_char()
                got_something = True

    def text_arg2(self):
        """Character-at-a-time alternative to text_arg()."""
        if not self.esc_char():
            if self.s and not self.is_white(0):
                self.condputs(self.str_at(0))
                self.skip_char()
            else:
                return False
        while True:
            if not self.esc_char():
                if self.s and not self.is_white(0):
                    self.condputs(self.str_at(0))
                    self.skip_char()
                else:
                    return True

    # Macro functions
    #
    # Each handler is called with self.s starting at the two-character request
    # name.  A truthy return means the line is finished; a falsy return lets
    # request_or_macro() go on to process the macro arguments as text.

    def macro_sh(self):
        """.SH: note whether this is the SYNOPSIS header; arguments are still processed."""
        header_strs = [
            " SYNOPSIS",
            ' "SYNOPSIS',
            # German header, stored as the latin-1 character the input carries.
            " \334BERSICHT",
            ' "\334BERSICHT',
            # Mis-encoded spellings from the original table, kept for compatibility.
            " ‹BERSICHT",
            ' "‹BERSICHT',
        ]
        rest = self.s[2:]
        if any(rest.startswith(header_str) for header_str in header_strs):
            self.inheader = True
        else:
            # Did not find a header string
            self.inheader = False
            self.nobody = True

    def macro_ss_ip(self):
        """.SS / .IP / .H: mark the line as a heading and process its arguments."""
        self.nobody = True
        return False

    def macro_i_ir(self):
        """Font macros (.I, .B, .BR, ...): just process the arguments."""
        return False

    def macro_Nm(self):
        """.Nm: remember the given name, or emit the remembered one when bare."""
        if self.s == "Nm\n":
            self.condputs(self.name)
        else:
            self.name = self.s[3:].strip() + " "
        return True

    def macro_close_bracket(self):
        """``.] ``: leave the refer block and process the arguments."""
        self.refer = False
        return False

    def macro_ps(self):
        """.PS: start of a pic block."""
        if self.is_white(2):
            self.pic = True
        self.condputs("\n")
        return True

    def macro_pe(self):
        """.PE: end of a pic block."""
        if self.is_white(2):
            self.pic = False
        self.condputs("\n")
        return True

    def macro_ts(self):
        """.TS: start of a table; the next line is parsed as tbl options."""
        if self.is_white(2):
            self.tbl, self.tblstate = True, self.OPTIONS
        self.condputs("\n")
        return True

    def macro_t_and(self):
        """.T&: a new table format section follows."""
        if self.is_white(2):
            self.tbl, self.tblstate = True, self.FORMAT
        self.condputs("\n")
        return True

    def macro_te(self):
        """.TE: end of a table."""
        if self.is_white(2):
            self.tbl = False
        self.condputs("\n")
        return True

    def macro_eq(self):
        """.EQ: start of an eqn block."""
        if self.is_white(2):
            self.eqn = True
        self.condputs("\n")
        return True

    def macro_en(self):
        """.EN: end of an eqn block."""
        if self.is_white(2):
            self.eqn = False
        self.condputs("\n")
        return True

    def macro_r1(self):
        """.R1: record the start of a refer command block."""
        if self.is_white(2):
            self.refer2 = True
        self.condputs("\n")
        return True

    def macro_r2(self):
        """.R2: record the end of a refer command block."""
        if self.is_white(2):
            self.refer2 = False
        self.condputs("\n")
        return True

    def macro_de(self):
        """.de: start of a macro definition; suppress output until the ".." line.

        The table dispatches on the first two characters only, so check the
        full request name: other requests beginning with "de" must not switch
        output off.  The original assigned a local ``macro`` and therefore
        never suppressed anything.
        """
        request = self.s.split(None, 1)
        if request and request[0] in Deroffer._MACRO_DEFINITION_REQUESTS:
            self.macro = True
        self.condputs("\n")
        return True

    def macro_bl_vl(self):
        """List-start macros (.BL, .VL, ...): enter list mode."""
        if self.is_white(2):
            self.inlist = True
        self.condputs("\n")
        return True

    def macro_bv(self):
        """.BVL: enter list mode."""
        # The original called the nonexistent self.white() here.
        if self.str_at(2) == "L" and self.is_white(3):
            self.inlist = True
        self.condputs("\n")
        return True

    def macro_le(self):
        """.LE: leave list mode."""
        if self.is_white(2):
            self.inlist = False
        self.condputs("\n")
        return True

    def macro_lp_pp(self):
        """Paragraph macros (.LP, .PP, .P): emit a newline."""
        self.condputs("\n")
        return True

    def macro_ds(self):
        """.ds NAME VALUE: store VALUE (the rest of the line) under NAME."""
        self.skip_char(2)
        self.skip_leading_whitespace()
        if self.str_at(0):
            # Split off the name only; the value may itself contain spaces.
            # (Splitting twice dropped every multi-word definition.)
            comps = self.s.split(None, 1)
            if len(comps) == 2:
                name, value = comps
                self.reg_table[name] = value.rstrip()
        self.condputs("\n")
        return True

    def macro_so_nx(self):
        """.so / .nx: include directives are ignored."""
        # We always ignore include directives
        # deroff.c for some reason allowed this to fall through to the 'tr' case
        # I think that was just a bug so I won't replicate it
        return True

    def macro_tr(self):
        """.tr abcd...: translate a->b, c->d, ... in all later output."""
        self.skip_char(2)
        self.skip_leading_whitespace()
        while self.s and self.str_at(0) != "\n":
            c = self.str_at(0)
            ns = self.str_at(1)
            self.skip_char(2)
            if not ns or ns == "\n":
                # An unpaired final character maps to a space.
                ns = " "
            self.tr_from += c
            self.tr_to += ns

        # Update our table, then swap in the slower tr-savvy condputs
        try:  # Python2
            self.tr = string.maketrans(self.tr_from, self.tr_to)
        except AttributeError:  # Python3
            self.tr = "".maketrans(self.tr_from, self.tr_to)
        self.condputs = self.condputs_tr
        return True

    def macro_sp(self):
        """.sp: emit a newline."""
        self.condputs("\n")
        return True

    def macro_other(self):
        """Any request not in the table: emit a newline and drop the line."""
        self.condputs("\n")
        return True

    def request_or_macro(self):
        """Handle a line that starts with a control character ("." or "'").

        Returns True when the line has been dealt with.
        """
        # s[0] is period or open single quote
        self.skip_char()
        # Look at the first character after the control character.  (This used
        # to read the second one, so ".[", ".]" and ".." were never recognised.)
        s0 = self.s[0:1]
        if s0 == "\\":
            if self.str_at(1) == '"':
                # A comment line
                self.condputs("\n")
                return True
        elif s0 == "[":
            self.refer = True
            self.condputs("\n")
            return True
        elif s0 == "]":
            self.refer = False
            self.skip_char()
            return self.text()
        elif s0 == ".":
            # ".." ends a macro definition
            self.macro = False
            self.condputs("\n")
            return True

        self.nobody = False
        s0s1 = self.s[0:2]

        macro_func = Deroffer.g_macro_dict.get(s0s1, Deroffer.macro_other)
        if macro_func(self):
            return True

        if self.skipheaders and self.nobody:
            return True

        # Skip over the macro name, then process each argument as text.
        self.skip_leading_whitespace()
        while self.s and not self.is_white(0):
            self.skip_char()
        self.skip_leading_whitespace()
        while True:
            if not self.quoted_arg() and not self.text_arg():
                if self.s:
                    self.condputs(self.str_at(0))
                    self.skip_char()
                else:
                    return True

    def request_or_macro2(self):
        """Older entry point, kept for compatibility.

        It used to repeat the whole dispatch as a long if/elif chain (with
        the same ``self.white`` and local ``macro`` mistakes as the handlers
        had); it now shares the table-driven implementation.
        """
        return self.request_or_macro()

    def do_tbl(self):
        """Process one line inside a .TS/.TE table according to the tbl state."""
        if self.tblstate == self.OPTIONS:
            while self.s and self.str_at(0) != ";" and self.str_at(0) != "\n":
                self.skip_leading_whitespace()
                if not self.str_at(0).isalpha():
                    # deroff.c has a bug where it can loop forever here...we try to work around it
                    self.skip_char()
                else:  # Parse option
                    option = self.s
                    arg = ""

                    idx = 0
                    while option[idx : idx + 1].isalpha():
                        idx += 1

                    if option[idx : idx + 1] == "(":
                        option = option[:idx]
                        self.s = self.s[idx + 1 :]
                        arg = self.s
                    else:
                        self.s = ""

                    if arg:
                        idx = arg.find(")")
                        if idx != -1:
                            arg = arg[:idx]
                            self.s = self.s[idx + 1 :]

                    if option.lower() == "tab":
                        # Remember the column separator for the data rows.
                        self.tblTab = arg[0:1]

            self.tblstate = self.FORMAT
            self.condputs("\n")

        elif self.tblstate == self.FORMAT:
            # Format lines are dropped; a "." marks the last one.
            while self.s and self.str_at(0) != "." and self.str_at(0) != "\n":
                self.skip_leading_whitespace()
                if self.str_at(0):
                    self.skip_char()

            if self.str_at(0) == ".":
                self.tblstate = self.DATA
            self.condputs("\n")
        elif self.tblstate == self.DATA:
            if self.tblTab:
                self.s = self.s.replace(self.tblTab, "\t")
            self.text()
        return True

    def do_line(self):
        """Process the line in self.s; return False to stop processing."""
        if self.s[0:1] in ".'":
            if not self.request_or_macro():
                return False
        elif self.tbl:
            self.do_tbl()
        else:
            self.text()
        return True

    def deroff(self, str):
        """Strip roff markup from *str*, appending the result to the output."""
        for line in str.split("\n"):
            self.s = line + "\n"
            if not self.do_line():
                break
            # self.putchar('\n')


def _read_roff_source(path):
    """Return the contents of *path*, transparently handling ``.gz`` files.

    Under Python 3 the text is decoded as latin-1 for both plain and gzipped
    files, so every byte maps to a character and decoding cannot fail.
    """
    if path.endswith(".gz"):
        f = gzip.open(path, "rb")
    elif IS_PY3:
        f = open(path, "r", encoding="latin-1")
    else:
        f = open(path, "r")
    try:
        data = f.read()
    finally:
        f.close()
    if IS_PY3 and isinstance(data, bytes):
        data = data.decode("latin-1")
    return data


def deroff_files(files):
    """Deroff each file in *files* to stdout, announcing its name on stderr."""
    for arg in files:
        sys.stderr.write(arg + "\n")
        d = Deroffer()
        d.deroff(_read_roff_source(arg))
        d.flush_output(sys.stdout)


if __name__ == "__main__":
    paths = sys.argv[1:]
    # Flip to True to profile a run instead of just converting the files.
    run_profiler = False
    if not run_profiler:
        deroff_files(paths)
    else:
        import cProfile, profile, pstats

        profile.run("deroff_files(paths)", "fooprof")
        p = pstats.Stats("fooprof")
        p.sort_stats("time").print_stats(100)
        # p.sort_stats('calls').print_callers(.5, 'startswith')