1# -*- coding: utf-8 -*-
2
3""" Deroff.py, ported to Python from the venerable deroff.c """
4
5import sys, re, string
6
7IS_PY3 = sys.version_info[0] >= 3
8
9
10class Deroffer:
11
    # Two-character special-character names (the "xx" of a "\(xx" escape)
    # that spec() treats as letters: when one of these keys matches, spec()
    # emits the mapped text and sets self.specletter to True.
    # The octal escapes are code points in the Latin-1 range.
    g_specs_specletter = {
        # Output composed latin1 letters
        "-D": "\320",
        "Sd": "\360",
        "Tp": "\376",
        "TP": "\336",
        "AE": "\306",
        "ae": "\346",
        "OE": "OE",
        "oe": "oe",
        ":a": "\344",
        ":A": "\304",
        ":e": "\353",
        ":E": "\313",
        ":i": "\357",
        ":I": "\317",
        ":o": "\366",
        ":O": "\326",
        ":u": "\374",
        ":U": "\334",
        ":y": "\377",
        "ss": "\337",
        "'A": "\301",
        "'E": "\311",
        "'I": "\315",
        "'O": "\323",
        "'U": "\332",
        "'Y": "\335",
        "'a": "\341",
        "'e": "\351",
        "'i": "\355",
        "'o": "\363",
        "'u": "\372",
        "'y": "\375",
        "^A": "\302",
        "^E": "\312",
        "^I": "\316",
        "^O": "\324",
        "^U": "\333",
        "^a": "\342",
        "^e": "\352",
        "^i": "\356",
        "^o": "\364",
        "^u": "\373",
        "`A": "\300",
        "`E": "\310",
        "`I": "\314",
        "`O": "\322",
        "`U": "\331",
        "`a": "\340",
        "`e": "\350",
        "`i": "\354",
        "`o": "\362",
        "`u": "\371",
        "~A": "\303",
        "~N": "\321",
        "~O": "\325",
        "~a": "\343",
        "~n": "\361",
        "~o": "\365",
        ",C": "\307",
        ",c": "\347",
        "/l": "/l",
        "/L": "/L",
        "/o": "\370",
        "/O": "\330",
        "oA": "\305",
        "oa": "\345",
        # Ligatures
        "fi": "fi",
        "ff": "ff",
        "fl": "fl",
        "Fi": "ffi",
        "Ff": "fff",
        "Fl": "ffl",
    }
88
    # Two-character special-character names (the "xx" of a "\(xx" escape)
    # that are NOT letters: spec() emits the mapped replacement text and
    # leaves self.specletter False. Consulted only when the name is not
    # found in g_specs_specletter.
    g_specs = {
        "mi": "-",
        "en": "-",
        "hy": "-",
        "em": "--",
        "lq": "“",
        "rq": "”",
        "Bq": ",,",
        "oq": "`",
        "cq": "'",
        "aq": "'",
        "dq": '"',
        "or": "|",
        "at": "@",
        "sh": "#",
        "Eu": "\244",
        "eu": "\244",
        "Do": "$",
        "ct": "\242",
        "Fo": "\253",
        "Fc": "\273",
        "fo": "<",
        "fc": ">",
        "r!": "\241",
        "r?": "\277",
        "Of": "\252",
        "Om": "\272",
        "pc": "\267",
        "S1": "\271",
        "S2": "\262",
        "S3": "\263",
        "<-": "<-",
        "->": "->",
        "<>": "<->",
        "ua": "^",
        "da": "v",
        "lA": "<=",
        "rA": "=>",
        "hA": "<=>",
        "uA": "^^",
        "dA": "vv",
        "ba": "|",
        "bb": "|",
        "br": "|",
        "bv": "|",
        "ru": "_",
        "ul": "_",
        "ci": "O",
        "bu": "o",
        "co": "\251",
        "rg": "\256",
        "tm": "(TM)",
        "dd": "||",
        "dg": "|",
        "ps": "\266",
        "sc": "\247",
        "de": "\260",
        "%0": "0/00",
        "14": "\274",
        "12": "\275",
        "34": "\276",
        "f/": "/",
        "sl": "/",
        "rs": "\\",
        "sq": "[]",
        "fm": "'",
        "ha": "^",
        "ti": "~",
        "lB": "[",
        "rB": "]",
        "lC": "{",
        "rC": "}",
        "la": "<",
        "ra": ">",
        "lh": "<=",
        "rh": "=>",
        "tf": "therefore",
        "~~": "~~",
        "~=": "~=",
        "!=": "!=",
        "**": "*",
        "+-": "\261",
        "<=": "<=",
        "==": "==",
        "=~": "=~",
        ">=": ">=",
        "AN": "\\/",
        "OR": "/\\",
        "no": "\254",
        "te": "there exists",
        "fa": "for all",
        "Ah": "aleph",
        "Im": "imaginary",
        "Re": "real",
        "if": "infinity",
        "md": "\267",
        "mo": "member of",
        "mu": "\327",
        "nm": "not member of",
        "pl": "+",
        "eq": "=",
        "pt": "oc",
        "pp": "perpendicular",
        "sb": "(=",
        "sp": "=)",
        "ib": "(-",
        "ip": "-)",
        "ap": "~",
        "is": "I",
        "sr": "root",
        "pd": "d",
        "c*": "(x)",
        "c+": "(+)",
        "ca": "cap",
        "cu": "U",
        "di": "\367",
        "gr": "V",
        "es": "{}",
        "CR": "_|",
        "st": "such that",
        "/_": "/_",
        "lz": "<>",
        "an": "-",
        # Output Greek
        "*A": "Alpha",
        "*B": "Beta",
        "*C": "Xi",
        "*D": "Delta",
        "*E": "Epsilon",
        "*F": "Phi",
        "*G": "Gamma",
        "*H": "Theta",
        "*I": "Iota",
        "*K": "Kappa",
        "*L": "Lambda",
        "*M": "Mu",
        "*N": "Nu",
        "*O": "Omicron",
        "*P": "Pi",
        "*Q": "Psi",
        "*R": "Rho",
        "*S": "Sigma",
        "*T": "Tau",
        "*U": "Upsilon",
        "*W": "Omega",
        "*X": "Chi",
        "*Y": "Eta",
        "*Z": "Zeta",
        "*a": "alpha",
        "*b": "beta",
        "*c": "xi",
        "*d": "delta",
        "*e": "epsilon",
        "*f": "phi",
        "+f": "phi",
        "*g": "gamma",
        "*h": "theta",
        "+h": "theta",
        "*i": "iota",
        "*k": "kappa",
        "*l": "lambda",
        "*m": "\265",
        "*n": "nu",
        "*o": "omicron",
        "*p": "pi",
        "+p": "omega",
        "*q": "psi",
        "*r": "rho",
        "*s": "sigma",
        "*t": "tau",
        "*u": "upsilon",
        "*w": "omega",
        "*x": "chi",
        "*y": "eta",
        "*z": "zeta",
        "ts": "sigma",
    }
266
    # Pre-compiled patterns shared by all instances. Each is applied with
    # .match(), i.e. anchored at the start of the remaining input self.s.

    # Run of letters/underscores; used by word()
    g_re_word = re.compile(r"[a-zA-Z_]+")  # equivalent to the word() method
    # Optionally signed run of digits; used by number()
    g_re_number = re.compile(r"[+-]?\d+")  # equivalent to the number() method
    # One word character, the start of a number, or a backslash
    g_re_esc_char = re.compile(
        r"""([a-zA-Z_]) |   # Word
                                   ([+-]?\d)   |   # Number
                                   \\              # Backslash (for escape seq)
                               """,
        re.VERBOSE,
    )

    g_re_not_backslash_or_whitespace = re.compile(
        r"[^ \t\n\r\f\v\\]+"
    )  # Match a sequence of not backslash or whitespace

    # Three or more consecutive newlines; get_output() replaces each such
    # run with a single newline
    g_re_newline_collapse = re.compile(r"\n{3,}")

    # Font escape in its three spellings: \fX, \f(XX and \f[...]; used by font()
    g_re_font = re.compile(
        r"""\\f(         # Starts with backslash f
                               (\(\S{2}) |  # Open paren, then two printable chars
                               (\[\S*?\]) |  # Open bracket, zero or more printable characters, then close bracket
                               \S)          # Any printable character
                            """,
        re.VERBOSE,
    )

    # Dispatch table from two-character macro name to handler method.
    # Starts out falsy and is populated once, by the first __init__ call.
    g_macro_dict = False
294
295    def __init__(self):
296        self.reg_table = {}
297        self.tr_from = ""
298        self.tr_to = ""
299        self.tr = ""
300        self.nls = 2
301        self.specletter = False
302        self.refer = False
303        self.macro = 0
304        self.nobody = False
305        self.inlist = False
306        self.inheader = False
307        self.pic = False
308        self.tbl = False
309        self.tblstate = 0
310        self.tblTab = ""
311        self.eqn = False
312        self.skipheaders = False
313        self.skiplists = False
314        self.ignore_sonx = False
315        self.output = []
316        self.name = ""
317
318        self.OPTIONS = 0
319        self.FORMAT = 1
320        self.DATA = 2
321
322        # words is uninteresting and should be treated as false
323
324        if not Deroffer.g_macro_dict:
325            Deroffer.g_macro_dict = {
326                "SH": Deroffer.macro_sh,
327                "SS": Deroffer.macro_ss_ip,
328                "IP": Deroffer.macro_ss_ip,
329                "H ": Deroffer.macro_ss_ip,
330                "I ": Deroffer.macro_i_ir,
331                "IR": Deroffer.macro_i_ir,
332                "IB": Deroffer.macro_i_ir,
333                "B ": Deroffer.macro_i_ir,
334                "BR": Deroffer.macro_i_ir,
335                "BI": Deroffer.macro_i_ir,
336                "R ": Deroffer.macro_i_ir,
337                "RB": Deroffer.macro_i_ir,
338                "RI": Deroffer.macro_i_ir,
339                "AB": Deroffer.macro_i_ir,
340                "Nm": Deroffer.macro_Nm,
341                "] ": Deroffer.macro_close_bracket,
342                "PS": Deroffer.macro_ps,
343                "PE": Deroffer.macro_pe,
344                "TS": Deroffer.macro_ts,
345                "T&": Deroffer.macro_t_and,
346                "TE": Deroffer.macro_te,
347                "EQ": Deroffer.macro_eq,
348                "EN": Deroffer.macro_en,
349                "R1": Deroffer.macro_r1,
350                "R2": Deroffer.macro_r2,
351                "de": Deroffer.macro_de,
352                "BL": Deroffer.macro_bl_vl,
353                "VL": Deroffer.macro_bl_vl,
354                "AL": Deroffer.macro_bl_vl,
355                "LB": Deroffer.macro_bl_vl,
356                "RL": Deroffer.macro_bl_vl,
357                "ML": Deroffer.macro_bl_vl,
358                "DL": Deroffer.macro_bl_vl,
359                "BV": Deroffer.macro_bv,
360                "LE": Deroffer.macro_le,
361                "LP": Deroffer.macro_lp_pp,
362                "PP": Deroffer.macro_lp_pp,
363                "P\n": Deroffer.macro_lp_pp,
364                "ds": Deroffer.macro_ds,
365                "so": Deroffer.macro_so_nx,
366                "nx": Deroffer.macro_so_nx,
367                "tr": Deroffer.macro_tr,
368                "sp": Deroffer.macro_sp,
369            }
370
371    def flush_output(self, where):
372        if where:
373            where.write(self.get_output())
374        self.output[:] = []
375
376    def get_output(self):
377        res = "".join(self.output)
378        clean_res = Deroffer.g_re_newline_collapse.sub("\n", res)
379        return clean_res
380
381    def putchar(self, c):
382        self.output.append(c)
383        return c
384
385    # This gets swapped in in place of condputs the first time tr gets modified
386    def condputs_tr(self, str):
387        special = (
388            self.pic
389            or self.eqn
390            or self.refer
391            or self.macro
392            or (self.skiplists and self.inlist)
393            or (self.skipheaders and self.inheader)
394        )
395        if not special:
396            self.output.append(str.translate(self.tr))
397
398    def condputs(self, str):
399        special = (
400            self.pic
401            or self.eqn
402            or self.refer
403            or self.macro
404            or (self.skiplists and self.inlist)
405            or (self.skipheaders and self.inheader)
406        )
407        if not special:
408            self.output.append(str)
409
410    def str_at(self, idx):
411        return self.s[idx : idx + 1]
412
413    def skip_char(self, amt=1):
414        self.s = self.s[amt:]
415
416    def skip_leading_whitespace(self):
417        self.s = self.s.lstrip()
418
419    def is_white(self, idx):
420        # Note this returns false for empty strings (idx >= len(self.s))
421        return self.s[idx : idx + 1].isspace()
422
423    def str_eq(offset, other, len):
424        return self.s[offset : offset + len] == other[:len]
425
426    def prch(self, idx):
427        # Note that this return False for the empty string (idx >= len(self.s))
428        ch = self.s[idx : idx + 1]
429        return ch not in " \t\n"
430
431    def font(self):
432        match = Deroffer.g_re_font.match(self.s)
433        if not match:
434            return False
435        self.skip_char(match.end())
436        return True
437
438    def font2(self):
439        if self.s[0:2] == "\\f":
440            c = self.str_at(2)
441            if c == "(" and self.prch(3) and self.prch(4):
442                self.skip_char(5)
443                return True
444            elif c == "[":
445                self.skip_char(2)
446                while self.prch(0) and self.str_at(0) != "]":
447                    self.skip_char()
448                if self.str_at(0) == "]":
449                    self.skip_char()
450            elif self.prch(2):
451                self.skip_char(3)
452                return True
453        return False
454
455    def comment(self):
456        # Here we require that the string start with \"
457        while self.str_at(0) and self.str_at(0) != "\n":
458            self.skip_char()
459        return True
460
461    def numreq(self):
462        # We require that the string starts with backslash
463        if self.str_at(1) in "hvwud" and self.str_at(2) == "'":
464            self.macro += 1
465            self.skip_char(3)
466            while self.str_at(0) != "'" and self.esc_char():
467                pass  # Weird
468            if self.str_at(0) == "'":
469                self.skip_char()
470            self.macro -= 1
471            return True
472        return False
473
474    def var(self):
475        reg = ""
476        s0s1 = self.s[0:2]
477        if s0s1 == "\\n":
478            if self.s[3:5] == "dy":
479                self.skip_char(5)
480                return True
481            elif self.str_at(2) == "(" and self.prch(3) and self.prch(4):
482                self.skip_char(5)
483                return True
484            elif self.str_at(2) == "[" and self.prch(3):
485                self.skip_char(3)
486                while self.str_at(0) and self.str_at(0) != "]":
487                    self.skip_char()
488                return True
489            elif self.prch(2):
490                self.skip_char(3)
491                return True
492        elif s0s1 == "\\*":
493            if self.str_at(2) == "(" and self.prch(3) and self.prch(4):
494                reg = self.s[3:5]
495                self.skip_char(5)
496            elif self.str_at(2) == "[" and self.prch(3):
497                self.skip_char(3)
498                while self.str_at(0) and self.str_at(0) != "]":
499                    reg = reg + self.str_at(0)
500                    self.skip_char()
501                if self.s[0:1] == "]":
502                    self.skip_char()
503                else:
504                    return False
505            elif self.prch(2):
506                reg = self.str_at(2)
507                self.skip_char(3)
508            else:
509                return False
510
511            if reg in self.reg_table:
512                old_s = self.s
513                self.s = self.reg_table[reg]
514                self.text_arg()
515                return True
516        return False
517
518    def size(self):
519        # We require that the string starts with \s
520        if self.digit(2) or (self.str_at(2) in "-+" and self.digit(3)):
521            self.skip_char(3)
522            while self.digit(0):
523                self.skip_char()
524            return True
525        return False
526
527    def spec(self):
528        self.specletter = False
529        if self.s[0:2] == "\\(" and self.prch(2) and self.prch(3):
530            key = self.s[2:4]
531            if key in Deroffer.g_specs_specletter:
532                self.condputs(Deroffer.g_specs_specletter[key])
533                self.specletter = True
534            elif key in Deroffer.g_specs:
535                self.condputs(Deroffer.g_specs[key])
536            self.skip_char(4)
537            return True
538        elif self.s.startswith("\\%"):
539            self.specletter = True
540            self.skip_char(2)
541            return True
542        else:
543            return False
544
545    def esc(self):
546        # We require that the string start with backslash
547        c = self.s[1:2]
548        if not c:
549            return False
550        if c in "eE":
551            self.condputs("\\")
552        elif c in "t":
553            self.condputs("\t")
554        elif c in "0~":
555            self.condputs(" ")
556        elif c in "|^&:":
557            pass
558        else:
559            self.condputs(c)
560        self.skip_char(2)
561        return True
562
563    def word(self):
564        got_something = False
565        while True:
566            match = Deroffer.g_re_word.match(self.s)
567            if not match:
568                break
569            got_something = True
570            self.condputs(match.group(0))
571            self.skip_char(match.end(0))
572
573            # Consume all specials
574            while self.spec():
575                if not self.specletter:
576                    break
577
578        return got_something
579
580    def text(self):
581        while True:
582            idx = self.s.find("\\")
583            if idx == -1:
584                self.condputs(self.s)
585                self.s = ""
586                break
587            else:
588                self.condputs(self.s[:idx])
589                self.skip_char(idx)
590                if not self.esc_char_backslash():
591                    self.condputs(self.str_at(0))
592                    self.skip_char()
593        return True
594
595    def letter(self, idx):
596        ch = self.str_at(idx)
597        return ch.isalpha() or ch == "_"  # underscore is used in C identifiers
598
599    def digit(self, idx):
600        ch = self.str_at(idx)
601        return ch.isdigit()
602
603    def number(self):
604        match = Deroffer.g_re_number.match(self.s)
605        if not match:
606            return False
607        else:
608            self.condputs(match.group(0))
609            self.skip_char(match.end())
610            return True
611
612    def esc_char_backslash(self):
613        # Like esc_char, but we know the string starts with a backslash
614        c = self.s[1:2]
615        if c == '"':
616            return self.comment()
617        elif c == "f":
618            return self.font()
619        elif c == "s":
620            return self.size()
621        elif c in "hvwud":
622            return self.numreq()
623        elif c in "n*":
624            return self.var()
625        elif c == "(":
626            return self.spec()
627        else:
628            return self.esc()
629
630    def esc_char(self):
631        if self.s[0:1] == "\\":
632            return self.esc_char_backslash()
633        return self.word() or self.number()
634
635    def quoted_arg(self):
636        if self.str_at(0) == '"':
637            self.skip_char()
638            while self.s and self.str_at(0) != '"':
639                if not self.esc_char():
640                    if self.s:
641                        self.condputs(self.str_at(0))
642                        self.skip_char()
643            return True
644        else:
645            return False
646
647    def text_arg(self):
648        # PCA: The deroff.c textArg() disallowed quotes at the start of an argument
649        # I'm not sure if this was a bug or not
650        got_something = False
651        while True:
652            match = Deroffer.g_re_not_backslash_or_whitespace.match(self.s)
653            if match:
654                # Output the characters in the match
655                self.condputs(match.group(0))
656                self.skip_char(match.end(0))
657                got_something = True
658
659            # Next is either an escape, or whitespace, or the end
660            # If it's the whitespace or the end, we're done
661            if not self.s or self.is_white(0):
662                return got_something
663
664            # Try an escape
665            if not self.esc_char():
666                # Some busted escape? Just output it
667                self.condputs(self.str_at(0))
668                self.skip_char()
669                got_something = True
670
671    def text_arg2(self):
672        if not self.esc_char():
673            if self.s and not self.is_white(0):
674                self.condputs(self.str_at(0))
675                self.skip_char()
676            else:
677                return False
678        while True:
679            if not self.esc_char():
680                if self.s and not self.is_white(0):
681                    self.condputs(self.str_at(0))
682                    self.skip_char()
683                else:
684                    return True
685
686    # Macro functions
687    def macro_sh(self):
688        for header_str in [" SYNOPSIS", ' "SYNOPSIS', " ‹BERSICHT", ' "‹BERSICHT']:
689            if self.s[2:].startswith(header_str):
690                self.inheader = True
691                break
692        else:
693            # Did not find a header string
694            self.inheader = False
695            self.nobody = True
696
697    def macro_ss_ip(self):
698        self.nobody = True
699        return False
700
701    def macro_i_ir(self):
702        pass
703        return False
704
705    def macro_Nm(self):
706        if self.s == "Nm\n":
707            self.condputs(self.name)
708        else:
709            self.name = self.s[3:].strip() + " "
710        return True
711
712    def macro_close_bracket(self):
713        self.refer = False
714        return False
715
716    def macro_ps(self):
717        if self.is_white(2):
718            self.pic = True
719        self.condputs("\n")
720        return True
721
722    def macro_pe(self):
723        if self.is_white(2):
724            self.pic = False
725        self.condputs("\n")
726        return True
727
728    def macro_ts(self):
729        if self.is_white(2):
730            self.tbl, self.tblstate = True, self.OPTIONS
731        self.condputs("\n")
732        return True
733
734    def macro_t_and(self):
735        if self.is_white(2):
736            self.tbl, self.tblstate = True, self.FORMAT
737        self.condputs("\n")
738        return True
739
740    def macro_te(self):
741        if self.is_white(2):
742            self.tbl = False
743        self.condputs("\n")
744        return True
745
746    def macro_eq(self):
747        if self.is_white(2):
748            self.eqn = True
749        self.condputs("\n")
750        return True
751
752    def macro_en(self):
753        if self.is_white(2):
754            self.eqn = False
755        self.condputs("\n")
756        return True
757
758    def macro_r1(self):
759        if self.is_white(2):
760            self.refer2 = True
761        self.condputs("\n")
762        return True
763
764    def macro_r2(self):
765        if self.is_white(2):
766            self.refer2 = False
767        self.condputs("\n")
768        return True
769
770    def macro_de(self):
771        macro = True
772        self.condputs("\n")
773        return True
774
775    def macro_bl_vl(self):
776        if self.is_white(2):
777            self.inlist = True
778        self.condputs("\n")
779        return True
780
781    def macro_bv(self):
782        if self.str_at(2) == "L" and self.white(self.str_at(3)):
783            self.inlist = True
784        self.condputs("\n")
785        return True
786
787    def macro_le(self):
788        if self.is_white(2):
789            self.inlist = False
790        self.condputs("\n")
791        return True
792
793    def macro_lp_pp(self):
794        self.condputs("\n")
795        return True
796
797    def macro_ds(self):
798        self.skip_char(2)
799        self.skip_leading_whitespace()
800        if self.str_at(0):
801            # Split at whitespace
802            comps = self.s.split(None, 2)
803            if len(comps) == 2:
804                name, value = comps
805                value = value.rstrip()
806                self.reg_table[name] = value
807        self.condputs("\n")
808        return True
809
810    def macro_so_nx(self):
811        # We always ignore include directives
812        # deroff.c for some reason allowed this to fall through to the 'tr' case
813        # I think that was just a bug so I won't replicate it
814        return True
815
816    def macro_tr(self):
817        self.skip_char(2)
818        self.skip_leading_whitespace()
819        while self.s and self.str_at(0) != "\n":
820            c = self.str_at(0)
821            ns = self.str_at(1)
822            self.skip_char(2)
823            if not ns or ns == "\n":
824                ns = " "
825            self.tr_from += c
826            self.tr_to += ns
827
828        # Update our table, then swap in the slower tr-savvy condputs
829        try:  # Python2
830            self.tr = string.maketrans(self.tr_from, self.tr_to)
831        except AttributeError:  # Python3
832            self.tr = "".maketrans(self.tr_from, self.tr_to)
833        self.condputs = self.condputs_tr
834        return True
835
836    def macro_sp(self):
837        self.condputs("\n")
838        return True
839
840    def macro_other(self):
841        self.condputs("\n")
842        return True
843
844    def request_or_macro(self):
845        # s[0] is period or open single quote
846        self.skip_char()
847        s0 = self.s[1:2]
848        if s0 == "\\":
849            if self.str_at(1) == '"':
850                self.condputs("\n")
851                return True
852            else:
853                pass
854        elif s0 == "[":
855            self.refer = True
856            self.condputs("\n")
857            return True
858        elif s0 == "]":
859            self.refer = False
860            self.skip_char()
861            return self.text()
862        elif s0 == ".":
863            self.macro = False
864            self.condputs("\n")
865            return True
866
867        self.nobody = False
868        s0s1 = self.s[0:2]
869
870        macro_func = Deroffer.g_macro_dict.get(s0s1, Deroffer.macro_other)
871        if macro_func(self):
872            return True
873
874        if self.skipheaders and self.nobody:
875            return True
876
877        self.skip_leading_whitespace()
878        while self.s and not self.is_white(0):
879            self.skip_char()
880        self.skip_leading_whitespace()
881        while True:
882            if not self.quoted_arg() and not self.text_arg():
883                if self.s:
884                    self.condputs(self.str_at(0))
885                    self.skip_char()
886                else:
887                    return True
888
889    def request_or_macro2(self):
890        self.skip_char()
891        s0 = self.s[0:1]
892        if s0 == "\\":
893            if self.str_at(1) == '"':
894                self.condputs("\n")
895                return True
896            else:
897                pass
898        elif s0 == "[":
899            self.refer = True
900            self.condputs("\n")
901            return True
902        elif s0 == "]":
903            self.refer = False
904            self.skip_char()
905            return self.text()
906        elif s0 == ".":
907            self.macro = False
908            self.condputs("\n")
909            return True
910
911        self.nobody = False
912        s0s1 = self.s[0:2]
913        if s0s1 == "SH":
914            for header_str in [" SYNOPSIS", ' "SYNOPSIS', " ‹BERSICHT", ' "‹BERSICHT']:
915                if self.s[2:].startswith(header_str):
916                    self.inheader = True
917                    break
918            else:
919                # Did not find a header string
920                self.inheader = False
921                self.nobody = True
922        elif s0s1 in ["SS", "IP", "H "]:
923            self.nobody = True
924        elif s0s1 in ["I ", "IR", "IB", "B ", "BR", "BI", "R ", "RB", "RI", "AB"]:
925            pass
926        elif s0s1 in ["] "]:
927            self.refer = False
928        elif s0s1 in ["PS"]:
929            if self.is_white(2):
930                self.pic = True
931            self.condputs("\n")
932            return True
933        elif s0s1 in ["PE"]:
934            if self.is_white(2):
935                self.pic = False
936            self.condputs("\n")
937            return True
938        elif s0s1 in ["TS"]:
939            if self.is_white(2):
940                self.tbl, self.tblstate = True, self.OPTIONS
941            self.condputs("\n")
942            return True
943        elif s0s1 in ["T&"]:
944            if self.is_white(2):
945                self.tbl, self.tblstate = True, self.FORMAT
946            self.condputs("\n")
947            return True
948        elif s0s1 in ["TE"]:
949            if self.is_white(2):
950                self.tbl = False
951            self.condputs("\n")
952            return True
953        elif s0s1 in ["EQ"]:
954            if self.is_white(2):
955                self.eqn = True
956            self.condputs("\n")
957            return True
958        elif s0s1 in ["EN"]:
959            if self.is_white(2):
960                self.eqn = False
961            self.condputs("\n")
962            return True
963        elif s0s1 in ["R1"]:
964            if self.is_white(2):
965                self.refer2 = True
966            self.condputs("\n")
967            return True
968        elif s0s1 in ["R2"]:
969            if self.is_white(2):
970                self.refer2 = False
971            self.condputs("\n")
972            return True
973        elif s0s1 in ["de"]:
974            macro = True
975            self.condputs("\n")
976            return True
977        elif s0s1 in ["BL", "VL", "AL", "LB", "RL", "ML", "DL"]:
978            if self.is_white(2):
979                self.inlist = True
980            self.condputs("\n")
981            return True
982        elif s0s1 in ["BV"]:
983            if self.str_at(2) == "L" and self.white(self.str_at(3)):
984                self.inlist = True
985            self.condputs("\n")
986            return True
987        elif s0s1 in ["LE"]:
988            if self.is_white(2):
989                self.inlist = False
990            self.condputs("\n")
991            return True
992        elif s0s1 in ["LP", "PP", "P\n"]:
993            self.condputs("\n")
994            return True
995        elif s0s1 in ["ds"]:
996            self.skip_char(2)
997            self.skip_leading_whitespace()
998            if self.str_at(0):
999                # Split at whitespace
1000                comps = self.s.split(None, 2)
1001                if len(comps) == 2:
1002                    name, value = comps
1003                    value = value.rstrip()
1004                    self.reg_table[name] = value
1005            self.condputs("\n")
1006            return True
1007        elif s0s1 in ["so", "nx"]:
1008            # We always ignore include directives
1009            # deroff.c for some reason allowed this to fall through to the 'tr' case
1010            # I think that was just a bug so I won't replicate it
1011            return True
1012        elif s0s1 in ["tr"]:
1013            self.skip_char(2)
1014            self.skip_leading_whitespace()
1015            while self.s and self.str_at(0) != "\n":
1016                c = self.str_at(0)
1017                ns = self.str_at(1)
1018                self.skip_char(2)
1019                if not ns or ns == "\n":
1020                    ns = " "
1021                self.tr_from += c
1022                self.tr_to += ns
1023
1024            # Update our table, then swap in the slower tr-savvy condputs
1025            try:  # Python2
1026                self.tr = string.maketrans(self.tr_from, self.tr_to)
1027            except AttributeError:  # Python3
1028                self.tr = "".maketrans(self.tr_from, self.tr_to)
1029            self.condputs = self.condputs_tr
1030
1031            return True
1032        elif s0s1 in ["sp"]:
1033            self.condputs("\n")
1034            return True
1035        else:
1036            self.condputs("\n")
1037            return True
1038
1039        if self.skipheaders and self.nobody:
1040            return True
1041
1042        self.skip_leading_whitespace()
1043        while self.s and not self.is_white(0):
1044            self.skip_char()
1045        self.skip_leading_whitespace()
1046        while True:
1047            if not self.quoted_arg() and not self.text_arg():
1048                if self.s:
1049                    self.condputs(self.str_at(0))
1050                    self.skip_char()
1051                else:
1052                    return True
1053
1054    def do_tbl(self):
1055        if self.tblstate == self.OPTIONS:
1056            while self.s and self.str_at(0) != ";" and self.str_at(0) != "\n":
1057                self.skip_leading_whitespace()
1058                if not self.str_at(0).isalpha():
1059                    # deroff.c has a bug where it can loop forever here...we try to work around it
1060                    self.skip_char()
1061                else:  # Parse option
1062
1063                    option = self.s
1064                    arg = ""
1065
1066                    idx = 0
1067                    while option[idx : idx + 1].isalpha():
1068                        idx += 1
1069
1070                    if option[idx : idx + 1] == "(":
1071                        option = option[:idx]
1072                        self.s = self.s[idx + 1 :]
1073                        arg = self.s
1074                    else:
1075                        self.s = ""
1076
1077                    if arg:
1078                        idx = arg.find(")")
1079                        if idx != -1:
1080                            arg = arg[:idx]
1081                        self.s = self.s[idx + 1 :]
1082                    else:
1083                        # self.skip_char()
1084                        pass
1085
1086                    if option.lower() == "tab":
1087                        self.tblTab = arg[0:1]
1088
1089            self.tblstate = self.FORMAT
1090            self.condputs("\n")
1091
1092        elif self.tblstate == self.FORMAT:
1093            while self.s and self.str_at(0) != "." and self.str_at(0) != "\n":
1094                self.skip_leading_whitespace()
1095                if self.str_at(0):
1096                    self.skip_char()
1097
1098            if self.str_at(0) == ".":
1099                self.tblstate = self.DATA
1100            self.condputs("\n")
1101        elif self.tblstate == self.DATA:
1102            if self.tblTab:
1103                self.s = self.s.replace(self.tblTab, "\t")
1104            self.text()
1105        return True
1106
1107    def do_line(self):
1108        if self.s[0:1] in ".'":
1109            if not self.request_or_macro():
1110                return False
1111        elif self.tbl:
1112            self.do_tbl()
1113        else:
1114            self.text()
1115        return True
1116
1117    def deroff(self, str):
1118        lines = str.split("\n")
1119        for line in lines:
1120            self.s = line + "\n"
1121            if not self.do_line():
1122                break
1123            # self.putchar('\n')
1124
1125
def deroff_files(files):
    """Deroff each file named in ``files`` and write the result to stdout.

    Every path is first echoed to stderr. Paths ending in ".gz" are read
    through gzip and, on Python 3, decoded as latin-1; all other paths are
    read with a plain text-mode open(). Each file is processed by a fresh
    Deroffer instance whose output is flushed to sys.stdout.
    """
    # Imported locally so the function also works when this module is
    # imported rather than run as a script.
    import gzip

    for arg in files:
        sys.stderr.write(arg + "\n")
        # "with" closes the handle even if reading fails.
        if arg.endswith(".gz"):
            with gzip.open(arg, "r") as f:
                text = f.read()
            if IS_PY3:
                # gzip hands back bytes on Python 3.
                text = text.decode("latin-1")
        else:
            with open(arg, "r") as f:
                text = f.read()
        d = Deroffer()
        d.deroff(text)
        d.flush_output(sys.stdout)
1141
1142
if __name__ == "__main__":
    # Makes gzip available as a module global when run as a script.
    import gzip

    # Developer switch: set to True to run under the profiler instead of
    # doing a normal conversion.
    PROFILE_RUN = False

    paths = sys.argv[1:]
    if PROFILE_RUN:
        import cProfile
        import profile
        import pstats

        # The statement string refers to "paths" by name, so that variable
        # must keep its name.
        profile.run("deroff_files(paths)", "fooprof")
        stats = pstats.Stats("fooprof")
        stats.sort_stats("time").print_stats(100)
    else:
        deroff_files(paths)
1156