1#!/usr/local/bin/python3.8
2
3import pygments.lexers.compiled as lexer
4import optparse
5import re
6from pygments.token import Token
7import logging
8
# Module-wide logger; the script entry point attaches the handler and level.
logg = logging.getLogger(__name__)

# Role tags paired with a text block in the (role, text) tuples that
# CppToMarkdown.parse() yields and CppToMarkdown.process() dispatches on.
(
    FileComment,
    FileInclude,
    FunctionComment,
    FunctionPrototype,
) = (
    "FileComment",
    "FileInclude",
    "FunctionComment",
    "FunctionPrototype",
)
15
# Use the pygments C lexer to identify the elements of a source file
# and then keep only those we want. The returned token list is coarser
# than the lexer's: each entry flags the role of a whole text block
# (file comment, include, prototype, ...) for the manual generation.
class CppToMarkdown:
    """Extract documented definitions from C/C++ source text as Markdown.

    ``parse`` runs the pygments C lexer over the text and condenses its
    tokens into ``(role, text)`` pairs; ``process`` renders those pairs as
    Markdown lines; ``run`` does both for a file and prints the result.

    NOTE(review): the parser state kept on the instance is never reset, so
    a second ``parse``/``run`` on the same instance will not emit the file
    comment or include section again -- confirm whether that is intended.
    """

    # A comment line that opens the copyright notice, e.g. " * (c) 2020 ..."
    # or " * (C) 2020 ..." / " * Copyright 2020 ...".
    _COPYRIGHT_SIGN = re.compile(r"^\s[*]\s+[(][cC][)]")
    _COPYRIGHT_WORD = re.compile(r"^\s[*]\s+\b[Cc]opyright\b")
    # A comment line holding nothing but the " *" decoration.
    _BLANK_COMMENT_LINE = re.compile(r"^\s[*]\s*$")

    # Ordered (pattern, replacement) steps that strip the C comment
    # decoration. Order matters: the more specific " * <tab>" forms must run
    # before the generic " * " form would consume them.
    _COMMENT_REWRITES = (
        (re.compile(r"(?s)\n [*][/]\s*"), "\n"),                    # closing " */"
        (re.compile(r"(?s)^\s*[/][*]+([^\n]*)(?=\n)"), "> \\1\n"),  # opening "/** title"
        (re.compile(r"(?s)\n [*][ \t]*(?=\n)"), "\n"),              # blank " *" line
        (re.compile(r"(?s)\n [*][ ][\t]"), "\n     "),
        (re.compile(r"(?s)\n [*][ ]"), "\n"),
        (re.compile(r"(?s)\n [*][\t][\t]"), "\n         "),
        (re.compile(r"(?s)\n [*][\t]"), "\n     "),
    )

    _BLANK_LINE = re.compile(r"(?s)\n[ \t]*(?=\n)")
    # Last identifier before "(" or "=", or else the last identifier at all.
    _NAME_BEFORE_PAREN = re.compile(r"^[^()=]*(\b\w+)\s*[(=]")
    _NAME_AT_END = re.compile(r"^[^()=]*(\b\w+)\s*$")

    def __init__(self):
        # How much to export (see isexported_function):
        #   0  only definitions with a doc comment (/** /*! /// //!)
        #   1  also definitions with a plain comment
        #   2  also definitions without a comment
        #   3  also "static" definitions
        self.alldefinitions = 0
        self.internaldefs = ["static"]
        # "" until the first multiline comment was seen, then "done"
        self.filecomment_done = ""
        # "" before any include, "no" while collecting, "done" once emitted
        self.fileinclude_done = ""
        self.filecomment_text = ""
        self.fileinclude_text = ""
        # most recent multiline comment not yet attached to a definition
        self.comment_text = ""
        # top-level text collected since the last statement boundary
        self.function_text = ""
        # current "{" depth; text is only collected at depth 0
        self.nesting = 0

    def split_copyright(self, text):
        """Split a file comment into ``(copyright, introtext)``.

        Two layouts are handled: the copyright notice comes first and the
        description follows after a blank comment line, or the description
        comes first and the notice runs to the end. The first and the last
        line of the comment are put into both parts so that each remains a
        complete comment. Comments of at most two lines are returned
        unchanged as introtext with an empty copyright.
        """
        lines = text.split("\n")
        if len(lines) <= 2:
            return "", text
        introtext = [lines[0]]
        notice = [lines[0]]
        state = "intro"
        for line in lines[1:-1]:
            opens_notice = bool(self._COPYRIGHT_SIGN.match(line)
                                or self._COPYRIGHT_WORD.match(line))
            if state == "intro":
                if self._BLANK_COMMENT_LINE.match(line):
                    # leading blank lines do not decide the layout yet
                    introtext.append(line)
                elif opens_notice:
                    state = "copyrightfirst"
                    notice.append(line)
                else:
                    state = "introtextfirst"
                    introtext.append(line)
            elif state == "copyrightfirst":
                if self._BLANK_COMMENT_LINE.match(line):
                    # a blank line ends the notice, the rest is description
                    state = "introtextlast"
                    introtext.append(line)
                else:
                    notice.append(line)
            elif state == "introtextfirst":
                if opens_notice:
                    state = "copyrightlast"
                    notice.append(line)
                else:
                    introtext.append(line)
            elif state == "copyrightlast":
                notice.append(line)
            elif state == "introtextlast":
                introtext.append(line)
            else:
                logg.fatal("UNKNOWN STATE %s", state)
        introtext.append(lines[-1])
        notice.append(lines[-1])
        logg.debug("@ COPYRIGHT\n %s", notice)
        logg.debug("@ INTROTEXT\n %s", introtext)
        return "\n".join(notice), "\n".join(introtext)

    def commentblock(self, text):
        """Return the comment ``text`` with its C comment decoration removed.

        The text after the opening ``/*`` becomes a ``> `` quote line, the
        closing `` */`` and the `` * `` line prefixes are dropped, and
        tab-indented lines become space-indented lines.
        """
        for pattern, replacement in self._COMMENT_REWRITES:
            text = pattern.sub(replacement, text)
        return text

    def functionblock(self, text):
        """Return ``text`` indented by four spaces with blank lines removed."""
        indented = "    " + text.replace("\n", "\n    ")
        return self._BLANK_LINE.sub("", indented)

    def functionname(self, text):
        """Return the identifier named by a prototype, or "" if none is found.

        That is the last word before the first ``(`` or ``=``; without
        either of them it is the last word of the text.
        """
        for pattern in (self._NAME_BEFORE_PAREN, self._NAME_AT_END):
            found = pattern.match(text)
            if found:
                return found.group(1)
        return ""

    def run(self, filename):
        """Read ``filename`` and print its Markdown rendering to stdout."""
        # close the file deterministically instead of relying on the GC
        with open(filename) as source:
            filetext = source.read()
        for line in self.process(filetext, filename):
            print(line)

    def process(self, filetext, filename=""):
        """Yield the Markdown lines for ``filetext``.

        ``filename`` is only used for the "## SOURCE" heading. The
        copyright part of the file comment is held back and emitted as the
        last section.
        """
        section_ruler = "-----------------------------------------"
        notice = ""
        for token, text in self.parse(filetext):
            if token == FileInclude:
                yield "## SOURCE " + filename.replace("../", "")
                yield "    #" + text.replace("\n", "\n    ")
            elif token == FileComment:
                yield "### INTRODUCTION"
                notice, introduction = self.split_copyright(text)
                yield self.commentblock(introduction)
            elif token == FunctionPrototype:
                name = self.functionname(text)
                yield section_ruler
                yield "### " + name
                # yield '<a id="%s"></a>' % name
                yield "#### NAME"
                yield "    " + name
                yield "#### SYNOPSIS"
                yield self.functionblock(text)
            elif token == FunctionComment:
                if text:
                    yield "#### DESCRIPTION"
                    yield self.commentblock(text)
            else:
                if text:
                    yield "#### NOTES"
                    # yield instead of printing so the text stays in order
                    # with the other lines and reaches non-printing callers
                    yield token + " " + text.replace("\n", "\n  ")
        if notice:
            yield section_ruler
            yield "### COPYRIGHT"
            yield self.commentblock(notice)

    def isexported_function(self):
        """Tell whether the collected definition belongs in the manual.

        Decided from ``function_text``, ``comment_text`` and the
        ``alldefinitions`` level: "static" definitions need level 3,
        uncommented ones level 2, plainly commented ones level 1, and
        definitions with a doc comment are always exported.
        """
        function = self.function_text.strip().replace("\n", " ")
        logg.debug("@ --------------------------------------")
        logg.debug("@ ALLDEFINITIONS %s", self.alldefinitions)
        if function.startswith("static ") and self.alldefinitions < 3:
            logg.debug("@ ONLY INTERNAL %s", function)
            return False
        if not self.comment_text:
            if not self.alldefinitions:
                logg.info("@ NO COMMENT ON %s", function)
                return False
            logg.warning("@ NO COMMENT ON %s", function)
        text = self.comment_text
        if text.startswith(("/**", "/*!", "///", "//!")):
            return True
        if self.alldefinitions >= 1 and text.startswith(("/*", "//")):
            return True
        if self.alldefinitions >= 2:
            return True
        logg.debug("@ NO ** COMMENT %s", self.function_text.strip())
        return False

    def _exported_definition(self):
        """Yield the prototype/comment pairs of the collected definition.

        Nothing is yielded inside a "{" block, when no text was collected,
        or when isexported_function() rejects the definition.
        """
        if not self.nesting and self.function_text.strip():
            if self.isexported_function():
                yield FunctionPrototype, self.function_text
                yield FunctionComment, self.comment_text

    def parse(self, filetext):
        """Yield ``(role, text)`` pairs for the interesting parts of ``filetext``.

        The roles are FileInclude, FileComment, FunctionPrototype and
        FunctionComment. The include section (with the first multiline
        comment as file comment) is emitted at the first token after the
        include that is not a preprocessor token. A prototype is emitted
        when a top-level "=" or "{" ends the collected text.
        """
        c_lexer = lexer.CLexer()
        for token, text in c_lexer.get_tokens(filetext):
            logg.debug("|| %s %s", token, text.replace("\n", "\n |"))
            # completion: the include section ends at the first token
            # that is not a preprocessor token
            if token != Token.Comment.Preproc and self.fileinclude_done == "no":
                yield FileInclude, self.fileinclude_text
                if self.filecomment_text:
                    yield FileComment, self.filecomment_text
                self.fileinclude_done = "done"
            # parsing
            if token == Token.Comment.Multiline:
                if not self.filecomment_done:
                    self.filecomment_done = "done"
                    self.filecomment_text = text
                # the first comment is kept as candidate too until we know
                # it is not a function documentation
                self.comment_text = text
            elif token == Token.Comment.Preproc and "include" in text:
                if not self.fileinclude_done:
                    self.fileinclude_done = "no"
                    self.fileinclude_text += text
                    self.comment_text = ""
            elif token == Token.Comment.Preproc and self.fileinclude_done == "no":
                # only the first line of the include section is collected
                if "\n" not in self.fileinclude_text:
                    self.fileinclude_text += text
                self.comment_text = ""
            elif token == Token.Comment.Preproc:
                self.comment_text = ""
                self.function_text = ""
            elif token == Token.Operator and text == "=":
                for found in self._exported_definition():
                    yield found
                self.comment_text = ""
                self.function_text = ""
            elif token == Token.Punctuation and text == ";":
                self.comment_text = ""
                self.function_text = ""
            elif token == Token.Punctuation and text == "{":
                # decide before the depth changes: only depth 0 is exported
                for found in self._exported_definition():
                    yield found
                self.comment_text = ""
                self.function_text = ""
                self.nesting += 1
            elif token == Token.Punctuation and text == "}":
                self.nesting -= 1
                self.comment_text = ""
                self.function_text = ""
            elif not self.nesting:
                self.function_text += text
229
230
if __name__ == "__main__":
    # Command-line entry point: print the Markdown for each given source file.
    _o = optparse.OptionParser()
    _o.add_option("-v", "--verbose", action="count", default=0,
                  help="increase logging level (may be repeated)")
    _o.add_option("-a", "--all", action="count", default=0,
                  help="include all definitions in the output (not only /**)")
    opt, args = _o.parse_args()

    logg.addHandler(logging.StreamHandler())
    if opt.verbose:
        # Each -v lowers the threshold by one level. Clamp at DEBUG: a level
        # of 0 is NOTSET, which would defer to the root logger's WARNING
        # threshold and make "-vvv" quieter than "-vv".
        logg.setLevel(max(logging.DEBUG, logging.WARNING - 10 * opt.verbose))

    c = CppToMarkdown()
    if opt.all:
        c.alldefinitions = opt.all
    for arg in args:
        c.run(arg)
247
248
249
250
251