#!/usr/local/bin/python3.8
"""Generate a markdown manual page from a C/C++ source file.

The pygments C lexer tokenizes the input; the tokens are then flagged
with a role (file comment, include block, function prototype, function
comment) and rendered as markdown sections.
"""

import pygments.lexers.compiled as lexer
import optparse
import re
from pygments.token import Token
import logging

logg = logging.getLogger(__name__)

# Role tags attached to token spans by CppToMarkdown.parse().
FileComment = "FileComment"
FileInclude = "FileInclude"
FunctionComment = "FunctionComment"
FunctionPrototype = "FunctionPrototype"

# use the markdown lexer to identify elements
# then filter only those we want. The returned
# token list is more global flagging the role
# of each token for the manual generation.
class CppToMarkdown:
    def __init__(self):
        self.alldefinitions = 0          # -a level: also emit non-/** definitions
        self.internaldefs = ["static"]   # prefixes that mark internal functions
        self.filecomment_done = ""       # "" until first multiline comment seen
        self.fileinclude_done = ""       # "" -> "no" (collecting) -> "done"
        self.filecomment_text = ""
        self.fileinclude_text = ""
        self.comment_text = ""           # most recent comment, pending attachment
        self.function_text = ""          # accumulated prototype text
        self.nesting = 0                 # brace depth while scanning
    def split_copyright(self, text):
        """Split a file comment into (copyright, introtext).

        There are two modes - the copyright starts in the first line
        and the source description follows or the other way round.
        Both returned parts keep the original first and last lines so
        each remains a well-formed comment block.
        """
        lines = text.split("\n")
        if len(lines) <= 2:
            return "", text
        introtext = [lines[0]]
        copyright = [lines[0]]
        check1 = re.compile(r"^\s[*]\s+[(][c][C][)]")
        check2 = re.compile(r"^\s[*]\s+\b[Cc]opyright\b")
        empty1 = re.compile(r"^\s[*]\s*$")
        state = "intro"
        # FIX: was xrange(...) - a Python 2 builtin that raises NameError
        # on the Python 3 interpreter this script declares.
        for i in range(1, len(lines) - 1):
            line = lines[i]
            if state == "intro":
                if empty1.match(line):
                    introtext += [ line ]
                    continue
                if check1.match(line) or check2.match(line):
                    state = "copyrightfirst"
                    copyright += [ line ]
                else:
                    state = "introtextfirst"
                    introtext += [ line ]
            elif state == "copyrightfirst":
                if empty1.match(line):
                    state = "introtextlast"
                    introtext += [ line ]
                else:
                    copyright += [ line ]
            elif state == "introtextfirst":
                if check1.match(line) or check2.match(line):
                    state = "copyrightlast"
                    copyright += [ line ]
                else:
                    introtext += [ line ]
            elif state == "copyrightlast":
                copyright += [ line ]
            elif state == "introtextlast":
                introtext += [ line ]
            else:
                logg.fatal("UNKNOWN STATE %s", state)
        introtext += [lines[-1]]
        copyright += [lines[-1]]
        logg.debug("@ COPYRIGHT\n %s", copyright)
        logg.debug("@ INTROTEXT\n %s", introtext)
        return "\n".join(copyright), "\n".join(introtext)
    def commentblock(self, text):
        """Strip C comment decoration (/* ... */ and leading ' * ')
        and return the text as a markdown blockquote/paragraph.
        NOTE(review): replacement strings below are kept exactly as in
        the original source; runs of spaces may have been collapsed by
        an earlier whitespace mangle - confirm against rendered output.
        """
        prefix = re.compile(r"(?s)^\s*[/][*]+([^\n]*)(?=\n)")
        suffix = re.compile(r"(?s)\n [*][/]\s*")
        empty = re.compile(r"(?s)\n [*][ \t]*(?=\n)")
        lines1 = re.compile(r"(?s)\n [*][ ][\t]")
        lines2 = re.compile(r"(?s)\n [*][ ]")
        lines3 = re.compile(r"(?s)\n [*][\t][\t]")
        lines4 = re.compile(r"(?s)\n [*][\t]")
        text = suffix.sub("\n", text)
        text = prefix.sub("> \\1\n", text)
        text = empty.sub("\n", text)
        text = lines1.sub("\n ", text)
        text = lines2.sub("\n", text)
        text = lines3.sub("\n ", text)
        text = lines4.sub("\n ", text)
        return text
    def functionblock(self, text):
        """Indent a function prototype so markdown renders it as code,
        dropping whitespace-only lines."""
        empty = re.compile(r"(?s)\n[ \t]*(?=\n)")
        text = " " + text.replace("\n", "\n ")
        text = empty.sub("", text)
        return text
    def functionname(self, text):
        """Extract the function (or variable) name from a prototype:
        the last word before '(' or '=', else the last word overall.
        Returns "" if no name can be found."""
        check1 = re.compile(r"^[^()=]*(\b\w+)\s*[(=]")
        found = check1.match(text)
        if found:
            return found.group(1)
        check2 = re.compile(r"^[^()=]*(\b\w+)\s*$")
        found = check2.match(text)
        if found:
            return found.group(1)
        return ""
    def run(self, filename):
        """Read *filename* and print the generated markdown to stdout."""
        # FIX: use a context manager so the file handle is closed.
        with open(filename) as f:
            filetext = f.read()
        for line in self.process(filetext, filename):
            print(line)
    def process(self, filetext, filename=""):
        """Yield markdown lines for *filetext*; the copyright part of
        the file comment is deferred to a trailing COPYRIGHT section."""
        section_ruler = "-----------------------------------------"
        copyright = ""
        for token, text in self.parse(filetext):
            if token == FileInclude:
                yield "## SOURCE " + filename.replace("../", "")
                yield " #" + text.replace("\n", "\n ")
            elif token == FileComment:
                yield "### INTRODUCTION"
                copyright, introduction = self.split_copyright(text)
                yield self.commentblock(introduction)
            elif token == FunctionPrototype:
                name = self.functionname(text)
                yield section_ruler
                yield "### " + name
                # yield '<a id="%s"></a>' % name
                yield "#### NAME"
                yield " " + name
                yield "#### SYNOPSIS"
                yield self.functionblock(text)
            elif token == FunctionComment:
                if text:
                    yield "#### DESCRIPTION"
                    yield self.commentblock(text)
            else:
                if text:
                    yield "#### NOTES"
                    # NOTE(review): prints instead of yielding - unknown
                    # token roles go to stdout directly; kept as-is.
                    print(token + " " + text.replace("\n", "\n "))
        if copyright:
            yield section_ruler
            yield "### COPYRIGHT"
            yield self.commentblock(copyright)
    def isexported_function(self):
        """Decide whether the prototype collected in self.function_text
        should appear in the manual, based on its comment style and the
        -a (alldefinitions) level."""
        function = self.function_text.strip().replace("\n"," ")
        logg.debug("@ --------------------------------------")
        logg.debug("@ ALLDEFINITIONS %s", self.alldefinitions)
        if function.startswith("static ") and self.alldefinitions < 3:
            logg.debug("@ ONLY INTERNAL %s", function)
            return False
        if not self.comment_text:
            if not self.alldefinitions:
                logg.info("@ NO COMMENT ON %s", function)
                return False
            else:
                # FIX: logg.warn is a deprecated alias of logg.warning
                logg.warning("@ NO COMMENT ON %s", function)
        text = self.comment_text
        # doxygen-style comments are always exported
        if text.startswith("/**"): return True
        if text.startswith("/*!"): return True
        if text.startswith("///"): return True
        if text.startswith("//!"): return True
        if self.alldefinitions >= 1:
            if text.startswith("/*"): return True
            if text.startswith("//"): return True
        if self.alldefinitions >= 2:
            return True
        logg.debug("@ NO ** COMMENT %s", self.function_text.strip())
        # FIX: removed unused local 'defs = self.function_text'
        return False
    def parse(self, filetext):
        """Tokenize *filetext* with the pygments C lexer and yield
        (role, text) pairs: FileInclude, FileComment, FunctionPrototype
        and FunctionComment."""
        c = lexer.CLexer()
        for token, text in c.get_tokens(filetext):
            logg.debug("|| %s %s", token, text.replace("\n", "\n |"))
            # completion: first non-preproc token ends the include block
            if token != Token.Comment.Preproc and self.fileinclude_done == "no":
                yield FileInclude, self.fileinclude_text
                if self.filecomment_text:
                    yield FileComment, self.filecomment_text
                self.fileinclude_done = "done"
            # parsing
            if token == Token.Comment.Multiline:
                if not self.filecomment_done:
                    self.filecomment_done = "done"
                    self.filecomment_text = text
                    # wait until we know it is not a function documentation
                    self.comment_text = text
                else:
                    self.comment_text = text
            elif token == Token.Comment.Preproc and "include" in text:
                if not self.fileinclude_done:
                    self.fileinclude_done = "no"
                    self.fileinclude_text += text
                    self.comment_text = ""
            elif token == Token.Comment.Preproc and self.fileinclude_done == "no":
                if not "\n" in self.fileinclude_text:
                    self.fileinclude_text += text
                    self.comment_text = ""
            elif token == Token.Comment.Preproc:
                self.comment_text = ""
                self.function_text = ""
            elif token == Token.Operator and text == "=":
                # a '=' at top level: variable definition, not a function
                if not self.nesting and self.function_text.strip():
                    if self.isexported_function():
                        yield FunctionPrototype, self.function_text
                        yield FunctionComment, self.comment_text
                self.comment_text = ""
                self.function_text = ""
            elif token == Token.Punctuation and text == ";":
                self.comment_text = ""
                self.function_text = ""
            elif token == Token.Punctuation and text == "{":
                # '{' at top level ends a function prototype
                if not self.nesting and self.function_text.strip():
                    if self.isexported_function():
                        yield FunctionPrototype, self.function_text
                        yield FunctionComment, self.comment_text
                self.comment_text = ""
                self.function_text = ""
                self.nesting += 1
            elif token == Token.Punctuation and text == "}":
                self.nesting -= 1
                self.comment_text = ""
                self.function_text = ""
            else:
                if not self.nesting:
                    self.function_text += text
                else:
                    pass # yield "|",text


if __name__ == "__main__":
    _o = optparse.OptionParser()
    _o.add_option("-v", "--verbose", action="count", default=0)
    _o.add_option("-a", "--all", action="count", default=0,
                  help="include all definitions in the output (not only /**)")
    opt, args = _o.parse_args()

    logg.addHandler(logging.StreamHandler())
    if opt.verbose:
        logg.setLevel(logging.WARN - 10 * opt.verbose)

    c = CppToMarkdown()
    if opt.all:
        c.alldefinitions = opt.all
    for arg in args:
        c.run(arg)