#  Sources (c) 2002-2004, 2006-2009, 2012, 2013
#    David Turner <david@freetype.org>
#
#
# this file contains definitions of classes needed to decompose
# C sources files into a series of multi-line "blocks". There are
# two kinds of blocks:
#
#   - normal blocks, which contain source code or ordinary comments
#
#   - documentation blocks, which have restricted formatting, and
#     whose text always start with a documentation markup tag like
#     "<Function>", "<Type>", etc..
#
# the routines used to process the content of documentation blocks
# are not contained here, but in "content.py"
#
# the classes and methods found here only deal with text parsing
# and basic documentation block extraction
#

# NOTE(review): 'sys', 'os' and 'string' are not used by the code below;
# they are kept because modules doing `from sources import *` may rely on
# these names being re-exported -- confirm before removing them.
import fileinput
import re
import sys
import os
import string



################################################################
##
##  BLOCK FORMAT PATTERN
##
##   A simple class containing compiled regular expressions used
##   to detect potential documentation format block comments within
##   C source code
##
##   note that the 'column' pattern must contain a group that will
##   be used to "unbox" the content of documentation comment blocks
##
class SourceBlockFormat:
    """Compiled start/column/end patterns describing one comment box style.

    Attributes:
      id     -- the identifier passed by the creator (1 and 2 below)
      start  -- compiled pattern matching the first line of a block
      column -- compiled pattern matching a content line; its group 1
                is the "unboxed" text of the line
      end    -- compiled pattern matching the last line of a block
    """

    def __init__( self, id, start, column, end ):
        """create a block pattern, used to recognize special documentation blocks

        'start', 'column' and 'end' are pattern strings; each one is
        compiled with re.VERBOSE, so whitespace and '#' comments inside
        them are ignored.
        """
        self.id     = id
        self.start  = re.compile( start, re.VERBOSE )
        self.column = re.compile( column, re.VERBOSE )
        self.end    = re.compile( end, re.VERBOSE )



#
# format 1 documentation comment blocks look like the following:
#
#    /************************************/
#    /*                                  */
#    /*                                  */
#    /*                                  */
#    /************************************/
#
# we define a few regular expressions here to detect them
#

start = r'''
  \s*      # any number of whitespace
  /\*{2,}/ # followed by '/' and at least two asterisks then '/'
  \s*$     # probably followed by whitespace
'''

column = r'''
  \s*      # any number of whitespace
  /\*{1}   # followed by '/' and precisely one asterisk
  ([^*].*) # followed by anything (group 1)
  \*{1}/   # followed by one asterisk and a '/'
  \s*$     # probably followed by whitespace
'''

# the closing line of a format 1 box looks exactly like its opening line,
# hence 'start' is passed twice
re_source_block_format1 = SourceBlockFormat( 1, start, column, start )


#
# format 2 documentation comment blocks look like the following:
#
#    /************************************ (at least 2 asterisks)
#     *
#     *
#     *
#     *
#     **/ (1 or more asterisks at the end)
#
# we define a few regular expressions here to detect them
#
start = r'''
  \s*     # any number of whitespace
  /\*{2,} # followed by '/' and at least two asterisks
  \s*$    # probably followed by whitespace
'''

column = r'''
  \s*        # any number of whitespace
  \*{1}(?!/) # followed by precisely one asterisk not followed by `/'
  (.*)       # then anything (group1)
'''

end = r'''
  \s*  # any number of whitespace
  \*+/ # followed by at least one asterisk, then '/'
'''

re_source_block_format2 = SourceBlockFormat( 2, start, column, end )


#
# the list of supported documentation block formats, we could add new ones
# relatively easily
#
re_source_block_formats = [re_source_block_format1, re_source_block_format2]


#
# the following regular expressions corresponds to markup tags
# within the documentation comment blocks. they're equivalent
# despite their different syntax
#
# notice how each markup tag _must_ begin a new line
#
re_markup_tag1 = re.compile( r'''\s*<((?:\w|-)*)>''' )  # <xxxx> format
re_markup_tag2 = re.compile( r'''\s*@((?:\w|-)*):''' )  # @xxxx: format

#
# the list of supported markup tags, we could add new ones relatively
# easily
#
re_markup_tags = [re_markup_tag1, re_markup_tag2]

#
# used to detect a cross-reference, after markup tags have been stripped
#
re_crossref = re.compile( r'@((?:\w|-)*)(.*)' )    #  @foo

#
# used to detect italic and bold styles in paragraph text
#
re_italic = re.compile( r"_(\w(\w|')*)_(.*)" )     #  _italic_
re_bold   = re.compile( r"\*(\w(\w|')*)\*(.*)" )   #  *bold*

#
# this regular expression code to identify an URL has been taken from
#
#   http://mail.python.org/pipermail/tutor/2002-September/017228.html
#
# (with slight modifications)
#

urls = r'(?:https?|telnet|gopher|file|wais|ftp)'
ltrs = r'\w'
gunk = r'/#~:.?+=&%@!\-'
punc = r'.:?\-'
# NOTE: 'any' shadows the builtin of the same name for the rest of this
# module (and for star-importers); the name is kept unchanged so that the
# module's set of global names stays the same.
any  = "%(ltrs)s%(gunk)s%(punc)s" % { 'ltrs' : ltrs,
                                      'gunk' : gunk,
                                      'punc' : punc }
url  = r"""
         (
           \b                    # start at word boundary
           %(urls)s :            # need resource and a colon
           [%(any)s] +?          # followed by one or more of any valid
                                 # character, but be conservative and
                                 # take only what you need to...
           (?=                   # [look-ahead non-consumptive assertion]
             [%(punc)s]*         # either 0 or more punctuation
             (?:                 # [non-grouping parentheses]
               [^%(any)s] | $    # followed by a non-url char
                                 # or end of the string
             )
           )
         )
        """ % {'urls' : urls,
               'any'  : any,
               'punc' : punc }

re_url = re.compile( url, re.VERBOSE | re.MULTILINE )

#
# used to detect the end of commented source lines
#
re_source_sep = re.compile( r'\s*/\*\s*\*/' )

#
# used to perform cross-reference within source output
#
re_source_crossref = re.compile( r'(\W*)(\w*)' )

#
# a list of reserved source keywords
#
# a raw string is used so that '\b' and '\#' reach the regex compiler
# untouched; the resulting pattern is the same as before.
#
# NOTE(review): because of the leading '\b', the '#...' alternatives can
# only match when '#' directly follows a word character, so a directive
# at the start of a line is not matched -- confirm whether this is intended.
#
re_source_keywords = re.compile( r'''\b ( typedef   |
                                          struct    |
                                          enum      |
                                          union     |
                                          const     |
                                          char      |
                                          int       |
                                          short     |
                                          long      |
                                          void      |
                                          signed    |
                                          unsigned  |
                                          \#include |
                                          \#define  |
                                          \#undef   |
                                          \#if      |
                                          \#ifdef   |
                                          \#ifndef  |
                                          \#else    |
                                          \#endif   ) \b''', re.VERBOSE )


################################################################
##
##  SOURCE BLOCK CLASS
##
##   A SourceProcessor is in charge of reading a C source file
##   and decomposing it into a series of different "SourceBlocks".
##   each one of these blocks can be made of the following data:
##
##   - A documentation comment block that starts with "/**" and
##     whose exact format will be discussed later
##
##   - normal sources lines, including comments
##
##   the important fields in a text block are the following ones:
##
##     self.lines   : a list of text lines for the corresponding block
##
##     self.content : for documentation comment blocks only, this is the
##                    block content that has been "unboxed" from its
##                    decoration. This is an empty list for all other
##                    blocks (i.e. sources or ordinary comments with no
##                    starting markup tag)
##
class SourceBlock:
    """One run of consecutive source lines, with its unboxed content if any."""

    def __init__( self, processor, filename, lineno, lines ):
        """build a block from 'lines' (the list is copied, not kept)

        'processor' only needs a 'format' attribute: either None for a
        normal block, or an object whose 'column' pattern exposes the
        unboxed text of a comment line as group 1.

        'self.content' receives the unboxed lines when at least one of
        them starts with a markup tag; otherwise it stays empty.
        """
        self.processor = processor
        self.filename  = filename
        self.lineno    = lineno
        self.lines     = lines[:]
        self.format    = processor.format
        self.content   = []

        # normal source lines: there is nothing to unbox
        if self.format is None:
            return

        # extract comment lines; lines that do not match the column
        # pattern (e.g. the box borders) are dropped
        lines = []

        for line0 in self.lines:
            m = self.format.column.match( line0 )
            if m:
                lines.append( m.group( 1 ) )

        # now, look for a markup tag; the first line carrying one makes
        # the whole unboxed text the block's content
        for l in lines:
            l = l.strip()
            if l:
                for tag in re_markup_tags:
                    if tag.match( l ):
                        self.content = lines
                        return

    def location( self ):
        """return the block's position as the string "(filename:lineno)\""""
        return "(" + self.filename + ":" + repr( self.lineno ) + ")"

    # debugging only - not used in normal operations
    def dump( self ):
        """print the unboxed content if there is any, else the raw lines"""
        # single-argument print( x ) behaves identically on Python 2 and 3
        if self.content:
            print( "{{{content start---" )
            for l in self.content:
                print( l )
            print( "---content end}}}" )
            return

        for line in self.lines:
            print( line )



################################################################
##
##  SOURCE PROCESSOR CLASS
##
##   The SourceProcessor is in charge of reading a C source file
##   and decomposing it into a series of different "SourceBlock"
##   objects.
##
##   each one of these blocks can be made of the following data:
##
##   - A documentation comment block that starts with "/**" and
##     whose exact format will be discussed later
##
##   - normal sources lines, include comments
##
##
class SourceProcessor:
    """Splits a C source file into a list of SourceBlock objects."""

    def __init__( self ):
        """initialize a source processor"""
        self.blocks   = []
        self.filename = None
        self.format   = None
        # defined here too, so that add_block_lines() never meets a
        # missing attribute when used before parse_file()
        self.lineno   = 0
        self.lines    = []

    def reset( self ):
        """reset a block processor, clean all its blocks"""
        self.blocks = []
        self.format = None

    def parse_file( self, filename ):
        """parse a C source file, and add its blocks to the processor's list

        Blocks from a previous call are discarded first.  The file is
        read through the module-level 'fileinput' state, which is closed
        again before returning, even if an error occurs.
        """
        self.reset()

        self.filename = filename

        # make sure no earlier fileinput.input() is still active
        fileinput.close()
        self.format = None
        self.lineno = 0
        self.lines  = []

        try:
            for line in fileinput.input( filename ):
                # strip trailing newlines, important on Windows machines!
                if line.endswith( '\012' ):
                    line = line[:-1]

                if self.format is None:
                    self.process_normal_line( line )
                elif self.format.end.match( line ):
                    # that's a normal block end, add it to 'lines' and
                    # create a new block
                    self.lines.append( line )
                    self.add_block_lines()
                elif self.format.column.match( line ):
                    # that's a normal column line, add it to 'lines'
                    self.lines.append( line )
                else:
                    # humm.. this is an unexpected block end,
                    # create a new block, but don't process the line
                    self.add_block_lines()

                    # we need to process the line again
                    self.process_normal_line( line )
        finally:
            fileinput.close()

        # record the last lines
        self.add_block_lines()

    def process_normal_line( self, line ):
        """process a normal line and check whether it is the start of a new block

        Must be called while 'fileinput' is iterating, since the current
        line number is taken from fileinput.filelineno().
        """
        for f in re_source_block_formats:
            if f.start.match( line ):
                # flush what was accumulated so far, then open a new
                # documentation block with the matching format
                self.add_block_lines()
                self.format = f
                self.lineno = fileinput.filelineno()

        self.lines.append( line )

    def add_block_lines( self ):
        """add the current accumulated lines and create a new block"""
        if self.lines != []:
            block = SourceBlock( self, self.filename, self.lineno, self.lines )

            self.blocks.append( block )
            self.format = None
            self.lines  = []

    # debugging only, not used in normal operations
    def dump( self ):
        """print all blocks in a processor"""
        for b in self.blocks:
            b.dump()

# eof