#  Content (c) 2002, 2004, 2006-2009, 2012, 2013
#    David Turner <david@freetype.org>
#
#  This file contains routines used to parse the content of documentation
#  comment blocks and build more structured objects out of them.
#

# NOTE: the standard modules are imported *after* the wildcard imports on
# purpose, so that the names `string' and `re' always refer to the standard
# library modules whatever `sources' and `utils' happen to export.
from sources import *
from utils import *
import string
import re


# The two regular expressions below detect code sequences, i.e. code
# fragments enclosed between a line holding only '{' and a line holding
# only '}', as in:
#
#  {
#    x = y + z;
#    if ( zookoo == 2 )
#    {
#      foobar();
#    }
#  }
#
# Group 1 captures the indentation of the brace.  The opening and the
# closing brace of a sequence must have exactly the same indentation; the
# sequence itself may contain braces at a greater indentation.
#
re_code_start = re.compile( r"(\s*){\s*$" )
re_code_end   = re.compile( r"(\s*)}\s*$" )


# This regular expression isolates an identifier (word characters and
# dashes) from the text that follows it.
#
re_identifier = re.compile( r'((?:\w|-)*)' )


# We collect macros ending in `_H'; while outputting the object data, we use
# this info together with the object's file location to emit the appropriate
# header file macro and name before the object itself.  Group 1 is the macro
# name, group 2 the header file name found between `<' and `>'.
#
re_header_macro = re.compile( r'^#define\s{1,}(\w{1,}_H)\s{1,}<(.*)>' )


#############################################################################
#
# The DocCode class is used to store source code lines.
#
#   'self.lines' contains a set of source code lines that will be dumped as
#   HTML in a <PRE> tag.
#
#   The object is filled line by line by the parser; it strips the leading
#   "margin" space from each input line before storing it in 'self.lines'.
#
class DocCode:
    """Store the lines of a code sequence with their left margin removed.

    Attributes:
      lines -- list of source code lines, margin stripped
      words -- always None; lets callers tell a DocCode from a DocPara
    """

    def __init__( self, margin, lines ):
        """Build the object from `lines', removing `margin' leading columns.

        The margin is only removed from lines whose first `margin'
        characters are all whitespace; other lines are stored unchanged.
        """
        self.lines = []
        self.words = None

        # remove margin spaces
        for l in lines:
            if l[:margin].strip() == "":
                l = l[margin:]
            self.lines.append( l )

    def dump( self, prefix = "", width = 60 ):
        """Print each line to standard output, preceded by `prefix'."""
        # `print( x )' with a single argument behaves identically under
        # Python 2 and Python 3
        for l in self.dump_lines( 0, width ):
            print( prefix + l )

    def dump_lines( self, margin = 0, width = 60 ):
        """Return the lines, each indented by `margin' spaces.

        `width' is accepted for symmetry with DocPara.dump_lines() but is
        not used: code lines are never re-wrapped.
        """
        result = []
        for l in self.lines:
            result.append( " " * margin + l )
        return result



#############################################################################
#
# The DocPara class is used to store "normal" text paragraph.
#
#   'self.words' contains the list of words that make up the paragraph
#
class DocPara:
    """Store a text paragraph as a flat list of words.

    Attributes:
      lines -- always None; lets callers tell a DocPara from a DocCode
      words -- list of whitespace-separated words, in reading order
    """

    def __init__( self, lines ):
        """Split every line of `lines' into words and collect them."""
        self.lines = None
        self.words = []
        for l in lines:
            # str.split() without argument already ignores leading and
            # trailing whitespace
            self.words.extend( l.split() )

    def dump( self, prefix = "", width = 60 ):
        """Print the paragraph wrapped at `width', one `prefix' per line."""
        for l in self.dump_lines( 0, width ):
            print( prefix + l )

    def dump_lines( self, margin = 0, width = 60 ):
        """Return the paragraph re-wrapped to `width' columns.

        Words are joined with single spaces; a new line is started when
        adding the next word would exceed `width'.  Each returned line is
        indented by `margin' spaces (the margin is not counted in `width').
        A single word wider than `width' is emitted alone on its line.
        """
        cur    = ""  # current line
        col    = 0   # current width
        result = []

        for word in self.words:
            ln = len( word )
            if col > 0:
                ln = ln + 1   # account for the separating space

            # only break when the current line holds something; otherwise
            # an over-long first word would produce a spurious empty line
            if col > 0 and col + ln > width:
                result.append( " " * margin + cur )
                cur = word
                col = len( word )
            else:
                if col > 0:
                    cur = cur + " "
                cur = cur + word
                col = col + ln

        if col > 0:
            result.append( " " * margin + cur )

        return result



#############################################################################
#
# The DocField class is used to store a list containing either DocPara or
# DocCode objects.
# Each DocField also has an optional "name" which is used
# when the object corresponds to a field or value definition
#
class DocField:
    """A named (or anonymous) list of DocPara and DocCode items.

    Attributes:
      name  -- field name, or None for normal paragraphs/sources
      items -- list of DocPara / DocCode objects, in source order
    """

    def __init__( self, name, lines ):
        """Split `lines' into paragraphs and code sequences.

        A code sequence starts on a line matching `re_code_start' and ends
        on a line matching `re_code_end' whose indentation is not greater
        than the one of the opening brace.  Outside of code sequences, an
        empty line terminates the current paragraph.
        """
        self.name  = name  # can be None for normal paragraphs/sources
        self.items = []    # list of items

        mode_none = 0      # start parsing mode
        mode_code = 1      # parsing code sequences

        margin    = -1     # current code sequence indentation
        cur_lines = []

        # now analyze the markup lines to see if they contain paragraphs,
        # code sequences or fields definitions
        #
        mode = mode_none

        for l in lines:
            # are we parsing a code sequence ?
            if mode == mode_code:
                m = re_code_end.match( l )
                if m and len( m.group( 1 ) ) <= margin:
                    # that's it, we finished the code sequence
                    self.items.append( DocCode( 0, cur_lines ) )
                    margin    = -1
                    cur_lines = []
                    mode      = mode_none
                else:
                    # nope, continue the code sequence; the margin is
                    # removed here, once and for all
                    cur_lines.append( l[margin:] )
            else:
                # start of code sequence ?
                m = re_code_start.match( l )
                if m:
                    # save current lines
                    if cur_lines:
                        self.items.append( DocPara( cur_lines ) )
                        cur_lines = []

                    # switch to code extraction mode
                    margin = len( m.group( 1 ) )
                    mode   = mode_code
                elif not l.split() and cur_lines:
                    # if the line is empty, we end the current paragraph,
                    # if any
                    self.items.append( DocPara( cur_lines ) )
                    cur_lines = []
                else:
                    # otherwise, simply add the line to the current
                    # paragraph
                    cur_lines.append( l )

        if mode == mode_code:
            # unexpected end of code sequence; the collected lines already
            # had their margin removed above, so it must not be stripped a
            # second time (that would eat the relative indentation)
            self.items.append( DocCode( 0, cur_lines ) )
        elif cur_lines:
            self.items.append( DocPara( cur_lines ) )

    def dump( self, prefix = "" ):
        """Print the field name (if any) followed by all its items."""
        if self.name:
            print( prefix + self.name + " ::" )
            prefix = prefix + "----"

        first = 1
        for p in self.items:
            if not first:
                print( "" )
            p.dump( prefix )
            first = 0

    def dump_lines( self, margin = 0, width = 60 ):
        """Return the lines of all items, separated by one empty line."""
        result = []
        nl     = None

        for p in self.items:
            if nl:
                result.append( "" )

            result.extend( p.dump_lines( margin, width ) )
            nl = 1

        return result



# this regular expression is used to detect field definitions
#
re_field = re.compile( r"\s*(\w*|\w(\w|\.)*\w)\s*::" )



class DocMarkup:
    """The content of one markup tag, split into DocField objects.

    Attributes:
      tag    -- lower-cased tag name
      fields -- list of DocField objects, in source order
    """

    def __init__( self, tag, lines ):
        """Split `lines' into fields at each `name ::' definition."""
        self.tag    = tag.lower()
        self.fields = []

        cur_lines = []
        field     = None

        for l in lines:
            m = re_field.match( l )
            if m:
                # we detected the start of a new field definition

                # first, save the current one
                if cur_lines:
                    self.fields.append( DocField( field, cur_lines ) )

                field = m.group( 1 )   # record field name

                # blank out the `name ::' part but keep the column
                # positions of the remaining text
                ln        = len( m.group( 0 ) )
                cur_lines = [" " * ln + l[ln:]]
            else:
                cur_lines.append( l )

        if field or cur_lines:
            self.fields.append( DocField( field, cur_lines ) )

    def get_name( self ):
        """Return the first word of the first paragraph, or None."""
        try:
            return self.fields[0].items[0].words[0]
        except ( IndexError, TypeError, AttributeError ):
            # no field, no item, no word, or first item is a code sequence
            return None

    def dump( self, margin ):
        """Print the markup and its fields between `<tag>' and `</tag>'."""
        print( " " * margin + "<" + self.tag + ">" )
        for f in self.fields:
            f.dump( " " )
        print( " " * margin + "</" + self.tag + ">" )



class DocChapter:
    """A chapter, i.e. an ordered group of sections.

    Attributes:
      block    -- the block defining the chapter, or None
      sections -- list of section objects (filled by the processor)
      name     -- chapter name
      title    -- list of title words
      order    -- list of section names, as listed in the chapter block
    """

    def __init__( self, block ):
        """Build the chapter from `block'; a false `block' gives `Other'."""
        self.block    = block
        self.sections = []
        if block:
            self.name  = block.name
            self.title = block.get_markup_words( "title" )
            self.order = block.get_markup_words( "sections" )
        else:
            self.name  = "Other"
            self.title = "Miscellaneous".split()
            self.order = []



class DocSection:
    """A documentation section and the blocks it contains."""

    def __init__( self, name = "Other" ):
        self.name        = name
        self.blocks      = {}
        self.block_names = []  # ordered block names in section
        self.defs        = []
        self.abstract    = ""
        self.description = ""
        self.order       = []
        self.title       = "ERROR"
        self.chapter     = None

    def add_def( self, block ):
        """Record a block that (re)defines this section."""
        self.defs.append( block )

    def add_block( self, block ):
        """Add a documentation block to this section, keyed by its name."""
        self.block_names.append( block.name )
        self.blocks[block.name] = block

    def process( self ):
        """Fill title, abstract, description and order.

        The values are taken from the first defining block that has a
        non-empty `title' markup; nothing changes if there is none.
        """
        # look up one block that contains a valid section description
        for block in self.defs:
            title = block.get_markup_text( "title" )
            if title:
                self.title       = title
                self.abstract    = block.get_markup_words( "abstract" )
                self.description = block.get_markup_items( "description" )
                self.order       = block.get_markup_words( "order" )
                return

    def reorder( self ):
        """Re-sort the block names according to the section's `order'."""
        self.block_names = sort_order_list( self.block_names, self.order )



class ContentProcessor:
    """Turn the blocks of a source processor into sections and chapters."""

    def __init__( self ):
        """initialize a block content processor"""
        self.reset()

        self.sections = {}    # dictionary of documentation sections
        self.section  = None  # current documentation section

        self.chapters = []    # list of chapters

        self.headers  = {}    # dictionary of header macros

    def set_section( self, section_name ):
        """set current section during parsing"""
        # `in' works on Python 2 and 3; dict.has_key() is Python 2 only
        if section_name not in self.sections:
            self.sections[section_name] = DocSection( section_name )
        self.section = self.sections[section_name]

    def add_chapter( self, block ):
        """create a chapter from `block' and append it to the list"""
        self.chapters.append( DocChapter( block ) )

    def reset( self ):
        """reset the content processor for a new block"""
        self.markups      = []
        self.markup       = None
        self.markup_lines = []

    def add_markup( self ):
        """add a new markup section"""
        if self.markup and self.markup_lines:

            # get rid of last line of markup if it's empty
            marks = self.markup_lines
            if len( marks ) > 0 and not marks[-1].strip():
                self.markup_lines = marks[:-1]

            self.markups.append( DocMarkup( self.markup,
                                            self.markup_lines ) )

            self.markup       = None
            self.markup_lines = []

    def process_content( self, content ):
        """process a block content and return a list of DocMarkup objects
           corresponding to it"""
        first = 1

        for line in content:
            found = None
            for t in re_markup_tags:
                m = t.match( line )
                if m:
                    found  = m.group( 1 ).lower()
                    prefix = len( m.group( 0 ) )
                    # remove markup from line
                    line   = " " * prefix + line[prefix:]
                    break

            # is it the start of a new markup section ?
            if found:
                first = 0
                self.add_markup()  # add current markup content
                self.markup = found
                if len( line.strip() ) > 0:
                    self.markup_lines.append( line )
            elif first == 0:
                # lines seen before the first tag are ignored
                self.markup_lines.append( line )

        self.add_markup()

        return self.markups

    def parse_sources( self, source_processor ):
        """build a DocBlock for each documentation comment block

        The DocBlock constructor registers the new object with this
        processor, which is why the result is not kept here.
        """
        blocks = source_processor.blocks
        count  = len( blocks )

        for n in range( count ):
            source = blocks[n]
            if source.content:
                # this is a documentation comment, we need to catch
                # all following normal blocks in the "follow" list
                #
                follow = []
                m = n + 1
                while m < count and not blocks[m].content:
                    follow.append( blocks[m] )
                    m = m + 1

                DocBlock( source, follow, self )

    def finish( self ):
        """process sections and attach them to their chapters"""
        # imported here because this file does not import `sys' itself
        import sys

        # process all sections to extract their abstract, description
        # and ordered list of items
        #
        for sec in self.sections.values():
            sec.process()

        # process chapters to check that all sections are correctly
        # listed there
        for chap in self.chapters:
            for sec in chap.order:
                if sec in self.sections:
                    section = self.sections[sec]
                    section.chapter = chap
                    section.reorder()
                    chap.sections.append( section )
                else:
                    sys.stderr.write( "WARNING: chapter '" +          \
                        chap.name + "' in " + chap.block.location() + \
                        " lists unknown section '" + sec + "'\n" )

        # check that all sections are in a chapter
        #
        others = []
        for sec in self.sections.values():
            if not sec.chapter:
                others.append( sec )

        # create a new special chapter for all remaining sections
        # when necessary
        #
        if others:
            chap = DocChapter( None )
            chap.sections = others
            self.chapters.append( chap )



class DocBlock:
    """A documentation comment block together with the code it documents.

    Attributes:
      source  -- the source block holding the documentation comment
      code    -- list of source lines following the comment
      type    -- tag of the first markup, or "ERRTYPE"
      name    -- identifier found in the first paragraph, or "ERRNAME"
      section -- the processor's current section at construction time
      markups -- list of DocMarkup objects
    """

    def __init__( self, source, follow, processor ):
        """Parse `source' and register the new block with `processor'.

        `follow' is the list of normal source blocks that come after the
        documentation comment.
        """
        processor.reset()

        self.source  = source
        self.code    = []
        self.type    = "ERRTYPE"
        self.name    = "ERRNAME"
        self.section = processor.section
        self.markups = processor.process_content( source.content )

        # compute block type from first markup tag
        if self.markups:
            self.type = self.markups[0].tag

        # compute block name from first markup paragraph
        try:
            markup = self.markups[0]
            para   = markup.fields[0].items[0]
            name   = para.words[0]
            m = re_identifier.match( name )
            if m:
                name = m.group( 1 )
            self.name = name
        except ( IndexError, TypeError, AttributeError ):
            # no markup, field, item or word, or the first item is a code
            # sequence: keep the "ERRNAME" placeholder
            pass

        if self.type == "section":
            # detect new section starts
            processor.set_section( self.name )
            processor.section.add_def( self )
        elif self.type == "chapter":
            # detect new chapter
            processor.add_chapter( self )
        else:
            processor.section.add_block( self )

        # now, compute the source lines relevant to this documentation
        # block. We keep normal comments in for obvious reasons (??)
        source = []
        for b in follow:
            if b.format:
                break
            for l in b.lines:
                # collect header macro definitions
                m = re_header_macro.match( l )
                if m:
                    processor.headers[m.group( 2 )] = m.group( 1 )

                # we use "/* */" as a separator
                if re_source_sep.match( l ):
                    break
                source.append( l )

        # now strip the leading and trailing empty lines from the sources
        start = 0
        end   = len( source ) - 1

        while start < end and not source[start].strip():
            start = start + 1

        while start < end and not source[end].strip():
            end = end - 1

        if start == end and not source[start].strip():
            self.code = []
        else:
            self.code = source[start:end + 1]

    def location( self ):
        """Return the location reported by the underlying source block."""
        return self.source.location()

    def get_markup( self, tag_name ):
        """return the DocMarkup corresponding to a given tag in a block"""
        wanted = tag_name.lower()
        for m in self.markups:
            if m.tag == wanted:
                return m
        return None

    def get_markup_words( self, tag_name ):
        """return the words of the first item of a markup, or []"""
        m = self.get_markup( tag_name )
        try:
            words = m.fields[0].items[0].words
        except ( AttributeError, IndexError ):
            # unknown tag, or markup without field or item
            return []
        # a code sequence has `words' set to None; callers expect a list
        return words or []

    def get_markup_text( self, tag_name ):
        """return the words of a markup joined with single spaces"""
        return " ".join( self.get_markup_words( tag_name ) )

    def get_markup_items( self, tag_name ):
        """return the items of the first field of a markup, or None"""
        m = self.get_markup( tag_name )
        try:
            return m.fields[0].items
        except ( AttributeError, IndexError ):
            return None

# eof