#  Content (c) 2002, 2004, 2006-2009, 2012
#    David Turner <david@freetype.org>
#
#  This file contains routines used to parse the content of documentation
#  comment blocks and build more structured objects out of them.
#

from sources import *
from utils import *
import re
import string


# The two expressions below detect code sequences, i.e., code fragments
# enclosed between a '{' and a '}' that each stand alone on their line:
#
#   {
#     x = y + z;
#     if ( zookoo == 2 )
#     {
#       foobar();
#     }
#   }
#
# Group 1 captures the indentation in front of the brace.  The closing
# brace is expected at the indentation of the opening one; braces found at
# a greater indentation are part of the code sequence itself.
#
re_code_start = re.compile( r"(\s*){\s*$" )
re_code_end   = re.compile( r"(\s*)}\s*$" )


# This expression isolates a leading identifier (word characters and
# dashes) from the text that follows it.
#
re_identifier = re.compile( r'((?:\w|-)*)' )


# Macros ending in `_H' are collected: group 1 is the macro name, group 2
# the header file name found between '<' and '>'.  While outputting the
# object data, this info is used together with the object's file location
# to emit the appropriate header file macro and name before the object
# itself.
#
re_header_macro = re.compile( r'^#define\s{1,}(\w{1,}_H)\s{1,}<(.*)>' )


#############################################################################
#
# The DocCode class is used to store source code lines.
#
#   'self.lines' contains a set of source code lines that will be dumped as
#   HTML in a <PRE> tag.
#
#   The object is filled line by line by the parser; it strips the leading
#   "margin" space from each input line before storing it in 'self.lines'.
#
class DocCode:

    def __init__( self, margin, lines ):
        """store `lines' as verbatim source code; the first `margin'
           columns of a line are removed when they only hold whitespace"""
        self.lines = []
        self.words = None    # a code sequence has no word list

        # remove margin spaces; a line with text inside the margin is
        # stored unchanged so that no code gets cut away
        for line in lines:
            if line[:margin].strip() == "":
                line = line[margin:]
            self.lines.append( line )

    def dump( self, prefix = "", width = 60 ):
        """print the code lines on stdout, each preceded by `prefix'"""
        # single-argument `print( x )' behaves the same on Python 2 and 3
        for line in self.dump_lines( 0, width ):
            print( prefix + line )

    def dump_lines( self, margin = 0, width = 60 ):
        """return the code lines indented by `margin' spaces; `width' is
           accepted for symmetry with DocPara but code is never wrapped"""
        return [" " * margin + line for line in self.lines]



#############################################################################
#
# The DocPara class is used to store "normal" text paragraph.
#
#   'self.words' contains the list of words that make up the paragraph
#
class DocPara:

    def __init__( self, lines ):
        """split all `lines' into whitespace-separated words"""
        self.lines = None    # a paragraph has no verbatim line list
        self.words = []
        for line in lines:
            self.words.extend( line.split() )

    def dump( self, prefix = "", width = 60 ):
        """print the wrapped paragraph on stdout, each line preceded by
           `prefix'"""
        for line in self.dump_lines( 0, width ):
            print( prefix + line )

    def dump_lines( self, margin = 0, width = 60 ):
        """return the paragraph as a list of lines of at most `width'
           characters (a single word longer than `width' gets a line of
           its own), each indented by `margin' spaces"""
        cur    = ""  # current line
        col    = 0   # current width
        result = []

        for word in self.words:
            ln = len( word )
            if col > 0:
                ln = ln + 1    # account for the separating space

            # only break when the current line already holds a word;
            # otherwise an over-long first word would emit an empty line
            if col > 0 and col + ln > width:
                result.append( " " * margin + cur )
                cur = word
                col = len( word )
            else:
                if col > 0:
                    cur = cur + " "
                cur = cur + word
                col = col + ln

        if col > 0:
            result.append( " " * margin + cur )

        return result



#############################################################################
#
# The DocField class is used to store a list containing either DocPara or
# DocCode objects.
# Each DocField also has an optional "name" which is used
# when the object corresponds to a field or value definition
#
class DocField:

    def __init__( self, name, lines ):
        """split `lines' into paragraphs (DocPara) and code sequences
           (DocCode) and store them, in order, in `self.items'"""
        self.name  = name  # can be None for normal paragraphs/sources
        self.items = []    # list of items

        in_code   = False  # are we inside a `{ ... }' code sequence?
        margin    = -1     # current code sequence indentation
        cur_lines = []     # lines of the item being collected

        # now analyze the markup lines to see if they contain paragraphs
        # or code sequences
        #
        for line in lines:
            if in_code:
                m = re_code_end.match( line )
                if m and len( m.group( 1 ) ) <= margin:
                    # that's it, we finished the code sequence; the margin
                    # was already removed while collecting, hence the 0
                    self.items.append( DocCode( 0, cur_lines ) )
                    margin    = -1
                    cur_lines = []
                    in_code   = False
                else:
                    # nope, continue the code sequence
                    cur_lines.append( line[margin:] )
                continue

            m = re_code_start.match( line )
            if m:
                # start of a code sequence: save the current paragraph
                if cur_lines:
                    self.items.append( DocPara( cur_lines ) )
                    cur_lines = []

                # switch to code extraction mode
                margin  = len( m.group( 1 ) )
                in_code = True
            elif not line.split() and cur_lines:
                # an empty line ends the current paragraph, if any
                self.items.append( DocPara( cur_lines ) )
                cur_lines = []
            else:
                # otherwise, simply add the line to the current paragraph
                cur_lines.append( line )

        if in_code:
            # unexpected end of code sequence; the collected lines have
            # already lost their margin, so it must not be removed again
            self.items.append( DocCode( 0, cur_lines ) )
        elif cur_lines:
            self.items.append( DocPara( cur_lines ) )

    def dump( self, prefix = "" ):
        """print the field on stdout; a named field is introduced by a
           `name ::' line and its items get `----' appended to `prefix'"""
        # the attribute set by the constructor is `name', not `field'
        if self.name:
            print( prefix + self.name + " ::" )
            prefix = prefix + "----"

        first = 1
        for item in self.items:
            if not first:
                print( "" )
            item.dump( prefix )
            first = 0

    def dump_lines( self, margin = 0, width = 60 ):
        """return the lines of all items, with one empty line between two
           consecutive items"""
        result = []

        for index, item in enumerate( self.items ):
            if index > 0:
                result.append( "" )
            result.extend( item.dump_lines( margin, width ) )

        return result



# this regular expression is used to detect field definitions
#
re_field = re.compile( r"\s*(\w*|\w(\w|\.)*\w)\s*::" )



class DocMarkup:

    def __init__( self, tag, lines ):
        """split the `lines' of markup section `tag' into DocField
           objects; a line starting with `name ::' opens a new field"""
        self.tag    = tag.lower()
        self.fields = []

        cur_lines = []
        field     = None

        for line in lines:
            m = re_field.match( line )
            if m:
                # we detected the start of a new field definition;
                # first, save the current one
                if cur_lines:
                    self.fields.append( DocField( field, cur_lines ) )

                field = m.group( 1 )  # record field name

                # blank out the `name ::' part but keep the columns
                ln        = len( m.group( 0 ) )
                cur_lines = [" " * ln + line[ln:]]
            else:
                cur_lines.append( line )

        if field or cur_lines:
            self.fields.append( DocField( field, cur_lines ) )

    def get_name( self ):
        """return the first word of the first item of the first field,
           or None if there is no such word"""
        try:
            return self.fields[0].items[0].words[0]
        except ( AttributeError, IndexError, TypeError ):
            return None

    def get_start( self ):
        """return the words of the first item of the first field joined
           by single spaces, or "ERROR" if they are not available"""
        try:
            return " ".join( self.fields[0].items[0].words )
        except ( AttributeError, IndexError, TypeError ):
            return "ERROR"

    def dump( self, margin ):
        """print the markup on stdout, enclosed in `<tag>' and `</tag>'
           lines indented by `margin' spaces"""
        print( " " * margin + "<" + self.tag + ">" )
        for f in self.fields:
            f.dump( " " )
        print( " " * margin + "</" + self.tag + ">" )



class DocChapter:

    def __init__( self, block ):
        """build a chapter from its documentation `block'; when `block'
           is None, build the catch-all chapter named "Other" """
        self.block    = block
        self.sections = []
        if block:
            self.name  = block.name
            self.title = block.get_markup_words( "title" )
            self.order = block.get_markup_words( "sections" )
        else:
            self.name  = "Other"
            self.title = "Miscellaneous".split()
            self.order = []



class DocSection:

    def __init__( self, name = "Other" ):
        self.name        = name
        self.blocks      = {}
        self.block_names = []  # ordered block names in section
        self.defs        = []
        self.abstract    = ""
        self.description = ""
        self.order       = []
        self.title       = "ERROR"
        self.chapter     = None

    def add_def( self, block ):
        """record `block' as a candidate section description"""
        self.defs.append( block )

    def add_block( self, block ):
        """add `block' to the section, indexed by its name"""
        self.block_names.append( block.name )
        self.blocks[block.name] = block

    def process( self ):
        """take title, abstract, description and order from the first
           recorded definition block that has a non-empty title"""
        for block in self.defs:
            title = block.get_markup_text( "title" )
            if title:
                self.title       = title
                self.abstract    = block.get_markup_words( "abstract" )
                self.description = block.get_markup_items( "description" )
                self.order       = block.get_markup_words( "order" )
                return

    def reorder( self ):
        """reorder the block names with `sort_order_list' and
           `self.order'"""
        self.block_names = sort_order_list( self.block_names, self.order )



class ContentProcessor:

    def __init__( self ):
        """initialize a block content processor"""
        self.reset()

        self.sections = {}    # dictionary of documentation sections
        self.section  = None  # current documentation section

        self.chapters = []    # list of chapters

        self.headers  = {}    # dictionary of header macros

    def set_section( self, section_name ):
        """set current section during parsing"""
        if section_name not in self.sections:
            self.sections[section_name] = DocSection( section_name )
        self.section = self.sections[section_name]

    def add_chapter( self, block ):
        """append a new chapter built from `block'"""
        self.chapters.append( DocChapter( block ) )

    def reset( self ):
        """reset the content processor for a new block"""
        self.markups      = []
        self.markup       = None
        self.markup_lines = []

    def add_markup( self ):
        """add a new markup section"""
        if self.markup and self.markup_lines:

            # get rid of last line of markup if it's empty
            marks = self.markup_lines
            if not marks[-1].strip():
                self.markup_lines = marks[:-1]

            self.markups.append( DocMarkup( self.markup,
                                            self.markup_lines ) )

            self.markup       = None
            self.markup_lines = []

    def process_content( self, content ):
        """process a block content and return a list of DocMarkup objects
           corresponding to it"""
        seen_tag = False  # lines before the first markup tag are dropped

        for line in content:
            found = None
            for t in re_markup_tags:
                m = t.match( line )
                if m:
                    found  = m.group( 1 ).lower()
                    prefix = len( m.group( 0 ) )
                    # remove markup from line, keeping the columns
                    line = " " * prefix + line[prefix:]
                    break

            # is it the start of a new markup section ?
            if found:
                seen_tag = True
                self.add_markup()  # add current markup content
                self.markup = found
                if line.strip():
                    self.markup_lines.append( line )
            elif seen_tag:
                self.markup_lines.append( line )

        self.add_markup()

        return self.markups

    def parse_sources( self, source_processor ):
        """create a DocBlock for each block of `source_processor' that
           has content"""
        blocks = source_processor.blocks
        count  = len( blocks )

        for n, source in enumerate( blocks ):
            if not source.content:
                continue

            # this is a documentation comment, we need to catch
            # all following normal blocks in the "follow" list
            #
            follow = []
            m      = n + 1
            while m < count and not blocks[m].content:
                follow.append( blocks[m] )
                m = m + 1

            # the DocBlock constructor registers the new object with
            # this processor, so the result needs not be kept here
            DocBlock( source, follow, self )

    def finish( self ):
        """process all sections, attach them to their chapters, and put
           the sections without a chapter into a final "Other" chapter"""
        # this file does not import `sys' itself
        import sys

        # process all sections to extract their abstract, description
        # and ordered list of items
        #
        for sec in self.sections.values():
            sec.process()

        # process chapters to check that all sections are correctly
        # listed there
        for chap in self.chapters:
            for sec in chap.order:
                if sec in self.sections:
                    section         = self.sections[sec]
                    section.chapter = chap
                    section.reorder()
                    chap.sections.append( section )
                else:
                    sys.stderr.write( "WARNING: chapter '" +
                        chap.name + "' in " + chap.block.location() +
                        " lists unknown section '" + sec + "'\n" )

        # check that all sections are in a chapter
        #
        others = [sec for sec in self.sections.values()
                      if not sec.chapter]

        # create a new special chapter for all remaining sections
        # when necessary
        #
        if others:
            chap          = DocChapter( None )
            chap.sections = others
            self.chapters.append( chap )



class DocBlock:

    def __init__( self, source, follow, processor ):
        """build a documentation block from the comment block `source'
           and the blocks in `follow', and register it with `processor'"""
        processor.reset()

        self.source  = source
        self.code    = []
        self.type    = "ERRTYPE"
        self.name    = "ERRNAME"
        self.section = processor.section
        self.markups = processor.process_content( source.content )

        # compute block type from first markup tag
        try:
            self.type = self.markups[0].tag
        except ( AttributeError, IndexError, TypeError ):
            pass    # no markup at all: keep "ERRTYPE"

        # compute block name from first markup paragraph
        try:
            name = self.markups[0].fields[0].items[0].words[0]
            m    = re_identifier.match( name )
            if m:
                name = m.group( 1 )
            self.name = name
        except ( AttributeError, IndexError, TypeError ):
            pass    # no usable first word: keep "ERRNAME"

        if self.type == "section":
            # detect new section starts
            processor.set_section( self.name )
            processor.section.add_def( self )
        elif self.type == "chapter":
            # detect new chapter
            processor.add_chapter( self )
        else:
            # NOTE(review): `processor.section' is None until a section
            # block has been seen, in which case this call fails --
            # confirm that the input always starts with a section
            processor.section.add_block( self )

        # now, compute the source lines relevant to this documentation
        # block. We keep normal comments in for obvious reasons (??)
        code_lines = []
        for b in follow:
            if b.format:
                break
            for line in b.lines:
                # collect header macro definitions
                m = re_header_macro.match( line )
                if m:
                    processor.headers[m.group( 2 )] = m.group( 1 )

                # we use "/* */" as a separator
                if re_source_sep.match( line ):
                    break
                code_lines.append( line )

        # now strip the leading and trailing empty lines from the sources
        start = 0
        end   = len( code_lines ) - 1

        while start < end and not code_lines[start].strip():
            start = start + 1

        while start < end and not code_lines[end].strip():
            end = end - 1

        if start == end and not code_lines[start].strip():
            self.code = []
        else:
            self.code = code_lines[start:end + 1]

    def location( self ):
        """return the location reported by the source block"""
        return self.source.location()

    def get_markup( self, tag_name ):
        """return the DocMarkup corresponding to a given tag in a block"""
        wanted = tag_name.lower()
        for m in self.markups:
            if m.tag == wanted:
                return m
        return None

    def get_markup_name( self, tag_name ):
        """return the name of a given primary markup in a block"""
        try:
            return self.get_markup( tag_name ).get_name()
        except AttributeError:
            # no markup with this tag
            return None

    def get_markup_words( self, tag_name ):
        """return the word list of the first item of the first field of
           markup `tag_name'; the list is empty if it is not available"""
        try:
            words = self.get_markup( tag_name ).fields[0].items[0].words
        except ( AttributeError, IndexError, TypeError ):
            return []

        # an item without a word list has `words' set to None; callers
        # iterate over or join the result, so hand back an empty list
        if words is None:
            return []
        return words

    def get_markup_text( self, tag_name ):
        """return the words of markup `tag_name' joined by single
           spaces"""
        return " ".join( self.get_markup_words( tag_name ) )

    def get_markup_items( self, tag_name ):
        """return the item list of the first field of markup `tag_name',
           or None if it is not available"""
        try:
            return self.get_markup( tag_name ).fields[0].items
        except ( AttributeError, IndexError, TypeError ):
            return None

# eof