#
#  content.py
#
#    Parse comment blocks to build content blocks (library file).
#
#  Copyright 2002-2015 by
#  David Turner.
#
#  This file is part of the FreeType project, and may only be used,
#  modified, and distributed under the terms of the FreeType project
#  license, LICENSE.TXT.  By continuing to use, modify, or distribute
#  this file you indicate that you have read the license and
#  understand and accept it fully.

#
# This file contains routines to parse documentation comment blocks,
# building more structured objects out of them.
#


from sources import *
from utils import *

import string, re


#
# Regular expressions to detect code sequences.  `Code sequences' are simply
# code fragments embedded in '{' and '}', as demonstrated in the following
# example.
#
#   {
#     x = y + z;
#     if ( zookoo == 2 )
#     {
#       foobar();
#     }
#   }
#
# Note that the indentation of the first opening brace and the last closing
# brace must be exactly the same.  The code sequence itself should have a
# larger indentation than the surrounding braces.
#
# In both expressions, group 1 captures the whitespace in front of the
# brace, so that its length gives the indentation of the brace.
#
re_code_start = re.compile( r"(\s*){\s*$" )
re_code_end   = re.compile( r"(\s*)}\s*$" )


#
# A regular expression to isolate identifiers from other text.  Group 1
# captures the leading run of word characters and hyphens (possibly empty).
#
re_identifier = re.compile( r'((?:\w|-)*)' )


#
# We collect macro names ending in `_H' (group 1), as defined in
# `freetype/config/ftheader.h'.  While outputting the object data, we use
# this info together with the object's file location (group 2) to emit the
# appropriate header file macro and its associated file name before the
# object itself.
#
# Example:
#
#   #define FT_FREETYPE_H <freetype.h>
#
re_header_macro = re.compile( r'^#define\s{1,}(\w{1,}_H)\s{1,}<(.*)>' )


################################################################
##
##  DOC CODE CLASS
##
##  The `DocCode' class is used to store source code lines.
##
##  `self.lines' contains a set of source code lines that will be dumped as
##  HTML in a <PRE> tag.
##
##  The object is filled line by line by the parser; it strips the leading
##  `margin' space from each input line before storing it in `self.lines'.
##
class DocCode:

    def __init__( self, margin, lines ):
        """Store `lines', stripping the leading `margin' columns.

        The first `margin' characters of a line are removed only when they
        consist solely of whitespace; any other line is stored unchanged.
        """
        self.lines = []
        self.words = None   # code blocks carry no word list

        # remove margin spaces
        #
        # `str' methods are used instead of the `string' module functions
        # so that the code runs on both Python 2 and Python 3.
        for l in lines:
            if l[:margin].strip() == "":
                l = l[margin:]
            self.lines.append( l )

    def dump( self, prefix = "", width = 60 ):
        """Print every stored line to standard output, preceded by `prefix'."""
        for l in self.dump_lines( 0, width ):
            # the parenthesized form is valid for both Python 2 and 3
            print( prefix + l )

    def dump_lines( self, margin = 0, width = 60 ):
        """Return the stored lines, each one indented by `margin' spaces.

        `width' is not used here; code lines are returned without any
        re-wrapping.
        """
        return [" " * margin + l for l in self.lines]



################################################################
##
##  DOC PARA CLASS
##
##  `Normal' text paragraphs are stored in the `DocPara' class.
##
##  `self.words' contains the list of words that make up the paragraph.
##
class DocPara:

    def __init__( self, lines ):
        """Split every line of `lines' on whitespace and collect the words."""
        self.lines = None   # paragraphs carry no raw line list
        self.words = []
        for l in lines:
            # `str.split()' without arguments already ignores leading and
            # trailing whitespace, so no separate strip is necessary
            self.words.extend( l.split() )

    def dump( self, prefix = "", width = 60 ):
        """Print the paragraph, wrapped to `width', with `prefix' per line."""
        for l in self.dump_lines( 0, width ):
            # the parenthesized form is valid for both Python 2 and 3
            print( prefix + l )

    def dump_lines( self, margin = 0, width = 60 ):
        """Return the paragraph as a list of lines wrapped to `width'.

        Words are joined with single spaces, and a new line is started
        whenever adding the next word would exceed `width' columns.  Each
        returned line is indented by `margin' spaces (the margin does not
        count against `width').  A single word wider than `width' is put on
        a line of its own.
        """
        cur    = ""  # current line
        col    = 0   # current width
        result = []

        for word in self.words:
            ln = len( word )
            if col > 0:
                ln = ln + 1   # account for the separating space

            # Only flush if there is something on the current line;
            # otherwise an over-long first word would produce a spurious
            # empty line in front of it.
            if col > 0 and col + ln > width:
                result.append( " " * margin + cur )
                cur = word
                col = len( word )
            else:
                if col > 0:
                    cur = cur + " "
                cur = cur + word
                col = col + ln

        if col > 0:
            result.append( " " * margin + cur )

        return result


################################################################
##
##  DOC FIELD CLASS
##
##  The `DocField' class stores a list containing either `DocPara' or
##  `DocCode' objects.  Each DocField object also has an optional `name'
##  that is used when the object corresponds to a field or value definition.
##
class DocField:

    def __init__( self, name, lines ):
        """Split `lines' into paragraphs and code sequences.

        `name' is stored as is.  The lines are scanned in order: a line
        matching `re_code_start' opens a code sequence, which is closed by
        a line matching `re_code_end' whose indentation is not larger than
        that of the opening brace.  Outside of code sequences, an empty
        line terminates the current paragraph.  The resulting `DocPara' and
        `DocCode' objects are appended to `self.items'.
        """
        self.name  = name  # can be `None' for normal paragraphs/sources
        self.items = []    # list of items

        in_code   = False  # are we inside a code sequence?
        margin    = -1     # current code sequence indentation
        cur_lines = []

        # analyze the markup lines to check whether they contain paragraphs,
        # code sequences, or fields definitions
        #
        for l in lines:
            # are we parsing a code sequence?
            if in_code:
                m = re_code_end.match( l )
                if m and len( m.group( 1 ) ) <= margin:
                    # that's it, we finished the code sequence
                    self.items.append( DocCode( 0, cur_lines ) )
                    margin    = -1
                    cur_lines = []
                    in_code   = False
                else:
                    # otherwise continue the code sequence; the margin is
                    # removed right here
                    cur_lines.append( l[margin:] )
                continue

            # start of code sequence?
            m = re_code_start.match( l )
            if m:
                # save current lines
                if cur_lines:
                    self.items.append( DocPara( cur_lines ) )
                    cur_lines = []

                # switch to code extraction mode
                margin  = len( m.group( 1 ) )
                in_code = True
            elif not l.split() and cur_lines:
                # if the line is empty, we end the current paragraph,
                # if any
                self.items.append( DocPara( cur_lines ) )
                cur_lines = []
            else:
                # otherwise, simply add the line to the current
                # paragraph
                cur_lines.append( l )

        if in_code:
            # unexpected end of code sequence
            #
            # The margin has already been removed from every collected line
            # above, so we pass 0 here (as for a properly terminated
            # sequence) to avoid stripping the indentation twice.
            self.items.append( DocCode( 0, cur_lines ) )
        elif cur_lines:
            self.items.append( DocPara( cur_lines ) )

    def dump( self, prefix = "" ):
        """Print the field to standard output.

        For a named field, a `<name> ::' header line is printed first and
        the items are printed with `----' appended to `prefix'.  Items are
        separated by an empty line.
        """
        if self.name:
            print( prefix + self.name + " ::" )
            prefix = prefix + "----"

        for i, p in enumerate( self.items ):
            if i > 0:
                print( "" )
            p.dump( prefix )

    def dump_lines( self, margin = 0, width = 60 ):
        """Return the lines of all items, separated by empty strings."""
        result = []

        for i, p in enumerate( self.items ):
            if i > 0:
                result.append( "" )

            result.extend( p.dump_lines( margin, width ) )

        return result


#
# A regular expression to detect field definitions.
#
# Examples:
#
#   foo     ::
#   foo.bar ::
#
# Group 1 captures the field name.
#
re_field = re.compile( r"""
                         \s*
                           (
                             \w*
                           |
                             \w (\w | \.)* \w
                           )
                         \s* ::
                       """, re.VERBOSE )


################################################################
##
##  DOC MARKUP CLASS
##
class DocMarkup:

    def __init__( self, tag, lines ):
        """Split the `lines' of a markup section into `DocField' objects.

        `tag' is stored lowercased in `self.tag'.  A line matching
        `re_field' starts a new field; all other lines are added to the
        field currently being collected.
        """
        self.tag    = tag.lower()
        self.fields = []

        cur_lines = []
        field     = None

        for l in lines:
            m = re_field.match( l )
            if m:
                # We detected the start of a new field definition.

                # first, save the current one
                if cur_lines:
                    self.fields.append( DocField( field, cur_lines ) )

                field = m.group( 1 )   # record field name

                # blank out the `name ::' part while keeping the columns
                # of the remaining text unchanged
                ln        = len( m.group( 0 ) )
                cur_lines = [" " * ln + l[ln:]]
            else:
                cur_lines.append( l )

        if field or cur_lines:
            self.fields.append( DocField( field, cur_lines ) )

    def get_name( self ):
        """Return the first word of the first item of the first field.

        Return `None' if there is no such word.
        """
        try:
            return self.fields[0].items[0].words[0]
        except ( AttributeError, IndexError, TypeError ):
            # no field, no item, or an item without a word list
            return None

    def dump( self, margin ):
        """Print the markup and its fields between `<tag>' and `</tag>'."""
        print( " " * margin + "<" + self.tag + ">" )
        for f in self.fields:
            f.dump( " " )
        print( " " * margin + "</" + self.tag + ">" )


################################################################
##
##  DOC CHAPTER CLASS
##
class DocChapter:

    def __init__( self, block ):
        """Build a chapter from `block'.

        Name, title, and section order are taken from `block' (using its
        `title' and `sections' markups).  If `block' is false, a default
        chapter called `Other' with the title `Miscellaneous' is created.
        """
        self.block    = block
        self.sections = []
        if block:
            self.name  = block.name
            self.title = block.get_markup_words( "title" )
            self.order = block.get_markup_words( "sections" )
        else:
            self.name  = "Other"
            self.title = "Miscellaneous".split()
            self.order = []


################################################################
##
##  DOC SECTION CLASS
##
class DocSection:

    def __init__( self, name = "Other" ):
        """Create an empty section called `name'."""
        self.name        = name
        self.blocks      = {}
        self.block_names = []  # ordered block names in section
        self.defs        = []
        self.abstract    = ""
        self.description = ""
        self.order       = []
        self.title       = "ERROR"
        self.chapter     = None

    def add_def( self, block ):
        """Append `block' to the list of section definition blocks."""
        self.defs.append( block )

    def add_block( self, block ):
        """Register `block' in this section under its name."""
        self.block_names.append( block.name )
        self.blocks[block.name] = block

    def process( self ):
        """Fill in title, abstract, description, and order.

        The data is taken from the first definition block that has a
        non-empty `title' markup; if there is none, nothing is changed.
        """
        # look up one block that contains a valid section description
        for block in self.defs:
            title = block.get_markup_text( "title" )
            if title:
                self.title       = title
                self.abstract    = block.get_markup_words( "abstract" )
                self.description = block.get_markup_items( "description" )
                self.order       = block.get_markup_words_all( "order" )
                return

    def reorder( self ):
        """Reorder `self.block_names' according to `self.order'."""
        # NOTE(review): `sort_order_list' is not defined in this file; it
        # is presumably provided by the `utils' star import.
        self.block_names = sort_order_list( self.block_names, self.order )


################################################################
##
##  CONTENT PROCESSOR CLASS
##
class ContentProcessor:

    def __init__( self ):
        """Initialize a block content processor."""
        self.reset()

        self.sections = {}    # dictionary of documentation sections
        self.section  = None  # current documentation section

        self.chapters = []    # list of chapters

        self.headers  = {}    # dictionary of header macros

    def set_section( self, section_name ):
        """Set current section during parsing."""
        if section_name not in self.sections:
            self.sections[section_name] = DocSection( section_name )

        self.section = self.sections[section_name]

    def add_chapter( self, block ):
        """Create a `DocChapter' from `block' and append it to the chapters."""
        self.chapters.append( DocChapter( block ) )

    def reset( self ):
        """Reset the content processor for a new block."""
        self.markups      = []
        self.markup       = None
        self.markup_lines = []

    def add_markup( self ):
        """Add a new markup section."""
        if self.markup and self.markup_lines:

            # get rid of last line of markup if it's empty
            marks = self.markup_lines
            if not marks[-1].strip():
                self.markup_lines = marks[:-1]

            self.markups.append( DocMarkup( self.markup,
                                            self.markup_lines ) )

            self.markup       = None
            self.markup_lines = []

    def process_content( self, content ):
        """Process a block content and return a list of DocMarkup objects
           corresponding to it."""
        seen_tag = False   # lines before the first markup tag are ignored

        for line in content:
            found = None
            # NOTE(review): `re_markup_tags' is not defined in this file;
            # it is presumably provided by the `sources' star import.
            for t in re_markup_tags:
                m = t.match( line )
                if m:
                    found  = m.group( 1 ).lower()
                    prefix = len( m.group( 0 ) )
                    # remove markup from line
                    line   = " " * prefix + line[prefix:]
                    break

            # is it the start of a new markup section ?
            if found:
                seen_tag = True
                self.add_markup()  # add current markup content
                self.markup = found
                if line.strip():
                    self.markup_lines.append( line )
            elif seen_tag:
                self.markup_lines.append( line )

        self.add_markup()

        return self.markups

    def parse_sources( self, source_processor ):
        """Create a `DocBlock' for every documentation comment block.

        Each block of `source_processor.blocks' with content is paired
        with the run of content-less blocks directly following it.
        """
        blocks = source_processor.blocks
        count  = len( blocks )

        for n in range( count ):
            source = blocks[n]
            if source.content:
                # this is a documentation comment, we need to catch
                # all following normal blocks in the "follow" list
                #
                follow = []
                m      = n + 1
                while m < count and not blocks[m].content:
                    follow.append( blocks[m] )
                    m = m + 1

                # the constructor registers the new block with this
                # processor, so the object need not be kept here
                DocBlock( source, follow, self )

    def finish( self ):
        """Process all sections and attach them to their chapters.

        Sections not listed in any chapter are collected in an additional
        default chapter.  A warning is written to standard error for every
        section name a chapter lists that is not known.
        """
        # `sys' is not imported explicitly at the top of this file
        import sys

        # process all sections to extract their abstract, description
        # and ordered list of items
        #
        for sec in self.sections.values():
            sec.process()

        # process chapters to check that all sections are correctly
        # listed there
        for chap in self.chapters:
            for sec in chap.order:
                if sec in self.sections:
                    section = self.sections[sec]
                    section.chapter = chap
                    section.reorder()
                    chap.sections.append( section )
                else:
                    sys.stderr.write( "WARNING: chapter '" +
                      chap.name + "' in " + chap.block.location() +
                      " lists unknown section '" + sec + "'\n" )

        # check that all sections are in a chapter
        #
        others = []
        for sec in self.sections.values():
            if not sec.chapter:
                sec.reorder()
                others.append( sec )

        # create a new special chapter for all remaining sections
        # when necessary
        #
        if others:
            chap = DocChapter( None )
            chap.sections = others
            self.chapters.append( chap )


################################################################
##
##  DOC BLOCK CLASS
##
class DocBlock:

    def __init__( self, source, follow, processor ):
        """Build a documentation block and register it with `processor'.

        `source' is the block holding the documentation comment, `follow'
        the list of blocks following it.  The block type is the tag of the
        first markup, the block name the first identifier of the first
        markup paragraph.  A `section' block selects the current section
        of `processor', a `chapter' block adds a chapter, and any other
        block is added to the current section.
        """
        processor.reset()

        self.source  = source
        self.code    = []
        self.type    = "ERRTYPE"
        self.name    = "ERRNAME"
        self.section = processor.section
        self.markups = processor.process_content( source.content )

        # compute block type from first markup tag
        try:
            self.type = self.markups[0].tag
        except ( AttributeError, IndexError ):
            pass   # no markup at all; keep the error marker

        # compute block name from first markup paragraph
        try:
            markup = self.markups[0]
            para   = markup.fields[0].items[0]
            name   = para.words[0]
            m = re_identifier.match( name )
            if m:
                name = m.group( 1 )
            self.name = name
        except ( AttributeError, IndexError, TypeError ):
            pass   # no usable first paragraph; keep the error marker

        if self.type == "section":
            # detect new section starts
            processor.set_section( self.name )
            processor.section.add_def( self )
        elif self.type == "chapter":
            # detect new chapter
            processor.add_chapter( self )
        else:
            processor.section.add_block( self )

        # now, compute the source lines relevant to this documentation
        # block.  We keep normal comments in for obvious reasons (??)
        source = []
        for b in follow:
            if b.format:
                break
            for l in b.lines:
                # collect header macro definitions
                m = re_header_macro.match( l )
                if m:
                    processor.headers[m.group( 2 )] = m.group( 1 )

                # we use "/* */" as a separator
                #
                # NOTE(review): `re_source_sep' is not defined in this
                # file; it is presumably provided by the `sources' star
                # import.
                if re_source_sep.match( l ):
                    break
                source.append( l )

        # now strip the leading and trailing empty lines from the sources
        start = 0
        end   = len( source ) - 1

        while start < end and not source[start].strip():
            start = start + 1

        while start < end and not source[end].strip():
            end = end - 1

        if start == end and not source[start].strip():
            self.code = []
        else:
            self.code = source[start:end + 1]

    def location( self ):
        """Return the location of the source block."""
        return self.source.location()

    def get_markup( self, tag_name ):
        """Return the DocMarkup corresponding to a given tag in a block."""
        wanted = tag_name.lower()
        for m in self.markups:
            if m.tag == wanted:
                return m
        return None

    def get_markup_words( self, tag_name ):
        """Return the words of the first item of markup `tag_name'.

        An empty list is returned if the markup or the item is missing.
        """
        try:
            m = self.get_markup( tag_name )
            return m.fields[0].items[0].words
        except ( AttributeError, IndexError ):
            return []

    def get_markup_words_all( self, tag_name ):
        """Return the words of all items of markup `tag_name'.

        The words of every item are followed by the sentinel `/empty/'.
        An empty list is returned if the markup is missing or an item has
        no word list.
        """
        try:
            m = self.get_markup( tag_name )
            words = []
            for item in m.fields[0].items:
                # We honour empty lines in an `<Order>' section element by
                # adding the sentinel `/empty/'.  The formatter should then
                # convert it to an appropriate representation in the
                # `section_enter' function.
                words += item.words
                words.append( "/empty/" )
            return words
        except ( AttributeError, IndexError, TypeError ):
            return []

    def get_markup_text( self, tag_name ):
        """Return the words of markup `tag_name' joined by single spaces."""
        return " ".join( self.get_markup_words( tag_name ) )

    def get_markup_items( self, tag_name ):
        """Return the items of the first field of markup `tag_name'.

        `None' is returned if the markup or the field is missing.
        """
        try:
            m = self.get_markup( tag_name )
            return m.fields[0].items
        except ( AttributeError, IndexError ):
            return None

# eof