1#
2#  content.py
3#
4#    Parse comment blocks to build content blocks (library file).
5#
6#  Copyright 2002-2016 by
7#  David Turner.
8#
9#  This file is part of the FreeType project, and may only be used,
10#  modified, and distributed under the terms of the FreeType project
11#  license, LICENSE.TXT.  By continuing to use, modify, or distribute
12#  this file you indicate that you have read the license and
13#  understand and accept it fully.
14
15#
16# This file contains routines to parse documentation comment blocks,
17# building more structured objects out of them.
18#
19
20
21from sources import *
22from utils   import *
23
24import string, re
25
26
27#
28# Regular expressions to detect code sequences.  `Code sequences' are simply
29# code fragments embedded in '{' and '}', as demonstrated in the following
30# example.
31#
32#   {
33#     x = y + z;
34#     if ( zookoo == 2 )
35#     {
36#       foobar();
37#     }
38#   }
39#
40# Note that the indentation of the first opening brace and the last closing
41# brace must be exactly the same.  The code sequence itself should have a
42# larger indentation than the surrounding braces.
43#
# In both patterns, group 1 captures the whitespace in front of the brace.
# `DocField' uses the length of that group as the code sequence margin and
# compares it against the indentation of a candidate closing brace.
re_code_start = re.compile( r"(\s*){\s*$" )
re_code_end   = re.compile( r"(\s*)}\s*$" )
46
47
48#
49# A regular expression to isolate identifiers from other text.  Two syntax
50# forms are supported:
51#
52#   <name>
53#   <name>[<id>]
54#
55# where both `<name>' and `<id>' consist of alphanumeric characters, `_',
56# and `-'.  Use `<id>' if there are multiple, valid `<name>' entries; in the
57# index, `<id>' will be appended in parentheses.
58#
59# For example,
60#
61#   stem_darkening[autofit]
62#
63# becomes `stem_darkening (autofit)' in the index.
64#
# Group 1 is the complete identifier, including the optional `[<id>]' part.
re_identifier = re.compile( r"""
                              ((?:\w|-)+
                               (?:\[(?:\w|-)+\])?)
                            """, re.VERBOSE )
69
70
71#
72# We collect macro names ending in `_H' (group 1), as defined in
73# `freetype/config/ftheader.h'.  While outputting the object data, we use
74# this info together with the object's file location (group 2) to emit the
75# appropriate header file macro and its associated file name before the
76# object itself.
77#
78# Example:
79#
80#   #define FT_FREETYPE_H <freetype.h>
81#
# Group 1 is the macro name, group 2 the text between the angle brackets.
# `DocBlock' stores the pair as `headers[<group 2>] = <group 1>'.
re_header_macro = re.compile( r'^#define\s{1,}(\w{1,}_H)\s{1,}<(.*)>' )
83
84
85################################################################
86##
87##  DOC CODE CLASS
88##
89##  The `DocCode' class is used to store source code lines.
90##
91##  `self.lines' contains a set of source code lines that will be dumped as
92##  HTML in a <PRE> tag.
93##
94##  The object is filled line by line by the parser; it strips the leading
95##  `margin' space from each input line before storing it in `self.lines'.
96##
class  DocCode:
    """Container for the lines of a code sequence.

    `self.lines' holds the code lines after margin removal; `self.words'
    is always `None' for code objects.
    """

    def  __init__( self, margin, lines ):
        """Store `lines', removing the first `margin' columns of every
           line whose first `margin' columns contain only whitespace."""
        self.lines = []
        self.words = None

        # remove margin spaces; lines with non-space characters inside the
        # margin are stored unchanged (uses the `str' method instead of the
        # Python 2-only `string.strip' function)
        for l in lines:
            if not l[:margin].strip():
                l = l[margin:]
            self.lines.append( l )

    def  dump( self, prefix = "", width = 60 ):
        """Print all lines to standard output, each preceded by `prefix'."""
        lines = self.dump_lines( 0, width )
        for l in lines:
            # single-argument form works as both the Python 2 statement
            # and the Python 3 function
            print( prefix + l )

    def  dump_lines( self, margin = 0, width = 60 ):
        """Return the stored lines, each indented by `margin' spaces.

           `width' is accepted for signature compatibility with
           `DocPara.dump_lines' but is not used; code is never re-wrapped."""
        result = []
        for l in self.lines:
            result.append( " " * margin + l )
        return result
119
120
121
122################################################################
123##
124##  DOC PARA CLASS
125##
126##  `Normal' text paragraphs are stored in the `DocPara' class.
127##
128##  `self.words' contains the list of words that make up the paragraph.
129##
class  DocPara:
    """Container for a normal text paragraph.

    `self.words' is the list of whitespace-separated words of the
    paragraph; `self.lines' is always `None' for paragraph objects.
    """

    def  __init__( self, lines ):
        """Split all `lines' into words and collect them in order."""
        self.lines = None
        self.words = []
        for l in lines:
            # `split' without arguments already ignores leading and
            # trailing whitespace
            self.words.extend( l.split() )

    def  dump( self, prefix = "", width = 60 ):
        """Print the paragraph, wrapped to `width' columns, with `prefix'
           in front of every output line."""
        lines = self.dump_lines( 0, width )
        for l in lines:
            print( prefix + l )

    def  dump_lines( self, margin = 0, width = 60 ):
        """Return the paragraph as a list of lines.

           Words are joined with single spaces; a new line is started when
           adding the next word would exceed `width' columns (the margin is
           not counted).  Each line is indented by `margin' spaces.  A word
           longer than `width' is emitted on a line of its own."""
        cur    = ""  # current line
        col    = 0   # current width
        result = []

        for word in self.words:
            ln = len( word )
            if col > 0:
                ln = ln + 1  # account for the separating space

            # only break the line if there is something to flush; otherwise
            # an over-long first word would produce an empty leading line
            if col > 0 and col + ln > width:
                result.append( " " * margin + cur )
                cur = word
                col = len( word )
            else:
                if col > 0:
                    cur = cur + " "
                cur = cur + word
                col = col + ln

        if col > 0:
            result.append( " " * margin + cur )

        return result
168
169
170################################################################
171##
172##  DOC FIELD CLASS
173##
174##  The `DocField' class stores a list containing either `DocPara' or
175##  `DocCode' objects.  Each DocField object also has an optional `name'
176##  that is used when the object corresponds to a field or value definition.
177##
class  DocField:
    """A named (or anonymous) sequence of paragraphs and code sequences.

    `self.name' is the field name, or `None' for normal text.
    `self.items' is the list of `DocPara' and `DocCode' objects built from
    the lines passed to the constructor.
    """

    def  __init__( self, name, lines ):
        """Split `lines' into paragraphs and code sequences.

           A line matching `re_code_start' opens a code sequence; it is
           closed by a line matching `re_code_end' whose indentation does
           not exceed the indentation of the opening brace.  Outside of
           code sequences, an empty line ends the current paragraph."""
        self.name  = name  # can be `None' for normal paragraphs/sources
        self.items = []    # list of items

        mode_none  = 0     # start parsing mode
        mode_code  = 1     # parsing code sequences

        margin     = -1    # current code sequence indentation
        cur_lines  = []

        # analyze the markup lines to check whether they contain paragraphs,
        # code sequences, or field definitions
        #
        mode = mode_none

        for l in lines:
            # are we parsing a code sequence?
            if mode == mode_code:
                m = re_code_end.match( l )
                if m and len( m.group( 1 ) ) <= margin:
                    # that's it, we finished the code sequence; the margin
                    # has already been removed while collecting the lines
                    code = DocCode( 0, cur_lines )
                    self.items.append( code )
                    margin    = -1
                    cur_lines = []
                    mode      = mode_none
                else:
                    # otherwise continue the code sequence
                    cur_lines.append( l[margin:] )
            else:
                # start of code sequence?
                m = re_code_start.match( l )
                if m:
                    # save current lines
                    if cur_lines:
                        para = DocPara( cur_lines )
                        self.items.append( para )
                        cur_lines = []

                    # switch to code extraction mode
                    margin = len( m.group( 1 ) )
                    mode   = mode_code
                else:
                    if not l.split() and cur_lines:
                        # if the line is empty, we end the current paragraph,
                        # if any
                        para = DocPara( cur_lines )
                        self.items.append( para )
                        cur_lines = []
                    else:
                        # otherwise, simply add the line to the current
                        # paragraph
                        cur_lines.append( l )

        if mode == mode_code:
            # unexpected end of code sequence; the collected lines already
            # had the margin sliced off above, so it must not be removed a
            # second time (this would flatten nested indentation)
            code = DocCode( 0, cur_lines )
            self.items.append( code )
        elif cur_lines:
            para = DocPara( cur_lines )
            self.items.append( para )

    def  dump( self, prefix = "" ):
        """Print the field to standard output.

           If the field has a name, a `<name> ::' header line is printed
           and `----' is appended to `prefix' for the items.  Items are
           separated by empty lines."""
        if self.name:
            print( prefix + self.name + " ::" )
            prefix = prefix + "----"

        first = 1
        for p in self.items:
            if not first:
                print( "" )
            p.dump( prefix )
            first = 0

    def  dump_lines( self, margin = 0, width = 60 ):
        """Return the lines of all items, with one empty string inserted
           between two consecutive items."""
        result = []
        nl     = None

        for p in self.items:
            if nl:
                result.append( "" )

            result.extend( p.dump_lines( margin, width ) )
            nl = 1

        return result
268
269
270#
271# A regular expression to detect field definitions.
272#
273# Examples:
274#
275#   foo     ::
276#   foo.bar ::
277#
# Group 1 is the field name (it can be empty since the first alternative is
# `\w*'); the whole match, including `::', is what `DocMarkup' blanks out.
re_field = re.compile( r"""
                         \s*
                           (
                             \w*
                           |
                             \w (\w | \.)* \w
                           )
                         \s* ::
                       """, re.VERBOSE )
287
288
289################################################################
290##
291##  DOC MARKUP CLASS
292##
class  DocMarkup:
    """The content of one markup tag, split into `DocField' objects.

    `self.tag' is the lower-cased tag name, `self.fields' the list of
    fields in the order they appear.
    """

    def  __init__( self, tag, lines ):
        """Split `lines' into fields.

           Every line matching `re_field' starts a new field; lines before
           the first such line form an anonymous field (name `None')."""
        self.tag    = tag.lower()
        self.fields = []

        cur_lines = []
        field     = None

        for l in lines:
            m = re_field.match( l )
            if m:
                # We detected the start of a new field definition.

                # first, save the current one
                if cur_lines:
                    f = DocField( field, cur_lines )
                    self.fields.append( f )

                field     = m.group( 1 )   # record field name
                # replace the `<name> ::' part with spaces so that the
                # remaining text keeps its column position
                ln        = len( m.group( 0 ) )
                l         = " " * ln + l[ln:]
                cur_lines = [l]
            else:
                cur_lines.append( l )

        if field or cur_lines:
            f = DocField( field, cur_lines )
            self.fields.append( f )

    def  get_name( self ):
        """Return the first word of the first item of the first field, or
           `None' if there is no such word."""
        try:
            return self.fields[0].items[0].words[0]
        except ( IndexError, AttributeError, TypeError ):
            # no field, no item, no word, or an item without a word list
            return None

    def  dump( self, margin ):
        """Print the markup to standard output, enclosed in `<tag>' and
           `</tag>' lines that are indented by `margin' spaces."""
        print( " " * margin + "<" + self.tag + ">" )
        for f in self.fields:
            f.dump( "  " )
        print( " " * margin + "</" + self.tag + ">" )
337
338
339################################################################
340##
341##  DOC CHAPTER CLASS
342##
class  DocChapter:
    """A chapter, i.e., an ordered group of sections.

    `self.sections' starts empty and is filled by the caller.  If `block'
    is false, a catch-all chapter named `Other' is created.
    """

    def  __init__( self, block ):
        """Take name, title words, and section order from `block', or use
           the `Other'/`Miscellaneous' defaults if `block' is false."""
        self.block    = block
        self.sections = []
        if block:
            self.name  = block.name
            self.title = block.get_markup_words( "title" )
            self.order = block.get_markup_words( "sections" )
        else:
            self.name  = "Other"
            # a list of words, like the value of `get_markup_words'; a
            # `str' method is used since `string.split' is Python 2 only
            self.title = "Miscellaneous".split()
            self.order = []
356
357
358################################################################
359##
360##  DOC SECTION CLASS
361##
class  DocSection:
    """A documentation section: its blocks plus its description data."""

    def  __init__( self, name = "Other" ):
        """Create an empty section called `name'."""
        self.name        = name
        self.chapter     = None
        self.title       = "ERROR"  # replaced by `process' if a title exists
        self.abstract    = ""
        self.description = ""
        self.order       = []
        self.defs        = []
        self.blocks      = {}
        self.block_names = []       # ordered block names in section

    def  add_def( self, block ):
        """Remember `block' as a candidate section description."""
        self.defs.append( block )

    def  add_block( self, block ):
        """Register `block' under its name, keeping insertion order."""
        key = block.name
        self.block_names.append( key )
        self.blocks[key] = block

    def  process( self ):
        """Take title, abstract, description, and order from the first
           definition block that has a non-empty title."""
        for candidate in self.defs:
            heading = candidate.get_markup_text( "title" )
            if not heading:
                continue

            self.title       = heading
            self.abstract    = candidate.get_markup_words( "abstract" )
            self.description = candidate.get_markup_items( "description" )
            self.order       = candidate.get_markup_words_all( "order" )
            break

    def  reorder( self ):
        """Sort the block names according to `self.order'."""
        self.block_names = sort_order_list( self.block_names, self.order )
395
396
397################################################################
398##
399##  CONTENT PROCESSOR CLASS
400##
class  ContentProcessor:
    """Turn documentation comment blocks into sections, chapters, and
       markup objects."""

    def  __init__( self ):
        """Initialize a block content processor."""
        self.reset()

        self.sections = {}    # dictionary of documentation sections
        self.section  = None  # current documentation section

        self.chapters = []    # list of chapters

        self.headers  = {}    # dictionary of header macros

    def  set_section( self, section_name ):
        """Set current section during parsing, creating it if needed."""
        if section_name not in self.sections:
            self.sections[section_name] = DocSection( section_name )

        self.section = self.sections[section_name]

    def  add_chapter( self, block ):
        """Append a new chapter built from `block'."""
        chapter = DocChapter( block )
        self.chapters.append( chapter )

    def  reset( self ):
        """Reset the content processor for a new block."""
        self.markups      = []
        self.markup       = None
        self.markup_lines = []

    def  add_markup( self ):
        """Add a new markup section.

           Nothing happens unless both a current markup tag and at least
           one markup line have been collected."""
        if self.markup and self.markup_lines:

            # get rid of last line of markup if it's empty
            if not self.markup_lines[-1].strip():
                self.markup_lines = self.markup_lines[:-1]

            m = DocMarkup( self.markup, self.markup_lines )

            self.markups.append( m )

            self.markup       = None
            self.markup_lines = []

    def  process_content( self, content ):
        """Process a block content and return a list of DocMarkup objects
           corresponding to it.

           Lines in front of the first markup tag are ignored."""
        first = 1

        for line in content:
            found = None
            for t in re_markup_tags:
                m = t.match( line )
                if m:
                    found  = m.group( 1 ).lower()
                    prefix = len( m.group( 0 ) )
                    # remove markup from line, keeping the column position
                    line   = " " * prefix + line[prefix:]
                    break

            # is it the start of a new markup section ?
            if found:
                first = 0
                self.add_markup()  # add current markup content
                self.markup = found
                if line.strip():
                    self.markup_lines.append( line )
            elif first == 0:
                self.markup_lines.append( line )

        self.add_markup()

        return self.markups

    def  parse_sources( self, source_processor ):
        """Create a `DocBlock' for every block of `source_processor' that
           has content, passing the content-less blocks that directly
           follow it."""
        blocks = source_processor.blocks
        count  = len( blocks )

        for n in range( count ):
            source = blocks[n]
            if source.content:
                # this is a documentation comment, we need to catch
                # all following normal blocks in the "follow" list
                #
                follow = []
                m = n + 1
                while m < count and not blocks[m].content:
                    follow.append( blocks[m] )
                    m = m + 1

                # the constructor registers the new block with this
                # processor, so the object itself need not be kept here
                DocBlock( source, follow, self )

    def  finish( self ):
        """Process all sections and attach them to their chapters.

           Sections not listed in any chapter are collected in an extra
           chapter created with `DocChapter( None )'.  A chapter that lists
           an unknown section causes a warning on standard error."""
        # imported locally since this file has no top-level `import sys'
        import sys

        # process all sections to extract their abstract, description
        # and ordered list of items
        #
        for sec in self.sections.values():
            sec.process()

        # process chapters to check that all sections are correctly
        # listed there
        for chap in self.chapters:
            for sec in chap.order:
                if sec in self.sections:
                    section = self.sections[sec]
                    section.chapter = chap
                    section.reorder()
                    chap.sections.append( section )
                else:
                    sys.stderr.write( "WARNING: chapter '" +          \
                        chap.name + "' in " + chap.block.location() + \
                        " lists unknown section '" + sec + "'\n" )

        # check that all sections are in a chapter
        #
        others = []
        for sec in self.sections.values():
            if not sec.chapter:
                sec.reorder()
                others.append( sec )

        # create a new special chapter for all remaining sections
        # when necessary
        #
        if others:
            chap = DocChapter( None )
            chap.sections = others
            self.chapters.append( chap )
534
535
536################################################################
537##
538##  DOC BLOCK CLASS
539##
class  DocBlock:
    """A documentation block together with the source code following it.

    `self.type' is the tag of the first markup (`ERRTYPE' if there is
    none), `self.name' the identifier found in the first word of the first
    markup (`ERRNAME' if there is none), `self.markups' the list of markup
    objects returned by the processor, and `self.code' the list of source
    lines attached to the block.
    """

    def  __init__( self, source, follow, processor ):
        """Build the block from `source' and register it with `processor'.

           `follow' is the list of blocks that come after `source'; their
           lines are collected as the block's code until a block with a
           true `format' attribute is found."""
        processor.reset()

        self.source  = source
        self.code    = []
        self.type    = "ERRTYPE"
        self.name    = "ERRNAME"
        self.section = processor.section
        self.markups = processor.process_content( source.content )

        # compute block type from first markup tag
        try:
            self.type = self.markups[0].tag
        except ( IndexError, AttributeError ):
            # no markup at all; keep the error placeholder
            pass

        # compute block name from first markup paragraph
        try:
            markup = self.markups[0]
            para   = markup.fields[0].items[0]
            name   = para.words[0]
            m = re_identifier.match( name )
            if m:
                name = m.group( 1 )
            self.name = name
        except ( IndexError, AttributeError, TypeError ):
            # no markup, field, item, or word; keep the error placeholder
            pass

        if self.type == "section":
            # detect new section starts
            processor.set_section( self.name )
            processor.section.add_def( self )
        elif self.type == "chapter":
            # detect new chapter
            processor.add_chapter( self )
        else:
            processor.section.add_block( self )

        # now, compute the source lines relevant to this documentation
        # block. We keep normal comments in for obvious reasons (??)
        code_lines = []
        for b in follow:
            if b.format:
                break
            for l in b.lines:
                # collect header macro definitions
                m = re_header_macro.match( l )
                if m:
                    processor.headers[m.group( 2 )] = m.group( 1 )

                # we use "/* */" as a separator; note that this only
                # stops the current follow block, not the outer loop
                if re_source_sep.match( l ):
                    break
                code_lines.append( l )

        # now strip the leading and trailing empty lines from the sources
        start = 0
        end   = len( code_lines ) - 1

        while start < end and not code_lines[start].strip():
            start = start + 1

        while start < end and not code_lines[end].strip():
            end = end - 1

        if start == end and not code_lines[start].strip():
            # a single remaining line that is empty
            self.code = []
        else:
            self.code = code_lines[start:end + 1]

    def  location( self ):
        """Return the location reported by the underlying source block."""
        return self.source.location()

    def  get_markup( self, tag_name ):
        """Return the DocMarkup corresponding to a given tag in a block.

           The tag name is compared case-insensitively; `None' is returned
           if there is no such markup."""
        wanted = tag_name.lower()
        for m in self.markups:
            if m.tag == wanted:
                return m
        return None

    def  get_markup_words( self, tag_name ):
        """Return the word list of the first item in the first field of
           markup `tag_name', or an empty list if it does not exist."""
        try:
            m = self.get_markup( tag_name )
            return m.fields[0].items[0].words
        except ( AttributeError, IndexError, TypeError ):
            return []

    def  get_markup_words_all( self, tag_name ):
        """Return the words of all items in the first field of markup
           `tag_name', with `/empty/' appended after each item; an empty
           list is returned if the markup does not exist."""
        try:
            m = self.get_markup( tag_name )
            words = []
            for item in m.fields[0].items:
                # We honour empty lines in an `<Order>' section element by
                # adding the sentinel `/empty/'.  The formatter should then
                # convert it to an appropriate representation in the
                # `section_enter' function.
                words.extend( item.words )
                words.append( "/empty/" )
            return words
        except ( AttributeError, IndexError, TypeError ):
            return []

    def  get_markup_text( self, tag_name ):
        """Return the words of `get_markup_words', joined with single
           spaces."""
        result = self.get_markup_words( tag_name )
        return " ".join( result )

    def  get_markup_items( self, tag_name ):
        """Return the item list of the first field of markup `tag_name',
           or `None' if it does not exist."""
        try:
            m = self.get_markup( tag_name )
            return m.fields[0].items
        except ( AttributeError, IndexError, TypeError ):
            return None
654
655# eof
656