1#
2#  content.py
3#
4#    Parse comment blocks to build content blocks (library file).
5#
6#  Copyright 2002-2018 by
7#  David Turner.
8#
9#  This file is part of the FreeType project, and may only be used,
10#  modified, and distributed under the terms of the FreeType project
11#  license, LICENSE.TXT.  By continuing to use, modify, or distribute
12#  this file you indicate that you have read the license and
13#  understand and accept it fully.
14
15#
16# This file contains routines to parse documentation comment blocks,
17# building more structured objects out of them.
18#
19
20
21from sources import *
22from utils   import *
23
24import string, re
25
26
27#
28# Regular expressions to detect code sequences.  `Code sequences' are simply
29# code fragments embedded in '{' and '}', as demonstrated in the following
30# example.
31#
32#   {
33#     x = y + z;
34#     if ( zookoo == 2 )
35#     {
36#       foobar();
37#     }
38#   }
39#
40# Note that the indentation of the first opening brace and the last closing
41# brace must be exactly the same.  The code sequence itself should have a
42# larger indentation than the surrounding braces.
43#
# In both expressions, group 1 captures the whitespace in front of the
# brace.  The parsers below take its length as the `margin' of the code
# sequence and compare it against the indentation of a closing brace.
re_code_start = re.compile( r"(\s*){\s*$" )
re_code_end   = re.compile( r"(\s*)}\s*$" )
46
47
48#
49# A regular expression to isolate identifiers from other text.  Two syntax
50# forms are supported:
51#
52#   <name>
53#   <name>[<id>]
54#
55# where both `<name>' and `<id>' consist of alphanumeric characters, `_',
56# and `-'.  Use `<id>' if there are multiple, valid `<name>' entries; in the
57# index, `<id>' will be appended in parentheses.
58#
59# For example,
60#
61#   stem_darkening[autofit]
62#
63# becomes `stem_darkening (autofit)' in the index.
64#
# Group 1 captures the whole identifier, including the optional `[<id>]'
# suffix.  The expression is not anchored at its end; callers in this file
# apply it with `match', i.e., at the start of a word.
re_identifier = re.compile( r"""
                              ((?:\w|-)+
                               (?:\[(?:\w|-)+\])?)
                            """, re.VERBOSE )
69
70
71#
72# We collect macro names ending in `_H' (group 1), as defined in
73# `freetype/config/ftheader.h'.  While outputting the object data, we use
74# this info together with the object's file location (group 2) to emit the
75# appropriate header file macro and its associated file name before the
76# object itself.
77#
78# Example:
79#
80#   #define FT_FREETYPE_H <freetype.h>
81#
# The expression is applied with `match' to the source lines that follow a
# documentation block; the file name (group 2) becomes the key and the
# macro name (group 1) the value in the processor's `headers' dictionary.
re_header_macro = re.compile( r'^#define\s{1,}(\w{1,}_H)\s{1,}<(.*)>' )
83
84
85################################################################
86##
87##  DOC CODE CLASS
88##
89##  The `DocCode' class is used to store source code lines.
90##
91##  `self.lines' contains a set of source code lines that will be dumped as
92##  HTML in a <PRE> tag.
93##
94##  The object is filled line by line by the parser; it strips the leading
95##  `margin' space from each input line before storing it in `self.lines'.
96##
class  DocCode:
    """Store the lines of a code sequence.

    `self.lines' holds the code lines with the leading margin removed;
    `self.words' is always `None' for code.
    """

    def  __init__( self, margin, lines ):
        """Store `lines', removing up to `margin' leading characters.

        The first `margin' characters of a line are only removed if they
        consist of whitespace exclusively; other lines are stored as-is.
        """
        self.words = None
        self.lines = []

        # remove margin spaces
        for line in lines:
            if not line[:margin].strip():
                line = line[margin:]
            self.lines.append( line )

    def  dump( self, prefix = "", width = 60 ):
        """Print all code lines to standard output, preceded by `prefix'."""
        # A single parenthesized argument prints identically with the
        # `print' statement of Python 2 and the function of Python 3.
        for line in self.dump_lines( 0, width ):
            print( prefix + line )

    def  dump_lines( self, margin = 0, width = 60 ):
        """Return the code lines, each indented by `margin' spaces.

        Code is never re-wrapped; `width' is accepted but not used.
        """
        indent = " " * margin
        return [indent + line for line in self.lines]
119
120
121
122################################################################
123##
124##  DOC PARA CLASS
125##
126##  `Normal' text paragraphs are stored in the `DocPara' class.
127##
128##  `self.words' contains the list of words that make up the paragraph.
129##
class  DocPara:
    """Store a normal text paragraph as a list of words.

    `self.words' holds the whitespace-separated words of all input lines;
    `self.lines' is always `None' for paragraphs.
    """

    def  __init__( self, lines ):
        """Split all `lines' into words and collect them in order."""
        self.lines = None
        self.words = []
        for line in lines:
            # `split' without arguments ignores leading and trailing
            # whitespace, so no separate stripping is needed
            self.words.extend( line.split() )

    def  dump( self, prefix = "", width = 60 ):
        """Print the re-wrapped paragraph, each line preceded by `prefix'."""
        # A single parenthesized argument prints identically with the
        # `print' statement of Python 2 and the function of Python 3.
        for line in self.dump_lines( 0, width ):
            print( prefix + line )

    def  dump_lines( self, margin = 0, width = 60 ):
        """Return the paragraph re-wrapped to lines of at most `width'
           characters (not counting the margin), indented by `margin'
           spaces.

           A single word longer than `width' is put on a line of its
           own."""
        indent = " " * margin
        result = []
        cur    = ""  # current line

        for word in self.words:
            if not cur:
                # The first word of a line is always accepted.  Testing
                # the width here would emit an empty line in front of a
                # first word that is longer than `width'.
                cur = word
            elif len( cur ) + 1 + len( word ) > width:
                result.append( indent + cur )
                cur = word
            else:
                cur = cur + " " + word

        if cur:
            result.append( indent + cur )

        return result
168
169
170################################################################
171##
172##  DOC FIELD CLASS
173##
174##  The `DocField' class stores a list containing either `DocPara' or
175##  `DocCode' objects.  Each DocField object also has an optional `name'
176##  that is used when the object corresponds to a field or value definition.
177##
class  DocField:
    """Store a sequence of `DocPara' and `DocCode' objects.

    `self.name' is the field name, or `None' for normal text;
    `self.items' is the list of paragraphs and code sequences in the order
    they appear in the input.
    """

    def  __init__( self, name, lines ):
        """Split `lines' into paragraphs and code sequences.

        A code sequence starts with a line matching `re_code_start' and
        ends with a line matching `re_code_end' whose indentation is not
        larger than that of the opening brace.  Outside of code sequences,
        an empty line ends the current paragraph.
        """
        self.name  = name  # can be `None' for normal paragraphs/sources
        self.items = []    # list of items

        margin    = -1     # current code sequence indentation
        in_code   = False  # are we parsing a code sequence?
        cur_lines = []

        # analyze the markup lines to check whether they contain paragraphs
        # or code sequences
        #
        for line in lines:
            if in_code:
                m = re_code_end.match( line )
                if m and len( m.group( 1 ) ) <= margin:
                    # that's it, we finished the code sequence
                    self.items.append( DocCode( 0, cur_lines ) )
                    margin    = -1
                    cur_lines = []
                    in_code   = False
                else:
                    # otherwise continue the code sequence; the margin
                    # is removed right here
                    cur_lines.append( line[margin:] )
                continue

            # start of code sequence?
            m = re_code_start.match( line )
            if m:
                # save current lines
                if cur_lines:
                    self.items.append( DocPara( cur_lines ) )
                    cur_lines = []

                # switch to code extraction mode
                margin  = len( m.group( 1 ) )
                in_code = True
            elif cur_lines and not line.split():
                # if the line is empty, we end the current paragraph,
                # if any
                self.items.append( DocPara( cur_lines ) )
                cur_lines = []
            else:
                # otherwise, simply add the line to the current
                # paragraph
                cur_lines.append( line )

        if in_code:
            # unexpected end of code sequence; the collected lines have
            # already lost their margin, so it must not be removed a
            # second time
            self.items.append( DocCode( 0, cur_lines ) )
        elif cur_lines:
            self.items.append( DocPara( cur_lines ) )

    def  dump( self, prefix = "" ):
        """Print the field to standard output.

        A named field is introduced by a `<name> ::' line, and its items
        get `----' appended to `prefix'.  Items are separated by empty
        lines.
        """
        if self.name:
            print( prefix + self.name + " ::" )
            prefix = prefix + "----"

        first = True
        for item in self.items:
            if not first:
                print( "" )
            item.dump( prefix )
            first = False

    def  dump_lines( self, margin = 0, width = 60 ):
        """Return the lines of all items, separated by empty strings.

        `margin' and `width' are passed on to the `dump_lines' method of
        each item.
        """
        result = []

        for item in self.items:
            if result or item is not self.items[0]:
                pass
            result.extend( [] )
            break

        result = []
        first  = True
        for item in self.items:
            if not first:
                result.append( "" )

            result.extend( item.dump_lines( margin, width ) )
            first = False

        return result
268
269
270#
271# A regular expression to detect field definitions.
272#
273# Examples:
274#
275#   foo     ::
276#   foo.bar ::
277#
# Group 1 captures the field name.  Since the first alternative, `\w*',
# also matches the empty string, a line holding only `::' (with optional
# surrounding whitespace) is accepted, too, yielding an empty name.
re_field = re.compile( r"""
                         \s*
                           (
                             \w*
                           |
                             \w (\w | \.)* \w
                           )
                         \s* ::
                       """, re.VERBOSE )
287
288
289################################################################
290##
291##  DOC MARKUP CLASS
292##
class  DocMarkup:
    """Store the contents of one markup tag as a list of `DocField's.

    `self.tag' is the lower-cased tag name; `self.fields' holds one
    `DocField' object per field definition found in the input, plus one
    unnamed field for text in front of the first definition, if any.
    """

    def  __init__( self, tag, lines ):
        """Split `lines' into fields at each line matching `re_field'."""
        self.tag    = tag.lower()
        self.fields = []

        cur_lines = []
        field     = None

        for line in lines:
            m = re_field.match( line )
            if m:
                # We detected the start of a new field definition.

                # first, save the current one
                if cur_lines:
                    self.fields.append( DocField( field, cur_lines ) )

                field = m.group( 1 )   # record field name

                # blank out the `<name> ::' part so that the remaining
                # text keeps its column position
                ln        = len( m.group( 0 ) )
                cur_lines = [" " * ln + line[ln:]]
            else:
                cur_lines.append( line )

        if field or cur_lines:
            self.fields.append( DocField( field, cur_lines ) )

    def  get_name( self ):
        """Return the first word of the first item of the first field, or
           `None' if there is no such word."""
        try:
            return self.fields[0].items[0].words[0]
        except ( AttributeError, IndexError, TypeError ):
            # no field, no item, no word, or a code item (whose `words'
            # attribute is `None')
            return None

    def  dump( self, margin ):
        """Print the markup to standard output, enclosed in `<tag>' and
           `</tag>' lines that are indented by `margin' spaces."""
        indent = " " * margin

        print( indent + "<" + self.tag + ">" )
        for f in self.fields:
            f.dump( "  " )
        print( indent + "</" + self.tag + ">" )
337
338
339################################################################
340##
341##  DOC CHAPTER CLASS
342##
class  DocChapter:
    """Store a chapter, this is, an ordered group of sections.

    `self.block' is the block the chapter was created from (or `None'),
    `self.name' the chapter name, `self.title' the title as a list of
    words, `self.order' the list of words taken from the block's
    `sections' markup, and `self.sections' a list that starts empty.
    """

    def  __init__( self, block ):
        """Create a chapter from `block'.

        If `block' is false (e.g., `None'), a chapter named `Other' with
        the title `Miscellaneous' and an empty order list is created.
        """
        self.block    = block
        self.sections = []

        if block:
            self.name  = block.name
            self.title = block.get_markup_words( "title" )
            self.order = block.get_markup_words( "sections" )
        else:
            self.name  = "Other"
            self.title = ["Miscellaneous"]
            self.order = []
356
357
358################################################################
359##
360##  DOC SECTION CLASS
361##
class  DocSection:
    """Collect the blocks that belong to a documentation section.

    Blocks that define the section itself are gathered in `self.defs';
    all other blocks are stored in `self.blocks', indexed by name, with
    `self.block_names' recording the names in insertion order.
    """

    def  __init__( self, name = "Other" ):
        """Create an empty section called `name'."""
        self.name    = name
        self.chapter = None
        self.title   = "ERROR"

        # filled in by `process'
        self.abstract    = ""
        self.description = ""
        self.order       = []

        # filled in by `add_def' and `add_block'
        self.defs        = []
        self.blocks      = {}
        self.block_names = []  # ordered block names in section

    def  add_def( self, block ):
        """Register `block' as a definition of this section."""
        self.defs.append( block )

    def  add_block( self, block ):
        """Register `block' as a member of this section."""
        block_name = block.name

        self.block_names.append( block_name )
        self.blocks[block_name] = block

    def  process( self ):
        """Take title, abstract, description, and order from the first
           definition block that has a non-empty title."""
        for candidate in self.defs:
            found_title = candidate.get_markup_text( "title" )
            if not found_title:
                continue

            self.title       = found_title
            self.abstract    = candidate.get_markup_words( "abstract" )
            self.description = candidate.get_markup_items( "description" )
            self.order       = candidate.get_markup_words_all( "order" )

            # only the first valid description counts
            break

    def  reorder( self ):
        """Sort the block names with `sort_order_list', using the order
           list of this section."""
        self.block_names = sort_order_list( self.block_names, self.order )
395
396
397################################################################
398##
399##  CONTENT PROCESSOR CLASS
400##
class  ContentProcessor:
    """Convert documentation comment blocks into sections and chapters.

    `self.sections' maps section names to `DocSection' objects,
    `self.section' is the section currently being filled,
    `self.chapters' is the list of `DocChapter' objects, and
    `self.headers' maps header file names to header macro names.
    `self.markups', `self.markup', and `self.markup_lines' hold the
    parsing state of the block that is currently processed.
    """

    def  __init__( self ):
        """Initialize a block content processor."""
        self.reset()

        self.sections = {}    # dictionary of documentation sections
        self.section  = None  # current documentation section

        self.chapters = []    # list of chapters

        self.headers  = {}    # dictionary of header macros

    def  set_section( self, section_name ):
        """Set current section during parsing, creating it if it doesn't
           exist yet."""
        if section_name not in self.sections:
            self.sections[section_name] = DocSection( section_name )

        self.section = self.sections[section_name]

    def  add_chapter( self, block ):
        """Create a chapter from `block' and append it to the chapter
           list."""
        self.chapters.append( DocChapter( block ) )

    def  reset( self ):
        """Reset the content processor for a new block."""
        self.markups      = []
        self.markup       = None
        self.markup_lines = []

    def  add_markup( self ):
        """Add a new markup section.

        Nothing happens unless there is both a current tag and at least
        one collected line for it.
        """
        if self.markup and self.markup_lines:

            # get rid of last line of markup if it's empty
            if not self.markup_lines[-1].strip():
                self.markup_lines = self.markup_lines[:-1]

            self.markups.append( DocMarkup( self.markup,
                                            self.markup_lines ) )

            self.markup       = None
            self.markup_lines = []

    def  process_content( self, content ):
        """Process a block content and return a list of DocMarkup objects
           corresponding to it.

           Lines in front of the first markup tag are ignored.  Markup
           tags are not searched for within code sequences."""
        seen_tag = False  # did we already find a markup tag?

        margin  = -1
        in_code = False

        for line in content:
            # keep track of code sequences
            if in_code:
                m = re_code_end.match( line )
                if m and len( m.group( 1 ) ) <= margin:
                    in_code = False
                    margin  = -1
            else:
                m = re_code_start.match( line )
                if m:
                    in_code = True
                    margin  = len( m.group( 1 ) )

            found = None

            if not in_code:
                for t in re_markup_tags:
                    m = t.match( line )
                    if m:
                        found  = m.group( 1 ).lower()
                        prefix = len( m.group( 0 ) )
                        # remove markup from line
                        line   = " " * prefix + line[prefix:]
                        break

            # is it the start of a new markup section ?
            if found:
                seen_tag = True
                self.add_markup()  # add current markup content
                self.markup = found
                if line.strip():
                    self.markup_lines.append( line )
            elif seen_tag:
                self.markup_lines.append( line )

        self.add_markup()

        return self.markups

    def  parse_sources( self, source_processor ):
        """Create a `DocBlock' for every documentation comment found in
           the blocks of `source_processor'.

           The created objects are not stored here; the `DocBlock'
           constructor registers them with this processor."""
        blocks = source_processor.blocks
        count  = len( blocks )

        for n in range( count ):
            source = blocks[n]
            if source.content:
                # this is a documentation comment, we need to catch
                # all following normal blocks in the "follow" list
                #
                follow = []
                m = n + 1
                while m < count and not blocks[m].content:
                    follow.append( blocks[m] )
                    m = m + 1

                DocBlock( source, follow, self )

    def  finish( self ):
        """Process all sections and assign them to their chapters.

        Sections not listed in any chapter are collected in an
        additional chapter that is appended to the chapter list.  A
        warning is written to standard error for every section name
        listed in a chapter that is not known.
        """
        # Imported locally because this file has no `import sys' of its
        # own; the name would otherwise only be available if one of the
        # star-imported modules happens to provide it.
        import sys

        # process all sections to extract their abstract, description
        # and ordered list of items
        #
        for sec in self.sections.values():
            sec.process()

        # process chapters to check that all sections are correctly
        # listed there
        for chap in self.chapters:
            for sec in chap.order:
                if sec in self.sections:
                    section = self.sections[sec]
                    section.chapter = chap
                    section.reorder()
                    chap.sections.append( section )
                else:
                    sys.stderr.write( "WARNING: chapter '" +          \
                        chap.name + "' in " + chap.block.location() + \
                        " lists unknown section '" + sec + "'\n" )

        # check that all sections are in a chapter
        #
        others = []
        for sec in self.sections.values():
            if not sec.chapter:
                sec.reorder()
                others.append( sec )

        # create a new special chapter for all remaining sections
        # when necessary
        #
        if others:
            chap = DocChapter( None )
            chap.sections = others
            self.chapters.append( chap )
551
552
553################################################################
554##
555##  DOC BLOCK CLASS
556##
class  DocBlock:
    """Store a documentation block together with its source code.

    `self.markups' is the list of `DocMarkup' objects of the block,
    `self.type' the tag of the first markup, `self.name' the identifier
    at the start of the first markup, `self.section' the processor's
    current section at construction time, and `self.code' the list of
    source lines that follow the documentation comment.
    """

    def  __init__( self, source, follow, processor ):
        """Parse `source' and register the new block with `processor'.

        `follow' is the list of source blocks after the documentation
        comment; their lines are collected as the block's source code
        until a block with a format is found.

        Depending on the block type, the block becomes a section
        definition, a chapter, or a normal block of the current section.
        """
        processor.reset()

        self.source  = source
        self.code    = []
        self.type    = "ERRTYPE"
        self.name    = "ERRNAME"
        self.section = processor.section
        self.markups = processor.process_content( source.content )

        # compute block type from first markup tag
        try:
            self.type = self.markups[0].tag
        except ( AttributeError, IndexError ):
            pass

        # compute block name from first markup paragraph
        try:
            name = self.markups[0].fields[0].items[0].words[0]
        except ( AttributeError, IndexError, TypeError ):
            # no markup, field, item, or word available; a code item
            # has its `words' attribute set to `None'
            pass
        else:
            m = re_identifier.match( name )
            if m:
                name = m.group( 1 )
            self.name = name

        if self.type == "section":
            # detect new section starts
            processor.set_section( self.name )
            processor.section.add_def( self )
        elif self.type == "chapter":
            # detect new chapter
            processor.add_chapter( self )
        else:
            processor.section.add_block( self )

        # now, compute the source lines relevant to this documentation
        # block. We keep normal comments in for obvious reasons (??)
        source = []
        for b in follow:
            if b.format:
                break
            for line in b.lines:
                # collect header macro definitions
                m = re_header_macro.match( line )
                if m:
                    processor.headers[m.group( 2 )] = m.group( 1 )

                # we use "/* */" as a separator; it only ends the
                # collection of lines from the current block
                if re_source_sep.match( line ):
                    break
                source.append( line )

        # now strip the leading and trailing empty lines from the sources
        start = 0
        end   = len( source ) - 1

        while start < end and not source[start].strip():
            start = start + 1

        while start < end and not source[end].strip():
            end = end - 1

        if start == end and not source[start].strip():
            # a single remaining line that is empty
            self.code = []
        else:
            self.code = source[start:end + 1]

    def  location( self ):
        """Return the location of the block's source."""
        return self.source.location()

    def  get_markup( self, tag_name ):
        """Return the DocMarkup corresponding to a given tag in a block.

        The tag name is compared case-insensitively; `None' is returned
        if there is no such markup.
        """
        wanted = tag_name.lower()
        for m in self.markups:
            if m.tag == wanted:
                return m
        return None

    def  get_markup_words( self, tag_name ):
        """Return the word list of the first item of the first field of
           markup `tag_name', or an empty list if not available."""
        try:
            m = self.get_markup( tag_name )
            return m.fields[0].items[0].words
        except ( AttributeError, IndexError ):
            return []

    def  get_markup_words_all( self, tag_name ):
        """Return the words of all items of the first field of markup
           `tag_name', or an empty list if not available."""
        try:
            m = self.get_markup( tag_name )
            words = []
            for item in m.fields[0].items:
                # We honour empty lines in an `<Order>' section element by
                # adding the sentinel `/empty/'.  The formatter should then
                # convert it to an appropriate representation in the
                # `section_enter' function.
                words.extend( item.words )
                words.append( "/empty/" )
            return words
        except ( AttributeError, IndexError, TypeError ):
            return []

    def  get_markup_text( self, tag_name ):
        """Return the words of `get_markup_words', joined with single
           spaces."""
        return " ".join( self.get_markup_words( tag_name ) )

    def  get_markup_items( self, tag_name ):
        """Return the item list of the first field of markup `tag_name',
           or `None' if not available."""
        try:
            m = self.get_markup( tag_name )
            return m.fields[0].items
        except ( AttributeError, IndexError ):
            return None
671
672# eof
673