1#
2#  content.py
3#
4#    Parse comment blocks to build content blocks (library file).
5#
6#  Copyright 2002-2015 by
7#  David Turner.
8#
9#  This file is part of the FreeType project, and may only be used,
10#  modified, and distributed under the terms of the FreeType project
11#  license, LICENSE.TXT.  By continuing to use, modify, or distribute
12#  this file you indicate that you have read the license and
13#  understand and accept it fully.
14
15#
16# This file contains routines to parse documentation comment blocks,
17# building more structured objects out of them.
18#
19
20
21from sources import *
22from utils   import *
23
24import string, re
25
26
27#
28# Regular expressions to detect code sequences.  `Code sequences' are simply
29# code fragments embedded in '{' and '}', as demonstrated in the following
30# example.
31#
32#   {
33#     x = y + z;
34#     if ( zookoo == 2 )
35#     {
36#       foobar();
37#     }
38#   }
39#
40# Note that the indentation of the first opening brace and the last closing
41# brace must be exactly the same.  The code sequence itself should have a
42# larger indentation than the surrounding braces.
43#
# In both patterns, group 1 captures the whitespace that precedes the
# brace; its length gives the indentation of that brace.
re_code_start = re.compile( r"(\s*){\s*$" )
re_code_end   = re.compile( r"(\s*)}\s*$" )
46
47
48#
49# A regular expression to isolate identifiers from other text.
50#
# Group 1 captures a (possibly empty) run of word characters and hyphens.
re_identifier = re.compile( r'((?:\w|-)*)' )
52
53
54#
55# We collect macro names ending in `_H' (group 1), as defined in
56# `freetype/config/ftheader.h'.  While outputting the object data, we use
57# this info together with the object's file location (group 2) to emit the
58# appropriate header file macro and its associated file name before the
59# object itself.
60#
61# Example:
62#
63#   #define FT_FREETYPE_H <freetype.h>
64#
# Group 1 is the macro name; group 2 is the text between `<' and `>'.
re_header_macro = re.compile( r'^#define\s{1,}(\w{1,}_H)\s{1,}<(.*)>' )
66
67
68################################################################
69##
70##  DOC CODE CLASS
71##
72##  The `DocCode' class is used to store source code lines.
73##
74##  `self.lines' contains a set of source code lines that will be dumped as
75##  HTML in a <PRE> tag.
76##
77##  The object is filled line by line by the parser; it strips the leading
78##  `margin' space from each input line before storing it in `self.lines'.
79##
class  DocCode:
    """Store the lines of a code sequence for verbatim output.

    `self.lines' holds the code lines after margin removal; `self.words'
    is always `None' for a code object.
    """

    def  __init__( self, margin, lines ):
        """Store `lines', dropping the first `margin' columns of each one.

        A line is kept unchanged when its first `margin' characters
        contain anything other than whitespace.
        """
        self.lines = []
        self.words = None

        # remove margin spaces
        for l in lines:
            # use the `str' method; `string.strip' is gone in Python 3
            if not l[:margin].strip():
                l = l[margin:]
            self.lines.append( l )

    def  dump( self, prefix = "", width = 60 ):
        """Print every stored line to standard output, led by `prefix'."""
        for l in self.dump_lines( 0, width ):
            # a parenthesized single argument prints identically with
            # the Python 2 statement and the Python 3 function
            print( prefix + l )

    def  dump_lines( self, margin = 0, width = 60 ):
        """Return the stored lines, each indented by `margin' spaces.

        `width' is accepted for interface compatibility with
        `DocPara.dump_lines' but not used; code lines are never wrapped.
        """
        indent = " " * margin
        return [indent + l for l in self.lines]
102
103
104
105################################################################
106##
107##  DOC PARA CLASS
108##
109##  `Normal' text paragraphs are stored in the `DocPara' class.
110##
111##  `self.words' contains the list of words that make up the paragraph.
112##
class  DocPara:
    """Store a normal text paragraph as a flat list of words.

    `self.words' holds the whitespace-separated words of all input lines;
    `self.lines' is always `None' for a paragraph object.
    """

    def  __init__( self, lines ):
        """Split every line of `lines' into words and collect them."""
        self.lines = None
        self.words = []
        for l in lines:
            # `str.split' without arguments already ignores leading and
            # trailing whitespace (and `string.split' is gone in Python 3)
            self.words.extend( l.split() )

    def  dump( self, prefix = "", width = 60 ):
        """Print the wrapped paragraph to standard output, each line led
           by `prefix'."""
        for l in self.dump_lines( 0, width ):
            print( prefix + l )

    def  dump_lines( self, margin = 0, width = 60 ):
        """Return the paragraph wrapped to `width' columns.

        Words are joined with single spaces; a new line is started when
        adding the next word would exceed `width'.  Each returned line is
        indented by `margin' spaces (not counted against `width').  A word
        longer than `width' is placed on a line of its own.
        """
        indent = " " * margin
        cur    = ""  # current line
        col    = 0   # current width
        result = []

        for word in self.words:
            ln = len( word )

            if col == 0:
                # First word of a line: always accept it, even if it is
                # wider than `width'.  Flushing here would emit a
                # spurious empty line.
                cur = word
                col = ln
            elif col + 1 + ln > width:
                result.append( indent + cur )
                cur = word
                col = ln
            else:
                cur = cur + " " + word
                col = col + 1 + ln

        if col > 0:
            result.append( indent + cur )

        return result
151
152
153################################################################
154##
155##  DOC FIELD CLASS
156##
157##  The `DocField' class stores a list containing either `DocPara' or
158##  `DocCode' objects.  Each DocField object also has an optional `name'
159##  that is used when the object corresponds to a field or value definition.
160##
class  DocField:
    """Hold the `DocPara' and `DocCode' items parsed from a list of lines.

    `self.name' is the field name (`None' for normal paragraphs/sources)
    and `self.items' is the ordered list of parsed items.
    """

    def  __init__( self, name, lines ):
        """Split `lines' into paragraphs and code sequences.

        A line matching `re_code_start' opens a code sequence; it is
        closed by a line matching `re_code_end' whose indentation is not
        larger than that of the opening brace.  Outside of code
        sequences, an empty line ends the current paragraph.
        """
        self.name  = name  # can be `None' for normal paragraphs/sources
        self.items = []    # list of items

        mode_none  = 0     # start parsing mode
        mode_code  = 1     # parsing code sequences

        margin     = -1    # current code sequence indentation
        cur_lines  = []
        mode       = mode_none

        for l in lines:
            # are we parsing a code sequence?
            if mode == mode_code:
                m = re_code_end.match( l )
                if m and len( m.group( 1 ) ) <= margin:
                    # that's it, we finished the code sequence; the
                    # margin was already cut off while collecting
                    self.items.append( DocCode( 0, cur_lines ) )
                    margin    = -1
                    cur_lines = []
                    mode      = mode_none
                else:
                    # otherwise continue the code sequence
                    cur_lines.append( l[margin:] )
            else:
                # start of code sequence?
                m = re_code_start.match( l )
                if m:
                    # save current lines
                    if cur_lines:
                        self.items.append( DocPara( cur_lines ) )
                        cur_lines = []

                    # switch to code extraction mode
                    margin = len( m.group( 1 ) )
                    mode   = mode_code
                elif not l.split() and cur_lines:
                    # if the line is empty, we end the current paragraph,
                    # if any
                    self.items.append( DocPara( cur_lines ) )
                    cur_lines = []
                else:
                    # otherwise, simply add the line to the current
                    # paragraph
                    cur_lines.append( l )

        if mode == mode_code:
            # Unexpected end of code sequence.  The collected lines have
            # already lost their margin, so pass 0 exactly as for a
            # properly closed sequence; passing `margin' again would
            # remove the indentation twice.
            self.items.append( DocCode( 0, cur_lines ) )
        elif cur_lines:
            self.items.append( DocPara( cur_lines ) )

    def  dump( self, prefix = "" ):
        """Print the field to standard output.

        A named field prints `<name> ::' first and extends `prefix' with
        `----' for its items; items are separated by an empty line.
        """
        # the attribute set in `__init__' is `name'; there is no `field'
        if self.name:
            print( prefix + self.name + " ::" )
            prefix = prefix + "----"

        first = 1
        for p in self.items:
            if not first:
                print( "" )
            p.dump( prefix )
            first = 0

    def  dump_lines( self, margin = 0, width = 60 ):
        """Return the lines of all items, with one empty string between
           consecutive items."""
        result = []
        nl     = None

        for p in self.items:
            if nl:
                result.append( "" )

            result.extend( p.dump_lines( margin, width ) )
            nl = 1

        return result
251
252
253#
254# A regular expression to detect field definitions.
255#
256# Examples:
257#
258#   foo     ::
259#   foo.bar ::
260#
# Group 1 captures the field name: either a (possibly empty) run of word
# characters, or a sequence of word characters and dots that starts and
# ends with a word character.  The pattern is compiled with `re.VERBOSE',
# so the whitespace inside it is not significant.
re_field = re.compile( r"""
                         \s*
                           (
                             \w*
                           |
                             \w (\w | \.)* \w
                           )
                         \s* ::
                       """, re.VERBOSE )
270
271
272################################################################
273##
274##  DOC MARKUP CLASS
275##
class  DocMarkup:
    """Hold the content of one markup section as a list of `DocField's.

    `self.tag' is the lower-cased markup tag and `self.fields' the list
    of `DocField' objects built from the section's lines.
    """

    def  __init__( self, tag, lines ):
        """Split `lines' into fields at every line matching `re_field'.

        Lines preceding the first field definition end up in an unnamed
        field.  On a field definition line, the matched `name ::' part is
        replaced with spaces so that the remaining text keeps its column.
        """
        self.tag    = tag.lower()
        self.fields = []

        cur_lines = []
        field     = None

        for l in lines:
            m = re_field.match( l )
            if m:
                # We detected the start of a new field definition.

                # first, save the current one
                if cur_lines:
                    self.fields.append( DocField( field, cur_lines ) )

                field     = m.group( 1 )   # record field name
                ln        = len( m.group( 0 ) )
                cur_lines = [" " * ln + l[ln:]]
            else:
                cur_lines.append( l )

        if field or cur_lines:
            self.fields.append( DocField( field, cur_lines ) )

    def  get_name( self ):
        """Return the first word of the first item of the first field, or
           `None' if there is no such word."""
        try:
            return self.fields[0].items[0].words[0]
        except ( AttributeError, IndexError, TypeError ):
            # missing field/item/word, or a code item whose `words' is
            # `None'
            return None

    def  dump( self, margin ):
        """Print the markup to standard output, enclosed in `<tag>' and
           `</tag>' lines that are indented by `margin' spaces."""
        print( " " * margin + "<" + self.tag + ">" )
        for f in self.fields:
            f.dump( "  " )
        print( " " * margin + "</" + self.tag + ">" )
320
321
322################################################################
323##
324##  DOC CHAPTER CLASS
325##
class  DocChapter:
    """Describe a chapter, i.e., an ordered group of sections.

    `self.block' is the block the chapter was built from (or `None'),
    `self.sections' starts as an empty list, `self.name' is the chapter
    name, `self.title' a list of title words, and `self.order' the list
    of words taken from the block's `sections' markup.
    """

    def  __init__( self, block ):
        """Build a chapter from `block'.

        If `block' is false, create the chapter named `Other' with the
        title `Miscellaneous' and an empty section order.
        """
        self.block    = block
        self.sections = []
        if block:
            self.name  = block.name
            self.title = block.get_markup_words( "title" )
            self.order = block.get_markup_words( "sections" )
        else:
            self.name  = "Other"
            # `str.split' replaces `string.split' (gone in Python 3)
            self.title = "Miscellaneous".split()
            self.order = []
339
340
341################################################################
342##
343##  DOC SECTION CLASS
344##
class  DocSection:
    """Collect the blocks of one documentation section together with the
       section's descriptive data."""

    def  __init__( self, name = "Other" ):
        """Create an empty section called `name'."""
        # identification
        self.name    = name
        self.title   = "ERROR"
        self.chapter = None

        # descriptive data, filled in by `process'
        self.abstract    = ""
        self.description = ""
        self.order       = []

        # contents
        self.defs        = []
        self.blocks      = {}
        self.block_names = []  # ordered block names in section

    def  add_def( self, block ):
        """Register `block' as a candidate section description."""
        self.defs.append( block )

    def  add_block( self, block ):
        """Add `block' to the section, remembering its insertion order."""
        self.block_names.append( block.name )
        self.blocks[block.name] = block

    def  process( self ):
        """Take title, abstract, description, and order from the first
           registered definition block that has a non-empty title."""
        for candidate in self.defs:
            heading = candidate.get_markup_text( "title" )
            if not heading:
                continue

            self.title       = heading
            self.abstract    = candidate.get_markup_words( "abstract" )
            self.description = candidate.get_markup_items( "description" )
            self.order       = candidate.get_markup_words_all( "order" )
            break

    def  reorder( self ):
        """Re-sort the block names by passing them and `self.order' to
           `sort_order_list'."""
        self.block_names = sort_order_list( self.block_names, self.order )
378
379
380################################################################
381##
382##  CONTENT PROCESSOR CLASS
383##
class  ContentProcessor:
    """Turn documentation comment blocks into sections and chapters.

    `self.sections' maps section names to `DocSection' objects,
    `self.section' is the section currently being filled,
    `self.chapters' is the list of `DocChapter' objects, and
    `self.headers' is a dictionary of header macros.  The attributes
    `markups', `markup', and `markup_lines' hold the state of the block
    currently being parsed; see `reset'.
    """

    def  __init__( self ):
        """Initialize a block content processor."""
        self.reset()

        self.sections = {}    # dictionary of documentation sections
        self.section  = None  # current documentation section

        self.chapters = []    # list of chapters

        self.headers  = {}    # dictionary of header macros

    def  set_section( self, section_name ):
        """Set current section during parsing.

        The section is created first if no section called `section_name'
        exists yet.
        """
        if section_name not in self.sections:
            self.sections[section_name] = DocSection( section_name )

        self.section = self.sections[section_name]

    def  add_chapter( self, block ):
        """Create a `DocChapter' from `block' and append it to the list of
           chapters."""
        self.chapters.append( DocChapter( block ) )

    def  reset( self ):
        """Reset the content processor for a new block."""
        self.markups      = []
        self.markup       = None
        self.markup_lines = []

    def  add_markup( self ):
        """Add a new markup section.

        Nothing happens unless both a current markup tag and at least one
        collected line are present.  Otherwise a `DocMarkup' is built from
        them and appended to `self.markups', and the current tag and lines
        are cleared.
        """
        if self.markup and self.markup_lines:

            # get rid of last line of markup if it's empty
            marks = self.markup_lines
            if not marks[-1].strip():
                marks = marks[:-1]

            self.markups.append( DocMarkup( self.markup, marks ) )

            self.markup       = None
            self.markup_lines = []

    def  process_content( self, content ):
        """Process a block content and return a list of DocMarkup objects
           corresponding to it.

           Lines that precede the first markup tag are ignored.  The
           matched tag text is replaced with spaces so that the rest of
           the line keeps its column position."""
        first = 1

        for line in content:
            found = None
            for t in re_markup_tags:
                m = t.match( line )
                if m:
                    found  = m.group( 1 ).lower()
                    prefix = len( m.group( 0 ) )
                    line   = " " * prefix + line[prefix:]   # remove markup from line
                    break

            # is it the start of a new markup section ?
            if found:
                first = 0
                self.add_markup()  # add current markup content
                self.markup = found
                if line.strip():
                    self.markup_lines.append( line )
            elif first == 0:
                self.markup_lines.append( line )

        self.add_markup()

        return self.markups

    def  parse_sources( self, source_processor ):
        """Create a `DocBlock' for every block of `source_processor' that
           has content.

           The blocks without content that directly follow such a block
           are handed to the `DocBlock' as its `follow' list."""
        blocks = source_processor.blocks
        count  = len( blocks )

        for n, source in enumerate( blocks ):
            if source.content:
                # this is a documentation comment, we need to catch
                # all following normal blocks in the "follow" list
                #
                follow = []
                m = n + 1
                while m < count and not blocks[m].content:
                    follow.append( blocks[m] )
                    m = m + 1

                # the constructor registers the new block with this
                # processor, so the object need not be kept here
                DocBlock( source, follow, self )

    def  finish( self ):
        """Process all sections and assign them to chapters.

        A warning is written to standard error for every section name
        listed by a chapter that does not exist.  Sections that belong to
        no chapter are collected into an extra chapter created with
        `DocChapter( None )'.
        """
        # imported here because this file has no module-level `import sys'
        import sys

        # process all sections to extract their abstract, description
        # and ordered list of items
        #
        for sec in self.sections.values():
            sec.process()

        # process chapters to check that all sections are correctly
        # listed there
        for chap in self.chapters:
            for sec in chap.order:
                if sec in self.sections:
                    section = self.sections[sec]
                    section.chapter = chap
                    section.reorder()
                    chap.sections.append( section )
                else:
                    sys.stderr.write( "WARNING: chapter '" +          \
                        chap.name + "' in " + chap.block.location() + \
                        " lists unknown section '" + sec + "'\n" )

        # check that all sections are in a chapter
        #
        others = []
        for sec in self.sections.values():
            if not sec.chapter:
                sec.reorder()
                others.append( sec )

        # create a new special chapter for all remaining sections
        # when necessary
        #
        if others:
            chap = DocChapter( None )
            chap.sections = others
            self.chapters.append( chap )
517
518
519################################################################
520##
521##  DOC BLOCK CLASS
522##
class  DocBlock:
    """Represent one documentation block and the source code it documents.

    `self.markups' is the list of markup objects parsed from the block's
    content, `self.type' the tag of the first markup, `self.name' the
    identifier found in the first word of the first markup, and
    `self.code' the source lines that follow the block.
    """

    def  __init__( self, source, follow, processor ):
        """Parse `source' and register the new block with `processor'.

        `follow' lists the blocks that follow `source'; their lines are
        collected into `self.code'.  A block of type `section' switches
        the processor's current section, a block of type `chapter' adds a
        chapter, and any other block is added to the processor's current
        section.  `self.section' records the processor's current section
        as it was before any such switch.
        """
        processor.reset()

        self.source  = source
        self.code    = []
        self.type    = "ERRTYPE"
        self.name    = "ERRNAME"
        self.section = processor.section
        self.markups = processor.process_content( source.content )

        # compute block type from first markup tag; keep the `ERRTYPE'
        # placeholder if there is no markup at all
        try:
            self.type = self.markups[0].tag
        except ( AttributeError, IndexError, TypeError ):
            pass

        # compute block name from first markup paragraph; keep the
        # `ERRNAME' placeholder if there is no first word (a code item
        # has `words' set to `None', which gives a `TypeError')
        try:
            name = self.markups[0].fields[0].items[0].words[0]
        except ( AttributeError, IndexError, TypeError ):
            pass
        else:
            m = re_identifier.match( name )
            if m:
                name = m.group( 1 )
            self.name = name

        if self.type == "section":
            # detect new section starts
            processor.set_section( self.name )
            processor.section.add_def( self )
        elif self.type == "chapter":
            # detect new chapter
            processor.add_chapter( self )
        else:
            processor.section.add_block( self )

        # now, compute the source lines relevant to this documentation
        # block; collecting stops at the first block with a true `format'
        source = []
        for b in follow:
            if b.format:
                break
            for l in b.lines:
                # collect header macro definitions
                m = re_header_macro.match( l )
                if m:
                    processor.headers[m.group( 2 )] = m.group( 1 )

                # we use "/* */" as a separator; it ends the lines taken
                # from the current block only
                if re_source_sep.match( l ):
                    break
                source.append( l )

        # now strip the leading and trailing empty lines from the sources
        start = 0
        end   = len( source ) - 1

        while start < end and not source[start].strip():
            start = start + 1

        while start < end and not source[end].strip():
            end = end - 1

        if start == end and not source[start].strip():
            self.code = []
        else:
            self.code = source[start:end + 1]

    def  location( self ):
        """Return the value of the source block's `location' method."""
        return self.source.location()

    def  get_markup( self, tag_name ):
        """Return the DocMarkup corresponding to a given tag in a block.

        The tag is compared after lower-casing `tag_name'; `None' is
        returned if no markup matches.
        """
        wanted = tag_name.lower()
        for m in self.markups:
            if m.tag == wanted:
                return m
        return None

    def  get_markup_words( self, tag_name ):
        """Return the word list of the first item of the first field of
           markup `tag_name', or an empty list if it does not exist."""
        try:
            m = self.get_markup( tag_name )
            return m.fields[0].items[0].words
        except ( AttributeError, IndexError, TypeError ):
            return []

    def  get_markup_words_all( self, tag_name ):
        """Return the words of all items of the first field of markup
           `tag_name', with the sentinel `/empty/' after each item.

           An empty list is returned if the markup does not exist or if
           an item has no word list."""
        try:
            m = self.get_markup( tag_name )
            words = []
            for item in m.fields[0].items:
                # We honour empty lines in an `<Order>' section element by
                # adding the sentinel `/empty/'.  The formatter should then
                # convert it to an appropriate representation in the
                # `section_enter' function.
                words += item.words
                words.append( "/empty/" )
            return words
        except ( AttributeError, IndexError, TypeError ):
            return []

    def  get_markup_text( self, tag_name ):
        """Return the words of `get_markup_words' joined with single
           spaces."""
        return " ".join( self.get_markup_words( tag_name ) )

    def  get_markup_items( self, tag_name ):
        """Return the item list of the first field of markup `tag_name',
           or `None' if it does not exist."""
        try:
            m = self.get_markup( tag_name )
            return m.fields[0].items
        except ( AttributeError, IndexError, TypeError ):
            return None
637
638# eof
639