1#  Content (c) 2002, 2004, 2006-2009, 2012
2#    David Turner <david@freetype.org>
3#
4#  This file contains routines used to parse the content of documentation
5#  comment blocks and build more structured objects out of them.
6#
7
8from sources import *
9from utils import *
10import string, re
11
12
# These two regular expressions are used to detect code sequences.  A code
# sequence is simply a code fragment embedded between a line holding only
# '{' and a line holding only '}', as in:
#
#  {
#    x = y + z;
#    if ( zookoo == 2 )
#    {
#      foobar();
#    }
#  }
#
# Group 1 of each expression captures the leading whitespace of the line,
# i.e. the indentation of the brace.  The parser in `DocField' ends a code
# sequence at the first closing-brace line whose indentation does not
# exceed the indentation of the opening brace; braces at a greater
# indentation are part of the code itself.
#
re_code_start = re.compile( r"(\s*){\s*$" )
re_code_end   = re.compile( r"(\s*)}\s*$" )
30
31
# This regular expression is used to isolate an identifier from other
# text.  Group 1 captures the (possibly empty) leading run of word
# characters and dashes of the string it is matched against.
#
re_identifier = re.compile( r'((?:\w|-)*)' )
36
37
# We collect macros ending in `_H'; while outputting the object data, we use
# this info together with the object's file location to emit the appropriate
# header file macro and name before the object itself.
#
# Group 1 is the macro name, group 2 the header name found between `<' and
# `>'.  The header name is matched with `[^>]*' so that it stops at the
# first closing angle bracket; a greedy `.*' would run up to the last `>' on
# the line and swallow e.g. a trailing comment that contains one.
#
re_header_macro = re.compile( r'^#define\s+(\w+_H)\s+<([^>]*)>' )
43
44
45#############################################################################
46#
47# The DocCode class is used to store source code lines.
48#
49#   'self.lines' contains a set of source code lines that will be dumped as
50#   HTML in a <PRE> tag.
51#
52#   The object is filled line by line by the parser; it strips the leading
53#   "margin" space from each input line before storing it in 'self.lines'.
54#
class  DocCode:
    """Store the lines of a code sequence.

    Attributes:
      lines -- list of code lines, with the leading margin removed
      words -- always None for a DocCode object (DocPara objects store
               their text in `words' and set `lines' to None instead)
    """

    def  __init__( self, margin, lines ):
        """Store `lines', removing `margin' leading columns where possible.

        The first `margin' characters of a line are removed only if they
        consist of whitespace exclusively; any other line is stored
        unchanged.
        """
        self.lines = []
        self.words = None

        for line in lines:
            # only strip the margin if it doesn't contain real text
            if not line[:margin].strip():
                line = line[margin:]
            self.lines.append( line )

    def  dump( self, prefix = "", width = 60 ):
        """Print all lines to standard output, each preceded by `prefix'."""
        for line in self.dump_lines( 0, width ):
            # a parenthesized single argument behaves the same with the
            # Python 2 `print' statement and the Python 3 function
            print( prefix + line )

    def  dump_lines( self, margin = 0, width = 60 ):
        """Return the lines as a new list, indented by `margin' spaces.

        `width' is accepted for symmetry with `DocPara.dump_lines' but is
        not used: code lines are never re-wrapped.
        """
        indent = " " * margin
        return [indent + line for line in self.lines]
77
78
79
80#############################################################################
81#
# The DocPara class is used to store a "normal" text paragraph.
83#
84#   'self.words' contains the list of words that make up the paragraph
85#
class  DocPara:
    """Store a normal text paragraph as a list of words.

    Attributes:
      words -- list of the whitespace-separated words of the paragraph
      lines -- always None for a DocPara object (DocCode objects store
               their text in `lines' and set `words' to None instead)
    """

    def  __init__( self, lines ):
        """Split all `lines' into words and collect them in `self.words'."""
        self.lines = None
        self.words = []
        for line in lines:
            # `split' without arguments ignores leading/trailing whitespace
            self.words.extend( line.split() )

    def  dump( self, prefix = "", width = 60 ):
        """Print the wrapped paragraph, each line preceded by `prefix'."""
        for line in self.dump_lines( 0, width ):
            print( prefix + line )

    def  dump_lines( self, margin = 0, width = 60 ):
        """Return the paragraph as a list of lines wrapped at `width'.

        Words are joined with single spaces; a new line is started
        whenever adding the next word would exceed `width' columns (the
        margin is not counted).  Each returned line is indented by
        `margin' spaces.  A word longer than `width' is never split; it
        is put on a line of its own.
        """
        indent = " " * margin
        result = []
        cur    = ""  # text of the line being built
        col    = 0   # width of `cur'

        for word in self.words:
            if col == 0:
                # first word of a line: always accept it, even if it is
                # wider than `width' -- flushing here would only emit a
                # spurious empty line
                cur = word
                col = len( word )
            elif col + 1 + len( word ) > width:
                # the word (plus separating space) doesn't fit anymore
                result.append( indent + cur )
                cur = word
                col = len( word )
            else:
                cur = cur + " " + word
                col = col + 1 + len( word )

        if col > 0:
            result.append( indent + cur )

        return result
124
125
126
127#############################################################################
128#
129#  The DocField class is used to store a list containing either DocPara or
130#  DocCode objects. Each DocField also has an optional "name" which is used
131#  when the object corresponds to a field or value definition
132#
class  DocField:
    """Store a list of DocPara and DocCode objects with an optional name.

    Attributes:
      name  -- the field name; can be None for normal paragraphs/sources
      items -- list of DocPara and DocCode objects, in input order
    """

    def  __init__( self, name, lines ):
        """Analyze `lines' and split them into paragraphs and code.

        A line holding only `{' starts a code sequence, which ends at
        a line holding only `}' with an indentation not greater than the
        one of the opening brace; the brace lines themselves are dropped.
        Outside of code sequences, an empty line ends the current
        paragraph.
        """
        self.name  = name
        self.items = []

        in_code   = False  # are we inside of a code sequence?
        margin    = -1     # indentation of the current code sequence
        cur_lines = []     # lines of the paragraph/code being collected

        for line in lines:
            if in_code:
                m = re_code_end.match( line )
                if m and len( m.group( 1 ) ) <= margin:
                    # that's it, we finished the code sequence; the
                    # margin has already been removed from the lines
                    self.items.append( DocCode( 0, cur_lines ) )
                    margin    = -1
                    cur_lines = []
                    in_code   = False
                else:
                    # nope, continue the code sequence
                    cur_lines.append( line[margin:] )
                continue

            m = re_code_start.match( line )
            if m:
                # start of a code sequence: save the current paragraph,
                # if any, then switch to code extraction mode
                if cur_lines:
                    self.items.append( DocPara( cur_lines ) )
                    cur_lines = []

                margin  = len( m.group( 1 ) )
                in_code = True
            elif cur_lines and not line.split():
                # an empty line ends the current paragraph
                self.items.append( DocPara( cur_lines ) )
                cur_lines = []
            else:
                # otherwise, simply add the line to the current
                # paragraph (this includes empty lines seen while no
                # paragraph is open)
                cur_lines.append( line )

        if in_code:
            # unexpected end of code sequence; the collected lines had
            # their margin removed above already, so don't strip it a
            # second time here
            self.items.append( DocCode( 0, cur_lines ) )
        elif cur_lines:
            self.items.append( DocPara( cur_lines ) )

    def  dump( self, prefix = "" ):
        """Print the field to standard output.

        If the field has a name, it is printed first as `<name> ::' and
        the items are printed with `----' appended to `prefix'.  Items
        are separated by an empty line.
        """
        if self.name:
            print( prefix + self.name + " ::" )
            prefix = prefix + "----"

        first = True
        for item in self.items:
            if not first:
                print( "" )
            item.dump( prefix )
            first = False

    def  dump_lines( self, margin = 0, width = 60 ):
        """Return the lines of all items, separated by empty strings.

        `margin' and `width' are passed on to the `dump_lines' method of
        each item.
        """
        result = []
        first  = True

        for item in self.items:
            if not first:
                result.append( "" )

            result.extend( item.dump_lines( margin, width ) )
            first = False

        return result
223
224
225
# This regular expression is used to detect field definitions, this is, a
# (possibly empty) name followed by `::' at the start of a line.  Group 1
# captures the name, which is made of word characters and can contain dots
# between its first and last character.
#
re_field = re.compile( r"\s*(\w*|\w(\w|\.)*\w)\s*::" )
229
230
231
class  DocMarkup:
    """Store the content of a markup section as a list of DocField objects.

    Attributes:
      tag    -- the markup tag, converted to lowercase
      fields -- list of DocField objects, in input order
    """

    def  __init__( self, tag, lines ):
        """Split `lines' into fields.

        A line matching `re_field' starts a new field; the field name and
        the `::' marker are replaced with spaces so that the remaining
        text keeps its column position.  Lines found before the first
        field definition are stored in a field without a name.
        """
        self.tag    = tag.lower()
        self.fields = []

        cur_lines = []
        field     = None

        for line in lines:
            m = re_field.match( line )
            if m:
                # we detected the start of a new field definition;
                # first, save the current one
                if cur_lines:
                    self.fields.append( DocField( field, cur_lines ) )

                field     = m.group( 1 )   # record field name
                ln        = len( m.group( 0 ) )
                cur_lines = [" " * ln + line[ln:]]
            else:
                cur_lines.append( line )

        if field or cur_lines:
            self.fields.append( DocField( field, cur_lines ) )

    def  get_name( self ):
        """Return the first word of the first item of the first field.

        Return None if there is no such word.
        """
        try:
            return self.fields[0].items[0].words[0]
        except ( IndexError, TypeError, AttributeError ):
            # no field, no item, or a code item (whose `words' is None)
            return None

    def  get_start( self ):
        """Return the words of the first item of the first field.

        The words are joined with single spaces.  Return the string
        "ERROR" if there is no such item or if it holds no word list.
        """
        try:
            return " ".join( self.fields[0].items[0].words )
        except ( IndexError, TypeError, AttributeError ):
            return "ERROR"

    def  dump( self, margin ):
        """Print the markup to standard output between `<tag>' markers.

        The markers are indented by `margin' spaces; the fields are
        always dumped with a prefix of two spaces.
        """
        print( " " * margin + "<" + self.tag + ">" )
        for f in self.fields:
            f.dump( "  " )
        print( " " * margin + "</" + self.tag + ">" )
285
286
287
class  DocChapter:
    """Store a chapter, this is, an ordered group of sections.

    Attributes:
      block    -- the block object defining the chapter, or None
      sections -- list of sections of the chapter; empty after
                  construction
      name     -- the chapter name
      title    -- the chapter title, as a list of words
      order    -- list of section names, as given by the `sections'
                  markup of the block
    """

    def  __init__( self, block ):
        """Build a chapter from `block'.

        If `block' is None (or otherwise false), a catch-all chapter
        named "Other" with title "Miscellaneous" and no ordering
        information is created.
        """
        self.block    = block
        self.sections = []
        if block:
            self.name  = block.name
            self.title = block.get_markup_words( "title" )
            self.order = block.get_markup_words( "sections" )
        else:
            self.name  = "Other"
            self.title = ["Miscellaneous"]   # a list of words, as above
            self.order = []
301
302
303
class  DocSection:
    """Store a documentation section and the blocks that belong to it.

    Attributes:
      name        -- the section name
      title       -- the section title; "ERROR" until `process' finds one
      chapter     -- the chapter the section belongs to; None initially
      blocks      -- dictionary mapping block names to blocks
      block_names -- block names in the order the blocks were added
      defs        -- list of blocks registered with `add_def'
      abstract, description, order
                  -- data taken from a definition block by `process'
    """

    def  __init__( self, name = "Other" ):
        """Create an empty section called `name'."""
        # identification
        self.name    = name
        self.title   = "ERROR"
        self.chapter = None

        # blocks of the section, by name and in order of appearance
        self.blocks      = {}
        self.block_names = []

        # blocks that (may) describe the section itself, and the data
        # extracted from them
        self.defs        = []
        self.abstract    = ""
        self.description = ""
        self.order       = []

    def  add_def( self, block ):
        """Register `block' as a candidate section description."""
        self.defs.append( block )

    def  add_block( self, block ):
        """Add `block' to the section, indexed by its name."""
        key = block.name
        self.block_names.append( key )
        self.blocks[key] = block

    def  process( self ):
        """Fill in the section data from the first block with a title.

        The registered definition blocks are checked in order; the first
        one with a non-empty `title' markup provides the title, abstract,
        description, and order of the section.  Nothing is changed if no
        such block exists.
        """
        for candidate in self.defs:
            heading = candidate.get_markup_text( "title" )
            if not heading:
                continue

            self.title       = heading
            self.abstract    = candidate.get_markup_words( "abstract" )
            self.description = candidate.get_markup_items( "description" )
            self.order       = candidate.get_markup_words( "order" )
            break

    def  reorder( self ):
        """Rearrange `block_names' with `sort_order_list' and `order'."""
        names            = self.block_names
        self.block_names = sort_order_list( names, self.order )
337
338
339
class  ContentProcessor:
    """Convert documentation comment blocks into structured objects.

    Attributes:
      sections     -- dictionary mapping section names to sections
      section      -- current section; None until `set_section' is called
      chapters     -- list of chapters
      headers      -- dictionary of header macros
      markups      -- list of DocMarkup objects of the current block
      markup       -- tag of the markup section being collected, or None
      markup_lines -- lines of the markup section being collected
    """

    def  __init__( self ):
        """initialize a block content processor"""
        self.reset()

        self.sections = {}    # dictionary of documentation sections
        self.section  = None  # current documentation section

        self.chapters = []    # list of chapters

        self.headers  = {}    # dictionary of header macros

    def  set_section( self, section_name ):
        """set current section during parsing

           The section is created and registered first if it doesn't
           exist yet."""
        if section_name not in self.sections:
            self.sections[section_name] = DocSection( section_name )

        self.section = self.sections[section_name]

    def  add_chapter( self, block ):
        """append a new chapter, built from `block', to the chapter list"""
        self.chapters.append( DocChapter( block ) )

    def  reset( self ):
        """reset the content processor for a new block"""
        self.markups      = []
        self.markup       = None
        self.markup_lines = []

    def  add_markup( self ):
        """add a new markup section

           The lines collected so far are converted to a DocMarkup object
           and the collector is reset.  Nothing happens if no markup is
           active or no lines have been collected."""
        if self.markup and self.markup_lines:
            lines = self.markup_lines

            # get rid of last line of markup if it's empty
            if not lines[-1].strip():
                lines = lines[:-1]

            self.markups.append( DocMarkup( self.markup, lines ) )

            self.markup       = None
            self.markup_lines = []

    def  process_content( self, content ):
        """process a block content and return a list of DocMarkup objects
           corresponding to it

           Lines found before the first markup tag are ignored."""
        seen_markup = False

        for line in content:
            found = None
            for t in re_markup_tags:
                m = t.match( line )
                if m:
                    found  = m.group( 1 ).lower()
                    prefix = len( m.group( 0 ) )
                    # remove markup from line, keeping the column
                    # position of the remaining text
                    line   = " " * prefix + line[prefix:]
                    break

            if found:
                # this is the start of a new markup section
                seen_markup = True
                self.add_markup()  # add current markup content
                self.markup = found
                if line.strip():
                    self.markup_lines.append( line )
            elif seen_markup:
                self.markup_lines.append( line )

        self.add_markup()

        return self.markups

    def  parse_sources( self, source_processor ):
        """build a DocBlock for each documentation comment of
           `source_processor.blocks'

           A block with content is a documentation comment; all normal
           blocks directly following it are passed to its DocBlock."""
        blocks = source_processor.blocks
        count  = len( blocks )

        for n in range( count ):
            source = blocks[n]
            if source.content:
                # this is a documentation comment, we need to catch
                # all following normal blocks in the "follow" list
                #
                follow = []
                m = n + 1
                while m < count and not blocks[m].content:
                    follow.append( blocks[m] )
                    m = m + 1

                # the DocBlock constructor registers the new block with
                # this processor, so the result needs not be kept here
                DocBlock( source, follow, self )

    def  finish( self ):
        """process all sections and assign them to chapters

           Sections that aren't listed in any chapter are collected in
           an additional chapter, which is only created if necessary.
           A chapter listing an unknown section triggers a warning on
           standard error."""
        # imported locally since this file has no explicit `import sys'
        import sys

        # process all sections to extract their abstract, description
        # and ordered list of items
        #
        for sec in self.sections.values():
            sec.process()

        # process chapters to check that all sections are correctly
        # listed there
        for chap in self.chapters:
            for sec in chap.order:
                if sec in self.sections:
                    section = self.sections[sec]
                    section.chapter = chap
                    section.reorder()
                    chap.sections.append( section )
                else:
                    sys.stderr.write( "WARNING: chapter '" +          \
                        chap.name + "' in " + chap.block.location() + \
                        " lists unknown section '" + sec + "'\n" )

        # check that all sections are in a chapter
        #
        others = [sec for sec in self.sections.values()
                  if not sec.chapter]

        # create a new special chapter for all remaining sections
        # when necessary
        #
        if others:
            chap = DocChapter( None )
            chap.sections = others
            self.chapters.append( chap )
473
474
475
class  DocBlock:
    """Store a documentation block and the source code it documents.

    Attributes:
      source  -- the source block holding the documentation comment
      code    -- list of source code lines following the comment, without
                 leading and trailing empty lines
      type    -- tag of the first markup; "ERRTYPE" if there is none
      name    -- identifier taken from the first word of the first
                 markup; "ERRNAME" if there is none
      section -- the processor's current section at the time the block
                 is created
      markups -- list of markup objects of the block
    """

    def  __init__( self, source, follow, processor ):
        """Build the block from `source' and register it with `processor'.

        `follow' is the list of source blocks following the documentation
        comment.  Depending on its type, the new block is registered as
        a section definition, as a chapter, or as a normal block of the
        current section.
        """
        processor.reset()

        self.source  = source
        self.code    = []
        self.type    = "ERRTYPE"
        self.name    = "ERRNAME"
        self.section = processor.section
        self.markups = processor.process_content( source.content )

        # compute block type from first markup tag
        try:
            self.type = self.markups[0].tag
        except ( IndexError, AttributeError ):
            pass

        # compute block name from first markup paragraph
        try:
            name = self.markups[0].fields[0].items[0].words[0]
            m = re_identifier.match( name )
            if m:
                name = m.group( 1 )
            self.name = name
        except ( IndexError, AttributeError, TypeError ):
            # no markup, field, item, or word (code items have `words'
            # set to None): keep the default name
            pass

        if self.type == "section":
            # detect new section starts
            processor.set_section( self.name )
            processor.section.add_def( self )
        elif self.type == "chapter":
            # detect new chapter
            processor.add_chapter( self )
        else:
            processor.section.add_block( self )

        # now, compute the source lines relevant to this documentation
        # block; we stop at the first block with a `format'
        source = []
        for b in follow:
            if b.format:
                break
            for line in b.lines:
                # collect header macro definitions
                m = re_header_macro.match( line )
                if m:
                    processor.headers[m.group( 2 )] = m.group( 1 )

                # we use "/* */" as a separator; note that this only
                # skips the rest of the current follow block
                if re_source_sep.match( line ):
                    break
                source.append( line )

        # now strip the leading and trailing empty lines from the sources
        start = 0
        end   = len( source ) - 1

        while start < end and not source[start].strip():
            start = start + 1

        while start < end and not source[end].strip():
            end = end - 1

        if start == end and not source[start].strip():
            # a single remaining line that is empty
            self.code = []
        else:
            self.code = source[start:end + 1]

    def  location( self ):
        """return the location of the block, as given by its source"""
        return self.source.location()

    def  get_markup( self, tag_name ):
        """return the DocMarkup corresponding to a given tag in a block

           The comparison ignores the case of `tag_name'; None is
           returned if there is no such markup."""
        tag = tag_name.lower()
        for m in self.markups:
            if m.tag == tag:
                return m
        return None

    def  get_markup_name( self, tag_name ):
        """return the name of a given primary markup in a block

           None is returned if there is no such markup."""
        try:
            return self.get_markup( tag_name ).get_name()
        except ( AttributeError, TypeError ):
            return None

    def  get_markup_words( self, tag_name ):
        """return the word list of the first item of the first field of
           a given markup; return an empty list if it doesn't exist"""
        try:
            return self.get_markup( tag_name ).fields[0].items[0].words
        except ( AttributeError, IndexError, TypeError ):
            return []

    def  get_markup_text( self, tag_name ):
        """return the words of `get_markup_words', joined with spaces"""
        return " ".join( self.get_markup_words( tag_name ) )

    def  get_markup_items( self, tag_name ):
        """return the item list of the first field of a given markup;
           return None if it doesn't exist"""
        try:
            return self.get_markup( tag_name ).fields[0].items
        except ( AttributeError, IndexError, TypeError ):
            return None

# eof
583
584# eof
585