1#  Sources (c) 2002-2004, 2006-2009, 2012, 2013
2#    David Turner <david@freetype.org>
3#
4#
# this file contains definitions of classes needed to decompose
# C source files into a series of multi-line "blocks". There are
# two kinds of blocks:
8#
9#   - normal blocks, which contain source code or ordinary comments
10#
#   - documentation blocks, which have restricted formatting, and
#     whose text always starts with a documentation markup tag like
#     "<Function>", "<Type>", etc.
14#
15# the routines used to process the content of documentation blocks
16# are not contained here, but in "content.py"
17#
18# the classes and methods found here only deal with text parsing
19# and basic documentation block extraction
20#
21
22import fileinput, re, sys, os, string
23
24
25
26################################################################
27##
28##  BLOCK FORMAT PATTERN
29##
30##   A simple class containing compiled regular expressions used
31##   to detect potential documentation format block comments within
32##   C source code
33##
34##   note that the 'column' pattern must contain a group that will
35##   be used to "unbox" the content of documentation comment blocks
36##
class  SourceBlockFormat:
    """A set of compiled regular expressions describing one comment format.

    attributes:

      id     : the identifier passed to the constructor, stored unchanged

      start  : compiled pattern matched against a line to detect the
               beginning of a block

      column : compiled pattern matched against the lines inside a block;
               it must define group 1, since SourceBlock uses
               `column.match( line ).group( 1 )' to unbox the content

      end    : compiled pattern matched against a line to detect the
               end of a block
    """

    def  __init__( self, id, start, column, end ):
        """create a block pattern, used to recognize special documentation blocks

        `start', `column' and `end' are regular expression sources; each
        one is compiled with re.VERBOSE, so whitespace and `#' comments
        within them are ignored.
        """
        # the parameter keeps the name `id' (which hides the builtin within
        # this method) so that the signature stays unchanged
        self.id     = id
        self.start  = re.compile( start, re.VERBOSE )
        self.column = re.compile( column, re.VERBOSE )
        self.end    = re.compile( end, re.VERBOSE )
45
46
47
#
# format 1 documentation comment blocks look like the following:
#
#    /************************************/
#    /*                                  */
#    /*                                  */
#    /*                                  */
#    /************************************/
#
# we define a few regular expressions here to detect them
#
# the closing line of such a block looks exactly like its opening line,
# which is why the 'start' expression is also passed as the 'end' one
#

start = r'''
  \s*      # any number of whitespace
  /\*{2,}/ # followed by '/' and at least two asterisks then '/'
  \s*$     # probably followed by whitespace
'''

# group 1 captures the text found between the leading '/*' and the
# trailing '*/' of a boxed line
column = r'''
  \s*      # any number of whitespace
  /\*{1}   # followed by '/' and precisely one asterisk
  ([^*].*) # followed by anything (group 1)
  \*{1}/   # followed by one asterisk and a '/'
  \s*$     # probably followed by whitespace
'''

re_source_block_format1 = SourceBlockFormat( 1, start, column, start )
75
76
#
# format 2 documentation comment blocks look like the following:
#
#    /************************************ (at least 2 asterisks)
#     *
#     *
#     *
#     *
#     **/       (1 or more asterisks at the end)
#
# we define a few regular expressions here to detect them
#
# note that the module-level names 'start' and 'column' are rebound here;
# the format 1 object has already been built from their previous values
#
start = r'''
  \s*     # any number of whitespace
  /\*{2,} # followed by '/' and at least two asterisks
  \s*$    # probably followed by whitespace
'''

# group 1 captures everything that follows the leading asterisk
column = r'''
  \s*        # any number of whitespace
  \*{1}(?!/) # followed by precisely one asterisk not followed by `/'
  (.*)       # then anything (group1)
'''

end = r'''
  \s*  # any number of whitespace
  \*+/ # followed by at least one asterisk, then '/'
'''

re_source_block_format2 = SourceBlockFormat( 2, start, column, end )
107
108
#
# the list of supported documentation block formats, we could add new ones
# relatively easily
#
# each line read outside of a documentation block is tested against the
# 'start' expression of every entry of this list
#
re_source_block_formats = [re_source_block_format1, re_source_block_format2]
114
115
#
# the following regular expressions correspond to markup tags
# within the documentation comment blocks. they're equivalent
# despite their different syntax
#
# notice how each markup tag _must_ begin a new line
#
# in both expressions, group 1 is the tag name, made of word characters
# and dashes (it may be empty)
#
re_markup_tag1 = re.compile( r'''\s*<((?:\w|-)*)>''' )  # <xxxx> format
re_markup_tag2 = re.compile( r'''\s*@((?:\w|-)*):''' )  # @xxxx: format

#
# the list of supported markup tags, we could add new ones relatively
# easily
#
re_markup_tags = [re_markup_tag1, re_markup_tag2]
131
#
# used to detect a cross-reference, after markup tags have been stripped
#
# group 1 is the referenced name, group 2 is the rest of the text
#
re_crossref = re.compile( r'@((?:\w|-)*)(.*)' )    #  @foo

#
# used to detect italic and bold styles in paragraph text
#
# group 1 is the emphasized word (word characters and apostrophes, starting
# with a word character), group 3 is the rest of the text; group 2 is only
# a by-product of the repetition
#
re_italic = re.compile( r"_(\w(\w|')*)_(.*)" )     #  _italic_
re_bold   = re.compile( r"\*(\w(\w|')*)\*(.*)" )   #  *bold*
142
#
# this regular expression code to identify a URL has been taken from
#
#   http://mail.python.org/pipermail/tutor/2002-September/017228.html
#
# (with slight modifications)
#
# the pattern is assembled from the small pieces defined below; group 1
# of the result is the URL itself, without trailing punctuation
#

urls = r'(?:https?|telnet|gopher|file|wais|ftp)'
ltrs = r'\w'
gunk = r'/#~:.?+=&%@!\-'
punc = r'.:?\-'

# WARNING: this module-level name hides the builtin any() for the rest of
# this module; it is kept under this name for backward compatibility
any  = "%(ltrs)s%(gunk)s%(punc)s" % { 'ltrs' : ltrs,
                                      'gunk' : gunk,
                                      'punc' : punc }
url  = r"""
         (
           \b                    # start at word boundary
           %(urls)s :            # need resource and a colon
           [%(any)s] +?          # followed by one or more of any valid
                                 # character, but be conservative and
                                 # take only what you need to...
           (?=                   # [look-ahead non-consumptive assertion]
             [%(punc)s]*         # either 0 or more punctuation
             (?:                 # [non-grouping parentheses]
               [^%(any)s] | $    # followed by a non-url char
                                 # or end of the string
             )
           )
         )
        """ % {'urls' : urls,
               'any'  : any,
               'punc' : punc }

re_url = re.compile( url, re.VERBOSE | re.MULTILINE )
178
#
# used to detect the end of commented source lines
#
# it matches optional whitespace followed by a comment that is empty or
# only contains whitespace, like `/* */'
#
re_source_sep = re.compile( r'\s*/\*\s*\*/' )

#
# used to perform cross-reference within source output
#
# group 1 is a (possibly empty) run of non-word characters, group 2 is
# the (possibly empty) word that follows it
#
re_source_crossref = re.compile( r'(\W*)(\w*)' )
188
#
# a list of reserved source keywords
#
# the pattern is written as a raw string so that `\b' and `\#' reach the
# regular expression compiler unchanged (`\#' is not a valid escape in an
# ordinary string literal); `#' must be escaped because of re.VERBOSE
#
# group 1 is the keyword that has been recognized
#
# NOTE(review): since the group is preceded by `\b', the alternatives that
# start with `#' can only match right after a word character, hence never
# at the start of a string - confirm whether this is intended
#
re_source_keywords = re.compile( r'''\b ( typedef   |
                                          struct    |
                                          enum      |
                                          union     |
                                          const     |
                                          char      |
                                          int       |
                                          short     |
                                          long      |
                                          void      |
                                          signed    |
                                          unsigned  |
                                          \#include |
                                          \#define  |
                                          \#undef   |
                                          \#if      |
                                          \#ifdef   |
                                          \#ifndef  |
                                          \#else    |
                                          \#endif   ) \b''', re.VERBOSE )
212
213
214################################################################
215##
216##  SOURCE BLOCK CLASS
217##
218##   A SourceProcessor is in charge of reading a C source file
219##   and decomposing it into a series of different "SourceBlocks".
220##   each one of these blocks can be made of the following data:
221##
222##   - A documentation comment block that starts with "/**" and
223##     whose exact format will be discussed later
224##
225##   - normal sources lines, including comments
226##
227##   the important fields in a text block are the following ones:
228##
229##     self.lines   : a list of text lines for the corresponding block
230##
##     self.content : for documentation comment blocks only, this is the
##                    block content that has been "unboxed" from its
##                    decoration. This is an empty list for all other
##                    blocks (i.e. sources or ordinary comments with no
##                    starting markup tag)
236##
class  SourceBlock:
    """A run of consecutive lines read from a source file.

    attributes:

      processor : the object passed to the constructor; only its `format'
                  attribute is read here

      filename  : name of the file, as passed to the constructor

      lineno    : line number, as passed to the constructor

      lines     : a private copy of the lines of the block

      format    : value of `processor.format' when the block was created,
                  i.e. None or an object providing a compiled `column'
                  pattern and an `id'

      content   : the unboxed comment lines if `format' is set and at
                  least one of them starts with a markup tag, an empty
                  list otherwise
    """

    def  __init__( self, processor, filename, lineno, lines ):
        """build a block from `lines' and try to extract its content

        `lines' is copied, later changes to the caller's list do not
        affect the block.
        """
        self.processor = processor
        self.filename  = filename
        self.lineno    = lineno
        self.lines     = lines[:]
        self.format    = processor.format
        self.content   = []

        # not a documentation comment, there is nothing to unbox
        if self.format is None:
            return

        # extract comment lines, the lines that do not match the column
        # pattern (e.g. the block delimiters) are dropped
        lines = []

        for line0 in self.lines:
            m = self.format.column.match( line0 )
            if m:
                lines.append( m.group( 1 ) )

        # now, look for a markup tag; explicit loops are used on purpose
        # since this module rebinds the name `any' to a string
        for l in lines:
            l = l.strip()
            if l:
                for tag in re_markup_tags:
                    if tag.match( l ):
                        # the unboxed lines are kept unstripped
                        self.content = lines
                        return

    def  location( self ):
        """return the position of the block as a "(filename:lineno)" string"""
        return "(" + self.filename + ":" + repr( self.lineno ) + ")"

    # debugging only - not used in normal operations
    def  dump( self ):
        """print the content of the block if it has one, its raw lines otherwise"""
        # a single parenthesized argument makes these calls behave the same
        # with the `print' statement and with the print() function
        if self.content:
            print( "{{{content start---" )
            for l in self.content:
                print( l )
            print( "---content end}}}" )
            return

        for line in self.lines:
            print( line )
287
288
289
290################################################################
291##
292##  SOURCE PROCESSOR CLASS
293##
294##   The SourceProcessor is in charge of reading a C source file
295##   and decomposing it into a series of different "SourceBlock"
296##   objects.
297##
298##   each one of these blocks can be made of the following data:
299##
300##   - A documentation comment block that starts with "/**" and
301##     whose exact format will be discussed later
302##
##   - normal source lines, including comments
304##
305##
class  SourceProcessor:
    """Read a source file and split it into a list of SourceBlock objects.

    attributes:

      blocks   : the list of blocks created so far

      filename : name of the file being parsed, None before the first
                 call to parse_file()

      format   : the block format of the documentation comment being
                 read, or None when reading normal lines

      lineno   : set to the current line number of the file each time a
                 line matches the start of a documentation comment, and
                 reset to 0 by parse_file(); it is not updated for other
                 lines, so a block of normal lines is created with
                 whatever value was recorded last

      lines    : the lines accumulated for the block being read
    """

    def  __init__( self ):
        """initialize a source processor"""
        self.blocks   = []
        self.filename = None
        self.format   = None
        self.lineno   = 0
        self.lines    = []

    def  reset( self ):
        """reset a block processor, clean all its blocks"""
        self.blocks = []
        self.format = None

    def  parse_file( self, filename ):
        """parse a C source file, and add its blocks to the processor's list

        the blocks of a previously parsed file are discarded first. the
        module-level state of `fileinput' is used while reading, and it is
        closed again before returning, even if an exception is raised.
        """
        self.reset()

        self.filename = filename

        # the module-level fileinput state can only be active once, make
        # sure that nothing is left from an earlier use
        fileinput.close()
        self.format = None
        self.lineno = 0
        self.lines  = []

        try:
            for line in fileinput.input( filename ):
                # strip trailing newlines, important on Windows machines!
                if line.endswith( '\n' ):
                    line = line[:-1]

                if self.format is None:
                    self.process_normal_line( line )
                elif self.format.end.match( line ):
                    # that's a normal block end, add it to 'lines' and
                    # create a new block
                    self.lines.append( line )
                    self.add_block_lines()
                elif self.format.column.match( line ):
                    # that's a normal column line, add it to 'lines'
                    self.lines.append( line )
                else:
                    # humm.. this is an unexpected block end,
                    # create a new block, but don't process the line
                    self.add_block_lines()

                    # we need to process the line again
                    self.process_normal_line( line )
        finally:
            # do not leave the module-level fileinput state active
            fileinput.close()

        # record the last lines
        self.add_block_lines()

    def  process_normal_line( self, line ):
        """process a normal line and check whether it is the start of a new block

        this relies on fileinput.filelineno(), hence on the input that has
        been opened by parse_file().
        """
        # every format is tested, there is no early exit on purpose: if
        # several formats matched, the last one of the list would be used
        for f in re_source_block_formats:
            if f.start.match( line ):
                # flush the lines read so far before switching format
                self.add_block_lines()
                self.format = f
                self.lineno = fileinput.filelineno()

        self.lines.append( line )

    def  add_block_lines( self ):
        """add the current accumulated lines and create a new block

        nothing happens if no line has been accumulated.
        """
        if self.lines:
            # the block reads `self.format' in its constructor, so the
            # format must only be cleared once the block has been created
            block = SourceBlock( self, self.filename, self.lineno, self.lines )

            self.blocks.append( block )
            self.format = None
            self.lines  = []

    # debugging only, not used in normal operations
    def  dump( self ):
        """print all blocks in a processor"""
        for b in self.blocks:
            b.dump()
382
383# eof
384