1#!/usr/bin/env python3
2# -*- coding: utf-8 -*-
3"""
4Filters Python code for use with Doxygen, using a syntax-aware approach.
5
6Rather than implementing a partial Python parser with regular expressions, this
7script uses Python's own abstract syntax tree walker to isolate meaningful
8constructs.  It passes along namespace information so Doxygen can construct a
9proper tree for nested functions, classes, and methods.  It understands bed lump
10variables are by convention private.  It groks Zope-style Python interfaces.
It can automatically turn PEP 257 compliant docstrings that follow the more restrictive
12Google style guide into appropriate Doxygen tags, and is even aware of
13doctests.
14"""
15
16from ast import NodeVisitor, parse, iter_fields, AST, Name, get_docstring
17from re import compile as regexpCompile, IGNORECASE, MULTILINE
18from types import GeneratorType
19from sys import stderr
20from os import linesep
21from string import whitespace
22from codeop import compile_command
23
24
def coroutine(func):
    """
    Basic decorator to implement the coroutine pattern.

    Wraps a generator function so that calling it returns a generator that
    has already been advanced to its first ``yield`` and can therefore
    accept values through ``send()`` straight away.

    Args:
        func: The generator function to wrap.

    Returns:
        A callable taking the same arguments as ``func`` that returns the
        primed generator.  The wrapper keeps the name and docstring of
        ``func``.
    """
    # Imported locally so the decorator does not depend on a file-level import.
    from functools import wraps

    @wraps(func)
    def __start(*args, **kwargs):
        """Automatically calls next() on the internal generator function."""
        __cr = func(*args, **kwargs)
        # Run up to the first yield; a fresh generator rejects send(value).
        next(__cr)
        return __cr
    return __start
33
34
class AstWalker(NodeVisitor):
    """
    A walker that'll recursively progress through an AST.

    Given an abstract syntax tree for Python code, walk through all the
    nodes looking for significant types (for our purposes we only care
    about module starts, class definitions, function definitions, variable
    assignments, and function calls, as all the information we want to pass
    to Doxygen is found within these constructs).  If the autobrief option
    is set, it further attempts to parse docstrings to create appropriate
    Doxygen tags.
    """

    # We have a number of regular expressions that we use.  They don't
    # vary across instances and so are compiled directly in the class
    # definition.

    # Group 1 is the leading whitespace of a line that has some content.
    __indentRE = regexpCompile(r'^(\s*)\S')
    # A '#' at the start of any line of a (possibly multi-line) string.
    __newlineRE = regexpCompile(r'^#', MULTILINE)
    # An empty or whitespace-only line.
    __blanklineRE = regexpCompile(r'^\s*$')
    # A triple-quote docstring delimiter with optional string prefix letters;
    # group 2 is the triple quote itself.
    __docstrMarkerRE = regexpCompile(r"\s*([uUbB]*[rR]?(['\"]{3}))")
    # A docstring that opens and closes with the same triple quote on one line.
    __docstrOneLineRE = regexpCompile(r"\s*[uUbB]*[rR]?(['\"]{3})(.+)\1")

    # Zope-style "provides"/"implements" calls; group 1 is the indentation
    # and group 2 the text between the parentheses.
    __implementsRE = regexpCompile(r"^(\s*)(?:zope\.)?(?:interface\.)?"
                                   r"(?:module|class|directly)?"
                                   r"(?:Provides|Implements)\(\s*(.+)\s*\)",
                                   IGNORECASE)
    # A class statement whose single base is (zope.)(interface.)Interface;
    # group 1 is the class name.
    __interfaceRE = regexpCompile(r"^\s*class\s+(\S+)\s*\(\s*(?:zope\.)?"
                                  r"(?:interface\.)?"
                                  r"Interface\s*\)\s*:", IGNORECASE)
    # An assignment from an Attribute("...") call; groups are indentation,
    # assigned name and the quoted description.
    __attributeRE = regexpCompile(r"^(\s*)(\S+)\s*=\s*(?:zope\.)?"
                                  r"(?:interface\.)?"
                                  r"Attribute\s*\(['\"]{1,3}(.*)['\"]{1,3}\)",
                                  IGNORECASE)

    # Maps the Doxygen tag text to the pattern for the "Label: value" line it
    # replaces; group 1 of each pattern is the label part that gets swapped
    # for the tag.
    __singleLineREs = {
        ' @author: ': regexpCompile(r"^(\s*Authors?:\s*)(.*)$", IGNORECASE),
        ' @copyright ': regexpCompile(r"^(\s*Copyright:\s*)(.*)$", IGNORECASE),
        ' @date ': regexpCompile(r"^(\s*Date:\s*)(.*)$", IGNORECASE),
        ' @file ': regexpCompile(r"^(\s*File:\s*)(.*)$", IGNORECASE),
        ' @version: ': regexpCompile(r"^(\s*Version:\s*)(.*)$", IGNORECASE),
        ' @note ': regexpCompile(r"^(\s*Note:\s*)(.*)$", IGNORECASE),
        ' @warning ': regexpCompile(r"^(\s*Warning:\s*)(.*)$", IGNORECASE)
    }
    # Section headings such as "Args:", "Arguments:", "Kwargs:",
    # "Keyword Arguments:" or "Attributes:" alone on a line.
    __argsStartRE = regexpCompile(r"^(\s*(?:(?:Keyword\s+)?"
                                  r"(?:A|Kwa)rg(?:ument)?|Attribute)s?"
                                  r"\s*:\s*)$", IGNORECASE)
    # An item line "name (type) -- description" or "name: description";
    # exposes the named groups "name", "type" and "desc".
    __argsRE = regexpCompile(r"^\s*(?P<name>\w+)\s*(?P<type>\(?\S*\)?)?\s*"
                             r"(?:-|:)+\s+(?P<desc>.+)$")
    # "Returns:" or "Yields:" alone on a line.
    __returnsStartRE = regexpCompile(r"^\s*(?:Return|Yield)s:\s*$", IGNORECASE)
    # "Raises:", "Exceptions:" or "See Also:" alone on a line; group 1 is the
    # heading text.
    __raisesStartRE = regexpCompile(r"^\s*(Raises|Exceptions|See Also):\s*$",
                                    IGNORECASE)
    # A comma-separated list of (dotted) names, optionally with "&" or "and"
    # before the last item.
    __listRE = regexpCompile(r"^\s*(([\w\.]+),\s*)+(&|and)?\s*([\w\.]+)$")
    # A line holding exactly one (dotted) name.
    __singleListItemRE = regexpCompile(r'^\s*([\w\.]+)\s*$')
    # One (dotted) name with an optional trailing comma; used with findall.
    __listItemRE = regexpCompile(r'([\w\.]+),?\s*')
    # "Example(s):" or "Doctest(s):" alone on a line.
    __examplesStartRE = regexpCompile(r"^\s*(?:Example|Doctest)s?:\s*$",
                                      IGNORECASE)
    # One or two capitalized words followed by a colon, alone on a line
    # (case-sensitive, unlike the headings above).
    __sectionStartRE = regexpCompile(r"^\s*(([A-Z]\w* ?){1,2}):\s*$")
    # The error line should match traceback lines, error exception lines, and
    # (due to a weird behavior of codeop) single word lines.
    __errorLineRE = regexpCompile(r"^\s*((?:\S+Error|Traceback.*):?\s*(.*)|@?[\w.]+)\s*$",
                                  IGNORECASE)
96
97    def __init__(self, lines, options, inFilename):
98        """Initialize a few class variables in preparation for our walk."""
99        self.lines = lines
100        self.options = options
101        self.inFilename = inFilename
102        self.docLines = []
103
104    @staticmethod
105    def _stripOutAnds(inStr):
106        """Takes a string and returns the same without ands or ampersands."""
107        assert isinstance(inStr, str)
108        return inStr.replace(' and ', ' ').replace(' & ', ' ')
109
110    @staticmethod
111    def _endCodeIfNeeded(line, inCodeBlock):
112        """Simple routine to append end code marker if needed."""
113        assert isinstance(line, str)
114        if inCodeBlock:
115            line = '# @endcode{0}{1}'.format(linesep, line.rstrip())
116            inCodeBlock = False
117        return line, inCodeBlock
118
    @coroutine
    def _checkIfCode(self, inCodeBlock):
        """
        Checks whether or not a given line appears to be Python code.

        Runs as a coroutine.  Each ``send()`` delivers a
        ``(line, lines, lineNum)`` tuple: the text to examine, the list of
        output lines that may be modified, and a line number used to work
        out which entry of ``lines`` receives a marker.  Once a run of
        lines has been classified, ``# @code`` is appended to an entry of
        ``lines`` when code starts outside a code block, or ``# @endcode``
        when non-code is found inside one.

        Args:
            inCodeBlock: Whether this checker starts out inside a code block.
        """
        while True:
            line, lines, lineNum = (yield)
            # Number of lines accumulated into testLine so far.
            testLineNum = 1
            # Index into lines of the entry that will receive the marker.
            currentLineNum = 0
            testLine = line.strip()
            # Tri-state verdict: None = undecided, True = code, False = prose.
            lineOfCode = None
            while lineOfCode is None:
                match = AstWalker.__errorLineRE.match(testLine)
                if not testLine or testLine == '...' or match:
                    # These are ambiguous.
                    # Drop what we had and start over with the next line.
                    line, lines, lineNum = (yield)
                    testLine = line.strip()
                    #testLineNum = 1
                elif testLine.startswith('>>> '):
                    # This is definitely code.
                    lineOfCode = True
                else:
                    try:
                        # compile_command returns a code object for a complete
                        # statement and None when more input is needed.
                        compLine = compile_command(testLine)
                        # Only accept the verdict if the entry we would mark
                        # is already a comment line.
                        if compLine and lines[currentLineNum].strip().startswith('#'):
                            lineOfCode = True
                        else:
                            # Incomplete so far: pull in the next line and
                            # retry with the combined text.
                            line, lines, lineNum = (yield)
                            line = line.strip()
                            if line.startswith('>>> '):
                                # Definitely code, don't compile further.
                                lineOfCode = True
                            else:
                                testLine += linesep + line
                                testLine = testLine.strip()
                                testLineNum += 1
                    except (SyntaxError, RuntimeError):
                        # This is definitely not code.
                        lineOfCode = False
                    except Exception:
                        # Other errors are ambiguous.
                        line, lines, lineNum = (yield)
                        testLine = line.strip()
                        #testLineNum = 1
                # Step back over the lines that were tested together.
                # NOTE(review): this can be negative, in which case it
                # indexes lines from the end -- presumably the entry just
                # before the tested text; confirm against the caller.
                currentLineNum = lineNum - testLineNum
            if not inCodeBlock and lineOfCode:
                # Code begins: open a code block after the chosen entry.
                inCodeBlock = True
                lines[currentLineNum] = '{0}{1}# @code{1}'.format(
                    lines[currentLineNum],
                    linesep
                )
            elif inCodeBlock and lineOfCode is False:
                # None is ambiguous, so strict checking
                # against False is necessary.
                inCodeBlock = False
                lines[currentLineNum] = '{0}{1}# @endcode{1}'.format(
                    lines[currentLineNum],
                    linesep
                )
176
    @coroutine
    def __alterDocstring(self, tail='', writer=None):
        """
        Runs eternally, processing docstring lines.

        Parses docstring lines as they get fed in via send, applies appropriate
        Doxygen tags, and passes them along in batches for writing.

        Each ``send()`` delivers a ``(lineNum, line)`` tuple.  A ``line`` of
        ``None`` is the sentinel that flushes the pending batch.  Batches go
        to ``writer`` as ``(firstLineNum, lastLineNum, lines)`` tuples.  Tag
        recognition only happens when ``self.options.autobrief`` is set;
        otherwise lines are merely turned into comments.

        Args:
            tail: Optional text appended as an extra comment line after the
                last line of ``self.docLines``.
            writer: Primed generator that receives the finished batches.
        """
        assert isinstance(tail, str) and isinstance(writer, GeneratorType)

        # Output lines of the batch currently being assembled.
        lines = []
        # Set when the current batch must be flushed after this line.
        timeToSend = False
        inCodeBlock = False
        # True while inside an arbitrary "Heading:" section.
        inSection = False
        # Doxygen tag applied to item lines of the current section.
        prefix = ''
        # Line number of the first line in the current batch (-1 = none yet).
        firstLineNum = -1
        sectionHeadingIndent = 0
        # Two independent checkers: one that starts outside a code block and
        # one that starts inside.
        codeChecker = self._checkIfCode(False)
        proseChecker = self._checkIfCode(True)
        while True:
            lineNum, line = (yield)
            if firstLineNum < 0:
                firstLineNum = lineNum
            # Don't bother doing extra work if it's a sentinel.
            if line is not None:
                # Also limit work if we're not parsing the docstring.
                if self.options.autobrief:
                    for doxyTag, tagRE in AstWalker.__singleLineREs.items():
                        match = tagRE.search(line)
                        if match:
                            # We've got a simple one-line Doxygen command
                            # Flush what came before so the tag line ends up
                            # in a batch of its own.
                            # NOTE(review): lines[-1] raises IndexError when
                            # the batch is still empty (e.g. a tag on the
                            # first line of a batch); the same applies to the
                            # other lines[-1] uses below.
                            lines[-1], inCodeBlock = self._endCodeIfNeeded(
                                lines[-1], inCodeBlock)
                            writer.send((firstLineNum, lineNum - 1, lines))
                            lines = []
                            firstLineNum = lineNum
                            line = line.replace(match.group(1), doxyTag)
                            timeToSend = True

                    if inSection:
                        # The last line belonged to a section.
                        # Does this one too? (Ignoring empty lines.)
                        match = AstWalker.__blanklineRE.match(line)
                        if not match:
                            # Indentation width after expanding tabs.
                            indent = len(line.expandtabs(self.options.tablength)) - \
                                len(line.expandtabs(self.options.tablength).lstrip())
                            if indent <= sectionHeadingIndent:
                                inSection = False
                            else:
                                if lines[-1] == '#':
                                    # If the last line was empty, but we're still in a section
                                    # then we need to start a new paragraph.
                                    lines[-1] = '# @par'

                    # The checks below form a priority chain; the first
                    # pattern that matches decides how the line is rewritten.
                    match = AstWalker.__returnsStartRE.match(line)
                    if match:
                        # We've got a "returns" section
                        line = line.replace(match.group(0), ' @return\t').rstrip()
                        prefix = '@return\t'
                    else:
                        match = AstWalker.__argsStartRE.match(line)
                        if match:
                            # We've got an "arguments" section
                            # The heading itself is dropped; only the tag for
                            # the following item lines is remembered.
                            line = line.replace(match.group(0), '').rstrip()
                            if 'attr' in match.group(0).lower():
                                prefix = '@property\t'
                            else:
                                prefix = '@param\t'
                            lines[-1], inCodeBlock = self._endCodeIfNeeded(
                                lines[-1], inCodeBlock)
                            lines.append('#' + line)
                            continue
                        else:
                            match = AstWalker.__argsRE.match(line)
                            if match and not inCodeBlock:
                                # We've got something that looks like an item /
                                # description pair.
                                if 'property' in prefix:
                                    # Properties put the description on a
                                    # comment line of its own.
                                    line = '# {0}\t{1[name]}{2}# {1[desc]}'.format(
                                        prefix, match.groupdict(), linesep)
                                else:
                                    line = ' {0}\t{1[name]}\t{1[desc]}'.format(
                                        prefix, match.groupdict())
                            else:
                                match = AstWalker.__raisesStartRE.match(line)
                                if match:
                                    line = line.replace(match.group(0), '').rstrip()
                                    if 'see' in match.group(1).lower():
                                        # We've got a "see also" section
                                        prefix = '@sa\t'
                                    else:
                                        # We've got an "exceptions" section
                                        prefix = '@exception\t'
                                    lines[-1], inCodeBlock = self._endCodeIfNeeded(
                                        lines[-1], inCodeBlock)
                                    lines.append('#' + line)
                                    continue
                                else:
                                    match = AstWalker.__listRE.match(line)
                                    if match and not inCodeBlock:
                                        # We've got a list of something or another
                                        # Each item gets its own tagged line.
                                        itemList = []
                                        for itemMatch in AstWalker.__listItemRE.findall(self._stripOutAnds(
                                                                                        match.group(0))):
                                            itemList.append('# {0}\t{1}{2}'.format(
                                                prefix, itemMatch, linesep))
                                        # Drop the first '#'; one is added
                                        # back to every line further down.
                                        line = ''.join(itemList)[1:]
                                    else:
                                        match = AstWalker.__examplesStartRE.match(line)
                                        if match and lines[-1].strip() == '#' \
                                           and self.options.autocode:
                                            # We've got an "example" section
                                            inCodeBlock = True
                                            line = line.replace(match.group(0),
                                                                ' @b Examples{0}# @code'.format(linesep))
                                        else:
                                            match = AstWalker.__sectionStartRE.match(line)
                                            if match:
                                                # We've got an arbitrary section
                                                prefix = ''
                                                inSection = True
                                                # What's the indentation of the section heading?
                                                sectionHeadingIndent = len(line.expandtabs(self.options.tablength)) \
                                                    - len(line.expandtabs(self.options.tablength).lstrip())
                                                line = line.replace(
                                                    match.group(0),
                                                    ' @par {0}'.format(match.group(1))
                                                )
                                                # Undo a paragraph break added
                                                # for the previous section.
                                                if lines[-1] == '# @par':
                                                    lines[-1] = '#'
                                                lines[-1], inCodeBlock = self._endCodeIfNeeded(
                                                    lines[-1], inCodeBlock)
                                                lines.append('#' + line)
                                                continue
                                            elif prefix:
                                                match = AstWalker.__singleListItemRE.match(line)
                                                if match and not inCodeBlock:
                                                    # Probably a single list item
                                                    line = ' {0}\t{1}'.format(
                                                        prefix, match.group(0))
                                                elif self.options.autocode and inCodeBlock:
                                                    # Inside a code block: look
                                                    # for where the code ends.
                                                    proseChecker.send(
                                                        (
                                                            line, lines,
                                                            lineNum - firstLineNum
                                                        )
                                                    )
                                                elif self.options.autocode:
                                                    # Outside a code block:
                                                    # look for where code
                                                    # starts.
                                                    codeChecker.send(
                                                        (
                                                            line, lines,
                                                            lineNum - firstLineNum
                                                        )
                                                    )

                # If we were passed a tail, append it to the docstring.
                # Note that this means that we need a docstring for this
                # item to get documented.
                if tail and lineNum == len(self.docLines) - 1:
                    line = '{0}{1}# {2}'.format(line.rstrip(), linesep, tail)

                # Add comment marker for every line.
                line = '#{0}'.format(line.rstrip())
                # Ensure the first line has the Doxygen double comment.
                if lineNum == 0:
                    line = '#' + line

                # Strip a space left dangling in front of embedded line breaks.
                lines.append(line.replace(' ' + linesep, linesep))
            else:
                # If we get our sentinel value, send out what we've got.
                timeToSend = True

            if timeToSend:
                lines[-1], inCodeBlock = self._endCodeIfNeeded(lines[-1],
                                                               inCodeBlock)
                writer.send((firstLineNum, lineNum, lines))
                # Start a fresh batch.
                lines = []
                firstLineNum = -1
                timeToSend = False
356
357    @coroutine
358    def __writeDocstring(self):
359        """
360        Runs eternally, dumping out docstring line batches as they get fed in.
361
362        Replaces original batches of docstring lines with modified versions
363        fed in via send.
364        """
365        while True:
366            firstLineNum, lastLineNum, lines = (yield)
367            newDocstringLen = lastLineNum - firstLineNum + 1
368            while len(lines) < newDocstringLen:
369                lines.append('')
370            # Substitute the new block of lines for the original block of lines.
371            self.docLines[firstLineNum: lastLineNum + 1] = lines
372
    def _processDocstring(self, node, tail='', **kwargs):
        """
        Handles a docstring for functions, classes, and modules.

        Basically just figures out the bounds of the docstring and sends it
        off to the parser to do the actual work.  The converted comment
        lines are then written back into ``self.lines``: in front of the
        declaration for classes and functions, in place for modules.

        Args:
            node: The AST node that owns the docstring.
            tail: Extra text handed on to the docstring converter.
            **kwargs: ``containingNodes`` is read for non-module nodes.
        """
        typeName = type(node).__name__
        # Modules don't have lineno defined, but it's always 0 for them.
        curLineNum = startLineNum = 0
        if typeName != 'Module':
            startLineNum = curLineNum = node.lineno - 1
        # Figure out where both our enclosing object and our docstring start.
        line = ''
        while curLineNum < len(self.lines):
            line = self.lines[curLineNum]
            match = AstWalker.__docstrMarkerRE.match(line)
            if match:
                break
            curLineNum += 1
        docstringStart = curLineNum
        # Figure out where our docstring ends.
        if not AstWalker.__docstrOneLineRE.match(line):
            # Skip for the special case of a single-line docstring.
            curLineNum += 1
            while curLineNum < len(self.lines):
                line = self.lines[curLineNum]
                # Look for the same kind of triple quote that opened it.
                if line.find(match.group(2)) >= 0:
                    break
                curLineNum += 1
        # Exclusive end of the docstring's line range.
        endLineNum = curLineNum + 1

        # Isolate our enclosing object's declaration.
        defLines = self.lines[startLineNum: docstringStart]
        # Isolate our docstring.
        self.docLines = self.lines[docstringStart: endLineNum]

        # If we have a docstring, extract information from it.
        if self.docLines:
            # Get rid of the docstring delineators.
            self.docLines[0] = AstWalker.__docstrMarkerRE.sub('',
                                                              self.docLines[0])
            self.docLines[-1] = AstWalker.__docstrMarkerRE.sub('',
                                                               self.docLines[-1])
            # Handle special strings within the docstring.
            docstringConverter = self.__alterDocstring(
                tail, self.__writeDocstring())
            for lineInfo in enumerate(self.docLines):
                docstringConverter.send(lineInfo)
            # A None line is the sentinel that flushes the last batch.
            docstringConverter.send((len(self.docLines) - 1, None))

        # Add a Doxygen @brief tag to any single-line description.
        if self.options.autobrief:
            # Rotate leading blank comment lines to the end; the counter
            # stops the loop when every line is blank.
            safetyCounter = 0
            while len(self.docLines) > 0 and self.docLines[0].lstrip('#').strip() == '':
                del self.docLines[0]
                self.docLines.append('')
                safetyCounter += 1
                if safetyCounter >= len(self.docLines):
                    # Escape the effectively empty docstring.
                    break
            # The first line is a brief if it stands alone or is followed
            # by a blank line or a Doxygen tag.
            if len(self.docLines) == 1 or (len(self.docLines) >= 2 and (
                self.docLines[1].strip(whitespace + '#') == '' or
                    self.docLines[1].strip(whitespace + '#').startswith('@'))):
                self.docLines[0] = "## @brief {0}".format(self.docLines[0].lstrip('#'))
                if len(self.docLines) > 1 and self.docLines[1] == '# @par':
                    self.docLines[1] = '#'

        if defLines:
            # Indent the comment lines like the declaration they document.
            match = AstWalker.__indentRE.match(defLines[0])
            indentStr = match and match.group(1) or ''
            self.docLines = [AstWalker.__newlineRE.sub(indentStr + '#', docLine)
                             for docLine in self.docLines]

        # Taking away a docstring from an interface method definition sometimes
        # leaves broken code as the docstring may be the only code in it.
        # Here we manually insert a pass statement to rectify this problem.
        if typeName != 'Module':
            if docstringStart < len(self.lines):
                match = AstWalker.__indentRE.match(self.lines[docstringStart])
                indentStr = match and match.group(1) or ''
            else:
                indentStr = ''
            containingNodes = kwargs.get('containingNodes', []) or []
            fullPathNamespace = self._getFullPathName(containingNodes)
            # NOTE(review): [-2] raises IndexError when containingNodes is
            # empty, since the path then holds only the module entry.
            parentType = fullPathNamespace[-2][1]
            if parentType == 'interface' and typeName == 'FunctionDef' \
               or fullPathNamespace[-1][1] == 'interface':
                defLines[-1] = '{0}{1}{2}pass'.format(defLines[-1],
                                                      linesep, indentStr)
            elif self.options.autobrief and typeName == 'ClassDef':
                # If we're parsing docstrings separate out class attribute
                # definitions to get better Doxygen output.
                # Find the first line carrying a @property tag.
                # NOTE(review): firstVarLine stays unbound if self.docLines
                # is empty, which would make the check below fail.
                for firstVarLineNum, firstVarLine in enumerate(self.docLines):
                    if '@property\t' in firstVarLine:
                        break
                lastVarLineNum = len(self.docLines)
                if '@property\t' in firstVarLine:
                    # Scan backwards for the last @property line.
                    while lastVarLineNum > firstVarLineNum:
                        lastVarLineNum -= 1
                        if '@property\t' in self.docLines[lastVarLineNum]:
                            break
                    lastVarLineNum += 1
                    if firstVarLineNum < len(self.docLines):
                        # Use the indentation of the first non-blank line
                        # after the docstring.
                        indentLineNum = endLineNum
                        indentStr = ''
                        while not indentStr and indentLineNum < len(self.lines):
                            match = AstWalker.__indentRE.match(self.lines[indentLineNum])
                            indentStr = match and match.group(1) or ''
                            indentLineNum += 1
                        # Move the property lines behind the declaration,
                        # re-indented to that level.
                        varLines = ['{0}{1}'.format(linesep, docLine).replace(
                                    linesep, linesep + indentStr)
                                    for docLine in self.docLines[
                                        firstVarLineNum: lastVarLineNum]]
                        defLines.extend(varLines)
                        self.docLines[firstVarLineNum: lastVarLineNum] = []
                        # After the property shuffling we will need to relocate
                        # any existing namespace information.
                        # NOTE(review): searches for a literal '\n' rather
                        # than linesep -- confirm for non-POSIX line endings.
                        namespaceLoc = defLines[-1].find('\n# @namespace')
                        if namespaceLoc >= 0:
                            self.docLines[-1] += defLines[-1][namespaceLoc:]
                            defLines[-1] = defLines[-1][:namespaceLoc]

        # For classes and functions, apply our changes and reverse the
        # order of the declaration and docstring, and for modules just
        # apply our changes.
        if typeName != 'Module':
            self.lines[startLineNum: endLineNum] = self.docLines + defLines
        else:
            self.lines[startLineNum: endLineNum] = defLines + self.docLines
503
504    @staticmethod
505    def _checkMemberName(name):
506        """
507        See if a member name indicates that it should be private.
508
509        Private variables in Python (starting with a double underscore but
510        not ending in a double underscore) and bed lumps (variables that
511        are not really private but are by common convention treated as
512        protected because they begin with a single underscore) get Doxygen
513        tags labeling them appropriately.
514        """
515        assert isinstance(name, str)
516        restrictionLevel = None
517        if not name.endswith('__'):
518            if name.startswith('__'):
519                restrictionLevel = 'private'
520            elif name.startswith('_'):
521                restrictionLevel = 'protected'
522        return restrictionLevel
523
524    def _processMembers(self, node, contextTag):
525        """
526        Mark up members if they should be private.
527
528        If the name indicates it should be private or protected, apply
529        the appropriate Doxygen tags.
530        """
531        restrictionLevel = self._checkMemberName(node.name)
532        if restrictionLevel:
533            workTag = '{0}{1}# @{2}'.format(contextTag,
534                                            linesep,
535                                            restrictionLevel)
536        else:
537            workTag = contextTag
538        return workTag
539
540    def generic_visit(self, node, **kwargs):
541        """
542        Extract useful information from relevant nodes including docstrings.
543
544        This is virtually identical to the standard version contained in
545        NodeVisitor.  It is only overridden because we're tracking extra
546        information (the hierarchy of containing nodes) not preserved in
547        the original.
548        """
549        for field, value in iter_fields(node):
550            if isinstance(value, list):
551                for item in value:
552                    if isinstance(item, AST):
553                        self.visit(item, containingNodes=kwargs['containingNodes'])
554            elif isinstance(value, AST):
555                self.visit(value, containingNodes=kwargs['containingNodes'])
556
557    def visit(self, node, **kwargs):
558        """
559        Visit a node and extract useful information from it.
560
561        This is virtually identical to the standard version contained in
562        NodeVisitor.  It is only overridden because we're tracking extra
563        information (the hierarchy of containing nodes) not preserved in
564        the original.
565        """
566        containingNodes = kwargs.get('containingNodes', [])
567        method = 'visit_' + node.__class__.__name__
568        visitor = getattr(self, method, self.generic_visit)
569        return visitor(node, containingNodes=containingNodes)
570
571    def _getFullPathName(self, containingNodes):
572        """
573        Returns the full node hierarchy rooted at module name.
574
575        The list representing the full path through containing nodes
576        (starting with the module itself) is returned.
577        """
578        assert isinstance(containingNodes, list)
579        return [(self.options.fullPathNamespace, 'module')] + containingNodes
580
581    def visit_Module(self, node, **kwargs):
582        """
583        Handles the module-level docstring.
584
585        Process the module-level docstring and create appropriate Doxygen tags
586        if autobrief option is set.
587        """
588        if self.options.debug:
589            stderr.write("# Module {0}{1}".format(self.options.fullPathNamespace,
590                                                  linesep))
591        if get_docstring(node):
592            self._processDocstring(node)
593        # Visit any contained nodes (in this case pretty much everything).
594        self.generic_visit(node, containingNodes=kwargs.get('containingNodes',
595                                                            []))
596
597    def visit_Assign(self, node, **kwargs):
598        """
599        Handles assignments within code.
600
601        Variable assignments in Python are used to represent interface
602        attributes in addition to basic variables.  If an assignment appears
603        to be an attribute, it gets labeled as such for Doxygen.  If a variable
604        name uses Python mangling or is just a bed lump, it is labeled as
605        private for Doxygen.
606        """
607        lineNum = node.lineno - 1
608        # Assignments have one Doxygen-significant special case:
609        # interface attributes.
610        match = AstWalker.__attributeRE.match(self.lines[lineNum])
611        if match:
612            self.lines[lineNum] = '{0}## @property {1}{2}{0}# {3}{2}' \
613                '{0}# @hideinitializer{2}{4}{2}'.format(
614                    match.group(1),
615                    match.group(2),
616                    linesep,
617                    match.group(3),
618                    self.lines[lineNum].rstrip()
619                )
620            if self.options.debug:
621                stderr.write("# Attribute {0.id}{1}".format(node.targets[0],
622                                                            linesep))
623        if isinstance(node.targets[0], Name):
624            match = AstWalker.__indentRE.match(self.lines[lineNum])
625            indentStr = match and match.group(1) or ''
626            restrictionLevel = self._checkMemberName(node.targets[0].id)
627            if restrictionLevel:
628                self.lines[lineNum] = '{0}## @var {1}{2}{0}' \
629                    '# @hideinitializer{2}{0}# @{3}{2}{4}{2}'.format(
630                        indentStr,
631                        node.targets[0].id,
632                        linesep,
633                        restrictionLevel,
634                        self.lines[lineNum].rstrip()
635                    )
636        # Visit any contained nodes.
637        self.generic_visit(node, containingNodes=kwargs['containingNodes'])
638
639    def visit_Call(self, node, **kwargs):
640        """
641        Handles function calls within code.
642
643        Function calls in Python are used to represent interface implementations
644        in addition to their normal use.  If a call appears to mark an
645        implementation, it gets labeled as such for Doxygen.
646        """
647        lineNum = node.lineno - 1
648        # Function calls have one Doxygen-significant special case:  interface
649        # implementations.
650        match = AstWalker.__implementsRE.match(self.lines[lineNum])
651        if match:
652            self.lines[lineNum] = '{0}## @implements {1}{2}{0}{3}{2}'.format(
653                match.group(1), match.group(2), linesep,
654                self.lines[lineNum].rstrip())
655            if self.options.debug:
656                stderr.write("# Implements {0}{1}".format(match.group(1),
657                                                          linesep))
658        # Visit any contained nodes.
659        self.generic_visit(node, containingNodes=kwargs['containingNodes'])
660
661    def visit_FunctionDef(self, node, **kwargs):
662        """
663        Handles function definitions within code.
664
665        Process a function's docstring, keeping well aware of the function's
666        context and whether or not it's part of an interface definition.
667        """
668        if self.options.debug:
669            stderr.write("# Function {0.name}{1}".format(node, linesep))
670        # Push either 'interface' or 'class' onto our containing nodes
671        # hierarchy so we can keep track of context.  This will let us tell
672        # if a function is nested within another function or even if a class
673        # is nested within a function.
674        containingNodes = kwargs.get('containingNodes', []) or []
675        containingNodes.append((node.name, 'function'))
676        if self.options.topLevelNamespace:
677            fullPathNamespace = self._getFullPathName(containingNodes)
678            contextTag = '.'.join(pathTuple[0] for pathTuple in fullPathNamespace)
679            modifiedContextTag = self._processMembers(node, contextTag)
680            tail = '@namespace {0}'.format(modifiedContextTag)
681        else:
682            tail = self._processMembers(node, '')
683        if get_docstring(node):
684            self._processDocstring(node, tail,
685                                   containingNodes=containingNodes)
686        # Visit any contained nodes.
687        self.generic_visit(node, containingNodes=containingNodes)
688        # Remove the item we pushed onto the containing nodes hierarchy.
689        containingNodes.pop()
690
691    def visit_ClassDef(self, node, **kwargs):
692        """
693        Handles class definitions within code.
694
695        Process the docstring.  Note though that in Python Class definitions
696        are used to define interfaces in addition to classes.
697        If a class definition appears to be an interface definition tag it as an
698        interface definition for Doxygen.  Otherwise tag it as a class
699        definition for Doxygen.
700        """
701        lineNum = node.lineno - 1
702        # Push either 'interface' or 'class' onto our containing nodes
703        # hierarchy so we can keep track of context.  This will let us tell
704        # if a function is a method or an interface method definition or if
705        # a class is fully contained within another class.
706        containingNodes = kwargs.get('containingNodes', []) or []
707        match = AstWalker.__interfaceRE.match(self.lines[lineNum])
708        if match:
709            if self.options.debug:
710                stderr.write("# Interface {0.name}{1}".format(node, linesep))
711            containingNodes.append((node.name, 'interface'))
712        else:
713            if self.options.debug:
714                stderr.write("# Class {0.name}{1}".format(node, linesep))
715            containingNodes.append((node.name, 'class'))
716        if self.options.topLevelNamespace:
717            fullPathNamespace = self._getFullPathName(containingNodes)
718            contextTag = '.'.join(pathTuple[0] for pathTuple in fullPathNamespace)
719            tail = '@namespace {0}'.format(contextTag)
720        else:
721            tail = ''
722        # Class definitions have one Doxygen-significant special case:
723        # interface definitions.
724        if match:
725            contextTag = '{0}{1}# @interface {2}'.format(tail,
726                                                         linesep,
727                                                         match.group(1))
728        else:
729            contextTag = tail
730        contextTag = self._processMembers(node, contextTag)
731        if get_docstring(node):
732            self._processDocstring(node, contextTag,
733                                   containingNodes=containingNodes)
734        # Visit any contained nodes.
735        self.generic_visit(node, containingNodes=containingNodes)
736        # Remove the item we pushed onto the containing nodes hierarchy.
737        containingNodes.pop()
738
739    def parseLines(self):
740        """Form an AST for the code and produce a new version of the source."""
741        inAst = parse(''.join(self.lines), self.inFilename)
742        # Visit all the nodes in our tree and apply Doxygen tags to the source.
743        self.visit(inAst)
744
745    def getLines(self):
746        """Return the modified file once processing has been completed."""
747        return linesep.join(line.rstrip() for line in self.lines)
748
749
def main():
    """
    Starts the parser on the file given by the filename as the first
    argument on the command line.

    The command line is parsed, the input file is read, its source is run
    through AstWalker, and the modified source is printed to standard
    output.  If no filename is given a message is written to stderr and the
    process exits with status -1.
    """
    from optparse import OptionParser, OptionGroup
    from os import sep
    from os.path import basename, splitext
    from sys import argv, exit as sysExit

    def optParse():
        """
        Parses command line options.

        Generally we're supporting all the command line options that doxypy.py
        supports in an analogous way to make it easy to switch back and forth.
        We additionally support a top-level namespace argument that is used
        to trim away excess path information.

        Returns:
            A tuple of the parsed options (with an added fullPathNamespace
            attribute) and the input filename.
        """

        parser = OptionParser(prog=basename(argv[0]))

        parser.set_usage("%prog [options] filename")
        parser.add_option(
            "-a", "--autobrief",
            action="store_true", dest="autobrief",
            help="parse the docstring for @brief description and other information"
        )
        parser.add_option(
            "-c", "--autocode",
            action="store_true", dest="autocode",
            help="parse the docstring for code samples"
        )
        parser.add_option(
            "-n", "--ns",
            action="store", type="string", dest="topLevelNamespace",
            help="specify a top-level namespace that will be used to trim paths"
        )
        parser.add_option(
            "-t", "--tablength",
            action="store", type="int", dest="tablength", default=4,
            help="specify a tab length in spaces; only needed if tabs are used"
        )
        group = OptionGroup(parser, "Debug Options")
        group.add_option(
            "-d", "--debug",
            action="store_true", dest="debug",
            help="enable debug output on stderr"
        )
        parser.add_option_group(group)

        # Parse options based on our definition.
        (options, filename) = parser.parse_args()

        # Just abort immediately if we don't have an input file.
        if not filename:
            stderr.write("No filename given." + linesep)
            sysExit(-1)

        # Turn the full path filename into a full path module location.
        # The extension is removed with splitext rather than by chopping a
        # fixed three characters, so names that do not end in ".py" are not
        # mangled.
        fullPathNamespace = splitext(filename[0])[0].replace(sep, '.')
        # Use any provided top-level namespace argument to trim off excess.
        realNamespace = fullPathNamespace
        if options.topLevelNamespace:
            namespaceStart = fullPathNamespace.find(options.topLevelNamespace)
            if namespaceStart >= 0:
                realNamespace = fullPathNamespace[namespaceStart:]
        options.fullPathNamespace = realNamespace

        return options, filename[0]

    # Figure out what is being requested.
    (options, inFilename) = optParse()

    # Read contents of input file; the context manager closes the file even
    # if reading fails.
    with open(inFilename) as inFile:
        lines = inFile.readlines()
    # Create the abstract syntax tree for the input file.
    astWalker = AstWalker(lines, options, inFilename)
    astWalker.parseLines()
    # Output the modified source.
    print(astWalker.getLines())
833
# Only start the filter when this file is executed directly as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
837