1#!/usr/local/bin/python3.8
2'''
3NAME
4    code-filter - AsciiDoc filter to highlight language keywords
5
6SYNOPSIS
7    code-filter -b backend -l language [ -t tabsize ]
8                [ --help | -h ] [ --version | -v ]
9
10DESCRIPTION
11    This filter reads source code from the standard input, highlights language
12    keywords and comments and writes to the standard output.
13
14    The purpose of this program is to demonstrate how to write an AsciiDoc
    filter -- it's much too simplistic to be passed off as a code syntax
16    highlighter. Use the 'source-highlight-filter' instead.
17
18
19OPTIONS
20    --help, -h
21        Print this documentation.
22
23    -b
24        Backend output file format: 'docbook', 'linuxdoc', 'html', 'css'.
25
26    -l
27        The name of the source code language: 'python', 'ruby', 'c++', 'c'.
28
29    -t tabsize
30        Expand source tabs to tabsize spaces.
31
32    --version, -v
33        Print program version number.
34
35BUGS
36    - Code on the same line as a block comment is treated as comment.
    - Keywords inside literal strings are highlighted.
38    - There doesn't appear to be an easy way to accommodate linuxdoc so
39      just pass it through without markup.
40
41AUTHOR
42    Written by Stuart Rackham, <srackham@gmail.com>
43
44URLS
45    https://github.com/asciidoc/asciidoc-py3
46    https://asciidoc.org/
47
48COPYING
49    Copyright (C) 2002-2013 Stuart Rackham.
50    Copyright (C) 2013-2020 AsciiDoc Contributors.
51
52    Free use of this software is granted under the terms of the GNU General
53    Public License version 2 (GPLv2).
54'''
55
56import os
57import sys
58import re
59
VERSION = '1.1.2'

# Option state: main() fills these in from the -l, -b and -t command line
# options before code_filter() runs.
language = None
backend = None
tabsize = 8

# Per-backend (start tag, end tag) pair wrapped around each keyword.
keywordtags = {
    'html': ('<strong>', '</strong>'),
    'css': ('<strong>', '</strong>'),
    'docbook': ('<emphasis role="strong">', '</emphasis>'),
    'linuxdoc': ('', ''),
}

# Per-backend (start tag, end tag) pair wrapped around comment text.
commenttags = {
    'html': ('<i>', '</i>'),
    'css': ('<i>', '</i>'),
    'docbook': ('<emphasis>', '</emphasis>'),
    'linuxdoc': ('', ''),
}
# Words to highlight for each supported language.  Membership is tested
# against whole words matched by the pattern \w+ (see sub_keyword()).
keywords = {
    'python':
        (
            'and', 'del', 'for', 'is', 'raise', 'assert', 'elif', 'from',
            'lambda', 'return', 'break', 'else', 'global', 'not', 'try', 'class',
            'except', 'if', 'or', 'while', 'continue', 'exec', 'import', 'pass',
            'yield', 'def', 'finally', 'in', 'print'
        ),
    'ruby':
        (
            # NOTE: 'defined?' and 'ensure' must be separate entries; without
            # the comma between them Python concatenates the two literals
            # into 'defined?ensure' and 'ensure' is never highlighted.
            '__FILE__', 'and', 'def', 'end', 'in', 'or', 'self', 'unless',
            '__LINE__', 'begin', 'defined?', 'ensure', 'module', 'redo', 'super',
            'until', 'BEGIN', 'break', 'do', 'false', 'next', 'rescue', 'then',
            'when', 'END', 'case', 'else', 'for', 'nil', 'retry', 'true', 'while',
            'alias', 'class', 'elsif', 'if', 'not', 'return', 'undef', 'yield'
        ),
    'c++':
        (
            'asm', 'auto', 'bool', 'break', 'case', 'catch', 'char', 'class',
            'const', 'const_cast', 'continue', 'default', 'delete', 'do', 'double',
            'dynamic_cast', 'else', 'enum', 'explicit', 'export', 'extern',
            'false', 'float', 'for', 'friend', 'goto', 'if', 'inline', 'int',
            'long', 'mutable', 'namespace', 'new', 'operator', 'private',
            'protected', 'public', 'register', 'reinterpret_cast', 'return',
            'short', 'signed', 'sizeof', 'static', 'static_cast', 'struct',
            'switch', 'template', 'this', 'throw', 'true', 'try', 'typedef',
            'typeid', 'typename', 'union', 'unsigned', 'using', 'virtual', 'void',
            'volatile', 'wchar_t', 'while'
        )
}
# Block comment (opening, closing) delimiters for each language; None means
# the language gets no block comment processing.
block_comments = {
    'python': (
        "'''",
        "'''",
    ),
    'ruby': None,
    'c++': (
        '/*',
        '*/',
    ),
}

# Marker that starts a comment running to the end of the line.
inline_comments = {
    'python': '#',
    'ruby': '#',
    'c++': '//',
}
126
127
def print_stderr(line):
    '''Write *line* followed by a newline to the standard error stream.'''
    # sys.stderr is a text stream that already translates '\n' to the
    # platform line ending; writing os.linesep would double the carriage
    # return on Windows.
    sys.stderr.write(line + '\n')
130
131
def sub_keyword(mo):
    '''Replacement callback for re.sub() that tags language keywords.

    *mo* is a match object with a named group 'word'.  Returns the word
    wrapped in the current backend's keyword tags if it is a keyword of
    the current language, otherwise the word unchanged.
    '''
    token = mo.group('word')
    if token not in keywords[language]:
        return token
    opening, closing = keywordtags[backend]
    return opening + token + closing
140
141
def code_filter():
    '''Read source code from stdin and write the marked-up result to stdout.

    Each input line has trailing whitespace stripped, tabs expanded to the
    global ``tabsize`` and the characters '&', '<' and '>' escaped.  A line
    that begins with a block comment opener, or that lies inside a
    multi-line block comment, is wrapped whole in the backend's comment
    tags.  On any other line the text from the first inline comment marker
    onwards is wrapped in comment tags and the words before it are passed
    through sub_keyword().

    Uses the module globals ``language``, ``backend`` and ``tabsize``.
    '''
    inline_comment = inline_comments[language]
    block_markers = block_comments[language]
    if block_markers:
        # Build the loop-invariant block comment patterns once.
        opener = re.escape(block_markers[0])
        closer = re.escape(block_markers[1])
        single_re = re.compile(r'^\s*' + opener + r'.*' + closer)
        start_re = re.compile(r'^\s*' + opener)
        end_re = re.compile(r'.*' + closer + r'$')
    word_re = re.compile(r'\b(?P<word>\w+)\b')
    stag, etag = commenttags[backend]
    in_comment = False  # True while inside a multi-line block comment.
    tag_comment = False  # True if the current line is tagged as a comment.
    for line in sys.stdin:
        line = line.rstrip().expandtabs(tabsize)
        # Escape special characters ('&' first, so the entities produced
        # for '<' and '>' are not escaped a second time).
        line = line.replace('&', '&amp;')
        line = line.replace('<', '&lt;')
        line = line.replace('>', '&gt;')
        # Process block comment.
        if block_markers:
            if in_comment:
                # The closing line is still tagged: tag_comment keeps its
                # value and is only re-evaluated on the following line.
                if end_re.match(line):
                    in_comment = False
            elif single_re.match(line):
                # Single line block comment.
                tag_comment = True
            elif start_re.match(line):
                # Start of multi-line block comment.
                tag_comment = True
                in_comment = True
            else:
                tag_comment = False
        if tag_comment:
            # Blank lines inside a comment are left without markup.
            if line:
                line = stag + line + etag
        else:
            pos = line.find(inline_comment) if inline_comment else -1
            if pos >= 0:
                # Tag keywords before the marker, comment tags after it.
                line = (word_re.sub(sub_keyword, line[:pos])
                        + stag + line[pos:] + etag)
            else:
                line = word_re.sub(sub_keyword, line)
        # sys.stdout is a text stream that translates '\n' itself; writing
        # os.linesep would double the carriage return on Windows.
        sys.stdout.write(line + '\n')
194
195
def usage(msg=''):
    '''Print the optional message *msg*, then the synopsis, to stderr.'''
    output = [msg] if msg else []
    output.append('Usage: code-filter -b backend -l language [ -t tabsize ]')
    output.append('                   [ --help | -h ] [ --version | -v ]')
    for text in output:
        print_stderr(text)
201
202
def main():
    '''Parse the command line, validate the options and run the filter.

    Sets the module globals ``backend``, ``language`` and ``tabsize`` from
    the -b, -l and -t options and then calls code_filter().  The --help
    and --version options print their text and exit with status 0.  An
    unrecognized option, a stray positional argument, an illegal tabsize or
    a missing/unknown backend or language prints a usage message to stderr
    and exits with status 1.
    '''
    global language, backend, tabsize
    # Process command line options.
    import getopt
    try:
        opts, args = getopt.getopt(
            sys.argv[1:],
            'b:l:ht:v',
            ['help', 'version']
        )
    except getopt.GetoptError as err:
        # Report a bad option as a usage error, not an unexpected failure.
        usage(str(err))
        sys.exit(1)
    if args:
        usage()
        sys.exit(1)
    for o, v in opts:
        if o in ('--help', '-h'):
            print(__doc__)
            sys.exit(0)
        elif o in ('--version', '-v'):
            print('code-filter version %s' % (VERSION,))
            sys.exit(0)
        elif o == '-b':
            backend = v
        elif o == '-l':
            v = v.lower()
            # C is highlighted with the C++ tables.
            if v == 'c':
                v = 'c++'
            language = v
        elif o == '-t':
            # Only a malformed number is an expected failure here; catching
            # anything broader would also swallow KeyboardInterrupt.
            try:
                tabsize = int(v)
            except ValueError:
                usage('illegal tabsize')
                sys.exit(1)
            if tabsize <= 0:
                usage('illegal tabsize')
                sys.exit(1)
    if backend is None:
        usage('backend option is mandatory')
        sys.exit(1)
    if backend not in keywordtags:
        usage('illegal backend option')
        sys.exit(1)
    if language is None:
        usage('language option is mandatory')
        sys.exit(1)
    if language not in keywords:
        usage('illegal language option')
        sys.exit(1)
    # Do the work.
    code_filter()
252
253
if __name__ == "__main__":
    # In Python 3 sys.exc_info() is cleared as soon as an except clause
    # finishes, so the exit status must be captured inside the handlers;
    # reading it afterwards always yields None (i.e. exit status 0).
    exit_status = 0
    try:
        main()
    except SystemExit as exc:
        # Preserve the status passed to sys.exit() inside main().
        exit_status = exc.code
    except KeyboardInterrupt:
        # Interrupted by the user: exit quietly but report failure.
        exit_status = 1
    except BaseException as exc:
        print_stderr(
            "%s: unexpected exit status: %s" %
            (os.path.basename(sys.argv[0]), exc)
        )
        exit_status = 1
    # Exit with previous sys.exit() status or zero if no sys.exit().
    sys.exit(exit_status)
266