1#!/usr/local/bin/python3.8 2''' 3NAME 4 code-filter - AsciiDoc filter to highlight language keywords 5 6SYNOPSIS 7 code-filter -b backend -l language [ -t tabsize ] 8 [ --help | -h ] [ --version | -v ] 9 10DESCRIPTION 11 This filter reads source code from the standard input, highlights language 12 keywords and comments and writes to the standard output. 13 14 The purpose of this program is to demonstrate how to write an AsciiDoc 15 filter -- it's much too simplistic to be passed off as a code syntax 16 highlighter. Use the 'source-highlight-filter' instead. 17 18 19OPTIONS 20 --help, -h 21 Print this documentation. 22 23 -b 24 Backend output file format: 'docbook', 'linuxdoc', 'html', 'css'. 25 26 -l 27 The name of the source code language: 'python', 'ruby', 'c++', 'c'. 28 29 -t tabsize 30 Expand source tabs to tabsize spaces. 31 32 --version, -v 33 Print program version number. 34 35BUGS 36 - Code on the same line as a block comment is treated as comment. 37 Keywords inside literal strings are highlighted. 38 - There doesn't appear to be an easy way to accommodate linuxdoc so 39 just pass it through without markup. 40 41AUTHOR 42 Written by Stuart Rackham, <srackham@gmail.com> 43 44URLS 45 https://github.com/asciidoc/asciidoc-py3 46 https://asciidoc.org/ 47 48COPYING 49 Copyright (C) 2002-2013 Stuart Rackham. 50 Copyright (C) 2013-2020 AsciiDoc Contributors. 51 52 Free use of this software is granted under the terms of the GNU General 53 Public License version 2 (GPLv2). 54''' 55 56import os 57import sys 58import re 59 60VERSION = '1.1.2' 61 62# Globals. 
# Selected source language; set by main() from the -l option.
language = None
# Selected output backend; set by main() from the -b option.
backend = None
# Number of spaces a tab expands to; overridden by the -t option.
tabsize = 8

# (start tag, end tag) wrapped around keywords, per backend.
keywordtags = {
    'html':
        ('<strong>', '</strong>'),
    'css':
        ('<strong>', '</strong>'),
    'docbook':
        ('<emphasis role="strong">', '</emphasis>'),
    'linuxdoc':
        ('', '')
}

# (start tag, end tag) wrapped around comments, per backend.
commenttags = {
    'html':
        ('<i>', '</i>'),
    'css':
        ('<i>', '</i>'),
    'docbook':
        ('<emphasis>', '</emphasis>'),
    'linuxdoc':
        ('', '')
}

# Words to highlight, per language ('c' is mapped to 'c++' by main()).
keywords = {
    'python':
        (
            'and', 'del', 'for', 'is', 'raise', 'assert', 'elif', 'from',
            'lambda', 'return', 'break', 'else', 'global', 'not', 'try',
            'class', 'except', 'if', 'or', 'while', 'continue', 'exec',
            'import', 'pass', 'yield', 'def', 'finally', 'in', 'print'
        ),
    'ruby':
        (
            # NOTE: 'defined?' and 'ensure' must be separate entries; without
            # the comma Python concatenates the two adjacent literals.
            '__FILE__', 'and', 'def', 'end', 'in', 'or', 'self', 'unless',
            '__LINE__', 'begin', 'defined?', 'ensure', 'module', 'redo',
            'super', 'until', 'BEGIN', 'break', 'do', 'false', 'next',
            'rescue', 'then', 'when', 'END', 'case', 'else', 'for', 'nil',
            'retry', 'true', 'while', 'alias', 'class', 'elsif', 'if', 'not',
            'return', 'undef', 'yield'
        ),
    'c++':
        (
            'asm', 'auto', 'bool', 'break', 'case', 'catch', 'char', 'class',
            'const', 'const_cast', 'continue', 'default', 'delete', 'do',
            'double', 'dynamic_cast', 'else', 'enum', 'explicit', 'export',
            'extern', 'false', 'float', 'for', 'friend', 'goto', 'if',
            'inline', 'int', 'long', 'mutable', 'namespace', 'new',
            'operator', 'private', 'protected', 'public', 'register',
            'reinterpret_cast', 'return', 'short', 'signed', 'sizeof',
            'static', 'static_cast', 'struct', 'switch', 'template', 'this',
            'throw', 'true', 'try', 'typedef', 'typeid', 'typename', 'union',
            'unsigned', 'using', 'virtual', 'void', 'volatile', 'wchar_t',
            'while'
        )
}

# (opening, closing) block comment delimiters per language, or None.
block_comments = {
    'python': ("'''", "'''"),
    'ruby': None,
    'c++': ('/*', '*/')
}

# Token that starts a comment running to the end of the line, per language.
inline_comments = {
    'python': '#',
    'ruby': '#',
    'c++': '//'
}

# Matches each run of word characters; compiled once for every input line.
_WORD_RE = re.compile(r'\b(?P<word>\w+)\b')


def print_stderr(line):
    '''Write line to the standard error followed by a newline.'''
    # Text-mode streams translate '\n' themselves, so os.linesep is not used.
    sys.stderr.write(line + '\n')


def sub_keyword(mo):
    '''re.sub() argument to tag keywords.

    mo is a match object with a 'word' group. Returns the word wrapped in
    the current backend's keyword tags if it is a keyword of the current
    language, otherwise the word unchanged.
    '''
    word = mo.group('word')
    if word in keywords[language]:
        stag, etag = keywordtags[backend]
        return stag + word + etag
    return word


def code_filter(infile=None, outfile=None):
    '''Highlight keywords and comments of the source read from infile.

    Each input line is stripped of trailing white space, tab expanded,
    markup escaped and then written to outfile with comments and keywords
    wrapped in the tags of the current backend. The module globals
    'language', 'backend' and 'tabsize' select the behavior.

    infile defaults to sys.stdin and outfile to sys.stdout.
    '''
    if infile is None:
        infile = sys.stdin
    if outfile is None:
        outfile = sys.stdout
    inline_comment = inline_comments[language]
    delimiters = block_comments[language]
    if delimiters:
        # Build the block comment patterns once instead of once per line.
        opening = re.escape(delimiters[0])
        closing = re.escape(delimiters[1])
        blk_single_re = re.compile(r'^\s*' + opening + r'.*' + closing)
        blk_start_re = re.compile(r'^\s*' + opening)
        blk_end_re = re.compile(r'.*' + closing + r'$')
    else:
        blk_single_re = blk_start_re = blk_end_re = None
    stag, etag = commenttags[backend]
    in_comment = False   # True if we're inside a multi-line block comment.
    tag_comment = False  # True if we should tag the current line as comment.
    for line in infile:
        line = line.rstrip().expandtabs(tabsize)
        # Escape special characters ('&' first so the entities added for
        # '<' and '>' are not escaped a second time).
        line = line.replace('&', '&amp;')
        line = line.replace('<', '&lt;')
        line = line.replace('>', '&gt;')
        # Process block comment.
        if delimiters:
            if in_comment:
                # The closing line itself is still tagged as a comment
                # because tag_comment keeps its value from the previous line.
                if blk_end_re.match(line):
                    in_comment = False
            elif blk_single_re.match(line):
                # Single line block comment.
                tag_comment = True
            elif blk_start_re.match(line):
                # Start of multi-line block comment.
                tag_comment = True
                in_comment = True
            else:
                tag_comment = False
        if tag_comment:
            # Blank lines inside a block comment are left untagged.
            if line:
                line = stag + line + etag
        else:
            pos = line.find(inline_comment) if inline_comment else -1
            if pos >= 0:
                # Process inline comment: keywords are tagged only in the
                # code that precedes the comment.
                line = (_WORD_RE.sub(sub_keyword, line[:pos])
                        + stag + line[pos:] + etag)
            else:
                line = _WORD_RE.sub(sub_keyword, line)
        outfile.write(line + '\n')


def usage(msg=''):
    '''Print the optional message msg and a usage summary to stderr.'''
    if msg:
        print_stderr(msg)
    print_stderr('Usage: code-filter -b backend -l language [ -t tabsize ]')
    print_stderr('                   [ --help | -h ] [ --version | -v ]')


def main():
    '''Parse the command line, set the module globals and run the filter.

    Exits through sys.exit(): status 0 after --help or --version, status 1
    for an invalid command line.
    '''
    global language, backend, tabsize
    # Process command line options.
    import getopt
    try:
        opts, args = getopt.getopt(
            sys.argv[1:],
            'b:l:ht:v',
            ['help', 'version']
        )
    except getopt.GetoptError as exc:
        # Unknown option or missing option argument.
        usage(str(exc))
        sys.exit(1)
    if args:
        usage()
        sys.exit(1)
    for o, v in opts:
        if o in ('--help', '-h'):
            print(__doc__)
            sys.exit(0)
        if o in ('--version', '-v'):
            print('code-filter version %s' % (VERSION,))
            sys.exit(0)
        if o == '-b':
            backend = v
        if o == '-l':
            v = v.lower()
            if v == 'c':
                # C is highlighted using the C++ tables.
                v = 'c++'
            language = v
        if o == '-t':
            try:
                tabsize = int(v)
            except ValueError:
                usage('illegal tabsize')
                sys.exit(1)
            if tabsize <= 0:
                usage('illegal tabsize')
                sys.exit(1)
    if backend is None:
        usage('backend option is mandatory')
        sys.exit(1)
    if backend not in keywordtags:
        usage('illegal backend option')
        sys.exit(1)
    if language is None:
        usage('language option is mandatory')
        sys.exit(1)
    if language not in keywords:
        usage('illegal language option')
        sys.exit(1)
    # Do the work.
    code_filter()


def _run():
    '''Run main() and return the exit status the process should report.'''
    try:
        main()
    except SystemExit as exc:
        # Preserve the status requested by sys.exit(); it has to be captured
        # here because the exception is cleared when the handler ends.
        return exc.code
    except KeyboardInterrupt:
        return 1
    except Exception as exc:
        print_stderr(
            "%s: unexpected exit status: %s" %
            (os.path.basename(sys.argv[0]), exc)
        )
        return 1
    return 0


if __name__ == "__main__":
    # Exit with previous sys.exit() status or zero if no sys.exit().
    sys.exit(_run())