# -*- test-case-name: twisted.python.test.test_htmlizer -*-
# Copyright (c) Twisted Matrix Laboratories.
# See LICENSE for details.

"""
HTML rendering of Python source.
"""

import keyword
import tokenize
from html import escape
from typing import List

from . import reflect


class TokenPrinter:
    """
    Format a stream of tokens and intermediate whitespace, for pretty-printing.

    @ivar currentCol: The column just past the most recently written token.
    @ivar currentLine: The (1-based) source line currently being written.
    @ivar lastIdentifier: True when the previous token was C{def} or
        C{class}, so that the next token is the name being defined.
    @ivar parameters: True from the name following C{def}/C{class} until the
        next C{:} token; plain names seen meanwhile are reported as
        parameters.
    @ivar encoding: The encoding used to turn text tokens into bytes.
        Replaced by the value of the C{ENCODING} token when one is seen.
    """

    currentCol, currentLine = 0, 1
    lastIdentifier = parameters = 0
    encoding = "utf-8"

    def __init__(self, writer):
        """
        @param writer: A callable used to emit output.  It is called with a
            single L{bytes} argument for whitespace, and with a L{bytes}
            token plus a token type name for each token.
        """
        self.writer = writer

    def printtoken(self, type, token, sCoordinates, eCoordinates, line):
        """
        Emit one token, preceded by the whitespace which separates it from
        the previously emitted token.

        The argument order matches the 5-tuples produced by L{tokenize}.

        @param type: The numeric token type, as defined by L{tokenize}.
        @param token: The token's text, as L{str} or L{bytes}.
        @param sCoordinates: The C{(row, column)} at which the token starts.
        @param eCoordinates: The C{(row, column)} at which the token ends.
        @param line: The physical source line holding the token.  Unused.
        """
        if hasattr(tokenize, "ENCODING") and type == tokenize.ENCODING:
            # The encoding marker produces no output; it only selects how
            # the following text tokens are encoded.
            self.encoding = token
            return

        # Keep a text form of the token: keyword.iskeyword() only recognises
        # str, so testing the encoded bytes would never match a keyword.
        if isinstance(token, bytes):
            # Keywords are pure ASCII, so replacing undecodable bytes can
            # never turn a non-keyword into a keyword.
            text = token.decode(self.encoding, "replace")
        else:
            text = token
            token = token.encode(self.encoding)

        (srow, scol) = sCoordinates
        (erow, ecol) = eCoordinates
        if self.currentLine < srow:
            # Catch up on lines which produced no token of their own.
            self.writer(b"\n" * (srow - self.currentLine))
            self.currentLine, self.currentCol = srow, 0
        self.writer(b" " * (scol - self.currentCol))

        if self.lastIdentifier:
            # The token straight after "def"/"class" is the defined name.
            type = "identifier"
            self.parameters = 1
        elif type == tokenize.NAME:
            if keyword.iskeyword(text):
                type = "keyword"
            elif self.parameters:
                type = "parameter"
            else:
                type = "variable"
        else:
            type = tokenize.tok_name.get(type)
            assert type is not None
            type = type.lower()
        self.writer(token, type)

        self.currentCol = ecol
        # A token may itself span several lines (e.g. a triple-quoted
        # string); account for the newlines it contains.
        self.currentLine += token.count(b"\n")
        if self.currentLine != erow:
            self.currentCol = 0
        self.lastIdentifier = token in (b"def", b"class")
        if token == b":":
            self.parameters = 0


class HTMLWriter:
    """
    Write the stream of tokens and whitespace from L{TokenPrinter}, formatting
    tokens as HTML spans.

    @cvar noSpan: Names of token types which are written without a
        surrounding C{<span>}.
    @ivar writer: A callable invoked with each chunk of L{bytes} output.
    """

    noSpan: List[str] = []

    def __init__(self, writer):
        """
        @param writer: A callable invoked with each chunk of L{bytes} output.
        """
        self.writer = writer
        noSpan: List[str] = []
        # Presumably gathers the noSpan entries declared across the class
        # hierarchy into one list -- confirm against
        # reflect.accumulateClassList.
        reflect.accumulateClassList(self.__class__, "noSpan", noSpan)
        self.noSpan = noSpan

    def write(self, token, type=None):
        """
        Write C{token}, HTML-escaped, wrapped in a C{<span>} whose class is
        derived from C{type}.

        @param token: The text to write, as L{str} or L{bytes}.
        @param type: The token type name, or L{None} for plain whitespace.
            When it is L{None} or listed in C{noSpan}, no span is written.
        """
        if isinstance(token, bytes):
            # NOTE(review): bytes are always decoded as UTF-8 here, whereas
            # TokenPrinter encodes tokens using the source's declared
            # encoding; non-ASCII tokens from a non-UTF-8 source would fail
            # to decode.
            token = token.decode("utf-8")
        token = escape(token)
        token = token.encode("utf-8")
        if (type is None) or (type in self.noSpan):
            self.writer(token)
        else:
            self.writer(
                b'<span class="py-src-'
                + type.encode("utf-8")
                + b'">'
                + token
                + b"</span>"
            )


class SmallerHTMLWriter(HTMLWriter):
    """
    HTMLWriter that doesn't generate spans for some junk.

    Results in much smaller HTML output.
    """

    noSpan = ["endmarker", "indent", "dedent", "op", "newline", "nl"]


def filter(inp, out, writer=HTMLWriter):
    """
    Render Python source as HTML inside a C{<pre>} element.

    @param inp: An object whose C{readline} method returns the source one
        line at a time, as L{bytes} (as required by L{tokenize.tokenize}).
    @param out: An object whose C{write} method accepts L{bytes}.
    @param writer: A factory called with C{out.write}; the result's C{write}
        method receives each token.  Defaults to L{HTMLWriter}.
    """
    out.write(b"<pre>")
    printer = TokenPrinter(writer(out.write).write).printtoken
    try:
        for token in tokenize.tokenize(inp.readline):
            (tokenType, string, start, end, line) = token
            printer(tokenType, string, start, end, line)
    except tokenize.TokenError:
        # Best effort: keep whatever was rendered before the tokenizer gave
        # up, and still close the <pre> element below.
        pass
    out.write(b"</pre>\n")


def main():
    """
    Render the Python source file named by the first command line argument
    as HTML on standard output.
    """
    import sys

    # Output is bytes, so prefer the binary buffer beneath sys.stdout.
    stdout = getattr(sys.stdout, "buffer", sys.stdout)
    with open(sys.argv[1], "rb") as f:
        filter(f, stdout)


if __name__ == "__main__":
    main()