"""A fast, drop-in replacement for pygments ``get_*()`` and ``guess_*()`` functions.

The following pygments API functions are currently supplied here::

    from pygments_cache import get_lexer_for_filename, guess_lexer_for_filename
    from pygments_cache import get_formatter_for_filename, get_formatter_by_name
    from pygments_cache import get_style_by_name, get_all_styles
    from pygments_cache import get_filter_by_name

The cache itself is stored at the location given by the ``$PYGMENTS_CACHE_FILE``
environment variable, or by default at
``$XDG_DATA_HOME/pygments-cache/cache.py`` (``~/.local/share`` is used in place
of ``$XDG_DATA_HOME`` when that variable is not set).
The cache file is created on first use, if it does not already exist.
"""
import os
import importlib


# Global storage variables
__version__ = "0.1.1"
# In-memory cache: None until it is loaded from disk or built from scratch.
CACHE = None
# When true, the discovery functions report conflicting registrations on stderr.
DEBUG = False


#
# private helpers
#


def _class_ref(cls):
    """Return the ``(module name, class name)`` tuple that identifies *cls*."""
    import inspect

    mod = inspect.getmodule(cls)
    return (mod.__name__, cls.__name__)


def _import_class(modname, clsname):
    """Import *modname* and return its attribute named *clsname*."""
    mod = importlib.import_module(modname)
    return getattr(mod, clsname)


def _pattern_key(pattern):
    """Turn a filename glob into a cache key.

    A leading ``*`` is dropped from ``*.ext`` patterns, so that they are keyed
    by ``.ext``.  Patterns that still contain a ``*`` afterwards cannot be used
    as an exact-match key and yield ``None``.
    """
    if pattern.startswith("*."):
        pattern = pattern[1:]
    if "*" in pattern:
        return None
    return pattern


def _filename_key(fname, exts):
    """Return *fname* itself if it is a key of *exts*, else its extension."""
    return fname if fname in exts else os.path.splitext(fname)[1]


def _new_duplicates():
    """Return an empty duplicates tracker if ``DEBUG`` is set, else ``None``."""
    if not DEBUG:
        return None
    from collections import defaultdict

    return defaultdict(set)


def _register(table, key, val, defaults, duplicates):
    """Store ``table[key] = val``, noting conflicts when tracking is enabled.

    A conflict is recorded in *duplicates* (when it is not ``None``) if *key*
    is already mapped to a different value and is not settled by *defaults*.
    """
    if (
        duplicates is not None
        and key in table
        and table[key] != val
        and key not in defaults
    ):
        duplicates[key].add(val)
        duplicates[key].add(table[key])
    table[key] = val


def _print_duplicate_message(duplicates):
    """Print each key in *duplicates* and its competing classes to stderr.

    *duplicates* maps a key to a collection of ``(module, classname)`` tuples.
    """
    import sys

    for filename, vals in sorted(duplicates.items()):
        msg = "for {0} ambiquity between:\n ".format(filename)
        msg += "\n ".join(sorted(m + ":" + c for m, c in vals))
        print(msg, file=sys.stderr)


#
# cache discovery
#


def _discover_lexers():
    """Build the lexer section of the cache from all lexers pygments knows.

    Returns a dict ``{"exts": {key: (module, classname)}}`` where each key is
    a file extension (with its leading dot) or an exact file name.
    """
    from pygments.lexers import get_all_lexers, find_lexer_class

    # maps file extension (and names) to (module, classname) tuples
    default_exts = {
        # C / C++
        ".h": ("pygments.lexers.c_cpp", "CLexer"),
        ".hh": ("pygments.lexers.c_cpp", "CppLexer"),
        ".cp": ("pygments.lexers.c_cpp", "CppLexer"),
        # python
        ".py": ("pygments.lexers.python", "Python3Lexer"),
        ".pyw": ("pygments.lexers.python", "Python3Lexer"),
        ".sc": ("pygments.lexers.python", "Python3Lexer"),
        ".tac": ("pygments.lexers.python", "Python3Lexer"),
        "SConstruct": ("pygments.lexers.python", "Python3Lexer"),
        "SConscript": ("pygments.lexers.python", "Python3Lexer"),
        ".sage": ("pygments.lexers.python", "Python3Lexer"),
        ".pytb": ("pygments.lexers.python", "Python3TracebackLexer"),
        # perl
        ".t": ("pygments.lexers.perl", "Perl6Lexer"),
        ".pl": ("pygments.lexers.perl", "Perl6Lexer"),
        ".pm": ("pygments.lexers.perl", "Perl6Lexer"),
        # asm
        ".s": ("pygments.lexers.asm", "GasLexer"),
        ".S": ("pygments.lexers.asm", "GasLexer"),
        ".asm": ("pygments.lexers.asm", "NasmLexer"),
        ".ASM": ("pygments.lexers.asm", "NasmLexer"),
        # Antlr
        ".g": ("pygments.lexers.parsers", "AntlrCppLexer"),
        ".G": ("pygments.lexers.parsers", "AntlrCppLexer"),
        # XML
        ".xml": ("pygments.lexers.html", "XmlLexer"),
        ".xsl": ("pygments.lexers.html", "XsltLexer"),
        ".xslt": ("pygments.lexers.html", "XsltLexer"),
        # ASP
        ".axd": ("pygments.lexers.dotnet", "CSharpAspxLexer"),
        ".asax": ("pygments.lexers.dotnet", "CSharpAspxLexer"),
        ".ascx": ("pygments.lexers.dotnet", "CSharpAspxLexer"),
        ".ashx": ("pygments.lexers.dotnet", "CSharpAspxLexer"),
        ".asmx": ("pygments.lexers.dotnet", "CSharpAspxLexer"),
        ".aspx": ("pygments.lexers.dotnet", "CSharpAspxLexer"),
        # misc
        ".b": ("pygments.lexers.esoteric", "BrainfuckLexer"),
        ".j": ("pygments.lexers.jvm", "JasminLexer"),
        ".m": ("pygments.lexers.matlab", "MatlabLexer"),
        ".n": ("pygments.lexers.dotnet", "NemerleLexer"),
        ".p": ("pygments.lexers.pawn", "PawnLexer"),
        ".v": ("pygments.lexers.theorem", "CoqLexer"),
        ".as": ("pygments.lexers.actionscript", "ActionScript3Lexer"),
        ".fs": ("pygments.lexers.forth", "ForthLexer"),
        ".hy": ("pygments.lexers.lisp", "HyLexer"),
        ".ts": ("pygments.lexers.javascript", "TypeScriptLexer"),
        ".rl": ("pygments.lexers.parsers", "RagelCppLexer"),
        ".bas": ("pygments.lexers.basic", "QBasicLexer"),
        ".bug": ("pygments.lexers.modeling", "BugsLexer"),
        ".ecl": ("pygments.lexers.ecl", "ECLLexer"),
        ".inc": ("pygments.lexers.php", "PhpLexer"),
        ".inf": ("pygments.lexers.configs", "IniLexer"),
        ".pro": ("pygments.lexers.prolog", "PrologLexer"),
        ".sql": ("pygments.lexers.sql", "SqlLexer"),
        ".txt": ("pygments.lexers.special", "TextLexer"),
        ".html": ("pygments.lexers.html", "HtmlLexer"),
    }
    exts = {}
    lexers = {"exts": exts}
    duplicates = _new_duplicates()
    for longname, _aliases, filenames, _mimetypes in get_all_lexers():
        val = _class_ref(find_lexer_class(longname))
        for pattern in filenames:
            key = _pattern_key(pattern)
            if key is None:
                continue
            _register(exts, key, val, default_exts, duplicates)
    # remove some ambiguity: the hand-picked defaults always win
    exts.update(default_exts)
    # print duplicate message
    if duplicates is not None:
        _print_duplicate_message(duplicates)
    return lexers


def _discover_formatters():
    """Build the formatter section of the cache.

    Returns a dict ``{"exts": {...}, "names": {...}}``.  ``exts`` is keyed by
    file extension or exact file name, ``names`` by formatter name and alias;
    all values are ``(module, classname)`` tuples.
    """
    from pygments.formatters import get_all_formatters

    # maps file extension (and names) to (module, classname) tuples
    default_exts = {}
    exts = {}
    # maps formatter 'name' (not the class name) and alias to (module, classname) tuples
    default_names = {}
    names = {}
    formatters = {"exts": exts, "names": names}
    duplicates = _new_duplicates()
    for cls in get_all_formatters():
        val = _class_ref(cls)
        # add extensions
        for pattern in cls.filenames:
            key = _pattern_key(pattern)
            if key is None:
                continue
            _register(exts, key, val, default_exts, duplicates)
        # add names and aliases; the formatter's own name is stored
        # unconditionally, without any duplicate bookkeeping
        names[cls.name] = val
        for alias in cls.aliases:
            _register(names, alias, val, default_names, duplicates)
    # remove some ambiguity
    exts.update(default_exts)
    names.update(default_names)
    # print duplicate message
    if duplicates is not None:
        _print_duplicate_message(duplicates)
    return formatters


def _discover_styles():
    """Build the style section of the cache.

    Returns a dict ``{"names": {style name: (module, classname)}}``.
    """
    from pygments.styles import get_all_styles, get_style_by_name

    # maps style 'name' (not the class name) and aliases to (module, classname) tuples
    default_names = {}
    names = {}
    styles = {"names": names}
    duplicates = _new_duplicates()
    for name in get_all_styles():
        val = _class_ref(get_style_by_name(name))
        _register(names, name, val, default_names, duplicates)
    # remove some ambiguity
    names.update(default_names)
    # print duplicate message
    if duplicates is not None:
        _print_duplicate_message(duplicates)
    return styles


def _discover_filters():
    """Build the filter section of the cache.

    Returns a dict ``{"names": {filter name: (module, classname)}}``.
    """
    from pygments.filters import get_all_filters, get_filter_by_name

    # maps filter 'name' (not the class name) to (module, classname) tuples
    default_names = {}
    names = {}
    filters = {"names": names}
    duplicates = _new_duplicates()
    for name in get_all_filters():
        # pygments hands back an instance here; the cache stores its class
        instance = get_filter_by_name(name)
        val = _class_ref(type(instance))
        _register(names, name, val, default_names, duplicates)
    # remove some ambiguity
    names.update(default_names)
    # print duplicate message
    if duplicates is not None:
        _print_duplicate_message(duplicates)
    return filters


def build_cache():
    """Does the hard work of building a cache from nothing.

    Returns a new dict with ``"lexers"``, ``"formatters"``, ``"styles"`` and
    ``"filters"`` sections.  The module-level ``CACHE`` is not modified.
    """
    cache = {}
    cache["lexers"] = _discover_lexers()
    cache["formatters"] = _discover_formatters()
    cache["styles"] = _discover_styles()
    cache["filters"] = _discover_filters()
    return cache


#
# cache persistence
#


def cache_filename():
    """Gets the name of the cache file to use.

    ``$PYGMENTS_CACHE_FILE`` is returned verbatim when it is set.  Otherwise
    the result is ``pygments-cache/cache.py`` below ``$XDG_DATA_HOME``, or
    below ``~/.local/share`` when that variable is not set either.
    """
    # Configuration variables read from the environment
    if "PYGMENTS_CACHE_FILE" in os.environ:
        return os.environ["PYGMENTS_CACHE_FILE"]
    data_home = os.environ.get(
        "XDG_DATA_HOME",
        os.path.join(os.path.expanduser("~"), ".local", "share"),
    )
    return os.path.join(data_home, "pygments-cache", "cache.py")


def load(filename):
    """Loads the cache from a filename.

    The file content is evaluated as a Python expression; the result is stored
    in the module-level ``CACHE`` and also returned.
    """
    global CACHE
    with open(filename) as f:
        s = f.read()
    ctx = globals()
    # NOTE(security): eval() executes whatever expression the cache file
    # contains, so the file must only be writable by trusted users.  The data
    # produced by write_cache() consists purely of literals.
    CACHE = eval(s, ctx, ctx)
    return CACHE


def write_cache(filename):
    """Writes the current cache to the file.

    Missing parent directories of *filename* are created first.  The cache is
    written pretty-printed, in the form that ``load()`` evaluates.
    """
    from pprint import pformat

    d = os.path.dirname(filename)
    # A bare file name has no directory component, and os.makedirs("") would
    # raise FileNotFoundError, so only create directories when there are any.
    if d:
        os.makedirs(d, exist_ok=True)
    s = pformat(CACHE)
    with open(filename, "w") as f:
        f.write(s)


def load_or_build():
    """Loads the cache from disk. If the cache does not exist,
    this will build and write it out.
    """
    global CACHE
    fname = cache_filename()
    if os.path.exists(fname):
        load(fname)
    else:
        import sys

        print("pygments cache not found, building...", file=sys.stderr)
        CACHE = build_cache()
        print("...writing cache to " + fname, file=sys.stderr)
        write_cache(fname)


#
# pygments interface
#


def get_lexer_for_filename(filename, text="", **options):
    """Gets a lexer from a filename (usually via the filename extension).
    This mimics the behavior of ``pygments.lexers.get_lexer_for_filename()``
    and ``pygments.lexers.guess_lexer_for_filename()``.

    *text* is only consulted when the cache has no entry for the file and the
    lookup falls back to pygments.  *options* are passed to the lexer class.
    """
    if CACHE is None:
        load_or_build()
    exts = CACHE["lexers"]["exts"]
    fname = os.path.basename(filename)
    key = _filename_key(fname, exts)
    if key in exts:
        modname, clsname = exts[key]
        return _import_class(modname, clsname)(**options)
    # couldn't find lexer in cache, fallback to the hard way
    from pygments.lexers import guess_lexer_for_filename as _pygments_guess

    lexer = _pygments_guess(filename, text, **options)
    # add this filename to the cache for future use
    exts[fname] = _class_ref(type(lexer))
    write_cache(cache_filename())
    return lexer


guess_lexer_for_filename = get_lexer_for_filename


def get_formatter_for_filename(fn, **options):
    """Gets a formatter instance from a filename (usually via the filename
    extension). This mimics the behavior of
    ``pygments.formatters.get_formatter_for_filename()``.
    """
    if CACHE is None:
        load_or_build()
    exts = CACHE["formatters"]["exts"]
    fname = os.path.basename(fn)
    key = _filename_key(fname, exts)
    if key in exts:
        modname, clsname = exts[key]
        return _import_class(modname, clsname)(**options)
    # couldn't find formatter in cache, fallback to the hard way
    from pygments.formatters import get_formatter_for_filename as _pygments_get

    formatter = _pygments_get(fn, **options)
    # add this filename to the cache for future use
    exts[fname] = _class_ref(type(formatter))
    write_cache(cache_filename())
    return formatter


def get_formatter_by_name(alias, **options):
    """Gets a formatter instance from its name or alias.
    This mimics the behavior of ``pygments.formatters.get_formatter_by_name()``.
    """
    if CACHE is None:
        load_or_build()
    names = CACHE["formatters"]["names"]
    if alias in names:
        modname, clsname = names[alias]
        return _import_class(modname, clsname)(**options)
    # couldn't find formatter in cache, fallback to the hard way
    from pygments.formatters import get_formatter_by_name as _pygments_get

    formatter = _pygments_get(alias, **options)
    # add this alias to the cache for future use
    names[alias] = _class_ref(type(formatter))
    write_cache(cache_filename())
    return formatter


def get_style_by_name(name):
    """Gets a style class from its name or alias.
    This mimics the behavior of ``pygments.styles.get_style_by_name()``.

    Unlike the lexer, formatter and filter getters, this returns the class
    itself rather than an instance.
    """
    if CACHE is None:
        load_or_build()
    names = CACHE["styles"]["names"]
    if name in names:
        modname, clsname = names[name]
        return _import_class(modname, clsname)
    # couldn't find style in cache, fallback to the hard way
    from pygments.styles import get_style_by_name as _pygments_get

    style = _pygments_get(name)
    # add this style to the cache for future use
    names[name] = _class_ref(style)
    write_cache(cache_filename())
    return style


def get_all_styles():
    """Iterable through all known style names.
    This mimics the behavior of ``pygments.styles.get_all_styles``.

    This is a generator, so the cache is only loaded (or built) once
    iteration starts.
    """
    if CACHE is None:
        load_or_build()
    yield from CACHE["styles"]["names"]


def get_filter_by_name(filtername, **options):
    """Gets a filter instance from its name. This mimics the behavior of
    ``pygments.filters.get_filter_by_name()``.
    """
    if CACHE is None:
        load_or_build()
    names = CACHE["filters"]["names"]
    if filtername in names:
        modname, clsname = names[filtername]
        return _import_class(modname, clsname)(**options)
    # couldn't find filter in cache, fallback to the hard way
    from pygments.filters import get_filter_by_name as _pygments_get

    instance = _pygments_get(filtername, **options)
    # add this filter to the cache for future use
    names[filtername] = _class_ref(type(instance))
    write_cache(cache_filename())
    return instance