"""A fast, drop-in replacement for pygments ``get_*()`` and ``guess_*()`` functions.
2
3The following pygments API functions are currently supplied here::
4
5    from pygments_cache import get_lexer_for_filename, guess_lexer_for_filename
6    from pygments_cache import get_formatter_for_filename, get_formatter_by_name
7    from pygments_cache import get_style_by_name, get_all_styles
8    from pygments_cache import get_filter_by_name
9
The cache itself is stored at the location given by the ``$PYGMENTS_CACHE_FILE``
environment variable, or by default at ``$XDG_DATA_HOME/pygments-cache/cache.py``
(``~/.local/share/pygments-cache/cache.py`` when ``$XDG_DATA_HOME`` is not set).
The cache file is created on first use, if it does not already exist.
13
14
15"""
16import os
17import importlib
18
19
# Global storage variables
__version__ = "0.1.1"
# In-memory cache; stays None until load() or load_or_build() fills it in.
CACHE = None
# When True, the _discover_*() helpers report conflicting entries on stderr.
DEBUG = False
24
25
26def _print_duplicate_message(duplicates):
27    import sys
28
29    for filename, vals in sorted(duplicates.items()):
30        msg = "for {0} ambiquity between:\n  ".format(filename)
31        vals = [m + ":" + c for m, c in vals]
32        msg += "\n  ".join(sorted(vals))
33        print(msg, file=sys.stderr)
34
35
def _discover_lexers():
    """Build the lexer section of the cache.

    Returns a dict of the form ``{"exts": {...}}``. The inner mapping takes a
    file extension (such as ``".py"``) or a literal file name (such as
    ``"SConstruct"``) to a ``(module name, class name)`` pair naming a
    pygments lexer class.

    File name patterns that still contain ``*`` after a leading ``*.`` has
    been reduced to the bare extension are skipped. When several lexers claim
    the same key the last one seen wins, and the built-in preference table
    below is applied on top of everything. With ``DEBUG`` enabled, conflicts
    not settled by that table are reported on stderr.
    """
    import inspect
    from pygments.lexers import get_all_lexers, find_lexer_class

    # (module, class name, keys) rows; expanded below into a mapping from
    # file extension (or file name) to a (module, classname) tuple.
    preferences = (
        # C / C++
        ("pygments.lexers.c_cpp", "CLexer", (".h",)),
        ("pygments.lexers.c_cpp", "CppLexer", (".hh", ".cp")),
        # python
        (
            "pygments.lexers.python",
            "Python3Lexer",
            (".py", ".pyw", ".sc", ".tac", "SConstruct", "SConscript", ".sage"),
        ),
        ("pygments.lexers.python", "Python3TracebackLexer", (".pytb",)),
        # perl
        ("pygments.lexers.perl", "Perl6Lexer", (".t", ".pl", ".pm")),
        # asm
        ("pygments.lexers.asm", "GasLexer", (".s", ".S")),
        ("pygments.lexers.asm", "NasmLexer", (".asm", ".ASM")),
        # Antlr
        ("pygments.lexers.parsers", "AntlrCppLexer", (".g", ".G")),
        # XML
        ("pygments.lexers.html", "XmlLexer", (".xml",)),
        ("pygments.lexers.html", "XsltLexer", (".xsl", ".xslt")),
        # ASP
        (
            "pygments.lexers.dotnet",
            "CSharpAspxLexer",
            (".axd", ".asax", ".ascx", ".ashx", ".asmx", ".aspx"),
        ),
        # misc
        ("pygments.lexers.esoteric", "BrainfuckLexer", (".b",)),
        ("pygments.lexers.jvm", "JasminLexer", (".j",)),
        ("pygments.lexers.matlab", "MatlabLexer", (".m",)),
        ("pygments.lexers.dotnet", "NemerleLexer", (".n",)),
        ("pygments.lexers.pawn", "PawnLexer", (".p",)),
        ("pygments.lexers.theorem", "CoqLexer", (".v",)),
        ("pygments.lexers.actionscript", "ActionScript3Lexer", (".as",)),
        ("pygments.lexers.forth", "ForthLexer", (".fs",)),
        ("pygments.lexers.lisp", "HyLexer", (".hy",)),
        ("pygments.lexers.javascript", "TypeScriptLexer", (".ts",)),
        ("pygments.lexers.parsers", "RagelCppLexer", (".rl",)),
        ("pygments.lexers.basic", "QBasicLexer", (".bas",)),
        ("pygments.lexers.modeling", "BugsLexer", (".bug",)),
        ("pygments.lexers.ecl", "ECLLexer", (".ecl",)),
        ("pygments.lexers.php", "PhpLexer", (".inc",)),
        ("pygments.lexers.configs", "IniLexer", (".inf",)),
        ("pygments.lexers.prolog", "PrologLexer", (".pro",)),
        ("pygments.lexers.sql", "SqlLexer", (".sql",)),
        ("pygments.lexers.special", "TextLexer", (".txt",)),
        ("pygments.lexers.html", "HtmlLexer", (".html",)),
    )
    default_exts = {
        key: (modname, clsname)
        for modname, clsname, keys in preferences
        for key in keys
    }

    found = {}
    conflicts = None
    if DEBUG:
        from collections import defaultdict

        conflicts = defaultdict(set)

    for longname, _aliases, patterns, _mimetypes in get_all_lexers():
        lexer_cls = find_lexer_class(longname)
        module = inspect.getmodule(lexer_cls)
        target = (module.__name__, lexer_cls.__name__)
        for pattern in patterns:
            # "*.ext" becomes ".ext"; anything still globbed is unusable as a key
            key = pattern[1:] if pattern.startswith("*.") else pattern
            if "*" in key:
                continue
            if DEBUG and key not in default_exts and found.get(key, target) != target:
                conflicts[key].add(target)
                conflicts[key].add(found[key])
            found[key] = target

    # the preference table settles the known ambiguities
    found.update(default_exts)
    if DEBUG:
        _print_duplicate_message(conflicts)
    return {"exts": found}
130
131
def _discover_formatters():
    """Build the formatter section of the cache.

    Returns ``{"exts": {...}, "names": {...}}``. ``exts`` maps a file
    extension or literal file name to a ``(module name, class name)`` pair;
    ``names`` does the same for each formatter's ``name`` attribute and every
    one of its aliases. When several formatters claim a key, the last one seen
    wins. With ``DEBUG`` enabled, conflicting extension and alias claims are
    reported on stderr.
    """
    import inspect
    from pygments.formatters import get_all_formatters

    # Preferred (module, classname) tuples that override discovery; both
    # tables are currently empty.
    preferred_exts = {}
    preferred_names = {}
    by_ext = {}
    by_name = {}
    conflicts = None
    if DEBUG:
        from collections import defaultdict

        conflicts = defaultdict(set)

    for formatter_cls in get_all_formatters():
        module = inspect.getmodule(formatter_cls)
        target = (module.__name__, formatter_cls.__name__)

        # file extensions and literal file names
        for pattern in formatter_cls.filenames:
            key = pattern[1:] if pattern.startswith("*.") else pattern
            if "*" in key:
                continue
            if (
                DEBUG
                and key not in preferred_exts
                and by_ext.get(key, target) != target
            ):
                conflicts[key].add(target)
                conflicts[key].add(by_ext[key])
            by_ext[key] = target

        # the formatter's own name, then each alias
        by_name[formatter_cls.name] = target
        for alias in formatter_cls.aliases:
            if (
                DEBUG
                and alias not in preferred_names
                and by_name.get(alias, target) != target
            ):
                conflicts[alias].add(target)
                conflicts[alias].add(by_name[alias])
            by_name[alias] = target

    # preferred entries win over whatever discovery produced
    by_ext.update(preferred_exts)
    by_name.update(preferred_names)
    if DEBUG:
        _print_duplicate_message(conflicts)
    return {"exts": by_ext, "names": by_name}
184
185
def _discover_styles():
    """Build the style section of the cache.

    Returns ``{"names": {...}}`` where each style name reported by pygments
    maps to the ``(module name, class name)`` pair of its style class. With
    ``DEBUG`` enabled, a name that resolves to two different classes is
    reported on stderr.
    """
    import inspect
    from pygments.styles import get_all_styles, get_style_by_name

    # Preferred (module, classname) tuples that override discovery;
    # currently empty.
    preferred = {}
    found = {}
    conflicts = None
    if DEBUG:
        from collections import defaultdict

        conflicts = defaultdict(set)

    for style_name in get_all_styles():
        style_cls = get_style_by_name(style_name)
        module = inspect.getmodule(style_cls)
        target = (module.__name__, style_cls.__name__)
        if (
            DEBUG
            and style_name not in preferred
            and found.get(style_name, target) != target
        ):
            conflicts[style_name].add(target)
            conflicts[style_name].add(found[style_name])
        found[style_name] = target

    # preferred entries win over whatever discovery produced
    found.update(preferred)
    if DEBUG:
        _print_duplicate_message(conflicts)
    return {"names": found}
212
213
def _discover_filters():
    """Build the filter section of the cache.

    Returns ``{"names": {...}}`` where each filter name reported by pygments
    maps to the ``(module name, class name)`` pair of its filter class. Each
    filter is instantiated once (without options) to find out its class. With
    ``DEBUG`` enabled, a name that resolves to two different classes is
    reported on stderr.
    """
    import inspect
    from pygments.filters import get_all_filters, get_filter_by_name

    # Preferred (module, classname) tuples that override discovery;
    # currently empty.
    preferred = {}
    found = {}
    conflicts = None
    if DEBUG:
        from collections import defaultdict

        conflicts = defaultdict(set)

    for filter_name in get_all_filters():
        instance = get_filter_by_name(filter_name)
        filter_cls = type(instance)
        module = inspect.getmodule(filter_cls)
        target = (module.__name__, filter_cls.__name__)
        if (
            DEBUG
            and filter_name not in preferred
            and found.get(filter_name, target) != target
        ):
            conflicts[filter_name].add(target)
            conflicts[filter_name].add(found[filter_name])
        found[filter_name] = target

    # preferred entries win over whatever discovery produced
    found.update(preferred)
    if DEBUG:
        _print_duplicate_message(conflicts)
    return {"names": found}
241
242
def build_cache():
    """Assemble a complete cache from scratch.

    Runs every ``_discover_*()`` helper (lexers, formatters, styles, filters,
    in that order) and returns their results keyed by section name.
    """
    return {
        "lexers": _discover_lexers(),
        "formatters": _discover_formatters(),
        "styles": _discover_styles(),
        "filters": _discover_filters(),
    }
251
252
def cache_filename():
    """Return the path of the cache file to use.

    The location is taken from the environment:

    * ``$PYGMENTS_CACHE_FILE``, when set to a non-empty value, is returned
      as-is.
    * Otherwise the result is ``<data home>/pygments-cache/cache.py``, where
      the data home is ``$XDG_DATA_HOME`` or, when that is unset or empty,
      ``~/.local/share``.
    """
    explicit = os.environ.get("PYGMENTS_CACHE_FILE")
    # An empty value is treated as unset: "" is not a usable file path.
    if explicit:
        return explicit
    data_home = os.environ.get("XDG_DATA_HOME")
    # The XDG spec says an empty XDG_DATA_HOME must fall back to the default
    # rather than produce a path relative to the current directory.
    if not data_home:
        data_home = os.path.join(os.path.expanduser("~"), ".local", "share")
    return os.path.join(data_home, "pygments-cache", "cache.py")
267
268
def load(filename):
    """Read the cache from ``filename`` and install it as the global ``CACHE``.

    The file's text is evaluated as a single Python expression, with this
    module's globals used as both the global and the local namespace. The
    resulting object is stored in ``CACHE`` and also returned.

    NOTE(review): ``eval`` runs whatever expression the file contains, so the
    cache file has to come from a trusted location.
    """
    global CACHE
    with open(filename) as stream:
        source = stream.read()
    namespace = globals()
    CACHE = eval(source, namespace, namespace)
    return CACHE
277
278
def write_cache(filename):
    """Write the current global ``CACHE`` to ``filename``.

    The cache is serialised with ``pprint.pformat``. The parent directory of
    ``filename`` is created first if it does not exist yet; a bare file name
    (no directory part) is written relative to the current directory.
    """
    from pprint import pformat

    parent = os.path.dirname(filename)
    # os.makedirs("") raises FileNotFoundError, so skip directory creation
    # when the file name carries no directory component.
    if parent:
        os.makedirs(parent, exist_ok=True)
    # Format before opening so a formatting failure cannot truncate the file.
    text = pformat(CACHE)
    with open(filename, "w") as f:
        f.write(text)
288
289
def load_or_build():
    """Populate the global ``CACHE``.

    When the cache file already exists it is loaded from disk. Otherwise the
    cache is built from scratch, stored in ``CACHE`` and written out to the
    cache file, with progress notes printed on stderr.
    """
    global CACHE
    path = cache_filename()
    if os.path.exists(path):
        load(path)
        return

    import sys

    print("pygments cache not found, building...", file=sys.stderr)
    CACHE = build_cache()
    print("...writing cache to " + path, file=sys.stderr)
    write_cache(path)
305
306
307#
308# pygments interface
309#
310
311
def get_lexer_for_filename(filename, text="", **options):
    """Return a lexer instance for ``filename``.

    Stands in for both ``pygments.lexers.get_lexer_for_filename()`` and
    ``pygments.lexers.guess_lexer_for_filename()``.

    The base name of ``filename`` is looked up in the cache first, then its
    extension. On a hit the cached lexer class is imported and instantiated
    with ``options``; ``text`` is not consulted. On a miss pygments'
    ``guess_lexer_for_filename()`` is called with ``filename``, ``text`` and
    ``options``, the resulting class is recorded under the base name, and the
    cache file is rewritten.
    """
    if CACHE is None:
        load_or_build()
    known = CACHE["lexers"]["exts"]
    basename = os.path.basename(filename)
    key = basename
    if key not in known:
        key = os.path.splitext(basename)[1]

    if key not in known:
        # not cached: ask pygments directly, then remember the answer
        import inspect
        from pygments.lexers import guess_lexer_for_filename as pygments_guess

        lexer = pygments_guess(filename, text, **options)
        lexer_cls = type(lexer)
        module = inspect.getmodule(lexer_cls)
        known[basename] = (module.__name__, lexer_cls.__name__)
        write_cache(cache_filename())
        return lexer

    module_name, class_name = known[key]
    module = importlib.import_module(module_name)
    lexer_cls = getattr(module, class_name)
    return lexer_cls(**options)
339
340
# Alias: get_lexer_for_filename() already accepts the optional ``text``
# argument and hands it to pygments' guesser when the cache has no entry.
guess_lexer_for_filename = get_lexer_for_filename
342
343
def get_formatter_for_filename(fn, **options):
    """Return a formatter instance for the file name ``fn``.

    Stands in for ``pygments.formatters.get_formatter_for_filename()``.

    The base name of ``fn`` is looked up in the cache first, then its
    extension. On a hit the cached formatter class is imported and
    instantiated with ``options``. On a miss the pygments function is called
    instead, the resulting class is recorded under the base name, and the
    cache file is rewritten.
    """
    if CACHE is None:
        load_or_build()
    known = CACHE["formatters"]["exts"]
    basename = os.path.basename(fn)
    key = basename
    if key not in known:
        key = os.path.splitext(basename)[1]

    if key not in known:
        # not cached: ask pygments directly, then remember the answer
        import inspect
        from pygments.formatters import (
            get_formatter_for_filename as pygments_lookup,
        )

        formatter = pygments_lookup(fn, **options)
        formatter_cls = type(formatter)
        module = inspect.getmodule(formatter_cls)
        known[basename] = (module.__name__, formatter_cls.__name__)
        write_cache(cache_filename())
        return formatter

    module_name, class_name = known[key]
    module = importlib.import_module(module_name)
    formatter_cls = getattr(module, class_name)
    return formatter_cls(**options)
371
372
def get_formatter_by_name(alias, **options):
    """Return a formatter instance for the given name or alias.

    Stands in for ``pygments.formatters.get_formatter_by_name()``.

    On a cache hit the cached formatter class is imported and instantiated
    with ``options``. On a miss the pygments function is called instead, the
    resulting class is recorded under ``alias``, and the cache file is
    rewritten.
    """
    if CACHE is None:
        load_or_build()
    known = CACHE["formatters"]["names"]

    if alias not in known:
        # not cached: ask pygments directly, then remember the answer
        import inspect
        from pygments.formatters import get_formatter_by_name as pygments_lookup

        formatter = pygments_lookup(alias, **options)
        formatter_cls = type(formatter)
        module = inspect.getmodule(formatter_cls)
        # store the class under the requested name/alias for future use
        known[alias] = (module.__name__, formatter_cls.__name__)
        write_cache(cache_filename())
        return formatter

    module_name, class_name = known[alias]
    module = importlib.import_module(module_name)
    formatter_cls = getattr(module, class_name)
    return formatter_cls(**options)
397
398
def get_style_by_name(name):
    """Return the style class registered under ``name``.

    Stands in for ``pygments.styles.get_style_by_name()``.

    On a cache hit the cached style class is imported and returned (the class
    itself, not an instance). On a miss the pygments function is called
    instead, the resulting class is recorded under ``name``, and the cache
    file is rewritten.
    """
    if CACHE is None:
        load_or_build()
    known = CACHE["styles"]["names"]

    if name not in known:
        # not cached: ask pygments directly, then remember the answer
        import inspect
        from pygments.styles import get_style_by_name as pygments_lookup

        style = pygments_lookup(name)
        module = inspect.getmodule(style)
        known[name] = (module.__name__, style.__name__)
        write_cache(cache_filename())
        return style

    module_name, class_name = known[name]
    module = importlib.import_module(module_name)
    return getattr(module, class_name)
421
422
def get_all_styles():
    """Yield every style name held in the cache.

    Stands in for ``pygments.styles.get_all_styles``. Because this is a
    generator, the cache is loaded (or built) when iteration starts rather
    than when the function is called.
    """
    if CACHE is None:
        load_or_build()
    for style_name in CACHE["styles"]["names"]:
        yield style_name
430
431
def get_filter_by_name(filtername, **options):
    """Return a filter instance for the given filter name.

    Stands in for ``pygments.filters.get_filter_by_name()``.

    On a cache hit the cached filter class is imported and instantiated with
    ``options``. On a miss the pygments function is called instead, the
    resulting class is recorded under ``filtername``, and the cache file is
    rewritten.
    """
    if CACHE is None:
        load_or_build()
    known = CACHE["filters"]["names"]

    if filtername not in known:
        # not cached: ask pygments directly, then remember the answer
        import inspect
        from pygments.filters import get_filter_by_name as pygments_lookup

        instance = pygments_lookup(filtername, **options)
        filter_cls = type(instance)
        module = inspect.getmodule(filter_cls)
        known[filtername] = (module.__name__, filter_cls.__name__)
        write_cache(cache_filename())
        return instance

    module_name, class_name = known[filtername]
    module = importlib.import_module(module_name)
    filter_cls = getattr(module, class_name)
    return filter_cls(**options)
456