3    pygments.lexers.haskell
4    ~~~~~~~~~~~~~~~~~~~~~~~
6    Lexers for Haskell and related languages.
12import re
14from pygments.lexer import Lexer, RegexLexer, bygroups, do_insertions, \
15    default, include, inherit
16from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
17    Number, Punctuation, Generic
18from pygments import unistring as uni
# Public names exported by this module.
__all__ = ['HaskellLexer', 'HspecLexer', 'IdrisLexer', 'AgdaLexer', 'CryptolLexer',
           'LiterateHaskellLexer', 'LiterateIdrisLexer', 'LiterateAgdaLexer',
           'LiterateCryptolLexer', 'KokaLexer']


# Matches a single line including its terminating newline.  Text after the
# last newline of the input is not matched by this pattern.
line_re = re.compile('.*?\n')
class HaskellLexer(RegexLexer):
    """
    A Haskell lexer based on the lexemes defined in the Haskell 98 Report.

    States defined in ``tokens``:

    * ``root`` -- ordinary source text;
    * ``import`` / ``module`` -- the remainder of an ``import`` or ``module``
      header, entered from ``root`` on the respective keyword;
    * ``funclist`` -- a parenthesized list of names inside such a header;
    * ``comment`` -- nestable ``{- ... -}`` block comments;
    * ``character`` / ``string`` / ``escape`` -- literals and the escape
      sequence following a backslash inside them.

    .. versionadded:: 0.8
    """
    name = 'Haskell'
    aliases = ['haskell', 'hs']
    filenames = ['*.hs']
    mimetypes = ['text/x-haskell']

    flags = re.MULTILINE | re.UNICODE

    # Reserved words.  The entries are regex fragments (see 'infix[lr]?') and
    # are joined with '|' into a single alternation in the 'root' state.
    reserved = ('case', 'class', 'data', 'default', 'deriving', 'do', 'else',
                'family', 'if', 'in', 'infix[lr]?', 'instance',
                'let', 'newtype', 'of', 'then', 'type', 'where', '_')
    # Control-character names accepted in the 'escape' state.  Also regex
    # fragments, joined with '|'.
    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    tokens = {
        'root': [
            # Whitespace:
            (r'\s+', Text),
            # (r'--\s*|.*$', Comment.Doc),
            # '--' starts a line comment unless it is directly followed by
            # one of the listed symbol characters.
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            # Lexemes:
            #  Identifiers
            (r'\bimport\b', Keyword.Reserved, 'import'),
            (r'\bmodule\b', Keyword.Reserved, 'module'),
            (r'\berror\b', Name.Exception),
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            (r"'[^\\]'", String.Char),  # this has to come before the TH quote
            # A lowercase identifier at the start of a line is tagged as a
            # function name; elsewhere it is a plain Name.
            (r'^[_' + uni.Ll + r'][\w\']*', Name.Function),
            (r"'?[_" + uni.Ll + r"][\w']*", Name),
            # Identifiers starting with an uppercase letter, optionally
            # prefixed with '' or '.
            (r"('')?[" + uni.Lu + r"][\w\']*", Keyword.Type),
            (r"(')[" + uni.Lu + r"][\w\']*", Keyword.Type),
            (r"(')\[[^\]]*\]", Keyword.Type),  # tuples and lists get special treatment in GHC
            (r"(')\([^)]*\)", Keyword.Type),  # ..
            (r"(')[:!#$%&*+.\\/<=>?@^|~-]+", Keyword.Type),  # promoted type operators
            #  Operators
            # The negative lookaheads keep '\' and the special symbols from
            # matching when they are only the prefix of a longer operator.
            (r'\\(?![:!#$%&*+.\\/<=>?@^|~-]+)', Name.Function),  # lambda operator
            (r'(<-|::|->|=>|=)(?![:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            (r':[:!#$%&*+.\\/<=>?@^|~-]*', Keyword.Type),  # Constructor operators
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),  # Other operators
            #  Numbers
            # Underscores are accepted between digits.  Float patterns come
            # first so that the integer patterns do not consume their prefix.
            (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*_*[pP][+-]?\d(_*\d)*', Number.Float),
            (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*\.[\da-fA-F](_*[\da-fA-F])*'
             r'(_*[pP][+-]?\d(_*\d)*)?', Number.Float),
            (r'\d(_*\d)*_*[eE][+-]?\d(_*\d)*', Number.Float),
            (r'\d(_*\d)*\.\d(_*\d)*(_*[eE][+-]?\d(_*\d)*)?', Number.Float),
            (r'0[bB]_*[01](_*[01])*', Number.Bin),
            (r'0[oO]_*[0-7](_*[0-7])*', Number.Oct),
            (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*', Number.Hex),
            (r'\d(_*\d)*', Number.Integer),
            #  Character/String Literals
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            #  Special
            (r'\[\]', Keyword.Type),
            (r'\(\)', Name.Builtin),
            (r'[][(),;`{}]', Punctuation),
        ],
        'import': [
            # Import statements
            (r'\s+', Text),
            (r'"', String, 'string'),
            # after "funclist" state
            (r'\)', Punctuation, '#pop'),
            (r'qualified\b', Keyword),
            # import X as Y
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(as)(\s+)([' + uni.Lu + r'][\w.]*)',
             bygroups(Name.Namespace, Text, Keyword, Text, Name), '#pop'),
            # import X hiding (functions)
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(hiding)(\s+)(\()',
             bygroups(Name.Namespace, Text, Keyword, Text, Punctuation), 'funclist'),
            # import X (functions)
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            # import X
            (r'[\w.]+', Name.Namespace, '#pop'),
        ],
        'module': [
            (r'\s+', Text),
            # module X (exports)
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            # module X
            (r'[' + uni.Lu + r'][\w.]*', Name.Namespace, '#pop'),
        ],
        'funclist': [
            (r'\s+', Text),
            (r'[' + uni.Lu + r']\w*', Keyword.Type),
            (r'(_[\w\']+|[' + uni.Ll + r'][\w\']*)', Name.Function),
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            # ')' always pops two states.  For the outermost list these are
            # this state and the 'import'/'module' state that opened it; a
            # nested '(' therefore pushes 'funclist' twice so that its ')'
            # returns to the enclosing 'funclist'.
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        # NOTE: the next four states are shared in the AgdaLexer; make sure
        # any change is compatible with Agda as well or copy over and change
        'comment': [
            # Multiline Comments
            (r'[^-{}]+', Comment.Multiline),
            # '{-' inside a comment opens a nested comment level.
            (r'\{-', Comment.Multiline, '#push'),
            (r'-\}', Comment.Multiline, '#pop'),
            (r'[-{}]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']'", String.Char, '#pop'),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        'escape': [
            # Entered after a backslash; each rule consumes the rest of one
            # escape sequence and returns to the enclosing literal state.
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            (r'\^[][' + uni.Lu + r'@^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            # Whitespace followed by another backslash.
            (r'\s+\\', String.Escape, '#pop'),
        ],
    }
class HspecLexer(HaskellLexer):
    """
    Haskell lexer extended with rules for Hspec test descriptions.

    A double-quoted string that directly follows ``it``, ``describe`` or
    ``context`` is tagged as ``String.Doc``.  All other input is handled by
    the rules inherited from the Haskell lexer.

    .. versionadded:: 2.4.0
    """

    name = 'Hspec'
    aliases = ['hspec']
    filenames = []
    mimetypes = []

    tokens = {
        # One rule per Hspec keyword (tried in the order given), followed by
        # the inherited Haskell 'root' rules.
        'root': [
            (r'(%s\s*)("[^"]*")' % word, bygroups(Text, String.Doc))
            for word in ('it', 'describe', 'context')
        ] + [inherit],
    }
class IdrisLexer(RegexLexer):
    """
    A lexer for the dependently typed programming language Idris.

    Based on the Haskell and Agda Lexer.

    States defined in ``tokens``:

    * ``root`` -- ordinary source text;
    * ``module`` -- the name following ``import`` or ``module``;
    * ``funclist`` -- a parenthesized list of names after a module name;
    * ``comment`` -- nestable ``{- ... -}`` block comments;
    * ``character`` / ``string`` / ``escape`` -- literals and the escape
      sequence following a backslash inside them.

    .. versionadded:: 2.0
    """
    name = 'Idris'
    aliases = ['idris', 'idr']
    filenames = ['*.idr']
    mimetypes = ['text/x-idris']

    # Reserved words.  The entries are regex fragments (see 'infix[lr]?') and
    # are joined with '|' into a single alternation in the 'root' state.
    reserved = ('case', 'class', 'data', 'default', 'using', 'do', 'else',
                'if', 'in', 'infix[lr]?', 'instance', 'rewrite', 'auto',
                'namespace', 'codata', 'mutual', 'private', 'public', 'abstract',
                'total', 'partial',
                'interface', 'implementation', 'export', 'covering', 'constructor',
                'let', 'proof', 'of', 'then', 'static', 'where', '_', 'with',
                'pattern',  'term',  'syntax', 'prefix',
                'postulate', 'parameters', 'record', 'dsl', 'impossible', 'implicit',
                'tactics', 'intros', 'intro', 'compute', 'refine', 'exact', 'trivial')

    # Control-character names accepted in the 'escape' state (regex
    # fragments, joined with '|').
    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    # Directive names; matched in 'root' when preceded by '%' at the start of
    # a line (after optional whitespace).
    directives = ('lib', 'link', 'flag', 'include', 'hide', 'freeze', 'access',
                  'default', 'logging', 'dynamic', 'name', 'error_handlers', 'language')

    tokens = {
        'root': [
            # Directives ('%%' is a literal '%' after string formatting)
            (r'^(\s*)(%%(%s))' % '|'.join(directives),
             bygroups(Text, Keyword.Reserved)),
            # Comments: '--' line comments, '|||' comments and '{-' block
            # comments, each with optional leading whitespace.
            (r'(\s*)(--(?![!#$%&*+./<=>?@^|_~:\\]).*?)$', bygroups(Text, Comment.Single)),
            (r'(\s*)(\|{3}.*?)$', bygroups(Text, Comment.Single)),
            (r'(\s*)(\{-)', bygroups(Text, Comment.Multiline), 'comment'),
            # Declaration
            # A name at the start of a line that is followed by ':'.
            (r'^(\s*)([^\s(){}]+)(\s*)(:)(\s*)',
             bygroups(Text, Name.Function, Text, Operator.Word, Text)),
            #  Identifiers
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Text), 'module'),
            (r"('')?[A-Z][\w\']*", Keyword.Type),
            (r'[a-z][\w\']*', Text),
            #  Special Symbols
            (r'(<-|::|->|=>|=)', Operator.Word),  # specials
            (r'([(){}\[\]:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            # Strings
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            # Fallback: any other run of characters that is not whitespace,
            # a parenthesis or a brace.
            (r'[^\s(){}]+', Text),
            (r'\s+?', Text),  # Whitespace
        ],
        'module': [
            (r'\s+', Text),
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            (r'[A-Z][\w.]*', Name.Namespace, '#pop'),
        ],
        'funclist': [
            (r'\s+', Text),
            (r'[A-Z]\w*', Keyword.Type),
            (r'(_[\w\']+|[a-z][\w\']*)', Name.Function),
            (r'--.*$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            # ')' always pops two states.  For the outermost list these are
            # this state and the 'module' state that opened it; a nested '('
            # therefore pushes 'funclist' twice so that its ')' returns to
            # the enclosing 'funclist'.
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        # NOTE: the next four states are this lexer's own copies of the
        # corresponding HaskellLexer states.  AgdaLexer reuses the
        # HaskellLexer definitions, not these, so changes made here affect
        # Idris only.  The 'character' state differs from the Haskell one.
        'comment': [
            # Multiline Comments
            (r'[^-{}]+', Comment.Multiline),
            # '{-' inside a comment opens a nested comment level.
            (r'\{-', Comment.Multiline, '#push'),
            (r'-\}', Comment.Multiline, '#pop'),
            (r'[-{}]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            # Ordinary characters do not leave the state; only the closing
            # quote below does.
            (r"[^\\']", String.Char),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        'escape': [
            # Entered after a backslash; each rule consumes the rest of one
            # escape sequence and returns to the enclosing literal state.
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            (r'\^[][A-Z@^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            (r'\s+\\', String.Escape, '#pop')
        ],
    }
class AgdaLexer(RegexLexer):
    """
    For the `Agda <http://wiki.portal.chalmers.se/agda/pmwiki.php>`_
    dependently typed functional programming language and proof assistant.

    The ``comment``, ``character``, ``string`` and ``escape`` states are not
    defined here; they are taken directly from ``HaskellLexer.tokens``.

    .. versionadded:: 2.0
    """

    name = 'Agda'
    aliases = ['agda']
    filenames = ['*.agda']
    mimetypes = ['text/x-agda']

    # Reserved words; joined with '|' into a single alternation in 'root'.
    reserved = ['abstract', 'codata', 'coinductive', 'constructor', 'data',
                'field', 'forall', 'hiding', 'in', 'inductive', 'infix',
                'infixl', 'infixr', 'instance', 'let', 'mutual', 'open',
                'pattern', 'postulate', 'primitive', 'private',
                'quote', 'quoteGoal', 'quoteTerm',
                'record', 'renaming', 'rewrite', 'syntax', 'tactic',
                'unquote', 'unquoteDecl', 'using', 'where', 'with']

    tokens = {
        'root': [
            # Declaration
            # A name at the start of a line that is followed by ':'.
            (r'^(\s*)([^\s(){}]+)(\s*)(:)(\s*)',
             bygroups(Text, Name.Function, Text, Operator.Word, Text)),
            # Comments
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            # Holes
            (r'\{!', Comment.Directive, 'hole'),
            # Lexemes:
            #  Identifiers
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Text), 'module'),
            # 'Set' / 'Prop', optionally followed by subscript digits
            # (U+2080..U+2089).
            (r'\b(Set|Prop)[\u2080-\u2089]*\b', Keyword.Type),
            #  Special Symbols
            (r'(\(|\)|\{|\})', Operator),
            (r'(\.{1,3}|\||\u03BB|\u2200|\u2192|:|=|->)', Operator.Word),
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            # Strings
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            # Fallback: any other run of characters that is not whitespace,
            # a parenthesis or a brace.
            (r'[^\s(){}]+', Text),
            (r'\s+?', Text),  # Whitespace
        ],
        'hole': [
            # Holes
            # '{! ... !}' regions; '{!' inside a hole opens a nested level.
            (r'[^!{}]+', Comment.Directive),
            (r'\{!', Comment.Directive, '#push'),
            (r'!\}', Comment.Directive, '#pop'),
            (r'[!{}]', Comment.Directive),
        ],
        'module': [
            # Entered after 'import' / 'module'; left once a name is read.
            (r'\{-', Comment.Multiline, 'comment'),
            (r'[a-zA-Z][\w.]*', Name, '#pop'),
            (r'[\W0-9_]+', Text)
        ],
        # The remaining states are the very same rule lists as in
        # HaskellLexer, so any change made there applies here as well.
        'comment': HaskellLexer.tokens['comment'],
        'character': HaskellLexer.tokens['character'],
        'string': HaskellLexer.tokens['string'],
        'escape': HaskellLexer.tokens['escape']
    }
class CryptolLexer(RegexLexer):
    """
    FIXME: A Cryptol2 lexer based on the lexemes defined in the Haskell 98 Report.

    Comments use ``//`` and nestable ``/* ... */``.  After regular lexing,
    plain ``Name`` tokens whose text is listed in `EXTRA_KEYWORDS` are
    re-tagged as ``Name.Builtin`` (see `get_tokens_unprocessed`).

    .. versionadded:: 2.0
    """
    name = 'Cryptol'
    aliases = ['cryptol', 'cry']
    filenames = ['*.cry']
    mimetypes = ['text/x-cryptol']

    # Reserved words; joined with '|' into a single alternation in 'root'.
    reserved = ('Arith', 'Bit', 'Cmp', 'False', 'Inf', 'True', 'else',
                'export', 'extern', 'fin', 'if', 'import', 'inf', 'lg2',
                'max', 'min', 'module', 'newtype', 'pragma', 'property',
                'then', 'type', 'where', 'width')
    # Control-character names accepted in the 'escape' state (regex
    # fragments, joined with '|').
    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    tokens = {
        'root': [
            # Whitespace:
            (r'\s+', Text),
            # (r'--\s*|.*$', Comment.Doc),
            (r'//.*$', Comment.Single),
            (r'/\*', Comment.Multiline, 'comment'),
            # Lexemes:
            #  Identifiers
            (r'\bimport\b', Keyword.Reserved, 'import'),
            (r'\bmodule\b', Keyword.Reserved, 'module'),
            (r'\berror\b', Name.Exception),
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            # A lowercase identifier at the start of a line is tagged as a
            # function name; elsewhere it is a plain Name.
            (r'^[_a-z][\w\']*', Name.Function),
            (r"'?[_a-z][\w']*", Name),
            (r"('')?[A-Z][\w\']*", Keyword.Type),
            #  Operators
            # The negative lookaheads keep '\' and the special symbols from
            # matching when they are only the prefix of a longer operator.
            (r'\\(?![:!#$%&*+.\\/<=>?@^|~-]+)', Name.Function),  # lambda operator
            (r'(<-|::|->|=>|=)(?![:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            (r':[:!#$%&*+.\\/<=>?@^|~-]*', Keyword.Type),  # Constructor operators
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),  # Other operators
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[oO][0-7]+', Number.Oct),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            #  Character/String Literals
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            #  Special
            (r'\[\]', Keyword.Type),
            (r'\(\)', Name.Builtin),
            (r'[][(),;`{}]', Punctuation),
        ],
        'import': [
            # Import statements
            (r'\s+', Text),
            (r'"', String, 'string'),
            # after "funclist" state
            (r'\)', Punctuation, '#pop'),
            (r'qualified\b', Keyword),
            # import X as Y
            (r'([A-Z][\w.]*)(\s+)(as)(\s+)([A-Z][\w.]*)',
             bygroups(Name.Namespace, Text, Keyword, Text, Name), '#pop'),
            # import X hiding (functions)
            (r'([A-Z][\w.]*)(\s+)(hiding)(\s+)(\()',
             bygroups(Name.Namespace, Text, Keyword, Text, Punctuation), 'funclist'),
            # import X (functions)
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            # import X
            (r'[\w.]+', Name.Namespace, '#pop'),
        ],
        'module': [
            (r'\s+', Text),
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            (r'[A-Z][\w.]*', Name.Namespace, '#pop'),
        ],
        'funclist': [
            (r'\s+', Text),
            (r'[A-Z]\w*', Keyword.Type),
            (r'(_[\w\']+|[a-z][\w\']*)', Name.Function),
            # TODO: these don't match the comments in docs, remove.
            # (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            # (r'{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            # ')' always pops two states.  For the outermost list these are
            # this state and the 'import'/'module' state that opened it; a
            # nested '(' therefore pushes 'funclist' twice so that its ')'
            # returns to the enclosing 'funclist'.
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        'comment': [
            # Multiline Comments
            (r'[^/*]+', Comment.Multiline),
            # '/*' inside a comment opens a nested comment level.
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']'", String.Char, '#pop'),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        'escape': [
            # Entered after a backslash; each rule consumes the rest of one
            # escape sequence and returns to the enclosing literal state.
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            (r'\^[][A-Z@^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            (r'\s+\\', String.Escape, '#pop'),
        ],
    }

    # Names re-tagged as Name.Builtin by get_tokens_unprocessed().
    # NOTE(review): only tokens typed exactly `Name` are re-tagged.  The
    # operator entries ('<<', '>>', ...), 'width', 'error' and 'ASSERT' look
    # unreachable, because the 'root' rules appear to give them other token
    # types -- confirm before relying on them.
    EXTRA_KEYWORDS = {'join', 'split', 'reverse', 'transpose', 'width',
                      'length', 'tail', '<<', '>>', '<<<', '>>>', 'const',
                      'reg', 'par', 'seq', 'ASSERT', 'undefined', 'error',
                      'trace'}

    def get_tokens_unprocessed(self, text):
        """
        Lex *text* with the regular rules and yield ``(index, token, value)``
        tuples, replacing the token type ``Name`` by ``Name.Builtin`` for
        values contained in `EXTRA_KEYWORDS`.
        """
        # Explicit initial state stack handed to the base implementation.
        stack = ['root']
        for index, token, value in \
                RegexLexer.get_tokens_unprocessed(self, text, stack):
            # Identity check: subtypes of Name (e.g. Name.Function) are left
            # untouched.
            if token is Name and value in self.EXTRA_KEYWORDS:
                yield index, Name.Builtin, value
            else:
                yield index, token, value
class LiterateLexer(Lexer):
    """
    Base class for lexers of literate file formats based on LaTeX or Bird-style
    (prefixing each code line with ">").

    The code parts of the input are collected and lexed by the lexer passed
    as `baselexer`.  The remaining text is emitted as plain ``Text`` (Bird
    style) or lexed with the TeX lexer (LaTeX style, where code is enclosed
    in ``\\begin{code}`` / ``\\end{code}`` lines) and merged back into the
    token stream at the position where it occurred.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``.  If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.
    """

    # A Bird-style code line: the ">" marker plus following blanks, then the
    # code itself.  The newline is optional so that a final line without a
    # line terminator is still recognised as code.
    bird_re = re.compile(r'(>[ \t]*)(.*\n?)')

    def __init__(self, baselexer, **options):
        """
        :param baselexer: lexer instance used for the extracted code.
        :param options: lexer options, including the optional ``litstyle``.
        """
        self.baselexer = baselexer
        Lexer.__init__(self, **options)

    @staticmethod
    def _iter_lines(text):
        """
        Yield the lines of *text* in order, each including its newline.

        Unlike iterating over ``line_re`` alone, trailing text that is not
        terminated by a newline is yielded as a last line instead of being
        dropped.
        """
        end = 0
        for match in line_re.finditer(text):
            end = match.end()
            yield match.group()
        if end < len(text):
            yield text[end:]

    def get_tokens_unprocessed(self, text):
        """
        Split *text* into code and non-code parts and yield
        ``(index, token, value)`` tuples for the whole input.

        :param text: the literate source to lex.
        """
        style = self.options.get('litstyle')
        if style is None:
            # Empty or whitespace-only input has no first character and is
            # therefore treated as Bird style, as documented.
            if text.lstrip().startswith(('%', '\\')):
                style = 'latex'
            else:
                style = 'bird'

        # Code is gathered as a list of pieces (joined once at the end);
        # code_len tracks the length of the code collected so far, which is
        # the offset at which the next insertion has to be placed.
        code_parts = []
        code_len = 0
        insertions = []
        if style == 'bird':
            # bird-style
            for line in self._iter_lines(text):
                m = self.bird_re.match(line)
                if m:
                    insertions.append((code_len,
                                       [(0, Comment.Special, m.group(1))]))
                    code_parts.append(m.group(2))
                    code_len += len(m.group(2))
                else:
                    insertions.append((code_len, [(0, Text, line)]))
        else:
            # latex-style
            from pygments.lexers.markup import TexLexer
            lxlexer = TexLexer(**self.options)
            in_code = False
            latex_parts = []
            for line in self._iter_lines(text):
                if in_code:
                    if line.lstrip().startswith('\\end{code}'):
                        in_code = False
                        latex_parts.append(line)
                    else:
                        code_parts.append(line)
                        code_len += len(line)
                elif line.lstrip().startswith('\\begin{code}'):
                    in_code = True
                    latex_parts.append(line)
                    # Flush the LaTeX text gathered so far (including the
                    # \begin{code} line) in front of the upcoming code.
                    insertions.append(
                        (code_len,
                         list(lxlexer.get_tokens_unprocessed(''.join(latex_parts)))))
                    latex_parts = []
                else:
                    latex_parts.append(line)
            # Whatever LaTeX text follows the last code block.
            insertions.append(
                (code_len,
                 list(lxlexer.get_tokens_unprocessed(''.join(latex_parts)))))
        yield from do_insertions(
            insertions,
            self.baselexer.get_tokens_unprocessed(''.join(code_parts)))
class LiterateHaskellLexer(LiterateLexer):
    """
    Lexer for Literate Haskell sources, in either Bird or LaTeX style.

    Additional options accepted:

    `litstyle`
        Either ``"bird"`` or ``"latex"``.  When omitted, the style is
        autodetected: LaTeX is assumed if the first non-whitespace character
        of the source is a backslash or a percent sign, Bird otherwise.

    .. versionadded:: 0.9
    """
    name = 'Literate Haskell'
    aliases = ['lhs', 'literate-haskell', 'lhaskell']
    filenames = ['*.lhs']
    mimetypes = ['text/x-literate-haskell']

    def __init__(self, **options):
        # The embedded code is lexed by a HaskellLexer that receives the
        # same options as this lexer.
        LiterateLexer.__init__(self, HaskellLexer(**options), **options)
class LiterateIdrisLexer(LiterateLexer):
    """
    Lexer for Literate Idris sources, in either Bird or LaTeX style.

    Additional options accepted:

    `litstyle`
        Either ``"bird"`` or ``"latex"``.  When omitted, the style is
        autodetected: LaTeX is assumed if the first non-whitespace character
        of the source is a backslash or a percent sign, Bird otherwise.

    .. versionadded:: 2.0
    """
    name = 'Literate Idris'
    aliases = ['lidr', 'literate-idris', 'lidris']
    filenames = ['*.lidr']
    mimetypes = ['text/x-literate-idris']

    def __init__(self, **options):
        # The embedded code is lexed by an IdrisLexer that receives the same
        # options as this lexer.
        LiterateLexer.__init__(self, IdrisLexer(**options), **options)
class LiterateAgdaLexer(LiterateLexer):
    """
    For Literate Agda source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``.  If not given, LaTeX
        style is used; unlike the other literate lexers, the style is not
        autodetected.

    .. versionadded:: 2.0
    """
    name = 'Literate Agda'
    aliases = ['lagda', 'literate-agda']
    filenames = ['*.lagda']
    mimetypes = ['text/x-literate-agda']

    def __init__(self, **options):
        agdalexer = AgdaLexer(**options)
        # Default to LaTeX style.  The default is merged into `options`
        # rather than passed as a separate keyword next to **options, which
        # raised TypeError (duplicate keyword argument) whenever the caller
        # supplied the documented `litstyle` option.
        if options.get('litstyle') is None:
            options['litstyle'] = 'latex'
        LiterateLexer.__init__(self, agdalexer, **options)
class LiterateCryptolLexer(LiterateLexer):
    """
    Lexer for Literate Cryptol sources, in either Bird or LaTeX style.

    Additional options accepted:

    `litstyle`
        Either ``"bird"`` or ``"latex"``.  When omitted, the style is
        autodetected: LaTeX is assumed if the first non-whitespace character
        of the source is a backslash or a percent sign, Bird otherwise.

    .. versionadded:: 2.0
    """
    name = 'Literate Cryptol'
    aliases = ['lcry', 'literate-cryptol', 'lcryptol']
    filenames = ['*.lcry']
    mimetypes = ['text/x-literate-cryptol']

    def __init__(self, **options):
        # The embedded code is lexed by a CryptolLexer that receives the
        # same options as this lexer.
        LiterateLexer.__init__(self, CryptolLexer(**options), **options)
class KokaLexer(RegexLexer):
    """
    Lexer for the `Koka <http://koka.codeplex.com>`_
    language.

    Besides the ``root`` state, a family of ``type`` states (``type``,
    ``alias-type``, ``struct-type``, ``type-nested``, ``type-content``) is
    used to highlight type expressions; these states are left again as soon
    as something that is not part of a type is seen.

    .. versionadded:: 1.6
    """

    name = 'Koka'
    aliases = ['koka']
    filenames = ['*.kk', '*.kki']
    mimetypes = ['text/x-koka']

    # NOTE(review): 'private' is listed twice below.  The duplicate has no
    # effect on the generated regex; possibly another keyword was intended
    # -- confirm against the language specification.
    keywords = [
        'infix', 'infixr', 'infixl',
        'type', 'cotype', 'rectype', 'alias',
        'struct', 'con',
        'fun', 'function', 'val', 'var',
        'external',
        'if', 'then', 'else', 'elif', 'return', 'match',
        'private', 'public', 'private',
        'module', 'import', 'as',
        'include', 'inline',
        'rec',
        'try', 'yield', 'enum',
        'interface', 'instance',
    ]

    # keywords that are followed by a type
    typeStartKeywords = [
        'type', 'cotype', 'rectype', 'alias', 'struct', 'enum',
    ]

    # keywords valid in a type
    typekeywords = [
        'forall', 'exists', 'some', 'with',
    ]

    # builtin names and special names
    builtin = [
        'for', 'while', 'repeat',
        'foreach', 'foreach-indexed',
        'error', 'catch', 'finally',
        'cs', 'js', 'file', 'ref', 'assigned',
    ]

    # symbols that can be in an operator
    symbols = r'[$%&*+@!/\\^~=.:\-?|<>]+'

    # symbol boundary: an operator keyword should not be followed by any of these
    sboundary = '(?!' + symbols + ')'

    # name boundary: a keyword should not be followed by any of these
    boundary = r'(?![\w/])'

    # koka token abstractions
    # (token types used for types, type definitions and constructors)
    tokenType = Name.Attribute
    tokenTypeDef = Name.Class
    tokenConstructor = Generic.Emph

    # main lexer
    tokens = {
        'root': [
            include('whitespace'),

            # go into type mode
            (r'::?' + sboundary, tokenType, 'type'),
            (r'(alias)(\s+)([a-z]\w*)?', bygroups(Keyword, Text, tokenTypeDef),
             'alias-type'),
            (r'(struct)(\s+)([a-z]\w*)?', bygroups(Keyword, Text, tokenTypeDef),
             'struct-type'),
            # 'alias' and 'struct' are already handled by the two rules
            # above, which come first.
            ((r'(%s)' % '|'.join(typeStartKeywords)) +
             r'(\s+)([a-z]\w*)?', bygroups(Keyword, Text, tokenTypeDef),
             'type'),

            # special sequences of tokens (we use ?: for non-capturing group as
            # required by 'bygroups')
            (r'(module)(\s+)(interface\s+)?((?:[a-z]\w*/)*[a-z]\w*)',
             bygroups(Keyword, Text, Keyword, Name.Namespace)),
            (r'(import)(\s+)((?:[a-z]\w*/)*[a-z]\w*)'
             r'(?:(\s*)(=)(\s*)((?:qualified\s*)?)'
             r'((?:[a-z]\w*/)*[a-z]\w*))?',
             bygroups(Keyword, Text, Name.Namespace, Text, Keyword, Text,
                      Keyword, Name.Namespace)),

            # function / value definitions and external declarations that
            # start a line; the defined name is either an identifier or a
            # parenthesized operator
            (r'(^(?:(?:public|private)\s*)?(?:function|fun|val))'
             r'(\s+)([a-z]\w*|\((?:' + symbols + r'|/)\))',
             bygroups(Keyword, Text, Name.Function)),
            (r'(^(?:(?:public|private)\s*)?external)(\s+)(inline\s+)?'
             r'([a-z]\w*|\((?:' + symbols + r'|/)\))',
             bygroups(Keyword, Text, Keyword, Name.Function)),

            # keywords
            (r'(%s)' % '|'.join(typekeywords) + boundary, Keyword.Type),
            (r'(%s)' % '|'.join(keywords) + boundary, Keyword),
            (r'(%s)' % '|'.join(builtin) + boundary, Keyword.Pseudo),
            (r'::?|:=|\->|[=.]' + sboundary, Keyword),

            # names
            # (optionally qualified by 'module/' path segments)
            (r'((?:[a-z]\w*/)*)([A-Z]\w*)',
             bygroups(Name.Namespace, tokenConstructor)),
            (r'((?:[a-z]\w*/)*)([a-z]\w*)', bygroups(Name.Namespace, Name)),
            (r'((?:[a-z]\w*/)*)(\((?:' + symbols + r'|/)\))',
             bygroups(Name.Namespace, Name)),
            (r'_\w*', Name.Variable),

            # literal string
            (r'@"', String.Double, 'litstring'),

            # operators
            # (a lone '/' counts as an operator unless it starts a comment)
            (symbols + "|/(?![*/])", Operator),
            (r'`', Operator),
            (r'[{}()\[\];,]', Punctuation),

            # literals. No check for literal characters with len > 1
            (r'[0-9]+\.[0-9]+([eE][\-+]?[0-9]+)?', Number.Float),
            (r'0[xX][0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),

            (r"'", String.Char, 'char'),
            (r'"', String.Double, 'string'),
        ],

        # type started by alias
        'alias-type': [
            (r'=', Keyword),
            include('type')
        ],

        # type started by struct
        'struct-type': [
            # zero-width match: leave the state in front of a '(' unless the
            # parentheses contain nothing but commas
            (r'(?=\((?!,*\)))', Punctuation, '#pop'),
            include('type')
        ],

        # type started by colon
        'type': [
            (r'[(\[<]', tokenType, 'type-nested'),
            include('type-content')
        ],

        # type nested in brackets: can contain parameters, comma etc.
        'type-nested': [
            (r'[)\]>]', tokenType, '#pop'),
            (r'[(\[<]', tokenType, 'type-nested'),
            (r',', tokenType),
            (r'([a-z]\w*)(\s*)(:)(?!:)',
             bygroups(Name, Text, tokenType)),  # parameter name
            include('type-content')
        ],

        # shared contents of a type
        'type-content': [
            include('whitespace'),

            # keywords
            (r'(%s)' % '|'.join(typekeywords) + boundary, Keyword),
            # zero-width match: a regular keyword ends the type without
            # being consumed here
            (r'(?=((%s)' % '|'.join(keywords) + boundary + '))',
             Keyword, '#pop'),  # need to match because names overlap...

            # kinds
            (r'[EPHVX]' + boundary, tokenType),

            # type names
            (r'[a-z][0-9]*(?![\w/])', tokenType),
            (r'_\w*', tokenType.Variable),  # Generic.Emph
            (r'((?:[a-z]\w*/)*)([A-Z]\w*)',
             bygroups(Name.Namespace, tokenType)),
            (r'((?:[a-z]\w*/)*)([a-z]\w+)',
             bygroups(Name.Namespace, tokenType)),

            # type keyword operators
            (r'::|->|[.:|]', tokenType),

            # catchall
            # (anything else ends the type)
            default('#pop')
        ],

        # comments and literals
        'whitespace': [
            # a line whose first non-blank character is '#'
            (r'\n\s*#.*$', Comment.Preproc),
            (r'\s+', Text),
            (r'/\*', Comment.Multiline, 'comment'),
            (r'//.*$', Comment.Single)
        ],
        'comment': [
            (r'[^/*]+', Comment.Multiline),
            # '/*' inside a comment opens a nested comment level
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline),
        ],
        'litstring': [
            # @"..." string; a doubled quote is the only escape
            (r'[^"]+', String.Double),
            (r'""', String.Escape),
            (r'"', String.Double, '#pop'),
        ],
        'string': [
            # ends at the closing quote or at the end of the line
            (r'[^\\"\n]+', String.Double),
            include('escape-sequence'),
            (r'["\n]', String.Double, '#pop'),
        ],
        'char': [
            # ends at the closing quote or at the end of the line
            (r'[^\\\'\n]+', String.Char),
            include('escape-sequence'),
            (r'[\'\n]', String.Char, '#pop'),
        ],
        'escape-sequence': [
            (r'\\[nrt\\"\']', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
            (r'\\u[0-9a-fA-F]{4}', String.Escape),
            # Yes, \U literals are 6 hex digits.
            (r'\\U[0-9a-fA-F]{6}', String.Escape)
        ]
    }