# -*- coding: utf-8 -*-
"""
    pygments.lexers.haskell
    ~~~~~~~~~~~~~~~~~~~~~~~

    Lexers for Haskell and related languages.

    :copyright: Copyright 2006-2020 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import re

from pygments.lexer import Lexer, RegexLexer, bygroups, do_insertions, \
    default, include, inherit
from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
    Number, Punctuation, Generic
from pygments import unistring as uni

__all__ = ['HaskellLexer', 'HspecLexer', 'IdrisLexer', 'AgdaLexer', 'CryptolLexer',
           'LiterateHaskellLexer', 'LiterateIdrisLexer', 'LiterateAgdaLexer',
           'LiterateCryptolLexer', 'KokaLexer']


line_re = re.compile('.*?\n')


class HaskellLexer(RegexLexer):
    """
    A Haskell lexer based on the lexemes defined in the Haskell 98 Report.

    .. versionadded:: 0.8
    """
    name = 'Haskell'
    aliases = ['haskell', 'hs']
    filenames = ['*.hs']
    mimetypes = ['text/x-haskell']

    flags = re.MULTILINE | re.UNICODE

    reserved = ('case', 'class', 'data', 'default', 'deriving', 'do', 'else',
                'family', 'if', 'in', 'infix[lr]?', 'instance',
                'let', 'newtype', 'of', 'then', 'type', 'where', '_')
    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    tokens = {
        'root': [
            # Whitespace:
            (r'\s+', Text),
            # (r'--\s*|.*$', Comment.Doc),
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            # Lexemes:
            #  Identifiers
            (r'\bimport\b', Keyword.Reserved, 'import'),
            (r'\bmodule\b', Keyword.Reserved, 'module'),
            (r'\berror\b', Name.Exception),
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            (r"'[^\\]'", String.Char),  # this has to come before the TH quote
            (r'^[_' + uni.Ll + r'][\w\']*', Name.Function),
            (r"'?[_" + uni.Ll + r"][\w']*", Name),
            (r"('')?[" + uni.Lu + r"][\w\']*", Keyword.Type),
            (r"(')[" + uni.Lu + r"][\w\']*", Keyword.Type),
            (r"(')\[[^\]]*\]", Keyword.Type),  # tuples and lists get special treatment in GHC
            (r"(')\([^)]*\)", Keyword.Type),  # ..
            (r"(')[:!#$%&*+.\\/<=>?@^|~-]+", Keyword.Type),  # promoted type operators
            #  Operators
            (r'\\(?![:!#$%&*+.\\/<=>?@^|~-]+)', Name.Function),  # lambda operator
            (r'(<-|::|->|=>|=)(?![:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            (r':[:!#$%&*+.\\/<=>?@^|~-]*', Keyword.Type),  # Constructor operators
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),  # Other operators
            #  Numbers
            (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*_*[pP][+-]?\d(_*\d)*', Number.Float),
            (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*\.[\da-fA-F](_*[\da-fA-F])*'
             r'(_*[pP][+-]?\d(_*\d)*)?', Number.Float),
            (r'\d(_*\d)*_*[eE][+-]?\d(_*\d)*', Number.Float),
            (r'\d(_*\d)*\.\d(_*\d)*(_*[eE][+-]?\d(_*\d)*)?', Number.Float),
            (r'0[bB]_*[01](_*[01])*', Number.Bin),
            (r'0[oO]_*[0-7](_*[0-7])*', Number.Oct),
            (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*', Number.Hex),
            (r'\d(_*\d)*', Number.Integer),
            #  Character/String Literals
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            #  Special
            (r'\[\]', Keyword.Type),
            (r'\(\)', Name.Builtin),
            (r'[][(),;`{}]', Punctuation),
        ],
        'import': [
            # Import statements
            (r'\s+', Text),
            (r'"', String, 'string'),
            # after "funclist" state
            (r'\)', Punctuation, '#pop'),
            (r'qualified\b', Keyword),
            # import X as Y
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(as)(\s+)([' + uni.Lu + r'][\w.]*)',
             bygroups(Name.Namespace, Text, Keyword, Text, Name), '#pop'),
            # import X hiding (functions)
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(hiding)(\s+)(\()',
             bygroups(Name.Namespace, Text, Keyword, Text, Punctuation), 'funclist'),
            # import X (functions)
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            # import X
            (r'[\w.]+', Name.Namespace, '#pop'),
        ],
        'module': [
            (r'\s+', Text),
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            (r'[' + uni.Lu + r'][\w.]*', Name.Namespace, '#pop'),
        ],
        'funclist': [
            (r'\s+', Text),
            (r'[' + uni.Lu + r']\w*', Keyword.Type),
            (r'(_[\w\']+|[' + uni.Ll + r'][\w\']*)', Name.Function),
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        # NOTE: the next four states are shared in the AgdaLexer; make sure
        # any change is compatible with Agda as well or copy over and change
        'comment': [
            # Multiline Comments
            (r'[^-{}]+', Comment.Multiline),
            (r'\{-', Comment.Multiline, '#push'),
            (r'-\}', Comment.Multiline, '#pop'),
            (r'[-{}]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']'", String.Char, '#pop'),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        'escape': [
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            (r'\^[][' + uni.Lu + r'@^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            (r'\s+\\', String.Escape, '#pop'),
        ],
    }


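# Illustrative sketch only: this ``_example_*`` helper is not part of the
# Pygments API; it shows how HaskellLexer turns a small snippet into
# (token type, value) pairs via the standard ``get_tokens()`` entry point.
def _example_haskell_tokens():
    lexer = HaskellLexer()
    source = 'main :: IO ()\nmain = putStrLn "hello"\n'
    # Keywords, operators and string literals come back with the token
    # types assigned by the 'root' state above.
    return list(lexer.get_tokens(source))

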
class HspecLexer(HaskellLexer):
    """
    A Haskell lexer with support for Hspec constructs.

    .. versionadded:: 2.4.0
    """

    name = 'Hspec'
    aliases = ['hspec']
    filenames = []
    mimetypes = []

    tokens = {
        'root': [
            (r'(it\s*)("[^"]*")', bygroups(Text, String.Doc)),
            (r'(describe\s*)("[^"]*")', bygroups(Text, String.Doc)),
            (r'(context\s*)("[^"]*")', bygroups(Text, String.Doc)),
            inherit,
        ],
    }


class IdrisLexer(RegexLexer):
    """
    A lexer for the dependently typed programming language Idris.

    Based on the Haskell and Agda Lexer.

    .. versionadded:: 2.0
    """
    name = 'Idris'
    aliases = ['idris', 'idr']
    filenames = ['*.idr']
    mimetypes = ['text/x-idris']

    reserved = ('case', 'class', 'data', 'default', 'using', 'do', 'else',
                'if', 'in', 'infix[lr]?', 'instance', 'rewrite', 'auto',
                'namespace', 'codata', 'mutual', 'private', 'public', 'abstract',
                'total', 'partial',
                'interface', 'implementation', 'export', 'covering', 'constructor',
                'let', 'proof', 'of', 'then', 'static', 'where', '_', 'with',
                'pattern', 'term', 'syntax', 'prefix',
                'postulate', 'parameters', 'record', 'dsl', 'impossible', 'implicit',
                'tactics', 'intros', 'intro', 'compute', 'refine', 'exact', 'trivial')

    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    directives = ('lib', 'link', 'flag', 'include', 'hide', 'freeze', 'access',
                  'default', 'logging', 'dynamic', 'name', 'error_handlers', 'language')

    tokens = {
        'root': [
            # Comments
            (r'^(\s*)(%%(%s))' % '|'.join(directives),
             bygroups(Text, Keyword.Reserved)),
            (r'(\s*)(--(?![!#$%&*+./<=>?@^|_~:\\]).*?)$', bygroups(Text, Comment.Single)),
            (r'(\s*)(\|{3}.*?)$', bygroups(Text, Comment.Single)),
            (r'(\s*)(\{-)', bygroups(Text, Comment.Multiline), 'comment'),
            # Declaration
            (r'^(\s*)([^\s(){}]+)(\s*)(:)(\s*)',
             bygroups(Text, Name.Function, Text, Operator.Word, Text)),
            #  Identifiers
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Text), 'module'),
            (r"('')?[A-Z][\w\']*", Keyword.Type),
            (r'[a-z][\w\']*', Text),
            #  Special Symbols
            (r'(<-|::|->|=>|=)', Operator.Word),  # specials
            (r'([(){}\[\]:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            # Strings
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            (r'[^\s(){}]+', Text),
            (r'\s+?', Text),  # Whitespace
        ],
        'module': [
            (r'\s+', Text),
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            (r'[A-Z][\w.]*', Name.Namespace, '#pop'),
        ],
        'funclist': [
            (r'\s+', Text),
            (r'[A-Z]\w*', Keyword.Type),
            (r'(_[\w\']+|[a-z][\w\']*)', Name.Function),
            (r'--.*$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        # NOTE: the next four states are shared in the AgdaLexer; make sure
        # any change is compatible with Agda as well or copy over and change
        'comment': [
            # Multiline Comments
            (r'[^-{}]+', Comment.Multiline),
            (r'\{-', Comment.Multiline, '#push'),
            (r'-\}', Comment.Multiline, '#pop'),
            (r'[-{}]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']", String.Char),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        'escape': [
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            (r'\^[][A-Z@^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            (r'\s+\\', String.Escape, '#pop')
        ],
    }


class AgdaLexer(RegexLexer):
    """
    For the `Agda <http://wiki.portal.chalmers.se/agda/pmwiki.php>`_
    dependently typed functional programming language and proof assistant.

    .. versionadded:: 2.0
    """

    name = 'Agda'
    aliases = ['agda']
    filenames = ['*.agda']
    mimetypes = ['text/x-agda']

    reserved = ['abstract', 'codata', 'coinductive', 'constructor', 'data',
                'field', 'forall', 'hiding', 'in', 'inductive', 'infix',
                'infixl', 'infixr', 'instance', 'let', 'mutual', 'open',
                'pattern', 'postulate', 'primitive', 'private',
                'quote', 'quoteGoal', 'quoteTerm',
                'record', 'renaming', 'rewrite', 'syntax', 'tactic',
                'unquote', 'unquoteDecl', 'using', 'where', 'with']

    tokens = {
        'root': [
            # Declaration
            (r'^(\s*)([^\s(){}]+)(\s*)(:)(\s*)',
             bygroups(Text, Name.Function, Text, Operator.Word, Text)),
            # Comments
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            # Holes
            (r'\{!', Comment.Directive, 'hole'),
            # Lexemes:
            #  Identifiers
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Text), 'module'),
            (r'\b(Set|Prop)[\u2080-\u2089]*\b', Keyword.Type),
            #  Special Symbols
            (r'(\(|\)|\{|\})', Operator),
            (r'(\.{1,3}|\||\u03BB|\u2200|\u2192|:|=|->)', Operator.Word),
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            # Strings
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            (r'[^\s(){}]+', Text),
            (r'\s+?', Text),  # Whitespace
        ],
        'hole': [
            # Holes
            (r'[^!{}]+', Comment.Directive),
            (r'\{!', Comment.Directive, '#push'),
            (r'!\}', Comment.Directive, '#pop'),
            (r'[!{}]', Comment.Directive),
        ],
        'module': [
            (r'\{-', Comment.Multiline, 'comment'),
            (r'[a-zA-Z][\w.]*', Name, '#pop'),
            (r'[\W0-9_]+', Text)
        ],
        'comment': HaskellLexer.tokens['comment'],
        'character': HaskellLexer.tokens['character'],
        'string': HaskellLexer.tokens['string'],
        'escape': HaskellLexer.tokens['escape']
    }


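# Illustrative sketch only (hypothetical helper, not part of Pygments): Agda
# goals written as "{! ... !}" are routed through the 'hole' state above and
# therefore come back tagged as Comment.Directive.
def _example_agda_hole():
    lexer = AgdaLexer()
    return [(tok, val) for tok, val in lexer.get_tokens('x = {! 0 !}\n')
            if tok is Comment.Directive]

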
class CryptolLexer(RegexLexer):
    """
    FIXME: A Cryptol2 lexer based on the lexemes defined in the Haskell 98 Report.

    .. versionadded:: 2.0
    """
    name = 'Cryptol'
    aliases = ['cryptol', 'cry']
    filenames = ['*.cry']
    mimetypes = ['text/x-cryptol']

    reserved = ('Arith', 'Bit', 'Cmp', 'False', 'Inf', 'True', 'else',
                'export', 'extern', 'fin', 'if', 'import', 'inf', 'lg2',
                'max', 'min', 'module', 'newtype', 'pragma', 'property',
                'then', 'type', 'where', 'width')
    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    tokens = {
        'root': [
            # Whitespace:
            (r'\s+', Text),
            # (r'--\s*|.*$', Comment.Doc),
            (r'//.*$', Comment.Single),
            (r'/\*', Comment.Multiline, 'comment'),
            # Lexemes:
            #  Identifiers
            (r'\bimport\b', Keyword.Reserved, 'import'),
            (r'\bmodule\b', Keyword.Reserved, 'module'),
            (r'\berror\b', Name.Exception),
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            (r'^[_a-z][\w\']*', Name.Function),
            (r"'?[_a-z][\w']*", Name),
            (r"('')?[A-Z][\w\']*", Keyword.Type),
            #  Operators
            (r'\\(?![:!#$%&*+.\\/<=>?@^|~-]+)', Name.Function),  # lambda operator
            (r'(<-|::|->|=>|=)(?![:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            (r':[:!#$%&*+.\\/<=>?@^|~-]*', Keyword.Type),  # Constructor operators
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),  # Other operators
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[oO][0-7]+', Number.Oct),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            #  Character/String Literals
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            #  Special
            (r'\[\]', Keyword.Type),
            (r'\(\)', Name.Builtin),
            (r'[][(),;`{}]', Punctuation),
        ],
        'import': [
            # Import statements
            (r'\s+', Text),
            (r'"', String, 'string'),
            # after "funclist" state
            (r'\)', Punctuation, '#pop'),
            (r'qualified\b', Keyword),
            # import X as Y
            (r'([A-Z][\w.]*)(\s+)(as)(\s+)([A-Z][\w.]*)',
             bygroups(Name.Namespace, Text, Keyword, Text, Name), '#pop'),
            # import X hiding (functions)
            (r'([A-Z][\w.]*)(\s+)(hiding)(\s+)(\()',
             bygroups(Name.Namespace, Text, Keyword, Text, Punctuation), 'funclist'),
            # import X (functions)
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            # import X
            (r'[\w.]+', Name.Namespace, '#pop'),
        ],
        'module': [
            (r'\s+', Text),
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            (r'[A-Z][\w.]*', Name.Namespace, '#pop'),
        ],
        'funclist': [
            (r'\s+', Text),
            (r'[A-Z]\w*', Keyword.Type),
            (r'(_[\w\']+|[a-z][\w\']*)', Name.Function),
            # TODO: these don't match the comments in docs, remove.
            # (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            # (r'{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        'comment': [
            # Multiline Comments
            (r'[^/*]+', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']'", String.Char, '#pop'),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        'escape': [
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            (r'\^[][A-Z@^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            (r'\s+\\', String.Escape, '#pop'),
        ],
    }

    EXTRA_KEYWORDS = {'join', 'split', 'reverse', 'transpose', 'width',
                      'length', 'tail', '<<', '>>', '<<<', '>>>', 'const',
                      'reg', 'par', 'seq', 'ASSERT', 'undefined', 'error',
                      'trace'}

    def get_tokens_unprocessed(self, text):
        stack = ['root']
        for index, token, value in \
                RegexLexer.get_tokens_unprocessed(self, text, stack):
            if token is Name and value in self.EXTRA_KEYWORDS:
                yield index, Name.Builtin, value
            else:
                yield index, token, value


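# Illustrative sketch only (hypothetical helper, not part of Pygments): the
# overridden get_tokens_unprocessed() above promotes names listed in
# EXTRA_KEYWORDS, e.g. ``join``, from Name to Name.Builtin.
def _example_cryptol_builtins():
    lexer = CryptolLexer()
    return [(tok, val)
            for _, tok, val in lexer.get_tokens_unprocessed('xs = join yss\n')
            if tok is Name.Builtin]

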
class LiterateLexer(Lexer):
    """
    Base class for lexers of literate file formats based on LaTeX or Bird-style
    (prefixing each code line with ">").

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``.  If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.
    """

    bird_re = re.compile(r'(>[ \t]*)(.*\n)')

    def __init__(self, baselexer, **options):
        self.baselexer = baselexer
        Lexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        style = self.options.get('litstyle')
        if style is None:
            style = 'latex' if text.lstrip()[0:1] in '%\\' else 'bird'

        code = ''
        insertions = []
        if style == 'bird':
            # bird-style
            for match in line_re.finditer(text):
                line = match.group()
                m = self.bird_re.match(line)
                if m:
                    insertions.append((len(code),
                                       [(0, Comment.Special, m.group(1))]))
                    code += m.group(2)
                else:
                    insertions.append((len(code), [(0, Text, line)]))
        else:
            # latex-style
            from pygments.lexers.markup import TexLexer
            lxlexer = TexLexer(**self.options)
            codelines = 0
            latex = ''
            for match in line_re.finditer(text):
                line = match.group()
                if codelines:
                    if line.lstrip().startswith('\\end{code}'):
                        codelines = 0
                        latex += line
                    else:
                        code += line
                elif line.lstrip().startswith('\\begin{code}'):
                    codelines = 1
                    latex += line
                    insertions.append((len(code),
                                       list(lxlexer.get_tokens_unprocessed(latex))))
                    latex = ''
                else:
                    latex += line
            insertions.append((len(code),
                               list(lxlexer.get_tokens_unprocessed(latex))))
        yield from do_insertions(insertions, self.baselexer.get_tokens_unprocessed(code))


class LiterateHaskellLexer(LiterateLexer):
    """
    For Literate Haskell (Bird-style or LaTeX) source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``.  If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.

    .. versionadded:: 0.9
    """
    name = 'Literate Haskell'
    aliases = ['lhs', 'literate-haskell', 'lhaskell']
    filenames = ['*.lhs']
    mimetypes = ['text/x-literate-haskell']

    def __init__(self, **options):
        hslexer = HaskellLexer(**options)
        LiterateLexer.__init__(self, hslexer, **options)


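# Illustrative sketch only (hypothetical helper, not part of Pygments): with
# no ``litstyle`` option and a first non-whitespace character that is neither
# "%" nor "\", LiterateLexer treats the input as Bird-style, emitting the ">"
# prefixes as Comment.Special and lexing the remainder of each code line with
# the wrapped HaskellLexer.
def _example_literate_bird():
    lexer = LiterateHaskellLexer()
    text = 'Some prose.\n\n> main = return ()\n'
    return list(lexer.get_tokens(text))

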
class LiterateIdrisLexer(LiterateLexer):
    """
    For Literate Idris (Bird-style or LaTeX) source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``.  If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.

    .. versionadded:: 2.0
    """
    name = 'Literate Idris'
    aliases = ['lidr', 'literate-idris', 'lidris']
    filenames = ['*.lidr']
    mimetypes = ['text/x-literate-idris']

    def __init__(self, **options):
        hslexer = IdrisLexer(**options)
        LiterateLexer.__init__(self, hslexer, **options)


class LiterateAgdaLexer(LiterateLexer):
    """
    For Literate Agda source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``.  If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.

    .. versionadded:: 2.0
    """
    name = 'Literate Agda'
    aliases = ['lagda', 'literate-agda']
    filenames = ['*.lagda']
    mimetypes = ['text/x-literate-agda']

    def __init__(self, **options):
        agdalexer = AgdaLexer(**options)
        LiterateLexer.__init__(self, agdalexer, litstyle='latex', **options)


class LiterateCryptolLexer(LiterateLexer):
    """
    For Literate Cryptol (Bird-style or LaTeX) source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``.  If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.

    .. versionadded:: 2.0
    """
    name = 'Literate Cryptol'
    aliases = ['lcry', 'literate-cryptol', 'lcryptol']
    filenames = ['*.lcry']
    mimetypes = ['text/x-literate-cryptol']

    def __init__(self, **options):
        crylexer = CryptolLexer(**options)
        LiterateLexer.__init__(self, crylexer, **options)


class KokaLexer(RegexLexer):
    """
    Lexer for the `Koka <http://koka.codeplex.com>`_
    language.

    .. versionadded:: 1.6
    """

    name = 'Koka'
    aliases = ['koka']
    filenames = ['*.kk', '*.kki']
    mimetypes = ['text/x-koka']

    keywords = [
        'infix', 'infixr', 'infixl',
        'type', 'cotype', 'rectype', 'alias',
        'struct', 'con',
        'fun', 'function', 'val', 'var',
        'external',
        'if', 'then', 'else', 'elif', 'return', 'match',
        'private', 'public',
        'module', 'import', 'as',
        'include', 'inline',
        'rec',
        'try', 'yield', 'enum',
        'interface', 'instance',
    ]

    # keywords that are followed by a type
    typeStartKeywords = [
        'type', 'cotype', 'rectype', 'alias', 'struct', 'enum',
    ]

    # keywords valid in a type
    typekeywords = [
        'forall', 'exists', 'some', 'with',
    ]

    # builtin names and special names
    builtin = [
        'for', 'while', 'repeat',
        'foreach', 'foreach-indexed',
        'error', 'catch', 'finally',
        'cs', 'js', 'file', 'ref', 'assigned',
    ]

    # symbols that can be in an operator
    symbols = r'[$%&*+@!/\\^~=.:\-?|<>]+'

    # symbol boundary: an operator keyword should not be followed by any of these
    sboundary = '(?!' + symbols + ')'

    # name boundary: a keyword should not be followed by any of these
    boundary = r'(?![\w/])'

    # koka token abstractions
    tokenType = Name.Attribute
    tokenTypeDef = Name.Class
    tokenConstructor = Generic.Emph

    # main lexer
    tokens = {
        'root': [
            include('whitespace'),

            # go into type mode
            (r'::?' + sboundary, tokenType, 'type'),
            (r'(alias)(\s+)([a-z]\w*)?', bygroups(Keyword, Text, tokenTypeDef),
             'alias-type'),
            (r'(struct)(\s+)([a-z]\w*)?', bygroups(Keyword, Text, tokenTypeDef),
             'struct-type'),
            ((r'(%s)' % '|'.join(typeStartKeywords)) +
             r'(\s+)([a-z]\w*)?', bygroups(Keyword, Text, tokenTypeDef),
             'type'),

            # special sequences of tokens (we use ?: for non-capturing group as
            # required by 'bygroups')
            (r'(module)(\s+)(interface\s+)?((?:[a-z]\w*/)*[a-z]\w*)',
             bygroups(Keyword, Text, Keyword, Name.Namespace)),
            (r'(import)(\s+)((?:[a-z]\w*/)*[a-z]\w*)'
             r'(?:(\s*)(=)(\s*)((?:qualified\s*)?)'
             r'((?:[a-z]\w*/)*[a-z]\w*))?',
             bygroups(Keyword, Text, Name.Namespace, Text, Keyword, Text,
                      Keyword, Name.Namespace)),

            (r'(^(?:(?:public|private)\s*)?(?:function|fun|val))'
             r'(\s+)([a-z]\w*|\((?:' + symbols + r'|/)\))',
             bygroups(Keyword, Text, Name.Function)),
            (r'(^(?:(?:public|private)\s*)?external)(\s+)(inline\s+)?'
             r'([a-z]\w*|\((?:' + symbols + r'|/)\))',
             bygroups(Keyword, Text, Keyword, Name.Function)),

            # keywords
            (r'(%s)' % '|'.join(typekeywords) + boundary, Keyword.Type),
            (r'(%s)' % '|'.join(keywords) + boundary, Keyword),
            (r'(%s)' % '|'.join(builtin) + boundary, Keyword.Pseudo),
            (r'::?|:=|\->|[=.]' + sboundary, Keyword),

            # names
            (r'((?:[a-z]\w*/)*)([A-Z]\w*)',
             bygroups(Name.Namespace, tokenConstructor)),
            (r'((?:[a-z]\w*/)*)([a-z]\w*)', bygroups(Name.Namespace, Name)),
            (r'((?:[a-z]\w*/)*)(\((?:' + symbols + r'|/)\))',
             bygroups(Name.Namespace, Name)),
            (r'_\w*', Name.Variable),

            # literal string
            (r'@"', String.Double, 'litstring'),

            # operators
            (symbols + "|/(?![*/])", Operator),
            (r'`', Operator),
            (r'[{}()\[\];,]', Punctuation),

            # literals. No check for literal characters with len > 1
            (r'[0-9]+\.[0-9]+([eE][\-+]?[0-9]+)?', Number.Float),
            (r'0[xX][0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),

            (r"'", String.Char, 'char'),
            (r'"', String.Double, 'string'),
        ],

        # type started by alias
        'alias-type': [
            (r'=', Keyword),
            include('type')
        ],

        # type started by struct
        'struct-type': [
            (r'(?=\((?!,*\)))', Punctuation, '#pop'),
            include('type')
        ],

        # type started by colon
        'type': [
            (r'[(\[<]', tokenType, 'type-nested'),
            include('type-content')
        ],

        # type nested in brackets: can contain parameters, comma etc.
        'type-nested': [
            (r'[)\]>]', tokenType, '#pop'),
            (r'[(\[<]', tokenType, 'type-nested'),
            (r',', tokenType),
            (r'([a-z]\w*)(\s*)(:)(?!:)',
             bygroups(Name, Text, tokenType)),  # parameter name
            include('type-content')
        ],

        # shared contents of a type
        'type-content': [
            include('whitespace'),

            # keywords
            (r'(%s)' % '|'.join(typekeywords) + boundary, Keyword),
            (r'(?=((%s)' % '|'.join(keywords) + boundary + '))',
             Keyword, '#pop'),  # need to match because names overlap...

            # kinds
            (r'[EPHVX]' + boundary, tokenType),

            # type names
            (r'[a-z][0-9]*(?![\w/])', tokenType),
            (r'_\w*', tokenType.Variable),  # Generic.Emph
            (r'((?:[a-z]\w*/)*)([A-Z]\w*)',
             bygroups(Name.Namespace, tokenType)),
            (r'((?:[a-z]\w*/)*)([a-z]\w+)',
             bygroups(Name.Namespace, tokenType)),

            # type keyword operators
            (r'::|->|[.:|]', tokenType),

            # catchall
            default('#pop')
        ],

        # comments and literals
        'whitespace': [
            (r'\n\s*#.*$', Comment.Preproc),
            (r'\s+', Text),
            (r'/\*', Comment.Multiline, 'comment'),
            (r'//.*$', Comment.Single)
        ],
        'comment': [
            (r'[^/*]+', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline),
        ],
        'litstring': [
            (r'[^"]+', String.Double),
            (r'""', String.Escape),
            (r'"', String.Double, '#pop'),
        ],
        'string': [
            (r'[^\\"\n]+', String.Double),
            include('escape-sequence'),
            (r'["\n]', String.Double, '#pop'),
        ],
        'char': [
            (r'[^\\\'\n]+', String.Char),
            include('escape-sequence'),
            (r'[\'\n]', String.Char, '#pop'),
        ],
        'escape-sequence': [
            (r'\\[nrt\\"\']', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
            (r'\\u[0-9a-fA-F]{4}', String.Escape),
            # Yes, \U literals are 6 hex digits.
            (r'\\U[0-9a-fA-F]{6}', String.Escape)
        ]
    }
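

# Illustrative sketch only (hypothetical helper, not part of Pygments): a ":"
# or "::" that is not followed by another operator character switches
# KokaLexer into the 'type' state, so the annotations below come back with
# the Name.Attribute token type used for Koka types.
def _example_koka_types():
    lexer = KokaLexer()
    return list(lexer.get_tokens('fun inc( x : int ) : int { x + 1 }\n'))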