# -*- coding: utf-8 -*-
"""
    pygments.lexers.haskell
    ~~~~~~~~~~~~~~~~~~~~~~~

    Lexers for Haskell and related languages.
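
    A minimal usage sketch (any lexer defined here can stand in for
    ``HaskellLexer``; the sample code string is only illustrative)::

        from pygments import highlight
        from pygments.formatters import TerminalFormatter
        from pygments.lexers.haskell import HaskellLexer

        print(highlight('main = putStrLn "hello"', HaskellLexer(),
                        TerminalFormatter()))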

    :copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import re

from pygments.lexer import Lexer, RegexLexer, bygroups, do_insertions, \
    default, include, inherit
from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
    Number, Punctuation, Generic
from pygments import unistring as uni

__all__ = ['HaskellLexer', 'HspecLexer', 'IdrisLexer', 'AgdaLexer', 'CryptolLexer',
           'LiterateHaskellLexer', 'LiterateIdrisLexer', 'LiterateAgdaLexer',
           'LiterateCryptolLexer', 'KokaLexer']


line_re = re.compile('.*?\n')


class HaskellLexer(RegexLexer):
    """
    A Haskell lexer based on the lexemes defined in the Haskell 98 Report.

    .. versionadded:: 0.8
    """
    name = 'Haskell'
    aliases = ['haskell', 'hs']
    filenames = ['*.hs']
    mimetypes = ['text/x-haskell']

    flags = re.MULTILINE | re.UNICODE

    reserved = ('case', 'class', 'data', 'default', 'deriving', 'do', 'else',
                'family', 'if', 'in', 'infix[lr]?', 'instance',
                'let', 'newtype', 'of', 'then', 'type', 'where', '_')
    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    tokens = {
        'root': [
            # Whitespace:
            (r'\s+', Text),
            # (r'--\s*|.*$', Comment.Doc),
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            # Lexemes:
            #  Identifiers
            (r'\bimport\b', Keyword.Reserved, 'import'),
            (r'\bmodule\b', Keyword.Reserved, 'module'),
            (r'\berror\b', Name.Exception),
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            (r"'[^\\]'", String.Char),  # this has to come before the TH quote
            (r'^[_' + uni.Ll + r'][\w\']*', Name.Function),
            (r"'?[_" + uni.Ll + r"][\w']*", Name),
            (r"('')?[" + uni.Lu + r"][\w\']*", Keyword.Type),
            (r"(')[" + uni.Lu + r"][\w\']*", Keyword.Type),
            (r"(')\[[^\]]*\]", Keyword.Type),  # tuples and lists get special treatment in GHC
            (r"(')\([^)]*\)", Keyword.Type),  # ..
            #  Operators
            (r'\\(?![:!#$%&*+.\\/<=>?@^|~-]+)', Name.Function),  # lambda operator
            (r'(<-|::|->|=>|=)(?![:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            (r':[:!#$%&*+.\\/<=>?@^|~-]*', Keyword.Type),  # Constructor operators
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),  # Other operators
            #  Numbers
            (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*_*[pP][+-]?\d(_*\d)*', Number.Float),
            (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*\.[\da-fA-F](_*[\da-fA-F])*'
             r'(_*[pP][+-]?\d(_*\d)*)?', Number.Float),
            (r'\d(_*\d)*_*[eE][+-]?\d(_*\d)*', Number.Float),
            (r'\d(_*\d)*\.\d(_*\d)*(_*[eE][+-]?\d(_*\d)*)?', Number.Float),
            (r'0[bB]_*[01](_*[01])*', Number.Bin),
            (r'0[oO]_*[0-7](_*[0-7])*', Number.Oct),
            (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*', Number.Hex),
            (r'\d(_*\d)*', Number.Integer),
            #  Character/String Literals
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            #  Special
            (r'\[\]', Keyword.Type),
            (r'\(\)', Name.Builtin),
            (r'[][(),;`{}]', Punctuation),
        ],
        'import': [
            # Import statements
            (r'\s+', Text),
            (r'"', String, 'string'),
            # after "funclist" state
            (r'\)', Punctuation, '#pop'),
            (r'qualified\b', Keyword),
            # import X as Y
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(as)(\s+)([' + uni.Lu + r'][\w.]*)',
             bygroups(Name.Namespace, Text, Keyword, Text, Name), '#pop'),
            # import X hiding (functions)
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(hiding)(\s+)(\()',
             bygroups(Name.Namespace, Text, Keyword, Text, Punctuation), 'funclist'),
            # import X (functions)
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            # import X
            (r'[\w.]+', Name.Namespace, '#pop'),
        ],
        'module': [
            (r'\s+', Text),
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            (r'[' + uni.Lu + r'][\w.]*', Name.Namespace, '#pop'),
        ],
        'funclist': [
            (r'\s+', Text),
            (r'[' + uni.Lu + r']\w*', Keyword.Type),
            (r'(_[\w\']+|[' + uni.Ll + r'][\w\']*)', Name.Function),
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
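            # e.g. in ``import Data.Map (Map, (!), insert)`` the inner '(' of
            # the parenthesized operator pushes two copies, so its matching
            # ')', which pops two, leaves the enclosing list level intact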
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        # NOTE: the next four states are shared in the AgdaLexer; make sure
        # any change is compatible with Agda as well or copy over and change
        'comment': [
            # Multiline Comments
            (r'[^-{}]+', Comment.Multiline),
            (r'\{-', Comment.Multiline, '#push'),
            (r'-\}', Comment.Multiline, '#pop'),
            (r'[-{}]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']'", String.Char, '#pop'),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        'escape': [
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            (r'\^[][' + uni.Lu + r'@^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
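            # string "gaps": backslash, whitespace, backslash lets a literal
            # continue across lines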
            (r'\s+\\', String.Escape, '#pop'),
        ],
    }


class HspecLexer(HaskellLexer):
    """
    A Haskell lexer with support for Hspec constructs.
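
    For example, the spec strings after ``describe``, ``context`` and ``it``
    are highlighted as documentation (``shouldBe`` is ordinary Hspec code and
    only illustrative)::

        describe "reverse" $ do
            it "reverses a list" $
                reverse [1, 2, 3] `shouldBe` [3, 2, 1]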

    .. versionadded:: 2.4.0
    """

    name = 'Hspec'
    aliases = ['hspec']
    filenames = []
    mimetypes = []

    tokens = {
        'root': [
            (r'(it\s*)("[^"]*")', bygroups(Text, String.Doc)),
            (r'(describe\s*)("[^"]*")', bygroups(Text, String.Doc)),
            (r'(context\s*)("[^"]*")', bygroups(Text, String.Doc)),
            inherit,
        ],
    }


class IdrisLexer(RegexLexer):
    """
    A lexer for the dependently typed programming language Idris.

    Based on the Haskell and Agda Lexer.

    .. versionadded:: 2.0
    """
    name = 'Idris'
    aliases = ['idris', 'idr']
    filenames = ['*.idr']
    mimetypes = ['text/x-idris']

    reserved = ('case', 'class', 'data', 'default', 'using', 'do', 'else',
                'if', 'in', 'infix[lr]?', 'instance', 'rewrite', 'auto',
                'namespace', 'codata', 'mutual', 'private', 'public', 'abstract',
                'total', 'partial',
                'let', 'proof', 'of', 'then', 'static', 'where', '_', 'with',
                'pattern',  'term',  'syntax', 'prefix',
                'postulate', 'parameters', 'record', 'dsl', 'impossible', 'implicit',
                'tactics', 'intros', 'intro', 'compute', 'refine', 'exact', 'trivial')

    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    directives = ('lib', 'link', 'flag', 'include', 'hide', 'freeze', 'access',
                  'default', 'logging', 'dynamic', 'name', 'error_handlers', 'language')

    tokens = {
        'root': [
            # Comments
            (r'^(\s*)(%%%s)' % '|'.join(directives),
             bygroups(Text, Keyword.Reserved)),
            (r'(\s*)(--(?![!#$%&*+./<=>?@^|_~:\\]).*?)$', bygroups(Text, Comment.Single)),
            (r'(\s*)(\|{3}.*?)$', bygroups(Text, Comment.Single)),
            (r'(\s*)(\{-)', bygroups(Text, Comment.Multiline), 'comment'),
            # Declaration
            (r'^(\s*)([^\s(){}]+)(\s*)(:)(\s*)',
             bygroups(Text, Name.Function, Text, Operator.Word, Text)),
            #  Identifiers
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Text), 'module'),
            (r"('')?[A-Z][\w\']*", Keyword.Type),
            (r'[a-z][\w\']*', Text),
            #  Special Symbols
            (r'(<-|::|->|=>|=)', Operator.Word),  # specials
            (r'([(){}\[\]:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            # Strings
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            (r'[^\s(){}]+', Text),
            (r'\s+?', Text),  # Whitespace
        ],
        'module': [
            (r'\s+', Text),
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            (r'[A-Z][\w.]*', Name.Namespace, '#pop'),
        ],
        'funclist': [
            (r'\s+', Text),
            (r'[A-Z]\w*', Keyword.Type),
            (r'(_[\w\']+|[a-z][\w\']*)', Name.Function),
            (r'--.*$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        # NOTE: the next four states are shared in the AgdaLexer; make sure
        # any change is compatible with Agda as well or copy over and change
        'comment': [
            # Multiline Comments
            (r'[^-{}]+', Comment.Multiline),
            (r'\{-', Comment.Multiline, '#push'),
            (r'-\}', Comment.Multiline, '#pop'),
            (r'[-{}]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']", String.Char),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        'escape': [
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            (r'\^[][A-Z@^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            (r'\s+\\', String.Escape, '#pop')
        ],
    }


class AgdaLexer(RegexLexer):
    """
    For the `Agda <http://wiki.portal.chalmers.se/agda/pmwiki.php>`_
    dependently typed functional programming language and proof assistant.

    .. versionadded:: 2.0
    """

    name = 'Agda'
    aliases = ['agda']
    filenames = ['*.agda']
    mimetypes = ['text/x-agda']

    reserved = ['abstract', 'codata', 'coinductive', 'constructor', 'data',
                'field', 'forall', 'hiding', 'in', 'inductive', 'infix',
                'infixl', 'infixr', 'instance', 'let', 'mutual', 'open',
                'pattern', 'postulate', 'primitive', 'private',
                'quote', 'quoteGoal', 'quoteTerm',
                'record', 'renaming', 'rewrite', 'syntax', 'tactic',
                'unquote', 'unquoteDecl', 'using', 'where', 'with']

    tokens = {
        'root': [
            # Declaration
            (r'^(\s*)([^\s(){}]+)(\s*)(:)(\s*)',
             bygroups(Text, Name.Function, Text, Operator.Word, Text)),
            # Comments
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            # Holes
            (r'\{!', Comment.Directive, 'hole'),
            # Lexemes:
            #  Identifiers
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Text), 'module'),
            (u'\\b(Set|Prop)[\u2080-\u2089]*\\b', Keyword.Type),
            #  Special Symbols
            (r'(\(|\)|\{|\})', Operator),
            (u'(\\.{1,3}|\\||\u03BB|\u2200|\u2192|:|=|->)', Operator.Word),
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            # Strings
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            (r'[^\s(){}]+', Text),
            (r'\s+?', Text),  # Whitespace
        ],
        'hole': [
            # Holes
            (r'[^!{}]+', Comment.Directive),
            (r'\{!', Comment.Directive, '#push'),
            (r'!\}', Comment.Directive, '#pop'),
            (r'[!{}]', Comment.Directive),
        ],
        'module': [
            (r'\{-', Comment.Multiline, 'comment'),
            (r'[a-zA-Z][\w.]*', Name, '#pop'),
            (r'[\W0-9_]+', Text)
        ],
        'comment': HaskellLexer.tokens['comment'],
        'character': HaskellLexer.tokens['character'],
        'string': HaskellLexer.tokens['string'],
        'escape': HaskellLexer.tokens['escape']
    }


class CryptolLexer(RegexLexer):
    """
    FIXME: A Cryptol2 lexer based on the lexemes defined in the Haskell 98 Report.

    .. versionadded:: 2.0
    """
    name = 'Cryptol'
    aliases = ['cryptol', 'cry']
    filenames = ['*.cry']
    mimetypes = ['text/x-cryptol']

    reserved = ('Arith', 'Bit', 'Cmp', 'False', 'Inf', 'True', 'else',
                'export', 'extern', 'fin', 'if', 'import', 'inf', 'lg2',
                'max', 'min', 'module', 'newtype', 'pragma', 'property',
                'then', 'type', 'where', 'width')
    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    tokens = {
        'root': [
            # Whitespace:
            (r'\s+', Text),
            # (r'--\s*|.*$', Comment.Doc),
            (r'//.*$', Comment.Single),
            (r'/\*', Comment.Multiline, 'comment'),
            # Lexemes:
            #  Identifiers
            (r'\bimport\b', Keyword.Reserved, 'import'),
            (r'\bmodule\b', Keyword.Reserved, 'module'),
            (r'\berror\b', Name.Exception),
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            (r'^[_a-z][\w\']*', Name.Function),
            (r"'?[_a-z][\w']*", Name),
            (r"('')?[A-Z][\w\']*", Keyword.Type),
            #  Operators
            (r'\\(?![:!#$%&*+.\\/<=>?@^|~-]+)', Name.Function),  # lambda operator
            (r'(<-|::|->|=>|=)(?![:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            (r':[:!#$%&*+.\\/<=>?@^|~-]*', Keyword.Type),  # Constructor operators
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),  # Other operators
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[oO][0-7]+', Number.Oct),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            #  Character/String Literals
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            #  Special
            (r'\[\]', Keyword.Type),
            (r'\(\)', Name.Builtin),
            (r'[][(),;`{}]', Punctuation),
        ],
        'import': [
            # Import statements
            (r'\s+', Text),
            (r'"', String, 'string'),
            # after "funclist" state
            (r'\)', Punctuation, '#pop'),
            (r'qualified\b', Keyword),
            # import X as Y
            (r'([A-Z][\w.]*)(\s+)(as)(\s+)([A-Z][\w.]*)',
             bygroups(Name.Namespace, Text, Keyword, Text, Name), '#pop'),
            # import X hiding (functions)
            (r'([A-Z][\w.]*)(\s+)(hiding)(\s+)(\()',
             bygroups(Name.Namespace, Text, Keyword, Text, Punctuation), 'funclist'),
            # import X (functions)
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            # import X
            (r'[\w.]+', Name.Namespace, '#pop'),
        ],
        'module': [
            (r'\s+', Text),
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            (r'[A-Z][\w.]*', Name.Namespace, '#pop'),
        ],
        'funclist': [
            (r'\s+', Text),
            (r'[A-Z]\w*', Keyword.Type),
            (r'(_[\w\']+|[a-z][\w\']*)', Name.Function),
            # TODO: these don't match the comments in docs, remove.
            # (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            # (r'{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        'comment': [
            # Multiline Comments
            (r'[^/*]+', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']'", String.Char, '#pop'),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        'escape': [
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            (r'\^[][A-Z@^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            (r'\s+\\', String.Escape, '#pop'),
        ],
    }

    EXTRA_KEYWORDS = {'join', 'split', 'reverse', 'transpose', 'width',
                      'length', 'tail', '<<', '>>', '<<<', '>>>', 'const',
                      'reg', 'par', 'seq', 'ASSERT', 'undefined', 'error',
                      'trace'}

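    # Promote plain Name tokens that match EXTRA_KEYWORDS to Name.Builtin
    # as they come out of the base RegexLexer machinery.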
    def get_tokens_unprocessed(self, text):
        stack = ['root']
        for index, token, value in \
                RegexLexer.get_tokens_unprocessed(self, text, stack):
            if token is Name and value in self.EXTRA_KEYWORDS:
                yield index, Name.Builtin, value
            else:
                yield index, token, value


class LiterateLexer(Lexer):
    """
    Base class for lexers of literate file formats based on LaTeX or Bird-style
    (prefixing each code line with ">").

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``.  If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.
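
    For instance, Bird-style input interleaves prose with ``>``-prefixed
    code lines (the Haskell sample below is only illustrative)::

        This sentence is prose and is passed through as plain text.

        > main :: IO ()
        > main = putStrLn "only these lines reach the wrapped lexer"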
    """

    bird_re = re.compile(r'(>[ \t]*)(.*\n)')

    def __init__(self, baselexer, **options):
        self.baselexer = baselexer
        Lexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        style = self.options.get('litstyle')
        if style is None:
            style = (text.lstrip()[0:1] in '%\\') and 'latex' or 'bird'

        code = ''
        insertions = []
        if style == 'bird':
            # bird-style
            for match in line_re.finditer(text):
                line = match.group()
                m = self.bird_re.match(line)
                if m:
                    insertions.append((len(code),
                                       [(0, Comment.Special, m.group(1))]))
                    code += m.group(2)
                else:
                    insertions.append((len(code), [(0, Text, line)]))
        else:
            # latex-style
            from pygments.lexers.markup import TexLexer
            lxlexer = TexLexer(**self.options)
            codelines = 0
            latex = ''
            for match in line_re.finditer(text):
                line = match.group()
                if codelines:
                    if line.lstrip().startswith('\\end{code}'):
                        codelines = 0
                        latex += line
                    else:
                        code += line
                elif line.lstrip().startswith('\\begin{code}'):
                    codelines = 1
                    latex += line
                    insertions.append((len(code),
                                       list(lxlexer.get_tokens_unprocessed(latex))))
                    latex = ''
                else:
                    latex += line
            insertions.append((len(code),
                               list(lxlexer.get_tokens_unprocessed(latex))))
        for item in do_insertions(insertions, self.baselexer.get_tokens_unprocessed(code)):
            yield item


class LiterateHaskellLexer(LiterateLexer):
    """
    For Literate Haskell (Bird-style or LaTeX) source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``.  If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.

    .. versionadded:: 0.9
    """
    name = 'Literate Haskell'
    aliases = ['lhs', 'literate-haskell', 'lhaskell']
    filenames = ['*.lhs']
    mimetypes = ['text/x-literate-haskell']

    def __init__(self, **options):
        hslexer = HaskellLexer(**options)
        LiterateLexer.__init__(self, hslexer, **options)


class LiterateIdrisLexer(LiterateLexer):
    """
    For Literate Idris (Bird-style or LaTeX) source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``.  If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.

    .. versionadded:: 2.0
    """
    name = 'Literate Idris'
    aliases = ['lidr', 'literate-idris', 'lidris']
    filenames = ['*.lidr']
    mimetypes = ['text/x-literate-idris']

    def __init__(self, **options):
        hslexer = IdrisLexer(**options)
        LiterateLexer.__init__(self, hslexer, **options)


class LiterateAgdaLexer(LiterateLexer):
    """
    For Literate Agda source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``.  If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.

    .. versionadded:: 2.0
    """
    name = 'Literate Agda'
    aliases = ['lagda', 'literate-agda']
    filenames = ['*.lagda']
    mimetypes = ['text/x-literate-agda']

    def __init__(self, **options):
        agdalexer = AgdaLexer(**options)
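        # Literate Agda sources are LaTeX-based, so the style is forced
        # here rather than autodetected.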
        LiterateLexer.__init__(self, agdalexer, litstyle='latex', **options)


class LiterateCryptolLexer(LiterateLexer):
    """
    For Literate Cryptol (Bird-style or LaTeX) source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``.  If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.

    .. versionadded:: 2.0
    """
    name = 'Literate Cryptol'
    aliases = ['lcry', 'literate-cryptol', 'lcryptol']
    filenames = ['*.lcry']
    mimetypes = ['text/x-literate-cryptol']

    def __init__(self, **options):
        crylexer = CryptolLexer(**options)
        LiterateLexer.__init__(self, crylexer, **options)


class KokaLexer(RegexLexer):
    """
    Lexer for the `Koka <http://koka.codeplex.com>`_
    language.

    .. versionadded:: 1.6
    """

    name = 'Koka'
    aliases = ['koka']
    filenames = ['*.kk', '*.kki']
    mimetypes = ['text/x-koka']

    keywords = [
        'infix', 'infixr', 'infixl',
        'type', 'cotype', 'rectype', 'alias',
        'struct', 'con',
        'fun', 'function', 'val', 'var',
        'external',
        'if', 'then', 'else', 'elif', 'return', 'match',
        'private', 'public',
        'module', 'import', 'as',
        'include', 'inline',
        'rec',
        'try', 'yield', 'enum',
        'interface', 'instance',
    ]

    # keywords that are followed by a type
    typeStartKeywords = [
        'type', 'cotype', 'rectype', 'alias', 'struct', 'enum',
    ]

    # keywords valid in a type
    typekeywords = [
        'forall', 'exists', 'some', 'with',
    ]

    # builtin names and special names
    builtin = [
        'for', 'while', 'repeat',
        'foreach', 'foreach-indexed',
        'error', 'catch', 'finally',
        'cs', 'js', 'file', 'ref', 'assigned',
    ]

    # symbols that can be in an operator
    symbols = r'[$%&*+@!/\\^~=.:\-?|<>]+'

    # symbol boundary: an operator keyword should not be followed by any of these
    sboundary = '(?!' + symbols + ')'

    # name boundary: a keyword should not be followed by any of these
    boundary = r'(?![\w/])'

    # koka token abstractions
    tokenType = Name.Attribute
    tokenTypeDef = Name.Class
    tokenConstructor = Generic.Emph

    # main lexer
    tokens = {
        'root': [
            include('whitespace'),

            # go into type mode
            (r'::?' + sboundary, tokenType, 'type'),
            (r'(alias)(\s+)([a-z]\w*)?', bygroups(Keyword, Text, tokenTypeDef),
             'alias-type'),
            (r'(struct)(\s+)([a-z]\w*)?', bygroups(Keyword, Text, tokenTypeDef),
             'struct-type'),
            ((r'(%s)' % '|'.join(typeStartKeywords)) +
             r'(\s+)([a-z]\w*)?', bygroups(Keyword, Text, tokenTypeDef),
             'type'),

            # special sequences of tokens (we use ?: for non-capturing group as
            # required by 'bygroups')
            (r'(module)(\s+)(interface\s+)?((?:[a-z]\w*/)*[a-z]\w*)',
             bygroups(Keyword, Text, Keyword, Name.Namespace)),
            (r'(import)(\s+)((?:[a-z]\w*/)*[a-z]\w*)'
             r'(?:(\s*)(=)(\s*)((?:qualified\s*)?)'
             r'((?:[a-z]\w*/)*[a-z]\w*))?',
             bygroups(Keyword, Text, Name.Namespace, Text, Keyword, Text,
                      Keyword, Name.Namespace)),

            (r'(^(?:(?:public|private)\s*)?(?:function|fun|val))'
             r'(\s+)([a-z]\w*|\((?:' + symbols + r'|/)\))',
             bygroups(Keyword, Text, Name.Function)),
            (r'(^(?:(?:public|private)\s*)?external)(\s+)(inline\s+)?'
             r'([a-z]\w*|\((?:' + symbols + r'|/)\))',
             bygroups(Keyword, Text, Keyword, Name.Function)),

            # keywords
            (r'(%s)' % '|'.join(typekeywords) + boundary, Keyword.Type),
            (r'(%s)' % '|'.join(keywords) + boundary, Keyword),
            (r'(%s)' % '|'.join(builtin) + boundary, Keyword.Pseudo),
            (r'::?|:=|\->|[=.]' + sboundary, Keyword),

            # names
            (r'((?:[a-z]\w*/)*)([A-Z]\w*)',
             bygroups(Name.Namespace, tokenConstructor)),
            (r'((?:[a-z]\w*/)*)([a-z]\w*)', bygroups(Name.Namespace, Name)),
            (r'((?:[a-z]\w*/)*)(\((?:' + symbols + r'|/)\))',
             bygroups(Name.Namespace, Name)),
            (r'_\w*', Name.Variable),

            # literal string
            (r'@"', String.Double, 'litstring'),

            # operators
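            # a lone '/' only counts as an operator when it does not start
            # a '//' or '/*' comment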
            (symbols + "|/(?![*/])", Operator),
            (r'`', Operator),
            (r'[{}()\[\];,]', Punctuation),

            # literals. No check for literal characters with len > 1
            (r'[0-9]+\.[0-9]+([eE][\-+]?[0-9]+)?', Number.Float),
            (r'0[xX][0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),

            (r"'", String.Char, 'char'),
            (r'"', String.Double, 'string'),
        ],

        # type started by alias
        'alias-type': [
            (r'=', Keyword),
            include('type')
        ],

        # type started by struct
        'struct-type': [
            (r'(?=\((?!,*\)))', Punctuation, '#pop'),
            include('type')
        ],

        # type started by colon
        'type': [
            (r'[(\[<]', tokenType, 'type-nested'),
            include('type-content')
        ],

        # type nested in brackets: can contain parameters, comma etc.
        'type-nested': [
            (r'[)\]>]', tokenType, '#pop'),
            (r'[(\[<]', tokenType, 'type-nested'),
            (r',', tokenType),
            (r'([a-z]\w*)(\s*)(:)(?!:)',
             bygroups(Name, Text, tokenType)),  # parameter name
            include('type-content')
        ],

        # shared contents of a type
        'type-content': [
            include('whitespace'),

            # keywords
            (r'(%s)' % '|'.join(typekeywords) + boundary, Keyword),
            (r'(?=((%s)' % '|'.join(keywords) + boundary + '))',
             Keyword, '#pop'),  # need to match because names overlap...

            # kinds
            (r'[EPHVX]' + boundary, tokenType),

            # type names
            (r'[a-z][0-9]*(?![\w/])', tokenType),
            (r'_\w*', tokenType.Variable),  # Generic.Emph
            (r'((?:[a-z]\w*/)*)([A-Z]\w*)',
             bygroups(Name.Namespace, tokenType)),
            (r'((?:[a-z]\w*/)*)([a-z]\w+)',
             bygroups(Name.Namespace, tokenType)),

            # type keyword operators
            (r'::|->|[.:|]', tokenType),

            # catchall
            default('#pop')
        ],

        # comments and literals
        'whitespace': [
            (r'\n\s*#.*$', Comment.Preproc),
            (r'\s+', Text),
            (r'/\*', Comment.Multiline, 'comment'),
            (r'//.*$', Comment.Single)
        ],
        'comment': [
            (r'[^/*]+', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline),
        ],
        'litstring': [
            (r'[^"]+', String.Double),
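            # a doubled quote stands for a literal '"' inside an @"..." string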
            (r'""', String.Escape),
            (r'"', String.Double, '#pop'),
        ],
        'string': [
            (r'[^\\"\n]+', String.Double),
            include('escape-sequence'),
            (r'["\n]', String.Double, '#pop'),
        ],
        'char': [
            (r'[^\\\'\n]+', String.Char),
            include('escape-sequence'),
            (r'[\'\n]', String.Char, '#pop'),
        ],
        'escape-sequence': [
            (r'\\[nrt\\"\']', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
            (r'\\u[0-9a-fA-F]{4}', String.Escape),
            # Yes, \U literals are 6 hex digits.
            (r'\\U[0-9a-fA-F]{6}', String.Escape)
        ]
    }