# -*- coding: utf-8 -*-
"""
    pygments.lexers.haskell
    ~~~~~~~~~~~~~~~~~~~~~~~

    Lexers for Haskell and related languages.

    :copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import re

from pygments.lexer import Lexer, RegexLexer, bygroups, do_insertions, \
    default, include, inherit
from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
    Number, Punctuation, Generic
from pygments import unistring as uni

__all__ = ['HaskellLexer', 'HspecLexer', 'IdrisLexer', 'AgdaLexer', 'CryptolLexer',
           'LiterateHaskellLexer', 'LiterateIdrisLexer', 'LiterateAgdaLexer',
           'LiterateCryptolLexer', 'KokaLexer']


# Matches one newline-terminated line; used by the literate lexers to walk
# the input line by line.
line_re = re.compile('.*?\n')


class HaskellLexer(RegexLexer):
    """
    A Haskell lexer based on the lexemes defined in the Haskell 98 Report.

    .. versionadded:: 0.8
    """
    name = 'Haskell'
    aliases = ['haskell', 'hs']
    filenames = ['*.hs']
    mimetypes = ['text/x-haskell']

    flags = re.MULTILINE | re.UNICODE

    # Regex fragments, joined with '|' into the reserved-word rule in 'root'.
    reserved = ('case', 'class', 'data', 'default', 'deriving', 'do', 'else',
                'family', 'if', 'in', 'infix[lr]?', 'instance',
                'let', 'newtype', 'of', 'then', 'type', 'where', '_')
    # Regex fragments for the named ASCII escapes accepted in the 'escape'
    # state (e.g. ``\NUL``, ``\DC1``).
    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    tokens = {
        'root': [
            # Whitespace:
            (r'\s+', Text),
            # (r'--\s*|.*$', Comment.Doc),
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            # Lexemes:
            #  Identifiers
            (r'\bimport\b', Keyword.Reserved, 'import'),
            (r'\bmodule\b', Keyword.Reserved, 'module'),
            (r'\berror\b', Name.Exception),
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            (r"'[^\\]'", String.Char),  # this has to come before the TH quote
            (r'^[_' + uni.Ll + r'][\w\']*', Name.Function),
            (r"'?[_" + uni.Ll + r"][\w']*", Name),
            (r"('')?[" + uni.Lu + r"][\w\']*", Keyword.Type),
            (r"(')[" + uni.Lu + r"][\w\']*", Keyword.Type),
            (r"(')\[[^\]]*\]", Keyword.Type),  # tuples and lists get special treatment in GHC
            (r"(')\([^)]*\)", Keyword.Type),  # ..
            #  Operators
            (r'\\(?![:!#$%&*+.\\/<=>?@^|~-]+)', Name.Function),  # lambda operator
            (r'(<-|::|->|=>|=)(?![:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            (r':[:!#$%&*+.\\/<=>?@^|~-]*', Keyword.Type),  # Constructor operators
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),  # Other operators
            #  Numbers
            (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*_*[pP][+-]?\d(_*\d)*', Number.Float),
            (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*\.[\da-fA-F](_*[\da-fA-F])*'
             r'(_*[pP][+-]?\d(_*\d)*)?', Number.Float),
            (r'\d(_*\d)*_*[eE][+-]?\d(_*\d)*', Number.Float),
            (r'\d(_*\d)*\.\d(_*\d)*(_*[eE][+-]?\d(_*\d)*)?', Number.Float),
            (r'0[bB]_*[01](_*[01])*', Number.Bin),
            (r'0[oO]_*[0-7](_*[0-7])*', Number.Oct),
            (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*', Number.Hex),
            (r'\d(_*\d)*', Number.Integer),
            #  Character/String Literals
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            #  Special
            (r'\[\]', Keyword.Type),
            (r'\(\)', Name.Builtin),
            (r'[][(),;`{}]', Punctuation),
        ],
        'import': [
            # Import statements
            (r'\s+', Text),
            (r'"', String, 'string'),
            # after "funclist" state
            (r'\)', Punctuation, '#pop'),
            (r'qualified\b', Keyword),
            # import X as Y
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(as)(\s+)([' + uni.Lu + r'][\w.]*)',
             bygroups(Name.Namespace, Text, Keyword, Text, Name), '#pop'),
            # import X hiding (functions)
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(hiding)(\s+)(\()',
             bygroups(Name.Namespace, Text, Keyword, Text, Punctuation), 'funclist'),
            # import X (functions)
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            # import X
            (r'[\w.]+', Name.Namespace, '#pop'),
        ],
        'module': [
            (r'\s+', Text),
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            (r'[' + uni.Lu + r'][\w.]*', Name.Namespace, '#pop'),
        ],
        'funclist': [
            (r'\s+', Text),
            (r'[' + uni.Lu + r']\w*', Keyword.Type),
            (r'(_[\w\']+|[' + uni.Ll + r'][\w\']*)', Name.Function),
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        # NOTE: the next four states are shared in the AgdaLexer; make sure
        # any change is compatible with Agda as well or copy over and change
        'comment': [
            # Multiline Comments
            (r'[^-{}]+', Comment.Multiline),
            (r'\{-', Comment.Multiline, '#push'),
            (r'-\}', Comment.Multiline, '#pop'),
            (r'[-{}]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']'", String.Char, '#pop'),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        'escape': [
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            (r'\^[][' + uni.Lu + r'@^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            (r'\s+\\', String.Escape, '#pop'),
        ],
    }


class HspecLexer(HaskellLexer):
    """
    A Haskell lexer with support for Hspec constructs.

    .. versionadded:: 2.4.0
    """

    name = 'Hspec'
    aliases = ['hspec']
    filenames = []
    mimetypes = []

    tokens = {
        'root': [
            # The string following ``it``/``describe``/``context`` is a
            # spec description; everything else is inherited from Haskell.
            (r'(it\s*)("[^"]*")', bygroups(Text, String.Doc)),
            (r'(describe\s*)("[^"]*")', bygroups(Text, String.Doc)),
            (r'(context\s*)("[^"]*")', bygroups(Text, String.Doc)),
            inherit,
        ],
    }


class IdrisLexer(RegexLexer):
    """
    A lexer for the dependently typed programming language Idris.

    Based on the Haskell and Agda Lexer.

    .. versionadded:: 2.0
    """
    name = 'Idris'
    aliases = ['idris', 'idr']
    filenames = ['*.idr']
    mimetypes = ['text/x-idris']

    # Regex fragments, joined with '|' into the reserved-word rule in 'root'.
    reserved = ('case', 'class', 'data', 'default', 'using', 'do', 'else',
                'if', 'in', 'infix[lr]?', 'instance', 'rewrite', 'auto',
                'namespace', 'codata', 'mutual', 'private', 'public', 'abstract',
                'total', 'partial',
                'let', 'proof', 'of', 'then', 'static', 'where', '_', 'with',
                'pattern', 'term', 'syntax', 'prefix',
                'postulate', 'parameters', 'record', 'dsl', 'impossible', 'implicit',
                'tactics', 'intros', 'intro', 'compute', 'refine', 'exact', 'trivial')

    # Regex fragments for the named ASCII escapes accepted in the 'escape'
    # state.
    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    # Directive names; each is recognised only when prefixed with '%'.
    directives = ('lib', 'link', 'flag', 'include', 'hide', 'freeze', 'access',
                  'default', 'logging', 'dynamic', 'name', 'error_handlers', 'language')

    tokens = {
        'root': [
            # Directives.  The alternation must be grouped so that the '%'
            # prefix applies to every directive name, not just the first.
            (r'^(\s*)(%%(?:%s))' % '|'.join(directives),
             bygroups(Text, Keyword.Reserved)),
            # Comments
            (r'(\s*)(--(?![!#$%&*+./<=>?@^|_~:\\]).*?)$', bygroups(Text, Comment.Single)),
            (r'(\s*)(\|{3}.*?)$', bygroups(Text, Comment.Single)),
            (r'(\s*)(\{-)', bygroups(Text, Comment.Multiline), 'comment'),
            # Declaration
            (r'^(\s*)([^\s(){}]+)(\s*)(:)(\s*)',
             bygroups(Text, Name.Function, Text, Operator.Word, Text)),
            #  Identifiers
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Text), 'module'),
            (r"('')?[A-Z][\w\']*", Keyword.Type),
            (r'[a-z][\w\']*', Text),
            #  Special Symbols
            (r'(<-|::|->|=>|=)', Operator.Word),  # specials
            (r'([(){}\[\]:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            # Strings
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            (r'[^\s(){}]+', Text),
            (r'\s+?', Text),  # Whitespace
        ],
        'module': [
            (r'\s+', Text),
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            (r'[A-Z][\w.]*', Name.Namespace, '#pop'),
        ],
        'funclist': [
            (r'\s+', Text),
            (r'[A-Z]\w*', Keyword.Type),
            (r'(_[\w\']+|[a-z][\w\']*)', Name.Function),
            (r'--.*$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        # NOTE: the next four states are shared in the AgdaLexer; make sure
        # any change is compatible with Agda as well or copy over and change
        'comment': [
            # Multiline Comments
            (r'[^-{}]+', Comment.Multiline),
            (r'\{-', Comment.Multiline, '#push'),
            (r'-\}', Comment.Multiline, '#pop'),
            (r'[-{}]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']", String.Char),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        'escape': [
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            (r'\^[][A-Z@^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            (r'\s+\\', String.Escape, '#pop')
        ],
    }


class AgdaLexer(RegexLexer):
    """
    For the `Agda <http://wiki.portal.chalmers.se/agda/pmwiki.php>`_
    dependently typed functional programming language and proof assistant.

    .. versionadded:: 2.0
    """

    name = 'Agda'
    aliases = ['agda']
    filenames = ['*.agda']
    mimetypes = ['text/x-agda']

    # Joined with '|' into the reserved-word rule in 'root'.
    reserved = ['abstract', 'codata', 'coinductive', 'constructor', 'data',
                'field', 'forall', 'hiding', 'in', 'inductive', 'infix',
                'infixl', 'infixr', 'instance', 'let', 'mutual', 'open',
                'pattern', 'postulate', 'primitive', 'private',
                'quote', 'quoteGoal', 'quoteTerm',
                'record', 'renaming', 'rewrite', 'syntax', 'tactic',
                'unquote', 'unquoteDecl', 'using', 'where', 'with']

    tokens = {
        'root': [
            # Declaration
            (r'^(\s*)([^\s(){}]+)(\s*)(:)(\s*)',
             bygroups(Text, Name.Function, Text, Operator.Word, Text)),
            # Comments
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            # Holes
            (r'\{!', Comment.Directive, 'hole'),
            # Lexemes:
            #  Identifiers
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Text), 'module'),
            (u'\\b(Set|Prop)[\u2080-\u2089]*\\b', Keyword.Type),
            #  Special Symbols
            (r'(\(|\)|\{|\})', Operator),
            (u'(\\.{1,3}|\\||\u03BB|\u2200|\u2192|:|=|->)', Operator.Word),
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            # Strings
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            (r'[^\s(){}]+', Text),
            (r'\s+?', Text),  # Whitespace
        ],
        'hole': [
            # Holes
            (r'[^!{}]+', Comment.Directive),
            (r'\{!', Comment.Directive, '#push'),
            (r'!\}', Comment.Directive, '#pop'),
            (r'[!{}]', Comment.Directive),
        ],
        'module': [
            (r'\{-', Comment.Multiline, 'comment'),
            (r'[a-zA-Z][\w.]*', Name, '#pop'),
            (r'[\W0-9_]+', Text)
        ],
        # These four states are reused verbatim from the Haskell lexer.
        'comment': HaskellLexer.tokens['comment'],
        'character': HaskellLexer.tokens['character'],
        'string': HaskellLexer.tokens['string'],
        'escape': HaskellLexer.tokens['escape']
    }


class CryptolLexer(RegexLexer):
    """
    FIXME: A Cryptol2 lexer based on the lexemes defined in the Haskell 98 Report.

    .. versionadded:: 2.0
    """
    name = 'Cryptol'
    aliases = ['cryptol', 'cry']
    filenames = ['*.cry']
    mimetypes = ['text/x-cryptol']

    # Joined with '|' into the reserved-word rule in 'root'.
    reserved = ('Arith', 'Bit', 'Cmp', 'False', 'Inf', 'True', 'else',
                'export', 'extern', 'fin', 'if', 'import', 'inf', 'lg2',
                'max', 'min', 'module', 'newtype', 'pragma', 'property',
                'then', 'type', 'where', 'width')
    # Regex fragments for the named ASCII escapes accepted in the 'escape'
    # state.
    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    tokens = {
        'root': [
            # Whitespace:
            (r'\s+', Text),
            # (r'--\s*|.*$', Comment.Doc),
            (r'//.*$', Comment.Single),
            (r'/\*', Comment.Multiline, 'comment'),
            # Lexemes:
            #  Identifiers
            (r'\bimport\b', Keyword.Reserved, 'import'),
            (r'\bmodule\b', Keyword.Reserved, 'module'),
            (r'\berror\b', Name.Exception),
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            (r'^[_a-z][\w\']*', Name.Function),
            (r"'?[_a-z][\w']*", Name),
            (r"('')?[A-Z][\w\']*", Keyword.Type),
            #  Operators
            (r'\\(?![:!#$%&*+.\\/<=>?@^|~-]+)', Name.Function),  # lambda operator
            (r'(<-|::|->|=>|=)(?![:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            (r':[:!#$%&*+.\\/<=>?@^|~-]*', Keyword.Type),  # Constructor operators
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),  # Other operators
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[oO][0-7]+', Number.Oct),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            #  Character/String Literals
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            #  Special
            (r'\[\]', Keyword.Type),
            (r'\(\)', Name.Builtin),
            (r'[][(),;`{}]', Punctuation),
        ],
        'import': [
            # Import statements
            (r'\s+', Text),
            (r'"', String, 'string'),
            # after "funclist" state
            (r'\)', Punctuation, '#pop'),
            (r'qualified\b', Keyword),
            # import X as Y
            (r'([A-Z][\w.]*)(\s+)(as)(\s+)([A-Z][\w.]*)',
             bygroups(Name.Namespace, Text, Keyword, Text, Name), '#pop'),
            # import X hiding (functions)
            (r'([A-Z][\w.]*)(\s+)(hiding)(\s+)(\()',
             bygroups(Name.Namespace, Text, Keyword, Text, Punctuation), 'funclist'),
            # import X (functions)
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            # import X
            (r'[\w.]+', Name.Namespace, '#pop'),
        ],
        'module': [
            (r'\s+', Text),
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            (r'[A-Z][\w.]*', Name.Namespace, '#pop'),
        ],
        'funclist': [
            (r'\s+', Text),
            (r'[A-Z]\w*', Keyword.Type),
            (r'(_[\w\']+|[a-z][\w\']*)', Name.Function),
            # TODO: these don't match the comments in docs, remove.
            # (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            # (r'{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        'comment': [
            # Multiline Comments
            (r'[^/*]+', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']'", String.Char, '#pop'),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        'escape': [
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            (r'\^[][A-Z@^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            (r'\s+\\', String.Escape, '#pop'),
        ],
    }

    # Values that are re-tagged from ``Name`` to ``Name.Builtin`` in
    # get_tokens_unprocessed().
    EXTRA_KEYWORDS = {'join', 'split', 'reverse', 'transpose', 'width',
                      'length', 'tail', '<<', '>>', '<<<', '>>>', 'const',
                      'reg', 'par', 'seq', 'ASSERT', 'undefined', 'error',
                      'trace'}

    def get_tokens_unprocessed(self, text):
        """
        Yield ``(index, token, value)`` tuples for *text*.

        Runs the regular RegexLexer machinery starting in the ``'root'``
        state and replaces the token type of every ``Name`` token whose
        value is in `EXTRA_KEYWORDS` with ``Name.Builtin``.
        """
        stack = ['root']
        for index, token, value in \
                RegexLexer.get_tokens_unprocessed(self, text, stack):
            # Identity check: only plain ``Name`` is re-tagged, not its
            # subtypes such as ``Name.Function``.
            if token is Name and value in self.EXTRA_KEYWORDS:
                yield index, Name.Builtin, value
            else:
                yield index, token, value


class LiterateLexer(Lexer):
    """
    Base class for lexers of literate file formats based on LaTeX or Bird-style
    (prefixing each code line with ">").

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``.  If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.
    """

    # Group 1: the ">" marker plus following blanks; group 2: the code.
    bird_re = re.compile(r'(>[ \t]*)(.*\n)')

    def __init__(self, baselexer, **options):
        """
        Store *baselexer*, the lexer instance used for the code parts, and
        pass *options* on to `Lexer`.
        """
        self.baselexer = baselexer
        Lexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        """
        Yield ``(index, token, value)`` tuples for the literate source *text*.

        The code parts are concatenated and lexed with ``self.baselexer``;
        the non-code parts are merged back in with `do_insertions`.
        """
        style = self.options.get('litstyle')
        if style is None:
            # Membership is tested against a tuple, not the string '%\\':
            # the empty string is "in" every string, which made empty or
            # whitespace-only input count as LaTeX.
            first_char = text.lstrip()[0:1]
            style = 'latex' if first_char in ('%', '\\') else 'bird'

        code = ''
        insertions = []
        if style == 'bird':
            # bird-style
            for match in line_re.finditer(text):
                line = match.group()
                m = self.bird_re.match(line)
                if m:
                    # The ">" prefix is inserted as a special comment; the
                    # remainder of the line is code.
                    insertions.append((len(code),
                                       [(0, Comment.Special, m.group(1))]))
                    code += m.group(2)
                else:
                    insertions.append((len(code), [(0, Text, line)]))
        else:
            # latex-style
            from pygments.lexers.markup import TexLexer
            lxlexer = TexLexer(**self.options)
            codelines = 0
            latex = ''
            for match in line_re.finditer(text):
                line = match.group()
                if codelines:
                    if line.lstrip().startswith('\\end{code}'):
                        codelines = 0
                        latex += line
                    else:
                        code += line
                elif line.lstrip().startswith('\\begin{code}'):
                    codelines = 1
                    latex += line
                    # Flush the LaTeX collected so far as one insertion at
                    # the current code offset.
                    insertions.append((len(code),
                                       list(lxlexer.get_tokens_unprocessed(latex))))
                    latex = ''
                else:
                    latex += line
            # Trailing LaTeX after the last code block.
            insertions.append((len(code),
                               list(lxlexer.get_tokens_unprocessed(latex))))
        for item in do_insertions(insertions, self.baselexer.get_tokens_unprocessed(code)):
            yield item


class LiterateHaskellLexer(LiterateLexer):
    """
    For Literate Haskell (Bird-style or LaTeX) source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``.  If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.

    .. versionadded:: 0.9
    """
    name = 'Literate Haskell'
    aliases = ['lhs', 'literate-haskell', 'lhaskell']
    filenames = ['*.lhs']
    mimetypes = ['text/x-literate-haskell']

    def __init__(self, **options):
        """Create the lexer with a `HaskellLexer` for the code parts."""
        hslexer = HaskellLexer(**options)
        LiterateLexer.__init__(self, hslexer, **options)


class LiterateIdrisLexer(LiterateLexer):
    """
    For Literate Idris (Bird-style or LaTeX) source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``.  If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.

    .. versionadded:: 2.0
    """
    name = 'Literate Idris'
    aliases = ['lidr', 'literate-idris', 'lidris']
    filenames = ['*.lidr']
    mimetypes = ['text/x-literate-idris']

    def __init__(self, **options):
        """Create the lexer with an `IdrisLexer` for the code parts."""
        hslexer = IdrisLexer(**options)
        LiterateLexer.__init__(self, hslexer, **options)


class LiterateAgdaLexer(LiterateLexer):
    """
    For Literate Agda source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``.  If not given,
        ``"latex"`` is used.

    .. versionadded:: 2.0
    """
    name = 'Literate Agda'
    aliases = ['lagda', 'literate-agda']
    filenames = ['*.lagda']
    mimetypes = ['text/x-literate-agda']

    def __init__(self, **options):
        """Create the lexer with an `AgdaLexer` for the code parts."""
        agdalexer = AgdaLexer(**options)
        # Default to LaTeX style.  The default is merged into ``options``
        # rather than passed as a separate keyword argument: passing both
        # raised TypeError whenever the caller supplied ``litstyle``.
        options.setdefault('litstyle', 'latex')
        LiterateLexer.__init__(self, agdalexer, **options)


class LiterateCryptolLexer(LiterateLexer):
    """
    For Literate Cryptol (Bird-style or LaTeX) source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``.  If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.

    .. versionadded:: 2.0
    """
    name = 'Literate Cryptol'
    aliases = ['lcry', 'literate-cryptol', 'lcryptol']
    filenames = ['*.lcry']
    mimetypes = ['text/x-literate-cryptol']

    def __init__(self, **options):
        """Create the lexer with a `CryptolLexer` for the code parts."""
        crylexer = CryptolLexer(**options)
        LiterateLexer.__init__(self, crylexer, **options)


class KokaLexer(RegexLexer):
    """
    Lexer for the `Koka <http://koka.codeplex.com>`_
    language.

    .. versionadded:: 1.6
    """

    name = 'Koka'
    aliases = ['koka']
    filenames = ['*.kk', '*.kki']
    mimetypes = ['text/x-koka']

    keywords = [
        'infix', 'infixr', 'infixl',
        'type', 'cotype', 'rectype', 'alias',
        'struct', 'con',
        'fun', 'function', 'val', 'var',
        'external',
        'if', 'then', 'else', 'elif', 'return', 'match',
        'private', 'public',
        'module', 'import', 'as',
        'include', 'inline',
        'rec',
        'try', 'yield', 'enum',
        'interface', 'instance',
    ]

    # keywords that are followed by a type
    typeStartKeywords = [
        'type', 'cotype', 'rectype', 'alias', 'struct', 'enum',
    ]

    # keywords valid in a type
    typekeywords = [
        'forall', 'exists', 'some', 'with',
    ]

    # builtin names and special names
    builtin = [
        'for', 'while', 'repeat',
        'foreach', 'foreach-indexed',
        'error', 'catch', 'finally',
        'cs', 'js', 'file', 'ref', 'assigned',
    ]

    # symbols that can be in an operator
    symbols = r'[$%&*+@!/\\^~=.:\-?|<>]+'

    # symbol boundary: an operator keyword should not be followed by any of these
    sboundary = '(?!' + symbols + ')'

    # name boundary: a keyword should not be followed by any of these
    boundary = r'(?![\w/])'

    # koka token abstractions
    tokenType = Name.Attribute
    tokenTypeDef = Name.Class
    tokenConstructor = Generic.Emph

    # main lexer
    tokens = {
        'root': [
            include('whitespace'),

            # go into type mode
            (r'::?' + sboundary, tokenType, 'type'),
            (r'(alias)(\s+)([a-z]\w*)?', bygroups(Keyword, Text, tokenTypeDef),
             'alias-type'),
            (r'(struct)(\s+)([a-z]\w*)?', bygroups(Keyword, Text, tokenTypeDef),
             'struct-type'),
            ((r'(%s)' % '|'.join(typeStartKeywords)) +
             r'(\s+)([a-z]\w*)?', bygroups(Keyword, Text, tokenTypeDef),
             'type'),

            # special sequences of tokens (we use ?: for non-capturing group as
            # required by 'bygroups')
            (r'(module)(\s+)(interface\s+)?((?:[a-z]\w*/)*[a-z]\w*)',
             bygroups(Keyword, Text, Keyword, Name.Namespace)),
            (r'(import)(\s+)((?:[a-z]\w*/)*[a-z]\w*)'
             r'(?:(\s*)(=)(\s*)((?:qualified\s*)?)'
             r'((?:[a-z]\w*/)*[a-z]\w*))?',
             bygroups(Keyword, Text, Name.Namespace, Text, Keyword, Text,
                      Keyword, Name.Namespace)),

            (r'(^(?:(?:public|private)\s*)?(?:function|fun|val))'
             r'(\s+)([a-z]\w*|\((?:' + symbols + r'|/)\))',
             bygroups(Keyword, Text, Name.Function)),
            (r'(^(?:(?:public|private)\s*)?external)(\s+)(inline\s+)?'
             r'([a-z]\w*|\((?:' + symbols + r'|/)\))',
             bygroups(Keyword, Text, Keyword, Name.Function)),

            # keywords
            (r'(%s)' % '|'.join(typekeywords) + boundary, Keyword.Type),
            (r'(%s)' % '|'.join(keywords) + boundary, Keyword),
            (r'(%s)' % '|'.join(builtin) + boundary, Keyword.Pseudo),
            # NOTE(review): as written, ``sboundary`` binds only to the last
            # alternative ``[=.]`` -- confirm whether that is intended.
            (r'::?|:=|\->|[=.]' + sboundary, Keyword),

            # names
            (r'((?:[a-z]\w*/)*)([A-Z]\w*)',
             bygroups(Name.Namespace, tokenConstructor)),
            (r'((?:[a-z]\w*/)*)([a-z]\w*)', bygroups(Name.Namespace, Name)),
            (r'((?:[a-z]\w*/)*)(\((?:' + symbols + r'|/)\))',
             bygroups(Name.Namespace, Name)),
            (r'_\w*', Name.Variable),

            # literal string
            (r'@"', String.Double, 'litstring'),

            # operators
            (symbols + "|/(?![*/])", Operator),
            (r'`', Operator),
            (r'[{}()\[\];,]', Punctuation),

            # literals. No check for literal characters with len > 1
            (r'[0-9]+\.[0-9]+([eE][\-+]?[0-9]+)?', Number.Float),
            (r'0[xX][0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),

            (r"'", String.Char, 'char'),
            (r'"', String.Double, 'string'),
        ],

        # type started by alias
        'alias-type': [
            (r'=', Keyword),
            include('type')
        ],

        # type started by struct
        'struct-type': [
            (r'(?=\((?!,*\)))', Punctuation, '#pop'),
            include('type')
        ],

        # type started by colon
        'type': [
            (r'[(\[<]', tokenType, 'type-nested'),
            include('type-content')
        ],

        # type nested in brackets: can contain parameters, comma etc.
        'type-nested': [
            (r'[)\]>]', tokenType, '#pop'),
            (r'[(\[<]', tokenType, 'type-nested'),
            (r',', tokenType),
            (r'([a-z]\w*)(\s*)(:)(?!:)',
             bygroups(Name, Text, tokenType)),  # parameter name
            include('type-content')
        ],

        # shared contents of a type
        'type-content': [
            include('whitespace'),

            # keywords
            (r'(%s)' % '|'.join(typekeywords) + boundary, Keyword),
            (r'(?=((%s)' % '|'.join(keywords) + boundary + '))',
             Keyword, '#pop'),  # need to match because names overlap...

            # kinds
            (r'[EPHVX]' + boundary, tokenType),

            # type names
            (r'[a-z][0-9]*(?![\w/])', tokenType),
            (r'_\w*', tokenType.Variable),  # Generic.Emph
            (r'((?:[a-z]\w*/)*)([A-Z]\w*)',
             bygroups(Name.Namespace, tokenType)),
            (r'((?:[a-z]\w*/)*)([a-z]\w+)',
             bygroups(Name.Namespace, tokenType)),

            # type keyword operators
            (r'::|->|[.:|]', tokenType),

            # catchall
            default('#pop')
        ],

        # comments and literals
        'whitespace': [
            (r'\n\s*#.*$', Comment.Preproc),
            (r'\s+', Text),
            (r'/\*', Comment.Multiline, 'comment'),
            (r'//.*$', Comment.Single)
        ],
        'comment': [
            (r'[^/*]+', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline),
        ],
        'litstring': [
            (r'[^"]+', String.Double),
            (r'""', String.Escape),
            (r'"', String.Double, '#pop'),
        ],
        'string': [
            (r'[^\\"\n]+', String.Double),
            include('escape-sequence'),
            (r'["\n]', String.Double, '#pop'),
        ],
        'char': [
            (r'[^\\\'\n]+', String.Char),
            include('escape-sequence'),
            (r'[\'\n]', String.Char, '#pop'),
        ],
        'escape-sequence': [
            (r'\\[nrt\\"\']', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
            (r'\\u[0-9a-fA-F]{4}', String.Escape),
            # Yes, \U literals are 6 hex digits.
            (r'\\U[0-9a-fA-F]{6}', String.Escape)
        ]
    }