# -*- coding: utf-8 -*-
"""
    pygments.lexers.haskell
    ~~~~~~~~~~~~~~~~~~~~~~~

    Lexers for Haskell and related languages.

    :copyright: Copyright 2006-2020 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import re

from pygments.lexer import Lexer, RegexLexer, bygroups, do_insertions, \
    default, include, inherit
from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
    Number, Punctuation, Generic
from pygments import unistring as uni

__all__ = ['HaskellLexer', 'HspecLexer', 'IdrisLexer', 'AgdaLexer', 'CryptolLexer',
           'LiterateHaskellLexer', 'LiterateIdrisLexer', 'LiterateAgdaLexer',
           'LiterateCryptolLexer', 'KokaLexer']


# Matches one input line including its trailing newline; used by
# LiterateLexer to iterate over the source line by line.
line_re = re.compile('.*?\n')


class HaskellLexer(RegexLexer):
    """
    A Haskell lexer based on the lexemes defined in the Haskell 98 Report.

    .. versionadded:: 0.8
    """
    name = 'Haskell'
    aliases = ['haskell', 'hs']
    filenames = ['*.hs']
    mimetypes = ['text/x-haskell']

    flags = re.MULTILINE | re.UNICODE

    # Haskell reserved words; 'infix[lr]?' is itself a regex alternative
    # covering infix/infixl/infixr.
    reserved = ('case', 'class', 'data', 'default', 'deriving', 'do', 'else',
                'family', 'if', 'in', 'infix[lr]?', 'instance',
                'let', 'newtype', 'of', 'then', 'type', 'where', '_')
    # Named ASCII control-character escapes allowed in character/string
    # literals (some entries are regex alternations, e.g. '[SE]TX').
    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    tokens = {
        'root': [
            # Whitespace:
            (r'\s+', Text),
            # (r'--\s*|.*$', Comment.Doc),
            # Line comment: '--' not followed by an operator symbol
            # (otherwise '-->' etc. is an operator, not a comment).
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            # Lexemes:
            #  Identifiers
            (r'\bimport\b', Keyword.Reserved, 'import'),
            (r'\bmodule\b', Keyword.Reserved, 'module'),
            (r'\berror\b', Name.Exception),
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            (r"'[^\\]'", String.Char),  # this has to come before the TH quote
            # A lowercase identifier at the start of a line is taken to be a
            # function definition.
            (r'^[_' + uni.Ll + r'][\w\']*', Name.Function),
            (r"'?[_" + uni.Ll + r"][\w']*", Name),
            (r"('')?[" + uni.Lu + r"][\w\']*", Keyword.Type),
            (r"(')[" + uni.Lu + r"][\w\']*", Keyword.Type),
            (r"(')\[[^\]]*\]", Keyword.Type),  # tuples and lists get special treatment in GHC
            (r"(')\([^)]*\)", Keyword.Type),  # ..
            (r"(')[:!#$%&*+.\\/<=>?@^|~-]+", Keyword.Type),  # promoted type operators
            #  Operators
            (r'\\(?![:!#$%&*+.\\/<=>?@^|~-]+)', Name.Function),  # lambda operator
            (r'(<-|::|->|=>|=)(?![:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            (r':[:!#$%&*+.\\/<=>?@^|~-]*', Keyword.Type),  # Constructor operators
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),  # Other operators
            #  Numbers
            # Numeric literals allow '_' digit separators (GHC
            # NumericUnderscores); float forms must precede integer forms so
            # the longer match wins.
            (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*_*[pP][+-]?\d(_*\d)*', Number.Float),
            (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*\.[\da-fA-F](_*[\da-fA-F])*'
             r'(_*[pP][+-]?\d(_*\d)*)?', Number.Float),
            (r'\d(_*\d)*_*[eE][+-]?\d(_*\d)*', Number.Float),
            (r'\d(_*\d)*\.\d(_*\d)*(_*[eE][+-]?\d(_*\d)*)?', Number.Float),
            (r'0[bB]_*[01](_*[01])*', Number.Bin),
            (r'0[oO]_*[0-7](_*[0-7])*', Number.Oct),
            (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*', Number.Hex),
            (r'\d(_*\d)*', Number.Integer),
            #  Character/String Literals
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            #  Special
            (r'\[\]', Keyword.Type),
            (r'\(\)', Name.Builtin),
            (r'[][(),;`{}]', Punctuation),
        ],
        'import': [
            # Import statements
            (r'\s+', Text),
            (r'"', String, 'string'),
            # after "funclist" state
            (r'\)', Punctuation, '#pop'),
            (r'qualified\b', Keyword),
            # import X as Y
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(as)(\s+)([' + uni.Lu + r'][\w.]*)',
             bygroups(Name.Namespace, Text, Keyword, Text, Name), '#pop'),
            # import X hiding (functions)
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(hiding)(\s+)(\()',
             bygroups(Name.Namespace, Text, Keyword, Text, Punctuation), 'funclist'),
            # import X (functions)
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            # import X
            (r'[\w.]+', Name.Namespace, '#pop'),
        ],
        'module': [
            (r'\s+', Text),
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            (r'[' + uni.Lu + r'][\w.]*', Name.Namespace, '#pop'),
        ],
        'funclist': [
            (r'\s+', Text),
            (r'[' + uni.Lu + r']\w*', Keyword.Type),
            (r'(_[\w\']+|[' + uni.Ll + r'][\w\']*)', Name.Function),
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        # NOTE: the next four states are shared in the AgdaLexer; make sure
        # any change is compatible with Agda as well or copy over and change
        'comment': [
            # Multiline Comments
            (r'[^-{}]+', Comment.Multiline),
            (r'\{-', Comment.Multiline, '#push'),
            (r'-\}', Comment.Multiline, '#pop'),
            (r'[-{}]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']'", String.Char, '#pop'),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        'escape': [
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            (r'\^[][' + uni.Lu + r'@^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            # "string gap": backslash, whitespace, backslash continues a
            # string across lines.
            (r'\s+\\', String.Escape, '#pop'),
        ],
    }


class HspecLexer(HaskellLexer):
    """
    A Haskell lexer with support for Hspec constructs.

    .. versionadded:: 2.4.0
    """

    name = 'Hspec'
    aliases = ['hspec']
    filenames = []
    mimetypes = []

    tokens = {
        'root': [
            # Highlight the description strings of Hspec's it/describe/context
            # combinators; everything else is inherited from HaskellLexer.
            (r'(it\s*)("[^"]*")', bygroups(Text, String.Doc)),
            (r'(describe\s*)("[^"]*")', bygroups(Text, String.Doc)),
            (r'(context\s*)("[^"]*")', bygroups(Text, String.Doc)),
            inherit,
        ],
    }


class IdrisLexer(RegexLexer):
    """
    A lexer for the dependently typed programming language Idris.

    Based on the Haskell and Agda Lexer.

    .. versionadded:: 2.0
    """
    name = 'Idris'
    aliases = ['idris', 'idr']
    filenames = ['*.idr']
    mimetypes = ['text/x-idris']

    reserved = ('case', 'class', 'data', 'default', 'using', 'do', 'else',
                'if', 'in', 'infix[lr]?', 'instance', 'rewrite', 'auto',
                'namespace', 'codata', 'mutual', 'private', 'public', 'abstract',
                'total', 'partial',
                'interface', 'implementation', 'export', 'covering', 'constructor',
                'let', 'proof', 'of', 'then', 'static', 'where', '_', 'with',
                'pattern',  'term',  'syntax', 'prefix',
                'postulate', 'parameters', 'record', 'dsl', 'impossible', 'implicit',
                'tactics', 'intros', 'intro', 'compute', 'refine', 'exact', 'trivial')

    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    # Compiler directives, written as '%name' at the start of a line.
    directives = ('lib', 'link', 'flag', 'include', 'hide', 'freeze', 'access',
                  'default', 'logging', 'dynamic', 'name', 'error_handlers', 'language')

    tokens = {
        'root': [
            # Comments
            (r'^(\s*)(%%(%s))' % '|'.join(directives),
             bygroups(Text, Keyword.Reserved)),
            (r'(\s*)(--(?![!#$%&*+./<=>?@^|_~:\\]).*?)$', bygroups(Text, Comment.Single)),
            # '|||' introduces an Idris documentation comment.
            (r'(\s*)(\|{3}.*?)$', bygroups(Text, Comment.Single)),
            (r'(\s*)(\{-)', bygroups(Text, Comment.Multiline), 'comment'),
            # Declaration
            (r'^(\s*)([^\s(){}]+)(\s*)(:)(\s*)',
             bygroups(Text, Name.Function, Text, Operator.Word, Text)),
            #  Identifiers
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Text), 'module'),
            (r"('')?[A-Z][\w\']*", Keyword.Type),
            (r'[a-z][\w\']*', Text),
            #  Special Symbols
            (r'(<-|::|->|=>|=)', Operator.Word),  # specials
            (r'([(){}\[\]:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            # Strings
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            (r'[^\s(){}]+', Text),
            (r'\s+?', Text),  # Whitespace
        ],
        'module': [
            (r'\s+', Text),
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            (r'[A-Z][\w.]*', Name.Namespace, '#pop'),
        ],
        'funclist': [
            (r'\s+', Text),
            (r'[A-Z]\w*', Keyword.Type),
            (r'(_[\w\']+|[a-z][\w\']*)', Name.Function),
            (r'--.*$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        # NOTE: the next four states are shared in the AgdaLexer; make sure
        # any change is compatible with Agda as well or copy over and change
        'comment': [
            # Multiline Comments
            (r'[^-{}]+', Comment.Multiline),
            (r'\{-', Comment.Multiline, '#push'),
            (r'-\}', Comment.Multiline, '#pop'),
            (r'[-{}]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            # NOTE(review): unlike HaskellLexer.character this rule does not
            # pop on the closing quote character itself — presumably
            # intentional for Idris, but worth confirming.
            (r"[^\\']", String.Char),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        'escape': [
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            (r'\^[][A-Z@^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            (r'\s+\\', String.Escape, '#pop')
        ],
    }


class AgdaLexer(RegexLexer):
    """
    For the `Agda <http://wiki.portal.chalmers.se/agda/pmwiki.php>`_
    dependently typed functional programming language and proof assistant.

    .. versionadded:: 2.0
    """

    name = 'Agda'
    aliases = ['agda']
    filenames = ['*.agda']
    mimetypes = ['text/x-agda']

    reserved = ['abstract', 'codata', 'coinductive', 'constructor', 'data',
                'field', 'forall', 'hiding', 'in', 'inductive', 'infix',
                'infixl', 'infixr', 'instance', 'let', 'mutual', 'open',
                'pattern', 'postulate', 'primitive', 'private',
                'quote', 'quoteGoal', 'quoteTerm',
                'record', 'renaming', 'rewrite', 'syntax', 'tactic',
                'unquote', 'unquoteDecl', 'using', 'where', 'with']

    tokens = {
        'root': [
            # Declaration
            (r'^(\s*)([^\s(){}]+)(\s*)(:)(\s*)',
             bygroups(Text, Name.Function, Text, Operator.Word, Text)),
            # Comments
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            # Holes
            (r'\{!', Comment.Directive, 'hole'),
            # Lexemes:
            #  Identifiers
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Text), 'module'),
            # Set/Prop optionally followed by subscript digits (Set₁ etc.).
            (r'\b(Set|Prop)[\u2080-\u2089]*\b', Keyword.Type),
            #  Special Symbols
            (r'(\(|\)|\{|\})', Operator),
            # \u03BB = λ, \u2200 = ∀, \u2192 = →
            (r'(\.{1,3}|\||\u03BB|\u2200|\u2192|:|=|->)', Operator.Word),
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            # Strings
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            (r'[^\s(){}]+', Text),
            (r'\s+?', Text),  # Whitespace
        ],
        'hole': [
            # Holes
            (r'[^!{}]+', Comment.Directive),
            (r'\{!', Comment.Directive, '#push'),
            (r'!\}', Comment.Directive, '#pop'),
            (r'[!{}]', Comment.Directive),
        ],
        'module': [
            (r'\{-', Comment.Multiline, 'comment'),
            (r'[a-zA-Z][\w.]*', Name, '#pop'),
            (r'[\W0-9_]+', Text)
        ],
        # These states are shared with HaskellLexer (see the NOTE there).
        'comment': HaskellLexer.tokens['comment'],
        'character': HaskellLexer.tokens['character'],
        'string': HaskellLexer.tokens['string'],
        'escape': HaskellLexer.tokens['escape']
    }


class CryptolLexer(RegexLexer):
    """
    FIXME: A Cryptol2 lexer based on the lexemes defined in the Haskell 98 Report.

    .. versionadded:: 2.0
    """
    name = 'Cryptol'
    aliases = ['cryptol', 'cry']
    filenames = ['*.cry']
    mimetypes = ['text/x-cryptol']

    reserved = ('Arith', 'Bit', 'Cmp', 'False', 'Inf', 'True', 'else',
                'export', 'extern', 'fin', 'if', 'import', 'inf', 'lg2',
                'max', 'min', 'module', 'newtype', 'pragma', 'property',
                'then', 'type', 'where', 'width')
    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    tokens = {
        'root': [
            # Whitespace:
            (r'\s+', Text),
            # (r'--\s*|.*$', Comment.Doc),
            # Cryptol uses C-style comments, unlike Haskell.
            (r'//.*$', Comment.Single),
            (r'/\*', Comment.Multiline, 'comment'),
            # Lexemes:
            #  Identifiers
            (r'\bimport\b', Keyword.Reserved, 'import'),
            (r'\bmodule\b', Keyword.Reserved, 'module'),
            (r'\berror\b', Name.Exception),
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            (r'^[_a-z][\w\']*', Name.Function),
            (r"'?[_a-z][\w']*", Name),
            (r"('')?[A-Z][\w\']*", Keyword.Type),
            #  Operators
            (r'\\(?![:!#$%&*+.\\/<=>?@^|~-]+)', Name.Function),  # lambda operator
            (r'(<-|::|->|=>|=)(?![:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            (r':[:!#$%&*+.\\/<=>?@^|~-]*', Keyword.Type),  # Constructor operators
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),  # Other operators
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[oO][0-7]+', Number.Oct),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            #  Character/String Literals
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            #  Special
            (r'\[\]', Keyword.Type),
            (r'\(\)', Name.Builtin),
            (r'[][(),;`{}]', Punctuation),
        ],
        'import': [
            # Import statements
            (r'\s+', Text),
            (r'"', String, 'string'),
            # after "funclist" state
            (r'\)', Punctuation, '#pop'),
            (r'qualified\b', Keyword),
            # import X as Y
            (r'([A-Z][\w.]*)(\s+)(as)(\s+)([A-Z][\w.]*)',
             bygroups(Name.Namespace, Text, Keyword, Text, Name), '#pop'),
            # import X hiding (functions)
            (r'([A-Z][\w.]*)(\s+)(hiding)(\s+)(\()',
             bygroups(Name.Namespace, Text, Keyword, Text, Punctuation), 'funclist'),
            # import X (functions)
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            # import X
            (r'[\w.]+', Name.Namespace, '#pop'),
        ],
        'module': [
            (r'\s+', Text),
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            (r'[A-Z][\w.]*', Name.Namespace, '#pop'),
        ],
        'funclist': [
            (r'\s+', Text),
            (r'[A-Z]\w*', Keyword.Type),
            (r'(_[\w\']+|[a-z][\w\']*)', Name.Function),
            # TODO: these don't match the comments in docs, remove.
            # (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            # (r'{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        'comment': [
            # Multiline Comments
            (r'[^/*]+', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']'", String.Char, '#pop'),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        'escape': [
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            (r'\^[][A-Z@^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            (r'\s+\\', String.Escape, '#pop'),
        ],
    }

    # Builtin Cryptol names; plain Name tokens matching one of these are
    # re-tagged as Name.Builtin in get_tokens_unprocessed below.
    EXTRA_KEYWORDS = {'join', 'split', 'reverse', 'transpose', 'width',
                      'length', 'tail', '<<', '>>', '<<<', '>>>', 'const',
                      'reg', 'par', 'seq', 'ASSERT', 'undefined', 'error',
                      'trace'}

    def get_tokens_unprocessed(self, text):
        # Post-process the regex lexer's output, promoting known builtin
        # identifiers from Name to Name.Builtin.
        stack = ['root']
        for index, token, value in \
                RegexLexer.get_tokens_unprocessed(self, text, stack):
            if token is Name and value in self.EXTRA_KEYWORDS:
                yield index, Name.Builtin, value
            else:
                yield index, token, value


class LiterateLexer(Lexer):
    """
    Base class for lexers of literate file formats based on LaTeX or Bird-style
    (prefixing each code line with ">").

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.
    """

    # A Bird-style code line: '>' marker (with trailing blanks) + the code.
    bird_re = re.compile(r'(>[ \t]*)(.*\n)')

    def __init__(self, baselexer, **options):
        # baselexer lexes the extracted code; the surrounding text is
        # emitted as Text (bird) or lexed with TexLexer (latex).
        self.baselexer = baselexer
        Lexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        style = self.options.get('litstyle')
        if style is None:
            # NOTE(review): old-style "cond and a or b" conditional; kept
            # as-is — equivalent to 'latex' if first char is % or \ else 'bird'.
            style = (text.lstrip()[0:1] in '%\\') and 'latex' or 'bird'

        # 'code' accumulates the extracted program text; 'insertions' records
        # (offset-into-code, tokens) pairs for the non-code text, which
        # do_insertions later splices back between the code tokens.
        code = ''
        insertions = []
        if style == 'bird':
            # bird-style
            for match in line_re.finditer(text):
                line = match.group()
                m = self.bird_re.match(line)
                if m:
                    insertions.append((len(code),
                                       [(0, Comment.Special, m.group(1))]))
                    code += m.group(2)
                else:
                    insertions.append((len(code), [(0, Text, line)]))
        else:
            # latex-style
            from pygments.lexers.markup import TexLexer
            lxlexer = TexLexer(**self.options)
            codelines = 0
            latex = ''
            for match in line_re.finditer(text):
                line = match.group()
                if codelines:
                    if line.lstrip().startswith('\\end{code}'):
                        codelines = 0
                        latex += line
                    else:
                        code += line
                elif line.lstrip().startswith('\\begin{code}'):
                    codelines = 1
                    latex += line
                    insertions.append((len(code),
                                       list(lxlexer.get_tokens_unprocessed(latex))))
                    latex = ''
                else:
                    latex += line
            insertions.append((len(code),
                               list(lxlexer.get_tokens_unprocessed(latex))))
        yield from do_insertions(insertions, self.baselexer.get_tokens_unprocessed(code))


class LiterateHaskellLexer(LiterateLexer):
    """
    For Literate Haskell (Bird-style or LaTeX) source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.

    .. versionadded:: 0.9
    """
    name = 'Literate Haskell'
    aliases = ['lhs', 'literate-haskell', 'lhaskell']
    filenames = ['*.lhs']
    mimetypes = ['text/x-literate-haskell']

    def __init__(self, **options):
        hslexer = HaskellLexer(**options)
        LiterateLexer.__init__(self, hslexer, **options)


class LiterateIdrisLexer(LiterateLexer):
    """
    For Literate Idris (Bird-style or LaTeX) source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.

    .. versionadded:: 2.0
    """
    name = 'Literate Idris'
    aliases = ['lidr', 'literate-idris', 'lidris']
    filenames = ['*.lidr']
    mimetypes = ['text/x-literate-idris']

    def __init__(self, **options):
        hslexer = IdrisLexer(**options)
        LiterateLexer.__init__(self, hslexer, **options)


class LiterateAgdaLexer(LiterateLexer):
    """
    For Literate Agda source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.

    .. versionadded:: 2.0
    """
    name = 'Literate Agda'
    aliases = ['lagda', 'literate-agda']
    filenames = ['*.lagda']
    mimetypes = ['text/x-literate-agda']

    def __init__(self, **options):
        agdalexer = AgdaLexer(**options)
        # Literate Agda is always LaTeX-style, so litstyle is forced here.
        LiterateLexer.__init__(self, agdalexer, litstyle='latex', **options)


class LiterateCryptolLexer(LiterateLexer):
    """
    For Literate Cryptol (Bird-style or LaTeX) source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.

    .. versionadded:: 2.0
    """
    name = 'Literate Cryptol'
    aliases = ['lcry', 'literate-cryptol', 'lcryptol']
    filenames = ['*.lcry']
    mimetypes = ['text/x-literate-cryptol']

    def __init__(self, **options):
        crylexer = CryptolLexer(**options)
        LiterateLexer.__init__(self, crylexer, **options)


class KokaLexer(RegexLexer):
    """
    Lexer for the `Koka <http://koka.codeplex.com>`_
    language.

    .. versionadded:: 1.6
    """

    name = 'Koka'
    aliases = ['koka']
    filenames = ['*.kk', '*.kki']
    mimetypes = ['text/x-koka']

    keywords = [
        'infix', 'infixr', 'infixl',
        'type', 'cotype', 'rectype', 'alias',
        'struct', 'con',
        'fun', 'function', 'val', 'var',
        'external',
        'if', 'then', 'else', 'elif', 'return', 'match',
        'private', 'public', 'private',
        'module', 'import', 'as',
        'include', 'inline',
        'rec',
        'try', 'yield', 'enum',
        'interface', 'instance',
    ]

    # keywords that are followed by a type
    typeStartKeywords = [
        'type', 'cotype', 'rectype', 'alias', 'struct', 'enum',
    ]

    # keywords valid in a type
    typekeywords = [
        'forall', 'exists', 'some', 'with',
    ]

    # builtin names and special names
    builtin = [
        'for', 'while', 'repeat',
        'foreach', 'foreach-indexed',
        'error', 'catch', 'finally',
        'cs', 'js', 'file', 'ref', 'assigned',
    ]

    # symbols that can be in an operator
    symbols = r'[$%&*+@!/\\^~=.:\-?|<>]+'

    # symbol boundary: an operator keyword should not be followed by any of these
    sboundary = '(?!' + symbols + ')'

    # name boundary: a keyword should not be followed by any of these
    boundary = r'(?![\w/])'

    # koka token abstractions
    tokenType = Name.Attribute
    tokenTypeDef = Name.Class
    tokenConstructor = Generic.Emph

    # main lexer
    tokens = {
        'root': [
            include('whitespace'),

            # go into type mode
            (r'::?' + sboundary, tokenType, 'type'),
            (r'(alias)(\s+)([a-z]\w*)?', bygroups(Keyword, Text, tokenTypeDef),
             'alias-type'),
            (r'(struct)(\s+)([a-z]\w*)?', bygroups(Keyword, Text, tokenTypeDef),
             'struct-type'),
            ((r'(%s)' % '|'.join(typeStartKeywords)) +
             r'(\s+)([a-z]\w*)?', bygroups(Keyword, Text, tokenTypeDef),
             'type'),

            # special sequences of tokens (we use ?: for non-capturing group as
            # required by 'bygroups')
            (r'(module)(\s+)(interface\s+)?((?:[a-z]\w*/)*[a-z]\w*)',
             bygroups(Keyword, Text, Keyword, Name.Namespace)),
            (r'(import)(\s+)((?:[a-z]\w*/)*[a-z]\w*)'
             r'(?:(\s*)(=)(\s*)((?:qualified\s*)?)'
             r'((?:[a-z]\w*/)*[a-z]\w*))?',
             bygroups(Keyword, Text, Name.Namespace, Text, Keyword, Text,
                      Keyword, Name.Namespace)),

            (r'(^(?:(?:public|private)\s*)?(?:function|fun|val))'
             r'(\s+)([a-z]\w*|\((?:' + symbols + r'|/)\))',
             bygroups(Keyword, Text, Name.Function)),
            (r'(^(?:(?:public|private)\s*)?external)(\s+)(inline\s+)?'
             r'([a-z]\w*|\((?:' + symbols + r'|/)\))',
             bygroups(Keyword, Text, Keyword, Name.Function)),

            # keywords
            (r'(%s)' % '|'.join(typekeywords) + boundary, Keyword.Type),
            (r'(%s)' % '|'.join(keywords) + boundary, Keyword),
            (r'(%s)' % '|'.join(builtin) + boundary, Keyword.Pseudo),
            (r'::?|:=|\->|[=.]' + sboundary, Keyword),

            # names
            (r'((?:[a-z]\w*/)*)([A-Z]\w*)',
             bygroups(Name.Namespace, tokenConstructor)),
            (r'((?:[a-z]\w*/)*)([a-z]\w*)', bygroups(Name.Namespace, Name)),
            (r'((?:[a-z]\w*/)*)(\((?:' + symbols + r'|/)\))',
             bygroups(Name.Namespace, Name)),
            (r'_\w*', Name.Variable),

            # literal string
            (r'@"', String.Double, 'litstring'),

            # operators
            (symbols + "|/(?![*/])", Operator),
            (r'`', Operator),
            (r'[{}()\[\];,]', Punctuation),

            # literals. No check for literal characters with len > 1
            (r'[0-9]+\.[0-9]+([eE][\-+]?[0-9]+)?', Number.Float),
            (r'0[xX][0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),

            (r"'", String.Char, 'char'),
            (r'"', String.Double, 'string'),
        ],

        # type started by alias
        'alias-type': [
            (r'=', Keyword),
            include('type')
        ],

        # type started by struct
        'struct-type': [
            (r'(?=\((?!,*\)))', Punctuation, '#pop'),
            include('type')
        ],

        # type started by colon
        'type': [
            (r'[(\[<]', tokenType, 'type-nested'),
            include('type-content')
        ],

        # type nested in brackets: can contain parameters, comma etc.
        'type-nested': [
            (r'[)\]>]', tokenType, '#pop'),
            (r'[(\[<]', tokenType, 'type-nested'),
            (r',', tokenType),
            (r'([a-z]\w*)(\s*)(:)(?!:)',
             bygroups(Name, Text, tokenType)),  # parameter name
            include('type-content')
        ],

        # shared contents of a type
        'type-content': [
            include('whitespace'),

            # keywords
            (r'(%s)' % '|'.join(typekeywords) + boundary, Keyword),
            (r'(?=((%s)' % '|'.join(keywords) + boundary + '))',
             Keyword, '#pop'),  # need to match because names overlap...

            # kinds
            (r'[EPHVX]' + boundary, tokenType),

            # type names
            (r'[a-z][0-9]*(?![\w/])', tokenType),
            (r'_\w*', tokenType.Variable),  # Generic.Emph
            (r'((?:[a-z]\w*/)*)([A-Z]\w*)',
             bygroups(Name.Namespace, tokenType)),
            (r'((?:[a-z]\w*/)*)([a-z]\w+)',
             bygroups(Name.Namespace, tokenType)),

            # type keyword operators
            (r'::|->|[.:|]', tokenType),

            # catchall
            default('#pop')
        ],

        # comments and literals
        'whitespace': [
            (r'\n\s*#.*$', Comment.Preproc),
            (r'\s+', Text),
            (r'/\*', Comment.Multiline, 'comment'),
            (r'//.*$', Comment.Single)
        ],
        'comment': [
            (r'[^/*]+', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline),
        ],
        'litstring': [
            # In @"..." literal strings only "" is an escape (for a quote).
            (r'[^"]+', String.Double),
            (r'""', String.Escape),
            (r'"', String.Double, '#pop'),
        ],
        'string': [
            (r'[^\\"\n]+', String.Double),
            include('escape-sequence'),
            (r'["\n]', String.Double, '#pop'),
        ],
        'char': [
            (r'[^\\\'\n]+', String.Char),
            include('escape-sequence'),
            (r'[\'\n]', String.Char, '#pop'),
        ],
        'escape-sequence': [
            (r'\\[nrt\\"\']', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
            (r'\\u[0-9a-fA-F]{4}', String.Escape),
            # Yes, \U literals are 6 hex digits.
            (r'\\U[0-9a-fA-F]{6}', String.Escape)
        ]
    }