# -*- coding: utf-8 -*-
"""
    pygments.lexers.perl
    ~~~~~~~~~~~~~~~~~~~~

    Lexers for Perl, Raku and related languages.

    :copyright: Copyright 2006-2020 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import re

from pygments.lexer import RegexLexer, ExtendedRegexLexer, include, bygroups, \
    using, this, default, words
from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
    Number, Punctuation
from pygments.util import shebang_matches

__all__ = ['PerlLexer', 'Perl6Lexer']


class PerlLexer(RegexLexer):
    """
    For `Perl <https://www.perl.org>`_ source code.
    """

    name = 'Perl'
    aliases = ['perl', 'pl']
    filenames = ['*.pl', '*.pm', '*.t', '*.perl']
    mimetypes = ['text/x-perl', 'application/x-perl']

    flags = re.DOTALL | re.MULTILINE
    # TODO: give this to a perl guy who knows how to parse perl...
    tokens = {
        # Second half of a substitution written with balanced delimiters
        # (``s{...}{...}``) or the body of an ``m`` match; every rule pops
        # back to the state that pushed us here.
        'balanced-regex': [
            (r'/(\\\\|\\[^\\]|[^\\/])*/[egimosx]*', String.Regex, '#pop'),
            (r'!(\\\\|\\[^\\]|[^\\!])*![egimosx]*', String.Regex, '#pop'),
            (r'\\(\\\\|[^\\])*\\[egimosx]*', String.Regex, '#pop'),
            (r'\{(\\\\|\\[^\\]|[^\\}])*\}[egimosx]*', String.Regex, '#pop'),
            (r'<(\\\\|\\[^\\]|[^\\>])*>[egimosx]*', String.Regex, '#pop'),
            (r'\[(\\\\|\\[^\\]|[^\\\]])*\][egimosx]*', String.Regex, '#pop'),
            (r'\((\\\\|\\[^\\]|[^\\)])*\)[egimosx]*', String.Regex, '#pop'),
            (r'@(\\\\|\\[^\\]|[^\\@])*@[egimosx]*', String.Regex, '#pop'),
            (r'%(\\\\|\\[^\\]|[^\\%])*%[egimosx]*', String.Regex, '#pop'),
            (r'\$(\\\\|\\[^\\]|[^\\$])*\$[egimosx]*', String.Regex, '#pop'),
        ],
        'root': [
            (r'\A\#!.+?$', Comment.Hashbang),
            (r'\#.*?$', Comment.Single),
            (r'^=[a-zA-Z0-9]+\s+.*?\n=cut', Comment.Multiline),
            (words((
                'case', 'continue', 'do', 'else', 'elsif', 'for', 'foreach',
                'if', 'last', 'my', 'next', 'our', 'redo', 'reset', 'then',
                'unless', 'until', 'while', 'print', 'new', 'BEGIN',
                'CHECK', 'INIT', 'END', 'return'), suffix=r'\b'),
             Keyword),
            (r'(format)(\s+)(\w+)(\s*)(=)(\s*\n)',
             bygroups(Keyword, Text, Name, Text, Punctuation, Text), 'format'),
            (r'(eq|lt|gt|le|ge|ne|not|and|or|cmp)\b', Operator.Word),
            # common delimiters
            (r's/(\\\\|\\[^\\]|[^\\/])*/(\\\\|\\[^\\]|[^\\/])*/[egimosx]*',
                String.Regex),
            (r's!(\\\\|\\!|[^!])*!(\\\\|\\!|[^!])*![egimosx]*', String.Regex),
            (r's\\(\\\\|[^\\])*\\(\\\\|[^\\])*\\[egimosx]*', String.Regex),
            (r's@(\\\\|\\[^\\]|[^\\@])*@(\\\\|\\[^\\]|[^\\@])*@[egimosx]*',
                String.Regex),
            (r's%(\\\\|\\[^\\]|[^\\%])*%(\\\\|\\[^\\]|[^\\%])*%[egimosx]*',
                String.Regex),
            # balanced delimiters
            (r's\{(\\\\|\\[^\\]|[^\\}])*\}\s*', String.Regex, 'balanced-regex'),
            (r's<(\\\\|\\[^\\]|[^\\>])*>\s*', String.Regex, 'balanced-regex'),
            (r's\[(\\\\|\\[^\\]|[^\\\]])*\]\s*', String.Regex,
                'balanced-regex'),
            (r's\((\\\\|\\[^\\]|[^\\)])*\)\s*', String.Regex,
                'balanced-regex'),

            (r'm?/(\\\\|\\[^\\]|[^\\/\n])*/[gcimosx]*', String.Regex),
            (r'm(?=[/!\\{<\[(@%$])', String.Regex, 'balanced-regex'),
            (r'((?<==~)|(?<=\())\s*/(\\\\|\\[^\\]|[^\\/])*/[gcimosx]*',
                String.Regex),
            (r'\s+', Text),
            (words((
                'abs', 'accept', 'alarm', 'atan2', 'bind', 'binmode', 'bless', 'caller', 'chdir',
                'chmod', 'chomp', 'chop', 'chown', 'chr', 'chroot', 'close', 'closedir', 'connect',
                'continue', 'cos', 'crypt', 'dbmclose', 'dbmopen', 'defined', 'delete', 'die',
                'dump', 'each', 'endgrent', 'endhostent', 'endnetent', 'endprotoent',
                'endpwent', 'endservent', 'eof', 'eval', 'exec', 'exists', 'exit', 'exp', 'fcntl',
                'fileno', 'flock', 'fork', 'format', 'formline', 'getc', 'getgrent', 'getgrgid',
                'getgrnam', 'gethostbyaddr', 'gethostbyname', 'gethostent', 'getlogin',
                'getnetbyaddr', 'getnetbyname', 'getnetent', 'getpeername', 'getpgrp',
                'getppid', 'getpriority', 'getprotobyname', 'getprotobynumber',
                'getprotoent', 'getpwent', 'getpwnam', 'getpwuid', 'getservbyname',
                'getservbyport', 'getservent', 'getsockname', 'getsockopt', 'glob', 'gmtime',
                'goto', 'grep', 'hex', 'import', 'index', 'int', 'ioctl', 'join', 'keys', 'kill', 'last',
                'lc', 'lcfirst', 'length', 'link', 'listen', 'local', 'localtime', 'log', 'lstat',
                'map', 'mkdir', 'msgctl', 'msgget', 'msgrcv', 'msgsnd', 'my', 'next', 'oct', 'open',
                'opendir', 'ord', 'our', 'pack', 'pipe', 'pop', 'pos', 'printf',
                'prototype', 'push', 'quotemeta', 'rand', 'read', 'readdir',
                'readline', 'readlink', 'readpipe', 'recv', 'redo', 'ref', 'rename',
                'reverse', 'rewinddir', 'rindex', 'rmdir', 'scalar', 'seek', 'seekdir',
                'select', 'semctl', 'semget', 'semop', 'send', 'setgrent', 'sethostent', 'setnetent',
                'setpgrp', 'setpriority', 'setprotoent', 'setpwent', 'setservent',
                'setsockopt', 'shift', 'shmctl', 'shmget', 'shmread', 'shmwrite', 'shutdown',
                'sin', 'sleep', 'socket', 'socketpair', 'sort', 'splice', 'split', 'sprintf', 'sqrt',
                'srand', 'stat', 'study', 'substr', 'symlink', 'syscall', 'sysopen', 'sysread',
                'sysseek', 'system', 'syswrite', 'tell', 'telldir', 'tie', 'tied', 'time', 'times', 'tr',
                'truncate', 'uc', 'ucfirst', 'umask', 'undef', 'unlink', 'unpack', 'unshift', 'untie',
                'utime', 'values', 'vec', 'wait', 'waitpid', 'wantarray', 'warn', 'write'), suffix=r'\b'),
             Name.Builtin),
            (r'((__(DATA|DIE|WARN)__)|(STD(IN|OUT|ERR)))\b', Name.Builtin.Pseudo),
            (r'(<<)([\'"]?)([a-zA-Z_]\w*)(\2;?\n.*?\n)(\3)(\n)',
             bygroups(String, String, String.Delimiter, String, String.Delimiter, Text)),
            (r'__END__', Comment.Preproc, 'end-part'),
            (r'\$\^[ADEFHILMOPSTWX]', Name.Variable.Global),
            (r"\$[\\\"\[\]'&`+*.,;=%~?@$!<>(^|/-](?!\w)", Name.Variable.Global),
            (r'[$@%#]+', Name.Variable, 'varname'),
            (r'0_?[0-7]+(_[0-7]+)*', Number.Oct),
            (r'0x[0-9A-Fa-f]+(_[0-9A-Fa-f]+)*', Number.Hex),
            (r'0b[01]+(_[01]+)*', Number.Bin),
            (r'(?i)(\d*(_\d*)*\.\d+(_\d*)*|\d+(_\d*)*\.\d+(_\d*)*)(e[+-]?\d+)?',
             Number.Float),
            (r'(?i)\d+(_\d*)*e[+-]?\d+(_\d*)*', Number.Float),
            (r'\d+(_\d+)*', Number.Integer),
            (r"'(\\\\|\\[^\\]|[^'\\])*'", String),
            (r'"(\\\\|\\[^\\]|[^"\\])*"', String),
            (r'`(\\\\|\\[^\\]|[^`\\])*`', String.Backtick),
            (r'<([^\s>]+)>', String.Regex),
            (r'(q|qq|qw|qr|qx)\{', String.Other, 'cb-string'),
            (r'(q|qq|qw|qr|qx)\(', String.Other, 'rb-string'),
            (r'(q|qq|qw|qr|qx)\[', String.Other, 'sb-string'),
            (r'(q|qq|qw|qr|qx)\<', String.Other, 'lt-string'),
            (r'(q|qq|qw|qr|qx)([\W_])(.|\n)*?\2', String.Other),
            (r'(package)(\s+)([a-zA-Z_]\w*(?:::[a-zA-Z_]\w*)*)',
             bygroups(Keyword, Text, Name.Namespace)),
            (r'(use|require|no)(\s+)([a-zA-Z_]\w*(?:::[a-zA-Z_]\w*)*)',
             bygroups(Keyword, Text, Name.Namespace)),
            (r'(sub)(\s+)', bygroups(Keyword, Text), 'funcname'),
            (words((
                'no', 'package', 'require', 'use'), suffix=r'\b'),
             Keyword),
            (r'(\[\]|\*\*|::|<<|>>|>=|<=>|<=|={3}|!=|=~|'
             r'!~|&&?|\|\||\.{1,3})', Operator),
            (r'[-+/*%=<>&^|!\\~]=?', Operator),
            (r'[()\[\]:;,<>/?{}]', Punctuation),  # yes, there's no shortage
                                                  # of punctuation in Perl!
            (r'(?=\w)', Name, 'name'),
        ],
        'format': [
            (r'\.\n', String.Interpol, '#pop'),
            (r'[^\n]*\n', String.Interpol),
        ],
        'varname': [
            (r'\s+', Text),
            (r'\{', Punctuation, '#pop'),    # hash syntax?
            (r'\)|,', Punctuation, '#pop'),  # argument specifier
            (r'\w+::', Name.Namespace),
            (r'[\w:]+', Name.Variable, '#pop'),
        ],
        'name': [
            (r'[a-zA-Z_]\w*(::[a-zA-Z_]\w*)*(::)?(?=\s*->)', Name.Namespace, '#pop'),
            (r'[a-zA-Z_]\w*(::[a-zA-Z_]\w*)*::', Name.Namespace, '#pop'),
            (r'[\w:]+', Name, '#pop'),
            # NOTE(review): the ``[\w:]+`` rule above already matches anything
            # ``[A-Z_]+`` could match here, so this rule looks unreachable.
            # Left untouched because reordering would change the emitted
            # token types.
            (r'[A-Z_]+(?=\W)', Name.Constant, '#pop'),
            (r'(?=\W)', Text, '#pop'),
        ],
        'funcname': [
            (r'[a-zA-Z_]\w*[!?]?', Name.Function),
            (r'\s+', Text),
            # argument declaration
            (r'(\([$@%]*\))(\s*)', bygroups(Punctuation, Text)),
            (r';', Punctuation, '#pop'),
            (r'.*?\{', Punctuation, '#pop'),
        ],
        # The four ``*-string`` states lex the body of q/qq/qw/qr/qx strings
        # with nesting delimiters.  In each of them the catch-all rule must
        # stop at a backslash so that the escape rules at the top of the state
        # get a chance to consume ``\<delimiter>``; otherwise an escaped
        # closing delimiter would terminate the string early.
        'cb-string': [
            (r'\\[{}\\]', String.Other),
            (r'\\', String.Other),
            (r'\{', String.Other, 'cb-string'),
            (r'\}', String.Other, '#pop'),
            (r'[^{}\\]+', String.Other)
        ],
        'rb-string': [
            (r'\\[()\\]', String.Other),
            (r'\\', String.Other),
            (r'\(', String.Other, 'rb-string'),
            (r'\)', String.Other, '#pop'),
            (r'[^()\\]+', String.Other)
        ],
        'sb-string': [
            (r'\\[\[\]\\]', String.Other),
            (r'\\', String.Other),
            (r'\[', String.Other, 'sb-string'),
            (r'\]', String.Other, '#pop'),
            (r'[^\[\]\\]+', String.Other)
        ],
        'lt-string': [
            (r'\\[<>\\]', String.Other),
            (r'\\', String.Other),
            (r'\<', String.Other, 'lt-string'),
            (r'\>', String.Other, '#pop'),
            (r'[^<>\\]+', String.Other)
        ],
        'end-part': [
            (r'.+', Comment.Preproc, '#pop')
        ]
    }

    def analyse_text(text):
        """
        Rate how likely *text* is to be Perl source.

        Takes only the text (there is no ``self`` parameter).  Returns
        ``True`` when the text has a ``perl`` shebang.  Otherwise returns a
        number: 0.9 when a ``my``/``our`` declaration followed by a sigil or
        ``(`` is found, 0 when it is not, and that value halved when the text
        contains ``:=``.
        """
        if shebang_matches(text, r'perl'):
            return True

        result = 0

        if re.search(r'(?:my|our)\s+[$@%(]', text):
            result += 0.9

        if ':=' in text:
            # := is not valid Perl, but it appears in unicon, so we should
            # become less confident if we think we found Perl with :=
            result /= 2

        return result


class Perl6Lexer(ExtendedRegexLexer):
    """
    For `Raku <https://www.raku.org>`_ (a.k.a. Perl 6) source code.

    .. versionadded:: 2.0
    """

    name = 'Perl6'
    aliases = ['perl6', 'pl6', 'raku']
    filenames = ['*.pl', '*.pm', '*.nqp', '*.p6', '*.6pl', '*.p6l', '*.pl6',
                 '*.6pm', '*.p6m', '*.pm6', '*.t', '*.raku', '*.rakumod',
                 '*.rakutest', '*.rakudoc']
    mimetypes = ['text/x-perl6', 'application/x-perl6']
    flags = re.MULTILINE | re.DOTALL | re.UNICODE

    # Character class used both to match identifier characters and as the
    # "must not be adjacent to" boundary in _build_word_match().
    PERL6_IDENTIFIER_RANGE = r"['\w:-]"

    PERL6_KEYWORDS = (
        # Phasers
        'BEGIN','CATCH','CHECK','CLOSE','CONTROL','DOC','END','ENTER','FIRST',
        'INIT','KEEP','LAST','LEAVE','NEXT','POST','PRE','QUIT','UNDO',
        # Keywords
        'anon','augment','but','class','constant','default','does','else',
        'elsif','enum','for','gather','given','grammar','has','if','import',
        'is','let','loop','made','make','method','module','multi','my','need',
        'orwith','our','proceed','proto','repeat','require','return',
        'return-rw','returns','role','rule','state','sub','submethod','subset',
        'succeed','supersede','token','try','unit','unless','until','use',
        'when','while','with','without',
        # Traits
        'export','native','repr','required','rw','symbol',
    )

    PERL6_BUILTINS = (
        'ACCEPTS','abs','abs2rel','absolute','accept','accessed','acos',
        'acosec','acosech','acosh','acotan','acotanh','acquire','act','action',
        'actions','add','add_attribute','add_enum_value','add_fallback',
        'add_method','add_parent','add_private_method','add_role','add_trustee',
        'adverb','after','all','allocate','allof','allowed','alternative-names',
        'annotations','antipair','antipairs','any','anyof','app_lifetime',
        'append','arch','archname','args','arity','Array','asec','asech','asin',
        'asinh','ASSIGN-KEY','ASSIGN-POS','assuming','ast','at','atan','atan2',
        'atanh','AT-KEY','atomic-assign','atomic-dec-fetch','atomic-fetch',
        'atomic-fetch-add','atomic-fetch-dec','atomic-fetch-inc',
        'atomic-fetch-sub','atomic-inc-fetch','AT-POS','attributes','auth',
        'await','backtrace','Bag','BagHash','bail-out','base','basename',
        'base-repeating','batch','BIND-KEY','BIND-POS','bind-stderr',
        'bind-stdin','bind-stdout','bind-udp','bits','bless','block','Bool',
        'bool-only','bounds','break','Bridge','broken','BUILD','build-date',
        'bytes','cache','callframe','calling-package','CALL-ME','callsame',
        'callwith','can','cancel','candidates','cando','can-ok','canonpath',
        'caps','caption','Capture','cas','catdir','categorize','categorize-list',
        'catfile','catpath','cause','ceiling','cglobal','changed','Channel',
        'chars','chdir','child','child-name','child-typename','chmod','chomp',
        'chop','chr','chrs','chunks','cis','classify','classify-list','cleanup',
        'clone','close','closed','close-stdin','cmp-ok','code','codes','collate',
        'column','comb','combinations','command','comment','compiler','Complex',
        'compose','compose_type','composer','condition','config',
        'configure_destroy','configure_type_checking','conj','connect',
        'constraints','construct','contains','contents','copy','cos','cosec',
        'cosech','cosh','cotan','cotanh','count','count-only','cpu-cores',
        'cpu-usage','CREATE','create_type','cross','cue','curdir','curupdir','d',
        'Date','DateTime','day','daycount','day-of-month','day-of-week',
        'day-of-year','days-in-month','declaration','decode','decoder','deepmap',
        'default','defined','DEFINITE','delayed','DELETE-KEY','DELETE-POS',
        'denominator','desc','DESTROY','destroyers','devnull','diag',
        'did-you-mean','die','dies-ok','dir','dirname','dir-sep','DISTROnames',
        'do','does','does-ok','done','done-testing','duckmap','dynamic','e',
        'eager','earlier','elems','emit','enclosing','encode','encoder',
        'encoding','end','ends-with','enum_from_value','enum_value_list',
        'enum_values','enums','eof','EVAL','eval-dies-ok','EVALFILE',
        'eval-lives-ok','exception','excludes-max','excludes-min','EXISTS-KEY',
        'EXISTS-POS','exit','exitcode','exp','expected','explicitly-manage',
        'expmod','extension','f','fail','fails-like','fc','feature','file',
        'filename','find_method','find_method_qualified','finish','first','flat',
        'flatmap','flip','floor','flunk','flush','fmt','format','formatter',
        'freeze','from','from-list','from-loop','from-posix','full',
        'full-barrier','get','get_value','getc','gist','got','grab','grabpairs',
        'grep','handle','handled','handles','hardware','has_accessor','Hash',
        'head','headers','hh-mm-ss','hidden','hides','hour','how','hyper','id',
        'illegal','im','in','indent','index','indices','indir','infinite',
        'infix','infix:<+>','infix:<->','install_method_cache','Instant',
        'instead','Int','int-bounds','interval','in-timezone','invalid-str',
        'invert','invocant','IO','IO::Notification.watch-path','is_trusted',
        'is_type','isa','is-absolute','isa-ok','is-approx','is-deeply',
        'is-hidden','is-initial-thread','is-int','is-lazy','is-leap-year',
        'isNaN','isnt','is-prime','is-relative','is-routine','is-setting',
        'is-win','item','iterator','join','keep','kept','KERNELnames','key',
        'keyof','keys','kill','kv','kxxv','l','lang','last','lastcall','later',
        'lazy','lc','leading','level','like','line','lines','link','List',
        'listen','live','lives-ok','local','lock','log','log10','lookup','lsb',
        'made','MAIN','make','Map','match','max','maxpairs','merge','message',
        'method','method_table','methods','migrate','min','minmax','minpairs',
        'minute','misplaced','Mix','MixHash','mkdir','mode','modified','month',
        'move','mro','msb','multi','multiness','my','name','named','named_names',
        'narrow','nativecast','native-descriptor','nativesizeof','new','new_type',
        'new-from-daycount','new-from-pairs','next','nextcallee','next-handle',
        'nextsame','nextwith','NFC','NFD','NFKC','NFKD','nl-in','nl-out',
        'nodemap','nok','none','norm','not','note','now','nude','Num',
        'numerator','Numeric','of','offset','offset-in-hours','offset-in-minutes',
        'ok','old','on-close','one','on-switch','open','opened','operation',
        'optional','ord','ords','orig','os-error','osname','out-buffer','pack',
        'package','package-kind','package-name','packages','pair','pairs',
        'pairup','parameter','params','parent','parent-name','parents','parse',
        'parse-base','parsefile','parse-names','parts','pass','path','path-sep',
        'payload','peer-host','peer-port','periods','perl','permutations','phaser',
        'pick','pickpairs','pid','placeholder','plan','plus','polar','poll',
        'polymod','pop','pos','positional','posix','postfix','postmatch',
        'precomp-ext','precomp-target','pred','prefix','prematch','prepend',
        'print','printf','print-nl','print-to','private','private_method_table',
        'proc','produce','Promise','prompt','protect','pull-one','push',
        'push-all','push-at-least','push-exactly','push-until-lazy','put',
        'qualifier-type','quit','r','race','radix','rand','range','Rat','raw',
        're','read','readchars','readonly','ready','Real','reallocate','reals',
        'reason','rebless','receive','recv','redispatcher','redo','reduce',
        'rel2abs','relative','release','rename','repeated','replacement',
        'report','reserved','resolve','restore','result','resume','rethrow',
        'reverse','right','rindex','rmdir','role','roles_to_compose','rolish',
        'roll','rootdir','roots','rotate','rotor','round','roundrobin',
        'routine-type','run','rwx','s','samecase','samemark','samewith','say',
        'schedule-on','scheduler','scope','sec','sech','second','seek','self',
        'send','Set','set_hidden','set_name','set_package','set_rw','set_value',
        'SetHash','set-instruments','setup_finalization','shape','share','shell',
        'shift','sibling','sigil','sign','signal','signals','signature','sin',
        'sinh','sink','sink-all','skip','skip-at-least','skip-at-least-pull-one',
        'skip-one','skip-rest','sleep','sleep-timer','sleep-until','Slip','slurp',
        'slurp-rest','slurpy','snap','snapper','so','socket-host','socket-port',
        'sort','source','source-package','spawn','SPEC','splice','split',
        'splitdir','splitpath','sprintf','spurt','sqrt','squish','srand','stable',
        'start','started','starts-with','status','stderr','stdout','Str',
        'sub_signature','subbuf','subbuf-rw','subname','subparse','subst',
        'subst-mutate','substr','substr-eq','substr-rw','subtest','succ','sum',
        'Supply','symlink','t','tail','take','take-rw','tan','tanh','tap',
        'target','target-name','tc','tclc','tell','then','throttle','throw',
        'throws-like','timezone','tmpdir','to','today','todo','toggle','to-posix',
        'total','trailing','trans','tree','trim','trim-leading','trim-trailing',
        'truncate','truncated-to','trusts','try_acquire','trying','twigil','type',
        'type_captures','typename','uc','udp','uncaught_handler','unimatch',
        'uniname','uninames','uniparse','uniprop','uniprops','unique','unival',
        'univals','unlike','unlink','unlock','unpack','unpolar','unshift',
        'unwrap','updir','USAGE','use-ok','utc','val','value','values','VAR',
        'variable','verbose-config','version','VMnames','volume','vow','w','wait',
        'warn','watch','watch-path','week','weekday-of-month','week-number',
        'week-year','WHAT','when','WHERE','WHEREFORE','WHICH','WHO',
        'whole-second','WHY','wordcase','words','workaround','wrap','write',
        'write-to','x','yada','year','yield','yyyy-mm-dd','z','zip','zip-latest',

    )

    PERL6_BUILTIN_CLASSES = (
        # Booleans
        'False','True',
        # Classes
        'Any','Array','Associative','AST','atomicint','Attribute','Backtrace',
        'Backtrace::Frame','Bag','Baggy','BagHash','Blob','Block','Bool','Buf',
        'Callable','CallFrame','Cancellation','Capture','CArray','Channel','Code',
        'compiler','Complex','ComplexStr','Cool','CurrentThreadScheduler',
        'Cursor','Date','Dateish','DateTime','Distro','Duration','Encoding',
        'Exception','Failure','FatRat','Grammar','Hash','HyperWhatever','Instant',
        'Int','int16','int32','int64','int8','IntStr','IO','IO::ArgFiles',
        'IO::CatHandle','IO::Handle','IO::Notification','IO::Path',
        'IO::Path::Cygwin','IO::Path::QNX','IO::Path::Unix','IO::Path::Win32',
        'IO::Pipe','IO::Socket','IO::Socket::Async','IO::Socket::INET','IO::Spec',
        'IO::Spec::Cygwin','IO::Spec::QNX','IO::Spec::Unix','IO::Spec::Win32',
        'IO::Special','Iterable','Iterator','Junction','Kernel','Label','List',
        'Lock','Lock::Async','long','longlong','Macro','Map','Match',
        'Metamodel::AttributeContainer','Metamodel::C3MRO','Metamodel::ClassHOW',
        'Metamodel::EnumHOW','Metamodel::Finalization','Metamodel::MethodContainer',
        'Metamodel::MROBasedMethodDispatch','Metamodel::MultipleInheritance',
        'Metamodel::Naming','Metamodel::Primitives','Metamodel::PrivateMethodContainer',
        'Metamodel::RoleContainer','Metamodel::Trusting','Method','Mix','MixHash',
        'Mixy','Mu','NFC','NFD','NFKC','NFKD','Nil','Num','num32','num64',
        'Numeric','NumStr','ObjAt','Order','Pair','Parameter','Perl','Pod::Block',
        'Pod::Block::Code','Pod::Block::Comment','Pod::Block::Declarator',
        'Pod::Block::Named','Pod::Block::Para','Pod::Block::Table','Pod::Heading',
        'Pod::Item','Pointer','Positional','PositionalBindFailover','Proc',
        'Proc::Async','Promise','Proxy','PseudoStash','QuantHash','Range','Rat',
        'Rational','RatStr','Real','Regex','Routine','Scalar','Scheduler',
        'Semaphore','Seq','Set','SetHash','Setty','Signature','size_t','Slip',
        'Stash','Str','StrDistance','Stringy','Sub','Submethod','Supplier',
        'Supplier::Preserving','Supply','Systemic','Tap','Telemetry',
        'Telemetry::Instrument::Thread','Telemetry::Instrument::Usage',
        'Telemetry::Period','Telemetry::Sampler','Thread','ThreadPoolScheduler',
        'UInt','uint16','uint32','uint64','uint8','Uni','utf8','Variable',
        'Version','VM','Whatever','WhateverCode','WrapHandle'
    )

    PERL6_OPERATORS = (
        'X', 'Z', 'after', 'also', 'and', 'andthen', 'before', 'cmp', 'div',
        'eq', 'eqv', 'extra', 'ff', 'fff', 'ge', 'gt', 'le', 'leg', 'lt', 'm',
        'mm', 'mod', 'ne', 'or', 'orelse', 'rx', 's', 'tr', 'x', 'xor', 'xx',
        '++', '--', '**', '!', '+', '-', '~', '?', '|', '||', '+^', '~^', '?^',
        '^', '*', '/', '%', '%%', '+&', '+<', '+>', '~&', '~<', '~>', '?&',
        'gcd', 'lcm', '+', '-', '+|', '+^', '~|', '~^', '?|', '?^',
        '~', '&', '^', 'but', 'does', '<=>', '..', '..^', '^..', '^..^',
        '!=', '==', '<', '<=', '>', '>=', '~~', '===', '!eqv',
        '&&', '||', '^^', '//', 'min', 'max', '??', '!!', 'ff', 'fff', 'so',
        'not', '<==', '==>', '<<==', '==>>','unicmp',
    )

    # Perl 6 has a *lot* of possible bracketing characters
    # this list was lifted from STD.pm6 (https://github.com/perl6/std)
    # Maps each opening character to its closing counterpart.
    PERL6_BRACKETS = {
        '\u0028': '\u0029', '\u003c': '\u003e', '\u005b': '\u005d',
        '\u007b': '\u007d', '\u00ab': '\u00bb', '\u0f3a': '\u0f3b',
        '\u0f3c': '\u0f3d', '\u169b': '\u169c', '\u2018': '\u2019',
        '\u201a': '\u2019', '\u201b': '\u2019', '\u201c': '\u201d',
        '\u201e': '\u201d', '\u201f': '\u201d', '\u2039': '\u203a',
        '\u2045': '\u2046', '\u207d': '\u207e', '\u208d': '\u208e',
        '\u2208': '\u220b', '\u2209': '\u220c', '\u220a': '\u220d',
        '\u2215': '\u29f5', '\u223c': '\u223d', '\u2243': '\u22cd',
        '\u2252': '\u2253', '\u2254': '\u2255', '\u2264': '\u2265',
        '\u2266': '\u2267', '\u2268': '\u2269', '\u226a': '\u226b',
        '\u226e': '\u226f', '\u2270': '\u2271', '\u2272': '\u2273',
        '\u2274': '\u2275', '\u2276': '\u2277', '\u2278': '\u2279',
        '\u227a': '\u227b', '\u227c': '\u227d', '\u227e': '\u227f',
        '\u2280': '\u2281', '\u2282': '\u2283', '\u2284': '\u2285',
        '\u2286': '\u2287', '\u2288': '\u2289', '\u228a': '\u228b',
        '\u228f': '\u2290', '\u2291': '\u2292', '\u2298': '\u29b8',
        '\u22a2': '\u22a3', '\u22a6': '\u2ade', '\u22a8': '\u2ae4',
        '\u22a9': '\u2ae3', '\u22ab': '\u2ae5', '\u22b0': '\u22b1',
        '\u22b2': '\u22b3', '\u22b4': '\u22b5', '\u22b6': '\u22b7',
        '\u22c9': '\u22ca', '\u22cb': '\u22cc', '\u22d0': '\u22d1',
        '\u22d6': '\u22d7', '\u22d8': '\u22d9', '\u22da': '\u22db',
        '\u22dc': '\u22dd', '\u22de': '\u22df', '\u22e0': '\u22e1',
        '\u22e2': '\u22e3', '\u22e4': '\u22e5', '\u22e6': '\u22e7',
        '\u22e8': '\u22e9', '\u22ea': '\u22eb', '\u22ec': '\u22ed',
        '\u22f0': '\u22f1', '\u22f2': '\u22fa', '\u22f3': '\u22fb',
        '\u22f4': '\u22fc', '\u22f6': '\u22fd', '\u22f7': '\u22fe',
        '\u2308': '\u2309', '\u230a': '\u230b', '\u2329': '\u232a',
        '\u23b4': '\u23b5', '\u2768': '\u2769', '\u276a': '\u276b',
        '\u276c': '\u276d', '\u276e': '\u276f', '\u2770': '\u2771',
        '\u2772': '\u2773', '\u2774': '\u2775', '\u27c3': '\u27c4',
        '\u27c5': '\u27c6', '\u27d5': '\u27d6', '\u27dd': '\u27de',
        '\u27e2': '\u27e3', '\u27e4': '\u27e5', '\u27e6': '\u27e7',
        '\u27e8': '\u27e9', '\u27ea': '\u27eb', '\u2983': '\u2984',
        '\u2985': '\u2986', '\u2987': '\u2988', '\u2989': '\u298a',
        '\u298b': '\u298c', '\u298d': '\u298e', '\u298f': '\u2990',
        '\u2991': '\u2992', '\u2993': '\u2994', '\u2995': '\u2996',
        '\u2997': '\u2998', '\u29c0': '\u29c1', '\u29c4': '\u29c5',
        '\u29cf': '\u29d0', '\u29d1': '\u29d2', '\u29d4': '\u29d5',
        '\u29d8': '\u29d9', '\u29da': '\u29db', '\u29f8': '\u29f9',
        '\u29fc': '\u29fd', '\u2a2b': '\u2a2c', '\u2a2d': '\u2a2e',
        '\u2a34': '\u2a35', '\u2a3c': '\u2a3d', '\u2a64': '\u2a65',
        '\u2a79': '\u2a7a', '\u2a7d': '\u2a7e', '\u2a7f': '\u2a80',
        '\u2a81': '\u2a82', '\u2a83': '\u2a84', '\u2a8b': '\u2a8c',
        '\u2a91': '\u2a92', '\u2a93': '\u2a94', '\u2a95': '\u2a96',
        '\u2a97': '\u2a98', '\u2a99': '\u2a9a', '\u2a9b': '\u2a9c',
        '\u2aa1': '\u2aa2', '\u2aa6': '\u2aa7', '\u2aa8': '\u2aa9',
        '\u2aaa': '\u2aab', '\u2aac': '\u2aad', '\u2aaf': '\u2ab0',
        '\u2ab3': '\u2ab4', '\u2abb': '\u2abc', '\u2abd': '\u2abe',
        '\u2abf': '\u2ac0', '\u2ac1': '\u2ac2', '\u2ac3': '\u2ac4',
        '\u2ac5': '\u2ac6', '\u2acd': '\u2ace', '\u2acf': '\u2ad0',
        '\u2ad1': '\u2ad2', '\u2ad3': '\u2ad4', '\u2ad5': '\u2ad6',
        '\u2aec': '\u2aed', '\u2af7': '\u2af8', '\u2af9': '\u2afa',
        '\u2e02': '\u2e03', '\u2e04': '\u2e05', '\u2e09': '\u2e0a',
        '\u2e0c': '\u2e0d', '\u2e1c': '\u2e1d', '\u2e20': '\u2e21',
        '\u3008': '\u3009', '\u300a': '\u300b', '\u300c': '\u300d',
        '\u300e': '\u300f', '\u3010': '\u3011', '\u3014': '\u3015',
        '\u3016': '\u3017', '\u3018': '\u3019', '\u301a': '\u301b',
        '\u301d': '\u301e', '\ufd3e': '\ufd3f', '\ufe17': '\ufe18',
        '\ufe35': '\ufe36', '\ufe37': '\ufe38', '\ufe39': '\ufe3a',
        '\ufe3b': '\ufe3c', '\ufe3d': '\ufe3e', '\ufe3f': '\ufe40',
        '\ufe41': '\ufe42', '\ufe43': '\ufe44', '\ufe47': '\ufe48',
        '\ufe59': '\ufe5a', '\ufe5b': '\ufe5c', '\ufe5d': '\ufe5e',
        '\uff08': '\uff09', '\uff1c': '\uff1e', '\uff3b': '\uff3d',
        '\uff5b': '\uff5d', '\uff5f': '\uff60', '\uff62': '\uff63',
    }

    def _build_word_match(words, boundary_regex_fragment=None, prefix='', suffix=''):
        """
        Build a regex source string matching any of *words* literally.

        Each word is passed through ``re.escape``.  Without a
        *boundary_regex_fragment* the alternation is delimited by ``\\b``;
        with one, it is guarded by a negative lookbehind and a negative
        lookahead on that fragment instead.  *prefix* and *suffix* are regex
        fragments placed immediately before and after the alternation group,
        so they apply to every word.
        """
        alternatives = r'|'.join(re.escape(x) for x in words)

        if boundary_regex_fragment is None:
            # prefix/suffix sit outside the group (as in the branch below);
            # placing them inside would bind them to the first/last word only.
            return r'\b' + prefix + r'(' + alternatives + r')' + suffix + r'\b'
        else:
            return r'(?<!' + boundary_regex_fragment + r')' + prefix + r'(' + \
                alternatives + r')' + suffix + r'(?!' + \
                boundary_regex_fragment + r')'

    def brackets_callback(token_class):
        """
        Return a lexer callback that emits one *token_class* token spanning a
        delimited construct.

        The triggering match must define a ``delimiter`` group (a run of one
        repeated opening character) and may define an ``adverbs`` group.
        """
        def callback(lexer, match, context):
            groups = match.groupdict()
            opening_chars = groups['delimiter']
            n_chars = len(opening_chars)
            adverbs = groups.get('adverbs')

            closer = Perl6Lexer.PERL6_BRACKETS.get(opening_chars[0])
            text = context.text

            if closer is None:  # it's not a mirrored character, which means we
                                # just need to look for the next occurrence

                end_pos = text.find(opening_chars, match.start('delimiter') + n_chars)
            else:   # we need to look for the corresponding closing character,
                    # keep nesting in mind
                closing_chars = closer * n_chars
                nesting_level = 1

                search_pos = match.start('delimiter')

                while nesting_level > 0:
                    next_open_pos = text.find(opening_chars, search_pos + n_chars)
                    next_close_pos = text.find(closing_chars, search_pos + n_chars)

                    if next_close_pos == -1:
                        # unterminated: consume to the end of the text
                        next_close_pos = len(text)
                        nesting_level = 0
                    elif next_open_pos != -1 and next_open_pos < next_close_pos:
                        nesting_level += 1
                        search_pos = next_open_pos
                    else:  # next_close_pos < next_open_pos
                        nesting_level -= 1
                        search_pos = next_close_pos

                end_pos = next_close_pos

            if end_pos < 0:     # if we didn't find a closer, just highlight the
                                # rest of the text in this class
                end_pos = len(text)

            if adverbs is not None and re.search(r':to\b', adverbs):
                # heredoc: the delimited text is the terminator; extend the
                # token up to the line consisting of that terminator
                heredoc_terminator = text[match.start('delimiter') + n_chars:end_pos]
                end_heredoc = re.search(r'^\s*' + re.escape(heredoc_terminator) +
                                        r'\s*$', text[end_pos:], re.MULTILINE)

                if end_heredoc:
                    end_pos += end_heredoc.end()
                else:
                    end_pos = len(text)

            yield match.start(), token_class, text[match.start():end_pos + n_chars]
            context.pos = end_pos + n_chars

        return callback

    def opening_brace_callback(lexer, match, context):
        """
        Emit the matched ``{`` as Text and track brace nesting for code
        embedded in a ``token`` state.
        """
        stack = context.stack

        yield match.start(), Text, context.text[match.start():match.end()]
        context.pos = match.end()

        # if we encounter an opening brace and we're one level
        # below a token state, it means we need to increment
        # the nesting level for braces so we know later when
        # we should return to the token rules.
        if len(stack) > 2 and stack[-2] == 'token':
            context.perl6_token_nesting_level += 1

    def closing_brace_callback(lexer, match, context):
        """
        Emit the matched ``}`` as Text and pop back to the ``token`` state
        once the braces of the embedded code are balanced.
        """
        stack = context.stack

        yield match.start(), Text, context.text[match.start():match.end()]
        context.pos = match.end()

        # if we encounter a free closing brace and we're one level
        # below a token state, it means we need to check the nesting
        # level to see if we need to return to the token state.
        if len(stack) > 2 and stack[-2] == 'token':
            context.perl6_token_nesting_level -= 1
            if context.perl6_token_nesting_level == 0:
                stack.pop()

    def embedded_perl6_callback(lexer, match, context):
        """
        Emit the matched ``{`` as Text, reset the brace nesting counter to 1
        and push the ``root`` state to lex the embedded code.
        """
        context.perl6_token_nesting_level = 1
        yield match.start(), Text, context.text[match.start():match.end()]
        context.pos = match.end()
        context.stack.append('root')

    # If you're modifying these rules, be careful if you need to process '{' or '}'
    # characters. We have special logic for processing these characters (due to the fact
    # that you can nest Perl 6 code in regex blocks), so if you need to process one of
    # them, make sure you also process the corresponding one!
    tokens = {
        'common': [
            (r'#[`|=](?P<delimiter>(?P<first_char>[' + ''.join(PERL6_BRACKETS) + r'])(?P=first_char)*)',
             brackets_callback(Comment.Multiline)),
            (r'#[^\n]*$', Comment.Single),
            (r'^(\s*)=begin\s+(\w+)\b.*?^\1=end\s+\2', Comment.Multiline),
            (r'^(\s*)=for.*?\n\s*?\n', Comment.Multiline),
            (r'^=.*?\n\s*?\n', Comment.Multiline),
            (r'(regex|token|rule)(\s*' + PERL6_IDENTIFIER_RANGE + '+:sym)',
             bygroups(Keyword, Name), 'token-sym-brackets'),
            (r'(regex|token|rule)(?!' + PERL6_IDENTIFIER_RANGE + r')(\s*' + PERL6_IDENTIFIER_RANGE + '+)?',
             bygroups(Keyword, Name), 'pre-token'),
            # deal with a special case in the Perl 6 grammar (role q { ... })
            (r'(role)(\s+)(q)(\s*)', bygroups(Keyword, Text, Name, Text)),
            (_build_word_match(PERL6_KEYWORDS, PERL6_IDENTIFIER_RANGE), Keyword),
            (_build_word_match(PERL6_BUILTIN_CLASSES, PERL6_IDENTIFIER_RANGE, suffix='(?::[UD])?'),
             Name.Builtin),
            (_build_word_match(PERL6_BUILTINS, PERL6_IDENTIFIER_RANGE), Name.Builtin),
            # copied from PerlLexer
            (r'[$@%&][.^:?=!~]?' + PERL6_IDENTIFIER_RANGE + '+(?:<<.*?>>|<.*?>|«.*?»)*',
             Name.Variable),
            (r'\$[!/](?:<<.*?>>|<.*?>|«.*?»)*', Name.Variable.Global),
            (r'::\?\w+', Name.Variable.Global),
            (r'[$@%&]\*' + PERL6_IDENTIFIER_RANGE + '+(?:<<.*?>>|<.*?>|«.*?»)*',
             Name.Variable.Global),
            (r'\$(?:<.*?>)+', Name.Variable),
            (r'(?:q|qq|Q)[a-zA-Z]?\s*(?P<adverbs>:[\w\s:]+)?\s*(?P<delimiter>(?P<first_char>[^0-9a-zA-Z:\s])'
             r'(?P=first_char)*)', brackets_callback(String)),
            # copied from PerlLexer
            (r'0_?[0-7]+(_[0-7]+)*', Number.Oct),
            (r'0x[0-9A-Fa-f]+(_[0-9A-Fa-f]+)*', Number.Hex),
            (r'0b[01]+(_[01]+)*', Number.Bin),
            (r'(?i)(\d*(_\d*)*\.\d+(_\d*)*|\d+(_\d*)*\.\d+(_\d*)*)(e[+-]?\d+)?',
             Number.Float),
            (r'(?i)\d+(_\d*)*e[+-]?\d+(_\d*)*', Number.Float),
            (r'\d+(_\d+)*', Number.Integer),
            (r'(?<=~~)\s*/(?:\\\\|\\/|.)*?/', String.Regex),
            (r'(?<=[=(,])\s*/(?:\\\\|\\/|.)*?/', String.Regex),
            (r'm\w+(?=\()', Name),
            (r'(?:m|ms|rx)\s*(?P<adverbs>:[\w\s:]+)?\s*(?P<delimiter>(?P<first_char>[^\w:\s])'
             r'(?P=first_char)*)', brackets_callback(String.Regex)),
            (r'(?:s|ss|tr)\s*(?::[\w\s:]+)?\s*/(?:\\\\|\\/|.)*?/(?:\\\\|\\/|.)*?/',
             String.Regex),
            (r'<[^\s=].*?\S>', String),
            (_build_word_match(PERL6_OPERATORS), Operator),
            (r'\w' + PERL6_IDENTIFIER_RANGE + '*', Name),
            (r"'(\\\\|\\[^\\]|[^'\\])*'", String),
            (r'"(\\\\|\\[^\\]|[^"\\])*"', String),
        ],
        'root': [
            include('common'),
            (r'\{', opening_brace_callback),
            (r'\}', closing_brace_callback),
            (r'.+?', Text),
        ],
        'pre-token': [
            include('common'),
            (r'\{', Text, ('#pop', 'token')),
            (r'.+?', Text),
        ],
        'token-sym-brackets': [
            (r'(?P<delimiter>(?P<first_char>[' + ''.join(PERL6_BRACKETS) + '])(?P=first_char)*)',
             brackets_callback(Name), ('#pop', 'pre-token')),
            default(('#pop', 'pre-token')),
        ],
        'token': [
            (r'\}', Text, '#pop'),
            (r'(?<=:)(?:my|our|state|constant|temp|let).*?;', using(this)),
            # make sure that quotes in character classes aren't treated as strings
            (r'<(?:[-!?+.]\s*)?\[.*?\]>', String.Regex),
            # make sure that '#' characters in quotes aren't treated as comments
            (r"(?<!\\)'(\\\\|\\[^\\]|[^'\\])*'", String.Regex),
            (r'(?<!\\)"(\\\\|\\[^\\]|[^"\\])*"', String.Regex),
            (r'#.*?$', Comment.Single),
            (r'\{', embedded_perl6_callback),
            ('.+?', String.Regex),
        ],
    }

    def analyse_text(text):
        """
        Rate how likely *text* is to be Raku (Perl 6) source.

        Takes only the text (there is no ``self`` parameter).  Pod blocks are
        stripped first.  Returns ``True`` for a matching shebang, for a
        ``v6`` / ``use v6`` line, or for a class-like declaration that is
        scoped or accompanied by a ``my``/``our``/``has`` declaration.
        Otherwise returns the accumulated rating, halved when the text
        contains ``:=``.
        """
        def strip_pod(lines):
            """Return *lines* without Pod blocks and their directive lines."""
            in_pod = False
            stripped_lines = []

            for line in lines:
                if re.match(r'^=(?:end|cut)', line):
                    in_pod = False
                elif re.match(r'^=\w+', line):
                    in_pod = True
                elif not in_pod:
                    stripped_lines.append(line)

            return stripped_lines

        # XXX handle block comments
        lines = text.splitlines()
        lines = strip_pod(lines)
        text = '\n'.join(lines)

        if shebang_matches(text, r'perl6|rakudo|niecza|pugs'):
            return True

        saw_perl_decl = False
        rating = False

        # check for my/our/has declarations
        if re.search(r"(?:my|our|has)\s+(?:" + Perl6Lexer.PERL6_IDENTIFIER_RANGE +
                     r"+\s+)?[$@%&(]", text):
            rating = 0.8
            saw_perl_decl = True

        # Only the leading run of blank/comment lines, version pragmas and
        # class-like declarations is inspected; the first other line stops
        # the scan.
        for line in lines:
            line = re.sub('#.*', '', line)
            if re.match(r'^\s*$', line):
                continue

            # match v6; use v6; use v6.0; use v6.0.0;
            if re.match(r'^\s*(?:use\s+)?v6(?:\.\d(?:\.\d)?)?;', line):
                return True
            # match class, module, role, enum, grammar declarations
            class_decl = re.match(r'^\s*(?:(?P<scope>my|our)\s+)?(?:module|class|role|enum|grammar)', line)
            if class_decl:
                if saw_perl_decl or class_decl.group('scope') is not None:
                    return True
                rating = 0.05
                continue
            break

        if ':=' in text:
            # Same logic as above for PerlLexer
            rating /= 2

        return rating

    def __init__(self, **options):
        """
        Initialise the lexer, using ``'utf-8'`` as the encoding when the
        ``encoding`` option is not supplied.
        """
        super().__init__(**options)
        self.encoding = options.get('encoding', 'utf-8')