1# -*- coding: utf-8 -*-
2"""
3    pygments.lexers.perl
4    ~~~~~~~~~~~~~~~~~~~~
5
6    Lexers for Perl, Raku and related languages.
7
8    :copyright: Copyright 2006-2020 by the Pygments team, see AUTHORS.
9    :license: BSD, see LICENSE for details.
10"""
11
12import re
13
14from pygments.lexer import RegexLexer, ExtendedRegexLexer, include, bygroups, \
15    using, this, default, words
16from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
17    Number, Punctuation
18from pygments.util import shebang_matches
19
20__all__ = ['PerlLexer', 'Perl6Lexer']
21
22
class PerlLexer(RegexLexer):
    """
    For `Perl <https://www.perl.org>`_ source code.

    The lexer is a plain regex state machine: ``root`` holds the bulk of the
    rules and pushes small helper states for constructs that need more than
    one rule (balanced-delimiter regexes, ``format`` bodies, variable and
    sub names, nested ``q{}``-style strings and the ``__END__`` section).
    Rule order inside every state is significant, since the first matching
    rule wins.
    """

    name = 'Perl'
    aliases = ['perl', 'pl']
    filenames = ['*.pl', '*.pm', '*.t', '*.perl']
    mimetypes = ['text/x-perl', 'application/x-perl']

    # DOTALL lets ``.`` cross newlines (POD blocks, heredocs and the
    # ``end-part`` state rely on it); MULTILINE makes ``^``/``$`` line-based.
    flags = re.DOTALL | re.MULTILINE
    # TODO: give this to a perl guy who knows how to parse perl...
    tokens = {
        # Second half of a regex whose opening part was matched in ``root``
        # (``m`` followed by a delimiter, or the pattern half of ``s{..}``).
        # Each rule matches one complete delimited chunk plus trailing flags
        # and returns to the previous state.
        'balanced-regex': [
            (r'/(\\\\|\\[^\\]|[^\\/])*/[egimosx]*', String.Regex, '#pop'),
            (r'!(\\\\|\\[^\\]|[^\\!])*![egimosx]*', String.Regex, '#pop'),
            (r'\\(\\\\|[^\\])*\\[egimosx]*', String.Regex, '#pop'),
            (r'\{(\\\\|\\[^\\]|[^\\}])*\}[egimosx]*', String.Regex, '#pop'),
            (r'<(\\\\|\\[^\\]|[^\\>])*>[egimosx]*', String.Regex, '#pop'),
            (r'\[(\\\\|\\[^\\]|[^\\\]])*\][egimosx]*', String.Regex, '#pop'),
            (r'\((\\\\|\\[^\\]|[^\\)])*\)[egimosx]*', String.Regex, '#pop'),
            (r'@(\\\\|\\[^\\]|[^\\@])*@[egimosx]*', String.Regex, '#pop'),
            (r'%(\\\\|\\[^\\]|[^\\%])*%[egimosx]*', String.Regex, '#pop'),
            (r'\$(\\\\|\\[^\\]|[^\\$])*\$[egimosx]*', String.Regex, '#pop'),
        ],
        'root': [
            # hashbang is only recognised at the very start of the input
            (r'\A\#!.+?$', Comment.Hashbang),
            (r'\#.*?$', Comment.Single),
            # POD: from a line starting with ``=word`` up to the next ``=cut``
            (r'^=[a-zA-Z0-9]+\s+.*?\n=cut', Comment.Multiline),
            (words((
                'case', 'continue', 'do', 'else', 'elsif', 'for', 'foreach',
                'if', 'last', 'my', 'next', 'our', 'redo', 'reset', 'then',
                'unless', 'until', 'while', 'print', 'new', 'BEGIN',
                'CHECK', 'INIT', 'END', 'return'), suffix=r'\b'),
             Keyword),
            # ``format NAME =`` header; the picture lines are lexed in 'format'
            (r'(format)(\s+)(\w+)(\s*)(=)(\s*\n)',
             bygroups(Keyword, Text, Name, Text, Punctuation, Text), 'format'),
            (r'(eq|lt|gt|le|ge|ne|not|and|or|cmp)\b', Operator.Word),
            # common delimiters
            (r's/(\\\\|\\[^\\]|[^\\/])*/(\\\\|\\[^\\]|[^\\/])*/[egimosx]*',
                String.Regex),
            (r's!(\\\\|\\!|[^!])*!(\\\\|\\!|[^!])*![egimosx]*', String.Regex),
            (r's\\(\\\\|[^\\])*\\(\\\\|[^\\])*\\[egimosx]*', String.Regex),
            (r's@(\\\\|\\[^\\]|[^\\@])*@(\\\\|\\[^\\]|[^\\@])*@[egimosx]*',
                String.Regex),
            (r's%(\\\\|\\[^\\]|[^\\%])*%(\\\\|\\[^\\]|[^\\%])*%[egimosx]*',
                String.Regex),
            # balanced delimiters: only the pattern half is matched here, the
            # replacement half is picked up by 'balanced-regex'
            (r's\{(\\\\|\\[^\\]|[^\\}])*\}\s*', String.Regex, 'balanced-regex'),
            (r's<(\\\\|\\[^\\]|[^\\>])*>\s*', String.Regex, 'balanced-regex'),
            (r's\[(\\\\|\\[^\\]|[^\\\]])*\]\s*', String.Regex,
                'balanced-regex'),
            (r's\((\\\\|\\[^\\]|[^\\)])*\)\s*', String.Regex,
                'balanced-regex'),

            # match operator: ``/.../`` or ``m/.../`` on a single line
            (r'm?/(\\\\|\\[^\\]|[^\\/\n])*/[gcimosx]*', String.Regex),
            # ``m`` followed by any supported delimiter: the lookahead leaves
            # the delimiter for 'balanced-regex' to consume
            (r'm(?=[/!\\{<\[(@%$])', String.Regex, 'balanced-regex'),
            # a (possibly multi-line) regex directly after ``=~`` or ``(``
            (r'((?<==~)|(?<=\())\s*/(\\\\|\\[^\\]|[^\\/])*/[gcimosx]*',
                String.Regex),
            (r'\s+', Text),
            (words((
                'abs', 'accept', 'alarm', 'atan2', 'bind', 'binmode', 'bless', 'caller', 'chdir',
                'chmod', 'chomp', 'chop', 'chown', 'chr', 'chroot', 'close', 'closedir', 'connect',
                'continue', 'cos', 'crypt', 'dbmclose', 'dbmopen', 'defined', 'delete', 'die',
                'dump', 'each', 'endgrent', 'endhostent', 'endnetent', 'endprotoent',
                'endpwent', 'endservent', 'eof', 'eval', 'exec', 'exists', 'exit', 'exp', 'fcntl',
                'fileno', 'flock', 'fork', 'format', 'formline', 'getc', 'getgrent', 'getgrgid',
                'getgrnam', 'gethostbyaddr', 'gethostbyname', 'gethostent', 'getlogin',
                'getnetbyaddr', 'getnetbyname', 'getnetent', 'getpeername', 'getpgrp',
                'getppid', 'getpriority', 'getprotobyname', 'getprotobynumber',
                'getprotoent', 'getpwent', 'getpwnam', 'getpwuid', 'getservbyname',
                'getservbyport', 'getservent', 'getsockname', 'getsockopt', 'glob', 'gmtime',
                'goto', 'grep', 'hex', 'import', 'index', 'int', 'ioctl', 'join', 'keys', 'kill', 'last',
                'lc', 'lcfirst', 'length', 'link', 'listen', 'local', 'localtime', 'log', 'lstat',
                'map', 'mkdir', 'msgctl', 'msgget', 'msgrcv', 'msgsnd', 'my', 'next', 'oct', 'open',
                'opendir', 'ord', 'our', 'pack', 'pipe', 'pop', 'pos', 'printf',
                'prototype', 'push', 'quotemeta', 'rand', 'read', 'readdir',
                'readline', 'readlink', 'readpipe', 'recv', 'redo', 'ref', 'rename',
                'reverse', 'rewinddir', 'rindex', 'rmdir', 'scalar', 'seek', 'seekdir',
                'select', 'semctl', 'semget', 'semop', 'send', 'setgrent', 'sethostent', 'setnetent',
                'setpgrp', 'setpriority', 'setprotoent', 'setpwent', 'setservent',
                'setsockopt', 'shift', 'shmctl', 'shmget', 'shmread', 'shmwrite', 'shutdown',
                'sin', 'sleep', 'socket', 'socketpair', 'sort', 'splice', 'split', 'sprintf', 'sqrt',
                'srand', 'stat', 'study', 'substr', 'symlink', 'syscall', 'sysopen', 'sysread',
                'sysseek', 'system', 'syswrite', 'tell', 'telldir', 'tie', 'tied', 'time', 'times', 'tr',
                'truncate', 'uc', 'ucfirst', 'umask', 'undef', 'unlink', 'unpack', 'unshift', 'untie',
                'utime', 'values', 'vec', 'wait', 'waitpid', 'wantarray', 'warn', 'write'), suffix=r'\b'),
             Name.Builtin),
            (r'((__(DATA|DIE|WARN)__)|(STD(IN|OUT|ERR)))\b', Name.Builtin.Pseudo),
            # heredoc: ``<<`` + optional quote + tag, body up to a line that
            # consists of the tag alone
            (r'(<<)([\'"]?)([a-zA-Z_]\w*)(\2;?\n.*?\n)(\3)(\n)',
             bygroups(String, String, String.Delimiter, String, String.Delimiter, Text)),
            # everything after ``__END__`` is swallowed by 'end-part'
            (r'__END__', Comment.Preproc, 'end-part'),
            (r'\$\^[ADEFHILMOPSTWX]', Name.Variable.Global),
            (r"\$[\\\"\[\]'&`+*.,;=%~?@$!<>(^|/-](?!\w)", Name.Variable.Global),
            # sigil(s); the identifier itself is lexed in 'varname'
            (r'[$@%#]+', Name.Variable, 'varname'),
            (r'0_?[0-7]+(_[0-7]+)*', Number.Oct),
            (r'0x[0-9A-Fa-f]+(_[0-9A-Fa-f]+)*', Number.Hex),
            (r'0b[01]+(_[01]+)*', Number.Bin),
            (r'(?i)(\d*(_\d*)*\.\d+(_\d*)*|\d+(_\d*)*\.\d+(_\d*)*)(e[+-]?\d+)?',
             Number.Float),
            (r'(?i)\d+(_\d*)*e[+-]?\d+(_\d*)*', Number.Float),
            (r'\d+(_\d+)*', Number.Integer),
            (r"'(\\\\|\\[^\\]|[^'\\])*'", String),
            (r'"(\\\\|\\[^\\]|[^"\\])*"', String),
            (r'`(\\\\|\\[^\\]|[^`\\])*`', String.Backtick),
            (r'<([^\s>]+)>', String.Regex),
            # quote-like operators with bracketing delimiters nest, so each
            # bracket kind gets its own recursive state
            (r'(q|qq|qw|qr|qx)\{', String.Other, 'cb-string'),
            (r'(q|qq|qw|qr|qx)\(', String.Other, 'rb-string'),
            (r'(q|qq|qw|qr|qx)\[', String.Other, 'sb-string'),
            (r'(q|qq|qw|qr|qx)\<', String.Other, 'lt-string'),
            # any other non-word delimiter: up to its next occurrence
            (r'(q|qq|qw|qr|qx)([\W_])(.|\n)*?\2', String.Other),
            (r'(package)(\s+)([a-zA-Z_]\w*(?:::[a-zA-Z_]\w*)*)',
             bygroups(Keyword, Text, Name.Namespace)),
            (r'(use|require|no)(\s+)([a-zA-Z_]\w*(?:::[a-zA-Z_]\w*)*)',
             bygroups(Keyword, Text, Name.Namespace)),
            (r'(sub)(\s+)', bygroups(Keyword, Text), 'funcname'),
            (words((
                'no', 'package', 'require', 'use'), suffix=r'\b'),
             Keyword),
            (r'(\[\]|\*\*|::|<<|>>|>=|<=>|<=|={3}|!=|=~|'
             r'!~|&&?|\|\||\.{1,3})', Operator),
            (r'[-+/*%=<>&^|!\\~]=?', Operator),
            (r'[()\[\]:;,<>/?{}]', Punctuation),  # yes, there's no shortage
                                                  # of punctuation in Perl!
            # zero-width: hand any remaining word over to the 'name' state
            (r'(?=\w)', Name, 'name'),
        ],
        # Body of a ``format`` declaration: every line is String.Interpol
        # until a line consisting of a single dot.
        'format': [
            (r'\.\n', String.Interpol, '#pop'),
            (r'[^\n]*\n', String.Interpol),
        ],
        # What follows a sigil matched in 'root'.
        'varname': [
            (r'\s+', Text),
            (r'\{', Punctuation, '#pop'),    # hash syntax?
            (r'\)|,', Punctuation, '#pop'),  # argument specifier
            (r'\w+::', Name.Namespace),
            (r'[\w:]+', Name.Variable, '#pop'),
        ],
        # A bare word. Entered from 'root' through the ``(?=\w)`` lookahead.
        'name': [
            (r'[a-zA-Z_]\w*(::[a-zA-Z_]\w*)*(::)?(?=\s*->)', Name.Namespace, '#pop'),
            (r'[a-zA-Z_]\w*(::[a-zA-Z_]\w*)*::', Name.Namespace, '#pop'),
            (r'[\w:]+', Name, '#pop'),
            # NOTE(review): within this class 'name' is only entered when the
            # next character is a word character, so the ``[\w:]+`` rule above
            # always matches first and the two rules below never fire. They
            # are kept unchanged to avoid altering the emitted token types.
            (r'[A-Z_]+(?=\W)', Name.Constant, '#pop'),
            (r'(?=\W)', Text, '#pop'),
        ],
        # After ``sub``: the name, an optional prototype, then either ``;``
        # (forward declaration) or everything up to the opening brace.
        'funcname': [
            (r'[a-zA-Z_]\w*[!?]?', Name.Function),
            (r'\s+', Text),
            # argument declaration
            (r'(\([$@%]*\))(\s*)', bygroups(Punctuation, Text)),
            (r';', Punctuation, '#pop'),
            (r'.*?\{', Punctuation, '#pop'),
        ],
        # Nested bracket strings. In each state: an escaped delimiter or
        # backslash, a lone backslash, a nested opener (push), the closer
        # (pop), and finally a run of ordinary characters. The ordinary-run
        # class must exclude the backslash, otherwise it would swallow the
        # backslash of an escaped closer and the string would end too early.
        'cb-string': [
            (r'\\[{}\\]', String.Other),
            (r'\\', String.Other),
            (r'\{', String.Other, 'cb-string'),
            (r'\}', String.Other, '#pop'),
            (r'[^{}\\]+', String.Other)
        ],
        'rb-string': [
            (r'\\[()\\]', String.Other),
            (r'\\', String.Other),
            (r'\(', String.Other, 'rb-string'),
            (r'\)', String.Other, '#pop'),
            (r'[^()\\]+', String.Other)
        ],
        'sb-string': [
            (r'\\[\[\]\\]', String.Other),
            (r'\\', String.Other),
            (r'\[', String.Other, 'sb-string'),
            (r'\]', String.Other, '#pop'),
            (r'[^\[\]\\]+', String.Other)
        ],
        'lt-string': [
            (r'\\[<>\\]', String.Other),
            (r'\\', String.Other),
            (r'\<', String.Other, 'lt-string'),
            (r'\>', String.Other, '#pop'),
            (r'[^<>\\]+', String.Other)
        ],
        # Everything after ``__END__``; ``.+`` spans newlines because of
        # re.DOTALL, so this consumes the rest of the input in one token.
        'end-part': [
            (r'.+', Comment.Preproc, '#pop')
        ]
    }

    def analyse_text(text):
        """
        Estimate how likely *text* is Perl source.

        Returns ``True`` when the text has a ``perl`` shebang. Otherwise
        returns a score: 0.9 if a ``my``/``our`` declaration followed by a
        sigil or ``(`` is found, else 0; the score is halved when the text
        contains ``:=``.

        Note that the function is declared without ``self``.
        """
        if shebang_matches(text, r'perl'):
            return True

        result = 0

        if re.search(r'(?:my|our)\s+[$@%(]', text):
            result += 0.9

        if ':=' in text:
            # := is not valid Perl, but it appears in unicon, so we should
            # become less confident if we think we found Perl with :=
            result /= 2

        return result
223
224
225class Perl6Lexer(ExtendedRegexLexer):
226    """
227    For `Raku <https://www.raku.org>`_ (a.k.a. Perl 6) source code.
228
229    .. versionadded:: 2.0
230    """
231
    # Lexer registration metadata.
    name = 'Perl6'
    aliases = ['perl6', 'pl6', 'raku']
    # NOTE: '*.pl', '*.pm' and '*.t' are also claimed by PerlLexer.filenames,
    # so a file name alone does not tell the two lexers apart.
    filenames = ['*.pl', '*.pm', '*.nqp', '*.p6', '*.6pl', '*.p6l', '*.pl6',
                 '*.6pm', '*.p6m', '*.pm6', '*.t', '*.raku', '*.rakumod',
                 '*.rakutest', '*.rakudoc']
    mimetypes = ['text/x-perl6', 'application/x-perl6']
    # Regex flags: line-based ^/$, '.' matching newlines, Unicode-aware \w.
    flags = re.MULTILINE | re.DOTALL | re.UNICODE

    # Regex character class matching one apostrophe, word character, colon
    # or hyphen. Presumably the characters that may appear in an identifier
    # (going by the name) -- confirm against the rules that use it.
    PERL6_IDENTIFIER_RANGE = r"['\w:-]"
241
    # Literal keyword names, grouped by kind. Several names here (for example
    # 'default', 'does', 'my', 'method') are also listed in PERL6_BUILTINS.
    # NOTE(review): how the lists are turned into lexer rules, and which one
    # takes precedence, is decided where they are consumed -- not visible here.
    PERL6_KEYWORDS = (
        # Phasers
        'BEGIN','CATCH','CHECK','CLOSE','CONTROL','DOC','END','ENTER','FIRST',
        'INIT','KEEP','LAST','LEAVE','NEXT','POST','PRE','QUIT','UNDO',
        # Keywords
        'anon','augment','but','class','constant','default','does','else',
        'elsif','enum','for','gather','given','grammar','has','if','import',
        'is','let','loop','made','make','method','module','multi','my','need',
        'orwith','our','proceed','proto','repeat','require','return',
        'return-rw','returns','role','rule','state','sub','submethod','subset',
        'succeed','supersede','token','try','unit','unless','until','use',
        'when','while','with','without',
        # Traits
        'export','native','repr','required','rw','symbol',
    )
257
    # Literal names of builtin routines and methods. Entries are plain
    # strings, not regexes: some contain regex metacharacters or punctuation
    # (e.g. 'infix:<+>', 'IO::Notification.watch-path') and therefore need
    # escaping before being placed in a pattern. Some names also occur in
    # PERL6_KEYWORDS and PERL6_BUILTIN_CLASSES.
    PERL6_BUILTINS = (
        'ACCEPTS','abs','abs2rel','absolute','accept','accessed','acos',
        'acosec','acosech','acosh','acotan','acotanh','acquire','act','action',
        'actions','add','add_attribute','add_enum_value','add_fallback',
        'add_method','add_parent','add_private_method','add_role','add_trustee',
        'adverb','after','all','allocate','allof','allowed','alternative-names',
        'annotations','antipair','antipairs','any','anyof','app_lifetime',
        'append','arch','archname','args','arity','Array','asec','asech','asin',
        'asinh','ASSIGN-KEY','ASSIGN-POS','assuming','ast','at','atan','atan2',
        'atanh','AT-KEY','atomic-assign','atomic-dec-fetch','atomic-fetch',
        'atomic-fetch-add','atomic-fetch-dec','atomic-fetch-inc',
        'atomic-fetch-sub','atomic-inc-fetch','AT-POS','attributes','auth',
        'await','backtrace','Bag','BagHash','bail-out','base','basename',
        'base-repeating','batch','BIND-KEY','BIND-POS','bind-stderr',
        'bind-stdin','bind-stdout','bind-udp','bits','bless','block','Bool',
        'bool-only','bounds','break','Bridge','broken','BUILD','build-date',
        'bytes','cache','callframe','calling-package','CALL-ME','callsame',
        'callwith','can','cancel','candidates','cando','can-ok','canonpath',
        'caps','caption','Capture','cas','catdir','categorize','categorize-list',
        'catfile','catpath','cause','ceiling','cglobal','changed','Channel',
        'chars','chdir','child','child-name','child-typename','chmod','chomp',
        'chop','chr','chrs','chunks','cis','classify','classify-list','cleanup',
        'clone','close','closed','close-stdin','cmp-ok','code','codes','collate',
        'column','comb','combinations','command','comment','compiler','Complex',
        'compose','compose_type','composer','condition','config',
        'configure_destroy','configure_type_checking','conj','connect',
        'constraints','construct','contains','contents','copy','cos','cosec',
        'cosech','cosh','cotan','cotanh','count','count-only','cpu-cores',
        'cpu-usage','CREATE','create_type','cross','cue','curdir','curupdir','d',
        'Date','DateTime','day','daycount','day-of-month','day-of-week',
        'day-of-year','days-in-month','declaration','decode','decoder','deepmap',
        'default','defined','DEFINITE','delayed','DELETE-KEY','DELETE-POS',
        'denominator','desc','DESTROY','destroyers','devnull','diag',
        'did-you-mean','die','dies-ok','dir','dirname','dir-sep','DISTROnames',
        'do','does','does-ok','done','done-testing','duckmap','dynamic','e',
        'eager','earlier','elems','emit','enclosing','encode','encoder',
        'encoding','end','ends-with','enum_from_value','enum_value_list',
        'enum_values','enums','eof','EVAL','eval-dies-ok','EVALFILE',
        'eval-lives-ok','exception','excludes-max','excludes-min','EXISTS-KEY',
        'EXISTS-POS','exit','exitcode','exp','expected','explicitly-manage',
        'expmod','extension','f','fail','fails-like','fc','feature','file',
        'filename','find_method','find_method_qualified','finish','first','flat',
        'flatmap','flip','floor','flunk','flush','fmt','format','formatter',
        'freeze','from','from-list','from-loop','from-posix','full',
        'full-barrier','get','get_value','getc','gist','got','grab','grabpairs',
        'grep','handle','handled','handles','hardware','has_accessor','Hash',
        'head','headers','hh-mm-ss','hidden','hides','hour','how','hyper','id',
        'illegal','im','in','indent','index','indices','indir','infinite',
        'infix','infix:<+>','infix:<->','install_method_cache','Instant',
        'instead','Int','int-bounds','interval','in-timezone','invalid-str',
        'invert','invocant','IO','IO::Notification.watch-path','is_trusted',
        'is_type','isa','is-absolute','isa-ok','is-approx','is-deeply',
        'is-hidden','is-initial-thread','is-int','is-lazy','is-leap-year',
        'isNaN','isnt','is-prime','is-relative','is-routine','is-setting',
        'is-win','item','iterator','join','keep','kept','KERNELnames','key',
        'keyof','keys','kill','kv','kxxv','l','lang','last','lastcall','later',
        'lazy','lc','leading','level','like','line','lines','link','List',
        'listen','live','lives-ok','local','lock','log','log10','lookup','lsb',
        'made','MAIN','make','Map','match','max','maxpairs','merge','message',
        'method','method_table','methods','migrate','min','minmax','minpairs',
        'minute','misplaced','Mix','MixHash','mkdir','mode','modified','month',
        'move','mro','msb','multi','multiness','my','name','named','named_names',
        'narrow','nativecast','native-descriptor','nativesizeof','new','new_type',
        'new-from-daycount','new-from-pairs','next','nextcallee','next-handle',
        'nextsame','nextwith','NFC','NFD','NFKC','NFKD','nl-in','nl-out',
        'nodemap','nok','none','norm','not','note','now','nude','Num',
        'numerator','Numeric','of','offset','offset-in-hours','offset-in-minutes',
        'ok','old','on-close','one','on-switch','open','opened','operation',
        'optional','ord','ords','orig','os-error','osname','out-buffer','pack',
        'package','package-kind','package-name','packages','pair','pairs',
        'pairup','parameter','params','parent','parent-name','parents','parse',
        'parse-base','parsefile','parse-names','parts','pass','path','path-sep',
        'payload','peer-host','peer-port','periods','perl','permutations','phaser',
        'pick','pickpairs','pid','placeholder','plan','plus','polar','poll',
        'polymod','pop','pos','positional','posix','postfix','postmatch',
        'precomp-ext','precomp-target','pred','prefix','prematch','prepend',
        'print','printf','print-nl','print-to','private','private_method_table',
        'proc','produce','Promise','prompt','protect','pull-one','push',
        'push-all','push-at-least','push-exactly','push-until-lazy','put',
        'qualifier-type','quit','r','race','radix','rand','range','Rat','raw',
        're','read','readchars','readonly','ready','Real','reallocate','reals',
        'reason','rebless','receive','recv','redispatcher','redo','reduce',
        'rel2abs','relative','release','rename','repeated','replacement',
        'report','reserved','resolve','restore','result','resume','rethrow',
        'reverse','right','rindex','rmdir','role','roles_to_compose','rolish',
        'roll','rootdir','roots','rotate','rotor','round','roundrobin',
        'routine-type','run','rwx','s','samecase','samemark','samewith','say',
        'schedule-on','scheduler','scope','sec','sech','second','seek','self',
        'send','Set','set_hidden','set_name','set_package','set_rw','set_value',
        'SetHash','set-instruments','setup_finalization','shape','share','shell',
        'shift','sibling','sigil','sign','signal','signals','signature','sin',
        'sinh','sink','sink-all','skip','skip-at-least','skip-at-least-pull-one',
        'skip-one','skip-rest','sleep','sleep-timer','sleep-until','Slip','slurp',
        'slurp-rest','slurpy','snap','snapper','so','socket-host','socket-port',
        'sort','source','source-package','spawn','SPEC','splice','split',
        'splitdir','splitpath','sprintf','spurt','sqrt','squish','srand','stable',
        'start','started','starts-with','status','stderr','stdout','Str',
        'sub_signature','subbuf','subbuf-rw','subname','subparse','subst',
        'subst-mutate','substr','substr-eq','substr-rw','subtest','succ','sum',
        'Supply','symlink','t','tail','take','take-rw','tan','tanh','tap',
        'target','target-name','tc','tclc','tell','then','throttle','throw',
        'throws-like','timezone','tmpdir','to','today','todo','toggle','to-posix',
        'total','trailing','trans','tree','trim','trim-leading','trim-trailing',
        'truncate','truncated-to','trusts','try_acquire','trying','twigil','type',
        'type_captures','typename','uc','udp','uncaught_handler','unimatch',
        'uniname','uninames','uniparse','uniprop','uniprops','unique','unival',
        'univals','unlike','unlink','unlock','unpack','unpolar','unshift',
        'unwrap','updir','USAGE','use-ok','utc','val','value','values','VAR',
        'variable','verbose-config','version','VMnames','volume','vow','w','wait',
        'warn','watch','watch-path','week','weekday-of-month','week-number',
        'week-year','WHAT','when','WHERE','WHEREFORE','WHICH','WHO',
        'whole-second','WHY','wordcase','words','workaround','wrap','write',
        'write-to','x','yada','year','yield','yyyy-mm-dd','z','zip','zip-latest',

    )
373
    # Literal names of builtin types, including the two boolean values and
    # lower-case native types such as 'int32' or 'size_t'. Many entries are
    # package-qualified with '::'. Some names (e.g. 'Array', 'Bag', 'Bool')
    # also appear in PERL6_BUILTINS.
    PERL6_BUILTIN_CLASSES = (
        # Booleans
        'False','True',
        # Classes
        'Any','Array','Associative','AST','atomicint','Attribute','Backtrace',
        'Backtrace::Frame','Bag','Baggy','BagHash','Blob','Block','Bool','Buf',
        'Callable','CallFrame','Cancellation','Capture','CArray','Channel','Code',
        'compiler','Complex','ComplexStr','Cool','CurrentThreadScheduler',
        'Cursor','Date','Dateish','DateTime','Distro','Duration','Encoding',
        'Exception','Failure','FatRat','Grammar','Hash','HyperWhatever','Instant',
        'Int','int16','int32','int64','int8','IntStr','IO','IO::ArgFiles',
        'IO::CatHandle','IO::Handle','IO::Notification','IO::Path',
        'IO::Path::Cygwin','IO::Path::QNX','IO::Path::Unix','IO::Path::Win32',
        'IO::Pipe','IO::Socket','IO::Socket::Async','IO::Socket::INET','IO::Spec',
        'IO::Spec::Cygwin','IO::Spec::QNX','IO::Spec::Unix','IO::Spec::Win32',
        'IO::Special','Iterable','Iterator','Junction','Kernel','Label','List',
        'Lock','Lock::Async','long','longlong','Macro','Map','Match',
        'Metamodel::AttributeContainer','Metamodel::C3MRO','Metamodel::ClassHOW',
        'Metamodel::EnumHOW','Metamodel::Finalization','Metamodel::MethodContainer',
        'Metamodel::MROBasedMethodDispatch','Metamodel::MultipleInheritance',
        'Metamodel::Naming','Metamodel::Primitives','Metamodel::PrivateMethodContainer',
        'Metamodel::RoleContainer','Metamodel::Trusting','Method','Mix','MixHash',
        'Mixy','Mu','NFC','NFD','NFKC','NFKD','Nil','Num','num32','num64',
        'Numeric','NumStr','ObjAt','Order','Pair','Parameter','Perl','Pod::Block',
        'Pod::Block::Code','Pod::Block::Comment','Pod::Block::Declarator',
        'Pod::Block::Named','Pod::Block::Para','Pod::Block::Table','Pod::Heading',
        'Pod::Item','Pointer','Positional','PositionalBindFailover','Proc',
        'Proc::Async','Promise','Proxy','PseudoStash','QuantHash','Range','Rat',
        'Rational','RatStr','Real','Regex','Routine','Scalar','Scheduler',
        'Semaphore','Seq','Set','SetHash','Setty','Signature','size_t','Slip',
        'Stash','Str','StrDistance','Stringy','Sub','Submethod','Supplier',
        'Supplier::Preserving','Supply','Systemic','Tap','Telemetry',
        'Telemetry::Instrument::Thread','Telemetry::Instrument::Usage',
        'Telemetry::Period','Telemetry::Sampler','Thread','ThreadPoolScheduler',
        'UInt','uint16','uint32','uint64','uint8','Uni','utf8','Variable',
        'Version','VM','Whatever','WhateverCode','WrapHandle'
    )
411
    # Literal operator spellings: word operators ('eq', 'xor', ...) mixed
    # with symbolic ones ('<=>', '==>', ...).
    # NOTE(review): several entries are listed more than once (e.g. '+', '-',
    # '~', '^', '||', '+^', '~^', '?^', 'ff', 'fff'). Left untouched here
    # because the code consuming this tuple is not visible; confirm the
    # duplicates and the ordering are harmless before tidying.
    PERL6_OPERATORS = (
        'X', 'Z', 'after', 'also', 'and', 'andthen', 'before', 'cmp', 'div',
        'eq', 'eqv', 'extra', 'ff', 'fff', 'ge', 'gt', 'le', 'leg', 'lt', 'm',
        'mm', 'mod', 'ne', 'or', 'orelse', 'rx', 's', 'tr', 'x', 'xor', 'xx',
        '++', '--', '**', '!', '+', '-', '~', '?', '|', '||', '+^', '~^', '?^',
        '^', '*', '/', '%', '%%', '+&', '+<', '+>', '~&', '~<', '~>', '?&',
        'gcd', 'lcm', '+', '-', '+|', '+^', '~|', '~^', '?|', '?^',
        '~', '&', '^', 'but', 'does', '<=>', '..', '..^', '^..', '^..^',
        '!=', '==', '<', '<=', '>', '>=', '~~', '===', '!eqv',
        '&&', '||', '^^', '//', 'min', 'max', '??', '!!', 'ff', 'fff', 'so',
        'not', '<==', '==>', '<<==', '==>>','unicmp',
    )
424
    # Perl 6 has a *lot* of possible bracketing characters.
    # This list was lifted from STD.pm6 (https://github.com/perl6/std).
    #
    # Maps each opening bracket character to its closing counterpart.
    # brackets_callback looks up the first character of a delimiter here; a
    # character that is not a key is treated as a non-mirrored delimiter that
    # closes with itself. Several openers share one closer (for instance
    # '\u2018', '\u201a' and '\u201b' all close with '\u2019').
    PERL6_BRACKETS = {
        '\u0028': '\u0029', '\u003c': '\u003e', '\u005b': '\u005d',
        '\u007b': '\u007d', '\u00ab': '\u00bb', '\u0f3a': '\u0f3b',
        '\u0f3c': '\u0f3d', '\u169b': '\u169c', '\u2018': '\u2019',
        '\u201a': '\u2019', '\u201b': '\u2019', '\u201c': '\u201d',
        '\u201e': '\u201d', '\u201f': '\u201d', '\u2039': '\u203a',
        '\u2045': '\u2046', '\u207d': '\u207e', '\u208d': '\u208e',
        '\u2208': '\u220b', '\u2209': '\u220c', '\u220a': '\u220d',
        '\u2215': '\u29f5', '\u223c': '\u223d', '\u2243': '\u22cd',
        '\u2252': '\u2253', '\u2254': '\u2255', '\u2264': '\u2265',
        '\u2266': '\u2267', '\u2268': '\u2269', '\u226a': '\u226b',
        '\u226e': '\u226f', '\u2270': '\u2271', '\u2272': '\u2273',
        '\u2274': '\u2275', '\u2276': '\u2277', '\u2278': '\u2279',
        '\u227a': '\u227b', '\u227c': '\u227d', '\u227e': '\u227f',
        '\u2280': '\u2281', '\u2282': '\u2283', '\u2284': '\u2285',
        '\u2286': '\u2287', '\u2288': '\u2289', '\u228a': '\u228b',
        '\u228f': '\u2290', '\u2291': '\u2292', '\u2298': '\u29b8',
        '\u22a2': '\u22a3', '\u22a6': '\u2ade', '\u22a8': '\u2ae4',
        '\u22a9': '\u2ae3', '\u22ab': '\u2ae5', '\u22b0': '\u22b1',
        '\u22b2': '\u22b3', '\u22b4': '\u22b5', '\u22b6': '\u22b7',
        '\u22c9': '\u22ca', '\u22cb': '\u22cc', '\u22d0': '\u22d1',
        '\u22d6': '\u22d7', '\u22d8': '\u22d9', '\u22da': '\u22db',
        '\u22dc': '\u22dd', '\u22de': '\u22df', '\u22e0': '\u22e1',
        '\u22e2': '\u22e3', '\u22e4': '\u22e5', '\u22e6': '\u22e7',
        '\u22e8': '\u22e9', '\u22ea': '\u22eb', '\u22ec': '\u22ed',
        '\u22f0': '\u22f1', '\u22f2': '\u22fa', '\u22f3': '\u22fb',
        '\u22f4': '\u22fc', '\u22f6': '\u22fd', '\u22f7': '\u22fe',
        '\u2308': '\u2309', '\u230a': '\u230b', '\u2329': '\u232a',
        '\u23b4': '\u23b5', '\u2768': '\u2769', '\u276a': '\u276b',
        '\u276c': '\u276d', '\u276e': '\u276f', '\u2770': '\u2771',
        '\u2772': '\u2773', '\u2774': '\u2775', '\u27c3': '\u27c4',
        '\u27c5': '\u27c6', '\u27d5': '\u27d6', '\u27dd': '\u27de',
        '\u27e2': '\u27e3', '\u27e4': '\u27e5', '\u27e6': '\u27e7',
        '\u27e8': '\u27e9', '\u27ea': '\u27eb', '\u2983': '\u2984',
        '\u2985': '\u2986', '\u2987': '\u2988', '\u2989': '\u298a',
        '\u298b': '\u298c', '\u298d': '\u298e', '\u298f': '\u2990',
        '\u2991': '\u2992', '\u2993': '\u2994', '\u2995': '\u2996',
        '\u2997': '\u2998', '\u29c0': '\u29c1', '\u29c4': '\u29c5',
        '\u29cf': '\u29d0', '\u29d1': '\u29d2', '\u29d4': '\u29d5',
        '\u29d8': '\u29d9', '\u29da': '\u29db', '\u29f8': '\u29f9',
        '\u29fc': '\u29fd', '\u2a2b': '\u2a2c', '\u2a2d': '\u2a2e',
        '\u2a34': '\u2a35', '\u2a3c': '\u2a3d', '\u2a64': '\u2a65',
        '\u2a79': '\u2a7a', '\u2a7d': '\u2a7e', '\u2a7f': '\u2a80',
        '\u2a81': '\u2a82', '\u2a83': '\u2a84', '\u2a8b': '\u2a8c',
        '\u2a91': '\u2a92', '\u2a93': '\u2a94', '\u2a95': '\u2a96',
        '\u2a97': '\u2a98', '\u2a99': '\u2a9a', '\u2a9b': '\u2a9c',
        '\u2aa1': '\u2aa2', '\u2aa6': '\u2aa7', '\u2aa8': '\u2aa9',
        '\u2aaa': '\u2aab', '\u2aac': '\u2aad', '\u2aaf': '\u2ab0',
        '\u2ab3': '\u2ab4', '\u2abb': '\u2abc', '\u2abd': '\u2abe',
        '\u2abf': '\u2ac0', '\u2ac1': '\u2ac2', '\u2ac3': '\u2ac4',
        '\u2ac5': '\u2ac6', '\u2acd': '\u2ace', '\u2acf': '\u2ad0',
        '\u2ad1': '\u2ad2', '\u2ad3': '\u2ad4', '\u2ad5': '\u2ad6',
        '\u2aec': '\u2aed', '\u2af7': '\u2af8', '\u2af9': '\u2afa',
        '\u2e02': '\u2e03', '\u2e04': '\u2e05', '\u2e09': '\u2e0a',
        '\u2e0c': '\u2e0d', '\u2e1c': '\u2e1d', '\u2e20': '\u2e21',
        '\u3008': '\u3009', '\u300a': '\u300b', '\u300c': '\u300d',
        '\u300e': '\u300f', '\u3010': '\u3011', '\u3014': '\u3015',
        '\u3016': '\u3017', '\u3018': '\u3019', '\u301a': '\u301b',
        '\u301d': '\u301e', '\ufd3e': '\ufd3f', '\ufe17': '\ufe18',
        '\ufe35': '\ufe36', '\ufe37': '\ufe38', '\ufe39': '\ufe3a',
        '\ufe3b': '\ufe3c', '\ufe3d': '\ufe3e', '\ufe3f': '\ufe40',
        '\ufe41': '\ufe42', '\ufe43': '\ufe44', '\ufe47': '\ufe48',
        '\ufe59': '\ufe5a', '\ufe5b': '\ufe5c', '\ufe5d': '\ufe5e',
        '\uff08': '\uff09', '\uff1c': '\uff1e', '\uff3b': '\uff3d',
        '\uff5b': '\uff5d', '\uff5f': '\uff60', '\uff62': '\uff63',
    }
493
494    def _build_word_match(words, boundary_regex_fragment=None, prefix='', suffix=''):
495        if boundary_regex_fragment is None:
496            return r'\b(' + prefix + r'|'.join(re.escape(x) for x in words) + \
497                suffix + r')\b'
498        else:
499            return r'(?<!' + boundary_regex_fragment + r')' + prefix + r'(' + \
500                r'|'.join(re.escape(x) for x in words) + r')' + suffix + r'(?!' + \
501                boundary_regex_fragment + r')'
502
503    def brackets_callback(token_class):
504        def callback(lexer, match, context):
505            groups = match.groupdict()
506            opening_chars = groups['delimiter']
507            n_chars = len(opening_chars)
508            adverbs = groups.get('adverbs')
509
510            closer = Perl6Lexer.PERL6_BRACKETS.get(opening_chars[0])
511            text = context.text
512
513            if closer is None:  # it's not a mirrored character, which means we
514                                # just need to look for the next occurrence
515
516                end_pos = text.find(opening_chars, match.start('delimiter') + n_chars)
517            else:   # we need to look for the corresponding closing character,
518                    # keep nesting in mind
519                closing_chars = closer * n_chars
520                nesting_level = 1
521
522                search_pos = match.start('delimiter')
523
524                while nesting_level > 0:
525                    next_open_pos = text.find(opening_chars, search_pos + n_chars)
526                    next_close_pos = text.find(closing_chars, search_pos + n_chars)
527
528                    if next_close_pos == -1:
529                        next_close_pos = len(text)
530                        nesting_level = 0
531                    elif next_open_pos != -1 and next_open_pos < next_close_pos:
532                        nesting_level += 1
533                        search_pos = next_open_pos
534                    else:  # next_close_pos < next_open_pos
535                        nesting_level -= 1
536                        search_pos = next_close_pos
537
538                end_pos = next_close_pos
539
540            if end_pos < 0:     # if we didn't find a closer, just highlight the
541                                # rest of the text in this class
542                end_pos = len(text)
543
544            if adverbs is not None and re.search(r':to\b', adverbs):
545                heredoc_terminator = text[match.start('delimiter') + n_chars:end_pos]
546                end_heredoc = re.search(r'^\s*' + re.escape(heredoc_terminator) +
547                                        r'\s*$', text[end_pos:], re.MULTILINE)
548
549                if end_heredoc:
550                    end_pos += end_heredoc.end()
551                else:
552                    end_pos = len(text)
553
554            yield match.start(), token_class, text[match.start():end_pos + n_chars]
555            context.pos = end_pos + n_chars
556
557        return callback
558
559    def opening_brace_callback(lexer, match, context):
560        stack = context.stack
561
562        yield match.start(), Text, context.text[match.start():match.end()]
563        context.pos = match.end()
564
565        # if we encounter an opening brace and we're one level
566        # below a token state, it means we need to increment
567        # the nesting level for braces so we know later when
568        # we should return to the token rules.
569        if len(stack) > 2 and stack[-2] == 'token':
570            context.perl6_token_nesting_level += 1
571
572    def closing_brace_callback(lexer, match, context):
573        stack = context.stack
574
575        yield match.start(), Text, context.text[match.start():match.end()]
576        context.pos = match.end()
577
578        # if we encounter a free closing brace and we're one level
579        # below a token state, it means we need to check the nesting
580        # level to see if we need to return to the token state.
581        if len(stack) > 2 and stack[-2] == 'token':
582            context.perl6_token_nesting_level -= 1
583            if context.perl6_token_nesting_level == 0:
584                stack.pop()
585
586    def embedded_perl6_callback(lexer, match, context):
587        context.perl6_token_nesting_level = 1
588        yield match.start(), Text, context.text[match.start():match.end()]
589        context.pos = match.end()
590        context.stack.append('root')
591
592    # If you're modifying these rules, be careful if you need to process '{' or '}'
593    # characters. We have special logic for processing these characters (due to the fact
594    # that you can nest Perl 6 code in regex blocks), so if you need to process one of
595    # them, make sure you also process the corresponding one!
    tokens = {
        # Rules shared by the 'root' and 'pre-token' states, which both
        # include this list.
        'common': [
            # bracketed comments: '#' followed by '`', '|' or '=' and a run of
            # one repeated opening bracket; brackets_callback finds the end
            (r'#[`|=](?P<delimiter>(?P<first_char>[' + ''.join(PERL6_BRACKETS) + r'])(?P=first_char)*)',
             brackets_callback(Comment.Multiline)),
            (r'#[^\n]*$', Comment.Single),
            # '=begin NAME' ... '=end NAME' blocks with matching indentation,
            # '=for' blocks and other '=' lines, all emitted as comments
            (r'^(\s*)=begin\s+(\w+)\b.*?^\1=end\s+\2', Comment.Multiline),
            (r'^(\s*)=for.*?\n\s*?\n', Comment.Multiline),
            (r'^=.*?\n\s*?\n', Comment.Multiline),
            # regex/token/rule followed by a name ending in ':sym': continue
            # in 'token-sym-brackets'
            (r'(regex|token|rule)(\s*' + PERL6_IDENTIFIER_RANGE + '+:sym)',
             bygroups(Keyword, Name), 'token-sym-brackets'),
            # regex/token/rule with an optional name: continue in 'pre-token'
            (r'(regex|token|rule)(?!' + PERL6_IDENTIFIER_RANGE + r')(\s*' + PERL6_IDENTIFIER_RANGE + '+)?',
             bygroups(Keyword, Name), 'pre-token'),
            # deal with a special case in the Perl 6 grammar (role q { ... })
            (r'(role)(\s+)(q)(\s*)', bygroups(Keyword, Text, Name, Text)),
            # keyword and builtin word lists; builtin class names may carry a
            # ':U' or ':D' suffix
            (_build_word_match(PERL6_KEYWORDS, PERL6_IDENTIFIER_RANGE), Keyword),
            (_build_word_match(PERL6_BUILTIN_CLASSES, PERL6_IDENTIFIER_RANGE, suffix='(?::[UD])?'),
             Name.Builtin),
            (_build_word_match(PERL6_BUILTINS, PERL6_IDENTIFIER_RANGE), Name.Builtin),
            # copied from PerlLexer
            (r'[$@%&][.^:?=!~]?' + PERL6_IDENTIFIER_RANGE + '+(?:<<.*?>>|<.*?>|«.*?»)*',
             Name.Variable),
            (r'\$[!/](?:<<.*?>>|<.*?>|«.*?»)*', Name.Variable.Global),
            (r'::\?\w+', Name.Variable.Global),
            (r'[$@%&]\*' + PERL6_IDENTIFIER_RANGE + '+(?:<<.*?>>|<.*?>|«.*?»)*',
             Name.Variable.Global),
            (r'\$(?:<.*?>)+', Name.Variable),
            # q/qq/Q quoting with optional adverbs; brackets_callback finds
            # the closing delimiter
            (r'(?:q|qq|Q)[a-zA-Z]?\s*(?P<adverbs>:[\w\s:]+)?\s*(?P<delimiter>(?P<first_char>[^0-9a-zA-Z:\s])'
             r'(?P=first_char)*)', brackets_callback(String)),
            # copied from PerlLexer
            (r'0_?[0-7]+(_[0-7]+)*', Number.Oct),
            (r'0x[0-9A-Fa-f]+(_[0-9A-Fa-f]+)*', Number.Hex),
            (r'0b[01]+(_[01]+)*', Number.Bin),
            (r'(?i)(\d*(_\d*)*\.\d+(_\d*)*|\d+(_\d*)*\.\d+(_\d*)*)(e[+-]?\d+)?',
             Number.Float),
            (r'(?i)\d+(_\d*)*e[+-]?\d+(_\d*)*', Number.Float),
            (r'\d+(_\d+)*', Number.Integer),
            # '/.../' directly after '~~', '=', '(' or ',' is a regex
            (r'(?<=~~)\s*/(?:\\\\|\\/|.)*?/', String.Regex),
            (r'(?<=[=(,])\s*/(?:\\\\|\\/|.)*?/', String.Regex),
            # a word starting with 'm' directly before '(' is a plain name,
            # not the start of an m(...) regex
            (r'm\w+(?=\()', Name),
            # m/ms/rx with optional adverbs and an arbitrary delimiter
            (r'(?:m|ms|rx)\s*(?P<adverbs>:[\w\s:]+)?\s*(?P<delimiter>(?P<first_char>[^\w:\s])'
             r'(?P=first_char)*)', brackets_callback(String.Regex)),
            # s/ss/tr with '/' delimiters: two consecutive '/'-terminated parts
            (r'(?:s|ss|tr)\s*(?::[\w\s:]+)?\s*/(?:\\\\|\\/|.)*?/(?:\\\\|\\/|.)*?/',
             String.Regex),
            (r'<[^\s=].*?\S>', String),
            (_build_word_match(PERL6_OPERATORS), Operator),
            (r'\w' + PERL6_IDENTIFIER_RANGE + '*', Name),
            (r"'(\\\\|\\[^\\]|[^'\\])*'", String),
            (r'"(\\\\|\\[^\\]|[^"\\])*"', String),
        ],
        # Common rules plus brace tracking through the two brace callbacks.
        'root': [
            include('common'),
            (r'\{', opening_brace_callback),
            (r'\}', closing_brace_callback),
            # fallback when no other rule matches
            (r'.+?', Text),
        ],
        # Entered after a regex/token/rule keyword; the first '{' replaces
        # this state with 'token'.
        'pre-token': [
            include('common'),
            (r'\{', Text, ('#pop', 'token')),
            (r'.+?', Text),
        ],
        # Entered after a ':sym' name: an optional bracketed part is emitted
        # as Name, then lexing always continues in 'pre-token'.
        'token-sym-brackets': [
            (r'(?P<delimiter>(?P<first_char>[' + ''.join(PERL6_BRACKETS) + '])(?P=first_char)*)',
             brackets_callback(Name), ('#pop', 'pre-token')),
            default(('#pop', 'pre-token')),
        ],
        # Entered from 'pre-token' at '{'; a '}' leaves the state again.
        'token': [
            (r'\}', Text, '#pop'),
            # declarations preceded by ':' are lexed with this same lexer
            (r'(?<=:)(?:my|our|state|constant|temp|let).*?;', using(this)),
            # make sure that quotes in character classes aren't treated as strings
            (r'<(?:[-!?+.]\s*)?\[.*?\]>', String.Regex),
            # make sure that '#' characters in quotes aren't treated as comments
            (r"(?<!\\)'(\\\\|\\[^\\]|[^'\\])*'", String.Regex),
            (r'(?<!\\)"(\\\\|\\[^\\]|[^"\\])*"', String.Regex),
            (r'#.*?$', Comment.Single),
            # '{' pushes 'root' and resets the brace nesting counter
            (r'\{', embedded_perl6_callback),
            # fallback when no other rule matches
            ('.+?', String.Regex),
        ],
    }
674
675    def analyse_text(text):
676        def strip_pod(lines):
677            in_pod = False
678            stripped_lines = []
679
680            for line in lines:
681                if re.match(r'^=(?:end|cut)', line):
682                    in_pod = False
683                elif re.match(r'^=\w+', line):
684                    in_pod = True
685                elif not in_pod:
686                    stripped_lines.append(line)
687
688            return stripped_lines
689
690        # XXX handle block comments
691        lines = text.splitlines()
692        lines = strip_pod(lines)
693        text = '\n'.join(lines)
694
695        if shebang_matches(text, r'perl6|rakudo|niecza|pugs'):
696            return True
697
698        saw_perl_decl = False
699        rating = False
700
701        # check for my/our/has declarations
702        if re.search(r"(?:my|our|has)\s+(?:" + Perl6Lexer.PERL6_IDENTIFIER_RANGE +
703                     r"+\s+)?[$@%&(]", text):
704            rating = 0.8
705            saw_perl_decl = True
706
707        for line in lines:
708            line = re.sub('#.*', '', line)
709            if re.match(r'^\s*$', line):
710                continue
711
712            # match v6; use v6; use v6.0; use v6.0.0;
713            if re.match(r'^\s*(?:use\s+)?v6(?:\.\d(?:\.\d)?)?;', line):
714                return True
715            # match class, module, role, enum, grammar declarations
716            class_decl = re.match(r'^\s*(?:(?P<scope>my|our)\s+)?(?:module|class|role|enum|grammar)', line)
717            if class_decl:
718                if saw_perl_decl or class_decl.group('scope') is not None:
719                    return True
720                rating = 0.05
721                continue
722            break
723
724        if ':=' in text:
725            # Same logic as above for PerlLexer
726            rating /= 2
727
728        return rating
729
730    def __init__(self, **options):
731        super().__init__(**options)
732        self.encoding = options.get('encoding', 'utf-8')
733