1package r
2
3import (
4	"regexp"
5	"strings"
6	"unicode/utf8"
7
8	. "github.com/alecthomas/chroma" // nolint
9	"github.com/alecthomas/chroma/lexers/internal"
10	"github.com/dlclark/regexp2"
11)
12
13// Raku lexer.
14var Raku Lexer = internal.Register(MustNewLazyLexer(
15	&Config{
16		Name:    "Raku",
17		Aliases: []string{"perl6", "pl6", "raku"},
18		Filenames: []string{
19			"*.pl", "*.pm", "*.nqp", "*.p6", "*.6pl", "*.p6l", "*.pl6", "*.6pm",
20			"*.p6m", "*.pm6", "*.t", "*.raku", "*.rakumod", "*.rakutest", "*.rakudoc",
21		},
22		MimeTypes: []string{
23			"text/x-perl6", "application/x-perl6",
24			"text/x-raku", "application/x-raku",
25		},
26		DotAll: true,
27	},
28	rakuRules,
29))
30
31func rakuRules() Rules {
32	type RakuToken int
33
34	const (
35		rakuQuote RakuToken = iota
36		rakuNameAttribute
37		rakuPod
38		rakuPodFormatter
39		rakuPodDeclaration
40		rakuMultilineComment
41		rakuMatchRegex
42		rakuSubstitutionRegex
43	)
44
45	const (
46		colonPairOpeningBrackets = `(?:<<|<|«|\(|\[|\{)`
47		colonPairClosingBrackets = `(?:>>|>|»|\)|\]|\})`
48		colonPairPattern         = `(?<!:)(?<colon>:)(?<key>\w[\w'-]*)(?<opening_delimiters>` + colonPairOpeningBrackets + `)`
49		colonPairLookahead       = `(?=(:['\w-]+` +
50			colonPairOpeningBrackets + `.+?` + colonPairClosingBrackets + `)?`
51		namePattern           = `(?:(?!` + colonPairPattern + `)(?:::|[\w':-]))+`
52		variablePattern       = `[$@%&]+[.^:?=!~]?` + namePattern
53		globalVariablePattern = `[$@%&]+\*` + namePattern
54	)
55
56	keywords := []string{
57		`BEGIN`, `CATCH`, `CHECK`, `CLOSE`, `CONTROL`, `DOC`, `END`, `ENTER`, `FIRST`, `INIT`,
58		`KEEP`, `LAST`, `LEAVE`, `NEXT`, `POST`, `PRE`, `QUIT`, `UNDO`, `anon`, `augment`, `but`,
59		`class`, `constant`, `default`, `does`, `else`, `elsif`, `enum`, `for`, `gather`, `given`,
60		`grammar`, `has`, `if`, `import`, `is`, `of`, `let`, `loop`, `made`, `make`, `method`,
61		`module`, `multi`, `my`, `need`, `orwith`, `our`, `proceed`, `proto`, `repeat`, `require`,
62		`where`, `return`, `return-rw`, `returns`, `->`, `-->`, `role`, `state`, `sub`, `no`,
63		`submethod`, `subset`, `succeed`, `supersede`, `try`, `unit`, `unless`, `until`,
64		`use`, `when`, `while`, `with`, `without`, `export`, `native`, `repr`, `required`, `rw`,
65		`symbol`, `default`, `cached`, `DEPRECATED`, `dynamic`, `hidden-from-backtrace`, `nodal`,
66		`pure`, `raw`, `start`, `react`, `supply`, `whenever`, `also`, `rule`, `token`, `regex`,
67		`dynamic-scope`, `built`, `temp`,
68	}
69
70	keywordsPattern := Words(`(?<!['\w:-])`, `(?!['\w:-])`, keywords...)
71
72	wordOperators := []string{
73		`X`, `Z`, `R`, `after`, `and`, `andthen`, `before`, `cmp`, `div`, `eq`, `eqv`, `extra`, `ge`,
74		`gt`, `le`, `leg`, `lt`, `mod`, `ne`, `or`, `orelse`, `x`, `xor`, `xx`, `gcd`, `lcm`,
75		`but`, `min`, `max`, `^fff`, `fff^`, `fff`, `^ff`, `ff^`, `ff`, `so`, `not`, `unicmp`,
76		`TR`, `o`, `(&)`, `(.)`, `(|)`, `(+)`, `(-)`, `(^)`, `coll`, `(elem)`, `(==)`,
77		`(cont)`, `(<)`, `(<=)`, `(>)`, `(>=)`, `minmax`, `notandthen`, `S`,
78	}
79
80	wordOperatorsPattern := Words(`(?<=^|\b|\s)`, `(?=$|\b|\s)`, wordOperators...)
81
82	operators := []string{
83		`++`, `--`, `-`, `**`, `!`, `+`, `~`, `?`, `+^`, `~^`, `?^`, `^`, `*`, `/`, `%`, `%%`, `+&`,
84		`+<`, `+>`, `~&`, `~<`, `~>`, `?&`, `+|`, `+^`, `~|`, `~^`, `?`, `?|`, `?^`, `&`, `^`,
85		`<=>`, `^…^`, `^…`, `…^`, `…`, `...`, `...^`, `^...`, `^...^`, `..`, `..^`, `^..`, `^..^`,
86		`::=`, `:=`, `!=`, `==`, `<=`, `<`, `>=`, `>`, `~~`, `===`, `&&`, `||`, `|`, `^^`, `//`,
87		`??`, `!!`, `^fff^`, `^ff^`, `<==`, `==>`, `<<==`, `==>>`, `=>`, `=`, `<<`, `«`, `>>`, `»`,
88		`,`, `>>.`, `».`, `.&`, `.=`, `.^`, `.?`, `.+`, `.*`, `.`, `∘`, `∩`, `⊍`, `∪`, `⊎`, `∖`,
89		`⊖`, `≠`, `≤`, `≥`, `=:=`, `=~=`, `≅`, `∈`, `∉`, `≡`, `≢`, `∋`, `∌`, `⊂`, `⊄`, `⊆`, `⊈`,
90		`⊃`, `⊅`, `⊇`, `⊉`, `:`, `!!!`, `???`, `¯`, `×`, `÷`, `−`, `⁺`, `⁻`,
91	}
92
93	operatorsPattern := Words(``, ``, operators...)
94
95	builtinTypes := []string{
96		`False`, `True`, `Order`, `More`, `Less`, `Same`, `Any`, `Array`, `Associative`, `AST`,
97		`atomicint`, `Attribute`, `Backtrace`, `Backtrace::Frame`, `Bag`, `Baggy`, `BagHash`,
98		`Blob`, `Block`, `Bool`, `Buf`, `Callable`, `CallFrame`, `Cancellation`, `Capture`,
99		`CArray`, `Channel`, `Code`, `compiler`, `Complex`, `ComplexStr`, `CompUnit`,
100		`CompUnit::PrecompilationRepository`, `CompUnit::Repository`, `Empty`,
101		`CompUnit::Repository::FileSystem`, `CompUnit::Repository::Installation`, `Cool`,
102		`CurrentThreadScheduler`, `CX::Warn`, `CX::Take`, `CX::Succeed`, `CX::Return`, `CX::Redo`,
103		`CX::Proceed`, `CX::Next`, `CX::Last`, `CX::Emit`, `CX::Done`, `Cursor`, `Date`, `Dateish`,
104		`DateTime`, `Distribution`, `Distribution::Hash`, `Distribution::Locally`,
105		`Distribution::Path`, `Distribution::Resource`, `Distro`, `Duration`, `Encoding`,
106		`Encoding::Registry`, `Endian`, `Enumeration`, `Exception`, `Failure`, `FatRat`, `Grammar`,
107		`Hash`, `HyperWhatever`, `Instant`, `Int`, `int`, `int16`, `int32`, `int64`, `int8`, `str`,
108		`IntStr`, `IO`, `IO::ArgFiles`, `IO::CatHandle`, `IO::Handle`, `IO::Notification`,
109		`IO::Notification::Change`, `IO::Path`, `IO::Path::Cygwin`, `IO::Path::Parts`,
110		`IO::Path::QNX`, `IO::Path::Unix`, `IO::Path::Win32`, `IO::Pipe`, `IO::Socket`,
111		`IO::Socket::Async`, `IO::Socket::Async::ListenSocket`, `IO::Socket::INET`, `IO::Spec`,
112		`IO::Spec::Cygwin`, `IO::Spec::QNX`, `IO::Spec::Unix`, `IO::Spec::Win32`, `IO::Special`,
113		`Iterable`, `Iterator`, `Junction`, `Kernel`, `Label`, `List`, `Lock`, `Lock::Async`,
114		`Lock::ConditionVariable`, `long`, `longlong`, `Macro`, `Map`, `Match`,
115		`Metamodel::AttributeContainer`, `Metamodel::C3MRO`, `Metamodel::ClassHOW`,
116		`Metamodel::ConcreteRoleHOW`, `Metamodel::CurriedRoleHOW`, `Metamodel::DefiniteHOW`,
117		`Metamodel::Documenting`, `Metamodel::EnumHOW`, `Metamodel::Finalization`,
118		`Metamodel::MethodContainer`, `Metamodel::Mixins`, `Metamodel::MROBasedMethodDispatch`,
119		`Metamodel::MultipleInheritance`, `Metamodel::Naming`, `Metamodel::Primitives`,
120		`Metamodel::PrivateMethodContainer`, `Metamodel::RoleContainer`, `Metamodel::RolePunning`,
121		`Metamodel::Stashing`, `Metamodel::Trusting`, `Metamodel::Versioning`, `Method`, `Mix`,
122		`MixHash`, `Mixy`, `Mu`, `NFC`, `NFD`, `NFKC`, `NFKD`, `Nil`, `Num`, `num32`, `num64`,
123		`Numeric`, `NumStr`, `ObjAt`, `Order`, `Pair`, `Parameter`, `Perl`, `Pod::Block`,
124		`Pod::Block::Code`, `Pod::Block::Comment`, `Pod::Block::Declarator`, `Pod::Block::Named`,
125		`Pod::Block::Para`, `Pod::Block::Table`, `Pod::Heading`, `Pod::Item`, `Pointer`,
126		`Positional`, `PositionalBindFailover`, `Proc`, `Proc::Async`, `Promise`, `Proxy`,
127		`PseudoStash`, `QuantHash`, `RaceSeq`, `Raku`, `Range`, `Rat`, `Rational`, `RatStr`,
128		`Real`, `Regex`, `Routine`, `Routine::WrapHandle`, `Scalar`, `Scheduler`, `Semaphore`,
129		`Seq`, `Sequence`, `Set`, `SetHash`, `Setty`, `Signature`, `size_t`, `Slip`, `Stash`,
130		`Str`, `StrDistance`, `Stringy`, `Sub`, `Submethod`, `Supplier`, `Supplier::Preserving`,
131		`Supply`, `Systemic`, `Tap`, `Telemetry`, `Telemetry::Instrument::Thread`,
132		`Telemetry::Instrument::ThreadPool`, `Telemetry::Instrument::Usage`, `Telemetry::Period`,
133		`Telemetry::Sampler`, `Thread`, `Test`, `ThreadPoolScheduler`, `UInt`, `uint16`, `uint32`,
134		`uint64`, `uint8`, `Uni`, `utf8`, `ValueObjAt`, `Variable`, `Version`, `VM`, `Whatever`,
135		`WhateverCode`, `WrapHandle`, `NativeCall`,
136		// Pragmas
137		`precompilation`, `experimental`, `worries`, `MONKEY-TYPING`, `MONKEY-SEE-NO-EVAL`,
138		`MONKEY-GUTS`, `fatal`, `lib`, `isms`, `newline`, `nqp`, `soft`,
139		`strict`, `trace`, `variables`,
140	}
141
142	builtinTypesPattern := Words(`(?<!['\w:-])`, `(?::[_UD])?(?!['\w:-])`, builtinTypes...)
143
144	builtinRoutines := []string{
145		`ACCEPTS`, `abs`, `abs2rel`, `absolute`, `accept`, `accepts_type`, `accessed`, `acos`,
146		`acosec`, `acosech`, `acosh`, `acotan`, `acotanh`, `acquire`, `act`, `action`, `actions`,
147		`add`, `add_attribute`, `add_enum_value`, `add_fallback`, `add_method`, `add_parent`,
148		`add_private_method`, `add_role`, `add_stash`, `add_trustee`, `addendum`, `adverb`, `after`,
149		`all`, `allocate`, `allof`, `allowed`, `alternative-names`, `annotations`, `antipair`,
150		`antipairs`, `any`, `anyof`, `api`, `app_lifetime`, `append`, `arch`, `archetypes`,
151		`archname`, `args`, `ARGS-TO-CAPTURE`, `arity`, `Array`, `asec`, `asech`, `asin`, `asinh`,
152		`ASSIGN-KEY`, `ASSIGN-POS`, `assuming`, `ast`, `at`, `atan`, `atan2`, `atanh`, `AT-KEY`,
153		`atomic-assign`, `atomic-dec-fetch`, `atomic-fetch`, `atomic-fetch-add`, `atomic-fetch-dec`,
154		`atomic-fetch-inc`, `atomic-fetch-sub`, `atomic-inc-fetch`, `AT-POS`, `attributes`, `auth`,
155		`await`, `backend`, `backtrace`, `Bag`, `bag`, `Baggy`, `BagHash`, `bail-out`, `base`,
156		`basename`, `base-repeating`, `base_type`, `batch`, `BIND-KEY`, `BIND-POS`, `bind-stderr`,
157		`bind-stdin`, `bind-stdout`, `bind-udp`, `bits`, `bless`, `block`, `Bool`, `bool-only`,
158		`bounds`, `break`, `Bridge`, `broken`, `BUILD`, `TWEAK`, `build-date`, `bytes`, `cache`,
159		`callframe`, `calling-package`, `CALL-ME`, `callsame`, `callwith`, `can`, `cancel`,
160		`candidates`, `cando`, `can-ok`, `canonpath`, `caps`, `caption`, `Capture`, `capture`,
161		`cas`, `catdir`, `categorize`, `categorize-list`, `catfile`, `catpath`, `cause`, `ceiling`,
162		`cglobal`, `changed`, `Channel`, `channel`, `chars`, `chdir`, `child`, `child-name`,
163		`child-typename`, `chmod`, `chomp`, `chop`, `chr`, `chrs`, `chunks`, `cis`, `classify`,
164		`classify-list`, `cleanup`, `clone`, `close`, `closed`, `close-stdin`, `cmp-ok`, `code`,
165		`codename`, `codes`, `coerce_type`, `coll`, `collate`, `column`, `comb`, `combinations`,
166		`command`, `comment`, `compiler`, `Complex`, `compose`, `composalizer`, `compose_type`,
167		`compose_values`, `composer`, `compute_mro`, `condition`, `config`, `configure_destroy`,
168		`configure_type_checking`, `conj`, `connect`, `constraints`, `construct`, `contains`,
169		`content`, `contents`, `copy`, `cos`, `cosec`, `cosech`, `cosh`, `cotan`, `cotanh`, `count`,
170		`count-only`, `cpu-cores`, `cpu-usage`, `CREATE`, `create_type`, `cross`, `cue`, `curdir`,
171		`curupdir`, `d`, `Date`, `DateTime`, `day`, `daycount`, `day-of-month`, `day-of-week`,
172		`day-of-year`, `days-in-month`, `dd-mm-yyyy`, `declaration`, `decode`, `decoder`, `deepmap`,
173		`default`, `defined`, `DEFINITE`, `definite`, `delayed`, `delete`, `delete-by-compiler`,
174		`DELETE-KEY`, `DELETE-POS`, `denominator`, `desc`, `DESTROY`, `destroyers`, `devnull`,
175		`diag`, `did-you-mean`, `die`, `dies-ok`, `dir`, `dirname`, `distribution`, `dir-sep`,
176		`DISTROnames`, `do`, `does`, `does-ok`, `done`, `done-testing`, `duckmap`, `dynamic`, `e`,
177		`eager`, `earlier`, `elems`, `emit`, `enclosing`, `encode`, `encoder`, `encoding`, `end`,
178		`endian`, `ends-with`, `enum_from_value`, `enum_value_list`, `enum_values`, `enums`, `EOF`,
179		`eof`, `EVAL`, `eval-dies-ok`, `EVALFILE`, `eval-lives-ok`, `event`, `exception`,
180		`excludes-max`, `excludes-min`, `EXISTS-KEY`, `EXISTS-POS`, `exit`, `exitcode`, `exp`,
181		`expected`, `explicitly-manage`, `expmod`, `export_callback`, `extension`, `f`, `fail`,
182		`FALLBACK`, `fails-like`, `fc`, `feature`, `file`, `filename`, `files`, `find`,
183		`find_method`, `find_method_qualified`, `finish`, `first`, `flat`, `first-date-in-month`,
184		`flatmap`, `flip`, `floor`, `flunk`, `flush`, `flush_cache`, `fmt`, `format`, `formatter`,
185		`free-memory`, `freeze`, `from`, `from-list`, `from-loop`, `from-posix`, `from-slurpy`,
186		`full`, `full-barrier`, `GENERATE-USAGE`, `generate_mixin`, `get`, `get_value`, `getc`,
187		`gist`, `got`, `grab`, `grabpairs`, `grep`, `handle`, `handled`, `handles`, `hardware`,
188		`has_accessor`, `Hash`, `hash`, `head`, `headers`, `hh-mm-ss`, `hidden`, `hides`, `hostname`,
189		`hour`, `how`, `hyper`, `id`, `illegal`, `im`, `in`, `in-timezone`, `indent`, `index`,
190		`indices`, `indir`, `infinite`, `infix`, `postcirumfix`, `cicumfix`, `install`,
191		`install_method_cache`, `Instant`, `instead`, `Int`, `int-bounds`, `interval`, `in-timezone`,
192		`invalid-str`, `invert`, `invocant`, `IO`, `IO::Notification.watch-path`, `is_trusted`,
193		`is_type`, `isa`, `is-absolute`, `isa-ok`, `is-approx`, `is-deeply`, `is-hidden`,
194		`is-initial-thread`, `is-int`, `is-lazy`, `is-leap-year`, `isNaN`, `isnt`, `is-prime`,
195		`is-relative`, `is-routine`, `is-setting`, `is-win`, `item`, `iterator`, `join`, `keep`,
196		`kept`, `KERNELnames`, `key`, `keyof`, `keys`, `kill`, `kv`, `kxxv`, `l`, `lang`, `last`,
197		`lastcall`, `later`, `lazy`, `lc`, `leading`, `level`, `like`, `line`, `lines`, `link`,
198		`List`, `list`, `listen`, `live`, `lives-ok`, `load`, `load-repo-id`, `load-unit`, `loaded`,
199		`loads`, `local`, `lock`, `log`, `log10`, `lookup`, `lsb`, `made`, `MAIN`, `make`, `Map`,
200		`map`, `match`, `max`, `maxpairs`, `merge`, `message`, `method`, `meta`, `method_table`,
201		`methods`, `migrate`, `min`, `minmax`, `minpairs`, `minute`, `misplaced`, `Mix`, `mix`,
202		`MixHash`, `mixin`, `mixin_attribute`, `Mixy`, `mkdir`, `mode`, `modified`, `month`, `move`,
203		`mro`, `msb`, `multi`, `multiness`, `name`, `named`, `named_names`, `narrow`,
204		`nativecast`, `native-descriptor`, `nativesizeof`, `need`, `new`, `new_type`,
205		`new-from-daycount`, `new-from-pairs`, `next`, `nextcallee`, `next-handle`, `nextsame`,
206		`nextwith`, `next-interesting-index`, `NFC`, `NFD`, `NFKC`, `NFKD`, `nice`, `nl-in`,
207		`nl-out`, `nodemap`, `nok`, `normalize`, `none`, `norm`, `not`, `note`, `now`, `nude`,
208		`Num`, `numerator`, `Numeric`, `of`, `offset`, `offset-in-hours`, `offset-in-minutes`,
209		`ok`, `old`, `on-close`, `one`, `on-switch`, `open`, `opened`, `operation`, `optional`,
210		`ord`, `ords`, `orig`, `os-error`, `osname`, `out-buffer`, `pack`, `package`, `package-kind`,
211		`package-name`, `packages`, `Pair`, `pair`, `pairs`, `pairup`, `parameter`, `params`,
212		`parent`, `parent-name`, `parents`, `parse`, `parse-base`, `parsefile`, `parse-names`,
213		`parts`, `pass`, `path`, `path-sep`, `payload`, `peer-host`, `peer-port`, `periods`, `perl`,
214		`permutations`, `phaser`, `pick`, `pickpairs`, `pid`, `placeholder`, `plan`, `plus`,
215		`polar`, `poll`, `polymod`, `pop`, `pos`, `positional`, `posix`, `postfix`, `postmatch`,
216		`precomp-ext`, `precomp-target`, `precompiled`, `pred`, `prefix`, `prematch`, `prepend`,
217		`primary`, `print`, `printf`, `print-nl`, `print-to`, `private`, `private_method_names`,
218		`private_method_table`, `proc`, `produce`, `Promise`, `promise`, `prompt`, `protect`,
219		`protect-or-queue-on-recursion`, `publish_method_cache`, `pull-one`, `push`, `push-all`,
220		`push-at-least`, `push-exactly`, `push-until-lazy`, `put`, `qualifier-type`, `quaternary`,
221		`quit`, `r`, `race`, `radix`, `raku`, `rand`, `Range`, `range`, `Rat`, `raw`, `re`, `read`,
222		`read-bits`, `read-int128`, `read-int16`, `read-int32`, `read-int64`, `read-int8`,
223		`read-num32`, `read-num64`, `read-ubits`, `read-uint128`, `read-uint16`, `read-uint32`,
224		`read-uint64`, `read-uint8`, `readchars`, `readonly`, `ready`, `Real`, `reallocate`,
225		`reals`, `reason`, `rebless`, `receive`, `recv`, `redispatcher`, `redo`, `reduce`,
226		`rel2abs`, `relative`, `release`, `remove`, `rename`, `repeated`, `replacement`,
227		`replace-with`, `repo`, `repo-id`, `report`, `required`, `reserved`, `resolve`, `restore`,
228		`result`, `resume`, `rethrow`, `return`, `return-rw`, `returns`, `reverse`, `right`,
229		`rindex`, `rmdir`, `role`, `roles_to_compose`, `rolish`, `roll`, `rootdir`, `roots`,
230		`rotate`, `rotor`, `round`, `roundrobin`, `routine-type`, `run`, `RUN-MAIN`, `rw`, `rwx`,
231		`samecase`, `samemark`, `samewith`, `say`, `schedule-on`, `scheduler`, `scope`, `sec`,
232		`sech`, `second`, `secondary`, `seek`, `self`, `send`, `Seq`, `Set`, `set`, `serial`,
233		`set_hidden`, `set_name`, `set_package`, `set_rw`, `set_value`, `set_api`, `set_auth`,
234		`set_composalizer`, `set_export_callback`, `set_is_mixin`, `set_mixin_attribute`,
235		`set_package`, `set_ver`, `set_why`, `SetHash`, `Setty`, `set-instruments`,
236		`setup_finalization`, `setup_mixin_cache`, `shape`, `share`, `shell`, `short-id`,
237		`short-name`, `shortname`, `shift`, `sibling`, `sigil`, `sign`, `signal`, `signals`,
238		`signature`, `sin`, `sinh`, `sink`, `sink-all`, `skip`, `skip-at-least`,
239		`skip-at-least-pull-one`, `skip-one`, `skip-rest`, `sleep`, `sleep-timer`, `sleep-until`,
240		`Slip`, `slip`, `slurp`, `slurp-rest`, `slurpy`, `snap`, `snapper`, `so`, `socket-host`,
241		`socket-port`, `sort`, `source`, `source-package`, `spawn`, `SPEC`, `splice`, `split`,
242		`splitdir`, `splitpath`, `sprintf`, `spurt`, `sqrt`, `squish`, `srand`, `stable`, `start`,
243		`started`, `starts-with`, `status`, `stderr`, `stdout`, `STORE`, `store-file`,
244		`store-repo-id`, `store-unit`, `Str`, `Stringy`, `sub_signature`, `subbuf`, `subbuf-rw`,
245		`subname`, `subparse`, `subst`, `subst-mutate`, `substr`, `substr-eq`, `substr-rw`,
246		`subtest`, `succ`, `sum`, `suffix`, `summary`, `Supply`, `symlink`, `T`, `t`, `tail`,
247		`take`, `take-rw`, `tan`, `tanh`, `tap`, `target`, `target-name`, `tc`, `tclc`, `tell`,
248		`term`, `tertiary`, `then`, `throttle`, `throw`, `throws-like`, `time`, `timezone`,
249		`tmpdir`, `to`, `today`, `todo`, `toggle`, `to-posix`, `total`, `total-memory`, `trailing`,
250		`trans`, `tree`, `trim`, `trim-leading`, `trim-trailing`, `truncate`, `truncated-to`,
251		`trusts`, `try_acquire`, `trying`, `twigil`, `type`, `type_captures`, `type_check`,
252		`typename`, `uc`, `udp`, `uncaught_handler`, `undefine`, `unimatch`, `unicmp`, `uniname`,
253		`uninames`, `uninstall`, `uniparse`, `uniprop`, `uniprops`, `unique`, `unival`, `univals`,
254		`unlike`, `unlink`, `unlock`, `unpack`, `unpolar`, `unset`, `unshift`, `unwrap`, `updir`,
255		`USAGE`, `usage-name`, `use-ok`, `utc`, `val`, `value`, `values`, `VAR`, `variable`, `ver`,
256		`verbose-config`, `Version`, `version`, `VMnames`, `volume`, `vow`, `w`, `wait`, `warn`,
257		`watch`, `watch-path`, `week`, `weekday-of-month`, `week-number`, `week-year`, `WHAT`,
258		`what`, `when`, `WHERE`, `WHEREFORE`, `WHICH`, `WHO`, `whole-second`, `WHY`, `why`,
259		`with-lock-hidden-from-recursion-check`, `wordcase`, `words`, `workaround`, `wrap`,
260		`write`, `write-bits`, `write-int128`, `write-int16`, `write-int32`, `write-int64`,
261		`write-int8`, `write-num32`, `write-num64`, `write-ubits`, `write-uint128`, `write-uint16`,
262		`write-uint32`, `write-uint64`, `write-uint8`, `write-to`, `x`, `yada`, `year`, `yield`,
263		`yyyy-mm-dd`, `z`, `zip`, `zip-latest`, `HOW`, `s`, `DEPRECATED`, `trait_mod`,
264	}
265
266	builtinRoutinesPattern := Words(`(?<!['\w:-])`, `(?!['\w-])`, builtinRoutines...)
267
268	// A map of opening and closing brackets
269	brackets := map[rune]rune{
270		'\u0028': '\u0029', '\u003c': '\u003e', '\u005b': '\u005d',
271		'\u007b': '\u007d', '\u00ab': '\u00bb', '\u0f3a': '\u0f3b',
272		'\u0f3c': '\u0f3d', '\u169b': '\u169c', '\u2018': '\u2019',
273		'\u201a': '\u2019', '\u201b': '\u2019', '\u201c': '\u201d',
274		'\u201e': '\u201d', '\u201f': '\u201d', '\u2039': '\u203a',
275		'\u2045': '\u2046', '\u207d': '\u207e', '\u208d': '\u208e',
276		'\u2208': '\u220b', '\u2209': '\u220c', '\u220a': '\u220d',
277		'\u2215': '\u29f5', '\u223c': '\u223d', '\u2243': '\u22cd',
278		'\u2252': '\u2253', '\u2254': '\u2255', '\u2264': '\u2265',
279		'\u2266': '\u2267', '\u2268': '\u2269', '\u226a': '\u226b',
280		'\u226e': '\u226f', '\u2270': '\u2271', '\u2272': '\u2273',
281		'\u2274': '\u2275', '\u2276': '\u2277', '\u2278': '\u2279',
282		'\u227a': '\u227b', '\u227c': '\u227d', '\u227e': '\u227f',
283		'\u2280': '\u2281', '\u2282': '\u2283', '\u2284': '\u2285',
284		'\u2286': '\u2287', '\u2288': '\u2289', '\u228a': '\u228b',
285		'\u228f': '\u2290', '\u2291': '\u2292', '\u2298': '\u29b8',
286		'\u22a2': '\u22a3', '\u22a6': '\u2ade', '\u22a8': '\u2ae4',
287		'\u22a9': '\u2ae3', '\u22ab': '\u2ae5', '\u22b0': '\u22b1',
288		'\u22b2': '\u22b3', '\u22b4': '\u22b5', '\u22b6': '\u22b7',
289		'\u22c9': '\u22ca', '\u22cb': '\u22cc', '\u22d0': '\u22d1',
290		'\u22d6': '\u22d7', '\u22d8': '\u22d9', '\u22da': '\u22db',
291		'\u22dc': '\u22dd', '\u22de': '\u22df', '\u22e0': '\u22e1',
292		'\u22e2': '\u22e3', '\u22e4': '\u22e5', '\u22e6': '\u22e7',
293		'\u22e8': '\u22e9', '\u22ea': '\u22eb', '\u22ec': '\u22ed',
294		'\u22f0': '\u22f1', '\u22f2': '\u22fa', '\u22f3': '\u22fb',
295		'\u22f4': '\u22fc', '\u22f6': '\u22fd', '\u22f7': '\u22fe',
296		'\u2308': '\u2309', '\u230a': '\u230b', '\u2329': '\u232a',
297		'\u23b4': '\u23b5', '\u2768': '\u2769', '\u276a': '\u276b',
298		'\u276c': '\u276d', '\u276e': '\u276f', '\u2770': '\u2771',
299		'\u2772': '\u2773', '\u2774': '\u2775', '\u27c3': '\u27c4',
300		'\u27c5': '\u27c6', '\u27d5': '\u27d6', '\u27dd': '\u27de',
301		'\u27e2': '\u27e3', '\u27e4': '\u27e5', '\u27e6': '\u27e7',
302		'\u27e8': '\u27e9', '\u27ea': '\u27eb', '\u2983': '\u2984',
303		'\u2985': '\u2986', '\u2987': '\u2988', '\u2989': '\u298a',
304		'\u298b': '\u298c', '\u298d': '\u298e', '\u298f': '\u2990',
305		'\u2991': '\u2992', '\u2993': '\u2994', '\u2995': '\u2996',
306		'\u2997': '\u2998', '\u29c0': '\u29c1', '\u29c4': '\u29c5',
307		'\u29cf': '\u29d0', '\u29d1': '\u29d2', '\u29d4': '\u29d5',
308		'\u29d8': '\u29d9', '\u29da': '\u29db', '\u29f8': '\u29f9',
309		'\u29fc': '\u29fd', '\u2a2b': '\u2a2c', '\u2a2d': '\u2a2e',
310		'\u2a34': '\u2a35', '\u2a3c': '\u2a3d', '\u2a64': '\u2a65',
311		'\u2a79': '\u2a7a', '\u2a7d': '\u2a7e', '\u2a7f': '\u2a80',
312		'\u2a81': '\u2a82', '\u2a83': '\u2a84', '\u2a8b': '\u2a8c',
313		'\u2a91': '\u2a92', '\u2a93': '\u2a94', '\u2a95': '\u2a96',
314		'\u2a97': '\u2a98', '\u2a99': '\u2a9a', '\u2a9b': '\u2a9c',
315		'\u2aa1': '\u2aa2', '\u2aa6': '\u2aa7', '\u2aa8': '\u2aa9',
316		'\u2aaa': '\u2aab', '\u2aac': '\u2aad', '\u2aaf': '\u2ab0',
317		'\u2ab3': '\u2ab4', '\u2abb': '\u2abc', '\u2abd': '\u2abe',
318		'\u2abf': '\u2ac0', '\u2ac1': '\u2ac2', '\u2ac3': '\u2ac4',
319		'\u2ac5': '\u2ac6', '\u2acd': '\u2ace', '\u2acf': '\u2ad0',
320		'\u2ad1': '\u2ad2', '\u2ad3': '\u2ad4', '\u2ad5': '\u2ad6',
321		'\u2aec': '\u2aed', '\u2af7': '\u2af8', '\u2af9': '\u2afa',
322		'\u2e02': '\u2e03', '\u2e04': '\u2e05', '\u2e09': '\u2e0a',
323		'\u2e0c': '\u2e0d', '\u2e1c': '\u2e1d', '\u2e20': '\u2e21',
324		'\u3008': '\u3009', '\u300a': '\u300b', '\u300c': '\u300d',
325		'\u300e': '\u300f', '\u3010': '\u3011', '\u3014': '\u3015',
326		'\u3016': '\u3017', '\u3018': '\u3019', '\u301a': '\u301b',
327		'\u301d': '\u301e', '\ufd3e': '\ufd3f', '\ufe17': '\ufe18',
328		'\ufe35': '\ufe36', '\ufe37': '\ufe38', '\ufe39': '\ufe3a',
329		'\ufe3b': '\ufe3c', '\ufe3d': '\ufe3e', '\ufe3f': '\ufe40',
330		'\ufe41': '\ufe42', '\ufe43': '\ufe44', '\ufe47': '\ufe48',
331		'\ufe59': '\ufe5a', '\ufe5b': '\ufe5c', '\ufe5d': '\ufe5e',
332		'\uff08': '\uff09', '\uff1c': '\uff1e', '\uff3b': '\uff3d',
333		'\uff5b': '\uff5d', '\uff5f': '\uff60', '\uff62': '\uff63',
334	}
335
336	bracketsPattern := `[` + regexp.QuoteMeta(joinRuneMap(brackets)) + `]`
337
338	// Finds opening brackets and their closing counterparts (including pod and heredoc)
339	// and modifies state groups and position accordingly
340	findBrackets := func(tokenClass RakuToken) MutatorFunc {
341		return func(state *LexerState) error {
342			var openingChars []rune
343			var adverbs []rune
344			switch tokenClass {
345			case rakuPod:
346				openingChars = []rune(strings.Join(state.Groups[1:5], ``))
347			default:
348				adverbs = []rune(state.NamedGroups[`adverbs`])
349				openingChars = []rune(state.NamedGroups[`opening_delimiters`])
350			}
351
352			openingChar := openingChars[0]
353
354			nChars := len(openingChars)
355
356			var closingChar rune
357			var closingCharExists bool
358			var closingChars []rune
359
360			switch tokenClass {
361			case rakuPod:
362				closingCharExists = true
363			default:
364				closingChar, closingCharExists = brackets[openingChar]
365			}
366
367			switch tokenClass {
368			case rakuPodFormatter:
369				formatter := StringOther
370
371				switch state.NamedGroups[`keyword`] {
372				case "B":
373					formatter = GenericStrong
374				case "I":
375					formatter = GenericEmph
376				case "U":
377					formatter = GenericUnderline
378				}
379
380				formatterRule := ruleReplacingConfig{
381					pattern:      `.+?`,
382					tokenType:    formatter,
383					mutator:      nil,
384					stateName:    `pod-formatter`,
385					rulePosition: bottomRule,
386				}
387
388				err := replaceRule(formatterRule)(state)
389				if err != nil {
390					panic(err)
391				}
392
393				err = replaceRule(ruleReplacingConfig{
394					delimiter:              []rune{closingChar},
395					tokenType:              Punctuation,
396					stateName:              `pod-formatter`,
397					pushState:              true,
398					numberOfDelimiterChars: nChars,
399					appendMutator:          popRule(formatterRule),
400				})(state)
401				if err != nil {
402					panic(err)
403				}
404
405				return nil
406			case rakuMatchRegex:
407				var delimiter []rune
408				if closingCharExists {
409					delimiter = []rune{closingChar}
410				} else {
411					delimiter = openingChars
412				}
413
414				err := replaceRule(ruleReplacingConfig{
415					delimiter: delimiter,
416					tokenType: Punctuation,
417					stateName: `regex`,
418					popState:  true,
419					pushState: true,
420				})(state)
421				if err != nil {
422					panic(err)
423				}
424
425				return nil
426			case rakuSubstitutionRegex:
427				delimiter := regexp2.Escape(string(openingChars))
428
429				err := replaceRule(ruleReplacingConfig{
430					pattern:      `(` + delimiter + `)` + `((?:\\\\|\\/|.)*?)` + `(` + delimiter + `)`,
431					tokenType:    ByGroups(Punctuation, UsingSelf(`qq`), Punctuation),
432					rulePosition: topRule,
433					stateName:    `regex`,
434					popState:     true,
435					pushState:    true,
436				})(state)
437				if err != nil {
438					panic(err)
439				}
440
441				return nil
442			}
443
444			text := state.Text
445
446			var endPos int
447
448			var nonMirroredOpeningCharPosition int
449
450			if !closingCharExists {
451				// it's not a mirrored character, which means we
452				// just need to look for the next occurrence
453				closingChars = openingChars
454				nonMirroredOpeningCharPosition = indexAt(text, closingChars, state.Pos)
455				endPos = nonMirroredOpeningCharPosition
456			} else {
457				var podRegex *regexp2.Regexp
458				if tokenClass == rakuPod {
459					podRegex = regexp2.MustCompile(
460						state.NamedGroups[`ws`]+`=end`+`\s+`+regexp2.Escape(state.NamedGroups[`name`]),
461						0,
462					)
463				} else {
464					closingChars = []rune(strings.Repeat(string(closingChar), nChars))
465				}
466
467				// we need to look for the corresponding closing character,
468				// keep nesting in mind
469				nestingLevel := 1
470
471				searchPos := state.Pos - nChars
472
473				var nextClosePos int
474
475				for nestingLevel > 0 {
476					if tokenClass == rakuPod {
477						match, err := podRegex.FindRunesMatchStartingAt(text, searchPos+nChars)
478						if err == nil {
479							closingChars = match.Runes()
480							nextClosePos = match.Index
481						} else {
482							nextClosePos = -1
483						}
484					} else {
485						nextClosePos = indexAt(text, closingChars, searchPos+nChars)
486					}
487
488					nextOpenPos := indexAt(text, openingChars, searchPos+nChars)
489
490					switch {
491					case nextClosePos == -1:
492						nextClosePos = len(text)
493						nestingLevel = 0
494					case nextOpenPos != -1 && nextOpenPos < nextClosePos:
495						nestingLevel++
496						nChars = len(openingChars)
497						searchPos = nextOpenPos
498					default: // next_close_pos < next_open_pos
499						nestingLevel--
500						nChars = len(closingChars)
501						searchPos = nextClosePos
502					}
503				}
504
505				endPos = nextClosePos
506			}
507
508			if endPos < 0 {
509				// if we didn't find a closer, just highlight the
510				// rest of the text in this class
511				endPos = len(text)
512			}
513
514			adverbre := regexp.MustCompile(`:to\b|:heredoc\b`)
515			var heredocTerminator []rune
516			var endHeredocPos int
517			if adverbre.MatchString(string(adverbs)) {
518				if endPos != len(text) {
519					heredocTerminator = text[state.Pos:endPos]
520					nChars = len(heredocTerminator)
521				} else {
522					endPos = state.Pos + 1
523					heredocTerminator = []rune{}
524					nChars = 0
525				}
526
527				if nChars > 0 {
528					endHeredocPos = indexAt(text[endPos:], heredocTerminator, 0)
529					if endHeredocPos > -1 {
530						endPos += endHeredocPos
531					} else {
532						endPos = len(text)
533					}
534				}
535			}
536
537			textBetweenBrackets := string(text[state.Pos:endPos])
538			switch tokenClass {
539			case rakuPod, rakuPodDeclaration, rakuNameAttribute:
540				state.NamedGroups[`value`] = textBetweenBrackets
541				state.NamedGroups[`closing_delimiters`] = string(closingChars)
542			case rakuQuote:
543				if len(heredocTerminator) > 0 {
544					// Length of heredoc terminator + closing chars + `;`
545					heredocFristPunctuationLen := nChars + len(openingChars) + 1
546
547					state.NamedGroups[`opening_delimiters`] = string(openingChars) +
548						string(text[state.Pos:state.Pos+heredocFristPunctuationLen])
549
550					state.NamedGroups[`value`] =
551						string(text[state.Pos+heredocFristPunctuationLen : endPos])
552
553					if endHeredocPos > -1 {
554						state.NamedGroups[`closing_delimiters`] = string(heredocTerminator)
555					}
556				} else {
557					state.NamedGroups[`value`] = textBetweenBrackets
558					if nChars > 0 {
559						state.NamedGroups[`closing_delimiters`] = string(closingChars)
560					}
561				}
562			default:
563				state.Groups = []string{state.Groups[0] + string(text[state.Pos:endPos+nChars])}
564			}
565
566			state.Pos = endPos + nChars
567
568			return nil
569		}
570	}
571
572	// Raku rules
573	// Empty capture groups are placeholders and will be replaced by mutators
574	// DO NOT REMOVE THEM!
575	return Rules{
576		"root": {
577			// Placeholder, will be overwritten by mutators, DO NOT REMOVE!
578			{`\A\z`, nil, nil},
579			Include("common"),
580			{`{`, Punctuation, Push(`root`)},
581			{`\(`, Punctuation, Push(`root`)},
582			{`[)}]`, Punctuation, Pop(1)},
583			{`;`, Punctuation, nil},
584			{`\[|\]`, Operator, nil},
585			{`.+?`, Text, nil},
586		},
587		"common": {
588			{`^#![^\n]*$`, CommentHashbang, nil},
589			Include("pod"),
590			// Multi-line, Embedded comment
591			{
592				"#`(?<opening_delimiters>(?<delimiter>" + bracketsPattern + `)\k<delimiter>*)`,
593				CommentMultiline,
594				findBrackets(rakuMultilineComment),
595			},
596			{`#[^\n]*$`, CommentSingle, nil},
597			// /regex/
598			{
599				`(?<=(?:^|\(|=|:|~~|\[|{|,|=>)\s*)(/)(?!\]|\))((?:\\\\|\\/|.)*?)((?<!(?<!\\)\\)/(?!'|"))`,
600				ByGroups(Punctuation, UsingSelf("regex"), Punctuation),
601				nil,
602			},
603			Include("variable"),
604			// ::?VARIABLE
605			{`::\?\w+(?::[_UD])?`, NameVariableGlobal, nil},
606			// Version
607			{
608				`\b(v)(\d+)((?:\.(?:\*|[\d\w]+))*)(\+)?`,
609				ByGroups(Keyword, NumberInteger, NameEntity, Operator),
610				nil,
611			},
612			Include("number"),
613			// Hyperoperator | »*«
614			{`(>>)(\S+?)(<<)`, ByGroups(Operator, UsingSelf("root"), Operator), nil},
615			{`(»)(\S+?)(«)`, ByGroups(Operator, UsingSelf("root"), Operator), nil},
616			// Hyperoperator | «*«
617			{`(<<)(\S+?)(<<)`, ByGroups(Operator, UsingSelf("root"), Operator), nil},
618			{`(«)(\S+?)(«)`, ByGroups(Operator, UsingSelf("root"), Operator), nil},
619			// Hyperoperator | »*»
620			{`(>>)(\S+?)(>>)`, ByGroups(Operator, UsingSelf("root"), Operator), nil},
621			{`(»)(\S+?)(»)`, ByGroups(Operator, UsingSelf("root"), Operator), nil},
622			// <<quoted words>>
623			{`(?<!(?:\d+|\.(?:Int|Numeric)|[$@%]\*?[\w':-]+\s+|[\])}]\s+)\s*)(<<)(?!(?:(?!>>)[^\n])+?[},;] *\n)(?!(?:(?!>>).)+?>>\S+?>>)`, Punctuation, Push("<<")},
624			// «quoted words»
625			{`(?<!(?:\d+|\.(?:Int|Numeric)|[$@%]\*?[\w':-]+\s+|[\])}]\s+)\s*)(«)(?![^»]+?[},;] *\n)(?![^»]+?»\S+?»)`, Punctuation, Push("«")},
626			// [<]
627			{`(?<=\[\\?)<(?=\])`, Operator, nil},
628			// < and > operators | something < onething > something
629			{
630				`(?<=[$@%&]?\w[\w':-]* +)(<=?)( *[^ ]+? *)(>=?)(?= *[$@%&]?\w[\w':-]*)`,
631				ByGroups(Operator, UsingSelf("root"), Operator),
632				nil,
633			},
634			// <quoted words>
635			{
636				`(?<!(?:\d+|\.(?:Int|Numeric)|[$@%]\*?[\w':-]+\s+|[\])}]\s+)\s*)(<)((?:(?![,;)}] *(?:#[^\n]+)?\n)[^<>])+?)(>)(?!\s*(?:\d+|\.(?:Int|Numeric)|[$@%]\*?\w[\w':-]*[^(]|\s+\[))`,
637				ByGroups(Punctuation, String, Punctuation),
638				nil,
639			},
640			{`C?X::['\w:-]+`, NameException, nil},
641			Include("metaoperator"),
642			// Pair | key => value
643			{
644				`(\w[\w'-]*)(\s*)(=>)`,
645				ByGroups(String, Text, Operator),
646				nil,
647			},
648			Include("colon-pair"),
649			// Token
650			{
651				`(?<=(?:^|\s)(?:regex|token|rule)(\s+))` + namePattern + colonPairLookahead + `\s*[({])`,
652				NameFunction,
653				Push("token", "name-adverb"),
654			},
655			// Substitution
656			{`(?<=^|\b|\s)(?<!\.)(ss|S|s|TR|tr)\b(\s*)`, ByGroups(Keyword, Text), Push("substitution")},
657			{keywordsPattern, Keyword, nil},
658			{builtinTypesPattern, NameBuiltin, nil},
659			{builtinRoutinesPattern, NameBuiltin, nil},
660			// Class name
661			{
662				`(?<=(?:^|\s)(?:class|grammar|role|does|but|is|subset|of)\s+)` + namePattern,
663				NameClass,
664				Push("name-adverb"),
665			},
666			//  Routine
667			{
668				`(?<=(?:^|\s)(?:sub|method|multi sub|multi)\s+)!?` + namePattern + colonPairLookahead + `\s*[({])`,
669				NameFunction,
670				Push("name-adverb"),
671			},
672			// Constant
673			{`(?<=\bconstant\s+)` + namePattern, NameConstant, Push("name-adverb")},
674			// Namespace
675			{`(?<=\b(?:use|module|package)\s+)` + namePattern, NameNamespace, Push("name-adverb")},
676			Include("operator"),
677			Include("single-quote"),
678			{`(?<!(?<!\\)\\)"`, Punctuation, Push("double-quotes")},
679			// m,rx regex
680			{`(?<=^|\b|\s)(ms|m|rx)\b(\s*)`, ByGroups(Keyword, Text), Push("rx")},
681			// Quote constructs
682			{
683				`(?<=^|\b|\s)(?<keyword>(?:qq|q|Q))(?<adverbs>(?::?(?:heredoc|to|qq|ww|q|w|s|a|h|f|c|b|to|v|x))*)(?<ws>\s*)(?<opening_delimiters>(?<delimiter>[^0-9a-zA-Z:\s])\k<delimiter>*)`,
684				EmitterFunc(quote),
685				findBrackets(rakuQuote),
686			},
687			// Function
688			{
689				`\b` + namePattern + colonPairLookahead + `\()`,
690				NameFunction,
691				Push("name-adverb"),
692			},
693			// Method
694			{
695				`(?<!\.\.[?^*+]?)(?<=(?:\.[?^*+&]?)|self!)` + namePattern + colonPairLookahead + `\b)`,
696				NameFunction,
697				Push("name-adverb"),
698			},
699			// Indirect invocant
700			{namePattern + `(?=\s+\W?['\w:-]+:\W)`, NameFunction, Push("name-adverb")},
701			{`(?<=\W)(?:∅|i|e|��|tau|τ|pi|π|Inf|∞)(?=\W)`, NameConstant, nil},
702			{`(「)([^」]*)(」)`, ByGroups(Punctuation, String, Punctuation), nil},
703			{`(?<=^ *)\b` + namePattern + `(?=:\s*(?:for|while|loop))`, NameLabel, nil},
704			// Sigilless variable
705			{
706				`(?<=\b(?:my|our|constant|let|temp)\s+)\\` + namePattern,
707				NameVariable,
708				Push("name-adverb"),
709			},
710			{namePattern, Name, Push("name-adverb")},
711		},
712		"rx": {
713			Include("colon-pair-attribute"),
714			{
715				`(?<opening_delimiters>(?<delimiter>[^\w:\s])\k<delimiter>*)`,
716				ByGroupNames(
717					map[string]Emitter{
718						`opening_delimiters`: Punctuation,
719						`delimiter`:          nil,
720					},
721				),
722				findBrackets(rakuMatchRegex),
723			},
724		},
725		"substitution": {
726			Include("colon-pair-attribute"),
727			// Substitution | s{regex} = value
728			{
729				`(?<opening_delimiters>(?<delimiter>` + bracketsPattern + `)\k<delimiter>*)`,
730				ByGroupNames(map[string]Emitter{
731					`opening_delimiters`: Punctuation,
732					`delimiter`:          nil,
733				}),
734				findBrackets(rakuMatchRegex),
735			},
736			// Substitution | s/regex/string/
737			{
738				`(?<opening_delimiters>[^\w:\s])`,
739				Punctuation,
740				findBrackets(rakuSubstitutionRegex),
741			},
742		},
743		"number": {
744			{`0_?[0-7]+(_[0-7]+)*`, LiteralNumberOct, nil},
745			{`0x[0-9A-Fa-f]+(_[0-9A-Fa-f]+)*`, LiteralNumberHex, nil},
746			{`0b[01]+(_[01]+)*`, LiteralNumberBin, nil},
747			{
748				`(?i)(\d*(_\d*)*\.\d+(_\d*)*|\d+(_\d*)*\.\d+(_\d*)*)(e[+-]?\d+)?`,
749				LiteralNumberFloat,
750				nil,
751			},
752			{`(?i)\d+(_\d*)*e[+-]?\d+(_\d*)*`, LiteralNumberFloat, nil},
753			{`(?<=\d+)i`, NameConstant, nil},
754			{`\d+(_\d+)*`, LiteralNumberInteger, nil},
755		},
756		"name-adverb": {
757			Include("colon-pair-attribute-keyvalue"),
758			Default(Pop(1)),
759		},
760		"colon-pair": {
761			// :key(value)
762			{colonPairPattern, colonPair(String), findBrackets(rakuNameAttribute)},
763			// :123abc
764			{
765				`(:)(\d+)(\w[\w'-]*)`,
766				ByGroups(Punctuation, UsingSelf("number"), String),
767				nil,
768			},
769			// :key
770			{`(:)(!?)(\w[\w'-]*)`, ByGroups(Punctuation, Operator, String), nil},
771			{`\s+`, Text, nil},
772		},
773		"colon-pair-attribute": {
774			// :key(value)
775			{colonPairPattern, colonPair(NameAttribute), findBrackets(rakuNameAttribute)},
776			// :123abc
777			{
778				`(:)(\d+)(\w[\w'-]*)`,
779				ByGroups(Punctuation, UsingSelf("number"), NameAttribute),
780				nil,
781			},
782			// :key
783			{`(:)(!?)(\w[\w'-]*)`, ByGroups(Punctuation, Operator, NameAttribute), nil},
784			{`\s+`, Text, nil},
785		},
786		"colon-pair-attribute-keyvalue": {
787			// :key(value)
788			{colonPairPattern, colonPair(NameAttribute), findBrackets(rakuNameAttribute)},
789		},
790		"escape-qq": {
791			{
792				`(?<!(?<!\\)\\)(\\qq)(\[)(.+?)(\])`,
793				ByGroups(StringEscape, Punctuation, UsingSelf("qq"), Punctuation),
794				nil,
795			},
796		},
797		`escape-char`: {
798			{`(?<!(?<!\\)\\)(\\[abfrnrt])`, StringEscape, nil},
799		},
800		`escape-single-quote`: {
801			{`(?<!(?<!\\)\\)(\\)(['\\])`, ByGroups(StringEscape, StringSingle), nil},
802		},
803		"escape-c-name": {
804			{
805				`(?<!(?<!\\)\\)(\\[c|C])(\[)(.+?)(\])`,
806				ByGroups(StringEscape, Punctuation, String, Punctuation),
807				nil,
808			},
809		},
810		"escape-hexadecimal": {
811			{
812				`(?<!(?<!\\)\\)(\\[x|X])(\[)([0-9a-fA-F]+)(\])`,
813				ByGroups(StringEscape, Punctuation, NumberHex, Punctuation),
814				nil,
815			},
816			{`(\\[x|X])([0-9a-fA-F]+)`, ByGroups(StringEscape, NumberHex), nil},
817		},
818		"regex": {
819			// Placeholder, will be overwritten by mutators, DO NOT REMOVE!
820			{`\A\z`, nil, nil},
821			Include("regex-escape-class"),
822			Include(`regex-character-escape`),
823			// $(code)
824			{
825				`([$@])((?<!(?<!\\)\\)\()`,
826				ByGroups(Keyword, Punctuation),
827				replaceRule(ruleReplacingConfig{
828					delimiter: []rune(`)`),
829					tokenType: Punctuation,
830					stateName: `root`,
831					pushState: true,
832				}),
833			},
834			// Exclude $/ from variables, because we can't get out of the end of the slash regex: $/;
835			{`\$(?=/)`, NameEntity, nil},
836			// Exclude $ from variables
837			{`\$(?=\z|\s|[^<(\w*!.])`, NameEntity, nil},
838			Include("variable"),
839			Include("escape-c-name"),
840			Include("escape-hexadecimal"),
841			Include("number"),
842			Include("single-quote"),
843			// :my variable code ...
844			{
845				`(?<!(?<!\\)\\)(:)(my|our|state|constant|temp|let)`,
846				ByGroups(Operator, KeywordDeclaration),
847				replaceRule(ruleReplacingConfig{
848					delimiter: []rune(`;`),
849					tokenType: Punctuation,
850					stateName: `root`,
851					pushState: true,
852				}),
853			},
854			// <{code}>
855			{
856				`(?<!(?<!\\)\\)(<)([?!.]*)((?<!(?<!\\)\\){)`,
857				ByGroups(Punctuation, Operator, Punctuation),
858				replaceRule(ruleReplacingConfig{
859					delimiter: []rune(`}>`),
860					tokenType: Punctuation,
861					stateName: `root`,
862					pushState: true,
863				}),
864			},
865			// {code}
866			Include(`closure`),
867			// Properties
868			{`(:)(\w+)`, ByGroups(Punctuation, NameAttribute), nil},
869			// Operator
870			{`\|\||\||&&|&|\.\.|\*\*|%%|%|:|!|<<|«|>>|»|\+|\*\*|\*|\?|=|~|<~~>`, Operator, nil},
871			// Anchors
872			{`\^\^|\^|\$\$|\$`, NameEntity, nil},
873			{`\.`, NameEntity, nil},
874			{`#[^\n]*\n`, CommentSingle, nil},
875			// Lookaround
876			{
877				`(?<!(?<!\\)\\)(<)(\s*)([?!.]+)(\s*)(after|before)`,
878				ByGroups(Punctuation, Text, Operator, Text, OperatorWord),
879				replaceRule(ruleReplacingConfig{
880					delimiter: []rune(`>`),
881					tokenType: Punctuation,
882					stateName: `regex`,
883					pushState: true,
884				}),
885			},
886			{
887				`(?<!(?<!\\)\\)(<)([|!?.]*)(wb|ww|ws|w)(>)`,
888				ByGroups(Punctuation, Operator, OperatorWord, Punctuation),
889				nil,
890			},
891			// <$variable>
892			{
893				`(?<!(?<!\\)\\)(<)([?!.]*)([$@]\w[\w:-]*)(>)`,
894				ByGroups(Punctuation, Operator, NameVariable, Punctuation),
895				nil,
896			},
897			// Capture markers
898			{`(?<!(?<!\\)\\)<\(|\)>`, Operator, nil},
899			{
900				`(?<!(?<!\\)\\)(<)(\w[\w:-]*)(=\.?)`,
901				ByGroups(Punctuation, NameVariable, Operator),
902				Push(`regex-variable`),
903			},
904			{
905				`(?<!(?<!\\)\\)(<)([|!?.&]*)(\w(?:(?!:\s)[\w':-])*)`,
906				ByGroups(Punctuation, Operator, NameFunction),
907				Push(`regex-function`),
908			},
909			{`(?<!(?<!\\)\\)<`, Punctuation, Push("regex-property")},
910			{`(?<!(?<!\\)\\)"`, Punctuation, Push("double-quotes")},
911			{`(?<!(?<!\\)\\)(?:\]|\))`, Punctuation, Pop(1)},
912			{`(?<!(?<!\\)\\)(?:\[|\()`, Punctuation, Push("regex")},
913			{`.+?`, StringRegex, nil},
914		},
915		"regex-class-builtin": {
916			{
917				`\b(?:alnum|alpha|blank|cntrl|digit|graph|lower|print|punct|space|upper|xdigit|same|ident)\b`,
918				NameBuiltin,
919				nil,
920			},
921		},
922		"regex-function": {
923			// <function>
924			{`(?<!(?<!\\)\\)>`, Punctuation, Pop(1)},
925			// <function(parameter)>
926			{
927				`\(`,
928				Punctuation,
929				replaceRule(ruleReplacingConfig{
930					delimiter: []rune(`)>`),
931					tokenType: Punctuation,
932					stateName: `root`,
933					popState:  true,
934					pushState: true,
935				}),
936			},
937			// <function value>
938			{
939				`\s+`,
940				StringRegex,
941				replaceRule(ruleReplacingConfig{
942					delimiter: []rune(`>`),
943					tokenType: Punctuation,
944					stateName: `regex`,
945					popState:  true,
946					pushState: true,
947				}),
948			},
949			// <function: value>
950			{
951				`:`,
952				Punctuation,
953				replaceRule(ruleReplacingConfig{
954					delimiter: []rune(`>`),
955					tokenType: Punctuation,
956					stateName: `root`,
957					popState:  true,
958					pushState: true,
959				}),
960			},
961		},
962		"regex-variable": {
963			Include(`regex-starting-operators`),
964			// <var=function(
965			{
966				`(&)?(\w(?:(?!:\s)[\w':-])*)(?=\()`,
967				ByGroups(Operator, NameFunction),
968				Mutators(Pop(1), Push(`regex-function`)),
969			},
970			// <var=function>
971			{`(&)?(\w[\w':-]*)(>)`, ByGroups(Operator, NameFunction, Punctuation), Pop(1)},
972			// <var=
973			Default(Pop(1), Push(`regex-property`)),
974		},
975		"regex-property": {
976			{`(?<!(?<!\\)\\)>`, Punctuation, Pop(1)},
977			Include("regex-class-builtin"),
978			Include("variable"),
979			Include(`regex-starting-operators`),
980			Include("colon-pair-attribute"),
981			{`(?<!(?<!\\)\\)\[`, Punctuation, Push("regex-character-class")},
982			{`\+|\-`, Operator, nil},
983			{`@[\w':-]+`, NameVariable, nil},
984			{`.+?`, StringRegex, nil},
985		},
986		`regex-starting-operators`: {
987			{`(?<=<)[|!?.]+`, Operator, nil},
988		},
989		"regex-escape-class": {
990			{`(?i)\\n|\\t|\\h|\\v|\\s|\\d|\\w`, StringEscape, nil},
991		},
992		`regex-character-escape`: {
993			{`(?<!(?<!\\)\\)(\\)(.)`, ByGroups(StringEscape, StringRegex), nil},
994		},
995		"regex-character-class": {
996			{`(?<!(?<!\\)\\)\]`, Punctuation, Pop(1)},
997			Include("regex-escape-class"),
998			Include("escape-c-name"),
999			Include("escape-hexadecimal"),
1000			Include(`regex-character-escape`),
1001			Include("number"),
1002			{`\.\.`, Operator, nil},
1003			{`.+?`, StringRegex, nil},
1004		},
1005		"metaoperator": {
1006			// Z[=>]
1007			{
1008				`\b([RZX]+)\b(\[)([^\s\]]+?)(\])`,
1009				ByGroups(OperatorWord, Punctuation, UsingSelf("root"), Punctuation),
1010				nil,
1011			},
1012			// Z=>
1013			{`\b([RZX]+)\b([^\s\]]+)`, ByGroups(OperatorWord, UsingSelf("operator")), nil},
1014		},
1015		"operator": {
1016			// Word Operator
1017			{wordOperatorsPattern, OperatorWord, nil},
1018			// Operator
1019			{operatorsPattern, Operator, nil},
1020		},
1021		"pod": {
1022			// Single-line pod declaration
1023			{`(#[|=])\s`, Keyword, Push("pod-single")},
1024			// Multi-line pod declaration
1025			{
1026				"(?<keyword>#[|=])(?<opening_delimiters>(?<delimiter>" + bracketsPattern + `)\k<delimiter>*)(?<value>)(?<closing_delimiters>)`,
1027				ByGroupNames(
1028					map[string]Emitter{
1029						`keyword`:            Keyword,
1030						`opening_delimiters`: Punctuation,
1031						`delimiter`:          nil,
1032						`value`:              UsingSelf("pod-declaration"),
1033						`closing_delimiters`: Punctuation,
1034					}),
1035				findBrackets(rakuPodDeclaration),
1036			},
1037			Include("pod-blocks"),
1038		},
1039		"pod-blocks": {
1040			// =begin code
1041			{
1042				`(?<=^ *)(?<ws> *)(?<keyword>=begin)(?<ws2> +)(?<name>code)(?<config>[^\n]*)(?<value>.*?)(?<ws3>^\k<ws>)(?<end_keyword>=end)(?<ws4> +)\k<name>`,
1043				EmitterFunc(podCode),
1044				nil,
1045			},
1046			// =begin
1047			{
1048				`(?<=^ *)(?<ws> *)(?<keyword>=begin)(?<ws2> +)(?!code)(?<name>\w[\w'-]*)(?<config>[^\n]*)(?<value>)(?<closing_delimiters>)`,
1049				ByGroupNames(
1050					map[string]Emitter{
1051						`ws`:                 Comment,
1052						`keyword`:            Keyword,
1053						`ws2`:                StringDoc,
1054						`name`:               Keyword,
1055						`config`:             EmitterFunc(podConfig),
1056						`value`:              UsingSelf("pod-begin"),
1057						`closing_delimiters`: Keyword,
1058					}),
1059				findBrackets(rakuPod),
1060			},
1061			// =for ...
1062			{
1063				`(?<=^ *)(?<ws> *)(?<keyword>=(?:for|defn))(?<ws2> +)(?<name>\w[\w'-]*)(?<config>[^\n]*\n)`,
1064				ByGroups(Comment, Keyword, StringDoc, Keyword, EmitterFunc(podConfig)),
1065				Push("pod-paragraph"),
1066			},
1067			// =config
1068			{
1069				`(?<=^ *)(?<ws> *)(?<keyword>=config)(?<ws2> +)(?<name>\w[\w'-]*)(?<config>[^\n]*\n)`,
1070				ByGroups(Comment, Keyword, StringDoc, Keyword, EmitterFunc(podConfig)),
1071				nil,
1072			},
1073			// =alias
1074			{
1075				`(?<=^ *)(?<ws> *)(?<keyword>=alias)(?<ws2> +)(?<name>\w[\w'-]*)(?<value>[^\n]*\n)`,
1076				ByGroups(Comment, Keyword, StringDoc, Keyword, StringDoc),
1077				nil,
1078			},
1079			// =encoding
1080			{
1081				`(?<=^ *)(?<ws> *)(?<keyword>=encoding)(?<ws2> +)(?<name>[^\n]+)`,
1082				ByGroups(Comment, Keyword, StringDoc, Name),
1083				nil,
1084			},
1085			// =para ...
1086			{
1087				`(?<=^ *)(?<ws> *)(?<keyword>=(?:para|table|pod))(?<config>(?<!\n\s*)[^\n]*\n)`,
1088				ByGroups(Comment, Keyword, EmitterFunc(podConfig)),
1089				Push("pod-paragraph"),
1090			},
1091			// =head1 ...
1092			{
1093				`(?<=^ *)(?<ws> *)(?<keyword>=head\d+)(?<ws2> *)(?<config>#?)`,
1094				ByGroups(Comment, Keyword, GenericHeading, Keyword),
1095				Push("pod-heading"),
1096			},
1097			// =item ...
1098			{
1099				`(?<=^ *)(?<ws> *)(?<keyword>=(?:item\d*|comment|data|[A-Z]+))(?<ws2> *)(?<config>#?)`,
1100				ByGroups(Comment, Keyword, StringDoc, Keyword),
1101				Push("pod-paragraph"),
1102			},
1103			{
1104				`(?<=^ *)(?<ws> *)(?<keyword>=finish)(?<config>[^\n]*)`,
1105				ByGroups(Comment, Keyword, EmitterFunc(podConfig)),
1106				Push("pod-finish"),
1107			},
1108			// ={custom} ...
1109			{
1110				`(?<=^ *)(?<ws> *)(?<name>=\w[\w'-]*)(?<ws2> *)(?<config>#?)`,
1111				ByGroups(Comment, Name, StringDoc, Keyword),
1112				Push("pod-paragraph"),
1113			},
1114			// = podconfig
1115			{
1116				`(?<=^ *)(?<keyword> *=)(?<ws> *)(?<config>(?::\w[\w'-]*(?:` + colonPairOpeningBrackets + `.+?` +
1117					colonPairClosingBrackets + `) *)*\n)`,
1118				ByGroups(Keyword, StringDoc, EmitterFunc(podConfig)),
1119				nil,
1120			},
1121		},
1122		"pod-begin": {
1123			Include("pod-blocks"),
1124			Include("pre-pod-formatter"),
1125			{`.+?`, StringDoc, nil},
1126		},
1127		"pod-declaration": {
1128			Include("pre-pod-formatter"),
1129			{`.+?`, StringDoc, nil},
1130		},
1131		"pod-paragraph": {
1132			{`\n *\n|\n(?=^ *=)`, StringDoc, Pop(1)},
1133			Include("pre-pod-formatter"),
1134			{`.+?`, StringDoc, nil},
1135		},
1136		"pod-single": {
1137			{`\n`, StringDoc, Pop(1)},
1138			Include("pre-pod-formatter"),
1139			{`.+?`, StringDoc, nil},
1140		},
1141		"pod-heading": {
1142			{`\n *\n|\n(?=^ *=)`, GenericHeading, Pop(1)},
1143			Include("pre-pod-formatter"),
1144			{`.+?`, GenericHeading, nil},
1145		},
1146		"pod-finish": {
1147			{`\z`, nil, Pop(1)},
1148			Include("pre-pod-formatter"),
1149			{`.+?`, StringDoc, nil},
1150		},
1151		"pre-pod-formatter": {
1152			// C<code>, B<bold>, ...
1153			{
1154				`(?<keyword>[CBIUDTKRPAELZVMSXN])(?<opening_delimiters><+|«)`,
1155				ByGroups(Keyword, Punctuation),
1156				findBrackets(rakuPodFormatter),
1157			},
1158		},
1159		"pod-formatter": {
1160			// Placeholder rule, will be replaced by mutators. DO NOT REMOVE!
1161			{`>`, Punctuation, Pop(1)},
1162			Include("pre-pod-formatter"),
1163			// Placeholder rule, will be replaced by mutators. DO NOT REMOVE!
1164			{`.+?`, StringOther, nil},
1165		},
1166		"variable": {
1167			{variablePattern, NameVariable, Push("name-adverb")},
1168			{globalVariablePattern, NameVariableGlobal, Push("name-adverb")},
1169			{`[$@]<[^>]+>`, NameVariable, nil},
1170			{`\$[/!¢]`, NameVariable, nil},
1171			{`[$@%]`, NameVariable, nil},
1172		},
1173		"single-quote": {
1174			{`(?<!(?<!\\)\\)'`, Punctuation, Push("single-quote-inner")},
1175		},
1176		"single-quote-inner": {
1177			{`(?<!(?<!(?<!\\)\\)\\)'`, Punctuation, Pop(1)},
1178			Include("escape-single-quote"),
1179			Include("escape-qq"),
1180			{`(?:\\\\|\\[^\\]|[^'\\])+?`, StringSingle, nil},
1181		},
1182		"double-quotes": {
1183			{`(?<!(?<!\\)\\)"`, Punctuation, Pop(1)},
1184			Include("qq"),
1185		},
1186		"<<": {
1187			{`>>(?!\s*(?:\d+|\.(?:Int|Numeric)|[$@%]\*?[\w':-]+|\s+\[))`, Punctuation, Pop(1)},
1188			Include("ww"),
1189		},
1190		"«": {
1191			{`»(?!\s*(?:\d+|\.(?:Int|Numeric)|[$@%]\*?[\w':-]+|\s+\[))`, Punctuation, Pop(1)},
1192			Include("ww"),
1193		},
1194		"ww": {
1195			Include("single-quote"),
1196			Include("qq"),
1197		},
1198		"qq": {
1199			Include("qq-variable"),
1200			Include("closure"),
1201			Include(`escape-char`),
1202			Include("escape-hexadecimal"),
1203			Include("escape-c-name"),
1204			Include("escape-qq"),
1205			{`.+?`, StringDouble, nil},
1206		},
1207		"qq-variable": {
1208			{
1209				`(?<!(?<!\\)\\)(?:` + variablePattern + `|` + globalVariablePattern + `)` + colonPairLookahead + `)`,
1210				NameVariable,
1211				Push("qq-variable-extras", "name-adverb"),
1212			},
1213		},
1214		"qq-variable-extras": {
1215			// Method
1216			{
1217				`(?<operator>\.)(?<method_name>` + namePattern + `)` + colonPairLookahead + `\()`,
1218				ByGroupNames(map[string]Emitter{
1219					`operator`:    Operator,
1220					`method_name`: NameFunction,
1221				}),
1222				Push(`name-adverb`),
1223			},
1224			// Function/Signature
1225			{
1226				`\(`, Punctuation, replaceRule(
1227					ruleReplacingConfig{
1228						delimiter: []rune(`)`),
1229						tokenType: Punctuation,
1230						stateName: `root`,
1231						pushState: true,
1232					}),
1233			},
1234			Default(Pop(1)),
1235		},
1236		"Q": {
1237			Include("escape-qq"),
1238			{`.+?`, String, nil},
1239		},
1240		"Q-closure": {
1241			Include("escape-qq"),
1242			Include("closure"),
1243			{`.+?`, String, nil},
1244		},
1245		"Q-variable": {
1246			Include("escape-qq"),
1247			Include("qq-variable"),
1248			{`.+?`, String, nil},
1249		},
1250		"closure": {
1251			{`(?<!(?<!\\)\\){`, Punctuation, replaceRule(
1252				ruleReplacingConfig{
1253					delimiter: []rune(`}`),
1254					tokenType: Punctuation,
1255					stateName: `root`,
1256					pushState: true,
1257				}),
1258			},
1259		},
1260		"token": {
1261			// Token signature
1262			{`\(`, Punctuation, replaceRule(
1263				ruleReplacingConfig{
1264					delimiter: []rune(`)`),
1265					tokenType: Punctuation,
1266					stateName: `root`,
1267					pushState: true,
1268				}),
1269			},
1270			{`{`, Punctuation, replaceRule(
1271				ruleReplacingConfig{
1272					delimiter: []rune(`}`),
1273					tokenType: Punctuation,
1274					stateName: `regex`,
1275					popState:  true,
1276					pushState: true,
1277				}),
1278			},
1279			{`\s*`, Text, nil},
1280			Default(Pop(1)),
1281		},
1282	}
1283}
1284
// joinRuneMap concatenates every key of m into one string.
//
// The keys are collected while ranging over the map, so the order of the
// characters in the result follows Go's map iteration order and is not fixed.
func joinRuneMap(m map[rune]rune) string {
	keys := make([]rune, len(m))

	next := 0
	for key := range m {
		keys[next] = key
		next++
	}

	return string(keys)
}
1294
// indexAt returns the rune index in str of the first occurrence of substr at
// or after rune position pos that is not escaped with a backslash. It returns
// -1 when there is no such occurrence.
//
// An occurrence is treated as escaped when the rune right before it is a
// backslash that is not itself preceded by another backslash. Only the
// searched window str[pos:] is inspected, so runes before pos never take part
// in the escape check. When an occurrence is escaped, the search restarts one
// rune after the start of that occurrence.
func indexAt(str []rune, substr []rune, pos int) int {
	window := str[pos:]
	text := string(window)

	byteIdx := strings.Index(text, string(substr))
	if byteIdx < 0 {
		return -1
	}

	// strings.Index yields a byte offset; the callers work with rune offsets.
	idx := utf8.RuneCountInString(text[:byteIdx])

	escaped := (idx > 1 && window[idx-1] == '\\' && window[idx-2] != '\\') ||
		(idx == 1 && window[idx-1] == '\\')
	if escaped {
		// The recursive result is already a rune offset relative to window
		// (or -1), so it must not be converted or used as a byte offset.
		idx = indexAt(window, substr, idx+1)
		if idx < 0 {
			return -1
		}
	}

	return idx + pos
}
1320
// contains reports whether e is equal to at least one element of s.
func contains(s []string, e string) bool {
	found := false
	for i := 0; i < len(s) && !found; i++ {
		found = s[i] == e
	}

	return found
}
1330
// rulePosition selects which rule slot of a state is targeted when a rule is
// replaced.
type rulePosition int

const (
	// topRule targets the first rule of a state.
	topRule rulePosition = 0

	// bottomRule targets the last rule of a state. The type is spelled out
	// because a constant with its own value expression does not inherit the
	// type of the previous constant in the group.
	bottomRule rulePosition = -1
)
1337
1338type ruleMakingConfig struct {
1339	delimiter              []rune
1340	pattern                string
1341	tokenType              Emitter
1342	mutator                Mutator
1343	numberOfDelimiterChars int
1344}
1345
1346type ruleReplacingConfig struct {
1347	delimiter              []rune
1348	pattern                string
1349	tokenType              Emitter
1350	numberOfDelimiterChars int
1351	mutator                Mutator
1352	appendMutator          Mutator
1353	rulePosition           rulePosition
1354	stateName              string
1355	pop                    bool
1356	popState               bool
1357	pushState              bool
1358}
1359
1360// Pops rule from state-stack and replaces the rule with the previous rule
1361func popRule(rule ruleReplacingConfig) MutatorFunc {
1362	return func(state *LexerState) error {
1363		stackName := genStackName(rule.stateName, rule.rulePosition)
1364
1365		stack, ok := state.Get(stackName).([]ruleReplacingConfig)
1366
1367		if ok && len(stack) > 0 {
1368			// Pop from stack
1369			stack = stack[:len(stack)-1]
1370			lastRule := stack[len(stack)-1]
1371			lastRule.pushState = false
1372			lastRule.popState = false
1373			lastRule.pop = true
1374			state.Set(stackName, stack)
1375
1376			// Call replaceRule to use the last rule
1377			err := replaceRule(lastRule)(state)
1378			if err != nil {
1379				panic(err)
1380			}
1381		}
1382
1383		return nil
1384	}
1385}
1386
1387// Replaces a state's rule based on the rule config and position
1388func replaceRule(rule ruleReplacingConfig) MutatorFunc {
1389	return func(state *LexerState) error {
1390		stateName := rule.stateName
1391		stackName := genStackName(rule.stateName, rule.rulePosition)
1392
1393		stack, ok := state.Get(stackName).([]ruleReplacingConfig)
1394		if !ok {
1395			stack = []ruleReplacingConfig{}
1396		}
1397
1398		// If state-stack is empty fill it with the placeholder rule
1399		if len(stack) == 0 {
1400			stack = []ruleReplacingConfig{
1401				{
1402					// Placeholder, will be overwritten by mutators, DO NOT REMOVE!
1403					pattern:      `\A\z`,
1404					tokenType:    nil,
1405					mutator:      nil,
1406					stateName:    stateName,
1407					rulePosition: rule.rulePosition,
1408				},
1409			}
1410			state.Set(stackName, stack)
1411		}
1412
1413		var mutator Mutator
1414		mutators := []Mutator{}
1415
1416		switch {
1417		case rule.rulePosition == topRule && rule.mutator == nil:
1418			// Default mutator for top rule
1419			mutators = []Mutator{Pop(1), popRule(rule)}
1420		case rule.rulePosition == topRule && rule.mutator != nil:
1421			// Default mutator for top rule, when rule.mutator is set
1422			mutators = []Mutator{rule.mutator, popRule(rule)}
1423		case rule.mutator != nil:
1424			mutators = []Mutator{rule.mutator}
1425		}
1426
1427		if rule.appendMutator != nil {
1428			mutators = append(mutators, rule.appendMutator)
1429		}
1430
1431		if len(mutators) > 0 {
1432			mutator = Mutators(mutators...)
1433		} else {
1434			mutator = nil
1435		}
1436
1437		ruleConfig := ruleMakingConfig{
1438			pattern:                rule.pattern,
1439			delimiter:              rule.delimiter,
1440			numberOfDelimiterChars: rule.numberOfDelimiterChars,
1441			tokenType:              rule.tokenType,
1442			mutator:                mutator,
1443		}
1444
1445		cRule := makeRule(ruleConfig)
1446
1447		switch rule.rulePosition {
1448		case topRule:
1449			state.Rules[stateName][0] = cRule
1450		case bottomRule:
1451			state.Rules[stateName][len(state.Rules[stateName])-1] = cRule
1452		}
1453
1454		// Pop state name from stack if asked. State should be popped first before Pushing
1455		if rule.popState {
1456			err := Pop(1)(state)
1457			if err != nil {
1458				panic(err)
1459			}
1460		}
1461
1462		// Push state name to stack if asked
1463		if rule.pushState {
1464			err := Push(stateName)(state)
1465			if err != nil {
1466				panic(err)
1467			}
1468		}
1469
1470		if !rule.pop {
1471			state.Set(stackName, append(stack, rule))
1472		}
1473
1474		return nil
1475	}
1476}
1477
1478// Generates rule replacing stack using state name and rule position
1479func genStackName(stateName string, rulePosition rulePosition) (stackName string) {
1480	switch rulePosition {
1481	case topRule:
1482		stackName = stateName + `-top-stack`
1483	case bottomRule:
1484		stackName = stateName + `-bottom-stack`
1485	}
1486	return
1487}
1488
1489// Makes a compiled rule and returns it
1490func makeRule(config ruleMakingConfig) *CompiledRule {
1491	var rePattern string
1492
1493	if len(config.delimiter) > 0 {
1494		delimiter := string(config.delimiter)
1495
1496		if config.numberOfDelimiterChars > 1 {
1497			delimiter = strings.Repeat(delimiter, config.numberOfDelimiterChars)
1498		}
1499
1500		rePattern = `(?<!(?<!\\)\\)` + regexp2.Escape(delimiter)
1501	} else {
1502		rePattern = config.pattern
1503	}
1504
1505	regex := regexp2.MustCompile(rePattern, regexp2.None)
1506
1507	cRule := &CompiledRule{
1508		Rule:   Rule{rePattern, config.tokenType, config.mutator},
1509		Regexp: regex,
1510	}
1511
1512	return cRule
1513}
1514
1515// Emitter for colon pairs, changes token state based on key and brackets
1516func colonPair(tokenClass TokenType) Emitter {
1517	return EmitterFunc(func(groups []string, state *LexerState) Iterator {
1518		iterators := []Iterator{}
1519		tokens := []Token{
1520			{Punctuation, state.NamedGroups[`colon`]},
1521			{Punctuation, state.NamedGroups[`opening_delimiters`]},
1522			{Punctuation, state.NamedGroups[`closing_delimiters`]},
1523		}
1524
1525		// Append colon
1526		iterators = append(iterators, Literator(tokens[0]))
1527
1528		if tokenClass == NameAttribute {
1529			iterators = append(iterators, Literator(Token{NameAttribute, state.NamedGroups[`key`]}))
1530		} else {
1531			var keyTokenState string
1532			keyre := regexp.MustCompile(`^\d+$`)
1533			if keyre.MatchString(state.NamedGroups[`key`]) {
1534				keyTokenState = "common"
1535			} else {
1536				keyTokenState = "Q"
1537			}
1538
1539			// Use token state to Tokenise key
1540			if keyTokenState != "" {
1541				iterator, err := state.Lexer.Tokenise(
1542					&TokeniseOptions{
1543						State:  keyTokenState,
1544						Nested: true,
1545					}, state.NamedGroups[`key`])
1546
1547				if err != nil {
1548					panic(err)
1549				} else {
1550					// Append key
1551					iterators = append(iterators, iterator)
1552				}
1553			}
1554		}
1555
1556		// Append punctuation
1557		iterators = append(iterators, Literator(tokens[1]))
1558
1559		var valueTokenState string
1560
1561		switch state.NamedGroups[`opening_delimiters`] {
1562		case "(", "{", "[":
1563			valueTokenState = "root"
1564		case "<<", "«":
1565			valueTokenState = "ww"
1566		case "<":
1567			valueTokenState = "Q"
1568		}
1569
1570		// Use token state to Tokenise value
1571		if valueTokenState != "" {
1572			iterator, err := state.Lexer.Tokenise(
1573				&TokeniseOptions{
1574					State:  valueTokenState,
1575					Nested: true,
1576				}, state.NamedGroups[`value`])
1577
1578			if err != nil {
1579				panic(err)
1580			} else {
1581				// Append value
1582				iterators = append(iterators, iterator)
1583			}
1584		}
1585		// Append last punctuation
1586		iterators = append(iterators, Literator(tokens[2]))
1587
1588		return Concaterator(iterators...)
1589	})
1590}
1591
1592// Emitter for quoting constructs, changes token state based on quote name and adverbs
1593func quote(groups []string, state *LexerState) Iterator {
1594	keyword := state.NamedGroups[`keyword`]
1595	adverbsStr := state.NamedGroups[`adverbs`]
1596	iterators := []Iterator{}
1597	tokens := []Token{
1598		{Keyword, keyword},
1599		{StringAffix, adverbsStr},
1600		{Text, state.NamedGroups[`ws`]},
1601		{Punctuation, state.NamedGroups[`opening_delimiters`]},
1602		{Punctuation, state.NamedGroups[`closing_delimiters`]},
1603	}
1604
1605	// Append all tokens before dealing with the main string
1606	iterators = append(iterators, Literator(tokens[:4]...))
1607
1608	var tokenStates []string
1609
1610	// Set tokenStates based on adverbs
1611	adverbs := strings.Split(adverbsStr, ":")
1612	for _, adverb := range adverbs {
1613		switch adverb {
1614		case "c", "closure":
1615			tokenStates = append(tokenStates, "Q-closure")
1616		case "qq":
1617			tokenStates = append(tokenStates, "qq")
1618		case "ww":
1619			tokenStates = append(tokenStates, "ww")
1620		case "s", "scalar", "a", "array", "h", "hash", "f", "function":
1621			tokenStates = append(tokenStates, "Q-variable")
1622		}
1623	}
1624
1625	var tokenState string
1626
1627	switch {
1628	case keyword == "qq" || contains(tokenStates, "qq"):
1629		tokenState = "qq"
1630	case adverbsStr == "ww" || contains(tokenStates, "ww"):
1631		tokenState = "ww"
1632	case contains(tokenStates, "Q-closure") && contains(tokenStates, "Q-variable"):
1633		tokenState = "qq"
1634	case contains(tokenStates, "Q-closure"):
1635		tokenState = "Q-closure"
1636	case contains(tokenStates, "Q-variable"):
1637		tokenState = "Q-variable"
1638	default:
1639		tokenState = "Q"
1640	}
1641
1642	iterator, err := state.Lexer.Tokenise(
1643		&TokeniseOptions{
1644			State:  tokenState,
1645			Nested: true,
1646		}, state.NamedGroups[`value`])
1647
1648	if err != nil {
1649		panic(err)
1650	} else {
1651		iterators = append(iterators, iterator)
1652	}
1653
1654	// Append the last punctuation
1655	iterators = append(iterators, Literator(tokens[4]))
1656
1657	return Concaterator(iterators...)
1658}
1659
1660// Emitter for pod config, tokenises the properties with "colon-pair-attribute" state
1661func podConfig(groups []string, state *LexerState) Iterator {
1662	// Tokenise pod config
1663	iterator, err := state.Lexer.Tokenise(
1664		&TokeniseOptions{
1665			State:  "colon-pair-attribute",
1666			Nested: true,
1667		}, groups[0])
1668
1669	if err != nil {
1670		panic(err)
1671	} else {
1672		return iterator
1673	}
1674}
1675
1676// Emitter for pod code, tokenises the code based on the lang specified
1677func podCode(groups []string, state *LexerState) Iterator {
1678	iterators := []Iterator{}
1679	tokens := []Token{
1680		{Comment, state.NamedGroups[`ws`]},
1681		{Keyword, state.NamedGroups[`keyword`]},
1682		{Keyword, state.NamedGroups[`ws2`]},
1683		{Keyword, state.NamedGroups[`name`]},
1684		{StringDoc, state.NamedGroups[`value`]},
1685		{Comment, state.NamedGroups[`ws3`]},
1686		{Keyword, state.NamedGroups[`end_keyword`]},
1687		{Keyword, state.NamedGroups[`ws4`]},
1688		{Keyword, state.NamedGroups[`name`]},
1689	}
1690
1691	// Append all tokens before dealing with the pod config
1692	iterators = append(iterators, Literator(tokens[:4]...))
1693
1694	// Tokenise pod config
1695	iterators = append(iterators, podConfig([]string{state.NamedGroups[`config`]}, state))
1696
1697	langMatch := regexp.MustCompile(`:lang\W+(\w+)`).FindStringSubmatch(state.NamedGroups[`config`])
1698	var lang string
1699	if len(langMatch) > 1 {
1700		lang = langMatch[1]
1701	}
1702
1703	// Tokenise code based on lang property
1704	sublexer := internal.Get(lang)
1705	if sublexer != nil {
1706		iterator, err := sublexer.Tokenise(nil, state.NamedGroups[`value`])
1707
1708		if err != nil {
1709			panic(err)
1710		} else {
1711			iterators = append(iterators, iterator)
1712		}
1713	} else {
1714		iterators = append(iterators, Literator(tokens[4]))
1715	}
1716
1717	// Append the rest of the tokens
1718	iterators = append(iterators, Literator(tokens[5:]...))
1719
1720	return Concaterator(iterators...)
1721}
1722