1package r 2 3import ( 4 "regexp" 5 "strings" 6 "unicode/utf8" 7 8 . "github.com/alecthomas/chroma" // nolint 9 "github.com/alecthomas/chroma/lexers/internal" 10 "github.com/dlclark/regexp2" 11) 12 13// Raku lexer. 14var Raku Lexer = internal.Register(MustNewLazyLexer( 15 &Config{ 16 Name: "Raku", 17 Aliases: []string{"perl6", "pl6", "raku"}, 18 Filenames: []string{ 19 "*.pl", "*.pm", "*.nqp", "*.p6", "*.6pl", "*.p6l", "*.pl6", "*.6pm", 20 "*.p6m", "*.pm6", "*.t", "*.raku", "*.rakumod", "*.rakutest", "*.rakudoc", 21 }, 22 MimeTypes: []string{ 23 "text/x-perl6", "application/x-perl6", 24 "text/x-raku", "application/x-raku", 25 }, 26 DotAll: true, 27 }, 28 rakuRules, 29)) 30 31func rakuRules() Rules { 32 type RakuToken int 33 34 const ( 35 rakuQuote RakuToken = iota 36 rakuNameAttribute 37 rakuPod 38 rakuPodFormatter 39 rakuPodDeclaration 40 rakuMultilineComment 41 rakuMatchRegex 42 rakuSubstitutionRegex 43 ) 44 45 const ( 46 colonPairOpeningBrackets = `(?:<<|<|«|\(|\[|\{)` 47 colonPairClosingBrackets = `(?:>>|>|»|\)|\]|\})` 48 colonPairPattern = `(?<!:)(?<colon>:)(?<key>\w[\w'-]*)(?<opening_delimiters>` + colonPairOpeningBrackets + `)` 49 colonPairLookahead = `(?=(:['\w-]+` + 50 colonPairOpeningBrackets + `.+?` + colonPairClosingBrackets + `)?` 51 namePattern = `(?:(?!` + colonPairPattern + `)(?:::|[\w':-]))+` 52 variablePattern = `[$@%&]+[.^:?=!~]?` + namePattern 53 globalVariablePattern = `[$@%&]+\*` + namePattern 54 ) 55 56 keywords := []string{ 57 `BEGIN`, `CATCH`, `CHECK`, `CLOSE`, `CONTROL`, `DOC`, `END`, `ENTER`, `FIRST`, `INIT`, 58 `KEEP`, `LAST`, `LEAVE`, `NEXT`, `POST`, `PRE`, `QUIT`, `UNDO`, `anon`, `augment`, `but`, 59 `class`, `constant`, `default`, `does`, `else`, `elsif`, `enum`, `for`, `gather`, `given`, 60 `grammar`, `has`, `if`, `import`, `is`, `of`, `let`, `loop`, `made`, `make`, `method`, 61 `module`, `multi`, `my`, `need`, `orwith`, `our`, `proceed`, `proto`, `repeat`, `require`, 62 `where`, `return`, `return-rw`, `returns`, `->`, `-->`, `role`, 
`state`, `sub`, `no`, 63 `submethod`, `subset`, `succeed`, `supersede`, `try`, `unit`, `unless`, `until`, 64 `use`, `when`, `while`, `with`, `without`, `export`, `native`, `repr`, `required`, `rw`, 65 `symbol`, `default`, `cached`, `DEPRECATED`, `dynamic`, `hidden-from-backtrace`, `nodal`, 66 `pure`, `raw`, `start`, `react`, `supply`, `whenever`, `also`, `rule`, `token`, `regex`, 67 `dynamic-scope`, `built`, `temp`, 68 } 69 70 keywordsPattern := Words(`(?<!['\w:-])`, `(?!['\w:-])`, keywords...) 71 72 wordOperators := []string{ 73 `X`, `Z`, `R`, `after`, `and`, `andthen`, `before`, `cmp`, `div`, `eq`, `eqv`, `extra`, `ge`, 74 `gt`, `le`, `leg`, `lt`, `mod`, `ne`, `or`, `orelse`, `x`, `xor`, `xx`, `gcd`, `lcm`, 75 `but`, `min`, `max`, `^fff`, `fff^`, `fff`, `^ff`, `ff^`, `ff`, `so`, `not`, `unicmp`, 76 `TR`, `o`, `(&)`, `(.)`, `(|)`, `(+)`, `(-)`, `(^)`, `coll`, `(elem)`, `(==)`, 77 `(cont)`, `(<)`, `(<=)`, `(>)`, `(>=)`, `minmax`, `notandthen`, `S`, 78 } 79 80 wordOperatorsPattern := Words(`(?<=^|\b|\s)`, `(?=$|\b|\s)`, wordOperators...) 81 82 operators := []string{ 83 `++`, `--`, `-`, `**`, `!`, `+`, `~`, `?`, `+^`, `~^`, `?^`, `^`, `*`, `/`, `%`, `%%`, `+&`, 84 `+<`, `+>`, `~&`, `~<`, `~>`, `?&`, `+|`, `+^`, `~|`, `~^`, `?`, `?|`, `?^`, `&`, `^`, 85 `<=>`, `^…^`, `^…`, `…^`, `…`, `...`, `...^`, `^...`, `^...^`, `..`, `..^`, `^..`, `^..^`, 86 `::=`, `:=`, `!=`, `==`, `<=`, `<`, `>=`, `>`, `~~`, `===`, `&&`, `||`, `|`, `^^`, `//`, 87 `??`, `!!`, `^fff^`, `^ff^`, `<==`, `==>`, `<<==`, `==>>`, `=>`, `=`, `<<`, `«`, `>>`, `»`, 88 `,`, `>>.`, `».`, `.&`, `.=`, `.^`, `.?`, `.+`, `.*`, `.`, `∘`, `∩`, `⊍`, `∪`, `⊎`, `∖`, 89 `⊖`, `≠`, `≤`, `≥`, `=:=`, `=~=`, `≅`, `∈`, `∉`, `≡`, `≢`, `∋`, `∌`, `⊂`, `⊄`, `⊆`, `⊈`, 90 `⊃`, `⊅`, `⊇`, `⊉`, `:`, `!!!`, `???`, `¯`, `×`, `÷`, `−`, `⁺`, `⁻`, 91 } 92 93 operatorsPattern := Words(``, ``, operators...) 
94 95 builtinTypes := []string{ 96 `False`, `True`, `Order`, `More`, `Less`, `Same`, `Any`, `Array`, `Associative`, `AST`, 97 `atomicint`, `Attribute`, `Backtrace`, `Backtrace::Frame`, `Bag`, `Baggy`, `BagHash`, 98 `Blob`, `Block`, `Bool`, `Buf`, `Callable`, `CallFrame`, `Cancellation`, `Capture`, 99 `CArray`, `Channel`, `Code`, `compiler`, `Complex`, `ComplexStr`, `CompUnit`, 100 `CompUnit::PrecompilationRepository`, `CompUnit::Repository`, `Empty`, 101 `CompUnit::Repository::FileSystem`, `CompUnit::Repository::Installation`, `Cool`, 102 `CurrentThreadScheduler`, `CX::Warn`, `CX::Take`, `CX::Succeed`, `CX::Return`, `CX::Redo`, 103 `CX::Proceed`, `CX::Next`, `CX::Last`, `CX::Emit`, `CX::Done`, `Cursor`, `Date`, `Dateish`, 104 `DateTime`, `Distribution`, `Distribution::Hash`, `Distribution::Locally`, 105 `Distribution::Path`, `Distribution::Resource`, `Distro`, `Duration`, `Encoding`, 106 `Encoding::Registry`, `Endian`, `Enumeration`, `Exception`, `Failure`, `FatRat`, `Grammar`, 107 `Hash`, `HyperWhatever`, `Instant`, `Int`, `int`, `int16`, `int32`, `int64`, `int8`, `str`, 108 `IntStr`, `IO`, `IO::ArgFiles`, `IO::CatHandle`, `IO::Handle`, `IO::Notification`, 109 `IO::Notification::Change`, `IO::Path`, `IO::Path::Cygwin`, `IO::Path::Parts`, 110 `IO::Path::QNX`, `IO::Path::Unix`, `IO::Path::Win32`, `IO::Pipe`, `IO::Socket`, 111 `IO::Socket::Async`, `IO::Socket::Async::ListenSocket`, `IO::Socket::INET`, `IO::Spec`, 112 `IO::Spec::Cygwin`, `IO::Spec::QNX`, `IO::Spec::Unix`, `IO::Spec::Win32`, `IO::Special`, 113 `Iterable`, `Iterator`, `Junction`, `Kernel`, `Label`, `List`, `Lock`, `Lock::Async`, 114 `Lock::ConditionVariable`, `long`, `longlong`, `Macro`, `Map`, `Match`, 115 `Metamodel::AttributeContainer`, `Metamodel::C3MRO`, `Metamodel::ClassHOW`, 116 `Metamodel::ConcreteRoleHOW`, `Metamodel::CurriedRoleHOW`, `Metamodel::DefiniteHOW`, 117 `Metamodel::Documenting`, `Metamodel::EnumHOW`, `Metamodel::Finalization`, 118 `Metamodel::MethodContainer`, `Metamodel::Mixins`, 
`Metamodel::MROBasedMethodDispatch`, 119 `Metamodel::MultipleInheritance`, `Metamodel::Naming`, `Metamodel::Primitives`, 120 `Metamodel::PrivateMethodContainer`, `Metamodel::RoleContainer`, `Metamodel::RolePunning`, 121 `Metamodel::Stashing`, `Metamodel::Trusting`, `Metamodel::Versioning`, `Method`, `Mix`, 122 `MixHash`, `Mixy`, `Mu`, `NFC`, `NFD`, `NFKC`, `NFKD`, `Nil`, `Num`, `num32`, `num64`, 123 `Numeric`, `NumStr`, `ObjAt`, `Order`, `Pair`, `Parameter`, `Perl`, `Pod::Block`, 124 `Pod::Block::Code`, `Pod::Block::Comment`, `Pod::Block::Declarator`, `Pod::Block::Named`, 125 `Pod::Block::Para`, `Pod::Block::Table`, `Pod::Heading`, `Pod::Item`, `Pointer`, 126 `Positional`, `PositionalBindFailover`, `Proc`, `Proc::Async`, `Promise`, `Proxy`, 127 `PseudoStash`, `QuantHash`, `RaceSeq`, `Raku`, `Range`, `Rat`, `Rational`, `RatStr`, 128 `Real`, `Regex`, `Routine`, `Routine::WrapHandle`, `Scalar`, `Scheduler`, `Semaphore`, 129 `Seq`, `Sequence`, `Set`, `SetHash`, `Setty`, `Signature`, `size_t`, `Slip`, `Stash`, 130 `Str`, `StrDistance`, `Stringy`, `Sub`, `Submethod`, `Supplier`, `Supplier::Preserving`, 131 `Supply`, `Systemic`, `Tap`, `Telemetry`, `Telemetry::Instrument::Thread`, 132 `Telemetry::Instrument::ThreadPool`, `Telemetry::Instrument::Usage`, `Telemetry::Period`, 133 `Telemetry::Sampler`, `Thread`, `Test`, `ThreadPoolScheduler`, `UInt`, `uint16`, `uint32`, 134 `uint64`, `uint8`, `Uni`, `utf8`, `ValueObjAt`, `Variable`, `Version`, `VM`, `Whatever`, 135 `WhateverCode`, `WrapHandle`, `NativeCall`, 136 // Pragmas 137 `precompilation`, `experimental`, `worries`, `MONKEY-TYPING`, `MONKEY-SEE-NO-EVAL`, 138 `MONKEY-GUTS`, `fatal`, `lib`, `isms`, `newline`, `nqp`, `soft`, 139 `strict`, `trace`, `variables`, 140 } 141 142 builtinTypesPattern := Words(`(?<!['\w:-])`, `(?::[_UD])?(?!['\w:-])`, builtinTypes...) 
143 144 builtinRoutines := []string{ 145 `ACCEPTS`, `abs`, `abs2rel`, `absolute`, `accept`, `accepts_type`, `accessed`, `acos`, 146 `acosec`, `acosech`, `acosh`, `acotan`, `acotanh`, `acquire`, `act`, `action`, `actions`, 147 `add`, `add_attribute`, `add_enum_value`, `add_fallback`, `add_method`, `add_parent`, 148 `add_private_method`, `add_role`, `add_stash`, `add_trustee`, `addendum`, `adverb`, `after`, 149 `all`, `allocate`, `allof`, `allowed`, `alternative-names`, `annotations`, `antipair`, 150 `antipairs`, `any`, `anyof`, `api`, `app_lifetime`, `append`, `arch`, `archetypes`, 151 `archname`, `args`, `ARGS-TO-CAPTURE`, `arity`, `Array`, `asec`, `asech`, `asin`, `asinh`, 152 `ASSIGN-KEY`, `ASSIGN-POS`, `assuming`, `ast`, `at`, `atan`, `atan2`, `atanh`, `AT-KEY`, 153 `atomic-assign`, `atomic-dec-fetch`, `atomic-fetch`, `atomic-fetch-add`, `atomic-fetch-dec`, 154 `atomic-fetch-inc`, `atomic-fetch-sub`, `atomic-inc-fetch`, `AT-POS`, `attributes`, `auth`, 155 `await`, `backend`, `backtrace`, `Bag`, `bag`, `Baggy`, `BagHash`, `bail-out`, `base`, 156 `basename`, `base-repeating`, `base_type`, `batch`, `BIND-KEY`, `BIND-POS`, `bind-stderr`, 157 `bind-stdin`, `bind-stdout`, `bind-udp`, `bits`, `bless`, `block`, `Bool`, `bool-only`, 158 `bounds`, `break`, `Bridge`, `broken`, `BUILD`, `TWEAK`, `build-date`, `bytes`, `cache`, 159 `callframe`, `calling-package`, `CALL-ME`, `callsame`, `callwith`, `can`, `cancel`, 160 `candidates`, `cando`, `can-ok`, `canonpath`, `caps`, `caption`, `Capture`, `capture`, 161 `cas`, `catdir`, `categorize`, `categorize-list`, `catfile`, `catpath`, `cause`, `ceiling`, 162 `cglobal`, `changed`, `Channel`, `channel`, `chars`, `chdir`, `child`, `child-name`, 163 `child-typename`, `chmod`, `chomp`, `chop`, `chr`, `chrs`, `chunks`, `cis`, `classify`, 164 `classify-list`, `cleanup`, `clone`, `close`, `closed`, `close-stdin`, `cmp-ok`, `code`, 165 `codename`, `codes`, `coerce_type`, `coll`, `collate`, `column`, `comb`, `combinations`, 166 `command`, 
`comment`, `compiler`, `Complex`, `compose`, `composalizer`, `compose_type`, 167 `compose_values`, `composer`, `compute_mro`, `condition`, `config`, `configure_destroy`, 168 `configure_type_checking`, `conj`, `connect`, `constraints`, `construct`, `contains`, 169 `content`, `contents`, `copy`, `cos`, `cosec`, `cosech`, `cosh`, `cotan`, `cotanh`, `count`, 170 `count-only`, `cpu-cores`, `cpu-usage`, `CREATE`, `create_type`, `cross`, `cue`, `curdir`, 171 `curupdir`, `d`, `Date`, `DateTime`, `day`, `daycount`, `day-of-month`, `day-of-week`, 172 `day-of-year`, `days-in-month`, `dd-mm-yyyy`, `declaration`, `decode`, `decoder`, `deepmap`, 173 `default`, `defined`, `DEFINITE`, `definite`, `delayed`, `delete`, `delete-by-compiler`, 174 `DELETE-KEY`, `DELETE-POS`, `denominator`, `desc`, `DESTROY`, `destroyers`, `devnull`, 175 `diag`, `did-you-mean`, `die`, `dies-ok`, `dir`, `dirname`, `distribution`, `dir-sep`, 176 `DISTROnames`, `do`, `does`, `does-ok`, `done`, `done-testing`, `duckmap`, `dynamic`, `e`, 177 `eager`, `earlier`, `elems`, `emit`, `enclosing`, `encode`, `encoder`, `encoding`, `end`, 178 `endian`, `ends-with`, `enum_from_value`, `enum_value_list`, `enum_values`, `enums`, `EOF`, 179 `eof`, `EVAL`, `eval-dies-ok`, `EVALFILE`, `eval-lives-ok`, `event`, `exception`, 180 `excludes-max`, `excludes-min`, `EXISTS-KEY`, `EXISTS-POS`, `exit`, `exitcode`, `exp`, 181 `expected`, `explicitly-manage`, `expmod`, `export_callback`, `extension`, `f`, `fail`, 182 `FALLBACK`, `fails-like`, `fc`, `feature`, `file`, `filename`, `files`, `find`, 183 `find_method`, `find_method_qualified`, `finish`, `first`, `flat`, `first-date-in-month`, 184 `flatmap`, `flip`, `floor`, `flunk`, `flush`, `flush_cache`, `fmt`, `format`, `formatter`, 185 `free-memory`, `freeze`, `from`, `from-list`, `from-loop`, `from-posix`, `from-slurpy`, 186 `full`, `full-barrier`, `GENERATE-USAGE`, `generate_mixin`, `get`, `get_value`, `getc`, 187 `gist`, `got`, `grab`, `grabpairs`, `grep`, `handle`, `handled`, 
`handles`, `hardware`, 188 `has_accessor`, `Hash`, `hash`, `head`, `headers`, `hh-mm-ss`, `hidden`, `hides`, `hostname`, 189 `hour`, `how`, `hyper`, `id`, `illegal`, `im`, `in`, `in-timezone`, `indent`, `index`, 190 `indices`, `indir`, `infinite`, `infix`, `postcirumfix`, `cicumfix`, `install`, 191 `install_method_cache`, `Instant`, `instead`, `Int`, `int-bounds`, `interval`, `in-timezone`, 192 `invalid-str`, `invert`, `invocant`, `IO`, `IO::Notification.watch-path`, `is_trusted`, 193 `is_type`, `isa`, `is-absolute`, `isa-ok`, `is-approx`, `is-deeply`, `is-hidden`, 194 `is-initial-thread`, `is-int`, `is-lazy`, `is-leap-year`, `isNaN`, `isnt`, `is-prime`, 195 `is-relative`, `is-routine`, `is-setting`, `is-win`, `item`, `iterator`, `join`, `keep`, 196 `kept`, `KERNELnames`, `key`, `keyof`, `keys`, `kill`, `kv`, `kxxv`, `l`, `lang`, `last`, 197 `lastcall`, `later`, `lazy`, `lc`, `leading`, `level`, `like`, `line`, `lines`, `link`, 198 `List`, `list`, `listen`, `live`, `lives-ok`, `load`, `load-repo-id`, `load-unit`, `loaded`, 199 `loads`, `local`, `lock`, `log`, `log10`, `lookup`, `lsb`, `made`, `MAIN`, `make`, `Map`, 200 `map`, `match`, `max`, `maxpairs`, `merge`, `message`, `method`, `meta`, `method_table`, 201 `methods`, `migrate`, `min`, `minmax`, `minpairs`, `minute`, `misplaced`, `Mix`, `mix`, 202 `MixHash`, `mixin`, `mixin_attribute`, `Mixy`, `mkdir`, `mode`, `modified`, `month`, `move`, 203 `mro`, `msb`, `multi`, `multiness`, `name`, `named`, `named_names`, `narrow`, 204 `nativecast`, `native-descriptor`, `nativesizeof`, `need`, `new`, `new_type`, 205 `new-from-daycount`, `new-from-pairs`, `next`, `nextcallee`, `next-handle`, `nextsame`, 206 `nextwith`, `next-interesting-index`, `NFC`, `NFD`, `NFKC`, `NFKD`, `nice`, `nl-in`, 207 `nl-out`, `nodemap`, `nok`, `normalize`, `none`, `norm`, `not`, `note`, `now`, `nude`, 208 `Num`, `numerator`, `Numeric`, `of`, `offset`, `offset-in-hours`, `offset-in-minutes`, 209 `ok`, `old`, `on-close`, `one`, `on-switch`, `open`, 
`opened`, `operation`, `optional`, 210 `ord`, `ords`, `orig`, `os-error`, `osname`, `out-buffer`, `pack`, `package`, `package-kind`, 211 `package-name`, `packages`, `Pair`, `pair`, `pairs`, `pairup`, `parameter`, `params`, 212 `parent`, `parent-name`, `parents`, `parse`, `parse-base`, `parsefile`, `parse-names`, 213 `parts`, `pass`, `path`, `path-sep`, `payload`, `peer-host`, `peer-port`, `periods`, `perl`, 214 `permutations`, `phaser`, `pick`, `pickpairs`, `pid`, `placeholder`, `plan`, `plus`, 215 `polar`, `poll`, `polymod`, `pop`, `pos`, `positional`, `posix`, `postfix`, `postmatch`, 216 `precomp-ext`, `precomp-target`, `precompiled`, `pred`, `prefix`, `prematch`, `prepend`, 217 `primary`, `print`, `printf`, `print-nl`, `print-to`, `private`, `private_method_names`, 218 `private_method_table`, `proc`, `produce`, `Promise`, `promise`, `prompt`, `protect`, 219 `protect-or-queue-on-recursion`, `publish_method_cache`, `pull-one`, `push`, `push-all`, 220 `push-at-least`, `push-exactly`, `push-until-lazy`, `put`, `qualifier-type`, `quaternary`, 221 `quit`, `r`, `race`, `radix`, `raku`, `rand`, `Range`, `range`, `Rat`, `raw`, `re`, `read`, 222 `read-bits`, `read-int128`, `read-int16`, `read-int32`, `read-int64`, `read-int8`, 223 `read-num32`, `read-num64`, `read-ubits`, `read-uint128`, `read-uint16`, `read-uint32`, 224 `read-uint64`, `read-uint8`, `readchars`, `readonly`, `ready`, `Real`, `reallocate`, 225 `reals`, `reason`, `rebless`, `receive`, `recv`, `redispatcher`, `redo`, `reduce`, 226 `rel2abs`, `relative`, `release`, `remove`, `rename`, `repeated`, `replacement`, 227 `replace-with`, `repo`, `repo-id`, `report`, `required`, `reserved`, `resolve`, `restore`, 228 `result`, `resume`, `rethrow`, `return`, `return-rw`, `returns`, `reverse`, `right`, 229 `rindex`, `rmdir`, `role`, `roles_to_compose`, `rolish`, `roll`, `rootdir`, `roots`, 230 `rotate`, `rotor`, `round`, `roundrobin`, `routine-type`, `run`, `RUN-MAIN`, `rw`, `rwx`, 231 `samecase`, `samemark`, `samewith`, 
`say`, `schedule-on`, `scheduler`, `scope`, `sec`, 232 `sech`, `second`, `secondary`, `seek`, `self`, `send`, `Seq`, `Set`, `set`, `serial`, 233 `set_hidden`, `set_name`, `set_package`, `set_rw`, `set_value`, `set_api`, `set_auth`, 234 `set_composalizer`, `set_export_callback`, `set_is_mixin`, `set_mixin_attribute`, 235 `set_package`, `set_ver`, `set_why`, `SetHash`, `Setty`, `set-instruments`, 236 `setup_finalization`, `setup_mixin_cache`, `shape`, `share`, `shell`, `short-id`, 237 `short-name`, `shortname`, `shift`, `sibling`, `sigil`, `sign`, `signal`, `signals`, 238 `signature`, `sin`, `sinh`, `sink`, `sink-all`, `skip`, `skip-at-least`, 239 `skip-at-least-pull-one`, `skip-one`, `skip-rest`, `sleep`, `sleep-timer`, `sleep-until`, 240 `Slip`, `slip`, `slurp`, `slurp-rest`, `slurpy`, `snap`, `snapper`, `so`, `socket-host`, 241 `socket-port`, `sort`, `source`, `source-package`, `spawn`, `SPEC`, `splice`, `split`, 242 `splitdir`, `splitpath`, `sprintf`, `spurt`, `sqrt`, `squish`, `srand`, `stable`, `start`, 243 `started`, `starts-with`, `status`, `stderr`, `stdout`, `STORE`, `store-file`, 244 `store-repo-id`, `store-unit`, `Str`, `Stringy`, `sub_signature`, `subbuf`, `subbuf-rw`, 245 `subname`, `subparse`, `subst`, `subst-mutate`, `substr`, `substr-eq`, `substr-rw`, 246 `subtest`, `succ`, `sum`, `suffix`, `summary`, `Supply`, `symlink`, `T`, `t`, `tail`, 247 `take`, `take-rw`, `tan`, `tanh`, `tap`, `target`, `target-name`, `tc`, `tclc`, `tell`, 248 `term`, `tertiary`, `then`, `throttle`, `throw`, `throws-like`, `time`, `timezone`, 249 `tmpdir`, `to`, `today`, `todo`, `toggle`, `to-posix`, `total`, `total-memory`, `trailing`, 250 `trans`, `tree`, `trim`, `trim-leading`, `trim-trailing`, `truncate`, `truncated-to`, 251 `trusts`, `try_acquire`, `trying`, `twigil`, `type`, `type_captures`, `type_check`, 252 `typename`, `uc`, `udp`, `uncaught_handler`, `undefine`, `unimatch`, `unicmp`, `uniname`, 253 `uninames`, `uninstall`, `uniparse`, `uniprop`, `uniprops`, `unique`, 
`unival`, `univals`, 254 `unlike`, `unlink`, `unlock`, `unpack`, `unpolar`, `unset`, `unshift`, `unwrap`, `updir`, 255 `USAGE`, `usage-name`, `use-ok`, `utc`, `val`, `value`, `values`, `VAR`, `variable`, `ver`, 256 `verbose-config`, `Version`, `version`, `VMnames`, `volume`, `vow`, `w`, `wait`, `warn`, 257 `watch`, `watch-path`, `week`, `weekday-of-month`, `week-number`, `week-year`, `WHAT`, 258 `what`, `when`, `WHERE`, `WHEREFORE`, `WHICH`, `WHO`, `whole-second`, `WHY`, `why`, 259 `with-lock-hidden-from-recursion-check`, `wordcase`, `words`, `workaround`, `wrap`, 260 `write`, `write-bits`, `write-int128`, `write-int16`, `write-int32`, `write-int64`, 261 `write-int8`, `write-num32`, `write-num64`, `write-ubits`, `write-uint128`, `write-uint16`, 262 `write-uint32`, `write-uint64`, `write-uint8`, `write-to`, `x`, `yada`, `year`, `yield`, 263 `yyyy-mm-dd`, `z`, `zip`, `zip-latest`, `HOW`, `s`, `DEPRECATED`, `trait_mod`, 264 } 265 266 builtinRoutinesPattern := Words(`(?<!['\w:-])`, `(?!['\w-])`, builtinRoutines...) 
267 268 // A map of opening and closing brackets 269 brackets := map[rune]rune{ 270 '\u0028': '\u0029', '\u003c': '\u003e', '\u005b': '\u005d', 271 '\u007b': '\u007d', '\u00ab': '\u00bb', '\u0f3a': '\u0f3b', 272 '\u0f3c': '\u0f3d', '\u169b': '\u169c', '\u2018': '\u2019', 273 '\u201a': '\u2019', '\u201b': '\u2019', '\u201c': '\u201d', 274 '\u201e': '\u201d', '\u201f': '\u201d', '\u2039': '\u203a', 275 '\u2045': '\u2046', '\u207d': '\u207e', '\u208d': '\u208e', 276 '\u2208': '\u220b', '\u2209': '\u220c', '\u220a': '\u220d', 277 '\u2215': '\u29f5', '\u223c': '\u223d', '\u2243': '\u22cd', 278 '\u2252': '\u2253', '\u2254': '\u2255', '\u2264': '\u2265', 279 '\u2266': '\u2267', '\u2268': '\u2269', '\u226a': '\u226b', 280 '\u226e': '\u226f', '\u2270': '\u2271', '\u2272': '\u2273', 281 '\u2274': '\u2275', '\u2276': '\u2277', '\u2278': '\u2279', 282 '\u227a': '\u227b', '\u227c': '\u227d', '\u227e': '\u227f', 283 '\u2280': '\u2281', '\u2282': '\u2283', '\u2284': '\u2285', 284 '\u2286': '\u2287', '\u2288': '\u2289', '\u228a': '\u228b', 285 '\u228f': '\u2290', '\u2291': '\u2292', '\u2298': '\u29b8', 286 '\u22a2': '\u22a3', '\u22a6': '\u2ade', '\u22a8': '\u2ae4', 287 '\u22a9': '\u2ae3', '\u22ab': '\u2ae5', '\u22b0': '\u22b1', 288 '\u22b2': '\u22b3', '\u22b4': '\u22b5', '\u22b6': '\u22b7', 289 '\u22c9': '\u22ca', '\u22cb': '\u22cc', '\u22d0': '\u22d1', 290 '\u22d6': '\u22d7', '\u22d8': '\u22d9', '\u22da': '\u22db', 291 '\u22dc': '\u22dd', '\u22de': '\u22df', '\u22e0': '\u22e1', 292 '\u22e2': '\u22e3', '\u22e4': '\u22e5', '\u22e6': '\u22e7', 293 '\u22e8': '\u22e9', '\u22ea': '\u22eb', '\u22ec': '\u22ed', 294 '\u22f0': '\u22f1', '\u22f2': '\u22fa', '\u22f3': '\u22fb', 295 '\u22f4': '\u22fc', '\u22f6': '\u22fd', '\u22f7': '\u22fe', 296 '\u2308': '\u2309', '\u230a': '\u230b', '\u2329': '\u232a', 297 '\u23b4': '\u23b5', '\u2768': '\u2769', '\u276a': '\u276b', 298 '\u276c': '\u276d', '\u276e': '\u276f', '\u2770': '\u2771', 299 '\u2772': '\u2773', '\u2774': '\u2775', '\u27c3': '\u27c4', 
300 '\u27c5': '\u27c6', '\u27d5': '\u27d6', '\u27dd': '\u27de', 301 '\u27e2': '\u27e3', '\u27e4': '\u27e5', '\u27e6': '\u27e7', 302 '\u27e8': '\u27e9', '\u27ea': '\u27eb', '\u2983': '\u2984', 303 '\u2985': '\u2986', '\u2987': '\u2988', '\u2989': '\u298a', 304 '\u298b': '\u298c', '\u298d': '\u298e', '\u298f': '\u2990', 305 '\u2991': '\u2992', '\u2993': '\u2994', '\u2995': '\u2996', 306 '\u2997': '\u2998', '\u29c0': '\u29c1', '\u29c4': '\u29c5', 307 '\u29cf': '\u29d0', '\u29d1': '\u29d2', '\u29d4': '\u29d5', 308 '\u29d8': '\u29d9', '\u29da': '\u29db', '\u29f8': '\u29f9', 309 '\u29fc': '\u29fd', '\u2a2b': '\u2a2c', '\u2a2d': '\u2a2e', 310 '\u2a34': '\u2a35', '\u2a3c': '\u2a3d', '\u2a64': '\u2a65', 311 '\u2a79': '\u2a7a', '\u2a7d': '\u2a7e', '\u2a7f': '\u2a80', 312 '\u2a81': '\u2a82', '\u2a83': '\u2a84', '\u2a8b': '\u2a8c', 313 '\u2a91': '\u2a92', '\u2a93': '\u2a94', '\u2a95': '\u2a96', 314 '\u2a97': '\u2a98', '\u2a99': '\u2a9a', '\u2a9b': '\u2a9c', 315 '\u2aa1': '\u2aa2', '\u2aa6': '\u2aa7', '\u2aa8': '\u2aa9', 316 '\u2aaa': '\u2aab', '\u2aac': '\u2aad', '\u2aaf': '\u2ab0', 317 '\u2ab3': '\u2ab4', '\u2abb': '\u2abc', '\u2abd': '\u2abe', 318 '\u2abf': '\u2ac0', '\u2ac1': '\u2ac2', '\u2ac3': '\u2ac4', 319 '\u2ac5': '\u2ac6', '\u2acd': '\u2ace', '\u2acf': '\u2ad0', 320 '\u2ad1': '\u2ad2', '\u2ad3': '\u2ad4', '\u2ad5': '\u2ad6', 321 '\u2aec': '\u2aed', '\u2af7': '\u2af8', '\u2af9': '\u2afa', 322 '\u2e02': '\u2e03', '\u2e04': '\u2e05', '\u2e09': '\u2e0a', 323 '\u2e0c': '\u2e0d', '\u2e1c': '\u2e1d', '\u2e20': '\u2e21', 324 '\u3008': '\u3009', '\u300a': '\u300b', '\u300c': '\u300d', 325 '\u300e': '\u300f', '\u3010': '\u3011', '\u3014': '\u3015', 326 '\u3016': '\u3017', '\u3018': '\u3019', '\u301a': '\u301b', 327 '\u301d': '\u301e', '\ufd3e': '\ufd3f', '\ufe17': '\ufe18', 328 '\ufe35': '\ufe36', '\ufe37': '\ufe38', '\ufe39': '\ufe3a', 329 '\ufe3b': '\ufe3c', '\ufe3d': '\ufe3e', '\ufe3f': '\ufe40', 330 '\ufe41': '\ufe42', '\ufe43': '\ufe44', '\ufe47': '\ufe48', 331 '\ufe59': 
'\ufe5a', '\ufe5b': '\ufe5c', '\ufe5d': '\ufe5e',
		'\uff08': '\uff09', '\uff1c': '\uff1e', '\uff3b': '\uff3d',
		'\uff5b': '\uff5d', '\uff5f': '\uff60', '\uff62': '\uff63',
	}

	bracketsPattern := `[` + regexp.QuoteMeta(joinRuneMap(brackets)) + `]`

	// Matches the quote adverbs that turn a quoting construct into a heredoc.
	// Compiled once here rather than on every mutator invocation.
	heredocAdverbPattern := regexp.MustCompile(`:to\b|:heredoc\b`)

	// findBrackets returns a mutator that, starting from the opening delimiter
	// captured by the rule that just matched, locates the corresponding closer
	// (a mirrored bracket from the brackets map, a repetition of the opening
	// delimiter, a pod `=end` line or a heredoc terminator) and then updates the
	// lexer state's groups and position accordingly.
	//
	// What it does depends on tokenClass:
	//   - rakuPodFormatter, rakuMatchRegex, rakuSubstitutionRegex: no search is
	//     done; a rule that recognises the closer is installed through
	//     replaceRule and the mutator returns.
	//   - rakuPod, rakuPodDeclaration, rakuNameAttribute: the text up to the
	//     closer is stored in the `value` named group and the closer itself in
	//     `closing_delimiters`.
	//   - rakuQuote: same as above, with extra handling for heredocs.
	//   - anything else (e.g. rakuMultilineComment): the text up to and
	//     including the closer is appended to the whole-match group.
	//
	// If no closer exists, the rest of the text is consumed and no closing
	// delimiter is reported. Errors from replaceRule are returned to the caller.
	findBrackets := func(tokenClass RakuToken) MutatorFunc {
		return func(state *LexerState) error {
			var openingChars []rune
			var adverbs []rune
			switch tokenClass {
			case rakuPod:
				openingChars = []rune(strings.Join(state.Groups[1:5], ``))
			default:
				adverbs = []rune(state.NamedGroups[`adverbs`])
				openingChars = []rune(state.NamedGroups[`opening_delimiters`])
			}

			if len(openingChars) == 0 {
				// Nothing was captured to pair up, so there is nothing to look for
				return nil
			}

			openingChar := openingChars[0]

			nChars := len(openingChars)

			var closingChar rune
			var closingCharExists bool
			var closingChars []rune

			switch tokenClass {
			case rakuPod:
				// Pod blocks are always closed by an `=end` line, never by a bracket
				closingCharExists = true
			default:
				closingChar, closingCharExists = brackets[openingChar]
			}

			switch tokenClass {
			case rakuPodFormatter:
				formatter := StringOther

				switch state.NamedGroups[`keyword`] {
				case "B":
					formatter = GenericStrong
				case "I":
					formatter = GenericEmph
				case "U":
					formatter = GenericUnderline
				}

				formatterRule := ruleReplacingConfig{
					pattern:      `.+?`,
					tokenType:    formatter,
					mutator:      nil,
					stateName:    `pod-formatter`,
					rulePosition: bottomRule,
				}

				if err := replaceRule(formatterRule)(state); err != nil {
					return err
				}

				return replaceRule(ruleReplacingConfig{
					delimiter:              []rune{closingChar},
					tokenType:              Punctuation,
					stateName:              `pod-formatter`,
					pushState:              true,
					numberOfDelimiterChars: nChars,
					appendMutator:          popRule(formatterRule),
				})(state)
			case rakuMatchRegex:
				// Non-mirrored delimiters (e.g. `/`) close with themselves
				delimiter := openingChars
				if closingCharExists {
					delimiter = []rune{closingChar}
				}

				return replaceRule(ruleReplacingConfig{
					delimiter: delimiter,
					tokenType: Punctuation,
					stateName: `regex`,
					popState:  true,
					pushState: true,
				})(state)
			case rakuSubstitutionRegex:
				delimiter := regexp2.Escape(string(openingChars))

				return replaceRule(ruleReplacingConfig{
					pattern:      `(` + delimiter + `)` + `((?:\\\\|\\/|.)*?)` + `(` + delimiter + `)`,
					tokenType:    ByGroups(Punctuation, UsingSelf(`qq`), Punctuation),
					rulePosition: topRule,
					stateName:    `regex`,
					popState:     true,
					pushState:    true,
				})(state)
			}

			text := state.Text

			// Index of the closer in text, or -1 while none has been found
			endPos := -1

			if !closingCharExists {
				// it's not a mirrored character, which means we
				// just need to look for the next occurrence
				closingChars = openingChars
				endPos = indexAt(text, closingChars, state.Pos)
			} else {
				var podRegex *regexp2.Regexp
				if tokenClass == rakuPod {
					podRegex = regexp2.MustCompile(
						state.NamedGroups[`ws`]+`=end`+`\s+`+regexp2.Escape(state.NamedGroups[`name`]),
						0,
					)
				} else {
					closingChars = []rune(strings.Repeat(string(closingChar), nChars))
				}

				// we need to look for the corresponding closing character,
				// keep nesting in mind
				nestingLevel := 1

				searchPos := state.Pos - nChars

				nextClosePos := -1

				for nestingLevel > 0 {
					searchFrom := searchPos + nChars

					if tokenClass == rakuPod {
						nextClosePos = -1

						// regexp2 reports "no match" as a nil match with a nil
						// error, so both results have to be checked
						match, err := podRegex.FindRunesMatchStartingAt(text, searchFrom)
						if err == nil && match != nil {
							closingChars = match.Runes()
							nextClosePos = match.Index
						}
					} else {
						nextClosePos = indexAt(text, closingChars, searchFrom)
					}

					nextOpenPos := indexAt(text, openingChars, searchFrom)

					switch {
					case nextClosePos == -1:
						// unterminated: stop searching, handled below
						nestingLevel = 0
					case nextOpenPos != -1 && nextOpenPos < nextClosePos:
						nestingLevel++
						nChars = len(openingChars)
						searchPos = nextOpenPos
					default: // next_close_pos < next_open_pos
						nestingLevel--
						nChars = len(closingChars)
						searchPos = nextClosePos
					}
				}

				endPos = nextClosePos
			}

			if endPos < 0 {
				// if we didn't find a closer, just highlight the
				// rest of the text in this class; there is no closing
				// delimiter to consume or to report
				endPos = len(text)
				nChars = 0
				closingChars = nil
			}

			var heredocTerminator []rune
			var endHeredocPos int
			if heredocAdverbPattern.MatchString(string(adverbs)) {
				if endPos != len(text) {
					// For a heredoc the "quoted" text is the terminator word
					heredocTerminator = text[state.Pos:endPos]
					nChars = len(heredocTerminator)
				} else {
					endPos = state.Pos + 1
					if endPos > len(text) {
						endPos = len(text)
					}
					heredocTerminator = []rune{}
					nChars = 0
				}

				if nChars > 0 {
					endHeredocPos = indexAt(text[endPos:], heredocTerminator, 0)
					if endHeredocPos > -1 {
						endPos += endHeredocPos
					} else {
						// terminator never shows up again: take everything
						// that is left and do not step past the end
						endPos = len(text)
						nChars = 0
					}
				}
			}

			switch tokenClass {
			case rakuPod, rakuPodDeclaration, rakuNameAttribute:
				state.NamedGroups[`value`] = string(text[state.Pos:endPos])
				state.NamedGroups[`closing_delimiters`] = string(closingChars)
			case rakuQuote:
				if len(heredocTerminator) > 0 {
					// Length of heredoc terminator + closing chars + `;`,
					// clamped so it never reaches past the heredoc body
					punctuationEnd := state.Pos + len(heredocTerminator) + len(openingChars) + 1
					if punctuationEnd > endPos {
						punctuationEnd = endPos
					}

					state.NamedGroups[`opening_delimiters`] = string(openingChars) +
						string(text[state.Pos:punctuationEnd])

					state.NamedGroups[`value`] = string(text[punctuationEnd:endPos])

					if endHeredocPos > -1 {
						state.NamedGroups[`closing_delimiters`] = string(heredocTerminator)
					}
				} else {
					state.NamedGroups[`value`] = string(text[state.Pos:endPos])
					if nChars > 0 {
						state.NamedGroups[`closing_delimiters`] = string(closingChars)
					}
				}
			default:
				state.Groups = []string{state.Groups[0] + string(text[state.Pos:endPos+nChars])}
			}

			state.Pos = endPos + nChars

			return nil
		}
	}

	// Raku rules
	// Empty capture groups are placeholders and will be replaced by mutators
	// DO NOT REMOVE THEM!
	return Rules{
		"root": {
			// Placeholder, will be overwritten by mutators, DO NOT REMOVE!
			{`\A\z`, nil, nil},
			Include("common"),
			{`{`, Punctuation, Push(`root`)},
			{`\(`, Punctuation, Push(`root`)},
			{`[)}]`, Punctuation, Pop(1)},
			{`;`, Punctuation, nil},
			{`\[|\]`, Operator, nil},
			{`.+?`, Text, nil},
		},
		"common": {
			{`^#![^\n]*$`, CommentHashbang, nil},
			Include("pod"),
			// Multi-line, Embedded comment
			{
				"#`(?<opening_delimiters>(?<delimiter>" + bracketsPattern + `)\k<delimiter>*)`,
				CommentMultiline,
				findBrackets(rakuMultilineComment),
			},
			{`#[^\n]*$`, CommentSingle, nil},
			// /regex/
			{
				`(?<=(?:^|\(|=|:|~~|\[|{|,|=>)\s*)(/)(?!\]|\))((?:\\\\|\\/|.)*?)((?<!(?<!\\)\\)/(?!'|"))`,
				ByGroups(Punctuation, UsingSelf("regex"), Punctuation),
				nil,
			},
			Include("variable"),
			// ::?VARIABLE
			{`::\?\w+(?::[_UD])?`, NameVariableGlobal, nil},
			// Version
			{
				`\b(v)(\d+)((?:\.(?:\*|[\d\w]+))*)(\+)?`,
				ByGroups(Keyword, NumberInteger, NameEntity, Operator),
				nil,
			},
			Include("number"),
			// Hyperoperator | »*«
			{`(>>)(\S+?)(<<)`, ByGroups(Operator, UsingSelf("root"), Operator), nil},
			{`(»)(\S+?)(«)`, ByGroups(Operator, UsingSelf("root"), Operator), nil},
			// Hyperoperator | «*«
			{`(<<)(\S+?)(<<)`, ByGroups(Operator, UsingSelf("root"), Operator), nil},
			{`(«)(\S+?)(«)`, ByGroups(Operator, UsingSelf("root"),
Operator), nil}, 619 // Hyperoperator | »*» 620 {`(>>)(\S+?)(>>)`, ByGroups(Operator, UsingSelf("root"), Operator), nil}, 621 {`(»)(\S+?)(»)`, ByGroups(Operator, UsingSelf("root"), Operator), nil}, 622 // <<quoted words>> 623 {`(?<!(?:\d+|\.(?:Int|Numeric)|[$@%]\*?[\w':-]+\s+|[\])}]\s+)\s*)(<<)(?!(?:(?!>>)[^\n])+?[},;] *\n)(?!(?:(?!>>).)+?>>\S+?>>)`, Punctuation, Push("<<")}, 624 // «quoted words» 625 {`(?<!(?:\d+|\.(?:Int|Numeric)|[$@%]\*?[\w':-]+\s+|[\])}]\s+)\s*)(«)(?![^»]+?[},;] *\n)(?![^»]+?»\S+?»)`, Punctuation, Push("«")}, 626 // [<] 627 {`(?<=\[\\?)<(?=\])`, Operator, nil}, 628 // < and > operators | something < onething > something 629 { 630 `(?<=[$@%&]?\w[\w':-]* +)(<=?)( *[^ ]+? *)(>=?)(?= *[$@%&]?\w[\w':-]*)`, 631 ByGroups(Operator, UsingSelf("root"), Operator), 632 nil, 633 }, 634 // <quoted words> 635 { 636 `(?<!(?:\d+|\.(?:Int|Numeric)|[$@%]\*?[\w':-]+\s+|[\])}]\s+)\s*)(<)((?:(?![,;)}] *(?:#[^\n]+)?\n)[^<>])+?)(>)(?!\s*(?:\d+|\.(?:Int|Numeric)|[$@%]\*?\w[\w':-]*[^(]|\s+\[))`, 637 ByGroups(Punctuation, String, Punctuation), 638 nil, 639 }, 640 {`C?X::['\w:-]+`, NameException, nil}, 641 Include("metaoperator"), 642 // Pair | key => value 643 { 644 `(\w[\w'-]*)(\s*)(=>)`, 645 ByGroups(String, Text, Operator), 646 nil, 647 }, 648 Include("colon-pair"), 649 // Token 650 { 651 `(?<=(?:^|\s)(?:regex|token|rule)(\s+))` + namePattern + colonPairLookahead + `\s*[({])`, 652 NameFunction, 653 Push("token", "name-adverb"), 654 }, 655 // Substitution 656 {`(?<=^|\b|\s)(?<!\.)(ss|S|s|TR|tr)\b(\s*)`, ByGroups(Keyword, Text), Push("substitution")}, 657 {keywordsPattern, Keyword, nil}, 658 {builtinTypesPattern, NameBuiltin, nil}, 659 {builtinRoutinesPattern, NameBuiltin, nil}, 660 // Class name 661 { 662 `(?<=(?:^|\s)(?:class|grammar|role|does|but|is|subset|of)\s+)` + namePattern, 663 NameClass, 664 Push("name-adverb"), 665 }, 666 // Routine 667 { 668 `(?<=(?:^|\s)(?:sub|method|multi sub|multi)\s+)!?` + namePattern + colonPairLookahead + `\s*[({])`, 669 NameFunction, 
670 Push("name-adverb"), 671 }, 672 // Constant 673 {`(?<=\bconstant\s+)` + namePattern, NameConstant, Push("name-adverb")}, 674 // Namespace 675 {`(?<=\b(?:use|module|package)\s+)` + namePattern, NameNamespace, Push("name-adverb")}, 676 Include("operator"), 677 Include("single-quote"), 678 {`(?<!(?<!\\)\\)"`, Punctuation, Push("double-quotes")}, 679 // m,rx regex 680 {`(?<=^|\b|\s)(ms|m|rx)\b(\s*)`, ByGroups(Keyword, Text), Push("rx")}, 681 // Quote constructs 682 { 683 `(?<=^|\b|\s)(?<keyword>(?:qq|q|Q))(?<adverbs>(?::?(?:heredoc|to|qq|ww|q|w|s|a|h|f|c|b|to|v|x))*)(?<ws>\s*)(?<opening_delimiters>(?<delimiter>[^0-9a-zA-Z:\s])\k<delimiter>*)`, 684 EmitterFunc(quote), 685 findBrackets(rakuQuote), 686 }, 687 // Function 688 { 689 `\b` + namePattern + colonPairLookahead + `\()`, 690 NameFunction, 691 Push("name-adverb"), 692 }, 693 // Method 694 { 695 `(?<!\.\.[?^*+]?)(?<=(?:\.[?^*+&]?)|self!)` + namePattern + colonPairLookahead + `\b)`, 696 NameFunction, 697 Push("name-adverb"), 698 }, 699 // Indirect invocant 700 {namePattern + `(?=\s+\W?['\w:-]+:\W)`, NameFunction, Push("name-adverb")}, 701 {`(?<=\W)(?:∅|i|e||tau|τ|pi|π|Inf|∞)(?=\W)`, NameConstant, nil}, 702 {`(「)([^」]*)(」)`, ByGroups(Punctuation, String, Punctuation), nil}, 703 {`(?<=^ *)\b` + namePattern + `(?=:\s*(?:for|while|loop))`, NameLabel, nil}, 704 // Sigilless variable 705 { 706 `(?<=\b(?:my|our|constant|let|temp)\s+)\\` + namePattern, 707 NameVariable, 708 Push("name-adverb"), 709 }, 710 {namePattern, Name, Push("name-adverb")}, 711 }, 712 "rx": { 713 Include("colon-pair-attribute"), 714 { 715 `(?<opening_delimiters>(?<delimiter>[^\w:\s])\k<delimiter>*)`, 716 ByGroupNames( 717 map[string]Emitter{ 718 `opening_delimiters`: Punctuation, 719 `delimiter`: nil, 720 }, 721 ), 722 findBrackets(rakuMatchRegex), 723 }, 724 }, 725 "substitution": { 726 Include("colon-pair-attribute"), 727 // Substitution | s{regex} = value 728 { 729 `(?<opening_delimiters>(?<delimiter>` + bracketsPattern + `)\k<delimiter>*)`, 730 
ByGroupNames(map[string]Emitter{ 731 `opening_delimiters`: Punctuation, 732 `delimiter`: nil, 733 }), 734 findBrackets(rakuMatchRegex), 735 }, 736 // Substitution | s/regex/string/ 737 { 738 `(?<opening_delimiters>[^\w:\s])`, 739 Punctuation, 740 findBrackets(rakuSubstitutionRegex), 741 }, 742 }, 743 "number": { 744 {`0_?[0-7]+(_[0-7]+)*`, LiteralNumberOct, nil}, 745 {`0x[0-9A-Fa-f]+(_[0-9A-Fa-f]+)*`, LiteralNumberHex, nil}, 746 {`0b[01]+(_[01]+)*`, LiteralNumberBin, nil}, 747 { 748 `(?i)(\d*(_\d*)*\.\d+(_\d*)*|\d+(_\d*)*\.\d+(_\d*)*)(e[+-]?\d+)?`, 749 LiteralNumberFloat, 750 nil, 751 }, 752 {`(?i)\d+(_\d*)*e[+-]?\d+(_\d*)*`, LiteralNumberFloat, nil}, 753 {`(?<=\d+)i`, NameConstant, nil}, 754 {`\d+(_\d+)*`, LiteralNumberInteger, nil}, 755 }, 756 "name-adverb": { 757 Include("colon-pair-attribute-keyvalue"), 758 Default(Pop(1)), 759 }, 760 "colon-pair": { 761 // :key(value) 762 {colonPairPattern, colonPair(String), findBrackets(rakuNameAttribute)}, 763 // :123abc 764 { 765 `(:)(\d+)(\w[\w'-]*)`, 766 ByGroups(Punctuation, UsingSelf("number"), String), 767 nil, 768 }, 769 // :key 770 {`(:)(!?)(\w[\w'-]*)`, ByGroups(Punctuation, Operator, String), nil}, 771 {`\s+`, Text, nil}, 772 }, 773 "colon-pair-attribute": { 774 // :key(value) 775 {colonPairPattern, colonPair(NameAttribute), findBrackets(rakuNameAttribute)}, 776 // :123abc 777 { 778 `(:)(\d+)(\w[\w'-]*)`, 779 ByGroups(Punctuation, UsingSelf("number"), NameAttribute), 780 nil, 781 }, 782 // :key 783 {`(:)(!?)(\w[\w'-]*)`, ByGroups(Punctuation, Operator, NameAttribute), nil}, 784 {`\s+`, Text, nil}, 785 }, 786 "colon-pair-attribute-keyvalue": { 787 // :key(value) 788 {colonPairPattern, colonPair(NameAttribute), findBrackets(rakuNameAttribute)}, 789 }, 790 "escape-qq": { 791 { 792 `(?<!(?<!\\)\\)(\\qq)(\[)(.+?)(\])`, 793 ByGroups(StringEscape, Punctuation, UsingSelf("qq"), Punctuation), 794 nil, 795 }, 796 }, 797 `escape-char`: { 798 {`(?<!(?<!\\)\\)(\\[abfrnrt])`, StringEscape, nil}, 799 }, 800 
`escape-single-quote`: { 801 {`(?<!(?<!\\)\\)(\\)(['\\])`, ByGroups(StringEscape, StringSingle), nil}, 802 }, 803 "escape-c-name": { 804 { 805 `(?<!(?<!\\)\\)(\\[c|C])(\[)(.+?)(\])`, 806 ByGroups(StringEscape, Punctuation, String, Punctuation), 807 nil, 808 }, 809 }, 810 "escape-hexadecimal": { 811 { 812 `(?<!(?<!\\)\\)(\\[x|X])(\[)([0-9a-fA-F]+)(\])`, 813 ByGroups(StringEscape, Punctuation, NumberHex, Punctuation), 814 nil, 815 }, 816 {`(\\[x|X])([0-9a-fA-F]+)`, ByGroups(StringEscape, NumberHex), nil}, 817 }, 818 "regex": { 819 // Placeholder, will be overwritten by mutators, DO NOT REMOVE! 820 {`\A\z`, nil, nil}, 821 Include("regex-escape-class"), 822 Include(`regex-character-escape`), 823 // $(code) 824 { 825 `([$@])((?<!(?<!\\)\\)\()`, 826 ByGroups(Keyword, Punctuation), 827 replaceRule(ruleReplacingConfig{ 828 delimiter: []rune(`)`), 829 tokenType: Punctuation, 830 stateName: `root`, 831 pushState: true, 832 }), 833 }, 834 // Exclude $/ from variables, because we can't get out of the end of the slash regex: $/; 835 {`\$(?=/)`, NameEntity, nil}, 836 // Exclude $ from variables 837 {`\$(?=\z|\s|[^<(\w*!.])`, NameEntity, nil}, 838 Include("variable"), 839 Include("escape-c-name"), 840 Include("escape-hexadecimal"), 841 Include("number"), 842 Include("single-quote"), 843 // :my variable code ... 
844 { 845 `(?<!(?<!\\)\\)(:)(my|our|state|constant|temp|let)`, 846 ByGroups(Operator, KeywordDeclaration), 847 replaceRule(ruleReplacingConfig{ 848 delimiter: []rune(`;`), 849 tokenType: Punctuation, 850 stateName: `root`, 851 pushState: true, 852 }), 853 }, 854 // <{code}> 855 { 856 `(?<!(?<!\\)\\)(<)([?!.]*)((?<!(?<!\\)\\){)`, 857 ByGroups(Punctuation, Operator, Punctuation), 858 replaceRule(ruleReplacingConfig{ 859 delimiter: []rune(`}>`), 860 tokenType: Punctuation, 861 stateName: `root`, 862 pushState: true, 863 }), 864 }, 865 // {code} 866 Include(`closure`), 867 // Properties 868 {`(:)(\w+)`, ByGroups(Punctuation, NameAttribute), nil}, 869 // Operator 870 {`\|\||\||&&|&|\.\.|\*\*|%%|%|:|!|<<|«|>>|»|\+|\*\*|\*|\?|=|~|<~~>`, Operator, nil}, 871 // Anchors 872 {`\^\^|\^|\$\$|\$`, NameEntity, nil}, 873 {`\.`, NameEntity, nil}, 874 {`#[^\n]*\n`, CommentSingle, nil}, 875 // Lookaround 876 { 877 `(?<!(?<!\\)\\)(<)(\s*)([?!.]+)(\s*)(after|before)`, 878 ByGroups(Punctuation, Text, Operator, Text, OperatorWord), 879 replaceRule(ruleReplacingConfig{ 880 delimiter: []rune(`>`), 881 tokenType: Punctuation, 882 stateName: `regex`, 883 pushState: true, 884 }), 885 }, 886 { 887 `(?<!(?<!\\)\\)(<)([|!?.]*)(wb|ww|ws|w)(>)`, 888 ByGroups(Punctuation, Operator, OperatorWord, Punctuation), 889 nil, 890 }, 891 // <$variable> 892 { 893 `(?<!(?<!\\)\\)(<)([?!.]*)([$@]\w[\w:-]*)(>)`, 894 ByGroups(Punctuation, Operator, NameVariable, Punctuation), 895 nil, 896 }, 897 // Capture markers 898 {`(?<!(?<!\\)\\)<\(|\)>`, Operator, nil}, 899 { 900 `(?<!(?<!\\)\\)(<)(\w[\w:-]*)(=\.?)`, 901 ByGroups(Punctuation, NameVariable, Operator), 902 Push(`regex-variable`), 903 }, 904 { 905 `(?<!(?<!\\)\\)(<)([|!?.&]*)(\w(?:(?!:\s)[\w':-])*)`, 906 ByGroups(Punctuation, Operator, NameFunction), 907 Push(`regex-function`), 908 }, 909 {`(?<!(?<!\\)\\)<`, Punctuation, Push("regex-property")}, 910 {`(?<!(?<!\\)\\)"`, Punctuation, Push("double-quotes")}, 911 {`(?<!(?<!\\)\\)(?:\]|\))`, Punctuation, Pop(1)}, 
912 {`(?<!(?<!\\)\\)(?:\[|\()`, Punctuation, Push("regex")}, 913 {`.+?`, StringRegex, nil}, 914 }, 915 "regex-class-builtin": { 916 { 917 `\b(?:alnum|alpha|blank|cntrl|digit|graph|lower|print|punct|space|upper|xdigit|same|ident)\b`, 918 NameBuiltin, 919 nil, 920 }, 921 }, 922 "regex-function": { 923 // <function> 924 {`(?<!(?<!\\)\\)>`, Punctuation, Pop(1)}, 925 // <function(parameter)> 926 { 927 `\(`, 928 Punctuation, 929 replaceRule(ruleReplacingConfig{ 930 delimiter: []rune(`)>`), 931 tokenType: Punctuation, 932 stateName: `root`, 933 popState: true, 934 pushState: true, 935 }), 936 }, 937 // <function value> 938 { 939 `\s+`, 940 StringRegex, 941 replaceRule(ruleReplacingConfig{ 942 delimiter: []rune(`>`), 943 tokenType: Punctuation, 944 stateName: `regex`, 945 popState: true, 946 pushState: true, 947 }), 948 }, 949 // <function: value> 950 { 951 `:`, 952 Punctuation, 953 replaceRule(ruleReplacingConfig{ 954 delimiter: []rune(`>`), 955 tokenType: Punctuation, 956 stateName: `root`, 957 popState: true, 958 pushState: true, 959 }), 960 }, 961 }, 962 "regex-variable": { 963 Include(`regex-starting-operators`), 964 // <var=function( 965 { 966 `(&)?(\w(?:(?!:\s)[\w':-])*)(?=\()`, 967 ByGroups(Operator, NameFunction), 968 Mutators(Pop(1), Push(`regex-function`)), 969 }, 970 // <var=function> 971 {`(&)?(\w[\w':-]*)(>)`, ByGroups(Operator, NameFunction, Punctuation), Pop(1)}, 972 // <var= 973 Default(Pop(1), Push(`regex-property`)), 974 }, 975 "regex-property": { 976 {`(?<!(?<!\\)\\)>`, Punctuation, Pop(1)}, 977 Include("regex-class-builtin"), 978 Include("variable"), 979 Include(`regex-starting-operators`), 980 Include("colon-pair-attribute"), 981 {`(?<!(?<!\\)\\)\[`, Punctuation, Push("regex-character-class")}, 982 {`\+|\-`, Operator, nil}, 983 {`@[\w':-]+`, NameVariable, nil}, 984 {`.+?`, StringRegex, nil}, 985 }, 986 `regex-starting-operators`: { 987 {`(?<=<)[|!?.]+`, Operator, nil}, 988 }, 989 "regex-escape-class": { 990 {`(?i)\\n|\\t|\\h|\\v|\\s|\\d|\\w`, 
StringEscape, nil}, 991 }, 992 `regex-character-escape`: { 993 {`(?<!(?<!\\)\\)(\\)(.)`, ByGroups(StringEscape, StringRegex), nil}, 994 }, 995 "regex-character-class": { 996 {`(?<!(?<!\\)\\)\]`, Punctuation, Pop(1)}, 997 Include("regex-escape-class"), 998 Include("escape-c-name"), 999 Include("escape-hexadecimal"), 1000 Include(`regex-character-escape`), 1001 Include("number"), 1002 {`\.\.`, Operator, nil}, 1003 {`.+?`, StringRegex, nil}, 1004 }, 1005 "metaoperator": { 1006 // Z[=>] 1007 { 1008 `\b([RZX]+)\b(\[)([^\s\]]+?)(\])`, 1009 ByGroups(OperatorWord, Punctuation, UsingSelf("root"), Punctuation), 1010 nil, 1011 }, 1012 // Z=> 1013 {`\b([RZX]+)\b([^\s\]]+)`, ByGroups(OperatorWord, UsingSelf("operator")), nil}, 1014 }, 1015 "operator": { 1016 // Word Operator 1017 {wordOperatorsPattern, OperatorWord, nil}, 1018 // Operator 1019 {operatorsPattern, Operator, nil}, 1020 }, 1021 "pod": { 1022 // Single-line pod declaration 1023 {`(#[|=])\s`, Keyword, Push("pod-single")}, 1024 // Multi-line pod declaration 1025 { 1026 "(?<keyword>#[|=])(?<opening_delimiters>(?<delimiter>" + bracketsPattern + `)\k<delimiter>*)(?<value>)(?<closing_delimiters>)`, 1027 ByGroupNames( 1028 map[string]Emitter{ 1029 `keyword`: Keyword, 1030 `opening_delimiters`: Punctuation, 1031 `delimiter`: nil, 1032 `value`: UsingSelf("pod-declaration"), 1033 `closing_delimiters`: Punctuation, 1034 }), 1035 findBrackets(rakuPodDeclaration), 1036 }, 1037 Include("pod-blocks"), 1038 }, 1039 "pod-blocks": { 1040 // =begin code 1041 { 1042 `(?<=^ *)(?<ws> *)(?<keyword>=begin)(?<ws2> +)(?<name>code)(?<config>[^\n]*)(?<value>.*?)(?<ws3>^\k<ws>)(?<end_keyword>=end)(?<ws4> +)\k<name>`, 1043 EmitterFunc(podCode), 1044 nil, 1045 }, 1046 // =begin 1047 { 1048 `(?<=^ *)(?<ws> *)(?<keyword>=begin)(?<ws2> +)(?!code)(?<name>\w[\w'-]*)(?<config>[^\n]*)(?<value>)(?<closing_delimiters>)`, 1049 ByGroupNames( 1050 map[string]Emitter{ 1051 `ws`: Comment, 1052 `keyword`: Keyword, 1053 `ws2`: StringDoc, 1054 `name`: Keyword, 
1055 `config`: EmitterFunc(podConfig), 1056 `value`: UsingSelf("pod-begin"), 1057 `closing_delimiters`: Keyword, 1058 }), 1059 findBrackets(rakuPod), 1060 }, 1061 // =for ... 1062 { 1063 `(?<=^ *)(?<ws> *)(?<keyword>=(?:for|defn))(?<ws2> +)(?<name>\w[\w'-]*)(?<config>[^\n]*\n)`, 1064 ByGroups(Comment, Keyword, StringDoc, Keyword, EmitterFunc(podConfig)), 1065 Push("pod-paragraph"), 1066 }, 1067 // =config 1068 { 1069 `(?<=^ *)(?<ws> *)(?<keyword>=config)(?<ws2> +)(?<name>\w[\w'-]*)(?<config>[^\n]*\n)`, 1070 ByGroups(Comment, Keyword, StringDoc, Keyword, EmitterFunc(podConfig)), 1071 nil, 1072 }, 1073 // =alias 1074 { 1075 `(?<=^ *)(?<ws> *)(?<keyword>=alias)(?<ws2> +)(?<name>\w[\w'-]*)(?<value>[^\n]*\n)`, 1076 ByGroups(Comment, Keyword, StringDoc, Keyword, StringDoc), 1077 nil, 1078 }, 1079 // =encoding 1080 { 1081 `(?<=^ *)(?<ws> *)(?<keyword>=encoding)(?<ws2> +)(?<name>[^\n]+)`, 1082 ByGroups(Comment, Keyword, StringDoc, Name), 1083 nil, 1084 }, 1085 // =para ... 1086 { 1087 `(?<=^ *)(?<ws> *)(?<keyword>=(?:para|table|pod))(?<config>(?<!\n\s*)[^\n]*\n)`, 1088 ByGroups(Comment, Keyword, EmitterFunc(podConfig)), 1089 Push("pod-paragraph"), 1090 }, 1091 // =head1 ... 1092 { 1093 `(?<=^ *)(?<ws> *)(?<keyword>=head\d+)(?<ws2> *)(?<config>#?)`, 1094 ByGroups(Comment, Keyword, GenericHeading, Keyword), 1095 Push("pod-heading"), 1096 }, 1097 // =item ... 1098 { 1099 `(?<=^ *)(?<ws> *)(?<keyword>=(?:item\d*|comment|data|[A-Z]+))(?<ws2> *)(?<config>#?)`, 1100 ByGroups(Comment, Keyword, StringDoc, Keyword), 1101 Push("pod-paragraph"), 1102 }, 1103 { 1104 `(?<=^ *)(?<ws> *)(?<keyword>=finish)(?<config>[^\n]*)`, 1105 ByGroups(Comment, Keyword, EmitterFunc(podConfig)), 1106 Push("pod-finish"), 1107 }, 1108 // ={custom} ... 
1109 { 1110 `(?<=^ *)(?<ws> *)(?<name>=\w[\w'-]*)(?<ws2> *)(?<config>#?)`, 1111 ByGroups(Comment, Name, StringDoc, Keyword), 1112 Push("pod-paragraph"), 1113 }, 1114 // = podconfig 1115 { 1116 `(?<=^ *)(?<keyword> *=)(?<ws> *)(?<config>(?::\w[\w'-]*(?:` + colonPairOpeningBrackets + `.+?` + 1117 colonPairClosingBrackets + `) *)*\n)`, 1118 ByGroups(Keyword, StringDoc, EmitterFunc(podConfig)), 1119 nil, 1120 }, 1121 }, 1122 "pod-begin": { 1123 Include("pod-blocks"), 1124 Include("pre-pod-formatter"), 1125 {`.+?`, StringDoc, nil}, 1126 }, 1127 "pod-declaration": { 1128 Include("pre-pod-formatter"), 1129 {`.+?`, StringDoc, nil}, 1130 }, 1131 "pod-paragraph": { 1132 {`\n *\n|\n(?=^ *=)`, StringDoc, Pop(1)}, 1133 Include("pre-pod-formatter"), 1134 {`.+?`, StringDoc, nil}, 1135 }, 1136 "pod-single": { 1137 {`\n`, StringDoc, Pop(1)}, 1138 Include("pre-pod-formatter"), 1139 {`.+?`, StringDoc, nil}, 1140 }, 1141 "pod-heading": { 1142 {`\n *\n|\n(?=^ *=)`, GenericHeading, Pop(1)}, 1143 Include("pre-pod-formatter"), 1144 {`.+?`, GenericHeading, nil}, 1145 }, 1146 "pod-finish": { 1147 {`\z`, nil, Pop(1)}, 1148 Include("pre-pod-formatter"), 1149 {`.+?`, StringDoc, nil}, 1150 }, 1151 "pre-pod-formatter": { 1152 // C<code>, B<bold>, ... 1153 { 1154 `(?<keyword>[CBIUDTKRPAELZVMSXN])(?<opening_delimiters><+|«)`, 1155 ByGroups(Keyword, Punctuation), 1156 findBrackets(rakuPodFormatter), 1157 }, 1158 }, 1159 "pod-formatter": { 1160 // Placeholder rule, will be replaced by mutators. DO NOT REMOVE! 1161 {`>`, Punctuation, Pop(1)}, 1162 Include("pre-pod-formatter"), 1163 // Placeholder rule, will be replaced by mutators. DO NOT REMOVE! 
1164 {`.+?`, StringOther, nil}, 1165 }, 1166 "variable": { 1167 {variablePattern, NameVariable, Push("name-adverb")}, 1168 {globalVariablePattern, NameVariableGlobal, Push("name-adverb")}, 1169 {`[$@]<[^>]+>`, NameVariable, nil}, 1170 {`\$[/!¢]`, NameVariable, nil}, 1171 {`[$@%]`, NameVariable, nil}, 1172 }, 1173 "single-quote": { 1174 {`(?<!(?<!\\)\\)'`, Punctuation, Push("single-quote-inner")}, 1175 }, 1176 "single-quote-inner": { 1177 {`(?<!(?<!(?<!\\)\\)\\)'`, Punctuation, Pop(1)}, 1178 Include("escape-single-quote"), 1179 Include("escape-qq"), 1180 {`(?:\\\\|\\[^\\]|[^'\\])+?`, StringSingle, nil}, 1181 }, 1182 "double-quotes": { 1183 {`(?<!(?<!\\)\\)"`, Punctuation, Pop(1)}, 1184 Include("qq"), 1185 }, 1186 "<<": { 1187 {`>>(?!\s*(?:\d+|\.(?:Int|Numeric)|[$@%]\*?[\w':-]+|\s+\[))`, Punctuation, Pop(1)}, 1188 Include("ww"), 1189 }, 1190 "«": { 1191 {`»(?!\s*(?:\d+|\.(?:Int|Numeric)|[$@%]\*?[\w':-]+|\s+\[))`, Punctuation, Pop(1)}, 1192 Include("ww"), 1193 }, 1194 "ww": { 1195 Include("single-quote"), 1196 Include("qq"), 1197 }, 1198 "qq": { 1199 Include("qq-variable"), 1200 Include("closure"), 1201 Include(`escape-char`), 1202 Include("escape-hexadecimal"), 1203 Include("escape-c-name"), 1204 Include("escape-qq"), 1205 {`.+?`, StringDouble, nil}, 1206 }, 1207 "qq-variable": { 1208 { 1209 `(?<!(?<!\\)\\)(?:` + variablePattern + `|` + globalVariablePattern + `)` + colonPairLookahead + `)`, 1210 NameVariable, 1211 Push("qq-variable-extras", "name-adverb"), 1212 }, 1213 }, 1214 "qq-variable-extras": { 1215 // Method 1216 { 1217 `(?<operator>\.)(?<method_name>` + namePattern + `)` + colonPairLookahead + `\()`, 1218 ByGroupNames(map[string]Emitter{ 1219 `operator`: Operator, 1220 `method_name`: NameFunction, 1221 }), 1222 Push(`name-adverb`), 1223 }, 1224 // Function/Signature 1225 { 1226 `\(`, Punctuation, replaceRule( 1227 ruleReplacingConfig{ 1228 delimiter: []rune(`)`), 1229 tokenType: Punctuation, 1230 stateName: `root`, 1231 pushState: true, 1232 }), 1233 }, 1234 
Default(Pop(1)), 1235 }, 1236 "Q": { 1237 Include("escape-qq"), 1238 {`.+?`, String, nil}, 1239 }, 1240 "Q-closure": { 1241 Include("escape-qq"), 1242 Include("closure"), 1243 {`.+?`, String, nil}, 1244 }, 1245 "Q-variable": { 1246 Include("escape-qq"), 1247 Include("qq-variable"), 1248 {`.+?`, String, nil}, 1249 }, 1250 "closure": { 1251 {`(?<!(?<!\\)\\){`, Punctuation, replaceRule( 1252 ruleReplacingConfig{ 1253 delimiter: []rune(`}`), 1254 tokenType: Punctuation, 1255 stateName: `root`, 1256 pushState: true, 1257 }), 1258 }, 1259 }, 1260 "token": { 1261 // Token signature 1262 {`\(`, Punctuation, replaceRule( 1263 ruleReplacingConfig{ 1264 delimiter: []rune(`)`), 1265 tokenType: Punctuation, 1266 stateName: `root`, 1267 pushState: true, 1268 }), 1269 }, 1270 {`{`, Punctuation, replaceRule( 1271 ruleReplacingConfig{ 1272 delimiter: []rune(`}`), 1273 tokenType: Punctuation, 1274 stateName: `regex`, 1275 popState: true, 1276 pushState: true, 1277 }), 1278 }, 1279 {`\s*`, Text, nil}, 1280 Default(Pop(1)), 1281 }, 1282 } 1283} 1284 1285// Joins keys of rune map 1286func joinRuneMap(m map[rune]rune) string { 1287 runes := make([]rune, 0, len(m)) 1288 for k := range m { 1289 runes = append(runes, k) 1290 } 1291 1292 return string(runes) 1293} 1294 1295// Finds the index of substring in the string starting at position n 1296func indexAt(str []rune, substr []rune, pos int) int { 1297 strFromPos := str[pos:] 1298 text := string(strFromPos) 1299 1300 idx := strings.Index(text, string(substr)) 1301 if idx > -1 { 1302 idx = utf8.RuneCountInString(text[:idx]) 1303 1304 // Search again if the substr is escaped with backslash 1305 if (idx > 1 && strFromPos[idx-1] == '\\' && strFromPos[idx-2] != '\\') || 1306 (idx == 1 && strFromPos[idx-1] == '\\') { 1307 idx = indexAt(str[pos:], substr, idx+1) 1308 1309 idx = utf8.RuneCountInString(text[:idx]) 1310 1311 if idx < 0 { 1312 return idx 1313 } 1314 } 1315 idx += pos 1316 } 1317 1318 return idx 1319} 1320 1321// Tells if an array of 
string contains a string 1322func contains(s []string, e string) bool { 1323 for _, value := range s { 1324 if value == e { 1325 return true 1326 } 1327 } 1328 return false 1329} 1330 1331type rulePosition int 1332 1333const ( 1334 topRule rulePosition = 0 1335 bottomRule = -1 1336) 1337 1338type ruleMakingConfig struct { 1339 delimiter []rune 1340 pattern string 1341 tokenType Emitter 1342 mutator Mutator 1343 numberOfDelimiterChars int 1344} 1345 1346type ruleReplacingConfig struct { 1347 delimiter []rune 1348 pattern string 1349 tokenType Emitter 1350 numberOfDelimiterChars int 1351 mutator Mutator 1352 appendMutator Mutator 1353 rulePosition rulePosition 1354 stateName string 1355 pop bool 1356 popState bool 1357 pushState bool 1358} 1359 1360// Pops rule from state-stack and replaces the rule with the previous rule 1361func popRule(rule ruleReplacingConfig) MutatorFunc { 1362 return func(state *LexerState) error { 1363 stackName := genStackName(rule.stateName, rule.rulePosition) 1364 1365 stack, ok := state.Get(stackName).([]ruleReplacingConfig) 1366 1367 if ok && len(stack) > 0 { 1368 // Pop from stack 1369 stack = stack[:len(stack)-1] 1370 lastRule := stack[len(stack)-1] 1371 lastRule.pushState = false 1372 lastRule.popState = false 1373 lastRule.pop = true 1374 state.Set(stackName, stack) 1375 1376 // Call replaceRule to use the last rule 1377 err := replaceRule(lastRule)(state) 1378 if err != nil { 1379 panic(err) 1380 } 1381 } 1382 1383 return nil 1384 } 1385} 1386 1387// Replaces a state's rule based on the rule config and position 1388func replaceRule(rule ruleReplacingConfig) MutatorFunc { 1389 return func(state *LexerState) error { 1390 stateName := rule.stateName 1391 stackName := genStackName(rule.stateName, rule.rulePosition) 1392 1393 stack, ok := state.Get(stackName).([]ruleReplacingConfig) 1394 if !ok { 1395 stack = []ruleReplacingConfig{} 1396 } 1397 1398 // If state-stack is empty fill it with the placeholder rule 1399 if len(stack) == 0 { 
1400 stack = []ruleReplacingConfig{ 1401 { 1402 // Placeholder, will be overwritten by mutators, DO NOT REMOVE! 1403 pattern: `\A\z`, 1404 tokenType: nil, 1405 mutator: nil, 1406 stateName: stateName, 1407 rulePosition: rule.rulePosition, 1408 }, 1409 } 1410 state.Set(stackName, stack) 1411 } 1412 1413 var mutator Mutator 1414 mutators := []Mutator{} 1415 1416 switch { 1417 case rule.rulePosition == topRule && rule.mutator == nil: 1418 // Default mutator for top rule 1419 mutators = []Mutator{Pop(1), popRule(rule)} 1420 case rule.rulePosition == topRule && rule.mutator != nil: 1421 // Default mutator for top rule, when rule.mutator is set 1422 mutators = []Mutator{rule.mutator, popRule(rule)} 1423 case rule.mutator != nil: 1424 mutators = []Mutator{rule.mutator} 1425 } 1426 1427 if rule.appendMutator != nil { 1428 mutators = append(mutators, rule.appendMutator) 1429 } 1430 1431 if len(mutators) > 0 { 1432 mutator = Mutators(mutators...) 1433 } else { 1434 mutator = nil 1435 } 1436 1437 ruleConfig := ruleMakingConfig{ 1438 pattern: rule.pattern, 1439 delimiter: rule.delimiter, 1440 numberOfDelimiterChars: rule.numberOfDelimiterChars, 1441 tokenType: rule.tokenType, 1442 mutator: mutator, 1443 } 1444 1445 cRule := makeRule(ruleConfig) 1446 1447 switch rule.rulePosition { 1448 case topRule: 1449 state.Rules[stateName][0] = cRule 1450 case bottomRule: 1451 state.Rules[stateName][len(state.Rules[stateName])-1] = cRule 1452 } 1453 1454 // Pop state name from stack if asked. 
State should be popped first before Pushing 1455 if rule.popState { 1456 err := Pop(1)(state) 1457 if err != nil { 1458 panic(err) 1459 } 1460 } 1461 1462 // Push state name to stack if asked 1463 if rule.pushState { 1464 err := Push(stateName)(state) 1465 if err != nil { 1466 panic(err) 1467 } 1468 } 1469 1470 if !rule.pop { 1471 state.Set(stackName, append(stack, rule)) 1472 } 1473 1474 return nil 1475 } 1476} 1477 1478// Generates rule replacing stack using state name and rule position 1479func genStackName(stateName string, rulePosition rulePosition) (stackName string) { 1480 switch rulePosition { 1481 case topRule: 1482 stackName = stateName + `-top-stack` 1483 case bottomRule: 1484 stackName = stateName + `-bottom-stack` 1485 } 1486 return 1487} 1488 1489// Makes a compiled rule and returns it 1490func makeRule(config ruleMakingConfig) *CompiledRule { 1491 var rePattern string 1492 1493 if len(config.delimiter) > 0 { 1494 delimiter := string(config.delimiter) 1495 1496 if config.numberOfDelimiterChars > 1 { 1497 delimiter = strings.Repeat(delimiter, config.numberOfDelimiterChars) 1498 } 1499 1500 rePattern = `(?<!(?<!\\)\\)` + regexp2.Escape(delimiter) 1501 } else { 1502 rePattern = config.pattern 1503 } 1504 1505 regex := regexp2.MustCompile(rePattern, regexp2.None) 1506 1507 cRule := &CompiledRule{ 1508 Rule: Rule{rePattern, config.tokenType, config.mutator}, 1509 Regexp: regex, 1510 } 1511 1512 return cRule 1513} 1514 1515// Emitter for colon pairs, changes token state based on key and brackets 1516func colonPair(tokenClass TokenType) Emitter { 1517 return EmitterFunc(func(groups []string, state *LexerState) Iterator { 1518 iterators := []Iterator{} 1519 tokens := []Token{ 1520 {Punctuation, state.NamedGroups[`colon`]}, 1521 {Punctuation, state.NamedGroups[`opening_delimiters`]}, 1522 {Punctuation, state.NamedGroups[`closing_delimiters`]}, 1523 } 1524 1525 // Append colon 1526 iterators = append(iterators, Literator(tokens[0])) 1527 1528 if tokenClass == 
NameAttribute { 1529 iterators = append(iterators, Literator(Token{NameAttribute, state.NamedGroups[`key`]})) 1530 } else { 1531 var keyTokenState string 1532 keyre := regexp.MustCompile(`^\d+$`) 1533 if keyre.MatchString(state.NamedGroups[`key`]) { 1534 keyTokenState = "common" 1535 } else { 1536 keyTokenState = "Q" 1537 } 1538 1539 // Use token state to Tokenise key 1540 if keyTokenState != "" { 1541 iterator, err := state.Lexer.Tokenise( 1542 &TokeniseOptions{ 1543 State: keyTokenState, 1544 Nested: true, 1545 }, state.NamedGroups[`key`]) 1546 1547 if err != nil { 1548 panic(err) 1549 } else { 1550 // Append key 1551 iterators = append(iterators, iterator) 1552 } 1553 } 1554 } 1555 1556 // Append punctuation 1557 iterators = append(iterators, Literator(tokens[1])) 1558 1559 var valueTokenState string 1560 1561 switch state.NamedGroups[`opening_delimiters`] { 1562 case "(", "{", "[": 1563 valueTokenState = "root" 1564 case "<<", "«": 1565 valueTokenState = "ww" 1566 case "<": 1567 valueTokenState = "Q" 1568 } 1569 1570 // Use token state to Tokenise value 1571 if valueTokenState != "" { 1572 iterator, err := state.Lexer.Tokenise( 1573 &TokeniseOptions{ 1574 State: valueTokenState, 1575 Nested: true, 1576 }, state.NamedGroups[`value`]) 1577 1578 if err != nil { 1579 panic(err) 1580 } else { 1581 // Append value 1582 iterators = append(iterators, iterator) 1583 } 1584 } 1585 // Append last punctuation 1586 iterators = append(iterators, Literator(tokens[2])) 1587 1588 return Concaterator(iterators...) 
1589 }) 1590} 1591 1592// Emitter for quoting constructs, changes token state based on quote name and adverbs 1593func quote(groups []string, state *LexerState) Iterator { 1594 keyword := state.NamedGroups[`keyword`] 1595 adverbsStr := state.NamedGroups[`adverbs`] 1596 iterators := []Iterator{} 1597 tokens := []Token{ 1598 {Keyword, keyword}, 1599 {StringAffix, adverbsStr}, 1600 {Text, state.NamedGroups[`ws`]}, 1601 {Punctuation, state.NamedGroups[`opening_delimiters`]}, 1602 {Punctuation, state.NamedGroups[`closing_delimiters`]}, 1603 } 1604 1605 // Append all tokens before dealing with the main string 1606 iterators = append(iterators, Literator(tokens[:4]...)) 1607 1608 var tokenStates []string 1609 1610 // Set tokenStates based on adverbs 1611 adverbs := strings.Split(adverbsStr, ":") 1612 for _, adverb := range adverbs { 1613 switch adverb { 1614 case "c", "closure": 1615 tokenStates = append(tokenStates, "Q-closure") 1616 case "qq": 1617 tokenStates = append(tokenStates, "qq") 1618 case "ww": 1619 tokenStates = append(tokenStates, "ww") 1620 case "s", "scalar", "a", "array", "h", "hash", "f", "function": 1621 tokenStates = append(tokenStates, "Q-variable") 1622 } 1623 } 1624 1625 var tokenState string 1626 1627 switch { 1628 case keyword == "qq" || contains(tokenStates, "qq"): 1629 tokenState = "qq" 1630 case adverbsStr == "ww" || contains(tokenStates, "ww"): 1631 tokenState = "ww" 1632 case contains(tokenStates, "Q-closure") && contains(tokenStates, "Q-variable"): 1633 tokenState = "qq" 1634 case contains(tokenStates, "Q-closure"): 1635 tokenState = "Q-closure" 1636 case contains(tokenStates, "Q-variable"): 1637 tokenState = "Q-variable" 1638 default: 1639 tokenState = "Q" 1640 } 1641 1642 iterator, err := state.Lexer.Tokenise( 1643 &TokeniseOptions{ 1644 State: tokenState, 1645 Nested: true, 1646 }, state.NamedGroups[`value`]) 1647 1648 if err != nil { 1649 panic(err) 1650 } else { 1651 iterators = append(iterators, iterator) 1652 } 1653 1654 // Append the 
last punctuation 1655 iterators = append(iterators, Literator(tokens[4])) 1656 1657 return Concaterator(iterators...) 1658} 1659 1660// Emitter for pod config, tokenises the properties with "colon-pair-attribute" state 1661func podConfig(groups []string, state *LexerState) Iterator { 1662 // Tokenise pod config 1663 iterator, err := state.Lexer.Tokenise( 1664 &TokeniseOptions{ 1665 State: "colon-pair-attribute", 1666 Nested: true, 1667 }, groups[0]) 1668 1669 if err != nil { 1670 panic(err) 1671 } else { 1672 return iterator 1673 } 1674} 1675 1676// Emitter for pod code, tokenises the code based on the lang specified 1677func podCode(groups []string, state *LexerState) Iterator { 1678 iterators := []Iterator{} 1679 tokens := []Token{ 1680 {Comment, state.NamedGroups[`ws`]}, 1681 {Keyword, state.NamedGroups[`keyword`]}, 1682 {Keyword, state.NamedGroups[`ws2`]}, 1683 {Keyword, state.NamedGroups[`name`]}, 1684 {StringDoc, state.NamedGroups[`value`]}, 1685 {Comment, state.NamedGroups[`ws3`]}, 1686 {Keyword, state.NamedGroups[`end_keyword`]}, 1687 {Keyword, state.NamedGroups[`ws4`]}, 1688 {Keyword, state.NamedGroups[`name`]}, 1689 } 1690 1691 // Append all tokens before dealing with the pod config 1692 iterators = append(iterators, Literator(tokens[:4]...)) 1693 1694 // Tokenise pod config 1695 iterators = append(iterators, podConfig([]string{state.NamedGroups[`config`]}, state)) 1696 1697 langMatch := regexp.MustCompile(`:lang\W+(\w+)`).FindStringSubmatch(state.NamedGroups[`config`]) 1698 var lang string 1699 if len(langMatch) > 1 { 1700 lang = langMatch[1] 1701 } 1702 1703 // Tokenise code based on lang property 1704 sublexer := internal.Get(lang) 1705 if sublexer != nil { 1706 iterator, err := sublexer.Tokenise(nil, state.NamedGroups[`value`]) 1707 1708 if err != nil { 1709 panic(err) 1710 } else { 1711 iterators = append(iterators, iterator) 1712 } 1713 } else { 1714 iterators = append(iterators, Literator(tokens[4])) 1715 } 1716 1717 // Append the rest of the 
tokens 1718 iterators = append(iterators, Literator(tokens[5:]...)) 1719 1720 return Concaterator(iterators...) 1721} 1722