1package regexp2 2 3import ( 4 "bytes" 5 "errors" 6 "fmt" 7 "math" 8 "strconv" 9 "strings" 10 "time" 11 "unicode" 12 13 "github.com/dlclark/regexp2/syntax" 14) 15 16type runner struct { 17 re *Regexp 18 code *syntax.Code 19 20 runtextstart int // starting point for search 21 22 runtext []rune // text to search 23 runtextpos int // current position in text 24 runtextend int 25 26 // The backtracking stack. Opcodes use this to store data regarding 27 // what they have matched and where to backtrack to. Each "frame" on 28 // the stack takes the form of [CodePosition Data1 Data2...], where 29 // CodePosition is the position of the current opcode and 30 // the data values are all optional. The CodePosition can be negative, and 31 // these values (also called "back2") are used by the BranchMark family of opcodes 32 // to indicate whether they are backtracking after a successful or failed 33 // match. 34 // When we backtrack, we pop the CodePosition off the stack, set the current 35 // instruction pointer to that code position, and mark the opcode 36 // with a backtracking flag ("Back"). Each opcode then knows how to 37 // handle its own data. 38 runtrack []int 39 runtrackpos int 40 41 // This stack is used to track text positions across different opcodes. 42 // For example, in /(a*b)+/, the parentheses result in a SetMark/CaptureMark 43 // pair. SetMark records the text position before we match a*b. Then 44 // CaptureMark uses that position to figure out where the capture starts. 45 // Opcodes which push onto this stack are always paired with other opcodes 46 // which will pop the value from it later. A successful match should mean 47 // that this stack is empty. 48 runstack []int 49 runstackpos int 50 51 // The crawl stack is used to keep track of captures. Every time a group 52 // has a capture, we push its group number onto the runcrawl stack. In 53 // the case of a balanced match, we push BOTH groups onto the stack. 54 runcrawl []int 55 runcrawlpos int 56 57 runtrackcount int // count of states that may do backtracking 58 59 runmatch *Match // result object 60 61 ignoreTimeout bool 62 timeout time.Duration // timeout in milliseconds (needed for actual) 63 timeoutChecksToSkip int 64 timeoutAt time.Time 65 66 operator syntax.InstOp 67 codepos int 68 rightToLeft bool 69 caseInsensitive bool 70} 71 72// run searches for matches and can continue from the previous match 73// 74// quick is usually false, but can be true to not return matches, just put it in caches 75// textstart is -1 to start at the "beginning" (depending on Right-To-Left), otherwise an index in input 76// input is the string to search for our regex pattern 77func (re *Regexp) run(quick bool, textstart int, input []rune) (*Match, error) { 78 79 // get a cached runner 80 runner := re.getRunner() 81 defer re.putRunner(runner) 82 83 if textstart < 0 { 84 if re.RightToLeft() { 85 textstart = len(input) 86 } else { 87 textstart = 0 88 } 89 } 90 91 return runner.scan(input, textstart, quick, re.MatchTimeout) 92} 93 94// Scans the string to find the first match. Uses the Match object 95// both to feed text in and as a place to store matches that come out. 96// 97// All the action is in the Go() method. Our 98// responsibility is to load up the class members before 99// calling Go. 100// 101// The optimizer can compute a set of candidate starting characters, 102// and we could use a separate method Skip() that will quickly scan past 103// any characters that we know can't match. 104func (r *runner) scan(rt []rune, textstart int, quick bool, timeout time.Duration) (*Match, error) { 105 r.timeout = timeout 106 r.ignoreTimeout = (time.Duration(math.MaxInt64) == timeout) 107 r.runtextstart = textstart 108 r.runtext = rt 109 r.runtextend = len(rt) 110 111 stoppos := r.runtextend 112 bump := 1 113 114 if r.re.RightToLeft() { 115 bump = -1 116 stoppos = 0 117 } 118 119 r.runtextpos = textstart 120 initted := false 121 122 r.startTimeoutWatch() 123 for { 124 if r.re.Debug() { 125 //fmt.Printf("\nSearch content: %v\n", string(r.runtext)) 126 fmt.Printf("\nSearch range: from 0 to %v\n", r.runtextend) 127 fmt.Printf("Firstchar search starting at %v stopping at %v\n", r.runtextpos, stoppos) 128 } 129 130 if r.findFirstChar() { 131 if err := r.checkTimeout(); err != nil { 132 return nil, err 133 } 134 135 if !initted { 136 r.initMatch() 137 initted = true 138 } 139 140 if r.re.Debug() { 141 fmt.Printf("Executing engine starting at %v\n\n", r.runtextpos) 142 } 143 144 if err := r.execute(); err != nil { 145 return nil, err 146 } 147 148 if r.runmatch.matchcount[0] > 0 { 149 // We'll return a match even if it touches a previous empty match 150 return r.tidyMatch(quick), nil 151 } 152 153 // reset state for another go 154 r.runtrackpos = len(r.runtrack) 155 r.runstackpos = len(r.runstack) 156 r.runcrawlpos = len(r.runcrawl) 157 } 158 159 // failure! 160 161 if r.runtextpos == stoppos { 162 r.tidyMatch(true) 163 return nil, nil 164 } 165 166 // Recognize leading []* and various anchors, and bump on failure accordingly 167 168 // r.bump by one and start again 169 170 r.runtextpos += bump 171 } 172 // We never get here 173} 174 175func (r *runner) execute() error { 176 177 r.goTo(0) 178 179 for { 180 181 if r.re.Debug() { 182 r.dumpState() 183 } 184 185 if err := r.checkTimeout(); err != nil { 186 return err 187 } 188 189 switch r.operator { 190 case syntax.Stop: 191 return nil 192 193 case syntax.Nothing: 194 break 195 196 case syntax.Goto: 197 r.goTo(r.operand(0)) 198 continue 199 200 case syntax.Testref: 201 if !r.runmatch.isMatched(r.operand(0)) { 202 break 203 } 204 r.advance(1) 205 continue 206 207 case syntax.Lazybranch: 208 r.trackPush1(r.textPos()) 209 r.advance(1) 210 continue 211 212 case syntax.Lazybranch | syntax.Back: 213 r.trackPop() 214 r.textto(r.trackPeek()) 215 r.goTo(r.operand(0)) 216 continue 217 218 case syntax.Setmark: 219 r.stackPush(r.textPos()) 220 r.trackPush() 221 r.advance(0) 222 continue 223 224 case syntax.Nullmark: 225 r.stackPush(-1) 226 r.trackPush() 227 r.advance(0) 228 continue 229 230 case syntax.Setmark | syntax.Back, syntax.Nullmark | syntax.Back: 231 r.stackPop() 232 break 233 234 case syntax.Getmark: 235 r.stackPop() 236 r.trackPush1(r.stackPeek()) 237 r.textto(r.stackPeek()) 238 r.advance(0) 239 continue 240 241 case syntax.Getmark | syntax.Back: 242 r.trackPop() 243 r.stackPush(r.trackPeek()) 244 break 245 246 case syntax.Capturemark: 247 if r.operand(1) != -1 && !r.runmatch.isMatched(r.operand(1)) { 248 break 249 } 250 r.stackPop() 251 if r.operand(1) != -1 { 252 r.transferCapture(r.operand(0), r.operand(1), r.stackPeek(), r.textPos()) 253 } else { 254 r.capture(r.operand(0), r.stackPeek(), r.textPos()) 255 } 256 r.trackPush1(r.stackPeek()) 257 258 r.advance(2) 259 260 continue 261 262 case syntax.Capturemark | syntax.Back: 263 r.trackPop() 264 r.stackPush(r.trackPeek()) 265 r.uncapture() 266 if r.operand(0) != -1 && r.operand(1) != -1 { 267 r.uncapture() 268 } 269 270 break 271 272 case syntax.Branchmark: 273 r.stackPop() 274 275 matched := r.textPos() - r.stackPeek() 276 277 if matched != 0 { // Nonempty match -> loop now 278 r.trackPush2(r.stackPeek(), r.textPos()) // Save old mark, textpos 279 r.stackPush(r.textPos()) // Make new mark 280 r.goTo(r.operand(0)) // Loop 281 } else { // Empty match -> straight now 282 r.trackPushNeg1(r.stackPeek()) // Save old mark 283 r.advance(1) // Straight 284 } 285 continue 286 287 case syntax.Branchmark | syntax.Back: 288 r.trackPopN(2) 289 r.stackPop() 290 r.textto(r.trackPeekN(1)) // Recall position 291 r.trackPushNeg1(r.trackPeek()) // Save old mark 292 r.advance(1) // Straight 293 continue 294 295 case syntax.Branchmark | syntax.Back2: 296 r.trackPop() 297 r.stackPush(r.trackPeek()) // Recall old mark 298 break // Backtrack 299 300 case syntax.Lazybranchmark: 301 { 302 // We hit this the first time through a lazy loop and after each 303 // successful match of the inner expression. It simply continues 304 // on and doesn't loop. 305 r.stackPop() 306 307 oldMarkPos := r.stackPeek() 308 309 if r.textPos() != oldMarkPos { // Nonempty match -> try to loop again by going to 'back' state 310 if oldMarkPos != -1 { 311 r.trackPush2(oldMarkPos, r.textPos()) // Save old mark, textpos 312 } else { 313 r.trackPush2(r.textPos(), r.textPos()) 314 } 315 } else { 316 // The inner expression found an empty match, so we'll go directly to 'back2' if we 317 // backtrack. In this case, we need to push something on the stack, since back2 pops. 318 // However, in the case of ()+? or similar, this empty match may be legitimate, so push the text 319 // position associated with that empty match. 320 r.stackPush(oldMarkPos) 321 322 r.trackPushNeg1(r.stackPeek()) // Save old mark 323 } 324 r.advance(1) 325 continue 326 } 327 328 case syntax.Lazybranchmark | syntax.Back: 329 330 // After the first time, Lazybranchmark | syntax.Back occurs 331 // with each iteration of the loop, and therefore with every attempted 332 // match of the inner expression. We'll try to match the inner expression, 333 // then go back to Lazybranchmark if successful. If the inner expression 334 // fails, we go to Lazybranchmark | syntax.Back2 335 336 r.trackPopN(2) 337 pos := r.trackPeekN(1) 338 r.trackPushNeg1(r.trackPeek()) // Save old mark 339 r.stackPush(pos) // Make new mark 340 r.textto(pos) // Recall position 341 r.goTo(r.operand(0)) // Loop 342 continue 343 344 case syntax.Lazybranchmark | syntax.Back2: 345 // The lazy loop has failed. We'll do a true backtrack and 346 // start over before the lazy loop. 347 r.stackPop() 348 r.trackPop() 349 r.stackPush(r.trackPeek()) // Recall old mark 350 break 351 352 case syntax.Setcount: 353 r.stackPush2(r.textPos(), r.operand(0)) 354 r.trackPush() 355 r.advance(1) 356 continue 357 358 case syntax.Nullcount: 359 r.stackPush2(-1, r.operand(0)) 360 r.trackPush() 361 r.advance(1) 362 continue 363 364 case syntax.Setcount | syntax.Back: 365 r.stackPopN(2) 366 break 367 368 case syntax.Nullcount | syntax.Back: 369 r.stackPopN(2) 370 break 371 372 case syntax.Branchcount: 373 // r.stackPush: 374 // 0: Mark 375 // 1: Count 376 377 r.stackPopN(2) 378 mark := r.stackPeek() 379 count := r.stackPeekN(1) 380 matched := r.textPos() - mark 381 382 if count >= r.operand(1) || (matched == 0 && count >= 0) { // Max loops or empty match -> straight now 383 r.trackPushNeg2(mark, count) // Save old mark, count 384 r.advance(2) // Straight 385 } else { // Nonempty match -> count+loop now 386 r.trackPush1(mark) // remember mark 387 r.stackPush2(r.textPos(), count+1) // Make new mark, incr count 388 r.goTo(r.operand(0)) // Loop 389 } 390 continue 391 392 case syntax.Branchcount | syntax.Back: 393 // r.trackPush: 394 // 0: Previous mark 395 // r.stackPush: 396 // 0: Mark (= current pos, discarded) 397 // 1: Count 398 r.trackPop() 399 r.stackPopN(2) 400 if r.stackPeekN(1) > 0 { // Positive -> can go straight 401 r.textto(r.stackPeek()) // Zap to mark 402 r.trackPushNeg2(r.trackPeek(), r.stackPeekN(1)-1) // Save old mark, old count 403 r.advance(2) // Straight 404 continue 405 } 406 r.stackPush2(r.trackPeek(), r.stackPeekN(1)-1) // recall old mark, old count 407 break 408 409 case syntax.Branchcount | syntax.Back2: 410 // r.trackPush: 411 // 0: Previous mark 412 // 1: Previous count 413 r.trackPopN(2) 414 r.stackPush2(r.trackPeek(), r.trackPeekN(1)) // Recall old mark, old count 415 break // Backtrack 416 417 case syntax.Lazybranchcount: 418 // r.stackPush: 419 // 0: Mark 420 // 1: Count 421 422 r.stackPopN(2) 423 mark := r.stackPeek() 424 count := r.stackPeekN(1) 425 426 if count < 0 { // Negative count -> loop now 427 r.trackPushNeg1(mark) // Save old mark 428 r.stackPush2(r.textPos(), count+1) // Make new mark, incr count 429 r.goTo(r.operand(0)) // Loop 430 } else { // Nonneg count -> straight now 431 r.trackPush3(mark, count, r.textPos()) // Save mark, count, position 432 r.advance(2) // Straight 433 } 434 continue 435 436 case syntax.Lazybranchcount | syntax.Back: 437 // r.trackPush: 438 // 0: Mark 439 // 1: Count 440 // 2: r.textPos 441 442 r.trackPopN(3) 443 mark := r.trackPeek() 444 textpos := r.trackPeekN(2) 445 446 if r.trackPeekN(1) < r.operand(1) && textpos != mark { // Under limit and not empty match -> loop 447 r.textto(textpos) // Recall position 448 r.stackPush2(textpos, r.trackPeekN(1)+1) // Make new mark, incr count 449 r.trackPushNeg1(mark) // Save old mark 450 r.goTo(r.operand(0)) // Loop 451 continue 452 } else { // Max loops or empty match -> backtrack 453 r.stackPush2(r.trackPeek(), r.trackPeekN(1)) // Recall old mark, count 454 break // backtrack 455 } 456 457 case syntax.Lazybranchcount | syntax.Back2: 458 // r.trackPush: 459 // 0: Previous mark 460 // r.stackPush: 461 // 0: Mark (== current pos, discarded) 462 // 1: Count 463 r.trackPop() 464 r.stackPopN(2) 465 r.stackPush2(r.trackPeek(), r.stackPeekN(1)-1) // Recall old mark, count 466 break // Backtrack 467 468 case syntax.Setjump: 469 r.stackPush2(r.trackpos(), r.crawlpos()) 470 r.trackPush() 471 r.advance(0) 472 continue 473 474 case syntax.Setjump | syntax.Back: 475 r.stackPopN(2) 476 break 477 478 case syntax.Backjump: 479 // r.stackPush: 480 // 0: Saved trackpos 481 // 1: r.crawlpos 482 r.stackPopN(2) 483 r.trackto(r.stackPeek()) 484 485 for r.crawlpos() != r.stackPeekN(1) { 486 r.uncapture() 487 } 488 489 break 490 491 case syntax.Forejump: 492 // r.stackPush: 493 // 0: Saved trackpos 494 // 1: r.crawlpos 495 r.stackPopN(2) 496 r.trackto(r.stackPeek()) 497 r.trackPush1(r.stackPeekN(1)) 498 r.advance(0) 499 continue 500 501 case syntax.Forejump | syntax.Back: 502 // r.trackPush: 503 // 0: r.crawlpos 504 r.trackPop() 505 506 for r.crawlpos() != r.trackPeek() { 507 r.uncapture() 508 } 509 510 break 511 512 case syntax.Bol: 513 if r.leftchars() > 0 && r.charAt(r.textPos()-1) != '\n' { 514 break 515 } 516 r.advance(0) 517 continue 518 519 case syntax.Eol: 520 if r.rightchars() > 0 && r.charAt(r.textPos()) != '\n' { 521 break 522 } 523 r.advance(0) 524 continue 525 526 case syntax.Boundary: 527 if !r.isBoundary(r.textPos(), 0, r.runtextend) { 528 break 529 } 530 r.advance(0) 531 continue 532 533 case syntax.Nonboundary: 534 if r.isBoundary(r.textPos(), 0, r.runtextend) { 535 break 536 } 537 r.advance(0) 538 continue 539 540 case syntax.ECMABoundary: 541 if !r.isECMABoundary(r.textPos(), 0, r.runtextend) { 542 break 543 } 544 r.advance(0) 545 continue 546 547 case syntax.NonECMABoundary: 548 if r.isECMABoundary(r.textPos(), 0, r.runtextend) { 549 break 550 } 551 r.advance(0) 552 continue 553 554 case syntax.Beginning: 555 if r.leftchars() > 0 { 556 break 557 } 558 r.advance(0) 559 continue 560 561 case syntax.Start: 562 if r.textPos() != r.textstart() { 563 break 564 } 565 r.advance(0) 566 continue 567 568 case syntax.EndZ: 569 if r.rightchars() > 1 || r.rightchars() == 1 && r.charAt(r.textPos()) != '\n' { 570 break 571 } 572 r.advance(0) 573 continue 574 575 case syntax.End: 576 if r.rightchars() > 0 { 577 break 578 } 579 r.advance(0) 580 continue 581 582 case syntax.One: 583 if r.forwardchars() < 1 || r.forwardcharnext() != rune(r.operand(0)) { 584 break 585 } 586 587 r.advance(1) 588 continue 589 590 case syntax.Notone: 591 if r.forwardchars() < 1 || r.forwardcharnext() == rune(r.operand(0)) { 592 break 593 } 594 595 r.advance(1) 596 continue 597 598 case syntax.Set: 599 600 if r.forwardchars() < 1 || !r.code.Sets[r.operand(0)].CharIn(r.forwardcharnext()) { 601 break 602 } 603 604 r.advance(1) 605 continue 606 607 case syntax.Multi: 608 if !r.runematch(r.code.Strings[r.operand(0)]) { 609 break 610 } 611 612 r.advance(1) 613 continue 614 615 case syntax.Ref: 616 617 capnum := r.operand(0) 618 619 if r.runmatch.isMatched(capnum) { 620 if !r.refmatch(r.runmatch.matchIndex(capnum), r.runmatch.matchLength(capnum)) { 621 break 622 } 623 } else { 624 if (r.re.options & ECMAScript) == 0 { 625 break 626 } 627 } 628 629 r.advance(1) 630 continue 631 632 case syntax.Onerep: 633 634 c := r.operand(1) 635 636 if r.forwardchars() < c { 637 break 638 } 639 640 ch := rune(r.operand(0)) 641 642 for c > 0 { 643 if r.forwardcharnext() != ch { 644 goto BreakBackward 645 } 646 c-- 647 } 648 649 r.advance(2) 650 continue 651 652 case syntax.Notonerep: 653 654 c := r.operand(1) 655 656 if r.forwardchars() < c { 657 break 658 } 659 ch := rune(r.operand(0)) 660 661 for c > 0 { 662 if r.forwardcharnext() == ch { 663 goto BreakBackward 664 } 665 c-- 666 } 667 668 r.advance(2) 669 continue 670 671 case syntax.Setrep: 672 673 c := r.operand(1) 674 675 if r.forwardchars() < c { 676 break 677 } 678 679 set := r.code.Sets[r.operand(0)] 680 681 for c > 0 { 682 if !set.CharIn(r.forwardcharnext()) { 683 goto BreakBackward 684 } 685 c-- 686 } 687 688 r.advance(2) 689 continue 690 691 case syntax.Oneloop: 692 693 c := r.operand(1) 694 695 if c > r.forwardchars() { 696 c = r.forwardchars() 697 } 698 699 ch := rune(r.operand(0)) 700 i := c 701 702 for ; i > 0; i-- { 703 if r.forwardcharnext() != ch { 704 r.backwardnext() 705 break 706 } 707 } 708 709 if c > i { 710 r.trackPush2(c-i-1, r.textPos()-r.bump()) 711 } 712 713 r.advance(2) 714 continue 715 716 case syntax.Notoneloop: 717 718 c := r.operand(1) 719 720 if c > r.forwardchars() { 721 c = r.forwardchars() 722 } 723 724 ch := rune(r.operand(0)) 725 i := c 726 727 for ; i > 0; i-- { 728 if r.forwardcharnext() == ch { 729 r.backwardnext() 730 break 731 } 732 } 733 734 if c > i { 735 r.trackPush2(c-i-1, r.textPos()-r.bump()) 736 } 737 738 r.advance(2) 739 continue 740 741 case syntax.Setloop: 742 743 c := r.operand(1) 744 745 if c > r.forwardchars() { 746 c = r.forwardchars() 747 } 748 749 set := r.code.Sets[r.operand(0)] 750 i := c 751 752 for ; i > 0; i-- { 753 if !set.CharIn(r.forwardcharnext()) { 754 r.backwardnext() 755 break 756 } 757 } 758 759 if c > i { 760 r.trackPush2(c-i-1, r.textPos()-r.bump()) 761 } 762 763 r.advance(2) 764 continue 765 766 case syntax.Oneloop | syntax.Back, syntax.Notoneloop | syntax.Back: 767 768 r.trackPopN(2) 769 i := r.trackPeek() 770 pos := r.trackPeekN(1) 771 772 r.textto(pos) 773 774 if i > 0 { 775 r.trackPush2(i-1, pos-r.bump()) 776 } 777 778 r.advance(2) 779 continue 780 781 case syntax.Setloop | syntax.Back: 782 783 r.trackPopN(2) 784 i := r.trackPeek() 785 pos := r.trackPeekN(1) 786 787 r.textto(pos) 788 789 if i > 0 { 790 r.trackPush2(i-1, pos-r.bump()) 791 } 792 793 r.advance(2) 794 continue 795 796 case syntax.Onelazy, syntax.Notonelazy: 797 798 c := r.operand(1) 799 800 if c > r.forwardchars() { 801 c = r.forwardchars() 802 } 803 804 if c > 0 { 805 r.trackPush2(c-1, r.textPos()) 806 } 807 808 r.advance(2) 809 continue 810 811 case syntax.Setlazy: 812 813 c := r.operand(1) 814 815 if c > r.forwardchars() { 816 c = r.forwardchars() 817 } 818 819 if c > 0 { 820 r.trackPush2(c-1, r.textPos()) 821 } 822 823 r.advance(2) 824 continue 825 826 case syntax.Onelazy | syntax.Back: 827 828 r.trackPopN(2) 829 pos := r.trackPeekN(1) 830 r.textto(pos) 831 832 if r.forwardcharnext() != rune(r.operand(0)) { 833 break 834 } 835 836 i := r.trackPeek() 837 838 if i > 0 { 839 r.trackPush2(i-1, pos+r.bump()) 840 } 841 842 r.advance(2) 843 continue 844 845 case syntax.Notonelazy | syntax.Back: 846 847 r.trackPopN(2) 848 pos := r.trackPeekN(1) 849 r.textto(pos) 850 851 if r.forwardcharnext() == rune(r.operand(0)) { 852 break 853 } 854 855 i := r.trackPeek() 856 857 if i > 0 { 858 r.trackPush2(i-1, pos+r.bump()) 859 } 860 861 r.advance(2) 862 continue 863 864 case syntax.Setlazy | syntax.Back: 865 866 r.trackPopN(2) 867 pos := r.trackPeekN(1) 868 r.textto(pos) 869 870 if !r.code.Sets[r.operand(0)].CharIn(r.forwardcharnext()) { 871 break 872 } 873 874 i := r.trackPeek() 875 876 if i > 0 { 877 r.trackPush2(i-1, pos+r.bump()) 878 } 879 880 r.advance(2) 881 continue 882 883 default: 884 return errors.New("unknown state in regex runner") 885 } 886 887 BreakBackward: 888 ; 889 890 // "break Backward" comes here: 891 r.backtrack() 892 } 893} 894 895// increase the size of stack and track storage 896func (r *runner) ensureStorage() { 897 if r.runstackpos < r.runtrackcount*4 { 898 doubleIntSlice(&r.runstack, &r.runstackpos) 899 } 900 if r.runtrackpos < r.runtrackcount*4 { 901 doubleIntSlice(&r.runtrack, &r.runtrackpos) 902 } 903} 904 905func doubleIntSlice(s *[]int, pos *int) { 906 oldLen := len(*s) 907 newS := make([]int, oldLen*2) 908 909 copy(newS[oldLen:], *s) 910 *pos += oldLen 911 *s = newS 912} 913 914// Save a number on the longjump unrolling stack 915func (r *runner) crawl(i int) { 916 if r.runcrawlpos == 0 { 917 doubleIntSlice(&r.runcrawl, &r.runcrawlpos) 918 } 919 r.runcrawlpos-- 920 r.runcrawl[r.runcrawlpos] = i 921} 922 923// Remove a number from the longjump unrolling stack 924func (r *runner) popcrawl() int { 925 val := r.runcrawl[r.runcrawlpos] 926 r.runcrawlpos++ 927 return val 928} 929 930// Get the height of the stack 931func (r *runner) crawlpos() int { 932 return len(r.runcrawl) - r.runcrawlpos 933} 934 935func (r *runner) advance(i int) { 936 r.codepos += (i + 1) 937 r.setOperator(r.code.Codes[r.codepos]) 938} 939 940func (r *runner) goTo(newpos int) { 941 // when branching backward, ensure storage 942 if newpos < r.codepos { 943 r.ensureStorage() 944 } 945 946 r.setOperator(r.code.Codes[newpos]) 947 r.codepos = newpos 948} 949 950func (r *runner) textto(newpos int) { 951 r.runtextpos = newpos 952} 953 954func (r *runner) trackto(newpos int) { 955 r.runtrackpos = len(r.runtrack) - newpos 956} 957 958func (r *runner) textstart() int { 959 return r.runtextstart 960} 961 962func (r *runner) textPos() int { 963 return r.runtextpos 964} 965 966// push onto the backtracking stack 967func (r *runner) trackpos() int { 968 return len(r.runtrack) - r.runtrackpos 969} 970 971func (r *runner) trackPush() { 972 r.runtrackpos-- 973 r.runtrack[r.runtrackpos] = r.codepos 974} 975 976func (r *runner) trackPush1(I1 int) { 977 r.runtrackpos-- 978 r.runtrack[r.runtrackpos] = I1 979 r.runtrackpos-- 980 r.runtrack[r.runtrackpos] = r.codepos 981} 982 983func (r *runner) trackPush2(I1, I2 int) { 984 r.runtrackpos-- 985 r.runtrack[r.runtrackpos] = I1 986 r.runtrackpos-- 987 r.runtrack[r.runtrackpos] = I2 988 r.runtrackpos-- 989 r.runtrack[r.runtrackpos] = r.codepos 990} 991 992func (r *runner) trackPush3(I1, I2, I3 int) { 993 r.runtrackpos-- 994 r.runtrack[r.runtrackpos] = I1 995 r.runtrackpos-- 996 r.runtrack[r.runtrackpos] = I2 997 r.runtrackpos-- 998 r.runtrack[r.runtrackpos] = I3 999 r.runtrackpos-- 1000 r.runtrack[r.runtrackpos] = r.codepos 1001} 1002 1003func (r *runner) trackPushNeg1(I1 int) { 1004 r.runtrackpos-- 1005 r.runtrack[r.runtrackpos] = I1 1006 r.runtrackpos-- 1007 r.runtrack[r.runtrackpos] = -r.codepos 1008} 1009 1010func (r *runner) trackPushNeg2(I1, I2 int) { 1011 r.runtrackpos-- 1012 r.runtrack[r.runtrackpos] = I1 1013 r.runtrackpos-- 1014 r.runtrack[r.runtrackpos] = I2 1015 r.runtrackpos-- 1016 r.runtrack[r.runtrackpos] = -r.codepos 1017} 1018 1019func (r *runner) backtrack() { 1020 newpos := r.runtrack[r.runtrackpos] 1021 r.runtrackpos++ 1022 1023 if r.re.Debug() { 1024 if newpos < 0 { 1025 fmt.Printf(" Backtracking (back2) to code position %v\n", -newpos) 1026 } else { 1027 fmt.Printf(" Backtracking to code position %v\n", newpos) 1028 } 1029 } 1030 1031 if newpos < 0 { 1032 newpos = -newpos 1033 r.setOperator(r.code.Codes[newpos] | syntax.Back2) 1034 } else { 1035 r.setOperator(r.code.Codes[newpos] | syntax.Back) 1036 } 1037 1038 // When branching backward, ensure storage 1039 if newpos < r.codepos { 1040 r.ensureStorage() 1041 } 1042 1043 r.codepos = newpos 1044} 1045 1046func (r *runner) setOperator(op int) { 1047 r.caseInsensitive = (0 != (op & syntax.Ci)) 1048 r.rightToLeft = (0 != (op & syntax.Rtl)) 1049 r.operator = syntax.InstOp(op & ^(syntax.Rtl | syntax.Ci)) 1050} 1051 1052func (r *runner) trackPop() { 1053 r.runtrackpos++ 1054} 1055 1056// pop framesize items from the backtracking stack 1057func (r *runner) trackPopN(framesize int) { 1058 r.runtrackpos += framesize 1059} 1060 1061// Technically we are actually peeking at items already popped. So if you want to 1062// get and pop the top item from the stack, you do 1063// r.trackPop(); 1064// r.trackPeek(); 1065func (r *runner) trackPeek() int { 1066 return r.runtrack[r.runtrackpos-1] 1067} 1068 1069// get the ith element down on the backtracking stack 1070func (r *runner) trackPeekN(i int) int { 1071 return r.runtrack[r.runtrackpos-i-1] 1072} 1073 1074// Push onto the grouping stack 1075func (r *runner) stackPush(I1 int) { 1076 r.runstackpos-- 1077 r.runstack[r.runstackpos] = I1 1078} 1079 1080func (r *runner) stackPush2(I1, I2 int) { 1081 r.runstackpos-- 1082 r.runstack[r.runstackpos] = I1 1083 r.runstackpos-- 1084 r.runstack[r.runstackpos] = I2 1085} 1086 1087func (r *runner) stackPop() { 1088 r.runstackpos++ 1089} 1090 1091// pop framesize items from the grouping stack 1092func (r *runner) stackPopN(framesize int) { 1093 r.runstackpos += framesize 1094} 1095 1096// Technically we are actually peeking at items already popped. So if you want to 1097// get and pop the top item from the stack, you do 1098// r.stackPop(); 1099// r.stackPeek(); 1100func (r *runner) stackPeek() int { 1101 return r.runstack[r.runstackpos-1] 1102} 1103 1104// get the ith element down on the grouping stack 1105func (r *runner) stackPeekN(i int) int { 1106 return r.runstack[r.runstackpos-i-1] 1107} 1108 1109func (r *runner) operand(i int) int { 1110 return r.code.Codes[r.codepos+i+1] 1111} 1112 1113func (r *runner) leftchars() int { 1114 return r.runtextpos 1115} 1116 1117func (r *runner) rightchars() int { 1118 return r.runtextend - r.runtextpos 1119} 1120 1121func (r *runner) bump() int { 1122 if r.rightToLeft { 1123 return -1 1124 } 1125 return 1 1126} 1127 1128func (r *runner) forwardchars() int { 1129 if r.rightToLeft { 1130 return r.runtextpos 1131 } 1132 return r.runtextend - r.runtextpos 1133} 1134 1135func (r *runner) forwardcharnext() rune { 1136 var ch rune 1137 if r.rightToLeft { 1138 r.runtextpos-- 1139 ch = r.runtext[r.runtextpos] 1140 } else { 1141 ch = r.runtext[r.runtextpos] 1142 r.runtextpos++ 1143 } 1144 1145 if r.caseInsensitive { 1146 return unicode.ToLower(ch) 1147 } 1148 return ch 1149} 1150 1151func (r *runner) runematch(str []rune) bool { 1152 var pos int 1153 1154 c := len(str) 1155 if !r.rightToLeft { 1156 if r.runtextend-r.runtextpos < c { 1157 return false 1158 } 1159 1160 pos = r.runtextpos + c 1161 } else { 1162 if r.runtextpos-0 < c { 1163 return false 1164 } 1165 1166 pos = r.runtextpos 1167 } 1168 1169 if !r.caseInsensitive { 1170 for c != 0 { 1171 c-- 1172 pos-- 1173 if str[c] != r.runtext[pos] { 1174 return false 1175 } 1176 } 1177 } else { 1178 for c != 0 { 1179 c-- 1180 pos-- 1181 if str[c] != unicode.ToLower(r.runtext[pos]) { 1182 return false 1183 } 1184 } 1185 } 1186 1187 if !r.rightToLeft { 1188 pos += len(str) 1189 } 1190 1191 r.runtextpos = pos 1192 1193 return true 1194} 1195 1196func (r *runner) refmatch(index, len int) bool { 1197 var c, pos, cmpos int 1198 1199 if !r.rightToLeft { 1200 if r.runtextend-r.runtextpos < len { 1201 return false 1202 } 1203 1204 pos = r.runtextpos + len 1205 } else { 1206 if r.runtextpos-0 < len { 1207 return false 1208 } 1209 1210 pos = r.runtextpos 1211 } 1212 cmpos = index + len 1213 1214 c = len 1215 1216 if !r.caseInsensitive { 1217 for c != 0 { 1218 c-- 1219 cmpos-- 1220 pos-- 1221 if r.runtext[cmpos] != r.runtext[pos] { 1222 return false 1223 } 1224 1225 } 1226 } else { 1227 for c != 0 { 1228 c-- 1229 cmpos-- 1230 pos-- 1231 1232 if unicode.ToLower(r.runtext[cmpos]) != unicode.ToLower(r.runtext[pos]) { 1233 return false 1234 } 1235 } 1236 } 1237 1238 if !r.rightToLeft { 1239 pos += len 1240 } 1241 1242 r.runtextpos = pos 1243 1244 return true 1245} 1246 1247func (r *runner) backwardnext() { 1248 if r.rightToLeft { 1249 r.runtextpos++ 1250 } else { 1251 r.runtextpos-- 1252 } 1253} 1254 1255func (r *runner) charAt(j int) rune { 1256 return r.runtext[j] 1257} 1258 1259func (r *runner) findFirstChar() bool { 1260 1261 if 0 != (r.code.Anchors & (syntax.AnchorBeginning | syntax.AnchorStart | syntax.AnchorEndZ | syntax.AnchorEnd)) { 1262 if !r.code.RightToLeft { 1263 if (0 != (r.code.Anchors&syntax.AnchorBeginning) && r.runtextpos > 0) || 1264 (0 != (r.code.Anchors&syntax.AnchorStart) && r.runtextpos > r.runtextstart) { 1265 r.runtextpos = r.runtextend 1266 return false 1267 } 1268 if 0 != (r.code.Anchors&syntax.AnchorEndZ) && r.runtextpos < r.runtextend-1 { 1269 r.runtextpos = r.runtextend - 1 1270 } else if 0 != (r.code.Anchors&syntax.AnchorEnd) && r.runtextpos < r.runtextend { 1271 r.runtextpos = r.runtextend 1272 } 1273 } else { 1274 if (0 != (r.code.Anchors&syntax.AnchorEnd) && r.runtextpos < r.runtextend) || 1275 (0 != (r.code.Anchors&syntax.AnchorEndZ) && (r.runtextpos < r.runtextend-1 || 1276 (r.runtextpos == r.runtextend-1 && r.charAt(r.runtextpos) != '\n'))) || 1277 (0 != (r.code.Anchors&syntax.AnchorStart) && r.runtextpos < r.runtextstart) { 1278 r.runtextpos = 0 1279 return false 1280 } 1281 if 0 != (r.code.Anchors&syntax.AnchorBeginning) && r.runtextpos > 0 { 1282 r.runtextpos = 0 1283 } 1284 } 1285 1286 if r.code.BmPrefix != nil { 1287 return r.code.BmPrefix.IsMatch(r.runtext, r.runtextpos, 0, r.runtextend) 1288 } 1289 1290 return true // found a valid start or end anchor 1291 } else if r.code.BmPrefix != nil { 1292 r.runtextpos = r.code.BmPrefix.Scan(r.runtext, r.runtextpos, 0, r.runtextend) 1293 1294 if r.runtextpos == -1 { 1295 if r.code.RightToLeft { 1296 r.runtextpos = 0 1297 } else { 1298 r.runtextpos = r.runtextend 1299 } 1300 return false 1301 } 1302 1303 return true 1304 } else if r.code.FcPrefix == nil { 1305 return true 1306 } 1307 1308 r.rightToLeft = r.code.RightToLeft 1309 r.caseInsensitive = r.code.FcPrefix.CaseInsensitive 1310 1311 set := r.code.FcPrefix.PrefixSet 1312 if set.IsSingleton() { 1313 ch := set.SingletonChar() 1314 for i := r.forwardchars(); i > 0; i-- { 1315 if ch == r.forwardcharnext() { 1316 r.backwardnext() 1317 return true 1318 } 1319 } 1320 } else { 1321 for i := r.forwardchars(); i > 0; i-- { 1322 n := r.forwardcharnext() 1323 //fmt.Printf("%v in %v: %v\n", string(n), set.String(), set.CharIn(n)) 1324 if set.CharIn(n) { 1325 r.backwardnext() 1326 return true 1327 } 1328 } 1329 } 1330 1331 return false 1332} 1333 1334func (r *runner) initMatch() { 1335 // Use a hashtable'ed Match object if the capture numbers are sparse 1336 1337 if r.runmatch == nil { 1338 if r.re.caps != nil { 1339 r.runmatch = newMatchSparse(r.re, r.re.caps, r.re.capsize, r.runtext, r.runtextstart) 1340 } else { 1341 r.runmatch = newMatch(r.re, r.re.capsize, r.runtext, r.runtextstart) 1342 } 1343 } else { 1344 r.runmatch.reset(r.runtext, r.runtextstart) 1345 } 1346 1347 // note we test runcrawl, because it is the last one to be allocated 1348 // If there is an alloc failure in the middle of the three allocations, 1349 // we may still return to reuse this instance, and we want to behave 1350 // as if the allocations didn't occur. (we used to test _trackcount != 0) 1351 1352 if r.runcrawl != nil { 1353 r.runtrackpos = len(r.runtrack) 1354 r.runstackpos = len(r.runstack) 1355 r.runcrawlpos = len(r.runcrawl) 1356 return 1357 } 1358 1359 r.initTrackCount() 1360 1361 tracksize := r.runtrackcount * 8 1362 stacksize := r.runtrackcount * 8 1363 1364 if tracksize < 32 { 1365 tracksize = 32 1366 } 1367 if stacksize < 16 { 1368 stacksize = 16 1369 } 1370 1371 r.runtrack = make([]int, tracksize) 1372 r.runtrackpos = tracksize 1373 1374 r.runstack = make([]int, stacksize) 1375 r.runstackpos = stacksize 1376 1377 r.runcrawl = make([]int, 32) 1378 r.runcrawlpos = 32 1379} 1380 1381func (r *runner) tidyMatch(quick bool) *Match { 1382 if !quick { 1383 match := r.runmatch 1384 1385 r.runmatch = nil 1386 1387 match.tidy(r.runtextpos) 1388 return match 1389 } else { 1390 // send back our match -- it's not leaving the package, so it's safe to not clean it up 1391 // this reduces allocs for frequent calls to the "IsMatch" bool-only functions 1392 return r.runmatch 1393 } 1394} 1395 1396// capture captures a subexpression. Note that the 1397// capnum used here has already been mapped to a non-sparse 1398// index (by the code generator RegexWriter). 1399func (r *runner) capture(capnum, start, end int) { 1400 if end < start { 1401 T := end 1402 end = start 1403 start = T 1404 } 1405 1406 r.crawl(capnum) 1407 r.runmatch.addMatch(capnum, start, end-start) 1408} 1409 1410// transferCapture captures a subexpression. Note that the 1411// capnum used here has already been mapped to a non-sparse 1412// index (by the code generator RegexWriter). 1413func (r *runner) transferCapture(capnum, uncapnum, start, end int) { 1414 var start2, end2 int 1415 1416 // these are the two intervals that are cancelling each other 1417 1418 if end < start { 1419 T := end 1420 end = start 1421 start = T 1422 } 1423 1424 start2 = r.runmatch.matchIndex(uncapnum) 1425 end2 = start2 + r.runmatch.matchLength(uncapnum) 1426 1427 // The new capture gets the innermost defined interval 1428 1429 if start >= end2 { 1430 end = start 1431 start = end2 1432 } else if end <= start2 { 1433 start = start2 1434 } else { 1435 if end > end2 { 1436 end = end2 1437 } 1438 if start2 > start { 1439 start = start2 1440 } 1441 } 1442 1443 r.crawl(uncapnum) 1444 r.runmatch.balanceMatch(uncapnum) 1445 1446 if capnum != -1 { 1447 r.crawl(capnum) 1448 r.runmatch.addMatch(capnum, start, end-start) 1449 } 1450} 1451 1452// revert the last capture 1453func (r *runner) uncapture() { 1454 capnum := r.popcrawl() 1455 r.runmatch.removeMatch(capnum) 1456} 1457 1458//debug 1459 1460func (r *runner) dumpState() { 1461 back := "" 1462 if r.operator&syntax.Back != 0 { 1463 back = " Back" 1464 } 1465 if r.operator&syntax.Back2 != 0 { 1466 back += " Back2" 1467 } 1468 fmt.Printf("Text: %v\nTrack: %v\nStack: %v\n %s%s\n\n", 1469 r.textposDescription(), 1470 r.stackDescription(r.runtrack, r.runtrackpos), 1471 r.stackDescription(r.runstack, r.runstackpos), 1472 r.code.OpcodeDescription(r.codepos), 1473 back) 1474} 1475 1476func (r *runner) stackDescription(a []int, index int) string { 1477 buf := &bytes.Buffer{} 1478 1479 fmt.Fprintf(buf, "%v/%v", len(a)-index, len(a)) 1480 if buf.Len() < 8 { 1481 buf.WriteString(strings.Repeat(" ", 8-buf.Len())) 1482 } 1483 1484 buf.WriteRune('(') 1485 for i := index; i < len(a); i++ { 1486 if i > index { 1487 buf.WriteRune(' ') 1488 } 1489 1490 buf.WriteString(strconv.Itoa(a[i])) 1491 } 1492 1493 buf.WriteRune(')') 1494 1495 return buf.String() 1496} 1497 1498func (r *runner) textposDescription() string { 1499 buf := &bytes.Buffer{} 1500 1501 buf.WriteString(strconv.Itoa(r.runtextpos)) 1502 1503 if buf.Len() < 8 { 1504 buf.WriteString(strings.Repeat(" ", 8-buf.Len())) 1505 } 1506 1507 if r.runtextpos > 0 { 1508 buf.WriteString(syntax.CharDescription(r.runtext[r.runtextpos-1])) 1509 } else { 1510 buf.WriteRune('^') 1511 } 1512 1513 buf.WriteRune('>') 1514 1515 for i := r.runtextpos; i < r.runtextend; i++ { 1516 buf.WriteString(syntax.CharDescription(r.runtext[i])) 1517 } 1518 if buf.Len() >= 64 { 1519 buf.Truncate(61) 1520 buf.WriteString("...") 1521 } else { 1522 buf.WriteRune('$') 1523 } 1524 1525 return buf.String() 1526} 1527 1528// decide whether the pos 1529// at the specified index is a boundary or not. It's just not worth 1530// emitting inline code for this logic. 1531func (r *runner) isBoundary(index, startpos, endpos int) bool { 1532 return (index > startpos && syntax.IsWordChar(r.runtext[index-1])) != 1533 (index < endpos && syntax.IsWordChar(r.runtext[index])) 1534} 1535 1536func (r *runner) isECMABoundary(index, startpos, endpos int) bool { 1537 return (index > startpos && syntax.IsECMAWordChar(r.runtext[index-1])) != 1538 (index < endpos && syntax.IsECMAWordChar(r.runtext[index])) 1539} 1540 1541// this seems like a comment to justify randomly picking 1000 :-P 1542// We have determined this value in a series of experiments where x86 retail 1543// builds (ono-lab-optimized) were run on different pattern/input pairs. Larger values 1544// of TimeoutCheckFrequency did not tend to increase performance; smaller values 1545// of TimeoutCheckFrequency tended to slow down the execution. 1546const timeoutCheckFrequency int = 1000 1547 1548func (r *runner) startTimeoutWatch() { 1549 if r.ignoreTimeout { 1550 return 1551 } 1552 1553 r.timeoutChecksToSkip = timeoutCheckFrequency 1554 r.timeoutAt = time.Now().Add(r.timeout) 1555} 1556 1557func (r *runner) checkTimeout() error { 1558 if r.ignoreTimeout { 1559 return nil 1560 } 1561 r.timeoutChecksToSkip-- 1562 if r.timeoutChecksToSkip != 0 { 1563 return nil 1564 } 1565 1566 r.timeoutChecksToSkip = timeoutCheckFrequency 1567 return r.doCheckTimeout() 1568} 1569 1570func (r *runner) doCheckTimeout() error { 1571 current := time.Now() 1572 1573 if current.Before(r.timeoutAt) { 1574 return nil 1575 } 1576 1577 if r.re.Debug() { 1578 //Debug.WriteLine("") 1579 //Debug.WriteLine("RegEx match timeout occurred!") 1580 //Debug.WriteLine("Specified timeout: " + TimeSpan.FromMilliseconds(_timeout).ToString()) 1581 //Debug.WriteLine("Timeout check frequency: " + TimeoutCheckFrequency) 1582 //Debug.WriteLine("Search pattern: " + _runregex._pattern) 1583 //Debug.WriteLine("Input: " + r.runtext) 1584 //Debug.WriteLine("About to throw RegexMatchTimeoutException.") 1585 } 1586 1587 return fmt.Errorf("match timeout after %v on input `%v`", r.timeout, string(r.runtext)) 1588} 1589 1590func (r *runner) initTrackCount() { 1591 r.runtrackcount = r.code.TrackCount 1592} 1593 1594// getRunner returns a run to use for matching re. 1595// It uses the re's runner cache if possible, to avoid 1596// unnecessary allocation. 1597func (re *Regexp) getRunner() *runner { 1598 re.muRun.Lock() 1599 if n := len(re.runner); n > 0 { 1600 z := re.runner[n-1] 1601 re.runner = re.runner[:n-1] 1602 re.muRun.Unlock() 1603 return z 1604 } 1605 re.muRun.Unlock() 1606 z := &runner{ 1607 re: re, 1608 code: re.code, 1609 } 1610 return z 1611} 1612 1613// putRunner returns a runner to the re's cache. 1614// There is no attempt to limit the size of the cache, so it will 1615// grow to the maximum number of simultaneous matches 1616// run using re. (The cache empties when re gets garbage collected.) 1617func (re *Regexp) putRunner(r *runner) { 1618 re.muRun.Lock() 1619 re.runner = append(re.runner, r) 1620 re.muRun.Unlock() 1621} 1622