1package regexp2 2 3import ( 4 "bytes" 5 "errors" 6 "fmt" 7 "math" 8 "strconv" 9 "strings" 10 "time" 11 "unicode" 12 13 "github.com/dlclark/regexp2/syntax" 14) 15 16type runner struct { 17 re *Regexp 18 code *syntax.Code 19 20 runtextstart int // starting point for search 21 22 runtext []rune // text to search 23 runtextpos int // current position in text 24 runtextend int 25 26 // The backtracking stack. Opcodes use this to store data regarding 27 // what they have matched and where to backtrack to. Each "frame" on 28 // the stack takes the form of [CodePosition Data1 Data2...], where 29 // CodePosition is the position of the current opcode and 30 // the data values are all optional. The CodePosition can be negative, and 31 // these values (also called "back2") are used by the BranchMark family of opcodes 32 // to indicate whether they are backtracking after a successful or failed 33 // match. 34 // When we backtrack, we pop the CodePosition off the stack, set the current 35 // instruction pointer to that code position, and mark the opcode 36 // with a backtracking flag ("Back"). Each opcode then knows how to 37 // handle its own data. 38 runtrack []int 39 runtrackpos int 40 41 // This stack is used to track text positions across different opcodes. 42 // For example, in /(a*b)+/, the parentheses result in a SetMark/CaptureMark 43 // pair. SetMark records the text position before we match a*b. Then 44 // CaptureMark uses that position to figure out where the capture starts. 45 // Opcodes which push onto this stack are always paired with other opcodes 46 // which will pop the value from it later. A successful match should mean 47 // that this stack is empty. 48 runstack []int 49 runstackpos int 50 51 // The crawl stack is used to keep track of captures. Every time a group 52 // has a capture, we push its group number onto the runcrawl stack. In 53 // the case of a balanced match, we push BOTH groups onto the stack. 54 runcrawl []int 55 runcrawlpos int 56 57 runtrackcount int // count of states that may do backtracking 58 59 runmatch *Match // result object 60 61 ignoreTimeout bool 62 timeout time.Duration // timeout in milliseconds (needed for actual) 63 timeoutChecksToSkip int 64 timeoutAt time.Time 65 66 operator syntax.InstOp 67 codepos int 68 rightToLeft bool 69 caseInsensitive bool 70} 71 72// run searches for matches and can continue from the previous match 73// 74// quick is usually false, but can be true to not return matches, just put it in caches 75// textstart is -1 to start at the "beginning" (depending on Right-To-Left), otherwise an index in input 76// input is the string to search for our regex pattern 77func (re *Regexp) run(quick bool, textstart int, input []rune) (*Match, error) { 78 79 // get a cached runner 80 runner := re.getRunner() 81 defer re.putRunner(runner) 82 83 if textstart < 0 { 84 if re.RightToLeft() { 85 textstart = len(input) 86 } else { 87 textstart = 0 88 } 89 } 90 91 return runner.scan(input, textstart, quick, re.MatchTimeout) 92} 93 94// Scans the string to find the first match. Uses the Match object 95// both to feed text in and as a place to store matches that come out. 96// 97// All the action is in the Go() method. Our 98// responsibility is to load up the class members before 99// calling Go. 100// 101// The optimizer can compute a set of candidate starting characters, 102// and we could use a separate method Skip() that will quickly scan past 103// any characters that we know can't match. 104func (r *runner) scan(rt []rune, textstart int, quick bool, timeout time.Duration) (*Match, error) { 105 r.timeout = timeout 106 r.ignoreTimeout = (time.Duration(math.MaxInt64) == timeout) 107 r.runtextstart = textstart 108 r.runtext = rt 109 r.runtextend = len(rt) 110 111 stoppos := r.runtextend 112 bump := 1 113 114 if r.re.RightToLeft() { 115 bump = -1 116 stoppos = 0 117 } 118 119 r.runtextpos = textstart 120 initted := false 121 122 r.startTimeoutWatch() 123 for { 124 if r.re.Debug() { 125 //fmt.Printf("\nSearch content: %v\n", string(r.runtext)) 126 fmt.Printf("\nSearch range: from 0 to %v\n", r.runtextend) 127 fmt.Printf("Firstchar search starting at %v stopping at %v\n", r.runtextpos, stoppos) 128 } 129 130 if r.findFirstChar() { 131 if err := r.checkTimeout(); err != nil { 132 return nil, err 133 } 134 135 if !initted { 136 r.initMatch() 137 initted = true 138 } 139 140 if r.re.Debug() { 141 fmt.Printf("Executing engine starting at %v\n\n", r.runtextpos) 142 } 143 144 if err := r.execute(); err != nil { 145 return nil, err 146 } 147 148 if r.runmatch.matchcount[0] > 0 { 149 // We'll return a match even if it touches a previous empty match 150 return r.tidyMatch(quick), nil 151 } 152 153 // reset state for another go 154 r.runtrackpos = len(r.runtrack) 155 r.runstackpos = len(r.runstack) 156 r.runcrawlpos = len(r.runcrawl) 157 } 158 159 // failure! 160 161 if r.runtextpos == stoppos { 162 r.tidyMatch(true) 163 return nil, nil 164 } 165 166 // Recognize leading []* and various anchors, and bump on failure accordingly 167 168 // r.bump by one and start again 169 170 r.runtextpos += bump 171 } 172 // We never get here 173} 174 175func (r *runner) execute() error { 176 177 r.goTo(0) 178 179 for { 180 181 if r.re.Debug() { 182 r.dumpState() 183 } 184 185 if err := r.checkTimeout(); err != nil { 186 return err 187 } 188 189 switch r.operator { 190 case syntax.Stop: 191 return nil 192 193 case syntax.Nothing: 194 break 195 196 case syntax.Goto: 197 r.goTo(r.operand(0)) 198 continue 199 200 case syntax.Testref: 201 if !r.runmatch.isMatched(r.operand(0)) { 202 break 203 } 204 r.advance(1) 205 continue 206 207 case syntax.Lazybranch: 208 r.trackPush1(r.textPos()) 209 r.advance(1) 210 continue 211 212 case syntax.Lazybranch | syntax.Back: 213 r.trackPop() 214 r.textto(r.trackPeek()) 215 r.goTo(r.operand(0)) 216 continue 217 218 case syntax.Setmark: 219 r.stackPush(r.textPos()) 220 r.trackPush() 221 r.advance(0) 222 continue 223 224 case syntax.Nullmark: 225 r.stackPush(-1) 226 r.trackPush() 227 r.advance(0) 228 continue 229 230 case syntax.Setmark | syntax.Back, syntax.Nullmark | syntax.Back: 231 r.stackPop() 232 break 233 234 case syntax.Getmark: 235 r.stackPop() 236 r.trackPush1(r.stackPeek()) 237 r.textto(r.stackPeek()) 238 r.advance(0) 239 continue 240 241 case syntax.Getmark | syntax.Back: 242 r.trackPop() 243 r.stackPush(r.trackPeek()) 244 break 245 246 case syntax.Capturemark: 247 if r.operand(1) != -1 && !r.runmatch.isMatched(r.operand(1)) { 248 break 249 } 250 r.stackPop() 251 if r.operand(1) != -1 { 252 r.transferCapture(r.operand(0), r.operand(1), r.stackPeek(), r.textPos()) 253 } else { 254 r.capture(r.operand(0), r.stackPeek(), r.textPos()) 255 } 256 r.trackPush1(r.stackPeek()) 257 258 r.advance(2) 259 260 continue 261 262 case syntax.Capturemark | syntax.Back: 263 r.trackPop() 264 r.stackPush(r.trackPeek()) 265 r.uncapture() 266 if r.operand(0) != -1 && r.operand(1) != -1 { 267 r.uncapture() 268 } 269 270 break 271 272 case syntax.Branchmark: 273 r.stackPop() 274 275 matched := r.textPos() - r.stackPeek() 276 277 if matched != 0 { // Nonempty match -> loop now 278 r.trackPush2(r.stackPeek(), r.textPos()) // Save old mark, textpos 279 r.stackPush(r.textPos()) // Make new mark 280 r.goTo(r.operand(0)) // Loop 281 } else { // Empty match -> straight now 282 r.trackPushNeg1(r.stackPeek()) // Save old mark 283 r.advance(1) // Straight 284 } 285 continue 286 287 case syntax.Branchmark | syntax.Back: 288 r.trackPopN(2) 289 r.stackPop() 290 r.textto(r.trackPeekN(1)) // Recall position 291 r.trackPushNeg1(r.trackPeek()) // Save old mark 292 r.advance(1) // Straight 293 continue 294 295 case syntax.Branchmark | syntax.Back2: 296 r.trackPop() 297 r.stackPush(r.trackPeek()) // Recall old mark 298 break // Backtrack 299 300 case syntax.Lazybranchmark: 301 { 302 // We hit this the first time through a lazy loop and after each 303 // successful match of the inner expression. It simply continues 304 // on and doesn't loop. 305 r.stackPop() 306 307 oldMarkPos := r.stackPeek() 308 309 if r.textPos() != oldMarkPos { // Nonempty match -> try to loop again by going to 'back' state 310 if oldMarkPos != -1 { 311 r.trackPush2(oldMarkPos, r.textPos()) // Save old mark, textpos 312 } else { 313 r.trackPush2(r.textPos(), r.textPos()) 314 } 315 } else { 316 // The inner expression found an empty match, so we'll go directly to 'back2' if we 317 // backtrack. In this case, we need to push something on the stack, since back2 pops. 318 // However, in the case of ()+? or similar, this empty match may be legitimate, so push the text 319 // position associated with that empty match. 320 r.stackPush(oldMarkPos) 321 322 r.trackPushNeg1(r.stackPeek()) // Save old mark 323 } 324 r.advance(1) 325 continue 326 } 327 328 case syntax.Lazybranchmark | syntax.Back: 329 330 // After the first time, Lazybranchmark | syntax.Back occurs 331 // with each iteration of the loop, and therefore with every attempted 332 // match of the inner expression. We'll try to match the inner expression, 333 // then go back to Lazybranchmark if successful. If the inner expression 334 // fails, we go to Lazybranchmark | syntax.Back2 335 336 r.trackPopN(2) 337 pos := r.trackPeekN(1) 338 r.trackPushNeg1(r.trackPeek()) // Save old mark 339 r.stackPush(pos) // Make new mark 340 r.textto(pos) // Recall position 341 r.goTo(r.operand(0)) // Loop 342 continue 343 344 case syntax.Lazybranchmark | syntax.Back2: 345 // The lazy loop has failed. We'll do a true backtrack and 346 // start over before the lazy loop. 347 r.stackPop() 348 r.trackPop() 349 r.stackPush(r.trackPeek()) // Recall old mark 350 break 351 352 case syntax.Setcount: 353 r.stackPush2(r.textPos(), r.operand(0)) 354 r.trackPush() 355 r.advance(1) 356 continue 357 358 case syntax.Nullcount: 359 r.stackPush2(-1, r.operand(0)) 360 r.trackPush() 361 r.advance(1) 362 continue 363 364 case syntax.Setcount | syntax.Back: 365 r.stackPopN(2) 366 break 367 368 case syntax.Nullcount | syntax.Back: 369 r.stackPopN(2) 370 break 371 372 case syntax.Branchcount: 373 // r.stackPush: 374 // 0: Mark 375 // 1: Count 376 377 r.stackPopN(2) 378 mark := r.stackPeek() 379 count := r.stackPeekN(1) 380 matched := r.textPos() - mark 381 382 if count >= r.operand(1) || (matched == 0 && count >= 0) { // Max loops or empty match -> straight now 383 r.trackPushNeg2(mark, count) // Save old mark, count 384 r.advance(2) // Straight 385 } else { // Nonempty match -> count+loop now 386 r.trackPush1(mark) // remember mark 387 r.stackPush2(r.textPos(), count+1) // Make new mark, incr count 388 r.goTo(r.operand(0)) // Loop 389 } 390 continue 391 392 case syntax.Branchcount | syntax.Back: 393 // r.trackPush: 394 // 0: Previous mark 395 // r.stackPush: 396 // 0: Mark (= current pos, discarded) 397 // 1: Count 398 r.trackPop() 399 r.stackPopN(2) 400 if r.stackPeekN(1) > 0 { // Positive -> can go straight 401 r.textto(r.stackPeek()) // Zap to mark 402 r.trackPushNeg2(r.trackPeek(), r.stackPeekN(1)-1) // Save old mark, old count 403 r.advance(2) // Straight 404 continue 405 } 406 r.stackPush2(r.trackPeek(), r.stackPeekN(1)-1) // recall old mark, old count 407 break 408 409 case syntax.Branchcount | syntax.Back2: 410 // r.trackPush: 411 // 0: Previous mark 412 // 1: Previous count 413 r.trackPopN(2) 414 r.stackPush2(r.trackPeek(), r.trackPeekN(1)) // Recall old mark, old count 415 break // Backtrack 416 417 case syntax.Lazybranchcount: 418 // r.stackPush: 419 // 0: Mark 420 // 1: Count 421 422 r.stackPopN(2) 423 mark := r.stackPeek() 424 count := r.stackPeekN(1) 425 426 if count < 0 { // Negative count -> loop now 427 r.trackPushNeg1(mark) // Save old mark 428 r.stackPush2(r.textPos(), count+1) // Make new mark, incr count 429 r.goTo(r.operand(0)) // Loop 430 } else { // Nonneg count -> straight now 431 r.trackPush3(mark, count, r.textPos()) // Save mark, count, position 432 r.advance(2) // Straight 433 } 434 continue 435 436 case syntax.Lazybranchcount | syntax.Back: 437 // r.trackPush: 438 // 0: Mark 439 // 1: Count 440 // 2: r.textPos 441 442 r.trackPopN(3) 443 mark := r.trackPeek() 444 textpos := r.trackPeekN(2) 445 446 if r.trackPeekN(1) < r.operand(1) && textpos != mark { // Under limit and not empty match -> loop 447 r.textto(textpos) // Recall position 448 r.stackPush2(textpos, r.trackPeekN(1)+1) // Make new mark, incr count 449 r.trackPushNeg1(mark) // Save old mark 450 r.goTo(r.operand(0)) // Loop 451 continue 452 } else { // Max loops or empty match -> backtrack 453 r.stackPush2(r.trackPeek(), r.trackPeekN(1)) // Recall old mark, count 454 break // backtrack 455 } 456 457 case syntax.Lazybranchcount | syntax.Back2: 458 // r.trackPush: 459 // 0: Previous mark 460 // r.stackPush: 461 // 0: Mark (== current pos, discarded) 462 // 1: Count 463 r.trackPop() 464 r.stackPopN(2) 465 r.stackPush2(r.trackPeek(), r.stackPeekN(1)-1) // Recall old mark, count 466 break // Backtrack 467 468 case syntax.Setjump: 469 r.stackPush2(r.trackpos(), r.crawlpos()) 470 r.trackPush() 471 r.advance(0) 472 continue 473 474 case syntax.Setjump | syntax.Back: 475 r.stackPopN(2) 476 break 477 478 case syntax.Backjump: 479 // r.stackPush: 480 // 0: Saved trackpos 481 // 1: r.crawlpos 482 r.stackPopN(2) 483 r.trackto(r.stackPeek()) 484 485 for r.crawlpos() != r.stackPeekN(1) { 486 r.uncapture() 487 } 488 489 break 490 491 case syntax.Forejump: 492 // r.stackPush: 493 // 0: Saved trackpos 494 // 1: r.crawlpos 495 r.stackPopN(2) 496 r.trackto(r.stackPeek()) 497 r.trackPush1(r.stackPeekN(1)) 498 r.advance(0) 499 continue 500 501 case syntax.Forejump | syntax.Back: 502 // r.trackPush: 503 // 0: r.crawlpos 504 r.trackPop() 505 506 for r.crawlpos() != r.trackPeek() { 507 r.uncapture() 508 } 509 510 break 511 512 case syntax.Bol: 513 if r.leftchars() > 0 && r.charAt(r.textPos()-1) != '\n' { 514 break 515 } 516 r.advance(0) 517 continue 518 519 case syntax.Eol: 520 if r.rightchars() > 0 && r.charAt(r.textPos()) != '\n' { 521 break 522 } 523 r.advance(0) 524 continue 525 526 case syntax.Boundary: 527 if !r.isBoundary(r.textPos(), 0, r.runtextend) { 528 break 529 } 530 r.advance(0) 531 continue 532 533 case syntax.Nonboundary: 534 if r.isBoundary(r.textPos(), 0, r.runtextend) { 535 break 536 } 537 r.advance(0) 538 continue 539 540 case syntax.ECMABoundary: 541 if !r.isECMABoundary(r.textPos(), 0, r.runtextend) { 542 break 543 } 544 r.advance(0) 545 continue 546 547 case syntax.NonECMABoundary: 548 if r.isECMABoundary(r.textPos(), 0, r.runtextend) { 549 break 550 } 551 r.advance(0) 552 continue 553 554 case syntax.Beginning: 555 if r.leftchars() > 0 { 556 break 557 } 558 r.advance(0) 559 continue 560 561 case syntax.Start: 562 if r.textPos() != r.textstart() { 563 break 564 } 565 r.advance(0) 566 continue 567 568 case syntax.EndZ: 569 rchars := r.rightchars() 570 if rchars > 1 { 571 break 572 } 573 // RE2 and EcmaScript define $ as "asserts position at the end of the string" 574 // PCRE/.NET adds "or before the line terminator right at the end of the string (if any)" 575 if (r.re.options & (RE2 | ECMAScript)) != 0 { 576 // RE2/Ecmascript mode 577 if rchars > 0 { 578 break 579 } 580 } else if rchars == 1 && r.charAt(r.textPos()) != '\n' { 581 // "regular" mode 582 break 583 } 584 585 r.advance(0) 586 continue 587 588 case syntax.End: 589 if r.rightchars() > 0 { 590 break 591 } 592 r.advance(0) 593 continue 594 595 case syntax.One: 596 if r.forwardchars() < 1 || r.forwardcharnext() != rune(r.operand(0)) { 597 break 598 } 599 600 r.advance(1) 601 continue 602 603 case syntax.Notone: 604 if r.forwardchars() < 1 || r.forwardcharnext() == rune(r.operand(0)) { 605 break 606 } 607 608 r.advance(1) 609 continue 610 611 case syntax.Set: 612 613 if r.forwardchars() < 1 || !r.code.Sets[r.operand(0)].CharIn(r.forwardcharnext()) { 614 break 615 } 616 617 r.advance(1) 618 continue 619 620 case syntax.Multi: 621 if !r.runematch(r.code.Strings[r.operand(0)]) { 622 break 623 } 624 625 r.advance(1) 626 continue 627 628 case syntax.Ref: 629 630 capnum := r.operand(0) 631 632 if r.runmatch.isMatched(capnum) { 633 if !r.refmatch(r.runmatch.matchIndex(capnum), r.runmatch.matchLength(capnum)) { 634 break 635 } 636 } else { 637 if (r.re.options & ECMAScript) == 0 { 638 break 639 } 640 } 641 642 r.advance(1) 643 continue 644 645 case syntax.Onerep: 646 647 c := r.operand(1) 648 649 if r.forwardchars() < c { 650 break 651 } 652 653 ch := rune(r.operand(0)) 654 655 for c > 0 { 656 if r.forwardcharnext() != ch { 657 goto BreakBackward 658 } 659 c-- 660 } 661 662 r.advance(2) 663 continue 664 665 case syntax.Notonerep: 666 667 c := r.operand(1) 668 669 if r.forwardchars() < c { 670 break 671 } 672 ch := rune(r.operand(0)) 673 674 for c > 0 { 675 if r.forwardcharnext() == ch { 676 goto BreakBackward 677 } 678 c-- 679 } 680 681 r.advance(2) 682 continue 683 684 case syntax.Setrep: 685 686 c := r.operand(1) 687 688 if r.forwardchars() < c { 689 break 690 } 691 692 set := r.code.Sets[r.operand(0)] 693 694 for c > 0 { 695 if !set.CharIn(r.forwardcharnext()) { 696 goto BreakBackward 697 } 698 c-- 699 } 700 701 r.advance(2) 702 continue 703 704 case syntax.Oneloop: 705 706 c := r.operand(1) 707 708 if c > r.forwardchars() { 709 c = r.forwardchars() 710 } 711 712 ch := rune(r.operand(0)) 713 i := c 714 715 for ; i > 0; i-- { 716 if r.forwardcharnext() != ch { 717 r.backwardnext() 718 break 719 } 720 } 721 722 if c > i { 723 r.trackPush2(c-i-1, r.textPos()-r.bump()) 724 } 725 726 r.advance(2) 727 continue 728 729 case syntax.Notoneloop: 730 731 c := r.operand(1) 732 733 if c > r.forwardchars() { 734 c = r.forwardchars() 735 } 736 737 ch := rune(r.operand(0)) 738 i := c 739 740 for ; i > 0; i-- { 741 if r.forwardcharnext() == ch { 742 r.backwardnext() 743 break 744 } 745 } 746 747 if c > i { 748 r.trackPush2(c-i-1, r.textPos()-r.bump()) 749 } 750 751 r.advance(2) 752 continue 753 754 case syntax.Setloop: 755 756 c := r.operand(1) 757 758 if c > r.forwardchars() { 759 c = r.forwardchars() 760 } 761 762 set := r.code.Sets[r.operand(0)] 763 i := c 764 765 for ; i > 0; i-- { 766 if !set.CharIn(r.forwardcharnext()) { 767 r.backwardnext() 768 break 769 } 770 } 771 772 if c > i { 773 r.trackPush2(c-i-1, r.textPos()-r.bump()) 774 } 775 776 r.advance(2) 777 continue 778 779 case syntax.Oneloop | syntax.Back, syntax.Notoneloop | syntax.Back: 780 781 r.trackPopN(2) 782 i := r.trackPeek() 783 pos := r.trackPeekN(1) 784 785 r.textto(pos) 786 787 if i > 0 { 788 r.trackPush2(i-1, pos-r.bump()) 789 } 790 791 r.advance(2) 792 continue 793 794 case syntax.Setloop | syntax.Back: 795 796 r.trackPopN(2) 797 i := r.trackPeek() 798 pos := r.trackPeekN(1) 799 800 r.textto(pos) 801 802 if i > 0 { 803 r.trackPush2(i-1, pos-r.bump()) 804 } 805 806 r.advance(2) 807 continue 808 809 case syntax.Onelazy, syntax.Notonelazy: 810 811 c := r.operand(1) 812 813 if c > r.forwardchars() { 814 c = r.forwardchars() 815 } 816 817 if c > 0 { 818 r.trackPush2(c-1, r.textPos()) 819 } 820 821 r.advance(2) 822 continue 823 824 case syntax.Setlazy: 825 826 c := r.operand(1) 827 828 if c > r.forwardchars() { 829 c = r.forwardchars() 830 } 831 832 if c > 0 { 833 r.trackPush2(c-1, r.textPos()) 834 } 835 836 r.advance(2) 837 continue 838 839 case syntax.Onelazy | syntax.Back: 840 841 r.trackPopN(2) 842 pos := r.trackPeekN(1) 843 r.textto(pos) 844 845 if r.forwardcharnext() != rune(r.operand(0)) { 846 break 847 } 848 849 i := r.trackPeek() 850 851 if i > 0 { 852 r.trackPush2(i-1, pos+r.bump()) 853 } 854 855 r.advance(2) 856 continue 857 858 case syntax.Notonelazy | syntax.Back: 859 860 r.trackPopN(2) 861 pos := r.trackPeekN(1) 862 r.textto(pos) 863 864 if r.forwardcharnext() == rune(r.operand(0)) { 865 break 866 } 867 868 i := r.trackPeek() 869 870 if i > 0 { 871 r.trackPush2(i-1, pos+r.bump()) 872 } 873 874 r.advance(2) 875 continue 876 877 case syntax.Setlazy | syntax.Back: 878 879 r.trackPopN(2) 880 pos := r.trackPeekN(1) 881 r.textto(pos) 882 883 if !r.code.Sets[r.operand(0)].CharIn(r.forwardcharnext()) { 884 break 885 } 886 887 i := r.trackPeek() 888 889 if i > 0 { 890 r.trackPush2(i-1, pos+r.bump()) 891 } 892 893 r.advance(2) 894 continue 895 896 default: 897 return errors.New("unknown state in regex runner") 898 } 899 900 BreakBackward: 901 ; 902 903 // "break Backward" comes here: 904 r.backtrack() 905 } 906} 907 908// increase the size of stack and track storage 909func (r *runner) ensureStorage() { 910 if r.runstackpos < r.runtrackcount*4 { 911 doubleIntSlice(&r.runstack, &r.runstackpos) 912 } 913 if r.runtrackpos < r.runtrackcount*4 { 914 doubleIntSlice(&r.runtrack, &r.runtrackpos) 915 } 916} 917 918func doubleIntSlice(s *[]int, pos *int) { 919 oldLen := len(*s) 920 newS := make([]int, oldLen*2) 921 922 copy(newS[oldLen:], *s) 923 *pos += oldLen 924 *s = newS 925} 926 927// Save a number on the longjump unrolling stack 928func (r *runner) crawl(i int) { 929 if r.runcrawlpos == 0 { 930 doubleIntSlice(&r.runcrawl, &r.runcrawlpos) 931 } 932 r.runcrawlpos-- 933 r.runcrawl[r.runcrawlpos] = i 934} 935 936// Remove a number from the longjump unrolling stack 937func (r *runner) popcrawl() int { 938 val := r.runcrawl[r.runcrawlpos] 939 r.runcrawlpos++ 940 return val 941} 942 943// Get the height of the stack 944func (r *runner) crawlpos() int { 945 return len(r.runcrawl) - r.runcrawlpos 946} 947 948func (r *runner) advance(i int) { 949 r.codepos += (i + 1) 950 r.setOperator(r.code.Codes[r.codepos]) 951} 952 953func (r *runner) goTo(newpos int) { 954 // when branching backward or in place, ensure storage 955 if newpos <= r.codepos { 956 r.ensureStorage() 957 } 958 959 r.setOperator(r.code.Codes[newpos]) 960 r.codepos = newpos 961} 962 963func (r *runner) textto(newpos int) { 964 r.runtextpos = newpos 965} 966 967func (r *runner) trackto(newpos int) { 968 r.runtrackpos = len(r.runtrack) - newpos 969} 970 971func (r *runner) textstart() int { 972 return r.runtextstart 973} 974 975func (r *runner) textPos() int { 976 return r.runtextpos 977} 978 979// push onto the backtracking stack 980func (r *runner) trackpos() int { 981 return len(r.runtrack) - r.runtrackpos 982} 983 984func (r *runner) trackPush() { 985 r.runtrackpos-- 986 r.runtrack[r.runtrackpos] = r.codepos 987} 988 989func (r *runner) trackPush1(I1 int) { 990 r.runtrackpos-- 991 r.runtrack[r.runtrackpos] = I1 992 r.runtrackpos-- 993 r.runtrack[r.runtrackpos] = r.codepos 994} 995 996func (r *runner) trackPush2(I1, I2 int) { 997 r.runtrackpos-- 998 r.runtrack[r.runtrackpos] = I1 999 r.runtrackpos-- 1000 r.runtrack[r.runtrackpos] = I2 1001 r.runtrackpos-- 1002 r.runtrack[r.runtrackpos] = r.codepos 1003} 1004 1005func (r *runner) trackPush3(I1, I2, I3 int) { 1006 r.runtrackpos-- 1007 r.runtrack[r.runtrackpos] = I1 1008 r.runtrackpos-- 1009 r.runtrack[r.runtrackpos] = I2 1010 r.runtrackpos-- 1011 r.runtrack[r.runtrackpos] = I3 1012 r.runtrackpos-- 1013 r.runtrack[r.runtrackpos] = r.codepos 1014} 1015 1016func (r *runner) trackPushNeg1(I1 int) { 1017 r.runtrackpos-- 1018 r.runtrack[r.runtrackpos] = I1 1019 r.runtrackpos-- 1020 r.runtrack[r.runtrackpos] = -r.codepos 1021} 1022 1023func (r *runner) trackPushNeg2(I1, I2 int) { 1024 r.runtrackpos-- 1025 r.runtrack[r.runtrackpos] = I1 1026 r.runtrackpos-- 1027 r.runtrack[r.runtrackpos] = I2 1028 r.runtrackpos-- 1029 r.runtrack[r.runtrackpos] = -r.codepos 1030} 1031 1032func (r *runner) backtrack() { 1033 newpos := r.runtrack[r.runtrackpos] 1034 r.runtrackpos++ 1035 1036 if r.re.Debug() { 1037 if newpos < 0 { 1038 fmt.Printf(" Backtracking (back2) to code position %v\n", -newpos) 1039 } else { 1040 fmt.Printf(" Backtracking to code position %v\n", newpos) 1041 } 1042 } 1043 1044 if newpos < 0 { 1045 newpos = -newpos 1046 r.setOperator(r.code.Codes[newpos] | syntax.Back2) 1047 } else { 1048 r.setOperator(r.code.Codes[newpos] | syntax.Back) 1049 } 1050 1051 // When branching backward, ensure storage 1052 if newpos < r.codepos { 1053 r.ensureStorage() 1054 } 1055 1056 r.codepos = newpos 1057} 1058 1059func (r *runner) setOperator(op int) { 1060 r.caseInsensitive = (0 != (op & syntax.Ci)) 1061 r.rightToLeft = (0 != (op & syntax.Rtl)) 1062 r.operator = syntax.InstOp(op & ^(syntax.Rtl | syntax.Ci)) 1063} 1064 1065func (r *runner) trackPop() { 1066 r.runtrackpos++ 1067} 1068 1069// pop framesize items from the backtracking stack 1070func (r *runner) trackPopN(framesize int) { 1071 r.runtrackpos += framesize 1072} 1073 1074// Technically we are actually peeking at items already popped. So if you want to 1075// get and pop the top item from the stack, you do 1076// r.trackPop(); 1077// r.trackPeek(); 1078func (r *runner) trackPeek() int { 1079 return r.runtrack[r.runtrackpos-1] 1080} 1081 1082// get the ith element down on the backtracking stack 1083func (r *runner) trackPeekN(i int) int { 1084 return r.runtrack[r.runtrackpos-i-1] 1085} 1086 1087// Push onto the grouping stack 1088func (r *runner) stackPush(I1 int) { 1089 r.runstackpos-- 1090 r.runstack[r.runstackpos] = I1 1091} 1092 1093func (r *runner) stackPush2(I1, I2 int) { 1094 r.runstackpos-- 1095 r.runstack[r.runstackpos] = I1 1096 r.runstackpos-- 1097 r.runstack[r.runstackpos] = I2 1098} 1099 1100func (r *runner) stackPop() { 1101 r.runstackpos++ 1102} 1103 1104// pop framesize items from the grouping stack 1105func (r *runner) stackPopN(framesize int) { 1106 r.runstackpos += framesize 1107} 1108 1109// Technically we are actually peeking at items already popped. So if you want to 1110// get and pop the top item from the stack, you do 1111// r.stackPop(); 1112// r.stackPeek(); 1113func (r *runner) stackPeek() int { 1114 return r.runstack[r.runstackpos-1] 1115} 1116 1117// get the ith element down on the grouping stack 1118func (r *runner) stackPeekN(i int) int { 1119 return r.runstack[r.runstackpos-i-1] 1120} 1121 1122func (r *runner) operand(i int) int { 1123 return r.code.Codes[r.codepos+i+1] 1124} 1125 1126func (r *runner) leftchars() int { 1127 return r.runtextpos 1128} 1129 1130func (r *runner) rightchars() int { 1131 return r.runtextend - r.runtextpos 1132} 1133 1134func (r *runner) bump() int { 1135 if r.rightToLeft { 1136 return -1 1137 } 1138 return 1 1139} 1140 1141func (r *runner) forwardchars() int { 1142 if r.rightToLeft { 1143 return r.runtextpos 1144 } 1145 return r.runtextend - r.runtextpos 1146} 1147 1148func (r *runner) forwardcharnext() rune { 1149 var ch rune 1150 if r.rightToLeft { 1151 r.runtextpos-- 1152 ch = r.runtext[r.runtextpos] 1153 } else { 1154 ch = r.runtext[r.runtextpos] 1155 r.runtextpos++ 1156 } 1157 1158 if r.caseInsensitive { 1159 return unicode.ToLower(ch) 1160 } 1161 return ch 1162} 1163 1164func (r *runner) runematch(str []rune) bool { 1165 var pos int 1166 1167 c := len(str) 1168 if !r.rightToLeft { 1169 if r.runtextend-r.runtextpos < c { 1170 return false 1171 } 1172 1173 pos = r.runtextpos + c 1174 } else { 1175 if r.runtextpos-0 < c { 1176 return false 1177 } 1178 1179 pos = r.runtextpos 1180 } 1181 1182 if !r.caseInsensitive { 1183 for c != 0 { 1184 c-- 1185 pos-- 1186 if str[c] != r.runtext[pos] { 1187 return false 1188 } 1189 } 1190 } else { 1191 for c != 0 { 1192 c-- 1193 pos-- 1194 if str[c] != unicode.ToLower(r.runtext[pos]) { 1195 return false 1196 } 1197 } 1198 } 1199 1200 if !r.rightToLeft { 1201 pos += len(str) 1202 } 1203 1204 r.runtextpos = pos 1205 1206 return true 1207} 1208 1209func (r *runner) refmatch(index, len int) bool { 1210 var c, pos, cmpos int 1211 1212 if !r.rightToLeft { 1213 if r.runtextend-r.runtextpos < len { 1214 return false 1215 } 1216 1217 pos = r.runtextpos + len 1218 } else { 1219 if r.runtextpos-0 < len { 1220 return false 1221 } 1222 1223 pos = r.runtextpos 1224 } 1225 cmpos = index + len 1226 1227 c = len 1228 1229 if !r.caseInsensitive { 1230 for c != 0 { 1231 c-- 1232 cmpos-- 1233 pos-- 1234 if r.runtext[cmpos] != r.runtext[pos] { 1235 return false 1236 } 1237 1238 } 1239 } else { 1240 for c != 0 { 1241 c-- 1242 cmpos-- 1243 pos-- 1244 1245 if unicode.ToLower(r.runtext[cmpos]) != unicode.ToLower(r.runtext[pos]) { 1246 return false 1247 } 1248 } 1249 } 1250 1251 if !r.rightToLeft { 1252 pos += len 1253 } 1254 1255 r.runtextpos = pos 1256 1257 return true 1258} 1259 1260func (r *runner) backwardnext() { 1261 if r.rightToLeft { 1262 r.runtextpos++ 1263 } else { 1264 r.runtextpos-- 1265 } 1266} 1267 1268func (r *runner) charAt(j int) rune { 1269 return r.runtext[j] 1270} 1271 1272func (r *runner) findFirstChar() bool { 1273 1274 if 0 != (r.code.Anchors & (syntax.AnchorBeginning | syntax.AnchorStart | syntax.AnchorEndZ | syntax.AnchorEnd)) { 1275 if !r.code.RightToLeft { 1276 if (0 != (r.code.Anchors&syntax.AnchorBeginning) && r.runtextpos > 0) || 1277 (0 != (r.code.Anchors&syntax.AnchorStart) && r.runtextpos > r.runtextstart) { 1278 r.runtextpos = r.runtextend 1279 return false 1280 } 1281 if 0 != (r.code.Anchors&syntax.AnchorEndZ) && r.runtextpos < r.runtextend-1 { 1282 r.runtextpos = r.runtextend - 1 1283 } else if 0 != (r.code.Anchors&syntax.AnchorEnd) && r.runtextpos < r.runtextend { 1284 r.runtextpos = r.runtextend 1285 } 1286 } else { 1287 if (0 != (r.code.Anchors&syntax.AnchorEnd) && r.runtextpos < r.runtextend) || 1288 (0 != (r.code.Anchors&syntax.AnchorEndZ) && (r.runtextpos < r.runtextend-1 || 1289 (r.runtextpos == r.runtextend-1 && r.charAt(r.runtextpos) != '\n'))) || 1290 (0 != (r.code.Anchors&syntax.AnchorStart) && r.runtextpos < r.runtextstart) { 1291 r.runtextpos = 0 1292 return false 1293 } 1294 if 0 != (r.code.Anchors&syntax.AnchorBeginning) && r.runtextpos > 0 { 1295 r.runtextpos = 0 1296 } 1297 } 1298 1299 if r.code.BmPrefix != nil { 1300 return r.code.BmPrefix.IsMatch(r.runtext, r.runtextpos, 0, r.runtextend) 1301 } 1302 1303 return true // found a valid start or end anchor 1304 } else if r.code.BmPrefix != nil { 1305 r.runtextpos = r.code.BmPrefix.Scan(r.runtext, r.runtextpos, 0, r.runtextend) 1306 1307 if r.runtextpos == -1 { 1308 if r.code.RightToLeft { 1309 r.runtextpos = 0 1310 } else { 1311 r.runtextpos = r.runtextend 1312 } 1313 return false 1314 } 1315 1316 return true 1317 } else if r.code.FcPrefix == nil { 1318 return true 1319 } 1320 1321 r.rightToLeft = r.code.RightToLeft 1322 r.caseInsensitive = r.code.FcPrefix.CaseInsensitive 1323 1324 set := r.code.FcPrefix.PrefixSet 1325 if set.IsSingleton() { 1326 ch := set.SingletonChar() 1327 for i := r.forwardchars(); i > 0; i-- { 1328 if ch == r.forwardcharnext() { 1329 r.backwardnext() 1330 return true 1331 } 1332 } 1333 } else { 1334 for i := r.forwardchars(); i > 0; i-- { 1335 n := r.forwardcharnext() 1336 //fmt.Printf("%v in %v: %v\n", string(n), set.String(), set.CharIn(n)) 1337 if set.CharIn(n) { 1338 r.backwardnext() 1339 return true 1340 } 1341 } 1342 } 1343 1344 return false 1345} 1346 1347func (r *runner) initMatch() { 1348 // Use a hashtable'ed Match object if the capture numbers are sparse 1349 1350 if r.runmatch == nil { 1351 if r.re.caps != nil { 1352 r.runmatch = newMatchSparse(r.re, r.re.caps, r.re.capsize, r.runtext, r.runtextstart) 1353 } else { 1354 r.runmatch = newMatch(r.re, r.re.capsize, r.runtext, r.runtextstart) 1355 } 1356 } else { 1357 r.runmatch.reset(r.runtext, r.runtextstart) 1358 } 1359 1360 // note we test runcrawl, because it is the last one to be allocated 1361 // If there is an alloc failure in the middle of the three allocations, 1362 // we may still return to reuse this instance, and we want to behave 1363 // as if the allocations didn't occur. (we used to test _trackcount != 0) 1364 1365 if r.runcrawl != nil { 1366 r.runtrackpos = len(r.runtrack) 1367 r.runstackpos = len(r.runstack) 1368 r.runcrawlpos = len(r.runcrawl) 1369 return 1370 } 1371 1372 r.initTrackCount() 1373 1374 tracksize := r.runtrackcount * 8 1375 stacksize := r.runtrackcount * 8 1376 1377 if tracksize < 32 { 1378 tracksize = 32 1379 } 1380 if stacksize < 16 { 1381 stacksize = 16 1382 } 1383 1384 r.runtrack = make([]int, tracksize) 1385 r.runtrackpos = tracksize 1386 1387 r.runstack = make([]int, stacksize) 1388 r.runstackpos = stacksize 1389 1390 r.runcrawl = make([]int, 32) 1391 r.runcrawlpos = 32 1392} 1393 1394func (r *runner) tidyMatch(quick bool) *Match { 1395 if !quick { 1396 match := r.runmatch 1397 1398 r.runmatch = nil 1399 1400 match.tidy(r.runtextpos) 1401 return match 1402 } else { 1403 // send back our match -- it's not leaving the package, so it's safe to not clean it up 1404 // this reduces allocs for frequent calls to the "IsMatch" bool-only functions 1405 return r.runmatch 1406 } 1407} 1408 1409// capture captures a subexpression. Note that the 1410// capnum used here has already been mapped to a non-sparse 1411// index (by the code generator RegexWriter). 1412func (r *runner) capture(capnum, start, end int) { 1413 if end < start { 1414 T := end 1415 end = start 1416 start = T 1417 } 1418 1419 r.crawl(capnum) 1420 r.runmatch.addMatch(capnum, start, end-start) 1421} 1422 1423// transferCapture captures a subexpression. Note that the 1424// capnum used here has already been mapped to a non-sparse 1425// index (by the code generator RegexWriter). 1426func (r *runner) transferCapture(capnum, uncapnum, start, end int) { 1427 var start2, end2 int 1428 1429 // these are the two intervals that are cancelling each other 1430 1431 if end < start { 1432 T := end 1433 end = start 1434 start = T 1435 } 1436 1437 start2 = r.runmatch.matchIndex(uncapnum) 1438 end2 = start2 + r.runmatch.matchLength(uncapnum) 1439 1440 // The new capture gets the innermost defined interval 1441 1442 if start >= end2 { 1443 end = start 1444 start = end2 1445 } else if end <= start2 { 1446 start = start2 1447 } else { 1448 if end > end2 { 1449 end = end2 1450 } 1451 if start2 > start { 1452 start = start2 1453 } 1454 } 1455 1456 r.crawl(uncapnum) 1457 r.runmatch.balanceMatch(uncapnum) 1458 1459 if capnum != -1 { 1460 r.crawl(capnum) 1461 r.runmatch.addMatch(capnum, start, end-start) 1462 } 1463} 1464 1465// revert the last capture 1466func (r *runner) uncapture() { 1467 capnum := r.popcrawl() 1468 r.runmatch.removeMatch(capnum) 1469} 1470 1471//debug 1472 1473func (r *runner) dumpState() { 1474 back := "" 1475 if r.operator&syntax.Back != 0 { 1476 back = " Back" 1477 } 1478 if r.operator&syntax.Back2 != 0 { 1479 back += " Back2" 1480 } 1481 fmt.Printf("Text: %v\nTrack: %v\nStack: %v\n %s%s\n\n", 1482 r.textposDescription(), 1483 r.stackDescription(r.runtrack, r.runtrackpos), 1484 r.stackDescription(r.runstack, r.runstackpos), 1485 r.code.OpcodeDescription(r.codepos), 1486 back) 1487} 1488 1489func (r *runner) stackDescription(a []int, index int) string { 1490 buf := &bytes.Buffer{} 1491 1492 fmt.Fprintf(buf, "%v/%v", len(a)-index, len(a)) 1493 if buf.Len() < 8 { 1494 buf.WriteString(strings.Repeat(" ", 8-buf.Len())) 1495 } 1496 1497 buf.WriteRune('(') 1498 for i := index; i < len(a); i++ { 1499 if i > index { 1500 buf.WriteRune(' ') 1501 } 1502 1503 buf.WriteString(strconv.Itoa(a[i])) 1504 } 1505 1506 buf.WriteRune(')') 1507 1508 return buf.String() 1509} 1510 1511func (r *runner) textposDescription() string { 1512 buf := &bytes.Buffer{} 1513 1514 buf.WriteString(strconv.Itoa(r.runtextpos)) 1515 1516 if buf.Len() < 8 { 1517 buf.WriteString(strings.Repeat(" ", 8-buf.Len())) 1518 } 1519 1520 if r.runtextpos > 0 { 1521 buf.WriteString(syntax.CharDescription(r.runtext[r.runtextpos-1])) 1522 } else { 1523 buf.WriteRune('^') 1524 } 1525 1526 buf.WriteRune('>') 1527 1528 for i := r.runtextpos; i < r.runtextend; i++ { 1529 buf.WriteString(syntax.CharDescription(r.runtext[i])) 1530 } 1531 if buf.Len() >= 64 { 1532 buf.Truncate(61) 1533 buf.WriteString("...") 1534 } else { 1535 buf.WriteRune('$') 1536 } 1537 1538 return buf.String() 1539} 1540 1541// decide whether the pos 1542// at the specified index is a boundary or not. It's just not worth 1543// emitting inline code for this logic. 1544func (r *runner) isBoundary(index, startpos, endpos int) bool { 1545 return (index > startpos && syntax.IsWordChar(r.runtext[index-1])) != 1546 (index < endpos && syntax.IsWordChar(r.runtext[index])) 1547} 1548 1549func (r *runner) isECMABoundary(index, startpos, endpos int) bool { 1550 return (index > startpos && syntax.IsECMAWordChar(r.runtext[index-1])) != 1551 (index < endpos && syntax.IsECMAWordChar(r.runtext[index])) 1552} 1553 1554// this seems like a comment to justify randomly picking 1000 :-P 1555// We have determined this value in a series of experiments where x86 retail 1556// builds (ono-lab-optimized) were run on different pattern/input pairs. Larger values 1557// of TimeoutCheckFrequency did not tend to increase performance; smaller values 1558// of TimeoutCheckFrequency tended to slow down the execution. 1559const timeoutCheckFrequency int = 1000 1560 1561func (r *runner) startTimeoutWatch() { 1562 if r.ignoreTimeout { 1563 return 1564 } 1565 1566 r.timeoutChecksToSkip = timeoutCheckFrequency 1567 r.timeoutAt = time.Now().Add(r.timeout) 1568} 1569 1570func (r *runner) checkTimeout() error { 1571 if r.ignoreTimeout { 1572 return nil 1573 } 1574 r.timeoutChecksToSkip-- 1575 if r.timeoutChecksToSkip != 0 { 1576 return nil 1577 } 1578 1579 r.timeoutChecksToSkip = timeoutCheckFrequency 1580 return r.doCheckTimeout() 1581} 1582 1583func (r *runner) doCheckTimeout() error { 1584 current := time.Now() 1585 1586 if current.Before(r.timeoutAt) { 1587 return nil 1588 } 1589 1590 if r.re.Debug() { 1591 //Debug.WriteLine("") 1592 //Debug.WriteLine("RegEx match timeout occurred!") 1593 //Debug.WriteLine("Specified timeout: " + TimeSpan.FromMilliseconds(_timeout).ToString()) 1594 //Debug.WriteLine("Timeout check frequency: " + TimeoutCheckFrequency) 1595 //Debug.WriteLine("Search pattern: " + _runregex._pattern) 1596 //Debug.WriteLine("Input: " + r.runtext) 1597 //Debug.WriteLine("About to throw RegexMatchTimeoutException.") 1598 } 1599 1600 return fmt.Errorf("match timeout after %v on input `%v`", r.timeout, string(r.runtext)) 1601} 1602 1603func (r *runner) initTrackCount() { 1604 r.runtrackcount = r.code.TrackCount 1605} 1606 1607// getRunner returns a run to use for matching re. 1608// It uses the re's runner cache if possible, to avoid 1609// unnecessary allocation. 1610func (re *Regexp) getRunner() *runner { 1611 re.muRun.Lock() 1612 if n := len(re.runner); n > 0 { 1613 z := re.runner[n-1] 1614 re.runner = re.runner[:n-1] 1615 re.muRun.Unlock() 1616 return z 1617 } 1618 re.muRun.Unlock() 1619 z := &runner{ 1620 re: re, 1621 code: re.code, 1622 } 1623 return z 1624} 1625 1626// putRunner returns a runner to the re's cache. 1627// There is no attempt to limit the size of the cache, so it will 1628// grow to the maximum number of simultaneous matches 1629// run using re. (The cache empties when re gets garbage collected.) 1630func (re *Regexp) putRunner(r *runner) { 1631 re.muRun.Lock() 1632 re.runner = append(re.runner, r) 1633 re.muRun.Unlock() 1634} 1635