1package regexp2
2
3import (
4	"bytes"
5	"errors"
6	"fmt"
7	"math"
8	"strconv"
9	"strings"
10	"time"
11	"unicode"
12
13	"github.com/dlclark/regexp2/syntax"
14)
15
16type runner struct {
17	re   *Regexp
18	code *syntax.Code
19
20	runtextstart int // starting point for search
21
22	runtext    []rune // text to search
23	runtextpos int    // current position in text
24	runtextend int
25
26	// The backtracking stack.  Opcodes use this to store data regarding
27	// what they have matched and where to backtrack to.  Each "frame" on
28	// the stack takes the form of [CodePosition Data1 Data2...], where
29	// CodePosition is the position of the current opcode and
30	// the data values are all optional.  The CodePosition can be negative, and
31	// these values (also called "back2") are used by the BranchMark family of opcodes
32	// to indicate whether they are backtracking after a successful or failed
33	// match.
34	// When we backtrack, we pop the CodePosition off the stack, set the current
35	// instruction pointer to that code position, and mark the opcode
36	// with a backtracking flag ("Back").  Each opcode then knows how to
37	// handle its own data.
38	runtrack    []int
39	runtrackpos int
40
41	// This stack is used to track text positions across different opcodes.
42	// For example, in /(a*b)+/, the parentheses result in a SetMark/CaptureMark
43	// pair. SetMark records the text position before we match a*b.  Then
44	// CaptureMark uses that position to figure out where the capture starts.
45	// Opcodes which push onto this stack are always paired with other opcodes
46	// which will pop the value from it later.  A successful match should mean
47	// that this stack is empty.
48	runstack    []int
49	runstackpos int
50
51	// The crawl stack is used to keep track of captures.  Every time a group
52	// has a capture, we push its group number onto the runcrawl stack.  In
53	// the case of a balanced match, we push BOTH groups onto the stack.
54	runcrawl    []int
55	runcrawlpos int
56
57	runtrackcount int // count of states that may do backtracking
58
59	runmatch *Match // result object
60
61	ignoreTimeout       bool
62	timeout             time.Duration // timeout in milliseconds (needed for actual)
63	timeoutChecksToSkip int
64	timeoutAt           time.Time
65
66	operator        syntax.InstOp
67	codepos         int
68	rightToLeft     bool
69	caseInsensitive bool
70}
71
72// run searches for matches and can continue from the previous match
73//
74// quick is usually false, but can be true to not return matches, just put it in caches
75// textstart is -1 to start at the "beginning" (depending on Right-To-Left), otherwise an index in input
76// input is the string to search for our regex pattern
77func (re *Regexp) run(quick bool, textstart int, input []rune) (*Match, error) {
78
79	// get a cached runner
80	runner := re.getRunner()
81	defer re.putRunner(runner)
82
83	if textstart < 0 {
84		if re.RightToLeft() {
85			textstart = len(input)
86		} else {
87			textstart = 0
88		}
89	}
90
91	return runner.scan(input, textstart, quick, re.MatchTimeout)
92}
93
94// Scans the string to find the first match. Uses the Match object
95// both to feed text in and as a place to store matches that come out.
96//
97// All the action is in the Go() method. Our
98// responsibility is to load up the class members before
99// calling Go.
100//
101// The optimizer can compute a set of candidate starting characters,
102// and we could use a separate method Skip() that will quickly scan past
103// any characters that we know can't match.
104func (r *runner) scan(rt []rune, textstart int, quick bool, timeout time.Duration) (*Match, error) {
105	r.timeout = timeout
106	r.ignoreTimeout = (time.Duration(math.MaxInt64) == timeout)
107	r.runtextstart = textstart
108	r.runtext = rt
109	r.runtextend = len(rt)
110
111	stoppos := r.runtextend
112	bump := 1
113
114	if r.re.RightToLeft() {
115		bump = -1
116		stoppos = 0
117	}
118
119	r.runtextpos = textstart
120	initted := false
121
122	r.startTimeoutWatch()
123	for {
124		if r.re.Debug() {
125			//fmt.Printf("\nSearch content: %v\n", string(r.runtext))
126			fmt.Printf("\nSearch range: from 0 to %v\n", r.runtextend)
127			fmt.Printf("Firstchar search starting at %v stopping at %v\n", r.runtextpos, stoppos)
128		}
129
130		if r.findFirstChar() {
131			if err := r.checkTimeout(); err != nil {
132				return nil, err
133			}
134
135			if !initted {
136				r.initMatch()
137				initted = true
138			}
139
140			if r.re.Debug() {
141				fmt.Printf("Executing engine starting at %v\n\n", r.runtextpos)
142			}
143
144			if err := r.execute(); err != nil {
145				return nil, err
146			}
147
148			if r.runmatch.matchcount[0] > 0 {
149				// We'll return a match even if it touches a previous empty match
150				return r.tidyMatch(quick), nil
151			}
152
153			// reset state for another go
154			r.runtrackpos = len(r.runtrack)
155			r.runstackpos = len(r.runstack)
156			r.runcrawlpos = len(r.runcrawl)
157		}
158
159		// failure!
160
161		if r.runtextpos == stoppos {
162			r.tidyMatch(true)
163			return nil, nil
164		}
165
166		// Recognize leading []* and various anchors, and bump on failure accordingly
167
168		// r.bump by one and start again
169
170		r.runtextpos += bump
171	}
172	// We never get here
173}
174
175func (r *runner) execute() error {
176
177	r.goTo(0)
178
179	for {
180
181		if r.re.Debug() {
182			r.dumpState()
183		}
184
185		if err := r.checkTimeout(); err != nil {
186			return err
187		}
188
189		switch r.operator {
190		case syntax.Stop:
191			return nil
192
193		case syntax.Nothing:
194			break
195
196		case syntax.Goto:
197			r.goTo(r.operand(0))
198			continue
199
200		case syntax.Testref:
201			if !r.runmatch.isMatched(r.operand(0)) {
202				break
203			}
204			r.advance(1)
205			continue
206
207		case syntax.Lazybranch:
208			r.trackPush1(r.textPos())
209			r.advance(1)
210			continue
211
212		case syntax.Lazybranch | syntax.Back:
213			r.trackPop()
214			r.textto(r.trackPeek())
215			r.goTo(r.operand(0))
216			continue
217
218		case syntax.Setmark:
219			r.stackPush(r.textPos())
220			r.trackPush()
221			r.advance(0)
222			continue
223
224		case syntax.Nullmark:
225			r.stackPush(-1)
226			r.trackPush()
227			r.advance(0)
228			continue
229
230		case syntax.Setmark | syntax.Back, syntax.Nullmark | syntax.Back:
231			r.stackPop()
232			break
233
234		case syntax.Getmark:
235			r.stackPop()
236			r.trackPush1(r.stackPeek())
237			r.textto(r.stackPeek())
238			r.advance(0)
239			continue
240
241		case syntax.Getmark | syntax.Back:
242			r.trackPop()
243			r.stackPush(r.trackPeek())
244			break
245
246		case syntax.Capturemark:
247			if r.operand(1) != -1 && !r.runmatch.isMatched(r.operand(1)) {
248				break
249			}
250			r.stackPop()
251			if r.operand(1) != -1 {
252				r.transferCapture(r.operand(0), r.operand(1), r.stackPeek(), r.textPos())
253			} else {
254				r.capture(r.operand(0), r.stackPeek(), r.textPos())
255			}
256			r.trackPush1(r.stackPeek())
257
258			r.advance(2)
259
260			continue
261
262		case syntax.Capturemark | syntax.Back:
263			r.trackPop()
264			r.stackPush(r.trackPeek())
265			r.uncapture()
266			if r.operand(0) != -1 && r.operand(1) != -1 {
267				r.uncapture()
268			}
269
270			break
271
272		case syntax.Branchmark:
273			r.stackPop()
274
275			matched := r.textPos() - r.stackPeek()
276
277			if matched != 0 { // Nonempty match -> loop now
278				r.trackPush2(r.stackPeek(), r.textPos()) // Save old mark, textpos
279				r.stackPush(r.textPos())                 // Make new mark
280				r.goTo(r.operand(0))                     // Loop
281			} else { // Empty match -> straight now
282				r.trackPushNeg1(r.stackPeek()) // Save old mark
283				r.advance(1)                   // Straight
284			}
285			continue
286
287		case syntax.Branchmark | syntax.Back:
288			r.trackPopN(2)
289			r.stackPop()
290			r.textto(r.trackPeekN(1))      // Recall position
291			r.trackPushNeg1(r.trackPeek()) // Save old mark
292			r.advance(1)                   // Straight
293			continue
294
295		case syntax.Branchmark | syntax.Back2:
296			r.trackPop()
297			r.stackPush(r.trackPeek()) // Recall old mark
298			break                      // Backtrack
299
300		case syntax.Lazybranchmark:
301			{
302				// We hit this the first time through a lazy loop and after each
303				// successful match of the inner expression.  It simply continues
304				// on and doesn't loop.
305				r.stackPop()
306
307				oldMarkPos := r.stackPeek()
308
309				if r.textPos() != oldMarkPos { // Nonempty match -> try to loop again by going to 'back' state
310					if oldMarkPos != -1 {
311						r.trackPush2(oldMarkPos, r.textPos()) // Save old mark, textpos
312					} else {
313						r.trackPush2(r.textPos(), r.textPos())
314					}
315				} else {
316					// The inner expression found an empty match, so we'll go directly to 'back2' if we
317					// backtrack.  In this case, we need to push something on the stack, since back2 pops.
318					// However, in the case of ()+? or similar, this empty match may be legitimate, so push the text
319					// position associated with that empty match.
320					r.stackPush(oldMarkPos)
321
322					r.trackPushNeg1(r.stackPeek()) // Save old mark
323				}
324				r.advance(1)
325				continue
326			}
327
328		case syntax.Lazybranchmark | syntax.Back:
329
330			// After the first time, Lazybranchmark | syntax.Back occurs
331			// with each iteration of the loop, and therefore with every attempted
332			// match of the inner expression.  We'll try to match the inner expression,
333			// then go back to Lazybranchmark if successful.  If the inner expression
334			// fails, we go to Lazybranchmark | syntax.Back2
335
336			r.trackPopN(2)
337			pos := r.trackPeekN(1)
338			r.trackPushNeg1(r.trackPeek()) // Save old mark
339			r.stackPush(pos)               // Make new mark
340			r.textto(pos)                  // Recall position
341			r.goTo(r.operand(0))           // Loop
342			continue
343
344		case syntax.Lazybranchmark | syntax.Back2:
345			// The lazy loop has failed.  We'll do a true backtrack and
346			// start over before the lazy loop.
347			r.stackPop()
348			r.trackPop()
349			r.stackPush(r.trackPeek()) // Recall old mark
350			break
351
352		case syntax.Setcount:
353			r.stackPush2(r.textPos(), r.operand(0))
354			r.trackPush()
355			r.advance(1)
356			continue
357
358		case syntax.Nullcount:
359			r.stackPush2(-1, r.operand(0))
360			r.trackPush()
361			r.advance(1)
362			continue
363
364		case syntax.Setcount | syntax.Back:
365			r.stackPopN(2)
366			break
367
368		case syntax.Nullcount | syntax.Back:
369			r.stackPopN(2)
370			break
371
372		case syntax.Branchcount:
373			// r.stackPush:
374			//  0: Mark
375			//  1: Count
376
377			r.stackPopN(2)
378			mark := r.stackPeek()
379			count := r.stackPeekN(1)
380			matched := r.textPos() - mark
381
382			if count >= r.operand(1) || (matched == 0 && count >= 0) { // Max loops or empty match -> straight now
383				r.trackPushNeg2(mark, count) // Save old mark, count
384				r.advance(2)                 // Straight
385			} else { // Nonempty match -> count+loop now
386				r.trackPush1(mark)                 // remember mark
387				r.stackPush2(r.textPos(), count+1) // Make new mark, incr count
388				r.goTo(r.operand(0))               // Loop
389			}
390			continue
391
392		case syntax.Branchcount | syntax.Back:
393			// r.trackPush:
394			//  0: Previous mark
395			// r.stackPush:
396			//  0: Mark (= current pos, discarded)
397			//  1: Count
398			r.trackPop()
399			r.stackPopN(2)
400			if r.stackPeekN(1) > 0 { // Positive -> can go straight
401				r.textto(r.stackPeek())                           // Zap to mark
402				r.trackPushNeg2(r.trackPeek(), r.stackPeekN(1)-1) // Save old mark, old count
403				r.advance(2)                                      // Straight
404				continue
405			}
406			r.stackPush2(r.trackPeek(), r.stackPeekN(1)-1) // recall old mark, old count
407			break
408
409		case syntax.Branchcount | syntax.Back2:
410			// r.trackPush:
411			//  0: Previous mark
412			//  1: Previous count
413			r.trackPopN(2)
414			r.stackPush2(r.trackPeek(), r.trackPeekN(1)) // Recall old mark, old count
415			break                                        // Backtrack
416
417		case syntax.Lazybranchcount:
418			// r.stackPush:
419			//  0: Mark
420			//  1: Count
421
422			r.stackPopN(2)
423			mark := r.stackPeek()
424			count := r.stackPeekN(1)
425
426			if count < 0 { // Negative count -> loop now
427				r.trackPushNeg1(mark)              // Save old mark
428				r.stackPush2(r.textPos(), count+1) // Make new mark, incr count
429				r.goTo(r.operand(0))               // Loop
430			} else { // Nonneg count -> straight now
431				r.trackPush3(mark, count, r.textPos()) // Save mark, count, position
432				r.advance(2)                           // Straight
433			}
434			continue
435
436		case syntax.Lazybranchcount | syntax.Back:
437			// r.trackPush:
438			//  0: Mark
439			//  1: Count
440			//  2: r.textPos
441
442			r.trackPopN(3)
443			mark := r.trackPeek()
444			textpos := r.trackPeekN(2)
445
446			if r.trackPeekN(1) < r.operand(1) && textpos != mark { // Under limit and not empty match -> loop
447				r.textto(textpos)                        // Recall position
448				r.stackPush2(textpos, r.trackPeekN(1)+1) // Make new mark, incr count
449				r.trackPushNeg1(mark)                    // Save old mark
450				r.goTo(r.operand(0))                     // Loop
451				continue
452			} else { // Max loops or empty match -> backtrack
453				r.stackPush2(r.trackPeek(), r.trackPeekN(1)) // Recall old mark, count
454				break                                        // backtrack
455			}
456
457		case syntax.Lazybranchcount | syntax.Back2:
458			// r.trackPush:
459			//  0: Previous mark
460			// r.stackPush:
461			//  0: Mark (== current pos, discarded)
462			//  1: Count
463			r.trackPop()
464			r.stackPopN(2)
465			r.stackPush2(r.trackPeek(), r.stackPeekN(1)-1) // Recall old mark, count
466			break                                          // Backtrack
467
468		case syntax.Setjump:
469			r.stackPush2(r.trackpos(), r.crawlpos())
470			r.trackPush()
471			r.advance(0)
472			continue
473
474		case syntax.Setjump | syntax.Back:
475			r.stackPopN(2)
476			break
477
478		case syntax.Backjump:
479			// r.stackPush:
480			//  0: Saved trackpos
481			//  1: r.crawlpos
482			r.stackPopN(2)
483			r.trackto(r.stackPeek())
484
485			for r.crawlpos() != r.stackPeekN(1) {
486				r.uncapture()
487			}
488
489			break
490
491		case syntax.Forejump:
492			// r.stackPush:
493			//  0: Saved trackpos
494			//  1: r.crawlpos
495			r.stackPopN(2)
496			r.trackto(r.stackPeek())
497			r.trackPush1(r.stackPeekN(1))
498			r.advance(0)
499			continue
500
501		case syntax.Forejump | syntax.Back:
502			// r.trackPush:
503			//  0: r.crawlpos
504			r.trackPop()
505
506			for r.crawlpos() != r.trackPeek() {
507				r.uncapture()
508			}
509
510			break
511
512		case syntax.Bol:
513			if r.leftchars() > 0 && r.charAt(r.textPos()-1) != '\n' {
514				break
515			}
516			r.advance(0)
517			continue
518
519		case syntax.Eol:
520			if r.rightchars() > 0 && r.charAt(r.textPos()) != '\n' {
521				break
522			}
523			r.advance(0)
524			continue
525
526		case syntax.Boundary:
527			if !r.isBoundary(r.textPos(), 0, r.runtextend) {
528				break
529			}
530			r.advance(0)
531			continue
532
533		case syntax.Nonboundary:
534			if r.isBoundary(r.textPos(), 0, r.runtextend) {
535				break
536			}
537			r.advance(0)
538			continue
539
540		case syntax.ECMABoundary:
541			if !r.isECMABoundary(r.textPos(), 0, r.runtextend) {
542				break
543			}
544			r.advance(0)
545			continue
546
547		case syntax.NonECMABoundary:
548			if r.isECMABoundary(r.textPos(), 0, r.runtextend) {
549				break
550			}
551			r.advance(0)
552			continue
553
554		case syntax.Beginning:
555			if r.leftchars() > 0 {
556				break
557			}
558			r.advance(0)
559			continue
560
561		case syntax.Start:
562			if r.textPos() != r.textstart() {
563				break
564			}
565			r.advance(0)
566			continue
567
568		case syntax.EndZ:
569			if r.rightchars() > 1 || r.rightchars() == 1 && r.charAt(r.textPos()) != '\n' {
570				break
571			}
572			r.advance(0)
573			continue
574
575		case syntax.End:
576			if r.rightchars() > 0 {
577				break
578			}
579			r.advance(0)
580			continue
581
582		case syntax.One:
583			if r.forwardchars() < 1 || r.forwardcharnext() != rune(r.operand(0)) {
584				break
585			}
586
587			r.advance(1)
588			continue
589
590		case syntax.Notone:
591			if r.forwardchars() < 1 || r.forwardcharnext() == rune(r.operand(0)) {
592				break
593			}
594
595			r.advance(1)
596			continue
597
598		case syntax.Set:
599
600			if r.forwardchars() < 1 || !r.code.Sets[r.operand(0)].CharIn(r.forwardcharnext()) {
601				break
602			}
603
604			r.advance(1)
605			continue
606
607		case syntax.Multi:
608			if !r.runematch(r.code.Strings[r.operand(0)]) {
609				break
610			}
611
612			r.advance(1)
613			continue
614
615		case syntax.Ref:
616
617			capnum := r.operand(0)
618
619			if r.runmatch.isMatched(capnum) {
620				if !r.refmatch(r.runmatch.matchIndex(capnum), r.runmatch.matchLength(capnum)) {
621					break
622				}
623			} else {
624				if (r.re.options & ECMAScript) == 0 {
625					break
626				}
627			}
628
629			r.advance(1)
630			continue
631
632		case syntax.Onerep:
633
634			c := r.operand(1)
635
636			if r.forwardchars() < c {
637				break
638			}
639
640			ch := rune(r.operand(0))
641
642			for c > 0 {
643				if r.forwardcharnext() != ch {
644					goto BreakBackward
645				}
646				c--
647			}
648
649			r.advance(2)
650			continue
651
652		case syntax.Notonerep:
653
654			c := r.operand(1)
655
656			if r.forwardchars() < c {
657				break
658			}
659			ch := rune(r.operand(0))
660
661			for c > 0 {
662				if r.forwardcharnext() == ch {
663					goto BreakBackward
664				}
665				c--
666			}
667
668			r.advance(2)
669			continue
670
671		case syntax.Setrep:
672
673			c := r.operand(1)
674
675			if r.forwardchars() < c {
676				break
677			}
678
679			set := r.code.Sets[r.operand(0)]
680
681			for c > 0 {
682				if !set.CharIn(r.forwardcharnext()) {
683					goto BreakBackward
684				}
685				c--
686			}
687
688			r.advance(2)
689			continue
690
691		case syntax.Oneloop:
692
693			c := r.operand(1)
694
695			if c > r.forwardchars() {
696				c = r.forwardchars()
697			}
698
699			ch := rune(r.operand(0))
700			i := c
701
702			for ; i > 0; i-- {
703				if r.forwardcharnext() != ch {
704					r.backwardnext()
705					break
706				}
707			}
708
709			if c > i {
710				r.trackPush2(c-i-1, r.textPos()-r.bump())
711			}
712
713			r.advance(2)
714			continue
715
716		case syntax.Notoneloop:
717
718			c := r.operand(1)
719
720			if c > r.forwardchars() {
721				c = r.forwardchars()
722			}
723
724			ch := rune(r.operand(0))
725			i := c
726
727			for ; i > 0; i-- {
728				if r.forwardcharnext() == ch {
729					r.backwardnext()
730					break
731				}
732			}
733
734			if c > i {
735				r.trackPush2(c-i-1, r.textPos()-r.bump())
736			}
737
738			r.advance(2)
739			continue
740
741		case syntax.Setloop:
742
743			c := r.operand(1)
744
745			if c > r.forwardchars() {
746				c = r.forwardchars()
747			}
748
749			set := r.code.Sets[r.operand(0)]
750			i := c
751
752			for ; i > 0; i-- {
753				if !set.CharIn(r.forwardcharnext()) {
754					r.backwardnext()
755					break
756				}
757			}
758
759			if c > i {
760				r.trackPush2(c-i-1, r.textPos()-r.bump())
761			}
762
763			r.advance(2)
764			continue
765
766		case syntax.Oneloop | syntax.Back, syntax.Notoneloop | syntax.Back:
767
768			r.trackPopN(2)
769			i := r.trackPeek()
770			pos := r.trackPeekN(1)
771
772			r.textto(pos)
773
774			if i > 0 {
775				r.trackPush2(i-1, pos-r.bump())
776			}
777
778			r.advance(2)
779			continue
780
781		case syntax.Setloop | syntax.Back:
782
783			r.trackPopN(2)
784			i := r.trackPeek()
785			pos := r.trackPeekN(1)
786
787			r.textto(pos)
788
789			if i > 0 {
790				r.trackPush2(i-1, pos-r.bump())
791			}
792
793			r.advance(2)
794			continue
795
796		case syntax.Onelazy, syntax.Notonelazy:
797
798			c := r.operand(1)
799
800			if c > r.forwardchars() {
801				c = r.forwardchars()
802			}
803
804			if c > 0 {
805				r.trackPush2(c-1, r.textPos())
806			}
807
808			r.advance(2)
809			continue
810
811		case syntax.Setlazy:
812
813			c := r.operand(1)
814
815			if c > r.forwardchars() {
816				c = r.forwardchars()
817			}
818
819			if c > 0 {
820				r.trackPush2(c-1, r.textPos())
821			}
822
823			r.advance(2)
824			continue
825
826		case syntax.Onelazy | syntax.Back:
827
828			r.trackPopN(2)
829			pos := r.trackPeekN(1)
830			r.textto(pos)
831
832			if r.forwardcharnext() != rune(r.operand(0)) {
833				break
834			}
835
836			i := r.trackPeek()
837
838			if i > 0 {
839				r.trackPush2(i-1, pos+r.bump())
840			}
841
842			r.advance(2)
843			continue
844
845		case syntax.Notonelazy | syntax.Back:
846
847			r.trackPopN(2)
848			pos := r.trackPeekN(1)
849			r.textto(pos)
850
851			if r.forwardcharnext() == rune(r.operand(0)) {
852				break
853			}
854
855			i := r.trackPeek()
856
857			if i > 0 {
858				r.trackPush2(i-1, pos+r.bump())
859			}
860
861			r.advance(2)
862			continue
863
864		case syntax.Setlazy | syntax.Back:
865
866			r.trackPopN(2)
867			pos := r.trackPeekN(1)
868			r.textto(pos)
869
870			if !r.code.Sets[r.operand(0)].CharIn(r.forwardcharnext()) {
871				break
872			}
873
874			i := r.trackPeek()
875
876			if i > 0 {
877				r.trackPush2(i-1, pos+r.bump())
878			}
879
880			r.advance(2)
881			continue
882
883		default:
884			return errors.New("unknown state in regex runner")
885		}
886
887	BreakBackward:
888		;
889
890		// "break Backward" comes here:
891		r.backtrack()
892	}
893}
894
895// increase the size of stack and track storage
896func (r *runner) ensureStorage() {
897	if r.runstackpos < r.runtrackcount*4 {
898		doubleIntSlice(&r.runstack, &r.runstackpos)
899	}
900	if r.runtrackpos < r.runtrackcount*4 {
901		doubleIntSlice(&r.runtrack, &r.runtrackpos)
902	}
903}
904
905func doubleIntSlice(s *[]int, pos *int) {
906	oldLen := len(*s)
907	newS := make([]int, oldLen*2)
908
909	copy(newS[oldLen:], *s)
910	*pos += oldLen
911	*s = newS
912}
913
914// Save a number on the longjump unrolling stack
915func (r *runner) crawl(i int) {
916	if r.runcrawlpos == 0 {
917		doubleIntSlice(&r.runcrawl, &r.runcrawlpos)
918	}
919	r.runcrawlpos--
920	r.runcrawl[r.runcrawlpos] = i
921}
922
923// Remove a number from the longjump unrolling stack
924func (r *runner) popcrawl() int {
925	val := r.runcrawl[r.runcrawlpos]
926	r.runcrawlpos++
927	return val
928}
929
930// Get the height of the stack
931func (r *runner) crawlpos() int {
932	return len(r.runcrawl) - r.runcrawlpos
933}
934
935func (r *runner) advance(i int) {
936	r.codepos += (i + 1)
937	r.setOperator(r.code.Codes[r.codepos])
938}
939
940func (r *runner) goTo(newpos int) {
941	// when branching backward, ensure storage
942	if newpos < r.codepos {
943		r.ensureStorage()
944	}
945
946	r.setOperator(r.code.Codes[newpos])
947	r.codepos = newpos
948}
949
950func (r *runner) textto(newpos int) {
951	r.runtextpos = newpos
952}
953
954func (r *runner) trackto(newpos int) {
955	r.runtrackpos = len(r.runtrack) - newpos
956}
957
958func (r *runner) textstart() int {
959	return r.runtextstart
960}
961
962func (r *runner) textPos() int {
963	return r.runtextpos
964}
965
966// push onto the backtracking stack
967func (r *runner) trackpos() int {
968	return len(r.runtrack) - r.runtrackpos
969}
970
971func (r *runner) trackPush() {
972	r.runtrackpos--
973	r.runtrack[r.runtrackpos] = r.codepos
974}
975
976func (r *runner) trackPush1(I1 int) {
977	r.runtrackpos--
978	r.runtrack[r.runtrackpos] = I1
979	r.runtrackpos--
980	r.runtrack[r.runtrackpos] = r.codepos
981}
982
983func (r *runner) trackPush2(I1, I2 int) {
984	r.runtrackpos--
985	r.runtrack[r.runtrackpos] = I1
986	r.runtrackpos--
987	r.runtrack[r.runtrackpos] = I2
988	r.runtrackpos--
989	r.runtrack[r.runtrackpos] = r.codepos
990}
991
992func (r *runner) trackPush3(I1, I2, I3 int) {
993	r.runtrackpos--
994	r.runtrack[r.runtrackpos] = I1
995	r.runtrackpos--
996	r.runtrack[r.runtrackpos] = I2
997	r.runtrackpos--
998	r.runtrack[r.runtrackpos] = I3
999	r.runtrackpos--
1000	r.runtrack[r.runtrackpos] = r.codepos
1001}
1002
1003func (r *runner) trackPushNeg1(I1 int) {
1004	r.runtrackpos--
1005	r.runtrack[r.runtrackpos] = I1
1006	r.runtrackpos--
1007	r.runtrack[r.runtrackpos] = -r.codepos
1008}
1009
1010func (r *runner) trackPushNeg2(I1, I2 int) {
1011	r.runtrackpos--
1012	r.runtrack[r.runtrackpos] = I1
1013	r.runtrackpos--
1014	r.runtrack[r.runtrackpos] = I2
1015	r.runtrackpos--
1016	r.runtrack[r.runtrackpos] = -r.codepos
1017}
1018
1019func (r *runner) backtrack() {
1020	newpos := r.runtrack[r.runtrackpos]
1021	r.runtrackpos++
1022
1023	if r.re.Debug() {
1024		if newpos < 0 {
1025			fmt.Printf("       Backtracking (back2) to code position %v\n", -newpos)
1026		} else {
1027			fmt.Printf("       Backtracking to code position %v\n", newpos)
1028		}
1029	}
1030
1031	if newpos < 0 {
1032		newpos = -newpos
1033		r.setOperator(r.code.Codes[newpos] | syntax.Back2)
1034	} else {
1035		r.setOperator(r.code.Codes[newpos] | syntax.Back)
1036	}
1037
1038	// When branching backward, ensure storage
1039	if newpos < r.codepos {
1040		r.ensureStorage()
1041	}
1042
1043	r.codepos = newpos
1044}
1045
1046func (r *runner) setOperator(op int) {
1047	r.caseInsensitive = (0 != (op & syntax.Ci))
1048	r.rightToLeft = (0 != (op & syntax.Rtl))
1049	r.operator = syntax.InstOp(op & ^(syntax.Rtl | syntax.Ci))
1050}
1051
1052func (r *runner) trackPop() {
1053	r.runtrackpos++
1054}
1055
1056// pop framesize items from the backtracking stack
1057func (r *runner) trackPopN(framesize int) {
1058	r.runtrackpos += framesize
1059}
1060
1061// Technically we are actually peeking at items already popped.  So if you want to
1062// get and pop the top item from the stack, you do
1063// r.trackPop();
1064// r.trackPeek();
1065func (r *runner) trackPeek() int {
1066	return r.runtrack[r.runtrackpos-1]
1067}
1068
1069// get the ith element down on the backtracking stack
1070func (r *runner) trackPeekN(i int) int {
1071	return r.runtrack[r.runtrackpos-i-1]
1072}
1073
1074// Push onto the grouping stack
1075func (r *runner) stackPush(I1 int) {
1076	r.runstackpos--
1077	r.runstack[r.runstackpos] = I1
1078}
1079
1080func (r *runner) stackPush2(I1, I2 int) {
1081	r.runstackpos--
1082	r.runstack[r.runstackpos] = I1
1083	r.runstackpos--
1084	r.runstack[r.runstackpos] = I2
1085}
1086
1087func (r *runner) stackPop() {
1088	r.runstackpos++
1089}
1090
1091// pop framesize items from the grouping stack
1092func (r *runner) stackPopN(framesize int) {
1093	r.runstackpos += framesize
1094}
1095
1096// Technically we are actually peeking at items already popped.  So if you want to
1097// get and pop the top item from the stack, you do
1098// r.stackPop();
1099// r.stackPeek();
1100func (r *runner) stackPeek() int {
1101	return r.runstack[r.runstackpos-1]
1102}
1103
1104// get the ith element down on the grouping stack
1105func (r *runner) stackPeekN(i int) int {
1106	return r.runstack[r.runstackpos-i-1]
1107}
1108
1109func (r *runner) operand(i int) int {
1110	return r.code.Codes[r.codepos+i+1]
1111}
1112
1113func (r *runner) leftchars() int {
1114	return r.runtextpos
1115}
1116
1117func (r *runner) rightchars() int {
1118	return r.runtextend - r.runtextpos
1119}
1120
1121func (r *runner) bump() int {
1122	if r.rightToLeft {
1123		return -1
1124	}
1125	return 1
1126}
1127
1128func (r *runner) forwardchars() int {
1129	if r.rightToLeft {
1130		return r.runtextpos
1131	}
1132	return r.runtextend - r.runtextpos
1133}
1134
1135func (r *runner) forwardcharnext() rune {
1136	var ch rune
1137	if r.rightToLeft {
1138		r.runtextpos--
1139		ch = r.runtext[r.runtextpos]
1140	} else {
1141		ch = r.runtext[r.runtextpos]
1142		r.runtextpos++
1143	}
1144
1145	if r.caseInsensitive {
1146		return unicode.ToLower(ch)
1147	}
1148	return ch
1149}
1150
1151func (r *runner) runematch(str []rune) bool {
1152	var pos int
1153
1154	c := len(str)
1155	if !r.rightToLeft {
1156		if r.runtextend-r.runtextpos < c {
1157			return false
1158		}
1159
1160		pos = r.runtextpos + c
1161	} else {
1162		if r.runtextpos-0 < c {
1163			return false
1164		}
1165
1166		pos = r.runtextpos
1167	}
1168
1169	if !r.caseInsensitive {
1170		for c != 0 {
1171			c--
1172			pos--
1173			if str[c] != r.runtext[pos] {
1174				return false
1175			}
1176		}
1177	} else {
1178		for c != 0 {
1179			c--
1180			pos--
1181			if str[c] != unicode.ToLower(r.runtext[pos]) {
1182				return false
1183			}
1184		}
1185	}
1186
1187	if !r.rightToLeft {
1188		pos += len(str)
1189	}
1190
1191	r.runtextpos = pos
1192
1193	return true
1194}
1195
1196func (r *runner) refmatch(index, len int) bool {
1197	var c, pos, cmpos int
1198
1199	if !r.rightToLeft {
1200		if r.runtextend-r.runtextpos < len {
1201			return false
1202		}
1203
1204		pos = r.runtextpos + len
1205	} else {
1206		if r.runtextpos-0 < len {
1207			return false
1208		}
1209
1210		pos = r.runtextpos
1211	}
1212	cmpos = index + len
1213
1214	c = len
1215
1216	if !r.caseInsensitive {
1217		for c != 0 {
1218			c--
1219			cmpos--
1220			pos--
1221			if r.runtext[cmpos] != r.runtext[pos] {
1222				return false
1223			}
1224
1225		}
1226	} else {
1227		for c != 0 {
1228			c--
1229			cmpos--
1230			pos--
1231
1232			if unicode.ToLower(r.runtext[cmpos]) != unicode.ToLower(r.runtext[pos]) {
1233				return false
1234			}
1235		}
1236	}
1237
1238	if !r.rightToLeft {
1239		pos += len
1240	}
1241
1242	r.runtextpos = pos
1243
1244	return true
1245}
1246
1247func (r *runner) backwardnext() {
1248	if r.rightToLeft {
1249		r.runtextpos++
1250	} else {
1251		r.runtextpos--
1252	}
1253}
1254
1255func (r *runner) charAt(j int) rune {
1256	return r.runtext[j]
1257}
1258
1259func (r *runner) findFirstChar() bool {
1260
1261	if 0 != (r.code.Anchors & (syntax.AnchorBeginning | syntax.AnchorStart | syntax.AnchorEndZ | syntax.AnchorEnd)) {
1262		if !r.code.RightToLeft {
1263			if (0 != (r.code.Anchors&syntax.AnchorBeginning) && r.runtextpos > 0) ||
1264				(0 != (r.code.Anchors&syntax.AnchorStart) && r.runtextpos > r.runtextstart) {
1265				r.runtextpos = r.runtextend
1266				return false
1267			}
1268			if 0 != (r.code.Anchors&syntax.AnchorEndZ) && r.runtextpos < r.runtextend-1 {
1269				r.runtextpos = r.runtextend - 1
1270			} else if 0 != (r.code.Anchors&syntax.AnchorEnd) && r.runtextpos < r.runtextend {
1271				r.runtextpos = r.runtextend
1272			}
1273		} else {
1274			if (0 != (r.code.Anchors&syntax.AnchorEnd) && r.runtextpos < r.runtextend) ||
1275				(0 != (r.code.Anchors&syntax.AnchorEndZ) && (r.runtextpos < r.runtextend-1 ||
1276					(r.runtextpos == r.runtextend-1 && r.charAt(r.runtextpos) != '\n'))) ||
1277				(0 != (r.code.Anchors&syntax.AnchorStart) && r.runtextpos < r.runtextstart) {
1278				r.runtextpos = 0
1279				return false
1280			}
1281			if 0 != (r.code.Anchors&syntax.AnchorBeginning) && r.runtextpos > 0 {
1282				r.runtextpos = 0
1283			}
1284		}
1285
1286		if r.code.BmPrefix != nil {
1287			return r.code.BmPrefix.IsMatch(r.runtext, r.runtextpos, 0, r.runtextend)
1288		}
1289
1290		return true // found a valid start or end anchor
1291	} else if r.code.BmPrefix != nil {
1292		r.runtextpos = r.code.BmPrefix.Scan(r.runtext, r.runtextpos, 0, r.runtextend)
1293
1294		if r.runtextpos == -1 {
1295			if r.code.RightToLeft {
1296				r.runtextpos = 0
1297			} else {
1298				r.runtextpos = r.runtextend
1299			}
1300			return false
1301		}
1302
1303		return true
1304	} else if r.code.FcPrefix == nil {
1305		return true
1306	}
1307
1308	r.rightToLeft = r.code.RightToLeft
1309	r.caseInsensitive = r.code.FcPrefix.CaseInsensitive
1310
1311	set := r.code.FcPrefix.PrefixSet
1312	if set.IsSingleton() {
1313		ch := set.SingletonChar()
1314		for i := r.forwardchars(); i > 0; i-- {
1315			if ch == r.forwardcharnext() {
1316				r.backwardnext()
1317				return true
1318			}
1319		}
1320	} else {
1321		for i := r.forwardchars(); i > 0; i-- {
1322			n := r.forwardcharnext()
1323			//fmt.Printf("%v in %v: %v\n", string(n), set.String(), set.CharIn(n))
1324			if set.CharIn(n) {
1325				r.backwardnext()
1326				return true
1327			}
1328		}
1329	}
1330
1331	return false
1332}
1333
1334func (r *runner) initMatch() {
1335	// Use a hashtable'ed Match object if the capture numbers are sparse
1336
1337	if r.runmatch == nil {
1338		if r.re.caps != nil {
1339			r.runmatch = newMatchSparse(r.re, r.re.caps, r.re.capsize, r.runtext, r.runtextstart)
1340		} else {
1341			r.runmatch = newMatch(r.re, r.re.capsize, r.runtext, r.runtextstart)
1342		}
1343	} else {
1344		r.runmatch.reset(r.runtext, r.runtextstart)
1345	}
1346
1347	// note we test runcrawl, because it is the last one to be allocated
1348	// If there is an alloc failure in the middle of the three allocations,
1349	// we may still return to reuse this instance, and we want to behave
1350	// as if the allocations didn't occur. (we used to test _trackcount != 0)
1351
1352	if r.runcrawl != nil {
1353		r.runtrackpos = len(r.runtrack)
1354		r.runstackpos = len(r.runstack)
1355		r.runcrawlpos = len(r.runcrawl)
1356		return
1357	}
1358
1359	r.initTrackCount()
1360
1361	tracksize := r.runtrackcount * 8
1362	stacksize := r.runtrackcount * 8
1363
1364	if tracksize < 32 {
1365		tracksize = 32
1366	}
1367	if stacksize < 16 {
1368		stacksize = 16
1369	}
1370
1371	r.runtrack = make([]int, tracksize)
1372	r.runtrackpos = tracksize
1373
1374	r.runstack = make([]int, stacksize)
1375	r.runstackpos = stacksize
1376
1377	r.runcrawl = make([]int, 32)
1378	r.runcrawlpos = 32
1379}
1380
1381func (r *runner) tidyMatch(quick bool) *Match {
1382	if !quick {
1383		match := r.runmatch
1384
1385		r.runmatch = nil
1386
1387		match.tidy(r.runtextpos)
1388		return match
1389	} else {
1390		// send back our match -- it's not leaving the package, so it's safe to not clean it up
1391		// this reduces allocs for frequent calls to the "IsMatch" bool-only functions
1392		return r.runmatch
1393	}
1394}
1395
1396// capture captures a subexpression. Note that the
1397// capnum used here has already been mapped to a non-sparse
1398// index (by the code generator RegexWriter).
1399func (r *runner) capture(capnum, start, end int) {
1400	if end < start {
1401		T := end
1402		end = start
1403		start = T
1404	}
1405
1406	r.crawl(capnum)
1407	r.runmatch.addMatch(capnum, start, end-start)
1408}
1409
1410// transferCapture captures a subexpression. Note that the
1411// capnum used here has already been mapped to a non-sparse
1412// index (by the code generator RegexWriter).
1413func (r *runner) transferCapture(capnum, uncapnum, start, end int) {
1414	var start2, end2 int
1415
1416	// these are the two intervals that are cancelling each other
1417
1418	if end < start {
1419		T := end
1420		end = start
1421		start = T
1422	}
1423
1424	start2 = r.runmatch.matchIndex(uncapnum)
1425	end2 = start2 + r.runmatch.matchLength(uncapnum)
1426
1427	// The new capture gets the innermost defined interval
1428
1429	if start >= end2 {
1430		end = start
1431		start = end2
1432	} else if end <= start2 {
1433		start = start2
1434	} else {
1435		if end > end2 {
1436			end = end2
1437		}
1438		if start2 > start {
1439			start = start2
1440		}
1441	}
1442
1443	r.crawl(uncapnum)
1444	r.runmatch.balanceMatch(uncapnum)
1445
1446	if capnum != -1 {
1447		r.crawl(capnum)
1448		r.runmatch.addMatch(capnum, start, end-start)
1449	}
1450}
1451
1452// revert the last capture
1453func (r *runner) uncapture() {
1454	capnum := r.popcrawl()
1455	r.runmatch.removeMatch(capnum)
1456}
1457
1458//debug
1459
1460func (r *runner) dumpState() {
1461	back := ""
1462	if r.operator&syntax.Back != 0 {
1463		back = " Back"
1464	}
1465	if r.operator&syntax.Back2 != 0 {
1466		back += " Back2"
1467	}
1468	fmt.Printf("Text:  %v\nTrack: %v\nStack: %v\n       %s%s\n\n",
1469		r.textposDescription(),
1470		r.stackDescription(r.runtrack, r.runtrackpos),
1471		r.stackDescription(r.runstack, r.runstackpos),
1472		r.code.OpcodeDescription(r.codepos),
1473		back)
1474}
1475
1476func (r *runner) stackDescription(a []int, index int) string {
1477	buf := &bytes.Buffer{}
1478
1479	fmt.Fprintf(buf, "%v/%v", len(a)-index, len(a))
1480	if buf.Len() < 8 {
1481		buf.WriteString(strings.Repeat(" ", 8-buf.Len()))
1482	}
1483
1484	buf.WriteRune('(')
1485	for i := index; i < len(a); i++ {
1486		if i > index {
1487			buf.WriteRune(' ')
1488		}
1489
1490		buf.WriteString(strconv.Itoa(a[i]))
1491	}
1492
1493	buf.WriteRune(')')
1494
1495	return buf.String()
1496}
1497
1498func (r *runner) textposDescription() string {
1499	buf := &bytes.Buffer{}
1500
1501	buf.WriteString(strconv.Itoa(r.runtextpos))
1502
1503	if buf.Len() < 8 {
1504		buf.WriteString(strings.Repeat(" ", 8-buf.Len()))
1505	}
1506
1507	if r.runtextpos > 0 {
1508		buf.WriteString(syntax.CharDescription(r.runtext[r.runtextpos-1]))
1509	} else {
1510		buf.WriteRune('^')
1511	}
1512
1513	buf.WriteRune('>')
1514
1515	for i := r.runtextpos; i < r.runtextend; i++ {
1516		buf.WriteString(syntax.CharDescription(r.runtext[i]))
1517	}
1518	if buf.Len() >= 64 {
1519		buf.Truncate(61)
1520		buf.WriteString("...")
1521	} else {
1522		buf.WriteRune('$')
1523	}
1524
1525	return buf.String()
1526}
1527
1528// decide whether the pos
1529// at the specified index is a boundary or not. It's just not worth
1530// emitting inline code for this logic.
1531func (r *runner) isBoundary(index, startpos, endpos int) bool {
1532	return (index > startpos && syntax.IsWordChar(r.runtext[index-1])) !=
1533		(index < endpos && syntax.IsWordChar(r.runtext[index]))
1534}
1535
1536func (r *runner) isECMABoundary(index, startpos, endpos int) bool {
1537	return (index > startpos && syntax.IsECMAWordChar(r.runtext[index-1])) !=
1538		(index < endpos && syntax.IsECMAWordChar(r.runtext[index]))
1539}
1540
1541// this seems like a comment to justify randomly picking 1000 :-P
1542// We have determined this value in a series of experiments where x86 retail
1543// builds (ono-lab-optimized) were run on different pattern/input pairs. Larger values
1544// of TimeoutCheckFrequency did not tend to increase performance; smaller values
1545// of TimeoutCheckFrequency tended to slow down the execution.
1546const timeoutCheckFrequency int = 1000
1547
1548func (r *runner) startTimeoutWatch() {
1549	if r.ignoreTimeout {
1550		return
1551	}
1552
1553	r.timeoutChecksToSkip = timeoutCheckFrequency
1554	r.timeoutAt = time.Now().Add(r.timeout)
1555}
1556
1557func (r *runner) checkTimeout() error {
1558	if r.ignoreTimeout {
1559		return nil
1560	}
1561	r.timeoutChecksToSkip--
1562	if r.timeoutChecksToSkip != 0 {
1563		return nil
1564	}
1565
1566	r.timeoutChecksToSkip = timeoutCheckFrequency
1567	return r.doCheckTimeout()
1568}
1569
1570func (r *runner) doCheckTimeout() error {
1571	current := time.Now()
1572
1573	if current.Before(r.timeoutAt) {
1574		return nil
1575	}
1576
1577	if r.re.Debug() {
1578		//Debug.WriteLine("")
1579		//Debug.WriteLine("RegEx match timeout occurred!")
1580		//Debug.WriteLine("Specified timeout:       " + TimeSpan.FromMilliseconds(_timeout).ToString())
1581		//Debug.WriteLine("Timeout check frequency: " + TimeoutCheckFrequency)
1582		//Debug.WriteLine("Search pattern:          " + _runregex._pattern)
1583		//Debug.WriteLine("Input:                   " + r.runtext)
1584		//Debug.WriteLine("About to throw RegexMatchTimeoutException.")
1585	}
1586
1587	return fmt.Errorf("match timeout after %v on input `%v`", r.timeout, string(r.runtext))
1588}
1589
1590func (r *runner) initTrackCount() {
1591	r.runtrackcount = r.code.TrackCount
1592}
1593
1594// getRunner returns a run to use for matching re.
1595// It uses the re's runner cache if possible, to avoid
1596// unnecessary allocation.
1597func (re *Regexp) getRunner() *runner {
1598	re.muRun.Lock()
1599	if n := len(re.runner); n > 0 {
1600		z := re.runner[n-1]
1601		re.runner = re.runner[:n-1]
1602		re.muRun.Unlock()
1603		return z
1604	}
1605	re.muRun.Unlock()
1606	z := &runner{
1607		re:   re,
1608		code: re.code,
1609	}
1610	return z
1611}
1612
1613// putRunner returns a runner to the re's cache.
1614// There is no attempt to limit the size of the cache, so it will
1615// grow to the maximum number of simultaneous matches
1616// run using re.  (The cache empties when re gets garbage collected.)
1617func (re *Regexp) putRunner(r *runner) {
1618	re.muRun.Lock()
1619	re.runner = append(re.runner, r)
1620	re.muRun.Unlock()
1621}
1622