1package goja
2
3import (
4	"fmt"
5	"github.com/dop251/goja/parser"
6	"regexp"
7	"strings"
8	"unicode/utf16"
9	"unicode/utf8"
10)
11
12func (r *Runtime) newRegexpObject(proto *Object) *regexpObject {
13	v := &Object{runtime: r}
14
15	o := &regexpObject{}
16	o.class = classRegExp
17	o.val = v
18	o.extensible = true
19	v.self = o
20	o.prototype = proto
21	o.init()
22	return o
23}
24
25func (r *Runtime) newRegExpp(pattern *regexpPattern, patternStr valueString, proto *Object) *regexpObject {
26	o := r.newRegexpObject(proto)
27
28	o.pattern = pattern
29	o.source = patternStr
30
31	return o
32}
33
// decodeHex interprets s as an unsigned hexadecimal number (upper or lower
// case digits, no prefix). It returns the value and true on success, or
// (0, false) as soon as a non-hex byte is seen. An empty string yields
// (0, true).
func decodeHex(s string) (int, bool) {
	value := 0
	for _, c := range []byte(s) {
		var digit int
		if c >= '0' && c <= '9' {
			digit = int(c - '0')
		} else if c >= 'a' && c <= 'f' {
			digit = int(c-'a') + 10
		} else if c >= 'A' && c <= 'F' {
			digit = int(c-'A') + 10
		} else {
			return 0, false
		}
		value = value*16 + digit
	}
	return value, true
}
53
54func writeHex4(b *strings.Builder, i int) {
55	b.WriteByte(hex[i>>12])
56	b.WriteByte(hex[(i>>8)&0xF])
57	b.WriteByte(hex[(i>>4)&0xF])
58	b.WriteByte(hex[i&0xF])
59}
60
61// Convert any valid surrogate pairs in the form of \uXXXX\uXXXX to unicode characters
62func convertRegexpToUnicode(patternStr string) string {
63	var sb strings.Builder
64	pos := 0
65	for i := 0; i < len(patternStr)-11; {
66		r, size := utf8.DecodeRuneInString(patternStr[i:])
67		if r == '\\' {
68			i++
69			if patternStr[i] == 'u' && patternStr[i+5] == '\\' && patternStr[i+6] == 'u' {
70				if first, ok := decodeHex(patternStr[i+1 : i+5]); ok {
71					if isUTF16FirstSurrogate(rune(first)) {
72						if second, ok := decodeHex(patternStr[i+7 : i+11]); ok {
73							if isUTF16SecondSurrogate(rune(second)) {
74								r = utf16.DecodeRune(rune(first), rune(second))
75								sb.WriteString(patternStr[pos : i-1])
76								sb.WriteRune(r)
77								i += 11
78								pos = i
79								continue
80							}
81						}
82					}
83				}
84			}
85			i++
86		} else {
87			i += size
88		}
89	}
90	if pos > 0 {
91		sb.WriteString(patternStr[pos:])
92		return sb.String()
93	}
94	return patternStr
95}
96
97// Convert any extended unicode characters to UTF-16 in the form of \uXXXX\uXXXX
98func convertRegexpToUtf16(patternStr string) string {
99	var sb strings.Builder
100	pos := 0
101	var prevRune rune
102	for i := 0; i < len(patternStr); {
103		r, size := utf8.DecodeRuneInString(patternStr[i:])
104		if r > 0xFFFF {
105			sb.WriteString(patternStr[pos:i])
106			if prevRune == '\\' {
107				sb.WriteRune('\\')
108			}
109			first, second := utf16.EncodeRune(r)
110			sb.WriteString(`\u`)
111			writeHex4(&sb, int(first))
112			sb.WriteString(`\u`)
113			writeHex4(&sb, int(second))
114			pos = i + size
115		}
116		i += size
117		prevRune = r
118	}
119	if pos > 0 {
120		sb.WriteString(patternStr[pos:])
121		return sb.String()
122	}
123	return patternStr
124}
125
126// convert any broken UTF-16 surrogate pairs to \uXXXX
127func escapeInvalidUtf16(s valueString) string {
128	if ascii, ok := s.(asciiString); ok {
129		return ascii.String()
130	}
131	var sb strings.Builder
132	rd := &lenientUtf16Decoder{utf16Reader: s.utf16Reader(0)}
133	pos := 0
134	utf8Size := 0
135	var utf8Buf [utf8.UTFMax]byte
136	for {
137		c, size, err := rd.ReadRune()
138		if err != nil {
139			break
140		}
141		if utf16.IsSurrogate(c) {
142			if sb.Len() == 0 {
143				sb.Grow(utf8Size + 7)
144				hrd := s.reader(0)
145				var c rune
146				for p := 0; p < pos; {
147					var size int
148					var err error
149					c, size, err = hrd.ReadRune()
150					if err != nil {
151						// will not happen
152						panic(fmt.Errorf("error while reading string head %q, pos: %d: %w", s.String(), pos, err))
153					}
154					sb.WriteRune(c)
155					p += size
156				}
157				if c == '\\' {
158					sb.WriteRune(c)
159				}
160			}
161			sb.WriteString(`\u`)
162			writeHex4(&sb, int(c))
163		} else {
164			if sb.Len() > 0 {
165				sb.WriteRune(c)
166			} else {
167				utf8Size += utf8.EncodeRune(utf8Buf[:], c)
168				pos += size
169			}
170		}
171	}
172	if sb.Len() > 0 {
173		return sb.String()
174	}
175	return s.String()
176}
177
178func compileRegexpFromValueString(patternStr valueString, flags string) (*regexpPattern, error) {
179	return compileRegexp(escapeInvalidUtf16(patternStr), flags)
180}
181
182func compileRegexp(patternStr, flags string) (p *regexpPattern, err error) {
183	var global, ignoreCase, multiline, sticky, unicode bool
184	var wrapper *regexpWrapper
185	var wrapper2 *regexp2Wrapper
186
187	if flags != "" {
188		invalidFlags := func() {
189			err = fmt.Errorf("Invalid flags supplied to RegExp constructor '%s'", flags)
190		}
191		for _, chr := range flags {
192			switch chr {
193			case 'g':
194				if global {
195					invalidFlags()
196					return
197				}
198				global = true
199			case 'm':
200				if multiline {
201					invalidFlags()
202					return
203				}
204				multiline = true
205			case 'i':
206				if ignoreCase {
207					invalidFlags()
208					return
209				}
210				ignoreCase = true
211			case 'y':
212				if sticky {
213					invalidFlags()
214					return
215				}
216				sticky = true
217			case 'u':
218				if unicode {
219					invalidFlags()
220				}
221				unicode = true
222			default:
223				invalidFlags()
224				return
225			}
226		}
227	}
228
229	if unicode {
230		patternStr = convertRegexpToUnicode(patternStr)
231	} else {
232		patternStr = convertRegexpToUtf16(patternStr)
233	}
234
235	re2Str, err1 := parser.TransformRegExp(patternStr)
236	if err1 == nil {
237		re2flags := ""
238		if multiline {
239			re2flags += "m"
240		}
241		if ignoreCase {
242			re2flags += "i"
243		}
244		if len(re2flags) > 0 {
245			re2Str = fmt.Sprintf("(?%s:%s)", re2flags, re2Str)
246		}
247
248		pattern, err1 := regexp.Compile(re2Str)
249		if err1 != nil {
250			err = fmt.Errorf("Invalid regular expression (re2): %s (%v)", re2Str, err1)
251			return
252		}
253		wrapper = (*regexpWrapper)(pattern)
254	} else {
255		if _, incompat := err1.(parser.RegexpErrorIncompatible); !incompat {
256			err = err1
257			return
258		}
259		wrapper2, err = compileRegexp2(patternStr, multiline, ignoreCase)
260		if err != nil {
261			err = fmt.Errorf("Invalid regular expression (regexp2): %s (%v)", patternStr, err)
262			return
263		}
264	}
265
266	p = &regexpPattern{
267		src:            patternStr,
268		regexpWrapper:  wrapper,
269		regexp2Wrapper: wrapper2,
270		global:         global,
271		ignoreCase:     ignoreCase,
272		multiline:      multiline,
273		sticky:         sticky,
274		unicode:        unicode,
275	}
276	return
277}
278
279func (r *Runtime) _newRegExp(patternStr valueString, flags string, proto *Object) *regexpObject {
280	pattern, err := compileRegexpFromValueString(patternStr, flags)
281	if err != nil {
282		panic(r.newSyntaxError(err.Error(), -1))
283	}
284	return r.newRegExpp(pattern, patternStr, proto)
285}
286
287func (r *Runtime) builtin_newRegExp(args []Value, proto *Object) *Object {
288	var patternVal, flagsVal Value
289	if len(args) > 0 {
290		patternVal = args[0]
291	}
292	if len(args) > 1 {
293		flagsVal = args[1]
294	}
295	return r.newRegExp(patternVal, flagsVal, proto).val
296}
297
298func (r *Runtime) newRegExp(patternVal, flagsVal Value, proto *Object) *regexpObject {
299	var pattern valueString
300	var flags string
301	if isRegexp(patternVal) { // this may have side effects so need to call it anyway
302		if obj, ok := patternVal.(*Object); ok {
303			if rx, ok := obj.self.(*regexpObject); ok {
304				if flagsVal == nil || flagsVal == _undefined {
305					return rx.clone()
306				} else {
307					return r._newRegExp(rx.source, flagsVal.toString().String(), proto)
308				}
309			} else {
310				pattern = nilSafe(obj.self.getStr("source", nil)).toString()
311				if flagsVal == nil || flagsVal == _undefined {
312					flags = nilSafe(obj.self.getStr("flags", nil)).toString().String()
313				} else {
314					flags = flagsVal.toString().String()
315				}
316				goto exit
317			}
318		}
319	}
320
321	if patternVal != nil && patternVal != _undefined {
322		pattern = patternVal.toString()
323	}
324	if flagsVal != nil && flagsVal != _undefined {
325		flags = flagsVal.toString().String()
326	}
327
328	if pattern == nil {
329		pattern = stringEmpty
330	}
331exit:
332	return r._newRegExp(pattern, flags, proto)
333}
334
335func (r *Runtime) builtin_RegExp(call FunctionCall) Value {
336	pattern := call.Argument(0)
337	patternIsRegExp := isRegexp(pattern)
338	flags := call.Argument(1)
339	if patternIsRegExp && flags == _undefined {
340		if obj, ok := call.Argument(0).(*Object); ok {
341			patternConstructor := obj.self.getStr("constructor", nil)
342			if patternConstructor == r.global.RegExp {
343				return pattern
344			}
345		}
346	}
347	return r.newRegExp(pattern, flags, r.global.RegExpPrototype).val
348}
349
350func (r *Runtime) regexpproto_compile(call FunctionCall) Value {
351	if this, ok := r.toObject(call.This).self.(*regexpObject); ok {
352		var (
353			pattern *regexpPattern
354			source  valueString
355			flags   string
356			err     error
357		)
358		patternVal := call.Argument(0)
359		flagsVal := call.Argument(1)
360		if o, ok := patternVal.(*Object); ok {
361			if p, ok := o.self.(*regexpObject); ok {
362				if flagsVal != _undefined {
363					panic(r.NewTypeError("Cannot supply flags when constructing one RegExp from another"))
364				}
365				this.pattern = p.pattern
366				this.source = p.source
367				goto exit
368			}
369		}
370		if patternVal != _undefined {
371			source = patternVal.toString()
372		} else {
373			source = stringEmpty
374		}
375		if flagsVal != _undefined {
376			flags = flagsVal.toString().String()
377		}
378		pattern, err = compileRegexpFromValueString(source, flags)
379		if err != nil {
380			panic(r.newSyntaxError(err.Error(), -1))
381		}
382		this.pattern = pattern
383		this.source = source
384	exit:
385		this.setOwnStr("lastIndex", intToValue(0), true)
386		return call.This
387	}
388
389	panic(r.NewTypeError("Method RegExp.prototype.compile called on incompatible receiver %s", call.This.toString()))
390}
391
392func (r *Runtime) regexpproto_exec(call FunctionCall) Value {
393	if this, ok := r.toObject(call.This).self.(*regexpObject); ok {
394		return this.exec(call.Argument(0).toString())
395	} else {
396		r.typeErrorResult(true, "Method RegExp.prototype.exec called on incompatible receiver %s", call.This.toString())
397		return nil
398	}
399}
400
401func (r *Runtime) regexpproto_test(call FunctionCall) Value {
402	if this, ok := r.toObject(call.This).self.(*regexpObject); ok {
403		if this.test(call.Argument(0).toString()) {
404			return valueTrue
405		} else {
406			return valueFalse
407		}
408	} else {
409		r.typeErrorResult(true, "Method RegExp.prototype.test called on incompatible receiver %s", call.This.toString())
410		return nil
411	}
412}
413
414func (r *Runtime) regexpproto_toString(call FunctionCall) Value {
415	obj := r.toObject(call.This)
416	if this := r.checkStdRegexp(obj); this != nil {
417		var sb valueStringBuilder
418		sb.WriteRune('/')
419		if !this.writeEscapedSource(&sb) {
420			sb.WriteString(this.source)
421		}
422		sb.WriteRune('/')
423		if this.pattern.global {
424			sb.WriteRune('g')
425		}
426		if this.pattern.ignoreCase {
427			sb.WriteRune('i')
428		}
429		if this.pattern.multiline {
430			sb.WriteRune('m')
431		}
432		if this.pattern.unicode {
433			sb.WriteRune('u')
434		}
435		if this.pattern.sticky {
436			sb.WriteRune('y')
437		}
438		return sb.String()
439	}
440	pattern := nilSafe(obj.self.getStr("source", nil)).toString()
441	flags := nilSafe(obj.self.getStr("flags", nil)).toString()
442	var sb valueStringBuilder
443	sb.WriteRune('/')
444	sb.WriteString(pattern)
445	sb.WriteRune('/')
446	sb.WriteString(flags)
447	return sb.String()
448}
449
450func (r *regexpObject) writeEscapedSource(sb *valueStringBuilder) bool {
451	if r.source.length() == 0 {
452		sb.WriteString(asciiString("(?:)"))
453		return true
454	}
455	pos := 0
456	lastPos := 0
457	rd := &lenientUtf16Decoder{utf16Reader: r.source.utf16Reader(0)}
458L:
459	for {
460		c, size, err := rd.ReadRune()
461		if err != nil {
462			break
463		}
464		switch c {
465		case '\\':
466			pos++
467			_, size, err = rd.ReadRune()
468			if err != nil {
469				break L
470			}
471		case '/', '\u000a', '\u000d', '\u2028', '\u2029':
472			sb.WriteSubstring(r.source, lastPos, pos)
473			sb.WriteRune('\\')
474			switch c {
475			case '\u000a':
476				sb.WriteRune('n')
477			case '\u000d':
478				sb.WriteRune('r')
479			default:
480				sb.WriteRune('u')
481				sb.WriteRune(rune(hex[c>>12]))
482				sb.WriteRune(rune(hex[(c>>8)&0xF]))
483				sb.WriteRune(rune(hex[(c>>4)&0xF]))
484				sb.WriteRune(rune(hex[c&0xF]))
485			}
486			lastPos = pos + size
487		}
488		pos += size
489	}
490	if lastPos > 0 {
491		sb.WriteSubstring(r.source, lastPos, r.source.length())
492		return true
493	}
494	return false
495}
496
497func (r *Runtime) regexpproto_getSource(call FunctionCall) Value {
498	if this, ok := r.toObject(call.This).self.(*regexpObject); ok {
499		var sb valueStringBuilder
500		if this.writeEscapedSource(&sb) {
501			return sb.String()
502		}
503		return this.source
504	} else {
505		r.typeErrorResult(true, "Method RegExp.prototype.source getter called on incompatible receiver")
506		return nil
507	}
508}
509
510func (r *Runtime) regexpproto_getGlobal(call FunctionCall) Value {
511	if this, ok := r.toObject(call.This).self.(*regexpObject); ok {
512		if this.pattern.global {
513			return valueTrue
514		} else {
515			return valueFalse
516		}
517	} else {
518		r.typeErrorResult(true, "Method RegExp.prototype.global getter called on incompatible receiver %s", call.This.toString())
519		return nil
520	}
521}
522
523func (r *Runtime) regexpproto_getMultiline(call FunctionCall) Value {
524	if this, ok := r.toObject(call.This).self.(*regexpObject); ok {
525		if this.pattern.multiline {
526			return valueTrue
527		} else {
528			return valueFalse
529		}
530	} else {
531		r.typeErrorResult(true, "Method RegExp.prototype.multiline getter called on incompatible receiver %s", call.This.toString())
532		return nil
533	}
534}
535
536func (r *Runtime) regexpproto_getIgnoreCase(call FunctionCall) Value {
537	if this, ok := r.toObject(call.This).self.(*regexpObject); ok {
538		if this.pattern.ignoreCase {
539			return valueTrue
540		} else {
541			return valueFalse
542		}
543	} else {
544		r.typeErrorResult(true, "Method RegExp.prototype.ignoreCase getter called on incompatible receiver %s", call.This.toString())
545		return nil
546	}
547}
548
549func (r *Runtime) regexpproto_getUnicode(call FunctionCall) Value {
550	if this, ok := r.toObject(call.This).self.(*regexpObject); ok {
551		if this.pattern.unicode {
552			return valueTrue
553		} else {
554			return valueFalse
555		}
556	} else {
557		r.typeErrorResult(true, "Method RegExp.prototype.unicode getter called on incompatible receiver %s", call.This.toString())
558		return nil
559	}
560}
561
562func (r *Runtime) regexpproto_getSticky(call FunctionCall) Value {
563	if this, ok := r.toObject(call.This).self.(*regexpObject); ok {
564		if this.pattern.sticky {
565			return valueTrue
566		} else {
567			return valueFalse
568		}
569	} else {
570		r.typeErrorResult(true, "Method RegExp.prototype.sticky getter called on incompatible receiver %s", call.This.toString())
571		return nil
572	}
573}
574
575func (r *Runtime) regexpproto_getFlags(call FunctionCall) Value {
576	var global, ignoreCase, multiline, sticky, unicode bool
577
578	thisObj := r.toObject(call.This)
579	size := 0
580	if v := thisObj.self.getStr("global", nil); v != nil {
581		global = v.ToBoolean()
582		if global {
583			size++
584		}
585	}
586	if v := thisObj.self.getStr("ignoreCase", nil); v != nil {
587		ignoreCase = v.ToBoolean()
588		if ignoreCase {
589			size++
590		}
591	}
592	if v := thisObj.self.getStr("multiline", nil); v != nil {
593		multiline = v.ToBoolean()
594		if multiline {
595			size++
596		}
597	}
598	if v := thisObj.self.getStr("sticky", nil); v != nil {
599		sticky = v.ToBoolean()
600		if sticky {
601			size++
602		}
603	}
604	if v := thisObj.self.getStr("unicode", nil); v != nil {
605		unicode = v.ToBoolean()
606		if unicode {
607			size++
608		}
609	}
610
611	var sb strings.Builder
612	sb.Grow(size)
613	if global {
614		sb.WriteByte('g')
615	}
616	if ignoreCase {
617		sb.WriteByte('i')
618	}
619	if multiline {
620		sb.WriteByte('m')
621	}
622	if unicode {
623		sb.WriteByte('u')
624	}
625	if sticky {
626		sb.WriteByte('y')
627	}
628
629	return asciiString(sb.String())
630}
631
632func (r *Runtime) regExpExec(execFn func(FunctionCall) Value, rxObj *Object, arg Value) Value {
633	res := execFn(FunctionCall{
634		This:      rxObj,
635		Arguments: []Value{arg},
636	})
637
638	if res != _null {
639		if _, ok := res.(*Object); !ok {
640			panic(r.NewTypeError("RegExp exec method returned something other than an Object or null"))
641		}
642	}
643
644	return res
645}
646
647func (r *Runtime) getGlobalRegexpMatches(rxObj *Object, s valueString) []Value {
648	fullUnicode := nilSafe(rxObj.self.getStr("unicode", nil)).ToBoolean()
649	rxObj.self.setOwnStr("lastIndex", intToValue(0), true)
650	execFn, ok := r.toObject(rxObj.self.getStr("exec", nil)).self.assertCallable()
651	if !ok {
652		panic(r.NewTypeError("exec is not a function"))
653	}
654	var a []Value
655	for {
656		res := r.regExpExec(execFn, rxObj, s)
657		if res == _null {
658			break
659		}
660		a = append(a, res)
661		matchStr := nilSafe(r.toObject(res).self.getIdx(valueInt(0), nil)).toString()
662		if matchStr.length() == 0 {
663			thisIndex := toLength(rxObj.self.getStr("lastIndex", nil))
664			rxObj.self.setOwnStr("lastIndex", valueInt(advanceStringIndex64(s, thisIndex, fullUnicode)), true)
665		}
666	}
667
668	return a
669}
670
671func (r *Runtime) regexpproto_stdMatcherGeneric(rxObj *Object, s valueString) Value {
672	rx := rxObj.self
673	global := rx.getStr("global", nil)
674	if global != nil && global.ToBoolean() {
675		a := r.getGlobalRegexpMatches(rxObj, s)
676		if len(a) == 0 {
677			return _null
678		}
679		ar := make([]Value, 0, len(a))
680		for _, result := range a {
681			obj := r.toObject(result)
682			matchStr := nilSafe(obj.self.getIdx(valueInt(0), nil)).ToString()
683			ar = append(ar, matchStr)
684		}
685		return r.newArrayValues(ar)
686	}
687
688	execFn, ok := r.toObject(rx.getStr("exec", nil)).self.assertCallable()
689	if !ok {
690		panic(r.NewTypeError("exec is not a function"))
691	}
692
693	return r.regExpExec(execFn, rxObj, s)
694}
695
696func (r *Runtime) checkStdRegexp(rxObj *Object) *regexpObject {
697	if deoptimiseRegexp {
698		return nil
699	}
700
701	rx, ok := rxObj.self.(*regexpObject)
702	if !ok {
703		return nil
704	}
705
706	if !rx.standard || rx.prototype == nil || rx.prototype.self != r.global.stdRegexpProto {
707		return nil
708	}
709
710	return rx
711}
712
713func (r *Runtime) regexpproto_stdMatcher(call FunctionCall) Value {
714	thisObj := r.toObject(call.This)
715	s := call.Argument(0).toString()
716	rx := r.checkStdRegexp(thisObj)
717	if rx == nil {
718		return r.regexpproto_stdMatcherGeneric(thisObj, s)
719	}
720	if rx.pattern.global {
721		res := rx.pattern.findAllSubmatchIndex(s, 0, -1, rx.pattern.sticky)
722		if len(res) == 0 {
723			rx.setOwnStr("lastIndex", intToValue(0), true)
724			return _null
725		}
726		a := make([]Value, 0, len(res))
727		for _, result := range res {
728			a = append(a, s.substring(result[0], result[1]))
729		}
730		rx.setOwnStr("lastIndex", intToValue(int64(res[len(res)-1][1])), true)
731		return r.newArrayValues(a)
732	} else {
733		return rx.exec(s)
734	}
735}
736
737func (r *Runtime) regexpproto_stdSearchGeneric(rxObj *Object, arg valueString) Value {
738	rx := rxObj.self
739	previousLastIndex := nilSafe(rx.getStr("lastIndex", nil))
740	zero := intToValue(0)
741	if !previousLastIndex.SameAs(zero) {
742		rx.setOwnStr("lastIndex", zero, true)
743	}
744	execFn, ok := r.toObject(rx.getStr("exec", nil)).self.assertCallable()
745	if !ok {
746		panic(r.NewTypeError("exec is not a function"))
747	}
748
749	result := r.regExpExec(execFn, rxObj, arg)
750	currentLastIndex := nilSafe(rx.getStr("lastIndex", nil))
751	if !currentLastIndex.SameAs(previousLastIndex) {
752		rx.setOwnStr("lastIndex", previousLastIndex, true)
753	}
754
755	if result == _null {
756		return intToValue(-1)
757	}
758
759	return r.toObject(result).self.getStr("index", nil)
760}
761
762func (r *Runtime) regexpproto_stdMatcherAll(call FunctionCall) Value {
763	thisObj := r.toObject(call.This)
764	s := call.Argument(0).toString()
765	flags := nilSafe(thisObj.self.getStr("flags", nil)).toString()
766	c := r.speciesConstructorObj(call.This.(*Object), r.global.RegExp)
767	matcher := r.toConstructor(c)([]Value{call.This, flags}, nil)
768	matcher.self.setOwnStr("lastIndex", valueInt(toLength(thisObj.self.getStr("lastIndex", nil))), true)
769	flagsStr := flags.String()
770	global := strings.Contains(flagsStr, "g")
771	fullUnicode := strings.Contains(flagsStr, "u")
772	return r.createRegExpStringIterator(matcher, s, global, fullUnicode)
773}
774
775func (r *Runtime) createRegExpStringIterator(matcher *Object, s valueString, global, fullUnicode bool) Value {
776	o := &Object{runtime: r}
777
778	ri := &regExpStringIterObject{
779		matcher:     matcher,
780		s:           s,
781		global:      global,
782		fullUnicode: fullUnicode,
783	}
784	ri.class = classRegExpStringIterator
785	ri.val = o
786	ri.extensible = true
787	o.self = ri
788	ri.prototype = r.global.RegExpStringIteratorPrototype
789	ri.init()
790
791	return o
792}
793
794type regExpStringIterObject struct {
795	baseObject
796	matcher                   *Object
797	s                         valueString
798	global, fullUnicode, done bool
799}
800
801// RegExpExec as defined in 21.2.5.2.1
802func regExpExec(r *Object, s valueString) Value {
803	exec := r.self.getStr("exec", nil)
804	if execObject, ok := exec.(*Object); ok {
805		if execFn, ok := execObject.self.assertCallable(); ok {
806			return r.runtime.regExpExec(execFn, r, s)
807		}
808	}
809	if rx, ok := r.self.(*regexpObject); ok {
810		return rx.exec(s)
811	}
812	panic(r.runtime.NewTypeError("no RegExpMatcher internal slot"))
813}
814
815func (ri *regExpStringIterObject) next() (v Value) {
816	if ri.done {
817		return ri.val.runtime.createIterResultObject(_undefined, true)
818	}
819
820	match := regExpExec(ri.matcher, ri.s)
821	if IsNull(match) {
822		ri.done = true
823		return ri.val.runtime.createIterResultObject(_undefined, true)
824	}
825	if !ri.global {
826		ri.done = true
827		return ri.val.runtime.createIterResultObject(match, false)
828	}
829
830	matchStr := nilSafe(ri.val.runtime.toObject(match).self.getIdx(valueInt(0), nil)).toString()
831	if matchStr.length() == 0 {
832		thisIndex := toLength(ri.matcher.self.getStr("lastIndex", nil))
833		ri.matcher.self.setOwnStr("lastIndex", valueInt(advanceStringIndex64(ri.s, thisIndex, ri.fullUnicode)), true)
834	}
835	return ri.val.runtime.createIterResultObject(match, false)
836}
837
838func (r *Runtime) regexpproto_stdSearch(call FunctionCall) Value {
839	thisObj := r.toObject(call.This)
840	s := call.Argument(0).toString()
841	rx := r.checkStdRegexp(thisObj)
842	if rx == nil {
843		return r.regexpproto_stdSearchGeneric(thisObj, s)
844	}
845
846	previousLastIndex := rx.getStr("lastIndex", nil)
847	rx.setOwnStr("lastIndex", intToValue(0), true)
848
849	match, result := rx.execRegexp(s)
850	rx.setOwnStr("lastIndex", previousLastIndex, true)
851
852	if !match {
853		return intToValue(-1)
854	}
855	return intToValue(int64(result[0]))
856}
857
858func (r *Runtime) regexpproto_stdSplitterGeneric(splitter *Object, s valueString, limit Value, unicodeMatching bool) Value {
859	var a []Value
860	var lim int64
861	if limit == nil || limit == _undefined {
862		lim = maxInt - 1
863	} else {
864		lim = toLength(limit)
865	}
866	if lim == 0 {
867		return r.newArrayValues(a)
868	}
869	size := s.length()
870	p := 0
871	execFn := toMethod(splitter.ToObject(r).self.getStr("exec", nil)) // must be non-nil
872
873	if size == 0 {
874		if r.regExpExec(execFn, splitter, s) == _null {
875			a = append(a, s)
876		}
877		return r.newArrayValues(a)
878	}
879
880	q := p
881	for q < size {
882		splitter.self.setOwnStr("lastIndex", intToValue(int64(q)), true)
883		z := r.regExpExec(execFn, splitter, s)
884		if z == _null {
885			q = advanceStringIndex(s, q, unicodeMatching)
886		} else {
887			z := r.toObject(z)
888			e := toLength(splitter.self.getStr("lastIndex", nil))
889			if e == int64(p) {
890				q = advanceStringIndex(s, q, unicodeMatching)
891			} else {
892				a = append(a, s.substring(p, q))
893				if int64(len(a)) == lim {
894					return r.newArrayValues(a)
895				}
896				if e > int64(size) {
897					p = size
898				} else {
899					p = int(e)
900				}
901				numberOfCaptures := max(toLength(z.self.getStr("length", nil))-1, 0)
902				for i := int64(1); i <= numberOfCaptures; i++ {
903					a = append(a, nilSafe(z.self.getIdx(valueInt(i), nil)))
904					if int64(len(a)) == lim {
905						return r.newArrayValues(a)
906					}
907				}
908				q = p
909			}
910		}
911	}
912	a = append(a, s.substring(p, size))
913	return r.newArrayValues(a)
914}
915
916func advanceStringIndex(s valueString, pos int, unicode bool) int {
917	next := pos + 1
918	if !unicode {
919		return next
920	}
921	l := s.length()
922	if next >= l {
923		return next
924	}
925	if !isUTF16FirstSurrogate(s.charAt(pos)) {
926		return next
927	}
928	if !isUTF16SecondSurrogate(s.charAt(next)) {
929		return next
930	}
931	return next + 1
932}
933
934func advanceStringIndex64(s valueString, pos int64, unicode bool) int64 {
935	next := pos + 1
936	if !unicode {
937		return next
938	}
939	l := int64(s.length())
940	if next >= l {
941		return next
942	}
943	if !isUTF16FirstSurrogate(s.charAt(int(pos))) {
944		return next
945	}
946	if !isUTF16SecondSurrogate(s.charAt(int(next))) {
947		return next
948	}
949	return next + 1
950}
951
952func (r *Runtime) regexpproto_stdSplitter(call FunctionCall) Value {
953	rxObj := r.toObject(call.This)
954	s := call.Argument(0).toString()
955	limitValue := call.Argument(1)
956	var splitter *Object
957	search := r.checkStdRegexp(rxObj)
958	c := r.speciesConstructorObj(rxObj, r.global.RegExp)
959	if search == nil || c != r.global.RegExp {
960		flags := nilSafe(rxObj.self.getStr("flags", nil)).toString()
961		flagsStr := flags.String()
962
963		// Add 'y' flag if missing
964		if !strings.Contains(flagsStr, "y") {
965			flags = flags.concat(asciiString("y"))
966		}
967		splitter = r.toConstructor(c)([]Value{rxObj, flags}, nil)
968		search = r.checkStdRegexp(splitter)
969		if search == nil {
970			return r.regexpproto_stdSplitterGeneric(splitter, s, limitValue, strings.Contains(flagsStr, "u"))
971		}
972	}
973
974	pattern := search.pattern // toUint32() may recompile the pattern, but we still need to use the original
975	limit := -1
976	if limitValue != _undefined {
977		limit = int(toUint32(limitValue))
978	}
979
980	if limit == 0 {
981		return r.newArrayValues(nil)
982	}
983
984	targetLength := s.length()
985	var valueArray []Value
986	lastIndex := 0
987	found := 0
988
989	result := pattern.findAllSubmatchIndex(s, 0, -1, false)
990	if targetLength == 0 {
991		if result == nil {
992			valueArray = append(valueArray, s)
993		}
994		goto RETURN
995	}
996
997	for _, match := range result {
998		if match[0] == match[1] {
999			// FIXME Ugh, this is a hack
1000			if match[0] == 0 || match[0] == targetLength {
1001				continue
1002			}
1003		}
1004
1005		if lastIndex != match[0] {
1006			valueArray = append(valueArray, s.substring(lastIndex, match[0]))
1007			found++
1008		} else if lastIndex == match[0] {
1009			if lastIndex != -1 {
1010				valueArray = append(valueArray, stringEmpty)
1011				found++
1012			}
1013		}
1014
1015		lastIndex = match[1]
1016		if found == limit {
1017			goto RETURN
1018		}
1019
1020		captureCount := len(match) / 2
1021		for index := 1; index < captureCount; index++ {
1022			offset := index * 2
1023			var value Value
1024			if match[offset] != -1 {
1025				value = s.substring(match[offset], match[offset+1])
1026			} else {
1027				value = _undefined
1028			}
1029			valueArray = append(valueArray, value)
1030			found++
1031			if found == limit {
1032				goto RETURN
1033			}
1034		}
1035	}
1036
1037	if found != limit {
1038		if lastIndex != targetLength {
1039			valueArray = append(valueArray, s.substring(lastIndex, targetLength))
1040		} else {
1041			valueArray = append(valueArray, stringEmpty)
1042		}
1043	}
1044
1045RETURN:
1046	return r.newArrayValues(valueArray)
1047}
1048
1049func (r *Runtime) regexpproto_stdReplacerGeneric(rxObj *Object, s, replaceStr valueString, rcall func(FunctionCall) Value) Value {
1050	var results []Value
1051	if nilSafe(rxObj.self.getStr("global", nil)).ToBoolean() {
1052		results = r.getGlobalRegexpMatches(rxObj, s)
1053	} else {
1054		execFn := toMethod(rxObj.self.getStr("exec", nil)) // must be non-nil
1055		result := r.regExpExec(execFn, rxObj, s)
1056		if result != _null {
1057			results = append(results, result)
1058		}
1059	}
1060	lengthS := s.length()
1061	nextSourcePosition := 0
1062	var resultBuf valueStringBuilder
1063	for _, result := range results {
1064		obj := r.toObject(result)
1065		nCaptures := max(toLength(obj.self.getStr("length", nil))-1, 0)
1066		matched := nilSafe(obj.self.getIdx(valueInt(0), nil)).toString()
1067		matchLength := matched.length()
1068		position := toIntStrict(max(min(nilSafe(obj.self.getStr("index", nil)).ToInteger(), int64(lengthS)), 0))
1069		var captures []Value
1070		if rcall != nil {
1071			captures = make([]Value, 0, nCaptures+3)
1072		} else {
1073			captures = make([]Value, 0, nCaptures+1)
1074		}
1075		captures = append(captures, matched)
1076		for n := int64(1); n <= nCaptures; n++ {
1077			capN := nilSafe(obj.self.getIdx(valueInt(n), nil))
1078			if capN != _undefined {
1079				capN = capN.ToString()
1080			}
1081			captures = append(captures, capN)
1082		}
1083		var replacement valueString
1084		if rcall != nil {
1085			captures = append(captures, intToValue(int64(position)), s)
1086			replacement = rcall(FunctionCall{
1087				This:      _undefined,
1088				Arguments: captures,
1089			}).toString()
1090			if position >= nextSourcePosition {
1091				resultBuf.WriteString(s.substring(nextSourcePosition, position))
1092				resultBuf.WriteString(replacement)
1093				nextSourcePosition = position + matchLength
1094			}
1095		} else {
1096			if position >= nextSourcePosition {
1097				resultBuf.WriteString(s.substring(nextSourcePosition, position))
1098				writeSubstitution(s, position, len(captures), func(idx int) valueString {
1099					capture := captures[idx]
1100					if capture != _undefined {
1101						return capture.toString()
1102					}
1103					return stringEmpty
1104				}, replaceStr, &resultBuf)
1105				nextSourcePosition = position + matchLength
1106			}
1107		}
1108	}
1109	if nextSourcePosition < lengthS {
1110		resultBuf.WriteString(s.substring(nextSourcePosition, lengthS))
1111	}
1112	return resultBuf.String()
1113}
1114
1115func writeSubstitution(s valueString, position int, numCaptures int, getCapture func(int) valueString, replaceStr valueString, buf *valueStringBuilder) {
1116	l := s.length()
1117	rl := replaceStr.length()
1118	matched := getCapture(0)
1119	tailPos := position + matched.length()
1120
1121	for i := 0; i < rl; i++ {
1122		c := replaceStr.charAt(i)
1123		if c == '$' && i < rl-1 {
1124			ch := replaceStr.charAt(i + 1)
1125			switch ch {
1126			case '$':
1127				buf.WriteRune('$')
1128			case '`':
1129				buf.WriteString(s.substring(0, position))
1130			case '\'':
1131				if tailPos < l {
1132					buf.WriteString(s.substring(tailPos, l))
1133				}
1134			case '&':
1135				buf.WriteString(matched)
1136			default:
1137				matchNumber := 0
1138				j := i + 1
1139				for j < rl {
1140					ch := replaceStr.charAt(j)
1141					if ch >= '0' && ch <= '9' {
1142						m := matchNumber*10 + int(ch-'0')
1143						if m >= numCaptures {
1144							break
1145						}
1146						matchNumber = m
1147						j++
1148					} else {
1149						break
1150					}
1151				}
1152				if matchNumber > 0 {
1153					buf.WriteString(getCapture(matchNumber))
1154					i = j - 1
1155					continue
1156				} else {
1157					buf.WriteRune('$')
1158					buf.WriteRune(ch)
1159				}
1160			}
1161			i++
1162		} else {
1163			buf.WriteRune(c)
1164		}
1165	}
1166}
1167
1168func (r *Runtime) regexpproto_stdReplacer(call FunctionCall) Value {
1169	rxObj := r.toObject(call.This)
1170	s := call.Argument(0).toString()
1171	replaceStr, rcall := getReplaceValue(call.Argument(1))
1172
1173	rx := r.checkStdRegexp(rxObj)
1174	if rx == nil {
1175		return r.regexpproto_stdReplacerGeneric(rxObj, s, replaceStr, rcall)
1176	}
1177
1178	var index int64
1179	find := 1
1180	if rx.pattern.global {
1181		find = -1
1182		rx.setOwnStr("lastIndex", intToValue(0), true)
1183	} else {
1184		index = rx.getLastIndex()
1185	}
1186	found := rx.pattern.findAllSubmatchIndex(s, toIntStrict(index), find, rx.pattern.sticky)
1187	if len(found) > 0 {
1188		if !rx.updateLastIndex(index, found[0], found[len(found)-1]) {
1189			found = nil
1190		}
1191	} else {
1192		rx.updateLastIndex(index, nil, nil)
1193	}
1194
1195	return stringReplace(s, found, replaceStr, rcall)
1196}
1197
1198func (r *Runtime) regExpStringIteratorProto_next(call FunctionCall) Value {
1199	thisObj := r.toObject(call.This)
1200	if iter, ok := thisObj.self.(*regExpStringIterObject); ok {
1201		return iter.next()
1202	}
1203	panic(r.NewTypeError("Method RegExp String Iterator.prototype.next called on incompatible receiver %s", thisObj.String()))
1204}
1205
1206func (r *Runtime) createRegExpStringIteratorPrototype(val *Object) objectImpl {
1207	o := newBaseObjectObj(val, r.global.IteratorPrototype, classObject)
1208
1209	o._putProp("next", r.newNativeFunc(r.regExpStringIteratorProto_next, nil, "next", nil, 0), true, false, true)
1210	o._putSym(SymToStringTag, valueProp(asciiString(classRegExpStringIterator), false, false, true))
1211
1212	return o
1213}
1214
1215func (r *Runtime) initRegExp() {
1216	o := r.newGuardedObject(r.global.ObjectPrototype, classObject)
1217	r.global.RegExpPrototype = o.val
1218	r.global.stdRegexpProto = o
1219	r.global.RegExpStringIteratorPrototype = r.newLazyObject(r.createRegExpStringIteratorPrototype)
1220
1221	o._putProp("compile", r.newNativeFunc(r.regexpproto_compile, nil, "compile", nil, 2), true, false, true)
1222	o._putProp("exec", r.newNativeFunc(r.regexpproto_exec, nil, "exec", nil, 1), true, false, true)
1223	o._putProp("test", r.newNativeFunc(r.regexpproto_test, nil, "test", nil, 1), true, false, true)
1224	o._putProp("toString", r.newNativeFunc(r.regexpproto_toString, nil, "toString", nil, 0), true, false, true)
1225	o.setOwnStr("source", &valueProperty{
1226		configurable: true,
1227		getterFunc:   r.newNativeFunc(r.regexpproto_getSource, nil, "get source", nil, 0),
1228		accessor:     true,
1229	}, false)
1230	o.setOwnStr("global", &valueProperty{
1231		configurable: true,
1232		getterFunc:   r.newNativeFunc(r.regexpproto_getGlobal, nil, "get global", nil, 0),
1233		accessor:     true,
1234	}, false)
1235	o.setOwnStr("multiline", &valueProperty{
1236		configurable: true,
1237		getterFunc:   r.newNativeFunc(r.regexpproto_getMultiline, nil, "get multiline", nil, 0),
1238		accessor:     true,
1239	}, false)
1240	o.setOwnStr("ignoreCase", &valueProperty{
1241		configurable: true,
1242		getterFunc:   r.newNativeFunc(r.regexpproto_getIgnoreCase, nil, "get ignoreCase", nil, 0),
1243		accessor:     true,
1244	}, false)
1245	o.setOwnStr("unicode", &valueProperty{
1246		configurable: true,
1247		getterFunc:   r.newNativeFunc(r.regexpproto_getUnicode, nil, "get unicode", nil, 0),
1248		accessor:     true,
1249	}, false)
1250	o.setOwnStr("sticky", &valueProperty{
1251		configurable: true,
1252		getterFunc:   r.newNativeFunc(r.regexpproto_getSticky, nil, "get sticky", nil, 0),
1253		accessor:     true,
1254	}, false)
1255	o.setOwnStr("flags", &valueProperty{
1256		configurable: true,
1257		getterFunc:   r.newNativeFunc(r.regexpproto_getFlags, nil, "get flags", nil, 0),
1258		accessor:     true,
1259	}, false)
1260
1261	o._putSym(SymMatch, valueProp(r.newNativeFunc(r.regexpproto_stdMatcher, nil, "[Symbol.match]", nil, 1), true, false, true))
1262	o._putSym(SymMatchAll, valueProp(r.newNativeFunc(r.regexpproto_stdMatcherAll, nil, "[Symbol.matchAll]", nil, 1), true, false, true))
1263	o._putSym(SymSearch, valueProp(r.newNativeFunc(r.regexpproto_stdSearch, nil, "[Symbol.search]", nil, 1), true, false, true))
1264	o._putSym(SymSplit, valueProp(r.newNativeFunc(r.regexpproto_stdSplitter, nil, "[Symbol.split]", nil, 2), true, false, true))
1265	o._putSym(SymReplace, valueProp(r.newNativeFunc(r.regexpproto_stdReplacer, nil, "[Symbol.replace]", nil, 2), true, false, true))
1266	o.guard("exec", "global", "multiline", "ignoreCase", "unicode", "sticky")
1267
1268	r.global.RegExp = r.newNativeFunc(r.builtin_RegExp, r.builtin_newRegExp, "RegExp", r.global.RegExpPrototype, 2)
1269	rx := r.global.RegExp.self
1270	rx._putSym(SymSpecies, &valueProperty{
1271		getterFunc:   r.newNativeFunc(r.returnThis, nil, "get [Symbol.species]", nil, 0),
1272		accessor:     true,
1273		configurable: true,
1274	})
1275	r.addToGlobal("RegExp", r.global.RegExp)
1276}
1277