1package goja 2 3import ( 4 "fmt" 5 "github.com/dop251/goja/parser" 6 "regexp" 7 "strings" 8 "unicode/utf16" 9 "unicode/utf8" 10) 11 12func (r *Runtime) newRegexpObject(proto *Object) *regexpObject { 13 v := &Object{runtime: r} 14 15 o := ®expObject{} 16 o.class = classRegExp 17 o.val = v 18 o.extensible = true 19 v.self = o 20 o.prototype = proto 21 o.init() 22 return o 23} 24 25func (r *Runtime) newRegExpp(pattern *regexpPattern, patternStr valueString, proto *Object) *regexpObject { 26 o := r.newRegexpObject(proto) 27 28 o.pattern = pattern 29 o.source = patternStr 30 31 return o 32} 33 34func decodeHex(s string) (int, bool) { 35 var hex int 36 for i := 0; i < len(s); i++ { 37 var n byte 38 chr := s[i] 39 switch { 40 case '0' <= chr && chr <= '9': 41 n = chr - '0' 42 case 'a' <= chr && chr <= 'f': 43 n = chr - 'a' + 10 44 case 'A' <= chr && chr <= 'F': 45 n = chr - 'A' + 10 46 default: 47 return 0, false 48 } 49 hex = hex*16 + int(n) 50 } 51 return hex, true 52} 53 54func writeHex4(b *strings.Builder, i int) { 55 b.WriteByte(hex[i>>12]) 56 b.WriteByte(hex[(i>>8)&0xF]) 57 b.WriteByte(hex[(i>>4)&0xF]) 58 b.WriteByte(hex[i&0xF]) 59} 60 61// Convert any valid surrogate pairs in the form of \uXXXX\uXXXX to unicode characters 62func convertRegexpToUnicode(patternStr string) string { 63 var sb strings.Builder 64 pos := 0 65 for i := 0; i < len(patternStr)-11; { 66 r, size := utf8.DecodeRuneInString(patternStr[i:]) 67 if r == '\\' { 68 i++ 69 if patternStr[i] == 'u' && patternStr[i+5] == '\\' && patternStr[i+6] == 'u' { 70 if first, ok := decodeHex(patternStr[i+1 : i+5]); ok { 71 if isUTF16FirstSurrogate(rune(first)) { 72 if second, ok := decodeHex(patternStr[i+7 : i+11]); ok { 73 if isUTF16SecondSurrogate(rune(second)) { 74 r = utf16.DecodeRune(rune(first), rune(second)) 75 sb.WriteString(patternStr[pos : i-1]) 76 sb.WriteRune(r) 77 i += 11 78 pos = i 79 continue 80 } 81 } 82 } 83 } 84 } 85 i++ 86 } else { 87 i += size 88 } 89 } 90 if pos > 0 { 91 sb.WriteString(patternStr[pos:]) 92 return sb.String() 93 } 94 return patternStr 95} 96 97// Convert any extended unicode characters to UTF-16 in the form of \uXXXX\uXXXX 98func convertRegexpToUtf16(patternStr string) string { 99 var sb strings.Builder 100 pos := 0 101 var prevRune rune 102 for i := 0; i < len(patternStr); { 103 r, size := utf8.DecodeRuneInString(patternStr[i:]) 104 if r > 0xFFFF { 105 sb.WriteString(patternStr[pos:i]) 106 if prevRune == '\\' { 107 sb.WriteRune('\\') 108 } 109 first, second := utf16.EncodeRune(r) 110 sb.WriteString(`\u`) 111 writeHex4(&sb, int(first)) 112 sb.WriteString(`\u`) 113 writeHex4(&sb, int(second)) 114 pos = i + size 115 } 116 i += size 117 prevRune = r 118 } 119 if pos > 0 { 120 sb.WriteString(patternStr[pos:]) 121 return sb.String() 122 } 123 return patternStr 124} 125 126// convert any broken UTF-16 surrogate pairs to \uXXXX 127func escapeInvalidUtf16(s valueString) string { 128 if ascii, ok := s.(asciiString); ok { 129 return ascii.String() 130 } 131 var sb strings.Builder 132 rd := &lenientUtf16Decoder{utf16Reader: s.utf16Reader(0)} 133 pos := 0 134 utf8Size := 0 135 var utf8Buf [utf8.UTFMax]byte 136 for { 137 c, size, err := rd.ReadRune() 138 if err != nil { 139 break 140 } 141 if utf16.IsSurrogate(c) { 142 if sb.Len() == 0 { 143 sb.Grow(utf8Size + 7) 144 hrd := s.reader(0) 145 var c rune 146 for p := 0; p < pos; { 147 var size int 148 var err error 149 c, size, err = hrd.ReadRune() 150 if err != nil { 151 // will not happen 152 panic(fmt.Errorf("error while reading string head %q, pos: %d: %w", s.String(), pos, err)) 153 } 154 sb.WriteRune(c) 155 p += size 156 } 157 if c == '\\' { 158 sb.WriteRune(c) 159 } 160 } 161 sb.WriteString(`\u`) 162 writeHex4(&sb, int(c)) 163 } else { 164 if sb.Len() > 0 { 165 sb.WriteRune(c) 166 } else { 167 utf8Size += utf8.EncodeRune(utf8Buf[:], c) 168 pos += size 169 } 170 } 171 } 172 if sb.Len() > 0 { 173 return sb.String() 174 } 175 return s.String() 176} 177 178func compileRegexpFromValueString(patternStr valueString, flags string) (*regexpPattern, error) { 179 return compileRegexp(escapeInvalidUtf16(patternStr), flags) 180} 181 182func compileRegexp(patternStr, flags string) (p *regexpPattern, err error) { 183 var global, ignoreCase, multiline, sticky, unicode bool 184 var wrapper *regexpWrapper 185 var wrapper2 *regexp2Wrapper 186 187 if flags != "" { 188 invalidFlags := func() { 189 err = fmt.Errorf("Invalid flags supplied to RegExp constructor '%s'", flags) 190 } 191 for _, chr := range flags { 192 switch chr { 193 case 'g': 194 if global { 195 invalidFlags() 196 return 197 } 198 global = true 199 case 'm': 200 if multiline { 201 invalidFlags() 202 return 203 } 204 multiline = true 205 case 'i': 206 if ignoreCase { 207 invalidFlags() 208 return 209 } 210 ignoreCase = true 211 case 'y': 212 if sticky { 213 invalidFlags() 214 return 215 } 216 sticky = true 217 case 'u': 218 if unicode { 219 invalidFlags() 220 } 221 unicode = true 222 default: 223 invalidFlags() 224 return 225 } 226 } 227 } 228 229 if unicode { 230 patternStr = convertRegexpToUnicode(patternStr) 231 } else { 232 patternStr = convertRegexpToUtf16(patternStr) 233 } 234 235 re2Str, err1 := parser.TransformRegExp(patternStr) 236 if err1 == nil { 237 re2flags := "" 238 if multiline { 239 re2flags += "m" 240 } 241 if ignoreCase { 242 re2flags += "i" 243 } 244 if len(re2flags) > 0 { 245 re2Str = fmt.Sprintf("(?%s:%s)", re2flags, re2Str) 246 } 247 248 pattern, err1 := regexp.Compile(re2Str) 249 if err1 != nil { 250 err = fmt.Errorf("Invalid regular expression (re2): %s (%v)", re2Str, err1) 251 return 252 } 253 wrapper = (*regexpWrapper)(pattern) 254 } else { 255 if _, incompat := err1.(parser.RegexpErrorIncompatible); !incompat { 256 err = err1 257 return 258 } 259 wrapper2, err = compileRegexp2(patternStr, multiline, ignoreCase) 260 if err != nil { 261 err = fmt.Errorf("Invalid regular expression (regexp2): %s (%v)", patternStr, err) 262 return 263 } 264 } 265 266 p = ®expPattern{ 267 src: patternStr, 268 regexpWrapper: wrapper, 269 regexp2Wrapper: wrapper2, 270 global: global, 271 ignoreCase: ignoreCase, 272 multiline: multiline, 273 sticky: sticky, 274 unicode: unicode, 275 } 276 return 277} 278 279func (r *Runtime) _newRegExp(patternStr valueString, flags string, proto *Object) *regexpObject { 280 pattern, err := compileRegexpFromValueString(patternStr, flags) 281 if err != nil { 282 panic(r.newSyntaxError(err.Error(), -1)) 283 } 284 return r.newRegExpp(pattern, patternStr, proto) 285} 286 287func (r *Runtime) builtin_newRegExp(args []Value, proto *Object) *Object { 288 var patternVal, flagsVal Value 289 if len(args) > 0 { 290 patternVal = args[0] 291 } 292 if len(args) > 1 { 293 flagsVal = args[1] 294 } 295 return r.newRegExp(patternVal, flagsVal, proto).val 296} 297 298func (r *Runtime) newRegExp(patternVal, flagsVal Value, proto *Object) *regexpObject { 299 var pattern valueString 300 var flags string 301 if isRegexp(patternVal) { // this may have side effects so need to call it anyway 302 if obj, ok := patternVal.(*Object); ok { 303 if rx, ok := obj.self.(*regexpObject); ok { 304 if flagsVal == nil || flagsVal == _undefined { 305 return rx.clone() 306 } else { 307 return r._newRegExp(rx.source, flagsVal.toString().String(), proto) 308 } 309 } else { 310 pattern = nilSafe(obj.self.getStr("source", nil)).toString() 311 if flagsVal == nil || flagsVal == _undefined { 312 flags = nilSafe(obj.self.getStr("flags", nil)).toString().String() 313 } else { 314 flags = flagsVal.toString().String() 315 } 316 goto exit 317 } 318 } 319 } 320 321 if patternVal != nil && patternVal != _undefined { 322 pattern = patternVal.toString() 323 } 324 if flagsVal != nil && flagsVal != _undefined { 325 flags = flagsVal.toString().String() 326 } 327 328 if pattern == nil { 329 pattern = stringEmpty 330 } 331exit: 332 return r._newRegExp(pattern, flags, proto) 333} 334 335func (r *Runtime) builtin_RegExp(call FunctionCall) Value { 336 pattern := call.Argument(0) 337 patternIsRegExp := isRegexp(pattern) 338 flags := call.Argument(1) 339 if patternIsRegExp && flags == _undefined { 340 if obj, ok := call.Argument(0).(*Object); ok { 341 patternConstructor := obj.self.getStr("constructor", nil) 342 if patternConstructor == r.global.RegExp { 343 return pattern 344 } 345 } 346 } 347 return r.newRegExp(pattern, flags, r.global.RegExpPrototype).val 348} 349 350func (r *Runtime) regexpproto_compile(call FunctionCall) Value { 351 if this, ok := r.toObject(call.This).self.(*regexpObject); ok { 352 var ( 353 pattern *regexpPattern 354 source valueString 355 flags string 356 err error 357 ) 358 patternVal := call.Argument(0) 359 flagsVal := call.Argument(1) 360 if o, ok := patternVal.(*Object); ok { 361 if p, ok := o.self.(*regexpObject); ok { 362 if flagsVal != _undefined { 363 panic(r.NewTypeError("Cannot supply flags when constructing one RegExp from another")) 364 } 365 this.pattern = p.pattern 366 this.source = p.source 367 goto exit 368 } 369 } 370 if patternVal != _undefined { 371 source = patternVal.toString() 372 } else { 373 source = stringEmpty 374 } 375 if flagsVal != _undefined { 376 flags = flagsVal.toString().String() 377 } 378 pattern, err = compileRegexpFromValueString(source, flags) 379 if err != nil { 380 panic(r.newSyntaxError(err.Error(), -1)) 381 } 382 this.pattern = pattern 383 this.source = source 384 exit: 385 this.setOwnStr("lastIndex", intToValue(0), true) 386 return call.This 387 } 388 389 panic(r.NewTypeError("Method RegExp.prototype.compile called on incompatible receiver %s", call.This.toString())) 390} 391 392func (r *Runtime) regexpproto_exec(call FunctionCall) Value { 393 if this, ok := r.toObject(call.This).self.(*regexpObject); ok { 394 return this.exec(call.Argument(0).toString()) 395 } else { 396 r.typeErrorResult(true, "Method RegExp.prototype.exec called on incompatible receiver %s", call.This.toString()) 397 return nil 398 } 399} 400 401func (r *Runtime) regexpproto_test(call FunctionCall) Value { 402 if this, ok := r.toObject(call.This).self.(*regexpObject); ok { 403 if this.test(call.Argument(0).toString()) { 404 return valueTrue 405 } else { 406 return valueFalse 407 } 408 } else { 409 r.typeErrorResult(true, "Method RegExp.prototype.test called on incompatible receiver %s", call.This.toString()) 410 return nil 411 } 412} 413 414func (r *Runtime) regexpproto_toString(call FunctionCall) Value { 415 obj := r.toObject(call.This) 416 if this := r.checkStdRegexp(obj); this != nil { 417 var sb valueStringBuilder 418 sb.WriteRune('/') 419 if !this.writeEscapedSource(&sb) { 420 sb.WriteString(this.source) 421 } 422 sb.WriteRune('/') 423 if this.pattern.global { 424 sb.WriteRune('g') 425 } 426 if this.pattern.ignoreCase { 427 sb.WriteRune('i') 428 } 429 if this.pattern.multiline { 430 sb.WriteRune('m') 431 } 432 if this.pattern.unicode { 433 sb.WriteRune('u') 434 } 435 if this.pattern.sticky { 436 sb.WriteRune('y') 437 } 438 return sb.String() 439 } 440 pattern := nilSafe(obj.self.getStr("source", nil)).toString() 441 flags := nilSafe(obj.self.getStr("flags", nil)).toString() 442 var sb valueStringBuilder 443 sb.WriteRune('/') 444 sb.WriteString(pattern) 445 sb.WriteRune('/') 446 sb.WriteString(flags) 447 return sb.String() 448} 449 450func (r *regexpObject) writeEscapedSource(sb *valueStringBuilder) bool { 451 if r.source.length() == 0 { 452 sb.WriteString(asciiString("(?:)")) 453 return true 454 } 455 pos := 0 456 lastPos := 0 457 rd := &lenientUtf16Decoder{utf16Reader: r.source.utf16Reader(0)} 458L: 459 for { 460 c, size, err := rd.ReadRune() 461 if err != nil { 462 break 463 } 464 switch c { 465 case '\\': 466 pos++ 467 _, size, err = rd.ReadRune() 468 if err != nil { 469 break L 470 } 471 case '/', '\u000a', '\u000d', '\u2028', '\u2029': 472 sb.WriteSubstring(r.source, lastPos, pos) 473 sb.WriteRune('\\') 474 switch c { 475 case '\u000a': 476 sb.WriteRune('n') 477 case '\u000d': 478 sb.WriteRune('r') 479 default: 480 sb.WriteRune('u') 481 sb.WriteRune(rune(hex[c>>12])) 482 sb.WriteRune(rune(hex[(c>>8)&0xF])) 483 sb.WriteRune(rune(hex[(c>>4)&0xF])) 484 sb.WriteRune(rune(hex[c&0xF])) 485 } 486 lastPos = pos + size 487 } 488 pos += size 489 } 490 if lastPos > 0 { 491 sb.WriteSubstring(r.source, lastPos, r.source.length()) 492 return true 493 } 494 return false 495} 496 497func (r *Runtime) regexpproto_getSource(call FunctionCall) Value { 498 if this, ok := r.toObject(call.This).self.(*regexpObject); ok { 499 var sb valueStringBuilder 500 if this.writeEscapedSource(&sb) { 501 return sb.String() 502 } 503 return this.source 504 } else { 505 r.typeErrorResult(true, "Method RegExp.prototype.source getter called on incompatible receiver") 506 return nil 507 } 508} 509 510func (r *Runtime) regexpproto_getGlobal(call FunctionCall) Value { 511 if this, ok := r.toObject(call.This).self.(*regexpObject); ok { 512 if this.pattern.global { 513 return valueTrue 514 } else { 515 return valueFalse 516 } 517 } else { 518 r.typeErrorResult(true, "Method RegExp.prototype.global getter called on incompatible receiver %s", call.This.toString()) 519 return nil 520 } 521} 522 523func (r *Runtime) regexpproto_getMultiline(call FunctionCall) Value { 524 if this, ok := r.toObject(call.This).self.(*regexpObject); ok { 525 if this.pattern.multiline { 526 return valueTrue 527 } else { 528 return valueFalse 529 } 530 } else { 531 r.typeErrorResult(true, "Method RegExp.prototype.multiline getter called on incompatible receiver %s", call.This.toString()) 532 return nil 533 } 534} 535 536func (r *Runtime) regexpproto_getIgnoreCase(call FunctionCall) Value { 537 if this, ok := r.toObject(call.This).self.(*regexpObject); ok { 538 if this.pattern.ignoreCase { 539 return valueTrue 540 } else { 541 return valueFalse 542 } 543 } else { 544 r.typeErrorResult(true, "Method RegExp.prototype.ignoreCase getter called on incompatible receiver %s", call.This.toString()) 545 return nil 546 } 547} 548 549func (r *Runtime) regexpproto_getUnicode(call FunctionCall) Value { 550 if this, ok := r.toObject(call.This).self.(*regexpObject); ok { 551 if this.pattern.unicode { 552 return valueTrue 553 } else { 554 return valueFalse 555 } 556 } else { 557 r.typeErrorResult(true, "Method RegExp.prototype.unicode getter called on incompatible receiver %s", call.This.toString()) 558 return nil 559 } 560} 561 562func (r *Runtime) regexpproto_getSticky(call FunctionCall) Value { 563 if this, ok := r.toObject(call.This).self.(*regexpObject); ok { 564 if this.pattern.sticky { 565 return valueTrue 566 } else { 567 return valueFalse 568 } 569 } else { 570 r.typeErrorResult(true, "Method RegExp.prototype.sticky getter called on incompatible receiver %s", call.This.toString()) 571 return nil 572 } 573} 574 575func (r *Runtime) regexpproto_getFlags(call FunctionCall) Value { 576 var global, ignoreCase, multiline, sticky, unicode bool 577 578 thisObj := r.toObject(call.This) 579 size := 0 580 if v := thisObj.self.getStr("global", nil); v != nil { 581 global = v.ToBoolean() 582 if global { 583 size++ 584 } 585 } 586 if v := thisObj.self.getStr("ignoreCase", nil); v != nil { 587 ignoreCase = v.ToBoolean() 588 if ignoreCase { 589 size++ 590 } 591 } 592 if v := thisObj.self.getStr("multiline", nil); v != nil { 593 multiline = v.ToBoolean() 594 if multiline { 595 size++ 596 } 597 } 598 if v := thisObj.self.getStr("sticky", nil); v != nil { 599 sticky = v.ToBoolean() 600 if sticky { 601 size++ 602 } 603 } 604 if v := thisObj.self.getStr("unicode", nil); v != nil { 605 unicode = v.ToBoolean() 606 if unicode { 607 size++ 608 } 609 } 610 611 var sb strings.Builder 612 sb.Grow(size) 613 if global { 614 sb.WriteByte('g') 615 } 616 if ignoreCase { 617 sb.WriteByte('i') 618 } 619 if multiline { 620 sb.WriteByte('m') 621 } 622 if unicode { 623 sb.WriteByte('u') 624 } 625 if sticky { 626 sb.WriteByte('y') 627 } 628 629 return asciiString(sb.String()) 630} 631 632func (r *Runtime) regExpExec(execFn func(FunctionCall) Value, rxObj *Object, arg Value) Value { 633 res := execFn(FunctionCall{ 634 This: rxObj, 635 Arguments: []Value{arg}, 636 }) 637 638 if res != _null { 639 if _, ok := res.(*Object); !ok { 640 panic(r.NewTypeError("RegExp exec method returned something other than an Object or null")) 641 } 642 } 643 644 return res 645} 646 647func (r *Runtime) getGlobalRegexpMatches(rxObj *Object, s valueString) []Value { 648 fullUnicode := nilSafe(rxObj.self.getStr("unicode", nil)).ToBoolean() 649 rxObj.self.setOwnStr("lastIndex", intToValue(0), true) 650 execFn, ok := r.toObject(rxObj.self.getStr("exec", nil)).self.assertCallable() 651 if !ok { 652 panic(r.NewTypeError("exec is not a function")) 653 } 654 var a []Value 655 for { 656 res := r.regExpExec(execFn, rxObj, s) 657 if res == _null { 658 break 659 } 660 a = append(a, res) 661 matchStr := nilSafe(r.toObject(res).self.getIdx(valueInt(0), nil)).toString() 662 if matchStr.length() == 0 { 663 thisIndex := toLength(rxObj.self.getStr("lastIndex", nil)) 664 rxObj.self.setOwnStr("lastIndex", valueInt(advanceStringIndex64(s, thisIndex, fullUnicode)), true) 665 } 666 } 667 668 return a 669} 670 671func (r *Runtime) regexpproto_stdMatcherGeneric(rxObj *Object, s valueString) Value { 672 rx := rxObj.self 673 global := rx.getStr("global", nil) 674 if global != nil && global.ToBoolean() { 675 a := r.getGlobalRegexpMatches(rxObj, s) 676 if len(a) == 0 { 677 return _null 678 } 679 ar := make([]Value, 0, len(a)) 680 for _, result := range a { 681 obj := r.toObject(result) 682 matchStr := nilSafe(obj.self.getIdx(valueInt(0), nil)).ToString() 683 ar = append(ar, matchStr) 684 } 685 return r.newArrayValues(ar) 686 } 687 688 execFn, ok := r.toObject(rx.getStr("exec", nil)).self.assertCallable() 689 if !ok { 690 panic(r.NewTypeError("exec is not a function")) 691 } 692 693 return r.regExpExec(execFn, rxObj, s) 694} 695 696func (r *Runtime) checkStdRegexp(rxObj *Object) *regexpObject { 697 if deoptimiseRegexp { 698 return nil 699 } 700 701 rx, ok := rxObj.self.(*regexpObject) 702 if !ok { 703 return nil 704 } 705 706 if !rx.standard || rx.prototype == nil || rx.prototype.self != r.global.stdRegexpProto { 707 return nil 708 } 709 710 return rx 711} 712 713func (r *Runtime) regexpproto_stdMatcher(call FunctionCall) Value { 714 thisObj := r.toObject(call.This) 715 s := call.Argument(0).toString() 716 rx := r.checkStdRegexp(thisObj) 717 if rx == nil { 718 return r.regexpproto_stdMatcherGeneric(thisObj, s) 719 } 720 if rx.pattern.global { 721 res := rx.pattern.findAllSubmatchIndex(s, 0, -1, rx.pattern.sticky) 722 if len(res) == 0 { 723 rx.setOwnStr("lastIndex", intToValue(0), true) 724 return _null 725 } 726 a := make([]Value, 0, len(res)) 727 for _, result := range res { 728 a = append(a, s.substring(result[0], result[1])) 729 } 730 rx.setOwnStr("lastIndex", intToValue(int64(res[len(res)-1][1])), true) 731 return r.newArrayValues(a) 732 } else { 733 return rx.exec(s) 734 } 735} 736 737func (r *Runtime) regexpproto_stdSearchGeneric(rxObj *Object, arg valueString) Value { 738 rx := rxObj.self 739 previousLastIndex := nilSafe(rx.getStr("lastIndex", nil)) 740 zero := intToValue(0) 741 if !previousLastIndex.SameAs(zero) { 742 rx.setOwnStr("lastIndex", zero, true) 743 } 744 execFn, ok := r.toObject(rx.getStr("exec", nil)).self.assertCallable() 745 if !ok { 746 panic(r.NewTypeError("exec is not a function")) 747 } 748 749 result := r.regExpExec(execFn, rxObj, arg) 750 currentLastIndex := nilSafe(rx.getStr("lastIndex", nil)) 751 if !currentLastIndex.SameAs(previousLastIndex) { 752 rx.setOwnStr("lastIndex", previousLastIndex, true) 753 } 754 755 if result == _null { 756 return intToValue(-1) 757 } 758 759 return r.toObject(result).self.getStr("index", nil) 760} 761 762func (r *Runtime) regexpproto_stdMatcherAll(call FunctionCall) Value { 763 thisObj := r.toObject(call.This) 764 s := call.Argument(0).toString() 765 flags := nilSafe(thisObj.self.getStr("flags", nil)).toString() 766 c := r.speciesConstructorObj(call.This.(*Object), r.global.RegExp) 767 matcher := r.toConstructor(c)([]Value{call.This, flags}, nil) 768 matcher.self.setOwnStr("lastIndex", valueInt(toLength(thisObj.self.getStr("lastIndex", nil))), true) 769 flagsStr := flags.String() 770 global := strings.Contains(flagsStr, "g") 771 fullUnicode := strings.Contains(flagsStr, "u") 772 return r.createRegExpStringIterator(matcher, s, global, fullUnicode) 773} 774 775func (r *Runtime) createRegExpStringIterator(matcher *Object, s valueString, global, fullUnicode bool) Value { 776 o := &Object{runtime: r} 777 778 ri := ®ExpStringIterObject{ 779 matcher: matcher, 780 s: s, 781 global: global, 782 fullUnicode: fullUnicode, 783 } 784 ri.class = classRegExpStringIterator 785 ri.val = o 786 ri.extensible = true 787 o.self = ri 788 ri.prototype = r.global.RegExpStringIteratorPrototype 789 ri.init() 790 791 return o 792} 793 794type regExpStringIterObject struct { 795 baseObject 796 matcher *Object 797 s valueString 798 global, fullUnicode, done bool 799} 800 801// RegExpExec as defined in 21.2.5.2.1 802func regExpExec(r *Object, s valueString) Value { 803 exec := r.self.getStr("exec", nil) 804 if execObject, ok := exec.(*Object); ok { 805 if execFn, ok := execObject.self.assertCallable(); ok { 806 return r.runtime.regExpExec(execFn, r, s) 807 } 808 } 809 if rx, ok := r.self.(*regexpObject); ok { 810 return rx.exec(s) 811 } 812 panic(r.runtime.NewTypeError("no RegExpMatcher internal slot")) 813} 814 815func (ri *regExpStringIterObject) next() (v Value) { 816 if ri.done { 817 return ri.val.runtime.createIterResultObject(_undefined, true) 818 } 819 820 match := regExpExec(ri.matcher, ri.s) 821 if IsNull(match) { 822 ri.done = true 823 return ri.val.runtime.createIterResultObject(_undefined, true) 824 } 825 if !ri.global { 826 ri.done = true 827 return ri.val.runtime.createIterResultObject(match, false) 828 } 829 830 matchStr := nilSafe(ri.val.runtime.toObject(match).self.getIdx(valueInt(0), nil)).toString() 831 if matchStr.length() == 0 { 832 thisIndex := toLength(ri.matcher.self.getStr("lastIndex", nil)) 833 ri.matcher.self.setOwnStr("lastIndex", valueInt(advanceStringIndex64(ri.s, thisIndex, ri.fullUnicode)), true) 834 } 835 return ri.val.runtime.createIterResultObject(match, false) 836} 837 838func (r *Runtime) regexpproto_stdSearch(call FunctionCall) Value { 839 thisObj := r.toObject(call.This) 840 s := call.Argument(0).toString() 841 rx := r.checkStdRegexp(thisObj) 842 if rx == nil { 843 return r.regexpproto_stdSearchGeneric(thisObj, s) 844 } 845 846 previousLastIndex := rx.getStr("lastIndex", nil) 847 rx.setOwnStr("lastIndex", intToValue(0), true) 848 849 match, result := rx.execRegexp(s) 850 rx.setOwnStr("lastIndex", previousLastIndex, true) 851 852 if !match { 853 return intToValue(-1) 854 } 855 return intToValue(int64(result[0])) 856} 857 858func (r *Runtime) regexpproto_stdSplitterGeneric(splitter *Object, s valueString, limit Value, unicodeMatching bool) Value { 859 var a []Value 860 var lim int64 861 if limit == nil || limit == _undefined { 862 lim = maxInt - 1 863 } else { 864 lim = toLength(limit) 865 } 866 if lim == 0 { 867 return r.newArrayValues(a) 868 } 869 size := s.length() 870 p := 0 871 execFn := toMethod(splitter.ToObject(r).self.getStr("exec", nil)) // must be non-nil 872 873 if size == 0 { 874 if r.regExpExec(execFn, splitter, s) == _null { 875 a = append(a, s) 876 } 877 return r.newArrayValues(a) 878 } 879 880 q := p 881 for q < size { 882 splitter.self.setOwnStr("lastIndex", intToValue(int64(q)), true) 883 z := r.regExpExec(execFn, splitter, s) 884 if z == _null { 885 q = advanceStringIndex(s, q, unicodeMatching) 886 } else { 887 z := r.toObject(z) 888 e := toLength(splitter.self.getStr("lastIndex", nil)) 889 if e == int64(p) { 890 q = advanceStringIndex(s, q, unicodeMatching) 891 } else { 892 a = append(a, s.substring(p, q)) 893 if int64(len(a)) == lim { 894 return r.newArrayValues(a) 895 } 896 if e > int64(size) { 897 p = size 898 } else { 899 p = int(e) 900 } 901 numberOfCaptures := max(toLength(z.self.getStr("length", nil))-1, 0) 902 for i := int64(1); i <= numberOfCaptures; i++ { 903 a = append(a, nilSafe(z.self.getIdx(valueInt(i), nil))) 904 if int64(len(a)) == lim { 905 return r.newArrayValues(a) 906 } 907 } 908 q = p 909 } 910 } 911 } 912 a = append(a, s.substring(p, size)) 913 return r.newArrayValues(a) 914} 915 916func advanceStringIndex(s valueString, pos int, unicode bool) int { 917 next := pos + 1 918 if !unicode { 919 return next 920 } 921 l := s.length() 922 if next >= l { 923 return next 924 } 925 if !isUTF16FirstSurrogate(s.charAt(pos)) { 926 return next 927 } 928 if !isUTF16SecondSurrogate(s.charAt(next)) { 929 return next 930 } 931 return next + 1 932} 933 934func advanceStringIndex64(s valueString, pos int64, unicode bool) int64 { 935 next := pos + 1 936 if !unicode { 937 return next 938 } 939 l := int64(s.length()) 940 if next >= l { 941 return next 942 } 943 if !isUTF16FirstSurrogate(s.charAt(int(pos))) { 944 return next 945 } 946 if !isUTF16SecondSurrogate(s.charAt(int(next))) { 947 return next 948 } 949 return next + 1 950} 951 952func (r *Runtime) regexpproto_stdSplitter(call FunctionCall) Value { 953 rxObj := r.toObject(call.This) 954 s := call.Argument(0).toString() 955 limitValue := call.Argument(1) 956 var splitter *Object 957 search := r.checkStdRegexp(rxObj) 958 c := r.speciesConstructorObj(rxObj, r.global.RegExp) 959 if search == nil || c != r.global.RegExp { 960 flags := nilSafe(rxObj.self.getStr("flags", nil)).toString() 961 flagsStr := flags.String() 962 963 // Add 'y' flag if missing 964 if !strings.Contains(flagsStr, "y") { 965 flags = flags.concat(asciiString("y")) 966 } 967 splitter = r.toConstructor(c)([]Value{rxObj, flags}, nil) 968 search = r.checkStdRegexp(splitter) 969 if search == nil { 970 return r.regexpproto_stdSplitterGeneric(splitter, s, limitValue, strings.Contains(flagsStr, "u")) 971 } 972 } 973 974 pattern := search.pattern // toUint32() may recompile the pattern, but we still need to use the original 975 limit := -1 976 if limitValue != _undefined { 977 limit = int(toUint32(limitValue)) 978 } 979 980 if limit == 0 { 981 return r.newArrayValues(nil) 982 } 983 984 targetLength := s.length() 985 var valueArray []Value 986 lastIndex := 0 987 found := 0 988 989 result := pattern.findAllSubmatchIndex(s, 0, -1, false) 990 if targetLength == 0 { 991 if result == nil { 992 valueArray = append(valueArray, s) 993 } 994 goto RETURN 995 } 996 997 for _, match := range result { 998 if match[0] == match[1] { 999 // FIXME Ugh, this is a hack 1000 if match[0] == 0 || match[0] == targetLength { 1001 continue 1002 } 1003 } 1004 1005 if lastIndex != match[0] { 1006 valueArray = append(valueArray, s.substring(lastIndex, match[0])) 1007 found++ 1008 } else if lastIndex == match[0] { 1009 if lastIndex != -1 { 1010 valueArray = append(valueArray, stringEmpty) 1011 found++ 1012 } 1013 } 1014 1015 lastIndex = match[1] 1016 if found == limit { 1017 goto RETURN 1018 } 1019 1020 captureCount := len(match) / 2 1021 for index := 1; index < captureCount; index++ { 1022 offset := index * 2 1023 var value Value 1024 if match[offset] != -1 { 1025 value = s.substring(match[offset], match[offset+1]) 1026 } else { 1027 value = _undefined 1028 } 1029 valueArray = append(valueArray, value) 1030 found++ 1031 if found == limit { 1032 goto RETURN 1033 } 1034 } 1035 } 1036 1037 if found != limit { 1038 if lastIndex != targetLength { 1039 valueArray = append(valueArray, s.substring(lastIndex, targetLength)) 1040 } else { 1041 valueArray = append(valueArray, stringEmpty) 1042 } 1043 } 1044 1045RETURN: 1046 return r.newArrayValues(valueArray) 1047} 1048 1049func (r *Runtime) regexpproto_stdReplacerGeneric(rxObj *Object, s, replaceStr valueString, rcall func(FunctionCall) Value) Value { 1050 var results []Value 1051 if nilSafe(rxObj.self.getStr("global", nil)).ToBoolean() { 1052 results = r.getGlobalRegexpMatches(rxObj, s) 1053 } else { 1054 execFn := toMethod(rxObj.self.getStr("exec", nil)) // must be non-nil 1055 result := r.regExpExec(execFn, rxObj, s) 1056 if result != _null { 1057 results = append(results, result) 1058 } 1059 } 1060 lengthS := s.length() 1061 nextSourcePosition := 0 1062 var resultBuf valueStringBuilder 1063 for _, result := range results { 1064 obj := r.toObject(result) 1065 nCaptures := max(toLength(obj.self.getStr("length", nil))-1, 0) 1066 matched := nilSafe(obj.self.getIdx(valueInt(0), nil)).toString() 1067 matchLength := matched.length() 1068 position := toIntStrict(max(min(nilSafe(obj.self.getStr("index", nil)).ToInteger(), int64(lengthS)), 0)) 1069 var captures []Value 1070 if rcall != nil { 1071 captures = make([]Value, 0, nCaptures+3) 1072 } else { 1073 captures = make([]Value, 0, nCaptures+1) 1074 } 1075 captures = append(captures, matched) 1076 for n := int64(1); n <= nCaptures; n++ { 1077 capN := nilSafe(obj.self.getIdx(valueInt(n), nil)) 1078 if capN != _undefined { 1079 capN = capN.ToString() 1080 } 1081 captures = append(captures, capN) 1082 } 1083 var replacement valueString 1084 if rcall != nil { 1085 captures = append(captures, intToValue(int64(position)), s) 1086 replacement = rcall(FunctionCall{ 1087 This: _undefined, 1088 Arguments: captures, 1089 }).toString() 1090 if position >= nextSourcePosition { 1091 resultBuf.WriteString(s.substring(nextSourcePosition, position)) 1092 resultBuf.WriteString(replacement) 1093 nextSourcePosition = position + matchLength 1094 } 1095 } else { 1096 if position >= nextSourcePosition { 1097 resultBuf.WriteString(s.substring(nextSourcePosition, position)) 1098 writeSubstitution(s, position, len(captures), func(idx int) valueString { 1099 capture := captures[idx] 1100 if capture != _undefined { 1101 return capture.toString() 1102 } 1103 return stringEmpty 1104 }, replaceStr, &resultBuf) 1105 nextSourcePosition = position + matchLength 1106 } 1107 } 1108 } 1109 if nextSourcePosition < lengthS { 1110 resultBuf.WriteString(s.substring(nextSourcePosition, lengthS)) 1111 } 1112 return resultBuf.String() 1113} 1114 1115func writeSubstitution(s valueString, position int, numCaptures int, getCapture func(int) valueString, replaceStr valueString, buf *valueStringBuilder) { 1116 l := s.length() 1117 rl := replaceStr.length() 1118 matched := getCapture(0) 1119 tailPos := position + matched.length() 1120 1121 for i := 0; i < rl; i++ { 1122 c := replaceStr.charAt(i) 1123 if c == '$' && i < rl-1 { 1124 ch := replaceStr.charAt(i + 1) 1125 switch ch { 1126 case '$': 1127 buf.WriteRune('$') 1128 case '`': 1129 buf.WriteString(s.substring(0, position)) 1130 case '\'': 1131 if tailPos < l { 1132 buf.WriteString(s.substring(tailPos, l)) 1133 } 1134 case '&': 1135 buf.WriteString(matched) 1136 default: 1137 matchNumber := 0 1138 j := i + 1 1139 for j < rl { 1140 ch := replaceStr.charAt(j) 1141 if ch >= '0' && ch <= '9' { 1142 m := matchNumber*10 + int(ch-'0') 1143 if m >= numCaptures { 1144 break 1145 } 1146 matchNumber = m 1147 j++ 1148 } else { 1149 break 1150 } 1151 } 1152 if matchNumber > 0 { 1153 buf.WriteString(getCapture(matchNumber)) 1154 i = j - 1 1155 continue 1156 } else { 1157 buf.WriteRune('$') 1158 buf.WriteRune(ch) 1159 } 1160 } 1161 i++ 1162 } else { 1163 buf.WriteRune(c) 1164 } 1165 } 1166} 1167 1168func (r *Runtime) regexpproto_stdReplacer(call FunctionCall) Value { 1169 rxObj := r.toObject(call.This) 1170 s := call.Argument(0).toString() 1171 replaceStr, rcall := getReplaceValue(call.Argument(1)) 1172 1173 rx := r.checkStdRegexp(rxObj) 1174 if rx == nil { 1175 return r.regexpproto_stdReplacerGeneric(rxObj, s, replaceStr, rcall) 1176 } 1177 1178 var index int64 1179 find := 1 1180 if rx.pattern.global { 1181 find = -1 1182 rx.setOwnStr("lastIndex", intToValue(0), true) 1183 } else { 1184 index = rx.getLastIndex() 1185 } 1186 found := rx.pattern.findAllSubmatchIndex(s, toIntStrict(index), find, rx.pattern.sticky) 1187 if len(found) > 0 { 1188 if !rx.updateLastIndex(index, found[0], found[len(found)-1]) { 1189 found = nil 1190 } 1191 } else { 1192 rx.updateLastIndex(index, nil, nil) 1193 } 1194 1195 return stringReplace(s, found, replaceStr, rcall) 1196} 1197 1198func (r *Runtime) regExpStringIteratorProto_next(call FunctionCall) Value { 1199 thisObj := r.toObject(call.This) 1200 if iter, ok := thisObj.self.(*regExpStringIterObject); ok { 1201 return iter.next() 1202 } 1203 panic(r.NewTypeError("Method RegExp String Iterator.prototype.next called on incompatible receiver %s", thisObj.String())) 1204} 1205 1206func (r *Runtime) createRegExpStringIteratorPrototype(val *Object) objectImpl { 1207 o := newBaseObjectObj(val, r.global.IteratorPrototype, classObject) 1208 1209 o._putProp("next", r.newNativeFunc(r.regExpStringIteratorProto_next, nil, "next", nil, 0), true, false, true) 1210 o._putSym(SymToStringTag, valueProp(asciiString(classRegExpStringIterator), false, false, true)) 1211 1212 return o 1213} 1214 1215func (r *Runtime) initRegExp() { 1216 o := r.newGuardedObject(r.global.ObjectPrototype, classObject) 1217 r.global.RegExpPrototype = o.val 1218 r.global.stdRegexpProto = o 1219 r.global.RegExpStringIteratorPrototype = r.newLazyObject(r.createRegExpStringIteratorPrototype) 1220 1221 o._putProp("compile", r.newNativeFunc(r.regexpproto_compile, nil, "compile", nil, 2), true, false, true) 1222 o._putProp("exec", r.newNativeFunc(r.regexpproto_exec, nil, "exec", nil, 1), true, false, true) 1223 o._putProp("test", r.newNativeFunc(r.regexpproto_test, nil, "test", nil, 1), true, false, true) 1224 o._putProp("toString", r.newNativeFunc(r.regexpproto_toString, nil, "toString", nil, 0), true, false, true) 1225 o.setOwnStr("source", &valueProperty{ 1226 configurable: true, 1227 getterFunc: r.newNativeFunc(r.regexpproto_getSource, nil, "get source", nil, 0), 1228 accessor: true, 1229 }, false) 1230 o.setOwnStr("global", &valueProperty{ 1231 configurable: true, 1232 getterFunc: r.newNativeFunc(r.regexpproto_getGlobal, nil, "get global", nil, 0), 1233 accessor: true, 1234 }, false) 1235 o.setOwnStr("multiline", &valueProperty{ 1236 configurable: true, 1237 getterFunc: r.newNativeFunc(r.regexpproto_getMultiline, nil, "get multiline", nil, 0), 1238 accessor: true, 1239 }, false) 1240 o.setOwnStr("ignoreCase", &valueProperty{ 1241 configurable: true, 1242 getterFunc: r.newNativeFunc(r.regexpproto_getIgnoreCase, nil, "get ignoreCase", nil, 0), 1243 accessor: true, 1244 }, false) 1245 o.setOwnStr("unicode", &valueProperty{ 1246 configurable: true, 1247 getterFunc: r.newNativeFunc(r.regexpproto_getUnicode, nil, "get unicode", nil, 0), 1248 accessor: true, 1249 }, false) 1250 o.setOwnStr("sticky", &valueProperty{ 1251 configurable: true, 1252 getterFunc: r.newNativeFunc(r.regexpproto_getSticky, nil, "get sticky", nil, 0), 1253 accessor: true, 1254 }, false) 1255 o.setOwnStr("flags", &valueProperty{ 1256 configurable: true, 1257 getterFunc: r.newNativeFunc(r.regexpproto_getFlags, nil, "get flags", nil, 0), 1258 accessor: true, 1259 }, false) 1260 1261 o._putSym(SymMatch, valueProp(r.newNativeFunc(r.regexpproto_stdMatcher, nil, "[Symbol.match]", nil, 1), true, false, true)) 1262 o._putSym(SymMatchAll, valueProp(r.newNativeFunc(r.regexpproto_stdMatcherAll, nil, "[Symbol.matchAll]", nil, 1), true, false, true)) 1263 o._putSym(SymSearch, valueProp(r.newNativeFunc(r.regexpproto_stdSearch, nil, "[Symbol.search]", nil, 1), true, false, true)) 1264 o._putSym(SymSplit, valueProp(r.newNativeFunc(r.regexpproto_stdSplitter, nil, "[Symbol.split]", nil, 2), true, false, true)) 1265 o._putSym(SymReplace, valueProp(r.newNativeFunc(r.regexpproto_stdReplacer, nil, "[Symbol.replace]", nil, 2), true, false, true)) 1266 o.guard("exec", "global", "multiline", "ignoreCase", "unicode", "sticky") 1267 1268 r.global.RegExp = r.newNativeFunc(r.builtin_RegExp, r.builtin_newRegExp, "RegExp", r.global.RegExpPrototype, 2) 1269 rx := r.global.RegExp.self 1270 rx._putSym(SymSpecies, &valueProperty{ 1271 getterFunc: r.newNativeFunc(r.returnThis, nil, "get [Symbol.species]", nil, 0), 1272 accessor: true, 1273 configurable: true, 1274 }) 1275 r.addToGlobal("RegExp", r.global.RegExp) 1276} 1277