1package goja 2 3import ( 4 "errors" 5 "fmt" 6 "hash/maphash" 7 "io" 8 "math" 9 "reflect" 10 "strings" 11 "unicode/utf16" 12 "unicode/utf8" 13 14 "github.com/dop251/goja/parser" 15 "github.com/dop251/goja/unistring" 16 "golang.org/x/text/cases" 17 "golang.org/x/text/language" 18) 19 20type unicodeString []uint16 21 22type unicodeRuneReader struct { 23 s unicodeString 24 pos int 25} 26 27type utf16RuneReader struct { 28 s unicodeString 29 pos int 30} 31 32// passes through invalid surrogate pairs 33type lenientUtf16Decoder struct { 34 utf16Reader io.RuneReader 35 prev rune 36 prevSet bool 37} 38 39type valueStringBuilder struct { 40 asciiBuilder strings.Builder 41 unicodeBuilder unicodeStringBuilder 42} 43 44type unicodeStringBuilder struct { 45 buf []uint16 46 unicode bool 47} 48 49var ( 50 InvalidRuneError = errors.New("invalid rune") 51) 52 53func (rr *utf16RuneReader) ReadRune() (r rune, size int, err error) { 54 if rr.pos < len(rr.s) { 55 r = rune(rr.s[rr.pos]) 56 size++ 57 rr.pos++ 58 return 59 } 60 err = io.EOF 61 return 62} 63 64func (rr *lenientUtf16Decoder) ReadRune() (r rune, size int, err error) { 65 if rr.prevSet { 66 r = rr.prev 67 size = 1 68 rr.prevSet = false 69 } else { 70 r, size, err = rr.utf16Reader.ReadRune() 71 if err != nil { 72 return 73 } 74 } 75 if isUTF16FirstSurrogate(r) { 76 second, _, err1 := rr.utf16Reader.ReadRune() 77 if err1 != nil { 78 if err1 != io.EOF { 79 err = err1 80 } 81 return 82 } 83 if isUTF16SecondSurrogate(second) { 84 r = utf16.DecodeRune(r, second) 85 size++ 86 } else { 87 rr.prev = second 88 rr.prevSet = true 89 } 90 } 91 92 return 93} 94 95func (rr *unicodeRuneReader) ReadRune() (r rune, size int, err error) { 96 if rr.pos < len(rr.s) { 97 r = rune(rr.s[rr.pos]) 98 size++ 99 rr.pos++ 100 if isUTF16FirstSurrogate(r) { 101 if rr.pos < len(rr.s) { 102 second := rune(rr.s[rr.pos]) 103 if isUTF16SecondSurrogate(second) { 104 r = utf16.DecodeRune(r, second) 105 size++ 106 rr.pos++ 107 } else { 108 err = InvalidRuneError 109 } 110 } else { 111 err = InvalidRuneError 112 } 113 } else if isUTF16SecondSurrogate(r) { 114 err = InvalidRuneError 115 } 116 } else { 117 err = io.EOF 118 } 119 return 120} 121 122func (b *unicodeStringBuilder) grow(n int) { 123 if cap(b.buf)-len(b.buf) < n { 124 buf := make([]uint16, len(b.buf), 2*cap(b.buf)+n) 125 copy(buf, b.buf) 126 b.buf = buf 127 } 128} 129 130func (b *unicodeStringBuilder) Grow(n int) { 131 b.grow(n + 1) 132} 133 134func (b *unicodeStringBuilder) ensureStarted(initialSize int) { 135 b.grow(len(b.buf) + initialSize + 1) 136 if len(b.buf) == 0 { 137 b.buf = append(b.buf, unistring.BOM) 138 } 139} 140 141func (b *unicodeStringBuilder) WriteString(s valueString) { 142 b.ensureStarted(s.length()) 143 switch s := s.(type) { 144 case unicodeString: 145 b.buf = append(b.buf, s[1:]...) 146 b.unicode = true 147 case asciiString: 148 for i := 0; i < len(s); i++ { 149 b.buf = append(b.buf, uint16(s[i])) 150 } 151 default: 152 panic(fmt.Errorf("unsupported string type: %T", s)) 153 } 154} 155 156func (b *unicodeStringBuilder) String() valueString { 157 if b.unicode { 158 return unicodeString(b.buf) 159 } 160 if len(b.buf) == 0 { 161 return stringEmpty 162 } 163 buf := make([]byte, 0, len(b.buf)-1) 164 for _, c := range b.buf[1:] { 165 buf = append(buf, byte(c)) 166 } 167 return asciiString(buf) 168} 169 170func (b *unicodeStringBuilder) WriteRune(r rune) { 171 if r <= 0xFFFF { 172 b.ensureStarted(1) 173 b.buf = append(b.buf, uint16(r)) 174 if !b.unicode && r >= utf8.RuneSelf { 175 b.unicode = true 176 } 177 } else { 178 b.ensureStarted(2) 179 first, second := utf16.EncodeRune(r) 180 b.buf = append(b.buf, uint16(first), uint16(second)) 181 b.unicode = true 182 } 183} 184 185func (b *unicodeStringBuilder) writeASCIIString(bytes string) { 186 b.ensureStarted(len(bytes)) 187 for _, c := range bytes { 188 b.buf = append(b.buf, uint16(c)) 189 } 190} 191 192func (b *valueStringBuilder) ascii() bool { 193 return len(b.unicodeBuilder.buf) == 0 194} 195 196func (b *valueStringBuilder) WriteString(s valueString) { 197 if ascii, ok := s.(asciiString); ok { 198 if b.ascii() { 199 b.asciiBuilder.WriteString(string(ascii)) 200 } else { 201 b.unicodeBuilder.writeASCIIString(string(ascii)) 202 } 203 } else { 204 b.switchToUnicode(s.length()) 205 b.unicodeBuilder.WriteString(s) 206 } 207} 208 209func (b *valueStringBuilder) WriteRune(r rune) { 210 if r < utf8.RuneSelf { 211 if b.ascii() { 212 b.asciiBuilder.WriteByte(byte(r)) 213 } else { 214 b.unicodeBuilder.WriteRune(r) 215 } 216 } else { 217 var extraLen int 218 if r <= 0xFFFF { 219 extraLen = 1 220 } else { 221 extraLen = 2 222 } 223 b.switchToUnicode(extraLen) 224 b.unicodeBuilder.WriteRune(r) 225 } 226} 227 228func (b *valueStringBuilder) String() valueString { 229 if b.ascii() { 230 return asciiString(b.asciiBuilder.String()) 231 } 232 return b.unicodeBuilder.String() 233} 234 235func (b *valueStringBuilder) Grow(n int) { 236 if b.ascii() { 237 b.asciiBuilder.Grow(n) 238 } else { 239 b.unicodeBuilder.Grow(n) 240 } 241} 242 243func (b *valueStringBuilder) switchToUnicode(extraLen int) { 244 if b.ascii() { 245 b.unicodeBuilder.ensureStarted(b.asciiBuilder.Len() + extraLen) 246 b.unicodeBuilder.writeASCIIString(b.asciiBuilder.String()) 247 b.asciiBuilder.Reset() 248 } 249} 250 251func (b *valueStringBuilder) WriteSubstring(source valueString, start int, end int) { 252 if ascii, ok := source.(asciiString); ok { 253 if b.ascii() { 254 b.asciiBuilder.WriteString(string(ascii[start:end])) 255 } else { 256 b.unicodeBuilder.writeASCIIString(string(ascii[start:end])) 257 } 258 return 259 } 260 us := source.(unicodeString) 261 if b.ascii() { 262 uc := false 263 for i := start; i < end; i++ { 264 if us.charAt(i) >= utf8.RuneSelf { 265 uc = true 266 break 267 } 268 } 269 if uc { 270 b.switchToUnicode(end - start + 1) 271 } else { 272 b.asciiBuilder.Grow(end - start + 1) 273 for i := start; i < end; i++ { 274 b.asciiBuilder.WriteByte(byte(us.charAt(i))) 275 } 276 return 277 } 278 } 279 b.unicodeBuilder.buf = append(b.unicodeBuilder.buf, us[start+1:end+1]...) 280 b.unicodeBuilder.unicode = true 281} 282 283func (s unicodeString) reader(start int) io.RuneReader { 284 return &unicodeRuneReader{ 285 s: s[start+1:], 286 } 287} 288 289func (s unicodeString) utf16Reader(start int) io.RuneReader { 290 return &utf16RuneReader{ 291 s: s[start+1:], 292 } 293} 294 295func (s unicodeString) utf16Runes() []rune { 296 runes := make([]rune, len(s)-1) 297 for i, ch := range s[1:] { 298 runes[i] = rune(ch) 299 } 300 return runes 301} 302 303func (s unicodeString) ToInteger() int64 { 304 return 0 305} 306 307func (s unicodeString) toString() valueString { 308 return s 309} 310 311func (s unicodeString) ToString() Value { 312 return s 313} 314 315func (s unicodeString) ToFloat() float64 { 316 return math.NaN() 317} 318 319func (s unicodeString) ToBoolean() bool { 320 return len(s) > 0 321} 322 323func (s unicodeString) toTrimmedUTF8() string { 324 if len(s) == 0 { 325 return "" 326 } 327 return strings.Trim(s.String(), parser.WhitespaceChars) 328} 329 330func (s unicodeString) ToNumber() Value { 331 return asciiString(s.toTrimmedUTF8()).ToNumber() 332} 333 334func (s unicodeString) ToObject(r *Runtime) *Object { 335 return r._newString(s, r.global.StringPrototype) 336} 337 338func (s unicodeString) equals(other unicodeString) bool { 339 if len(s) != len(other) { 340 return false 341 } 342 for i, r := range s { 343 if r != other[i] { 344 return false 345 } 346 } 347 return true 348} 349 350func (s unicodeString) SameAs(other Value) bool { 351 if otherStr, ok := other.(unicodeString); ok { 352 return s.equals(otherStr) 353 } 354 355 return false 356} 357 358func (s unicodeString) Equals(other Value) bool { 359 if s.SameAs(other) { 360 return true 361 } 362 363 if o, ok := other.(*Object); ok { 364 return s.Equals(o.toPrimitive()) 365 } 366 return false 367} 368 369func (s unicodeString) StrictEquals(other Value) bool { 370 return s.SameAs(other) 371} 372 373func (s unicodeString) baseObject(r *Runtime) *Object { 374 ss := r.stringSingleton 375 ss.value = s 376 ss.setLength() 377 return ss.val 378} 379 380func (s unicodeString) charAt(idx int) rune { 381 return rune(s[idx+1]) 382} 383 384func (s unicodeString) length() int { 385 return len(s) - 1 386} 387 388func (s unicodeString) concat(other valueString) valueString { 389 switch other := other.(type) { 390 case unicodeString: 391 b := make(unicodeString, len(s)+len(other)-1) 392 copy(b, s) 393 copy(b[len(s):], other[1:]) 394 return b 395 case asciiString: 396 b := make([]uint16, len(s)+len(other)) 397 copy(b, s) 398 b1 := b[len(s):] 399 for i := 0; i < len(other); i++ { 400 b1[i] = uint16(other[i]) 401 } 402 return unicodeString(b) 403 default: 404 panic(fmt.Errorf("Unknown string type: %T", other)) 405 } 406} 407 408func (s unicodeString) substring(start, end int) valueString { 409 ss := s[start+1 : end+1] 410 for _, c := range ss { 411 if c >= utf8.RuneSelf { 412 b := make(unicodeString, end-start+1) 413 b[0] = unistring.BOM 414 copy(b[1:], ss) 415 return b 416 } 417 } 418 as := make([]byte, end-start) 419 for i, c := range ss { 420 as[i] = byte(c) 421 } 422 return asciiString(as) 423} 424 425func (s unicodeString) String() string { 426 return string(utf16.Decode(s[1:])) 427} 428 429func (s unicodeString) compareTo(other valueString) int { 430 // TODO handle invalid UTF-16 431 return strings.Compare(s.String(), other.String()) 432} 433 434func (s unicodeString) index(substr valueString, start int) int { 435 var ss []uint16 436 switch substr := substr.(type) { 437 case unicodeString: 438 ss = substr[1:] 439 case asciiString: 440 ss = make([]uint16, len(substr)) 441 for i := 0; i < len(substr); i++ { 442 ss[i] = uint16(substr[i]) 443 } 444 default: 445 panic(fmt.Errorf("unknown string type: %T", substr)) 446 } 447 s1 := s[1:] 448 // TODO: optimise 449 end := len(s1) - len(ss) 450 for start <= end { 451 for i := 0; i < len(ss); i++ { 452 if s1[start+i] != ss[i] { 453 goto nomatch 454 } 455 } 456 457 return start 458 nomatch: 459 start++ 460 } 461 return -1 462} 463 464func (s unicodeString) lastIndex(substr valueString, start int) int { 465 var ss []uint16 466 switch substr := substr.(type) { 467 case unicodeString: 468 ss = substr[1:] 469 case asciiString: 470 ss = make([]uint16, len(substr)) 471 for i := 0; i < len(substr); i++ { 472 ss[i] = uint16(substr[i]) 473 } 474 default: 475 panic(fmt.Errorf("Unknown string type: %T", substr)) 476 } 477 478 s1 := s[1:] 479 if maxStart := len(s1) - len(ss); start > maxStart { 480 start = maxStart 481 } 482 // TODO: optimise 483 for start >= 0 { 484 for i := 0; i < len(ss); i++ { 485 if s1[start+i] != ss[i] { 486 goto nomatch 487 } 488 } 489 490 return start 491 nomatch: 492 start-- 493 } 494 return -1 495} 496 497func unicodeStringFromRunes(r []rune) unicodeString { 498 return unistring.NewFromRunes(r).AsUtf16() 499} 500 501func (s unicodeString) toLower() valueString { 502 caser := cases.Lower(language.Und) 503 r := []rune(caser.String(s.String())) 504 // Workaround 505 ascii := true 506 for i := 0; i < len(r)-1; i++ { 507 if (i == 0 || r[i-1] != 0x3b1) && r[i] == 0x345 && r[i+1] == 0x3c2 { 508 i++ 509 r[i] = 0x3c3 510 } 511 if r[i] >= utf8.RuneSelf { 512 ascii = false 513 } 514 } 515 if ascii { 516 ascii = r[len(r)-1] < utf8.RuneSelf 517 } 518 if ascii { 519 return asciiString(r) 520 } 521 return unicodeStringFromRunes(r) 522} 523 524func (s unicodeString) toUpper() valueString { 525 caser := cases.Upper(language.Und) 526 return newStringValue(caser.String(s.String())) 527} 528 529func (s unicodeString) Export() interface{} { 530 return s.String() 531} 532 533func (s unicodeString) ExportType() reflect.Type { 534 return reflectTypeString 535} 536 537func (s unicodeString) hash(hash *maphash.Hash) uint64 { 538 _, _ = hash.WriteString(string(unistring.FromUtf16(s))) 539 h := hash.Sum64() 540 hash.Reset() 541 return h 542} 543 544func (s unicodeString) string() unistring.String { 545 return unistring.FromUtf16(s) 546} 547