1package goja
2
3import (
4	"errors"
5	"fmt"
6	"hash/maphash"
7	"io"
8	"math"
9	"reflect"
10	"strings"
11	"unicode/utf16"
12	"unicode/utf8"
13
14	"github.com/dop251/goja/parser"
15	"github.com/dop251/goja/unistring"
16	"golang.org/x/text/cases"
17	"golang.org/x/text/language"
18)
19
// unicodeString is a string value stored as UTF-16 code units. Element 0 is a
// marker slot (the builders and substring in this file store unistring.BOM
// there), so the character at logical index i lives at s[i+1] and the logical
// length is len(s)-1.
type unicodeString []uint16
21
// unicodeRuneReader reads code points from a slice of UTF-16 code units,
// combining surrogate pairs. Its ReadRune reports InvalidRuneError for an
// unpaired surrogate.
type unicodeRuneReader struct {
	s   unicodeString // code units to read (marker slot already sliced off by reader())
	pos int           // index of the next unread code unit in s
}
26
// utf16RuneReader yields the code units of s one at a time, each widened to a
// rune; surrogate pairs are not combined.
type utf16RuneReader struct {
	s   unicodeString // code units to read (marker slot already sliced off by utf16Reader())
	pos int           // index of the next unread code unit in s
}
31
// lenientUtf16Decoder wraps a reader of individual UTF-16 code units and
// combines valid surrogate pairs into a single rune. Invalid (unpaired)
// surrogates are passed through unchanged instead of producing an error.
type lenientUtf16Decoder struct {
	utf16Reader io.RuneReader // source of individual code units
	prev        rune          // code unit read ahead while probing for a low surrogate
	prevSet     bool          // true when prev holds a unit that has not been returned yet
}
38
// valueStringBuilder accumulates a string value. Content goes into
// asciiBuilder for as long as unicodeBuilder's buffer is empty; once
// switchToUnicode has run, everything is written to unicodeBuilder.
type valueStringBuilder struct {
	asciiBuilder   strings.Builder
	unicodeBuilder unicodeStringBuilder
}
43
// unicodeStringBuilder accumulates UTF-16 code units in the unicodeString
// layout (marker slot at index 0, written by ensureStarted).
type unicodeStringBuilder struct {
	buf     []uint16 // marker slot followed by the code units written so far
	unicode bool     // set once a unit >= utf8.RuneSelf or a unicodeString has been written
}
48
var (
	// InvalidRuneError is returned by unicodeRuneReader.ReadRune when it
	// encounters an unpaired UTF-16 surrogate.
	InvalidRuneError = errors.New("invalid rune")
)
52
53func (rr *utf16RuneReader) ReadRune() (r rune, size int, err error) {
54	if rr.pos < len(rr.s) {
55		r = rune(rr.s[rr.pos])
56		size++
57		rr.pos++
58		return
59	}
60	err = io.EOF
61	return
62}
63
64func (rr *lenientUtf16Decoder) ReadRune() (r rune, size int, err error) {
65	if rr.prevSet {
66		r = rr.prev
67		size = 1
68		rr.prevSet = false
69	} else {
70		r, size, err = rr.utf16Reader.ReadRune()
71		if err != nil {
72			return
73		}
74	}
75	if isUTF16FirstSurrogate(r) {
76		second, _, err1 := rr.utf16Reader.ReadRune()
77		if err1 != nil {
78			if err1 != io.EOF {
79				err = err1
80			}
81			return
82		}
83		if isUTF16SecondSurrogate(second) {
84			r = utf16.DecodeRune(r, second)
85			size++
86		} else {
87			rr.prev = second
88			rr.prevSet = true
89		}
90	}
91
92	return
93}
94
95func (rr *unicodeRuneReader) ReadRune() (r rune, size int, err error) {
96	if rr.pos < len(rr.s) {
97		r = rune(rr.s[rr.pos])
98		size++
99		rr.pos++
100		if isUTF16FirstSurrogate(r) {
101			if rr.pos < len(rr.s) {
102				second := rune(rr.s[rr.pos])
103				if isUTF16SecondSurrogate(second) {
104					r = utf16.DecodeRune(r, second)
105					size++
106					rr.pos++
107				} else {
108					err = InvalidRuneError
109				}
110			} else {
111				err = InvalidRuneError
112			}
113		} else if isUTF16SecondSurrogate(r) {
114			err = InvalidRuneError
115		}
116	} else {
117		err = io.EOF
118	}
119	return
120}
121
122func (b *unicodeStringBuilder) grow(n int) {
123	if cap(b.buf)-len(b.buf) < n {
124		buf := make([]uint16, len(b.buf), 2*cap(b.buf)+n)
125		copy(buf, b.buf)
126		b.buf = buf
127	}
128}
129
130func (b *unicodeStringBuilder) Grow(n int) {
131	b.grow(n + 1)
132}
133
134func (b *unicodeStringBuilder) ensureStarted(initialSize int) {
135	b.grow(len(b.buf) + initialSize + 1)
136	if len(b.buf) == 0 {
137		b.buf = append(b.buf, unistring.BOM)
138	}
139}
140
141func (b *unicodeStringBuilder) WriteString(s valueString) {
142	b.ensureStarted(s.length())
143	switch s := s.(type) {
144	case unicodeString:
145		b.buf = append(b.buf, s[1:]...)
146		b.unicode = true
147	case asciiString:
148		for i := 0; i < len(s); i++ {
149			b.buf = append(b.buf, uint16(s[i]))
150		}
151	default:
152		panic(fmt.Errorf("unsupported string type: %T", s))
153	}
154}
155
156func (b *unicodeStringBuilder) String() valueString {
157	if b.unicode {
158		return unicodeString(b.buf)
159	}
160	if len(b.buf) == 0 {
161		return stringEmpty
162	}
163	buf := make([]byte, 0, len(b.buf)-1)
164	for _, c := range b.buf[1:] {
165		buf = append(buf, byte(c))
166	}
167	return asciiString(buf)
168}
169
170func (b *unicodeStringBuilder) WriteRune(r rune) {
171	if r <= 0xFFFF {
172		b.ensureStarted(1)
173		b.buf = append(b.buf, uint16(r))
174		if !b.unicode && r >= utf8.RuneSelf {
175			b.unicode = true
176		}
177	} else {
178		b.ensureStarted(2)
179		first, second := utf16.EncodeRune(r)
180		b.buf = append(b.buf, uint16(first), uint16(second))
181		b.unicode = true
182	}
183}
184
185func (b *unicodeStringBuilder) writeASCIIString(bytes string) {
186	b.ensureStarted(len(bytes))
187	for _, c := range bytes {
188		b.buf = append(b.buf, uint16(c))
189	}
190}
191
192func (b *valueStringBuilder) ascii() bool {
193	return len(b.unicodeBuilder.buf) == 0
194}
195
196func (b *valueStringBuilder) WriteString(s valueString) {
197	if ascii, ok := s.(asciiString); ok {
198		if b.ascii() {
199			b.asciiBuilder.WriteString(string(ascii))
200		} else {
201			b.unicodeBuilder.writeASCIIString(string(ascii))
202		}
203	} else {
204		b.switchToUnicode(s.length())
205		b.unicodeBuilder.WriteString(s)
206	}
207}
208
209func (b *valueStringBuilder) WriteRune(r rune) {
210	if r < utf8.RuneSelf {
211		if b.ascii() {
212			b.asciiBuilder.WriteByte(byte(r))
213		} else {
214			b.unicodeBuilder.WriteRune(r)
215		}
216	} else {
217		var extraLen int
218		if r <= 0xFFFF {
219			extraLen = 1
220		} else {
221			extraLen = 2
222		}
223		b.switchToUnicode(extraLen)
224		b.unicodeBuilder.WriteRune(r)
225	}
226}
227
228func (b *valueStringBuilder) String() valueString {
229	if b.ascii() {
230		return asciiString(b.asciiBuilder.String())
231	}
232	return b.unicodeBuilder.String()
233}
234
235func (b *valueStringBuilder) Grow(n int) {
236	if b.ascii() {
237		b.asciiBuilder.Grow(n)
238	} else {
239		b.unicodeBuilder.Grow(n)
240	}
241}
242
243func (b *valueStringBuilder) switchToUnicode(extraLen int) {
244	if b.ascii() {
245		b.unicodeBuilder.ensureStarted(b.asciiBuilder.Len() + extraLen)
246		b.unicodeBuilder.writeASCIIString(b.asciiBuilder.String())
247		b.asciiBuilder.Reset()
248	}
249}
250
251func (b *valueStringBuilder) WriteSubstring(source valueString, start int, end int) {
252	if ascii, ok := source.(asciiString); ok {
253		if b.ascii() {
254			b.asciiBuilder.WriteString(string(ascii[start:end]))
255		} else {
256			b.unicodeBuilder.writeASCIIString(string(ascii[start:end]))
257		}
258		return
259	}
260	us := source.(unicodeString)
261	if b.ascii() {
262		uc := false
263		for i := start; i < end; i++ {
264			if us.charAt(i) >= utf8.RuneSelf {
265				uc = true
266				break
267			}
268		}
269		if uc {
270			b.switchToUnicode(end - start + 1)
271		} else {
272			b.asciiBuilder.Grow(end - start + 1)
273			for i := start; i < end; i++ {
274				b.asciiBuilder.WriteByte(byte(us.charAt(i)))
275			}
276			return
277		}
278	}
279	b.unicodeBuilder.buf = append(b.unicodeBuilder.buf, us[start+1:end+1]...)
280	b.unicodeBuilder.unicode = true
281}
282
283func (s unicodeString) reader(start int) io.RuneReader {
284	return &unicodeRuneReader{
285		s: s[start+1:],
286	}
287}
288
289func (s unicodeString) utf16Reader(start int) io.RuneReader {
290	return &utf16RuneReader{
291		s: s[start+1:],
292	}
293}
294
295func (s unicodeString) utf16Runes() []rune {
296	runes := make([]rune, len(s)-1)
297	for i, ch := range s[1:] {
298		runes[i] = rune(ch)
299	}
300	return runes
301}
302
// ToInteger returns 0 for every unicodeString; the contents are not parsed.
func (s unicodeString) ToInteger() int64 {
	return 0
}
306
// toString returns s itself, since it already is a string value.
func (s unicodeString) toString() valueString {
	return s
}
310
// ToString returns s itself as a Value.
func (s unicodeString) ToString() Value {
	return s
}
314
// ToFloat returns NaN for every unicodeString; the contents are not parsed.
func (s unicodeString) ToFloat() float64 {
	return math.NaN()
}
318
319func (s unicodeString) ToBoolean() bool {
320	return len(s) > 0
321}
322
323func (s unicodeString) toTrimmedUTF8() string {
324	if len(s) == 0 {
325		return ""
326	}
327	return strings.Trim(s.String(), parser.WhitespaceChars)
328}
329
330func (s unicodeString) ToNumber() Value {
331	return asciiString(s.toTrimmedUTF8()).ToNumber()
332}
333
334func (s unicodeString) ToObject(r *Runtime) *Object {
335	return r._newString(s, r.global.StringPrototype)
336}
337
338func (s unicodeString) equals(other unicodeString) bool {
339	if len(s) != len(other) {
340		return false
341	}
342	for i, r := range s {
343		if r != other[i] {
344			return false
345		}
346	}
347	return true
348}
349
350func (s unicodeString) SameAs(other Value) bool {
351	if otherStr, ok := other.(unicodeString); ok {
352		return s.equals(otherStr)
353	}
354
355	return false
356}
357
358func (s unicodeString) Equals(other Value) bool {
359	if s.SameAs(other) {
360		return true
361	}
362
363	if o, ok := other.(*Object); ok {
364		return s.Equals(o.toPrimitive())
365	}
366	return false
367}
368
// StrictEquals reports whether other is a unicodeString with the same
// contents; it is identical to SameAs.
func (s unicodeString) StrictEquals(other Value) bool {
	return s.SameAs(other)
}
372
373func (s unicodeString) baseObject(r *Runtime) *Object {
374	ss := r.stringSingleton
375	ss.value = s
376	ss.setLength()
377	return ss.val
378}
379
380func (s unicodeString) charAt(idx int) rune {
381	return rune(s[idx+1])
382}
383
384func (s unicodeString) length() int {
385	return len(s) - 1
386}
387
388func (s unicodeString) concat(other valueString) valueString {
389	switch other := other.(type) {
390	case unicodeString:
391		b := make(unicodeString, len(s)+len(other)-1)
392		copy(b, s)
393		copy(b[len(s):], other[1:])
394		return b
395	case asciiString:
396		b := make([]uint16, len(s)+len(other))
397		copy(b, s)
398		b1 := b[len(s):]
399		for i := 0; i < len(other); i++ {
400			b1[i] = uint16(other[i])
401		}
402		return unicodeString(b)
403	default:
404		panic(fmt.Errorf("Unknown string type: %T", other))
405	}
406}
407
408func (s unicodeString) substring(start, end int) valueString {
409	ss := s[start+1 : end+1]
410	for _, c := range ss {
411		if c >= utf8.RuneSelf {
412			b := make(unicodeString, end-start+1)
413			b[0] = unistring.BOM
414			copy(b[1:], ss)
415			return b
416		}
417	}
418	as := make([]byte, end-start)
419	for i, c := range ss {
420		as[i] = byte(c)
421	}
422	return asciiString(as)
423}
424
425func (s unicodeString) String() string {
426	return string(utf16.Decode(s[1:]))
427}
428
429func (s unicodeString) compareTo(other valueString) int {
430	// TODO handle invalid UTF-16
431	return strings.Compare(s.String(), other.String())
432}
433
434func (s unicodeString) index(substr valueString, start int) int {
435	var ss []uint16
436	switch substr := substr.(type) {
437	case unicodeString:
438		ss = substr[1:]
439	case asciiString:
440		ss = make([]uint16, len(substr))
441		for i := 0; i < len(substr); i++ {
442			ss[i] = uint16(substr[i])
443		}
444	default:
445		panic(fmt.Errorf("unknown string type: %T", substr))
446	}
447	s1 := s[1:]
448	// TODO: optimise
449	end := len(s1) - len(ss)
450	for start <= end {
451		for i := 0; i < len(ss); i++ {
452			if s1[start+i] != ss[i] {
453				goto nomatch
454			}
455		}
456
457		return start
458	nomatch:
459		start++
460	}
461	return -1
462}
463
464func (s unicodeString) lastIndex(substr valueString, start int) int {
465	var ss []uint16
466	switch substr := substr.(type) {
467	case unicodeString:
468		ss = substr[1:]
469	case asciiString:
470		ss = make([]uint16, len(substr))
471		for i := 0; i < len(substr); i++ {
472			ss[i] = uint16(substr[i])
473		}
474	default:
475		panic(fmt.Errorf("Unknown string type: %T", substr))
476	}
477
478	s1 := s[1:]
479	if maxStart := len(s1) - len(ss); start > maxStart {
480		start = maxStart
481	}
482	// TODO: optimise
483	for start >= 0 {
484		for i := 0; i < len(ss); i++ {
485			if s1[start+i] != ss[i] {
486				goto nomatch
487			}
488		}
489
490		return start
491	nomatch:
492		start--
493	}
494	return -1
495}
496
497func unicodeStringFromRunes(r []rune) unicodeString {
498	return unistring.NewFromRunes(r).AsUtf16()
499}
500
501func (s unicodeString) toLower() valueString {
502	caser := cases.Lower(language.Und)
503	r := []rune(caser.String(s.String()))
504	// Workaround
505	ascii := true
506	for i := 0; i < len(r)-1; i++ {
507		if (i == 0 || r[i-1] != 0x3b1) && r[i] == 0x345 && r[i+1] == 0x3c2 {
508			i++
509			r[i] = 0x3c3
510		}
511		if r[i] >= utf8.RuneSelf {
512			ascii = false
513		}
514	}
515	if ascii {
516		ascii = r[len(r)-1] < utf8.RuneSelf
517	}
518	if ascii {
519		return asciiString(r)
520	}
521	return unicodeStringFromRunes(r)
522}
523
524func (s unicodeString) toUpper() valueString {
525	caser := cases.Upper(language.Und)
526	return newStringValue(caser.String(s.String()))
527}
528
// Export returns the value as a Go string, produced by String().
func (s unicodeString) Export() interface{} {
	return s.String()
}
532
// ExportType returns reflectTypeString, the reflect.Type reported for the
// value returned by Export.
func (s unicodeString) ExportType() reflect.Type {
	return reflectTypeString
}
536
537func (s unicodeString) hash(hash *maphash.Hash) uint64 {
538	_, _ = hash.WriteString(string(unistring.FromUtf16(s)))
539	h := hash.Sum64()
540	hash.Reset()
541	return h
542}
543
// string returns s converted to a unistring.String via unistring.FromUtf16.
func (s unicodeString) string() unistring.String {
	return unistring.FromUtf16(s)
}
547