1package jsoniter
2
3import (
4	"fmt"
5	"unicode/utf16"
6)
7
8// ReadString read string from iterator
9func (iter *Iterator) ReadString() (ret string) {
10	c := iter.nextToken()
11	if c == '"' {
12		for i := iter.head; i < iter.tail; i++ {
13			c := iter.buf[i]
14			if c == '"' {
15				ret = string(iter.buf[iter.head:i])
16				iter.head = i + 1
17				return ret
18			} else if c == '\\' {
19				break
20			} else if c < ' ' {
21				iter.ReportError("ReadString",
22					fmt.Sprintf(`invalid control character found: %d`, c))
23				return
24			}
25		}
26		return iter.readStringSlowPath()
27	} else if c == 'n' {
28		iter.skipThreeBytes('u', 'l', 'l')
29		return ""
30	}
31	iter.ReportError("ReadString", `expects " or n, but found `+string([]byte{c}))
32	return
33}
34
35func (iter *Iterator) readStringSlowPath() (ret string) {
36	var str []byte
37	var c byte
38	for iter.Error == nil {
39		c = iter.readByte()
40		if c == '"' {
41			return string(str)
42		}
43		if c == '\\' {
44			c = iter.readByte()
45			str = iter.readEscapedChar(c, str)
46		} else {
47			str = append(str, c)
48		}
49	}
50	iter.ReportError("readStringSlowPath", "unexpected end of input")
51	return
52}
53
54func (iter *Iterator) readEscapedChar(c byte, str []byte) []byte {
55	switch c {
56	case 'u':
57		r := iter.readU4()
58		if utf16.IsSurrogate(r) {
59			c = iter.readByte()
60			if iter.Error != nil {
61				return nil
62			}
63			if c != '\\' {
64				iter.unreadByte()
65				str = appendRune(str, r)
66				return str
67			}
68			c = iter.readByte()
69			if iter.Error != nil {
70				return nil
71			}
72			if c != 'u' {
73				str = appendRune(str, r)
74				return iter.readEscapedChar(c, str)
75			}
76			r2 := iter.readU4()
77			if iter.Error != nil {
78				return nil
79			}
80			combined := utf16.DecodeRune(r, r2)
81			if combined == '\uFFFD' {
82				str = appendRune(str, r)
83				str = appendRune(str, r2)
84			} else {
85				str = appendRune(str, combined)
86			}
87		} else {
88			str = appendRune(str, r)
89		}
90	case '"':
91		str = append(str, '"')
92	case '\\':
93		str = append(str, '\\')
94	case '/':
95		str = append(str, '/')
96	case 'b':
97		str = append(str, '\b')
98	case 'f':
99		str = append(str, '\f')
100	case 'n':
101		str = append(str, '\n')
102	case 'r':
103		str = append(str, '\r')
104	case 't':
105		str = append(str, '\t')
106	default:
107		iter.ReportError("readEscapedChar",
108			`invalid escape char after \`)
109		return nil
110	}
111	return str
112}
113
114// ReadStringAsSlice read string from iterator without copying into string form.
115// The []byte can not be kept, as it will change after next iterator call.
116func (iter *Iterator) ReadStringAsSlice() (ret []byte) {
117	c := iter.nextToken()
118	if c == '"' {
119		for i := iter.head; i < iter.tail; i++ {
120			// require ascii string and no escape
121			// for: field name, base64, number
122			if iter.buf[i] == '"' {
123				// fast path: reuse the underlying buffer
124				ret = iter.buf[iter.head:i]
125				iter.head = i + 1
126				return ret
127			}
128		}
129		readLen := iter.tail - iter.head
130		copied := make([]byte, readLen, readLen*2)
131		copy(copied, iter.buf[iter.head:iter.tail])
132		iter.head = iter.tail
133		for iter.Error == nil {
134			c := iter.readByte()
135			if c == '"' {
136				return copied
137			}
138			copied = append(copied, c)
139		}
140		return copied
141	}
142	iter.ReportError("ReadStringAsSlice", `expects " or n, but found `+string([]byte{c}))
143	return
144}
145
146func (iter *Iterator) readU4() (ret rune) {
147	for i := 0; i < 4; i++ {
148		c := iter.readByte()
149		if iter.Error != nil {
150			return
151		}
152		if c >= '0' && c <= '9' {
153			ret = ret*16 + rune(c-'0')
154		} else if c >= 'a' && c <= 'f' {
155			ret = ret*16 + rune(c-'a'+10)
156		} else if c >= 'A' && c <= 'F' {
157			ret = ret*16 + rune(c-'A'+10)
158		} else {
159			iter.ReportError("readU4", "expects 0~9 or a~f, but found "+string([]byte{c}))
160			return
161		}
162	}
163	return ret
164}
165
// Marker bits that appendRune ORs into the bytes it emits.
const (
	t1 = 0x00 // 0000 0000: not used by appendRune
	tx = 0x80 // 1000 0000: every byte after the first
	t2 = 0xC0 // 1100 0000: first byte of a 2-byte sequence
	t3 = 0xE0 // 1110 0000: first byte of a 3-byte sequence
	t4 = 0xF0 // 1111 0000: first byte of a 4-byte sequence
	t5 = 0xF8 // 1111 1000: not used by appendRune
)

// Payload masks. appendRune only uses maskx, which keeps the low six bits
// carried by each byte after the first.
const (
	maskx = 0x3F // 0011 1111
	mask2 = 0x1F // 0001 1111
	mask3 = 0x0F // 0000 1111
	mask4 = 0x07 // 0000 0111
)

// Largest values that appendRune encodes in one, two and three bytes.
const (
	rune1Max = 0x7F   // 1<<7 - 1
	rune2Max = 0x7FF  // 1<<11 - 1
	rune3Max = 0xFFFF // 1<<16 - 1
)

// Bounds of the surrogate range; appendRune replaces runes inside it with
// runeError.
const (
	surrogateMin = 0xD800
	surrogateMax = 0xDFFF
)

const (
	maxRune   = '\U0010FFFF' // Maximum valid Unicode code point.
	runeError = '\uFFFD'     // the "error" Rune or "Unicode replacement character"
)
189
190func appendRune(p []byte, r rune) []byte {
191	// Negative values are erroneous. Making it unsigned addresses the problem.
192	switch i := uint32(r); {
193	case i <= rune1Max:
194		p = append(p, byte(r))
195		return p
196	case i <= rune2Max:
197		p = append(p, t2|byte(r>>6))
198		p = append(p, tx|byte(r)&maskx)
199		return p
200	case i > maxRune, surrogateMin <= i && i <= surrogateMax:
201		r = runeError
202		fallthrough
203	case i <= rune3Max:
204		p = append(p, t3|byte(r>>12))
205		p = append(p, tx|byte(r>>6)&maskx)
206		p = append(p, tx|byte(r)&maskx)
207		return p
208	default:
209		p = append(p, t4|byte(r>>18))
210		p = append(p, tx|byte(r>>12)&maskx)
211		p = append(p, tx|byte(r>>6)&maskx)
212		p = append(p, tx|byte(r)&maskx)
213		return p
214	}
215}
216