1package jsoniter 2 3import ( 4 "fmt" 5 "unicode/utf16" 6) 7 8// ReadString read string from iterator 9func (iter *Iterator) ReadString() (ret string) { 10 c := iter.nextToken() 11 if c == '"' { 12 for i := iter.head; i < iter.tail; i++ { 13 c := iter.buf[i] 14 if c == '"' { 15 ret = string(iter.buf[iter.head:i]) 16 iter.head = i + 1 17 return ret 18 } else if c == '\\' { 19 break 20 } else if c < ' ' { 21 iter.ReportError("ReadString", 22 fmt.Sprintf(`invalid control character found: %d`, c)) 23 return 24 } 25 } 26 return iter.readStringSlowPath() 27 } else if c == 'n' { 28 iter.skipThreeBytes('u', 'l', 'l') 29 return "" 30 } 31 iter.ReportError("ReadString", `expects " or n, but found `+string([]byte{c})) 32 return 33} 34 35func (iter *Iterator) readStringSlowPath() (ret string) { 36 var str []byte 37 var c byte 38 for iter.Error == nil { 39 c = iter.readByte() 40 if c == '"' { 41 return string(str) 42 } 43 if c == '\\' { 44 c = iter.readByte() 45 str = iter.readEscapedChar(c, str) 46 } else { 47 str = append(str, c) 48 } 49 } 50 iter.ReportError("readStringSlowPath", "unexpected end of input") 51 return 52} 53 54func (iter *Iterator) readEscapedChar(c byte, str []byte) []byte { 55 switch c { 56 case 'u': 57 r := iter.readU4() 58 if utf16.IsSurrogate(r) { 59 c = iter.readByte() 60 if iter.Error != nil { 61 return nil 62 } 63 if c != '\\' { 64 iter.unreadByte() 65 str = appendRune(str, r) 66 return str 67 } 68 c = iter.readByte() 69 if iter.Error != nil { 70 return nil 71 } 72 if c != 'u' { 73 str = appendRune(str, r) 74 return iter.readEscapedChar(c, str) 75 } 76 r2 := iter.readU4() 77 if iter.Error != nil { 78 return nil 79 } 80 combined := utf16.DecodeRune(r, r2) 81 if combined == '\uFFFD' { 82 str = appendRune(str, r) 83 str = appendRune(str, r2) 84 } else { 85 str = appendRune(str, combined) 86 } 87 } else { 88 str = appendRune(str, r) 89 } 90 case '"': 91 str = append(str, '"') 92 case '\\': 93 str = append(str, '\\') 94 case '/': 95 str = append(str, '/') 96 case 'b': 97 str = append(str, '\b') 98 case 'f': 99 str = append(str, '\f') 100 case 'n': 101 str = append(str, '\n') 102 case 'r': 103 str = append(str, '\r') 104 case 't': 105 str = append(str, '\t') 106 default: 107 iter.ReportError("readEscapedChar", 108 `invalid escape char after \`) 109 return nil 110 } 111 return str 112} 113 114// ReadStringAsSlice read string from iterator without copying into string form. 115// The []byte can not be kept, as it will change after next iterator call. 116func (iter *Iterator) ReadStringAsSlice() (ret []byte) { 117 c := iter.nextToken() 118 if c == '"' { 119 for i := iter.head; i < iter.tail; i++ { 120 // require ascii string and no escape 121 // for: field name, base64, number 122 if iter.buf[i] == '"' { 123 // fast path: reuse the underlying buffer 124 ret = iter.buf[iter.head:i] 125 iter.head = i + 1 126 return ret 127 } 128 } 129 readLen := iter.tail - iter.head 130 copied := make([]byte, readLen, readLen*2) 131 copy(copied, iter.buf[iter.head:iter.tail]) 132 iter.head = iter.tail 133 for iter.Error == nil { 134 c := iter.readByte() 135 if c == '"' { 136 return copied 137 } 138 copied = append(copied, c) 139 } 140 return copied 141 } 142 iter.ReportError("ReadStringAsSlice", `expects " or n, but found `+string([]byte{c})) 143 return 144} 145 146func (iter *Iterator) readU4() (ret rune) { 147 for i := 0; i < 4; i++ { 148 c := iter.readByte() 149 if iter.Error != nil { 150 return 151 } 152 if c >= '0' && c <= '9' { 153 ret = ret*16 + rune(c-'0') 154 } else if c >= 'a' && c <= 'f' { 155 ret = ret*16 + rune(c-'a'+10) 156 } else if c >= 'A' && c <= 'F' { 157 ret = ret*16 + rune(c-'A'+10) 158 } else { 159 iter.ReportError("readU4", "expects 0~9 or a~f, but found "+string([]byte{c})) 160 return 161 } 162 } 163 return ret 164} 165 166const ( 167 t1 = 0x00 // 0000 0000 168 tx = 0x80 // 1000 0000 169 t2 = 0xC0 // 1100 0000 170 t3 = 0xE0 // 1110 0000 171 t4 = 0xF0 // 1111 0000 172 t5 = 0xF8 // 1111 1000 173 174 maskx = 0x3F // 0011 1111 175 mask2 = 0x1F // 0001 1111 176 mask3 = 0x0F // 0000 1111 177 mask4 = 0x07 // 0000 0111 178 179 rune1Max = 1<<7 - 1 180 rune2Max = 1<<11 - 1 181 rune3Max = 1<<16 - 1 182 183 surrogateMin = 0xD800 184 surrogateMax = 0xDFFF 185 186 maxRune = '\U0010FFFF' // Maximum valid Unicode code point. 187 runeError = '\uFFFD' // the "error" Rune or "Unicode replacement character" 188) 189 190func appendRune(p []byte, r rune) []byte { 191 // Negative values are erroneous. Making it unsigned addresses the problem. 192 switch i := uint32(r); { 193 case i <= rune1Max: 194 p = append(p, byte(r)) 195 return p 196 case i <= rune2Max: 197 p = append(p, t2|byte(r>>6)) 198 p = append(p, tx|byte(r)&maskx) 199 return p 200 case i > maxRune, surrogateMin <= i && i <= surrogateMax: 201 r = runeError 202 fallthrough 203 case i <= rune3Max: 204 p = append(p, t3|byte(r>>12)) 205 p = append(p, tx|byte(r>>6)&maskx) 206 p = append(p, tx|byte(r)&maskx) 207 return p 208 default: 209 p = append(p, t4|byte(r>>18)) 210 p = append(p, tx|byte(r>>12)&maskx) 211 p = append(p, tx|byte(r>>6)&maskx) 212 p = append(p, tx|byte(r)&maskx) 213 return p 214 } 215} 216