// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package json

import (
	"bytes"
	"errors"
	"io"
)

// A Decoder reads and decodes JSON values from an input stream.
type Decoder struct {
	r       io.Reader   // underlying input; refill reads from it
	buf     []byte      // buffered input; unread data starts at scanp
	d       decodeState // reused to unmarshal each value Decode reads
	scanp   int         // start of unread data in buf
	scanned int64       // amount of data already scanned
	scan    scanner     // used by readValue to find the end of the next value
	err     error       // sticky error; once set, Decode keeps returning it

	tokenState int   // current state of the Token state machine (a token* constant)
	tokenStack []int // tokenState values saved on '[' and '{', restored on ']' and '}'
}

// NewDecoder returns a new decoder that reads from r.
//
// The decoder introduces its own buffering and may
// read data from r beyond the JSON values requested.
func NewDecoder(r io.Reader) *Decoder {
	return &Decoder{r: r}
}

// UseNumber causes the Decoder to unmarshal a number into an interface{} as a
// Number instead of as a float64.
func (dec *Decoder) UseNumber() { dec.d.useNumber = true }

// DisallowUnknownFields causes the Decoder to return an error when the destination
// is a struct and the input contains object keys which do not match any
// non-ignored, exported fields in the destination.
func (dec *Decoder) DisallowUnknownFields() { dec.d.disallowUnknownFields = true }

// Decode reads the next JSON-encoded value from its
// input and stores it in the value pointed to by v.
//
// See the documentation for Unmarshal for details about
// the conversion of JSON into a Go value.
//
// A scan or read error recorded by an earlier call is sticky: it is
// returned again without touching the input. An error from unmarshaling
// a complete value is returned but not remembered.
func (dec *Decoder) Decode(v interface{}) error {
	if dec.err != nil {
		return dec.err
	}

	// When Decode is mixed with the Token API, a ',' or ':' may still be
	// pending in front of the value; consume it first.
	if err := dec.tokenPrepareForDecode(); err != nil {
		return err
	}

	if !dec.tokenValueAllowed() {
		return &SyntaxError{msg: "not at beginning of value", Offset: dec.InputOffset()}
	}

	// Read whole value into buffer.
	n, err := dec.readValue()
	if err != nil {
		return err
	}
	dec.d.init(dec.buf[dec.scanp : dec.scanp+n])
	dec.scanp += n

	// Don't save err from unmarshal into dec.err:
	// the connection is still usable since we read a complete JSON
	// object from it before the error happened.
	err = dec.d.unmarshal(v)

	// fixup token streaming state
	dec.tokenValueEnd()

	return err
}

// Buffered returns a reader of the data remaining in the Decoder's
// buffer. The reader is valid until the next call to Decode.
func (dec *Decoder) Buffered() io.Reader {
	return bytes.NewReader(dec.buf[dec.scanp:])
}

// readValue reads a JSON value into dec.buf.
// It returns the length of the encoding.
//
// Scanning starts at dec.scanp and feeds the buffered bytes to dec.scan
// one at a time, calling refill whenever the buffer runs out before the
// value ends. A scanner error or a read error is stored in dec.err and
// returned together with a zero length.
func (dec *Decoder) readValue() (int, error) {
	dec.scan.reset()

	scanp := dec.scanp
	var err error // error from the most recent refill; examined only after the buffer is scanned
Input:
	// help the compiler see that scanp is never negative, so it can remove
	// some bounds checks below.
	for scanp >= 0 {

		// Look in the buffer for a new value.
		for ; scanp < len(dec.buf); scanp++ {
			c := dec.buf[scanp]
			dec.scan.bytes++
			switch dec.scan.step(&dec.scan, c) {
			case scanEnd:
				// scanEnd is delayed one byte so we decrement
				// the scanner bytes count by 1 to ensure that
				// this value is correct in the next call of Decode.
				dec.scan.bytes--
				break Input
			case scanEndObject, scanEndArray:
				// scanEnd is delayed one byte.
				// We might block trying to get that byte from src,
				// so instead invent a space byte.
				if stateEndValue(&dec.scan, ' ') == scanEnd {
					// Count the closing delimiter as part of the value.
					scanp++
					break Input
				}
			case scanError:
				dec.err = dec.scan.err
				return 0, dec.scan.err
			}
		}

		// Did the last read have an error?
		// Delayed until now to allow buffer scan.
		if err != nil {
			if err == io.EOF {
				// Feeding a space lets a value that is only terminated
				// by the end of input complete.
				if dec.scan.step(&dec.scan, ' ') == scanEnd {
					break Input
				}
				// Input ended with non-space data still buffered:
				// report a truncated value instead of a plain EOF.
				if nonSpace(dec.buf) {
					err = io.ErrUnexpectedEOF
				}
			}
			dec.err = err
			return 0, err
		}

		// refill may slide the buffer down and reset dec.scanp, so keep
		// the scan position as an offset relative to dec.scanp.
		n := scanp - dec.scanp
		err = dec.refill()
		scanp = dec.scanp + n
	}
	return scanp - dec.scanp, nil
}

// refill makes room in dec.buf, performs a single Read from dec.r, and
// appends whatever was read. It returns the error from that Read.
func (dec *Decoder) refill() error {
	// Make room to read more into the buffer.
	// First slide down data already consumed.
	if dec.scanp > 0 {
		// The discarded prefix still counts towards the stream offset.
		dec.scanned += int64(dec.scanp)
		n := copy(dec.buf, dec.buf[dec.scanp:])
		dec.buf = dec.buf[:n]
		dec.scanp = 0
	}

	// Grow buffer if not large enough.
	const minRead = 512
	if cap(dec.buf)-len(dec.buf) < minRead {
		newBuf := make([]byte, len(dec.buf), 2*cap(dec.buf)+minRead)
		copy(newBuf, dec.buf)
		dec.buf = newBuf
	}

	// Read. Delay error for next iteration (after scan).
	n, err := dec.r.Read(dec.buf[len(dec.buf):cap(dec.buf)])
	dec.buf = dec.buf[0 : len(dec.buf)+n]

	return err
}

// nonSpace reports whether b contains at least one byte for which
// isSpace returns false.
func nonSpace(b []byte) bool {
	for _, c := range b {
		if !isSpace(c) {
			return true
		}
	}
	return false
}

// An Encoder writes JSON values to an output stream.
type Encoder struct {
	w          io.Writer // destination of every encoded value
	err        error     // sticky write error; once set, Encode keeps returning it
	escapeHTML bool      // passed to the marshaler as encOpts.escapeHTML

	indentBuf    *bytes.Buffer // scratch buffer for indented output, allocated on first use
	indentPrefix string        // prefix argument handed to Indent
	indentValue  string        // indent argument handed to Indent
}

// NewEncoder returns a new encoder that writes to w.
func NewEncoder(w io.Writer) *Encoder {
	return &Encoder{w: w, escapeHTML: true}
}

// Encode writes the JSON encoding of v to the stream,
// followed by a newline character.
//
// See the documentation for Marshal for details about the
// conversion of Go values to JSON.
201func (enc *Encoder) Encode(v interface{}) error { 202 if enc.err != nil { 203 return enc.err 204 } 205 e := newEncodeState() 206 err := e.marshal(v, encOpts{escapeHTML: enc.escapeHTML}) 207 if err != nil { 208 return err 209 } 210 211 // Terminate each value with a newline. 212 // This makes the output look a little nicer 213 // when debugging, and some kind of space 214 // is required if the encoded value was a number, 215 // so that the reader knows there aren't more 216 // digits coming. 217 e.WriteByte('\n') 218 219 b := e.Bytes() 220 if enc.indentPrefix != "" || enc.indentValue != "" { 221 if enc.indentBuf == nil { 222 enc.indentBuf = new(bytes.Buffer) 223 } 224 enc.indentBuf.Reset() 225 err = Indent(enc.indentBuf, b, enc.indentPrefix, enc.indentValue) 226 if err != nil { 227 return err 228 } 229 b = enc.indentBuf.Bytes() 230 } 231 if _, err = enc.w.Write(b); err != nil { 232 enc.err = err 233 } 234 encodeStatePool.Put(e) 235 return err 236} 237 238// SetIndent instructs the encoder to format each subsequent encoded 239// value as if indented by the package-level function Indent(dst, src, prefix, indent). 240// Calling SetIndent("", "") disables indentation. 241func (enc *Encoder) SetIndent(prefix, indent string) { 242 enc.indentPrefix = prefix 243 enc.indentValue = indent 244} 245 246// SetEscapeHTML specifies whether problematic HTML characters 247// should be escaped inside JSON quoted strings. 248// The default behavior is to escape &, <, and > to \u0026, \u003c, and \u003e 249// to avoid certain safety problems that can arise when embedding JSON in HTML. 250// 251// In non-HTML settings where the escaping interferes with the readability 252// of the output, SetEscapeHTML(false) disables this behavior. 253func (enc *Encoder) SetEscapeHTML(on bool) { 254 enc.escapeHTML = on 255} 256 257// RawMessage is a raw encoded JSON value. 258// It implements Marshaler and Unmarshaler and can 259// be used to delay JSON decoding or precompute a JSON encoding. 
260type RawMessage []byte 261 262// MarshalJSON returns m as the JSON encoding of m. 263func (m RawMessage) MarshalJSON() ([]byte, error) { 264 if m == nil { 265 return []byte("null"), nil 266 } 267 return m, nil 268} 269 270// UnmarshalJSON sets *m to a copy of data. 271func (m *RawMessage) UnmarshalJSON(data []byte) error { 272 if m == nil { 273 return errors.New("json.RawMessage: UnmarshalJSON on nil pointer") 274 } 275 *m = append((*m)[0:0], data...) 276 return nil 277} 278 279var _ Marshaler = (*RawMessage)(nil) 280var _ Unmarshaler = (*RawMessage)(nil) 281 282// A Token holds a value of one of these types: 283// 284// Delim, for the four JSON delimiters [ ] { } 285// bool, for JSON booleans 286// float64, for JSON numbers 287// Number, for JSON numbers 288// string, for JSON string literals 289// nil, for JSON null 290// 291type Token interface{} 292 293const ( 294 tokenTopValue = iota 295 tokenArrayStart 296 tokenArrayValue 297 tokenArrayComma 298 tokenObjectStart 299 tokenObjectKey 300 tokenObjectColon 301 tokenObjectValue 302 tokenObjectComma 303) 304 305// advance tokenstate from a separator state to a value state 306func (dec *Decoder) tokenPrepareForDecode() error { 307 // Note: Not calling peek before switch, to avoid 308 // putting peek into the standard Decode path. 309 // peek is only called when using the Token API. 
310 switch dec.tokenState { 311 case tokenArrayComma: 312 c, err := dec.peek() 313 if err != nil { 314 return err 315 } 316 if c != ',' { 317 return &SyntaxError{"expected comma after array element", dec.InputOffset()} 318 } 319 dec.scanp++ 320 dec.tokenState = tokenArrayValue 321 case tokenObjectColon: 322 c, err := dec.peek() 323 if err != nil { 324 return err 325 } 326 if c != ':' { 327 return &SyntaxError{"expected colon after object key", dec.InputOffset()} 328 } 329 dec.scanp++ 330 dec.tokenState = tokenObjectValue 331 } 332 return nil 333} 334 335func (dec *Decoder) tokenValueAllowed() bool { 336 switch dec.tokenState { 337 case tokenTopValue, tokenArrayStart, tokenArrayValue, tokenObjectValue: 338 return true 339 } 340 return false 341} 342 343func (dec *Decoder) tokenValueEnd() { 344 switch dec.tokenState { 345 case tokenArrayStart, tokenArrayValue: 346 dec.tokenState = tokenArrayComma 347 case tokenObjectValue: 348 dec.tokenState = tokenObjectComma 349 } 350} 351 352// A Delim is a JSON array or object delimiter, one of [ ] { or }. 353type Delim rune 354 355func (d Delim) String() string { 356 return string(d) 357} 358 359// Token returns the next JSON token in the input stream. 360// At the end of the input stream, Token returns nil, io.EOF. 361// 362// Token guarantees that the delimiters [ ] { } it returns are 363// properly nested and matched: if Token encounters an unexpected 364// delimiter in the input, it will return an error. 365// 366// The input stream consists of basic JSON values—bool, string, 367// number, and null—along with delimiters [ ] { } of type Delim 368// to mark the start and end of arrays and objects. 369// Commas and colons are elided. 
370func (dec *Decoder) Token() (Token, error) { 371 for { 372 c, err := dec.peek() 373 if err != nil { 374 return nil, err 375 } 376 switch c { 377 case '[': 378 if !dec.tokenValueAllowed() { 379 return dec.tokenError(c) 380 } 381 dec.scanp++ 382 dec.tokenStack = append(dec.tokenStack, dec.tokenState) 383 dec.tokenState = tokenArrayStart 384 return Delim('['), nil 385 386 case ']': 387 if dec.tokenState != tokenArrayStart && dec.tokenState != tokenArrayComma { 388 return dec.tokenError(c) 389 } 390 dec.scanp++ 391 dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1] 392 dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1] 393 dec.tokenValueEnd() 394 return Delim(']'), nil 395 396 case '{': 397 if !dec.tokenValueAllowed() { 398 return dec.tokenError(c) 399 } 400 dec.scanp++ 401 dec.tokenStack = append(dec.tokenStack, dec.tokenState) 402 dec.tokenState = tokenObjectStart 403 return Delim('{'), nil 404 405 case '}': 406 if dec.tokenState != tokenObjectStart && dec.tokenState != tokenObjectComma { 407 return dec.tokenError(c) 408 } 409 dec.scanp++ 410 dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1] 411 dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1] 412 dec.tokenValueEnd() 413 return Delim('}'), nil 414 415 case ':': 416 if dec.tokenState != tokenObjectColon { 417 return dec.tokenError(c) 418 } 419 dec.scanp++ 420 dec.tokenState = tokenObjectValue 421 continue 422 423 case ',': 424 if dec.tokenState == tokenArrayComma { 425 dec.scanp++ 426 dec.tokenState = tokenArrayValue 427 continue 428 } 429 if dec.tokenState == tokenObjectComma { 430 dec.scanp++ 431 dec.tokenState = tokenObjectKey 432 continue 433 } 434 return dec.tokenError(c) 435 436 case '"': 437 if dec.tokenState == tokenObjectStart || dec.tokenState == tokenObjectKey { 438 var x string 439 old := dec.tokenState 440 dec.tokenState = tokenTopValue 441 err := dec.Decode(&x) 442 dec.tokenState = old 443 if err != nil { 444 return nil, err 445 } 446 dec.tokenState = tokenObjectColon 447 return 
x, nil 448 } 449 fallthrough 450 451 default: 452 if !dec.tokenValueAllowed() { 453 return dec.tokenError(c) 454 } 455 var x interface{} 456 if err := dec.Decode(&x); err != nil { 457 return nil, err 458 } 459 return x, nil 460 } 461 } 462} 463 464func (dec *Decoder) tokenError(c byte) (Token, error) { 465 var context string 466 switch dec.tokenState { 467 case tokenTopValue: 468 context = " looking for beginning of value" 469 case tokenArrayStart, tokenArrayValue, tokenObjectValue: 470 context = " looking for beginning of value" 471 case tokenArrayComma: 472 context = " after array element" 473 case tokenObjectKey: 474 context = " looking for beginning of object key string" 475 case tokenObjectColon: 476 context = " after object key" 477 case tokenObjectComma: 478 context = " after object key:value pair" 479 } 480 return nil, &SyntaxError{"invalid character " + quoteChar(c) + context, dec.InputOffset()} 481} 482 483// More reports whether there is another element in the 484// current array or object being parsed. 485func (dec *Decoder) More() bool { 486 c, err := dec.peek() 487 return err == nil && c != ']' && c != '}' 488} 489 490func (dec *Decoder) peek() (byte, error) { 491 var err error 492 for { 493 for i := dec.scanp; i < len(dec.buf); i++ { 494 c := dec.buf[i] 495 if isSpace(c) { 496 continue 497 } 498 dec.scanp = i 499 return c, nil 500 } 501 // buffer has been scanned, now report any error 502 if err != nil { 503 return 0, err 504 } 505 err = dec.refill() 506 } 507} 508 509// InputOffset returns the input stream byte offset of the current decoder position. 510// The offset gives the location of the end of the most recently returned token 511// and the beginning of the next token. 512func (dec *Decoder) InputOffset() int64 { 513 return dec.scanned + int64(dec.scanp) 514} 515