1// Copyright 2010 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5package json 6 7import ( 8 "bytes" 9 "errors" 10 "io" 11) 12 13// A Decoder reads and decodes JSON values from an input stream. 14type Decoder struct { 15 r io.Reader 16 buf []byte 17 d decodeState 18 scanp int // start of unread data in buf 19 scanned int64 // amount of data already scanned 20 scan scanner 21 err error 22 23 tokenState int 24 tokenStack []int 25} 26 27// NewDecoder returns a new decoder that reads from r. 28// 29// The decoder introduces its own buffering and may 30// read data from r beyond the JSON values requested. 31func NewDecoder(r io.Reader) *Decoder { 32 return &Decoder{r: r} 33} 34 35// UseNumber causes the Decoder to unmarshal a number into an interface{} as a 36// Number instead of as a float64. 37func (dec *Decoder) UseNumber() { dec.d.useNumber = true } 38 39// DisallowUnknownFields causes the Decoder to return an error when the destination 40// is a struct and the input contains object keys which do not match any 41// non-ignored, exported fields in the destination. 42func (dec *Decoder) DisallowUnknownFields() { dec.d.disallowUnknownFields = true } 43 44// Decode reads the next JSON-encoded value from its 45// input and stores it in the value pointed to by v. 46// 47// See the documentation for Unmarshal for details about 48// the conversion of JSON into a Go value. 49func (dec *Decoder) Decode(v interface{}) error { 50 if dec.err != nil { 51 return dec.err 52 } 53 54 if err := dec.tokenPrepareForDecode(); err != nil { 55 return err 56 } 57 58 if !dec.tokenValueAllowed() { 59 return &SyntaxError{msg: "not at beginning of value", Offset: dec.offset()} 60 } 61 62 // Read whole value into buffer. 63 n, err := dec.readValue() 64 if err != nil { 65 return err 66 } 67 dec.d.init(dec.buf[dec.scanp : dec.scanp+n]) 68 dec.scanp += n 69 70 // Don't save err from unmarshal into dec.err: 71 // the connection is still usable since we read a complete JSON 72 // object from it before the error happened. 73 err = dec.d.unmarshal(v) 74 75 // fixup token streaming state 76 dec.tokenValueEnd() 77 78 return err 79} 80 81// Buffered returns a reader of the data remaining in the Decoder's 82// buffer. The reader is valid until the next call to Decode. 83func (dec *Decoder) Buffered() io.Reader { 84 return bytes.NewReader(dec.buf[dec.scanp:]) 85} 86 87// readValue reads a JSON value into dec.buf. 88// It returns the length of the encoding. 89func (dec *Decoder) readValue() (int, error) { 90 dec.scan.reset() 91 92 scanp := dec.scanp 93 var err error 94Input: 95 for { 96 // Look in the buffer for a new value. 97 for i, c := range dec.buf[scanp:] { 98 dec.scan.bytes++ 99 switch dec.scan.step(&dec.scan, c) { 100 case scanEnd: 101 scanp += i 102 break Input 103 case scanEndObject, scanEndArray: 104 // scanEnd is delayed one byte. 105 // We might block trying to get that byte from src, 106 // so instead invent a space byte. 107 if stateEndValue(&dec.scan, ' ') == scanEnd { 108 scanp += i + 1 109 break Input 110 } 111 case scanError: 112 dec.err = dec.scan.err 113 return 0, dec.scan.err 114 } 115 } 116 scanp = len(dec.buf) 117 118 // Did the last read have an error? 119 // Delayed until now to allow buffer scan. 120 if err != nil { 121 if err == io.EOF { 122 if dec.scan.step(&dec.scan, ' ') == scanEnd { 123 break Input 124 } 125 if nonSpace(dec.buf) { 126 err = io.ErrUnexpectedEOF 127 } 128 } 129 dec.err = err 130 return 0, err 131 } 132 133 n := scanp - dec.scanp 134 err = dec.refill() 135 scanp = dec.scanp + n 136 } 137 return scanp - dec.scanp, nil 138} 139 140func (dec *Decoder) refill() error { 141 // Make room to read more into the buffer. 142 // First slide down data already consumed. 143 if dec.scanp > 0 { 144 dec.scanned += int64(dec.scanp) 145 n := copy(dec.buf, dec.buf[dec.scanp:]) 146 dec.buf = dec.buf[:n] 147 dec.scanp = 0 148 } 149 150 // Grow buffer if not large enough. 151 const minRead = 512 152 if cap(dec.buf)-len(dec.buf) < minRead { 153 newBuf := make([]byte, len(dec.buf), 2*cap(dec.buf)+minRead) 154 copy(newBuf, dec.buf) 155 dec.buf = newBuf 156 } 157 158 // Read. Delay error for next iteration (after scan). 159 n, err := dec.r.Read(dec.buf[len(dec.buf):cap(dec.buf)]) 160 dec.buf = dec.buf[0 : len(dec.buf)+n] 161 162 return err 163} 164 165func nonSpace(b []byte) bool { 166 for _, c := range b { 167 if !isSpace(c) { 168 return true 169 } 170 } 171 return false 172} 173 174// An Encoder writes JSON values to an output stream. 175type Encoder struct { 176 w io.Writer 177 err error 178 escapeHTML bool 179 180 indentBuf *bytes.Buffer 181 indentPrefix string 182 indentValue string 183} 184 185// NewEncoder returns a new encoder that writes to w. 186func NewEncoder(w io.Writer) *Encoder { 187 return &Encoder{w: w, escapeHTML: true} 188} 189 190// Encode writes the JSON encoding of v to the stream, 191// followed by a newline character. 192// 193// See the documentation for Marshal for details about the 194// conversion of Go values to JSON. 195func (enc *Encoder) Encode(v interface{}) error { 196 if enc.err != nil { 197 return enc.err 198 } 199 e := newEncodeState() 200 err := e.marshal(v, encOpts{escapeHTML: enc.escapeHTML}) 201 if err != nil { 202 return err 203 } 204 205 // Terminate each value with a newline. 206 // This makes the output look a little nicer 207 // when debugging, and some kind of space 208 // is required if the encoded value was a number, 209 // so that the reader knows there aren't more 210 // digits coming. 211 e.WriteByte('\n') 212 213 b := e.Bytes() 214 if enc.indentPrefix != "" || enc.indentValue != "" { 215 if enc.indentBuf == nil { 216 enc.indentBuf = new(bytes.Buffer) 217 } 218 enc.indentBuf.Reset() 219 err = Indent(enc.indentBuf, b, enc.indentPrefix, enc.indentValue) 220 if err != nil { 221 return err 222 } 223 b = enc.indentBuf.Bytes() 224 } 225 if _, err = enc.w.Write(b); err != nil { 226 enc.err = err 227 } 228 encodeStatePool.Put(e) 229 return err 230} 231 232// SetIndent instructs the encoder to format each subsequent encoded 233// value as if indented by the package-level function Indent(dst, src, prefix, indent). 234// Calling SetIndent("", "") disables indentation. 235func (enc *Encoder) SetIndent(prefix, indent string) { 236 enc.indentPrefix = prefix 237 enc.indentValue = indent 238} 239 240// SetEscapeHTML specifies whether problematic HTML characters 241// should be escaped inside JSON quoted strings. 242// The default behavior is to escape &, <, and > to \u0026, \u003c, and \u003e 243// to avoid certain safety problems that can arise when embedding JSON in HTML. 244// 245// In non-HTML settings where the escaping interferes with the readability 246// of the output, SetEscapeHTML(false) disables this behavior. 247func (enc *Encoder) SetEscapeHTML(on bool) { 248 enc.escapeHTML = on 249} 250 251// RawMessage is a raw encoded JSON value. 252// It implements Marshaler and Unmarshaler and can 253// be used to delay JSON decoding or precompute a JSON encoding. 254type RawMessage []byte 255 256// MarshalJSON returns m as the JSON encoding of m. 257func (m RawMessage) MarshalJSON() ([]byte, error) { 258 if m == nil { 259 return []byte("null"), nil 260 } 261 return m, nil 262} 263 264// UnmarshalJSON sets *m to a copy of data. 265func (m *RawMessage) UnmarshalJSON(data []byte) error { 266 if m == nil { 267 return errors.New("json.RawMessage: UnmarshalJSON on nil pointer") 268 } 269 *m = append((*m)[0:0], data...) 270 return nil 271} 272 273var _ Marshaler = (*RawMessage)(nil) 274var _ Unmarshaler = (*RawMessage)(nil) 275 276// A Token holds a value of one of these types: 277// 278// Delim, for the four JSON delimiters [ ] { } 279// bool, for JSON booleans 280// float64, for JSON numbers 281// Number, for JSON numbers 282// string, for JSON string literals 283// nil, for JSON null 284// 285type Token interface{} 286 287const ( 288 tokenTopValue = iota 289 tokenArrayStart 290 tokenArrayValue 291 tokenArrayComma 292 tokenObjectStart 293 tokenObjectKey 294 tokenObjectColon 295 tokenObjectValue 296 tokenObjectComma 297) 298 299// advance tokenstate from a separator state to a value state 300func (dec *Decoder) tokenPrepareForDecode() error { 301 // Note: Not calling peek before switch, to avoid 302 // putting peek into the standard Decode path. 303 // peek is only called when using the Token API. 304 switch dec.tokenState { 305 case tokenArrayComma: 306 c, err := dec.peek() 307 if err != nil { 308 return err 309 } 310 if c != ',' { 311 return &SyntaxError{"expected comma after array element", dec.offset()} 312 } 313 dec.scanp++ 314 dec.tokenState = tokenArrayValue 315 case tokenObjectColon: 316 c, err := dec.peek() 317 if err != nil { 318 return err 319 } 320 if c != ':' { 321 return &SyntaxError{"expected colon after object key", dec.offset()} 322 } 323 dec.scanp++ 324 dec.tokenState = tokenObjectValue 325 } 326 return nil 327} 328 329func (dec *Decoder) tokenValueAllowed() bool { 330 switch dec.tokenState { 331 case tokenTopValue, tokenArrayStart, tokenArrayValue, tokenObjectValue: 332 return true 333 } 334 return false 335} 336 337func (dec *Decoder) tokenValueEnd() { 338 switch dec.tokenState { 339 case tokenArrayStart, tokenArrayValue: 340 dec.tokenState = tokenArrayComma 341 case tokenObjectValue: 342 dec.tokenState = tokenObjectComma 343 } 344} 345 346// A Delim is a JSON array or object delimiter, one of [ ] { or }. 347type Delim rune 348 349func (d Delim) String() string { 350 return string(d) 351} 352 353// Token returns the next JSON token in the input stream. 354// At the end of the input stream, Token returns nil, io.EOF. 355// 356// Token guarantees that the delimiters [ ] { } it returns are 357// properly nested and matched: if Token encounters an unexpected 358// delimiter in the input, it will return an error. 359// 360// The input stream consists of basic JSON values—bool, string, 361// number, and null—along with delimiters [ ] { } of type Delim 362// to mark the start and end of arrays and objects. 363// Commas and colons are elided. 364func (dec *Decoder) Token() (Token, error) { 365 for { 366 c, err := dec.peek() 367 if err != nil { 368 return nil, err 369 } 370 switch c { 371 case '[': 372 if !dec.tokenValueAllowed() { 373 return dec.tokenError(c) 374 } 375 dec.scanp++ 376 dec.tokenStack = append(dec.tokenStack, dec.tokenState) 377 dec.tokenState = tokenArrayStart 378 return Delim('['), nil 379 380 case ']': 381 if dec.tokenState != tokenArrayStart && dec.tokenState != tokenArrayComma { 382 return dec.tokenError(c) 383 } 384 dec.scanp++ 385 dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1] 386 dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1] 387 dec.tokenValueEnd() 388 return Delim(']'), nil 389 390 case '{': 391 if !dec.tokenValueAllowed() { 392 return dec.tokenError(c) 393 } 394 dec.scanp++ 395 dec.tokenStack = append(dec.tokenStack, dec.tokenState) 396 dec.tokenState = tokenObjectStart 397 return Delim('{'), nil 398 399 case '}': 400 if dec.tokenState != tokenObjectStart && dec.tokenState != tokenObjectComma { 401 return dec.tokenError(c) 402 } 403 dec.scanp++ 404 dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1] 405 dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1] 406 dec.tokenValueEnd() 407 return Delim('}'), nil 408 409 case ':': 410 if dec.tokenState != tokenObjectColon { 411 return dec.tokenError(c) 412 } 413 dec.scanp++ 414 dec.tokenState = tokenObjectValue 415 continue 416 417 case ',': 418 if dec.tokenState == tokenArrayComma { 419 dec.scanp++ 420 dec.tokenState = tokenArrayValue 421 continue 422 } 423 if dec.tokenState == tokenObjectComma { 424 dec.scanp++ 425 dec.tokenState = tokenObjectKey 426 continue 427 } 428 return dec.tokenError(c) 429 430 case '"': 431 if dec.tokenState == tokenObjectStart || dec.tokenState == tokenObjectKey { 432 var x string 433 old := dec.tokenState 434 dec.tokenState = tokenTopValue 435 err := dec.Decode(&x) 436 dec.tokenState = old 437 if err != nil { 438 return nil, err 439 } 440 dec.tokenState = tokenObjectColon 441 return x, nil 442 } 443 fallthrough 444 445 default: 446 if !dec.tokenValueAllowed() { 447 return dec.tokenError(c) 448 } 449 var x interface{} 450 if err := dec.Decode(&x); err != nil { 451 return nil, err 452 } 453 return x, nil 454 } 455 } 456} 457 458func (dec *Decoder) tokenError(c byte) (Token, error) { 459 var context string 460 switch dec.tokenState { 461 case tokenTopValue: 462 context = " looking for beginning of value" 463 case tokenArrayStart, tokenArrayValue, tokenObjectValue: 464 context = " looking for beginning of value" 465 case tokenArrayComma: 466 context = " after array element" 467 case tokenObjectKey: 468 context = " looking for beginning of object key string" 469 case tokenObjectColon: 470 context = " after object key" 471 case tokenObjectComma: 472 context = " after object key:value pair" 473 } 474 return nil, &SyntaxError{"invalid character " + quoteChar(c) + context, dec.offset()} 475} 476 477// More reports whether there is another element in the 478// current array or object being parsed. 479func (dec *Decoder) More() bool { 480 c, err := dec.peek() 481 return err == nil && c != ']' && c != '}' 482} 483 484func (dec *Decoder) peek() (byte, error) { 485 var err error 486 for { 487 for i := dec.scanp; i < len(dec.buf); i++ { 488 c := dec.buf[i] 489 if isSpace(c) { 490 continue 491 } 492 dec.scanp = i 493 return c, nil 494 } 495 // buffer has been scanned, now report any error 496 if err != nil { 497 return 0, err 498 } 499 err = dec.refill() 500 } 501} 502 503func (dec *Decoder) offset() int64 { 504 return dec.scanned + int64(dec.scanp) 505} 506