// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package json

import (
	"bytes"
	"errors"
	"io"
)

// A Decoder reads and decodes JSON objects from an input stream.
type Decoder struct {
	r     io.Reader   // source of JSON input
	buf   []byte      // buffered input read from r; buf[scanp:] is unread
	d     decodeState // reused state for decoding one value into a Go value
	scanp int         // start of unread data in buf
	scan  scanner     // state machine used to find the end of a value in buf
	err   error       // sticky error; once set, every Decode returns it

	// Token-streaming state: tokenState records where the stream is
	// within nested arrays/objects, and tokenStack holds the states
	// saved when descending into '[' or '{' (restored by ']' / '}').
	tokenState int
	tokenStack []int
}

// NewDecoder returns a new decoder that reads from r.
//
// The decoder introduces its own buffering and may
// read data from r beyond the JSON values requested.
func NewDecoder(r io.Reader) *Decoder {
	return &Decoder{r: r}
}

// UseNumber causes the Decoder to unmarshal a number into an interface{} as a
// Number instead of as a float64.
func (dec *Decoder) UseNumber() { dec.d.useNumber = true }

// Decode reads the next JSON-encoded value from its
// input and stores it in the value pointed to by v.
//
// See the documentation for Unmarshal for details about
// the conversion of JSON into a Go value.
func (dec *Decoder) Decode(v interface{}) error {
	// A prior I/O or syntax error is sticky: the stream position is
	// no longer trustworthy, so keep reporting the same error.
	if dec.err != nil {
		return dec.err
	}

	// If the Token API left the stream just before a ',' or ':'
	// separator, consume it so we are positioned at a value.
	if err := dec.tokenPrepareForDecode(); err != nil {
		return err
	}

	// A whole value is only legal in certain token states
	// (top level, start of array, after ',' or ':').
	if !dec.tokenValueAllowed() {
		return &SyntaxError{msg: "not at beginning of value"}
	}

	// Read whole value into buffer.
	n, err := dec.readValue()
	if err != nil {
		return err
	}
	// Hand exactly the n bytes of the value to the decoder and mark
	// them consumed.
	dec.d.init(dec.buf[dec.scanp : dec.scanp+n])
	dec.scanp += n

	// Don't save err from unmarshal into dec.err:
	// the connection is still usable since we read a complete JSON
	// object from it before the error happened.
	err = dec.d.unmarshal(v)

	// fixup token streaming state
	dec.tokenValueEnd()

	return err
}

// Buffered returns a reader of the data remaining in the Decoder's
// buffer. The reader is valid until the next call to Decode.
77func (dec *Decoder) Buffered() io.Reader { 78 return bytes.NewReader(dec.buf[dec.scanp:]) 79} 80 81// readValue reads a JSON value into dec.buf. 82// It returns the length of the encoding. 83func (dec *Decoder) readValue() (int, error) { 84 dec.scan.reset() 85 86 scanp := dec.scanp 87 var err error 88Input: 89 for { 90 // Look in the buffer for a new value. 91 for i, c := range dec.buf[scanp:] { 92 dec.scan.bytes++ 93 v := dec.scan.step(&dec.scan, c) 94 if v == scanEnd { 95 scanp += i 96 break Input 97 } 98 // scanEnd is delayed one byte. 99 // We might block trying to get that byte from src, 100 // so instead invent a space byte. 101 if (v == scanEndObject || v == scanEndArray) && dec.scan.step(&dec.scan, ' ') == scanEnd { 102 scanp += i + 1 103 break Input 104 } 105 if v == scanError { 106 dec.err = dec.scan.err 107 return 0, dec.scan.err 108 } 109 } 110 scanp = len(dec.buf) 111 112 // Did the last read have an error? 113 // Delayed until now to allow buffer scan. 114 if err != nil { 115 if err == io.EOF { 116 if dec.scan.step(&dec.scan, ' ') == scanEnd { 117 break Input 118 } 119 if nonSpace(dec.buf) { 120 err = io.ErrUnexpectedEOF 121 } 122 } 123 dec.err = err 124 return 0, err 125 } 126 127 n := scanp - dec.scanp 128 err = dec.refill() 129 scanp = dec.scanp + n 130 } 131 return scanp - dec.scanp, nil 132} 133 134func (dec *Decoder) refill() error { 135 // Make room to read more into the buffer. 136 // First slide down data already consumed. 137 if dec.scanp > 0 { 138 n := copy(dec.buf, dec.buf[dec.scanp:]) 139 dec.buf = dec.buf[:n] 140 dec.scanp = 0 141 } 142 143 // Grow buffer if not large enough. 144 const minRead = 512 145 if cap(dec.buf)-len(dec.buf) < minRead { 146 newBuf := make([]byte, len(dec.buf), 2*cap(dec.buf)+minRead) 147 copy(newBuf, dec.buf) 148 dec.buf = newBuf 149 } 150 151 // Read. Delay error for next iteration (after scan). 
152 n, err := dec.r.Read(dec.buf[len(dec.buf):cap(dec.buf)]) 153 dec.buf = dec.buf[0 : len(dec.buf)+n] 154 155 return err 156} 157 158func nonSpace(b []byte) bool { 159 for _, c := range b { 160 if !isSpace(c) { 161 return true 162 } 163 } 164 return false 165} 166 167// An Encoder writes JSON objects to an output stream. 168type Encoder struct { 169 w io.Writer 170 err error 171} 172 173// NewEncoder returns a new encoder that writes to w. 174func NewEncoder(w io.Writer) *Encoder { 175 return &Encoder{w: w} 176} 177 178// Encode writes the JSON encoding of v to the stream, 179// followed by a newline character. 180// 181// See the documentation for Marshal for details about the 182// conversion of Go values to JSON. 183func (enc *Encoder) Encode(v interface{}) error { 184 if enc.err != nil { 185 return enc.err 186 } 187 e := newEncodeState() 188 err := e.marshal(v) 189 if err != nil { 190 return err 191 } 192 193 // Terminate each value with a newline. 194 // This makes the output look a little nicer 195 // when debugging, and some kind of space 196 // is required if the encoded value was a number, 197 // so that the reader knows there aren't more 198 // digits coming. 199 e.WriteByte('\n') 200 201 if _, err = enc.w.Write(e.Bytes()); err != nil { 202 enc.err = err 203 } 204 encodeStatePool.Put(e) 205 return err 206} 207 208// RawMessage is a raw encoded JSON object. 209// It implements Marshaler and Unmarshaler and can 210// be used to delay JSON decoding or precompute a JSON encoding. 211type RawMessage []byte 212 213// MarshalJSON returns *m as the JSON encoding of m. 214func (m *RawMessage) MarshalJSON() ([]byte, error) { 215 return *m, nil 216} 217 218// UnmarshalJSON sets *m to a copy of data. 219func (m *RawMessage) UnmarshalJSON(data []byte) error { 220 if m == nil { 221 return errors.New("json.RawMessage: UnmarshalJSON on nil pointer") 222 } 223 *m = append((*m)[0:0], data...) 
224 return nil 225} 226 227var _ Marshaler = (*RawMessage)(nil) 228var _ Unmarshaler = (*RawMessage)(nil) 229 230// A Token holds a value of one of these types: 231// 232// Delim, for the four JSON delimiters [ ] { } 233// bool, for JSON booleans 234// float64, for JSON numbers 235// Number, for JSON numbers 236// string, for JSON string literals 237// nil, for JSON null 238// 239type Token interface{} 240 241const ( 242 tokenTopValue = iota 243 tokenArrayStart 244 tokenArrayValue 245 tokenArrayComma 246 tokenObjectStart 247 tokenObjectKey 248 tokenObjectColon 249 tokenObjectValue 250 tokenObjectComma 251) 252 253// advance tokenstate from a separator state to a value state 254func (dec *Decoder) tokenPrepareForDecode() error { 255 // Note: Not calling peek before switch, to avoid 256 // putting peek into the standard Decode path. 257 // peek is only called when using the Token API. 258 switch dec.tokenState { 259 case tokenArrayComma: 260 c, err := dec.peek() 261 if err != nil { 262 return err 263 } 264 if c != ',' { 265 return &SyntaxError{"expected comma after array element", 0} 266 } 267 dec.scanp++ 268 dec.tokenState = tokenArrayValue 269 case tokenObjectColon: 270 c, err := dec.peek() 271 if err != nil { 272 return err 273 } 274 if c != ':' { 275 return &SyntaxError{"expected colon after object key", 0} 276 } 277 dec.scanp++ 278 dec.tokenState = tokenObjectValue 279 } 280 return nil 281} 282 283func (dec *Decoder) tokenValueAllowed() bool { 284 switch dec.tokenState { 285 case tokenTopValue, tokenArrayStart, tokenArrayValue, tokenObjectValue: 286 return true 287 } 288 return false 289} 290 291func (dec *Decoder) tokenValueEnd() { 292 switch dec.tokenState { 293 case tokenArrayStart, tokenArrayValue: 294 dec.tokenState = tokenArrayComma 295 case tokenObjectValue: 296 dec.tokenState = tokenObjectComma 297 } 298} 299 300// A Delim is a JSON array or object delimiter, one of [ ] { or }. 
type Delim rune

// String returns the delimiter as a one-character string.
func (d Delim) String() string {
	return string(d)
}

// Token returns the next JSON token in the input stream.
// At the end of the input stream, Token returns nil, io.EOF.
//
// Token guarantees that the delimiters [ ] { } it returns are
// properly nested and matched: if Token encounters an unexpected
// delimiter in the input, it will return an error.
//
// The input stream consists of basic JSON values—bool, string,
// number, and null—along with delimiters [ ] { } of type Delim
// to mark the start and end of arrays and objects.
// Commas and colons are elided.
func (dec *Decoder) Token() (Token, error) {
	for {
		// Find the next non-space byte without consuming it.
		c, err := dec.peek()
		if err != nil {
			return nil, err
		}
		switch c {
		case '[':
			// Entering an array: push the current state so the
			// matching ']' can restore it.
			if !dec.tokenValueAllowed() {
				return dec.tokenError(c)
			}
			dec.scanp++
			dec.tokenStack = append(dec.tokenStack, dec.tokenState)
			dec.tokenState = tokenArrayStart
			return Delim('['), nil

		case ']':
			// Legal only at the start of an array or right after a
			// complete element.
			if dec.tokenState != tokenArrayStart && dec.tokenState != tokenArrayComma {
				return dec.tokenError(c)
			}
			dec.scanp++
			// Pop the state saved by the matching '[' and mark the
			// enclosing value as finished.
			dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1]
			dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1]
			dec.tokenValueEnd()
			return Delim(']'), nil

		case '{':
			// Entering an object: push the current state so the
			// matching '}' can restore it.
			if !dec.tokenValueAllowed() {
				return dec.tokenError(c)
			}
			dec.scanp++
			dec.tokenStack = append(dec.tokenStack, dec.tokenState)
			dec.tokenState = tokenObjectStart
			return Delim('{'), nil

		case '}':
			// Legal only in an empty object or right after a
			// complete key:value pair.
			if dec.tokenState != tokenObjectStart && dec.tokenState != tokenObjectComma {
				return dec.tokenError(c)
			}
			dec.scanp++
			dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1]
			dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1]
			dec.tokenValueEnd()
			return Delim('}'), nil

		case ':':
			// Colons are consumed silently (elided from the token
			// stream); loop to produce the following value.
			if dec.tokenState != tokenObjectColon {
				return dec.tokenError(c)
			}
			dec.scanp++
			dec.tokenState = tokenObjectValue
			continue

		case ',':
			// Commas are likewise consumed silently between array
			// elements or object pairs.
			if dec.tokenState == tokenArrayComma {
				dec.scanp++
				dec.tokenState = tokenArrayValue
				continue
			}
			if dec.tokenState == tokenObjectComma {
				dec.scanp++
				dec.tokenState = tokenObjectKey
				continue
			}
			return dec.tokenError(c)

		case '"':
			if dec.tokenState == tokenObjectStart || dec.tokenState == tokenObjectKey {
				// An object key: temporarily pretend we are at top
				// level so Decode accepts the bare string, then
				// restore and advance to expect the colon.
				var x string
				old := dec.tokenState
				dec.tokenState = tokenTopValue
				err := dec.Decode(&x)
				dec.tokenState = old
				if err != nil {
					clearOffset(err)
					return nil, err
				}
				dec.tokenState = tokenObjectColon
				return x, nil
			}
			// A string value: handled like any other value below.
			fallthrough

		default:
			// Any other byte must begin a value (number, bool, null,
			// or string via fallthrough); let Decode parse it.
			if !dec.tokenValueAllowed() {
				return dec.tokenError(c)
			}
			var x interface{}
			if err := dec.Decode(&x); err != nil {
				clearOffset(err)
				return nil, err
			}
			return x, nil
		}
	}
}

// clearOffset zeroes the Offset of a SyntaxError. Decode computes
// offsets relative to the buffered value, which would be misleading
// when surfaced through the Token API.
func clearOffset(err error) {
	if s, ok := err.(*SyntaxError); ok {
		s.Offset = 0
	}
}

// tokenError returns a SyntaxError describing an unexpected byte c,
// with context derived from the current token-streaming state.
func (dec *Decoder) tokenError(c byte) (Token, error) {
	var context string
	switch dec.tokenState {
	case tokenTopValue:
		context = " looking for beginning of value"
	case tokenArrayStart, tokenArrayValue, tokenObjectValue:
		context = " looking for beginning of value"
	case tokenArrayComma:
		context = " after array element"
	case tokenObjectKey:
		context = " looking for beginning of object key string"
	case tokenObjectColon:
		context = " after object key"
	case tokenObjectComma:
		context = " after object key:value pair"
	}
	return nil, &SyntaxError{"invalid character " + quoteChar(c) + " " + context, 0}
}

// More reports whether there is another element in the
// current array or object being parsed.
441func (dec *Decoder) More() bool { 442 c, err := dec.peek() 443 return err == nil && c != ']' && c != '}' 444} 445 446func (dec *Decoder) peek() (byte, error) { 447 var err error 448 for { 449 for i := dec.scanp; i < len(dec.buf); i++ { 450 c := dec.buf[i] 451 if isSpace(c) { 452 continue 453 } 454 dec.scanp = i 455 return c, nil 456 } 457 // buffer has been scanned, now report any error 458 if err != nil { 459 return 0, err 460 } 461 err = dec.refill() 462 } 463} 464 465/* 466TODO 467 468// EncodeToken writes the given JSON token to the stream. 469// It returns an error if the delimiters [ ] { } are not properly used. 470// 471// EncodeToken does not call Flush, because usually it is part of 472// a larger operation such as Encode, and those will call Flush when finished. 473// Callers that create an Encoder and then invoke EncodeToken directly, 474// without using Encode, need to call Flush when finished to ensure that 475// the JSON is written to the underlying writer. 476func (e *Encoder) EncodeToken(t Token) error { 477 ... 478} 479 480*/ 481