1// Copyright 2010 The Go Authors.  All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package json
6
7import (
8	"bytes"
9	"errors"
10	"io"
11)
12
// A Decoder reads and decodes JSON objects from an input stream.
type Decoder struct {
	r     io.Reader   // source of input; refill reads from it
	buf   []byte      // data read from r that has not been discarded yet
	d     decodeState // initialized by Decode with each complete value, then used to unmarshal it
	scanp int         // start of unread data in buf
	scan  scanner     // state machine readValue steps to find the end of a value
	err   error       // sticky error set by readValue; Decode returns it on later calls

	tokenState int   // position in the token grammar: one of the token* constants
	tokenStack []int // tokenState values saved by Token on '[' or '{' and restored on ']' or '}'
}
25
26// NewDecoder returns a new decoder that reads from r.
27//
28// The decoder introduces its own buffering and may
29// read data from r beyond the JSON values requested.
30func NewDecoder(r io.Reader) *Decoder {
31	return &Decoder{r: r}
32}
33
34// UseNumber causes the Decoder to unmarshal a number into an interface{} as a
35// Number instead of as a float64.
36func (dec *Decoder) UseNumber() { dec.d.useNumber = true }
37
38// Decode reads the next JSON-encoded value from its
39// input and stores it in the value pointed to by v.
40//
41// See the documentation for Unmarshal for details about
42// the conversion of JSON into a Go value.
43func (dec *Decoder) Decode(v interface{}) error {
44	if dec.err != nil {
45		return dec.err
46	}
47
48	if err := dec.tokenPrepareForDecode(); err != nil {
49		return err
50	}
51
52	if !dec.tokenValueAllowed() {
53		return &SyntaxError{msg: "not at beginning of value"}
54	}
55
56	// Read whole value into buffer.
57	n, err := dec.readValue()
58	if err != nil {
59		return err
60	}
61	dec.d.init(dec.buf[dec.scanp : dec.scanp+n])
62	dec.scanp += n
63
64	// Don't save err from unmarshal into dec.err:
65	// the connection is still usable since we read a complete JSON
66	// object from it before the error happened.
67	err = dec.d.unmarshal(v)
68
69	// fixup token streaming state
70	dec.tokenValueEnd()
71
72	return err
73}
74
75// Buffered returns a reader of the data remaining in the Decoder's
76// buffer. The reader is valid until the next call to Decode.
77func (dec *Decoder) Buffered() io.Reader {
78	return bytes.NewReader(dec.buf[dec.scanp:])
79}
80
81// readValue reads a JSON value into dec.buf.
82// It returns the length of the encoding.
83func (dec *Decoder) readValue() (int, error) {
84	dec.scan.reset()
85
86	scanp := dec.scanp
87	var err error
88Input:
89	for {
90		// Look in the buffer for a new value.
91		for i, c := range dec.buf[scanp:] {
92			dec.scan.bytes++
93			v := dec.scan.step(&dec.scan, c)
94			if v == scanEnd {
95				scanp += i
96				break Input
97			}
98			// scanEnd is delayed one byte.
99			// We might block trying to get that byte from src,
100			// so instead invent a space byte.
101			if (v == scanEndObject || v == scanEndArray) && dec.scan.step(&dec.scan, ' ') == scanEnd {
102				scanp += i + 1
103				break Input
104			}
105			if v == scanError {
106				dec.err = dec.scan.err
107				return 0, dec.scan.err
108			}
109		}
110		scanp = len(dec.buf)
111
112		// Did the last read have an error?
113		// Delayed until now to allow buffer scan.
114		if err != nil {
115			if err == io.EOF {
116				if dec.scan.step(&dec.scan, ' ') == scanEnd {
117					break Input
118				}
119				if nonSpace(dec.buf) {
120					err = io.ErrUnexpectedEOF
121				}
122			}
123			dec.err = err
124			return 0, err
125		}
126
127		n := scanp - dec.scanp
128		err = dec.refill()
129		scanp = dec.scanp + n
130	}
131	return scanp - dec.scanp, nil
132}
133
134func (dec *Decoder) refill() error {
135	// Make room to read more into the buffer.
136	// First slide down data already consumed.
137	if dec.scanp > 0 {
138		n := copy(dec.buf, dec.buf[dec.scanp:])
139		dec.buf = dec.buf[:n]
140		dec.scanp = 0
141	}
142
143	// Grow buffer if not large enough.
144	const minRead = 512
145	if cap(dec.buf)-len(dec.buf) < minRead {
146		newBuf := make([]byte, len(dec.buf), 2*cap(dec.buf)+minRead)
147		copy(newBuf, dec.buf)
148		dec.buf = newBuf
149	}
150
151	// Read.  Delay error for next iteration (after scan).
152	n, err := dec.r.Read(dec.buf[len(dec.buf):cap(dec.buf)])
153	dec.buf = dec.buf[0 : len(dec.buf)+n]
154
155	return err
156}
157
158func nonSpace(b []byte) bool {
159	for _, c := range b {
160		if !isSpace(c) {
161			return true
162		}
163	}
164	return false
165}
166
// An Encoder writes JSON objects to an output stream.
type Encoder struct {
	w   io.Writer // destination Encode writes each encoded value to
	err error     // sticky error from a failed Write; Encode returns it on later calls
}
172
173// NewEncoder returns a new encoder that writes to w.
174func NewEncoder(w io.Writer) *Encoder {
175	return &Encoder{w: w}
176}
177
178// Encode writes the JSON encoding of v to the stream,
179// followed by a newline character.
180//
181// See the documentation for Marshal for details about the
182// conversion of Go values to JSON.
183func (enc *Encoder) Encode(v interface{}) error {
184	if enc.err != nil {
185		return enc.err
186	}
187	e := newEncodeState()
188	err := e.marshal(v)
189	if err != nil {
190		return err
191	}
192
193	// Terminate each value with a newline.
194	// This makes the output look a little nicer
195	// when debugging, and some kind of space
196	// is required if the encoded value was a number,
197	// so that the reader knows there aren't more
198	// digits coming.
199	e.WriteByte('\n')
200
201	if _, err = enc.w.Write(e.Bytes()); err != nil {
202		enc.err = err
203	}
204	encodeStatePool.Put(e)
205	return err
206}
207
// RawMessage is a raw encoded JSON object.
// It implements Marshaler and Unmarshaler and can
// be used to delay JSON decoding or precompute a JSON encoding.
type RawMessage []byte

// MarshalJSON returns m as the JSON encoding of m.
//
// The receiver is a value so that both RawMessage and *RawMessage have
// this method. A nil RawMessage is encoded as the JSON value null,
// because an empty byte sequence is not valid JSON.
func (m RawMessage) MarshalJSON() ([]byte, error) {
	if m == nil {
		return []byte("null"), nil
	}
	return m, nil
}

// UnmarshalJSON sets *m to a copy of data, reusing the existing storage
// of *m when it is large enough. It returns an error if m is a nil pointer.
func (m *RawMessage) UnmarshalJSON(data []byte) error {
	if m == nil {
		return errors.New("json.RawMessage: UnmarshalJSON on nil pointer")
	}
	*m = append((*m)[0:0], data...)
	return nil
}
226
// Compile-time assertions that *RawMessage satisfies both the Marshaler
// and the Unmarshaler interface.
var _ Marshaler = (*RawMessage)(nil)
var _ Unmarshaler = (*RawMessage)(nil)
229
// A Token holds a value of one of these types:
//
//	Delim, for the four JSON delimiters [ ] { }
//	bool, for JSON booleans
//	float64, for JSON numbers
//	Number, for JSON numbers (instead of float64, once UseNumber has been called)
//	string, for JSON string literals
//	nil, for JSON null
//
type Token interface{}
240
// Values of Decoder.tokenState. They record where the token stream
// currently stands relative to the enclosing array or object.
const (
	tokenTopValue    = iota // zero value, so a new Decoder starts here; a value may follow
	tokenArrayStart         // just after '['; a value or ']' may follow
	tokenArrayValue         // just after ',' in an array; a value must follow
	tokenArrayComma         // just after an array element; ',' or ']' may follow
	tokenObjectStart        // just after '{'; a key string or '}' may follow
	tokenObjectKey          // just after ',' in an object; a key string must follow
	tokenObjectColon        // just after an object key; ':' must follow
	tokenObjectValue        // just after ':'; a value must follow
	tokenObjectComma        // just after an object value; ',' or '}' may follow
)
252
253// advance tokenstate from a separator state to a value state
254func (dec *Decoder) tokenPrepareForDecode() error {
255	// Note: Not calling peek before switch, to avoid
256	// putting peek into the standard Decode path.
257	// peek is only called when using the Token API.
258	switch dec.tokenState {
259	case tokenArrayComma:
260		c, err := dec.peek()
261		if err != nil {
262			return err
263		}
264		if c != ',' {
265			return &SyntaxError{"expected comma after array element", 0}
266		}
267		dec.scanp++
268		dec.tokenState = tokenArrayValue
269	case tokenObjectColon:
270		c, err := dec.peek()
271		if err != nil {
272			return err
273		}
274		if c != ':' {
275			return &SyntaxError{"expected colon after object key", 0}
276		}
277		dec.scanp++
278		dec.tokenState = tokenObjectValue
279	}
280	return nil
281}
282
283func (dec *Decoder) tokenValueAllowed() bool {
284	switch dec.tokenState {
285	case tokenTopValue, tokenArrayStart, tokenArrayValue, tokenObjectValue:
286		return true
287	}
288	return false
289}
290
291func (dec *Decoder) tokenValueEnd() {
292	switch dec.tokenState {
293	case tokenArrayStart, tokenArrayValue:
294		dec.tokenState = tokenArrayComma
295	case tokenObjectValue:
296		dec.tokenState = tokenObjectComma
297	}
298}
299
// A Delim is a JSON array or object delimiter, one of [ ] { or }.
type Delim rune

// String returns the delimiter as a one-character string.
func (d Delim) String() string {
	r := rune(d)
	return string(r)
}
306
307// Token returns the next JSON token in the input stream.
308// At the end of the input stream, Token returns nil, io.EOF.
309//
310// Token guarantees that the delimiters [ ] { } it returns are
311// properly nested and matched: if Token encounters an unexpected
312// delimiter in the input, it will return an error.
313//
314// The input stream consists of basic JSON values—bool, string,
315// number, and null—along with delimiters [ ] { } of type Delim
316// to mark the start and end of arrays and objects.
317// Commas and colons are elided.
318func (dec *Decoder) Token() (Token, error) {
319	for {
320		c, err := dec.peek()
321		if err != nil {
322			return nil, err
323		}
324		switch c {
325		case '[':
326			if !dec.tokenValueAllowed() {
327				return dec.tokenError(c)
328			}
329			dec.scanp++
330			dec.tokenStack = append(dec.tokenStack, dec.tokenState)
331			dec.tokenState = tokenArrayStart
332			return Delim('['), nil
333
334		case ']':
335			if dec.tokenState != tokenArrayStart && dec.tokenState != tokenArrayComma {
336				return dec.tokenError(c)
337			}
338			dec.scanp++
339			dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1]
340			dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1]
341			dec.tokenValueEnd()
342			return Delim(']'), nil
343
344		case '{':
345			if !dec.tokenValueAllowed() {
346				return dec.tokenError(c)
347			}
348			dec.scanp++
349			dec.tokenStack = append(dec.tokenStack, dec.tokenState)
350			dec.tokenState = tokenObjectStart
351			return Delim('{'), nil
352
353		case '}':
354			if dec.tokenState != tokenObjectStart && dec.tokenState != tokenObjectComma {
355				return dec.tokenError(c)
356			}
357			dec.scanp++
358			dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1]
359			dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1]
360			dec.tokenValueEnd()
361			return Delim('}'), nil
362
363		case ':':
364			if dec.tokenState != tokenObjectColon {
365				return dec.tokenError(c)
366			}
367			dec.scanp++
368			dec.tokenState = tokenObjectValue
369			continue
370
371		case ',':
372			if dec.tokenState == tokenArrayComma {
373				dec.scanp++
374				dec.tokenState = tokenArrayValue
375				continue
376			}
377			if dec.tokenState == tokenObjectComma {
378				dec.scanp++
379				dec.tokenState = tokenObjectKey
380				continue
381			}
382			return dec.tokenError(c)
383
384		case '"':
385			if dec.tokenState == tokenObjectStart || dec.tokenState == tokenObjectKey {
386				var x string
387				old := dec.tokenState
388				dec.tokenState = tokenTopValue
389				err := dec.Decode(&x)
390				dec.tokenState = old
391				if err != nil {
392					clearOffset(err)
393					return nil, err
394				}
395				dec.tokenState = tokenObjectColon
396				return x, nil
397			}
398			fallthrough
399
400		default:
401			if !dec.tokenValueAllowed() {
402				return dec.tokenError(c)
403			}
404			var x interface{}
405			if err := dec.Decode(&x); err != nil {
406				clearOffset(err)
407				return nil, err
408			}
409			return x, nil
410		}
411	}
412}
413
414func clearOffset(err error) {
415	if s, ok := err.(*SyntaxError); ok {
416		s.Offset = 0
417	}
418}
419
420func (dec *Decoder) tokenError(c byte) (Token, error) {
421	var context string
422	switch dec.tokenState {
423	case tokenTopValue:
424		context = " looking for beginning of value"
425	case tokenArrayStart, tokenArrayValue, tokenObjectValue:
426		context = " looking for beginning of value"
427	case tokenArrayComma:
428		context = " after array element"
429	case tokenObjectKey:
430		context = " looking for beginning of object key string"
431	case tokenObjectColon:
432		context = " after object key"
433	case tokenObjectComma:
434		context = " after object key:value pair"
435	}
436	return nil, &SyntaxError{"invalid character " + quoteChar(c) + " " + context, 0}
437}
438
439// More reports whether there is another element in the
440// current array or object being parsed.
441func (dec *Decoder) More() bool {
442	c, err := dec.peek()
443	return err == nil && c != ']' && c != '}'
444}
445
446func (dec *Decoder) peek() (byte, error) {
447	var err error
448	for {
449		for i := dec.scanp; i < len(dec.buf); i++ {
450			c := dec.buf[i]
451			if isSpace(c) {
452				continue
453			}
454			dec.scanp = i
455			return c, nil
456		}
457		// buffer has been scanned, now report any error
458		if err != nil {
459			return 0, err
460		}
461		err = dec.refill()
462	}
463}
464
465/*
466TODO
467
468// EncodeToken writes the given JSON token to the stream.
469// It returns an error if the delimiters [ ] { } are not properly used.
470//
471// EncodeToken does not call Flush, because usually it is part of
472// a larger operation such as Encode, and those will call Flush when finished.
473// Callers that create an Encoder and then invoke EncodeToken directly,
474// without using Encode, need to call Flush when finished to ensure that
475// the JSON is written to the underlying writer.
476func (e *Encoder) EncodeToken(t Token) error  {
477	...
478}
479
480*/
481