1// Copyright 2010 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package json
6
7import (
8	"bytes"
9	"errors"
10	"io"
11)
12
// A Decoder reads and decodes JSON values from an input stream.
//
// Input is pulled from r into buf by refill; buf[scanp:] is the portion
// that has not been consumed yet. Once readValue records an error in err,
// every later call to Decode returns that same error.
type Decoder struct {
	r       io.Reader
	buf     []byte
	d       decodeState
	scanp   int   // start of unread data in buf
	scanned int64 // amount of data already scanned
	scan    scanner
	err     error

	// State for the Token API: tokenState is one of the token* constants,
	// and tokenStack holds the states saved when Token enters a nested
	// array or object so they can be restored at the closing delimiter.
	tokenState int
	tokenStack []int
}
26
27// NewDecoder returns a new decoder that reads from r.
28//
29// The decoder introduces its own buffering and may
30// read data from r beyond the JSON values requested.
31func NewDecoder(r io.Reader) *Decoder {
32	return &Decoder{r: r}
33}
34
35// UseNumber causes the Decoder to unmarshal a number into an interface{} as a
36// Number instead of as a float64.
37func (dec *Decoder) UseNumber() { dec.d.useNumber = true }
38
39// DisallowUnknownFields causes the Decoder to return an error when the destination
40// is a struct and the input contains object keys which do not match any
41// non-ignored, exported fields in the destination.
42func (dec *Decoder) DisallowUnknownFields() { dec.d.disallowUnknownFields = true }
43
44// Decode reads the next JSON-encoded value from its
45// input and stores it in the value pointed to by v.
46//
47// See the documentation for Unmarshal for details about
48// the conversion of JSON into a Go value.
49func (dec *Decoder) Decode(v interface{}) error {
50	if dec.err != nil {
51		return dec.err
52	}
53
54	if err := dec.tokenPrepareForDecode(); err != nil {
55		return err
56	}
57
58	if !dec.tokenValueAllowed() {
59		return &SyntaxError{msg: "not at beginning of value", Offset: dec.offset()}
60	}
61
62	// Read whole value into buffer.
63	n, err := dec.readValue()
64	if err != nil {
65		return err
66	}
67	dec.d.init(dec.buf[dec.scanp : dec.scanp+n])
68	dec.scanp += n
69
70	// Don't save err from unmarshal into dec.err:
71	// the connection is still usable since we read a complete JSON
72	// object from it before the error happened.
73	err = dec.d.unmarshal(v)
74
75	// fixup token streaming state
76	dec.tokenValueEnd()
77
78	return err
79}
80
81// Buffered returns a reader of the data remaining in the Decoder's
82// buffer. The reader is valid until the next call to Decode.
83func (dec *Decoder) Buffered() io.Reader {
84	return bytes.NewReader(dec.buf[dec.scanp:])
85}
86
87// readValue reads a JSON value into dec.buf.
88// It returns the length of the encoding.
89func (dec *Decoder) readValue() (int, error) {
90	dec.scan.reset()
91
92	scanp := dec.scanp
93	var err error
94Input:
95	for {
96		// Look in the buffer for a new value.
97		for i, c := range dec.buf[scanp:] {
98			dec.scan.bytes++
99			switch dec.scan.step(&dec.scan, c) {
100			case scanEnd:
101				scanp += i
102				break Input
103			case scanEndObject, scanEndArray:
104				// scanEnd is delayed one byte.
105				// We might block trying to get that byte from src,
106				// so instead invent a space byte.
107				if stateEndValue(&dec.scan, ' ') == scanEnd {
108					scanp += i + 1
109					break Input
110				}
111			case scanError:
112				dec.err = dec.scan.err
113				return 0, dec.scan.err
114			}
115		}
116		scanp = len(dec.buf)
117
118		// Did the last read have an error?
119		// Delayed until now to allow buffer scan.
120		if err != nil {
121			if err == io.EOF {
122				if dec.scan.step(&dec.scan, ' ') == scanEnd {
123					break Input
124				}
125				if nonSpace(dec.buf) {
126					err = io.ErrUnexpectedEOF
127				}
128			}
129			dec.err = err
130			return 0, err
131		}
132
133		n := scanp - dec.scanp
134		err = dec.refill()
135		scanp = dec.scanp + n
136	}
137	return scanp - dec.scanp, nil
138}
139
140func (dec *Decoder) refill() error {
141	// Make room to read more into the buffer.
142	// First slide down data already consumed.
143	if dec.scanp > 0 {
144		dec.scanned += int64(dec.scanp)
145		n := copy(dec.buf, dec.buf[dec.scanp:])
146		dec.buf = dec.buf[:n]
147		dec.scanp = 0
148	}
149
150	// Grow buffer if not large enough.
151	const minRead = 512
152	if cap(dec.buf)-len(dec.buf) < minRead {
153		newBuf := make([]byte, len(dec.buf), 2*cap(dec.buf)+minRead)
154		copy(newBuf, dec.buf)
155		dec.buf = newBuf
156	}
157
158	// Read. Delay error for next iteration (after scan).
159	n, err := dec.r.Read(dec.buf[len(dec.buf):cap(dec.buf)])
160	dec.buf = dec.buf[0 : len(dec.buf)+n]
161
162	return err
163}
164
165func nonSpace(b []byte) bool {
166	for _, c := range b {
167		if !isSpace(c) {
168			return true
169		}
170	}
171	return false
172}
173
// An Encoder writes JSON values to an output stream.
//
// Once a write to w fails, the failure is kept in err and returned by
// every later call to Encode.
type Encoder struct {
	w          io.Writer
	err        error
	escapeHTML bool

	// Indentation settings installed by SetIndent, plus the scratch buffer
	// that Encode reuses for the indented output.
	indentBuf    *bytes.Buffer
	indentPrefix string
	indentValue  string
}
184
185// NewEncoder returns a new encoder that writes to w.
186func NewEncoder(w io.Writer) *Encoder {
187	return &Encoder{w: w, escapeHTML: true}
188}
189
190// Encode writes the JSON encoding of v to the stream,
191// followed by a newline character.
192//
193// See the documentation for Marshal for details about the
194// conversion of Go values to JSON.
195func (enc *Encoder) Encode(v interface{}) error {
196	if enc.err != nil {
197		return enc.err
198	}
199	e := newEncodeState()
200	err := e.marshal(v, encOpts{escapeHTML: enc.escapeHTML})
201	if err != nil {
202		return err
203	}
204
205	// Terminate each value with a newline.
206	// This makes the output look a little nicer
207	// when debugging, and some kind of space
208	// is required if the encoded value was a number,
209	// so that the reader knows there aren't more
210	// digits coming.
211	e.WriteByte('\n')
212
213	b := e.Bytes()
214	if enc.indentPrefix != "" || enc.indentValue != "" {
215		if enc.indentBuf == nil {
216			enc.indentBuf = new(bytes.Buffer)
217		}
218		enc.indentBuf.Reset()
219		err = Indent(enc.indentBuf, b, enc.indentPrefix, enc.indentValue)
220		if err != nil {
221			return err
222		}
223		b = enc.indentBuf.Bytes()
224	}
225	if _, err = enc.w.Write(b); err != nil {
226		enc.err = err
227	}
228	encodeStatePool.Put(e)
229	return err
230}
231
232// SetIndent instructs the encoder to format each subsequent encoded
233// value as if indented by the package-level function Indent(dst, src, prefix, indent).
234// Calling SetIndent("", "") disables indentation.
235func (enc *Encoder) SetIndent(prefix, indent string) {
236	enc.indentPrefix = prefix
237	enc.indentValue = indent
238}
239
// SetEscapeHTML specifies whether problematic HTML characters
// should be escaped inside JSON quoted strings.
// The default behavior is to escape &, <, and > to \u0026, \u003c, and \u003e
// to avoid certain safety problems that can arise when embedding JSON in HTML.
//
// In non-HTML settings where the escaping interferes with the readability
// of the output, SetEscapeHTML(false) disables this behavior.
//
// The setting takes effect for subsequent calls to Encode.
func (enc *Encoder) SetEscapeHTML(on bool) {
	enc.escapeHTML = on
}
250
// RawMessage holds a JSON value in its raw, already encoded form.
// It implements Marshaler and Unmarshaler and can be used to
// postpone JSON decoding or to supply a precomputed JSON encoding.
type RawMessage []byte

// MarshalJSON returns m itself as the JSON encoding of m.
// A nil RawMessage is encoded as the JSON literal null.
func (m RawMessage) MarshalJSON() ([]byte, error) {
	if m != nil {
		return m, nil
	}
	return []byte("null"), nil
}

// UnmarshalJSON sets *m to a copy of data, reusing the storage already
// held by *m when it is large enough.
// It returns an error when m is a nil pointer.
func (m *RawMessage) UnmarshalJSON(data []byte) error {
	if m == nil {
		return errors.New("json.RawMessage: UnmarshalJSON on nil pointer")
	}
	reused := (*m)[:0]
	*m = append(reused, data...)
	return nil
}
272
// Compile-time assertions that *RawMessage satisfies Marshaler and Unmarshaler.
var _ Marshaler = (*RawMessage)(nil)
var _ Unmarshaler = (*RawMessage)(nil)
275
// A Token holds a value of one of these types:
//
//	Delim, for the four JSON delimiters [ ] { }
//	bool, for JSON booleans
//	float64, for JSON numbers
//	Number, for JSON numbers (when UseNumber has been called)
//	string, for JSON string literals
//	nil, for JSON null
type Token interface{}
286
// Values of Decoder.tokenState. They record where the Token API stands
// relative to the array or object it is currently inside.
const (
	// tokenTopValue is the zero value and therefore the state of a new
	// Decoder: outside any array or object, a value may be read.
	tokenTopValue = iota
	// tokenArrayStart: Token has just returned '['; a value or ']' may follow.
	tokenArrayStart
	// tokenArrayValue: a ',' inside an array was consumed; a value may follow.
	tokenArrayValue
	// tokenArrayComma: an array element was read; ',' or ']' may follow.
	tokenArrayComma
	// tokenObjectStart: Token has just returned '{'; a key string or '}' may follow.
	tokenObjectStart
	// tokenObjectKey: a ',' inside an object was consumed; a key string is read next.
	tokenObjectKey
	// tokenObjectColon: an object key was read; ':' is accepted next.
	tokenObjectColon
	// tokenObjectValue: the ':' after a key was consumed; a value may follow.
	tokenObjectValue
	// tokenObjectComma: an object value was read; ',' or '}' may follow.
	tokenObjectComma
)
298
299// advance tokenstate from a separator state to a value state
300func (dec *Decoder) tokenPrepareForDecode() error {
301	// Note: Not calling peek before switch, to avoid
302	// putting peek into the standard Decode path.
303	// peek is only called when using the Token API.
304	switch dec.tokenState {
305	case tokenArrayComma:
306		c, err := dec.peek()
307		if err != nil {
308			return err
309		}
310		if c != ',' {
311			return &SyntaxError{"expected comma after array element", dec.offset()}
312		}
313		dec.scanp++
314		dec.tokenState = tokenArrayValue
315	case tokenObjectColon:
316		c, err := dec.peek()
317		if err != nil {
318			return err
319		}
320		if c != ':' {
321			return &SyntaxError{"expected colon after object key", dec.offset()}
322		}
323		dec.scanp++
324		dec.tokenState = tokenObjectValue
325	}
326	return nil
327}
328
329func (dec *Decoder) tokenValueAllowed() bool {
330	switch dec.tokenState {
331	case tokenTopValue, tokenArrayStart, tokenArrayValue, tokenObjectValue:
332		return true
333	}
334	return false
335}
336
337func (dec *Decoder) tokenValueEnd() {
338	switch dec.tokenState {
339	case tokenArrayStart, tokenArrayValue:
340		dec.tokenState = tokenArrayComma
341	case tokenObjectValue:
342		dec.tokenState = tokenObjectComma
343	}
344}
345
// A Delim is a JSON array or object delimiter, one of [ ] { or }.
type Delim rune

// String returns the delimiter as a one-character string.
func (d Delim) String() string {
	return string(rune(d))
}
352
// Token returns the next JSON token in the input stream.
// At the end of the input stream, Token returns nil, io.EOF.
//
// Token guarantees that the delimiters [ ] { } it returns are
// properly nested and matched: if Token encounters an unexpected
// delimiter in the input, it will return an error.
//
// The input stream consists of basic JSON values—bool, string,
// number, and null—along with delimiters [ ] { } of type Delim
// to mark the start and end of arrays and objects.
// Commas and colons are elided.
func (dec *Decoder) Token() (Token, error) {
	// Each pass looks at the next non-space byte. Separators update the
	// state and loop again; everything else returns a token or an error.
	for {
		c, err := dec.peek()
		if err != nil {
			return nil, err
		}
		switch c {
		case '[':
			if !dec.tokenValueAllowed() {
				return dec.tokenError(c)
			}
			dec.scanp++
			// Save the enclosing state so the matching ']' can restore it.
			dec.tokenStack = append(dec.tokenStack, dec.tokenState)
			dec.tokenState = tokenArrayStart
			return Delim('['), nil

		case ']':
			// Valid only right after '[' or after a complete element.
			if dec.tokenState != tokenArrayStart && dec.tokenState != tokenArrayComma {
				return dec.tokenError(c)
			}
			dec.scanp++
			// Restore the enclosing state, then mark the array as a
			// finished value within it.
			dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1]
			dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1]
			dec.tokenValueEnd()
			return Delim(']'), nil

		case '{':
			if !dec.tokenValueAllowed() {
				return dec.tokenError(c)
			}
			dec.scanp++
			// Save the enclosing state so the matching '}' can restore it.
			dec.tokenStack = append(dec.tokenStack, dec.tokenState)
			dec.tokenState = tokenObjectStart
			return Delim('{'), nil

		case '}':
			// Valid only right after '{' or after a complete key:value pair.
			if dec.tokenState != tokenObjectStart && dec.tokenState != tokenObjectComma {
				return dec.tokenError(c)
			}
			dec.scanp++
			// Restore the enclosing state, then mark the object as a
			// finished value within it.
			dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1]
			dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1]
			dec.tokenValueEnd()
			return Delim('}'), nil

		case ':':
			if dec.tokenState != tokenObjectColon {
				return dec.tokenError(c)
			}
			dec.scanp++
			dec.tokenState = tokenObjectValue
			// The colon is not reported; go on to the value.
			continue

		case ',':
			// The comma is not reported; go on to the next element or key.
			if dec.tokenState == tokenArrayComma {
				dec.scanp++
				dec.tokenState = tokenArrayValue
				continue
			}
			if dec.tokenState == tokenObjectComma {
				dec.scanp++
				dec.tokenState = tokenObjectKey
				continue
			}
			return dec.tokenError(c)

		case '"':
			if dec.tokenState == tokenObjectStart || dec.tokenState == tokenObjectKey {
				// An object key. Decode it as a top-level string so that
				// Decode's own token-state checks and updates do not
				// apply, then put the real state back.
				var x string
				old := dec.tokenState
				dec.tokenState = tokenTopValue
				err := dec.Decode(&x)
				dec.tokenState = old
				if err != nil {
					return nil, err
				}
				dec.tokenState = tokenObjectColon
				return x, nil
			}
			// Not in key position: handle the string as an ordinary value.
			fallthrough

		default:
			if !dec.tokenValueAllowed() {
				return dec.tokenError(c)
			}
			// Decode also advances the token state via tokenValueEnd.
			var x interface{}
			if err := dec.Decode(&x); err != nil {
				return nil, err
			}
			return x, nil
		}
	}
}
457
458func (dec *Decoder) tokenError(c byte) (Token, error) {
459	var context string
460	switch dec.tokenState {
461	case tokenTopValue:
462		context = " looking for beginning of value"
463	case tokenArrayStart, tokenArrayValue, tokenObjectValue:
464		context = " looking for beginning of value"
465	case tokenArrayComma:
466		context = " after array element"
467	case tokenObjectKey:
468		context = " looking for beginning of object key string"
469	case tokenObjectColon:
470		context = " after object key"
471	case tokenObjectComma:
472		context = " after object key:value pair"
473	}
474	return nil, &SyntaxError{"invalid character " + quoteChar(c) + context, dec.offset()}
475}
476
477// More reports whether there is another element in the
478// current array or object being parsed.
479func (dec *Decoder) More() bool {
480	c, err := dec.peek()
481	return err == nil && c != ']' && c != '}'
482}
483
484func (dec *Decoder) peek() (byte, error) {
485	var err error
486	for {
487		for i := dec.scanp; i < len(dec.buf); i++ {
488			c := dec.buf[i]
489			if isSpace(c) {
490				continue
491			}
492			dec.scanp = i
493			return c, nil
494		}
495		// buffer has been scanned, now report any error
496		if err != nil {
497			return 0, err
498		}
499		err = dec.refill()
500	}
501}
502
503func (dec *Decoder) offset() int64 {
504	return dec.scanned + int64(dec.scanp)
505}
506