1// Copyright 2010 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package json
6
7import (
8	"bytes"
9	"errors"
10	"io"
11)
12
// A Decoder reads and decodes JSON values from an input stream.
type Decoder struct {
	// Field roles, as used by the methods in this file:
	//   - r is the reader handed to NewDecoder; refill reads from it.
	//   - buf holds bytes read from r; buf[scanp:] has not been consumed yet.
	//   - d is the decodeState that Decode initializes with each value.
	//   - scan is the scanner readValue uses to find the end of a value.
	//   - err is sticky: once readValue stores an error here, Decode
	//     returns it on every later call.
	//   - tokenState is one of the token* constants, and tokenStack saves
	//     the state of each enclosing array or object for the Token API.

	r       io.Reader
	buf     []byte
	d       decodeState
	scanp   int   // start of unread data in buf
	scanned int64 // amount of data already scanned
	scan    scanner
	err     error

	tokenState int
	tokenStack []int
}
26
27// NewDecoder returns a new decoder that reads from r.
28//
29// The decoder introduces its own buffering and may
30// read data from r beyond the JSON values requested.
31func NewDecoder(r io.Reader) *Decoder {
32	return &Decoder{r: r}
33}
34
35// UseNumber causes the Decoder to unmarshal a number into an interface{} as a
36// Number instead of as a float64.
37func (dec *Decoder) UseNumber() { dec.d.useNumber = true }
38
39// DisallowUnknownFields causes the Decoder to return an error when the destination
40// is a struct and the input contains object keys which do not match any
41// non-ignored, exported fields in the destination.
42func (dec *Decoder) DisallowUnknownFields() { dec.d.disallowUnknownFields = true }
43
44// Decode reads the next JSON-encoded value from its
45// input and stores it in the value pointed to by v.
46//
47// See the documentation for Unmarshal for details about
48// the conversion of JSON into a Go value.
49func (dec *Decoder) Decode(v interface{}) error {
50	if dec.err != nil {
51		return dec.err
52	}
53
54	if err := dec.tokenPrepareForDecode(); err != nil {
55		return err
56	}
57
58	if !dec.tokenValueAllowed() {
59		return &SyntaxError{msg: "not at beginning of value", Offset: dec.InputOffset()}
60	}
61
62	// Read whole value into buffer.
63	n, err := dec.readValue()
64	if err != nil {
65		return err
66	}
67	dec.d.init(dec.buf[dec.scanp : dec.scanp+n])
68	dec.scanp += n
69
70	// Don't save err from unmarshal into dec.err:
71	// the connection is still usable since we read a complete JSON
72	// object from it before the error happened.
73	err = dec.d.unmarshal(v)
74
75	// fixup token streaming state
76	dec.tokenValueEnd()
77
78	return err
79}
80
81// Buffered returns a reader of the data remaining in the Decoder's
82// buffer. The reader is valid until the next call to Decode.
83func (dec *Decoder) Buffered() io.Reader {
84	return bytes.NewReader(dec.buf[dec.scanp:])
85}
86
// readValue reads a JSON value into dec.buf.
// It returns the length of the encoding.
//
// The value starts at dec.buf[dec.scanp]; dec.scanp itself is not
// advanced here (the caller does that). Scanner errors and read errors
// are stored in dec.err before being returned, so they are sticky.
func (dec *Decoder) readValue() (int, error) {
	dec.scan.reset()

	// scanp is a local cursor; dec.scanp keeps marking the start of the value.
	scanp := dec.scanp
	// err holds the result of the most recent refill; it is examined only
	// after the bytes that arrived with it have been scanned.
	var err error
Input:
	// help the compiler see that scanp is never negative, so it can remove
	// some bounds checks below.
	for scanp >= 0 {

		// Look in the buffer for a new value.
		for ; scanp < len(dec.buf); scanp++ {
			c := dec.buf[scanp]
			dec.scan.bytes++
			switch dec.scan.step(&dec.scan, c) {
			case scanEnd:
				// scanEnd is delayed one byte so we decrement
				// the scanner bytes count by 1 to ensure that
				// this value is correct in the next call of Decode.
				dec.scan.bytes--
				break Input
			case scanEndObject, scanEndArray:
				// scanEnd is delayed one byte.
				// We might block trying to get that byte from src,
				// so instead invent a space byte.
				if stateEndValue(&dec.scan, ' ') == scanEnd {
					// Include the closing delimiter in the value.
					scanp++
					break Input
				}
			case scanError:
				dec.err = dec.scan.err
				return 0, dec.scan.err
			}
		}

		// Did the last read have an error?
		// Delayed until now to allow buffer scan.
		if err != nil {
			if err == io.EOF {
				// Feed the scanner a space to learn whether the
				// buffered bytes already form a complete value.
				if dec.scan.step(&dec.scan, ' ') == scanEnd {
					break Input
				}
				// err came from refill, which leaves dec.scanp at 0, so
				// dec.buf holds only unread bytes here. Anything other
				// than whitespace means the input stopped mid-value.
				if nonSpace(dec.buf) {
					err = io.ErrUnexpectedEOF
				}
			}
			dec.err = err
			return 0, err
		}

		// refill may slide the unread data to the front of the buffer,
		// so remember the cursor relative to dec.scanp and rebuild it.
		n := scanp - dec.scanp
		err = dec.refill()
		scanp = dec.scanp + n
	}
	return scanp - dec.scanp, nil
}
145
146func (dec *Decoder) refill() error {
147	// Make room to read more into the buffer.
148	// First slide down data already consumed.
149	if dec.scanp > 0 {
150		dec.scanned += int64(dec.scanp)
151		n := copy(dec.buf, dec.buf[dec.scanp:])
152		dec.buf = dec.buf[:n]
153		dec.scanp = 0
154	}
155
156	// Grow buffer if not large enough.
157	const minRead = 512
158	if cap(dec.buf)-len(dec.buf) < minRead {
159		newBuf := make([]byte, len(dec.buf), 2*cap(dec.buf)+minRead)
160		copy(newBuf, dec.buf)
161		dec.buf = newBuf
162	}
163
164	// Read. Delay error for next iteration (after scan).
165	n, err := dec.r.Read(dec.buf[len(dec.buf):cap(dec.buf)])
166	dec.buf = dec.buf[0 : len(dec.buf)+n]
167
168	return err
169}
170
171func nonSpace(b []byte) bool {
172	for _, c := range b {
173		if !isSpace(c) {
174			return true
175		}
176	}
177	return false
178}
179
// An Encoder writes JSON values to an output stream.
type Encoder struct {
	// Field roles, as used by the methods in this file:
	//   - w is the writer handed to NewEncoder.
	//   - err is sticky: once a Write to w fails, Encode returns that
	//     error on every later call.
	//   - escapeHTML is passed to marshal in encOpts; NewEncoder sets it
	//     to true and SetEscapeHTML changes it.
	//   - indentBuf is a scratch buffer Encode reuses when indentation
	//     is enabled; indentPrefix and indentValue are set by SetIndent.

	w          io.Writer
	err        error
	escapeHTML bool

	indentBuf    *bytes.Buffer
	indentPrefix string
	indentValue  string
}
190
191// NewEncoder returns a new encoder that writes to w.
192func NewEncoder(w io.Writer) *Encoder {
193	return &Encoder{w: w, escapeHTML: true}
194}
195
196// Encode writes the JSON encoding of v to the stream,
197// followed by a newline character.
198//
199// See the documentation for Marshal for details about the
200// conversion of Go values to JSON.
201func (enc *Encoder) Encode(v interface{}) error {
202	if enc.err != nil {
203		return enc.err
204	}
205	e := newEncodeState()
206	err := e.marshal(v, encOpts{escapeHTML: enc.escapeHTML})
207	if err != nil {
208		return err
209	}
210
211	// Terminate each value with a newline.
212	// This makes the output look a little nicer
213	// when debugging, and some kind of space
214	// is required if the encoded value was a number,
215	// so that the reader knows there aren't more
216	// digits coming.
217	e.WriteByte('\n')
218
219	b := e.Bytes()
220	if enc.indentPrefix != "" || enc.indentValue != "" {
221		if enc.indentBuf == nil {
222			enc.indentBuf = new(bytes.Buffer)
223		}
224		enc.indentBuf.Reset()
225		err = Indent(enc.indentBuf, b, enc.indentPrefix, enc.indentValue)
226		if err != nil {
227			return err
228		}
229		b = enc.indentBuf.Bytes()
230	}
231	if _, err = enc.w.Write(b); err != nil {
232		enc.err = err
233	}
234	encodeStatePool.Put(e)
235	return err
236}
237
238// SetIndent instructs the encoder to format each subsequent encoded
239// value as if indented by the package-level function Indent(dst, src, prefix, indent).
240// Calling SetIndent("", "") disables indentation.
241func (enc *Encoder) SetIndent(prefix, indent string) {
242	enc.indentPrefix = prefix
243	enc.indentValue = indent
244}
245
246// SetEscapeHTML specifies whether problematic HTML characters
247// should be escaped inside JSON quoted strings.
248// The default behavior is to escape &, <, and > to \u0026, \u003c, and \u003e
249// to avoid certain safety problems that can arise when embedding JSON in HTML.
250//
251// In non-HTML settings where the escaping interferes with the readability
252// of the output, SetEscapeHTML(false) disables this behavior.
253func (enc *Encoder) SetEscapeHTML(on bool) {
254	enc.escapeHTML = on
255}
256
// RawMessage holds a raw, already-encoded JSON value.
// It implements Marshaler and Unmarshaler, which makes it useful for
// postponing JSON decoding or for supplying a precomputed encoding.
type RawMessage []byte

// MarshalJSON returns m itself as its JSON encoding.
// A nil RawMessage encodes as the JSON literal null.
func (m RawMessage) MarshalJSON() ([]byte, error) {
	if m != nil {
		return m, nil
	}
	return []byte("null"), nil
}

// UnmarshalJSON stores a copy of data in *m, reusing the existing
// backing array of *m when it is large enough.
// It returns an error if m is a nil pointer.
func (m *RawMessage) UnmarshalJSON(data []byte) error {
	if m == nil {
		return errors.New("json.RawMessage: UnmarshalJSON on nil pointer")
	}
	reused := (*m)[:0]
	*m = append(reused, data...)
	return nil
}
278
279var _ Marshaler = (*RawMessage)(nil)
280var _ Unmarshaler = (*RawMessage)(nil)
281
// A Token holds a value of one of these types:
//
//	Delim, for the four JSON delimiters [ ] { }
//	bool, for JSON booleans
//	float64, for JSON numbers
//	Number, for JSON numbers
//	string, for JSON string literals
//	nil, for JSON null
//
// Numbers are reported as Number instead of float64 when UseNumber
// has been called on the Decoder.
type Token interface{}
292
// States of the Token API state machine, stored in Decoder.tokenState
// and saved on Decoder.tokenStack while inside a nested array or object.
const (
	// tokenTopValue is the zero state: outside any array or object,
	// where a value may start.
	tokenTopValue = iota
	// tokenArrayStart: just after '['; a value or ']' may follow.
	tokenArrayStart
	// tokenArrayValue: just after ',' inside an array; a value may follow.
	tokenArrayValue
	// tokenArrayComma: just after an array element; ',' or ']' may follow.
	tokenArrayComma
	// tokenObjectStart: just after '{'; a key string or '}' may follow.
	tokenObjectStart
	// tokenObjectKey: just after ',' inside an object; a key string may follow.
	tokenObjectKey
	// tokenObjectColon: just after an object key; ':' may follow.
	tokenObjectColon
	// tokenObjectValue: just after ':'; a value may follow.
	tokenObjectValue
	// tokenObjectComma: just after an object value; ',' or '}' may follow.
	tokenObjectComma
)
304
305// advance tokenstate from a separator state to a value state
306func (dec *Decoder) tokenPrepareForDecode() error {
307	// Note: Not calling peek before switch, to avoid
308	// putting peek into the standard Decode path.
309	// peek is only called when using the Token API.
310	switch dec.tokenState {
311	case tokenArrayComma:
312		c, err := dec.peek()
313		if err != nil {
314			return err
315		}
316		if c != ',' {
317			return &SyntaxError{"expected comma after array element", dec.InputOffset()}
318		}
319		dec.scanp++
320		dec.tokenState = tokenArrayValue
321	case tokenObjectColon:
322		c, err := dec.peek()
323		if err != nil {
324			return err
325		}
326		if c != ':' {
327			return &SyntaxError{"expected colon after object key", dec.InputOffset()}
328		}
329		dec.scanp++
330		dec.tokenState = tokenObjectValue
331	}
332	return nil
333}
334
335func (dec *Decoder) tokenValueAllowed() bool {
336	switch dec.tokenState {
337	case tokenTopValue, tokenArrayStart, tokenArrayValue, tokenObjectValue:
338		return true
339	}
340	return false
341}
342
343func (dec *Decoder) tokenValueEnd() {
344	switch dec.tokenState {
345	case tokenArrayStart, tokenArrayValue:
346		dec.tokenState = tokenArrayComma
347	case tokenObjectValue:
348		dec.tokenState = tokenObjectComma
349	}
350}
351
// A Delim is one of the JSON array or object delimiters [ ] { or }.
type Delim rune

// String returns the delimiter as a one-character string.
func (d Delim) String() string {
	r := rune(d)
	return string(r)
}
358
// Token returns the next JSON token in the input stream.
// At the end of the input stream, Token returns nil, io.EOF.
//
// Token guarantees that the delimiters [ ] { } it returns are
// properly nested and matched: if Token encounters an unexpected
// delimiter in the input, it will return an error.
//
// The input stream consists of basic JSON values—bool, string,
// number, and null—along with delimiters [ ] { } of type Delim
// to mark the start and end of arrays and objects.
// Commas and colons are elided.
func (dec *Decoder) Token() (Token, error) {
	// Loop so that ':' and ',' can be consumed silently ("continue")
	// before the token that follows them is returned.
	for {
		// peek skips whitespace and leaves dec.scanp on the byte it returns.
		c, err := dec.peek()
		if err != nil {
			return nil, err
		}
		switch c {
		case '[':
			if !dec.tokenValueAllowed() {
				return dec.tokenError(c)
			}
			dec.scanp++
			// Save the enclosing state so the matching ']' can restore it.
			dec.tokenStack = append(dec.tokenStack, dec.tokenState)
			dec.tokenState = tokenArrayStart
			return Delim('['), nil

		case ']':
			// Legal only right after '[' or after an element.
			if dec.tokenState != tokenArrayStart && dec.tokenState != tokenArrayComma {
				return dec.tokenError(c)
			}
			dec.scanp++
			// Pop the enclosing state; the finished array then counts as
			// a completed value in that enclosing context.
			dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1]
			dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1]
			dec.tokenValueEnd()
			return Delim(']'), nil

		case '{':
			if !dec.tokenValueAllowed() {
				return dec.tokenError(c)
			}
			dec.scanp++
			// Save the enclosing state so the matching '}' can restore it.
			dec.tokenStack = append(dec.tokenStack, dec.tokenState)
			dec.tokenState = tokenObjectStart
			return Delim('{'), nil

		case '}':
			// Legal only right after '{' or after a key:value pair.
			if dec.tokenState != tokenObjectStart && dec.tokenState != tokenObjectComma {
				return dec.tokenError(c)
			}
			dec.scanp++
			// Pop the enclosing state; the finished object then counts as
			// a completed value in that enclosing context.
			dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1]
			dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1]
			dec.tokenValueEnd()
			return Delim('}'), nil

		case ':':
			if dec.tokenState != tokenObjectColon {
				return dec.tokenError(c)
			}
			dec.scanp++
			dec.tokenState = tokenObjectValue
			continue

		case ',':
			if dec.tokenState == tokenArrayComma {
				dec.scanp++
				dec.tokenState = tokenArrayValue
				continue
			}
			if dec.tokenState == tokenObjectComma {
				dec.scanp++
				dec.tokenState = tokenObjectKey
				continue
			}
			return dec.tokenError(c)

		case '"':
			// A string where an object key is expected: decode it as a
			// plain string. The state is switched to tokenTopValue for the
			// call so that Decode accepts a value here, and restored
			// afterwards so that a failure leaves the state unchanged.
			if dec.tokenState == tokenObjectStart || dec.tokenState == tokenObjectKey {
				var x string
				old := dec.tokenState
				dec.tokenState = tokenTopValue
				err := dec.Decode(&x)
				dec.tokenState = old
				if err != nil {
					return nil, err
				}
				dec.tokenState = tokenObjectColon
				return x, nil
			}
			// Otherwise the string is an ordinary value.
			fallthrough

		default:
			// Any other byte must begin a value; Decode reads it and
			// advances the token state itself.
			if !dec.tokenValueAllowed() {
				return dec.tokenError(c)
			}
			var x interface{}
			if err := dec.Decode(&x); err != nil {
				return nil, err
			}
			return x, nil
		}
	}
}
463
464func (dec *Decoder) tokenError(c byte) (Token, error) {
465	var context string
466	switch dec.tokenState {
467	case tokenTopValue:
468		context = " looking for beginning of value"
469	case tokenArrayStart, tokenArrayValue, tokenObjectValue:
470		context = " looking for beginning of value"
471	case tokenArrayComma:
472		context = " after array element"
473	case tokenObjectKey:
474		context = " looking for beginning of object key string"
475	case tokenObjectColon:
476		context = " after object key"
477	case tokenObjectComma:
478		context = " after object key:value pair"
479	}
480	return nil, &SyntaxError{"invalid character " + quoteChar(c) + context, dec.InputOffset()}
481}
482
483// More reports whether there is another element in the
484// current array or object being parsed.
485func (dec *Decoder) More() bool {
486	c, err := dec.peek()
487	return err == nil && c != ']' && c != '}'
488}
489
490func (dec *Decoder) peek() (byte, error) {
491	var err error
492	for {
493		for i := dec.scanp; i < len(dec.buf); i++ {
494			c := dec.buf[i]
495			if isSpace(c) {
496				continue
497			}
498			dec.scanp = i
499			return c, nil
500		}
501		// buffer has been scanned, now report any error
502		if err != nil {
503			return 0, err
504		}
505		err = dec.refill()
506	}
507}
508
509// InputOffset returns the input stream byte offset of the current decoder position.
510// The offset gives the location of the end of the most recently returned token
511// and the beginning of the next token.
512func (dec *Decoder) InputOffset() int64 {
513	return dec.scanned + int64(dec.scanp)
514}
515