1// Copyright 2018 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package json
6
7import (
8	"strconv"
9	"unicode"
10	"unicode/utf16"
11	"unicode/utf8"
12
13	"google.golang.org/protobuf/internal/strs"
14)
15
16func (d *Decoder) parseString(in []byte) (string, int, error) {
17	in0 := in
18	if len(in) == 0 {
19		return "", 0, ErrUnexpectedEOF
20	}
21	if in[0] != '"' {
22		return "", 0, d.newSyntaxError(d.currPos(), "invalid character %q at start of string", in[0])
23	}
24	in = in[1:]
25	i := indexNeedEscapeInBytes(in)
26	in, out := in[i:], in[:i:i] // set cap to prevent mutations
27	for len(in) > 0 {
28		switch r, n := utf8.DecodeRune(in); {
29		case r == utf8.RuneError && n == 1:
30			return "", 0, d.newSyntaxError(d.currPos(), "invalid UTF-8 in string")
31		case r < ' ':
32			return "", 0, d.newSyntaxError(d.currPos(), "invalid character %q in string", r)
33		case r == '"':
34			in = in[1:]
35			n := len(in0) - len(in)
36			return string(out), n, nil
37		case r == '\\':
38			if len(in) < 2 {
39				return "", 0, ErrUnexpectedEOF
40			}
41			switch r := in[1]; r {
42			case '"', '\\', '/':
43				in, out = in[2:], append(out, r)
44			case 'b':
45				in, out = in[2:], append(out, '\b')
46			case 'f':
47				in, out = in[2:], append(out, '\f')
48			case 'n':
49				in, out = in[2:], append(out, '\n')
50			case 'r':
51				in, out = in[2:], append(out, '\r')
52			case 't':
53				in, out = in[2:], append(out, '\t')
54			case 'u':
55				if len(in) < 6 {
56					return "", 0, ErrUnexpectedEOF
57				}
58				v, err := strconv.ParseUint(string(in[2:6]), 16, 16)
59				if err != nil {
60					return "", 0, d.newSyntaxError(d.currPos(), "invalid escape code %q in string", in[:6])
61				}
62				in = in[6:]
63
64				r := rune(v)
65				if utf16.IsSurrogate(r) {
66					if len(in) < 6 {
67						return "", 0, ErrUnexpectedEOF
68					}
69					v, err := strconv.ParseUint(string(in[2:6]), 16, 16)
70					r = utf16.DecodeRune(r, rune(v))
71					if in[0] != '\\' || in[1] != 'u' ||
72						r == unicode.ReplacementChar || err != nil {
73						return "", 0, d.newSyntaxError(d.currPos(), "invalid escape code %q in string", in[:6])
74					}
75					in = in[6:]
76				}
77				out = append(out, string(r)...)
78			default:
79				return "", 0, d.newSyntaxError(d.currPos(), "invalid escape code %q in string", in[:2])
80			}
81		default:
82			i := indexNeedEscapeInBytes(in[n:])
83			in, out = in[n+i:], append(out, in[:n+i]...)
84		}
85	}
86	return "", 0, ErrUnexpectedEOF
87}
88
89// indexNeedEscapeInBytes returns the index of the character that needs
90// escaping. If no characters need escaping, this returns the input length.
91func indexNeedEscapeInBytes(b []byte) int { return indexNeedEscapeInString(strs.UnsafeString(b)) }
92