1// Copyright 2018 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5package json 6 7import ( 8 "strconv" 9 "unicode" 10 "unicode/utf16" 11 "unicode/utf8" 12 13 "google.golang.org/protobuf/internal/strs" 14) 15 16func (d *Decoder) parseString(in []byte) (string, int, error) { 17 in0 := in 18 if len(in) == 0 { 19 return "", 0, ErrUnexpectedEOF 20 } 21 if in[0] != '"' { 22 return "", 0, d.newSyntaxError(d.currPos(), "invalid character %q at start of string", in[0]) 23 } 24 in = in[1:] 25 i := indexNeedEscapeInBytes(in) 26 in, out := in[i:], in[:i:i] // set cap to prevent mutations 27 for len(in) > 0 { 28 switch r, n := utf8.DecodeRune(in); { 29 case r == utf8.RuneError && n == 1: 30 return "", 0, d.newSyntaxError(d.currPos(), "invalid UTF-8 in string") 31 case r < ' ': 32 return "", 0, d.newSyntaxError(d.currPos(), "invalid character %q in string", r) 33 case r == '"': 34 in = in[1:] 35 n := len(in0) - len(in) 36 return string(out), n, nil 37 case r == '\\': 38 if len(in) < 2 { 39 return "", 0, ErrUnexpectedEOF 40 } 41 switch r := in[1]; r { 42 case '"', '\\', '/': 43 in, out = in[2:], append(out, r) 44 case 'b': 45 in, out = in[2:], append(out, '\b') 46 case 'f': 47 in, out = in[2:], append(out, '\f') 48 case 'n': 49 in, out = in[2:], append(out, '\n') 50 case 'r': 51 in, out = in[2:], append(out, '\r') 52 case 't': 53 in, out = in[2:], append(out, '\t') 54 case 'u': 55 if len(in) < 6 { 56 return "", 0, ErrUnexpectedEOF 57 } 58 v, err := strconv.ParseUint(string(in[2:6]), 16, 16) 59 if err != nil { 60 return "", 0, d.newSyntaxError(d.currPos(), "invalid escape code %q in string", in[:6]) 61 } 62 in = in[6:] 63 64 r := rune(v) 65 if utf16.IsSurrogate(r) { 66 if len(in) < 6 { 67 return "", 0, ErrUnexpectedEOF 68 } 69 v, err := strconv.ParseUint(string(in[2:6]), 16, 16) 70 r = utf16.DecodeRune(r, rune(v)) 71 if in[0] != '\\' || in[1] != 'u' || 72 r == unicode.ReplacementChar || err != nil { 73 return "", 0, d.newSyntaxError(d.currPos(), "invalid escape code %q in string", in[:6]) 74 } 75 in = in[6:] 76 } 77 out = append(out, string(r)...) 78 default: 79 return "", 0, d.newSyntaxError(d.currPos(), "invalid escape code %q in string", in[:2]) 80 } 81 default: 82 i := indexNeedEscapeInBytes(in[n:]) 83 in, out = in[n+i:], append(out, in[:n+i]...) 84 } 85 } 86 return "", 0, ErrUnexpectedEOF 87} 88 89// indexNeedEscapeInBytes returns the index of the character that needs 90// escaping. If no characters need escaping, this returns the input length. 91func indexNeedEscapeInBytes(b []byte) int { return indexNeedEscapeInString(strs.UnsafeString(b)) } 92