// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package text

import (
	"math"
	"math/bits"
	"strconv"
	"strings"
	"unicode/utf8"

	"google.golang.org/protobuf/internal/detrand"
	"google.golang.org/protobuf/internal/errors"
)

// encType represents an encoding type, i.e. the kind of construct that was
// most recently written to the output.
type encType uint8

// The expression (1 << iota) / 2 yields 0 for the blank identifier and then
// 1, 2, 4, 8 for the named constants, so every kind occupies its own bit.
// prepareNext relies on this to test for several kinds at once with a mask
// such as scalar|messageClose.
const (
	_ encType = (1 << iota) / 2
	name
	scalar
	messageOpen
	messageClose
)

// Encoder provides methods to write out textproto constructs and values. The user is
// responsible for producing valid sequences of constructs and values.
type Encoder struct {
	// encoderState holds the mutable part of the encoder; it is what
	// Snapshot returns and what Reset restores.
	encoderState

	// indent is the indentation added per nesting level. When it is empty
	// the encoder emits single-line output.
	indent  string
	newline string // set to "\n" if len(indent) > 0
	// delims holds the opening (index 0) and closing (index 1) message
	// delimiter bytes.
	delims [2]byte
	// outputASCII is forwarded to appendString to request escaping of
	// multi-byte UTF-8 sequences.
	outputASCII bool
}

// encoderState is the portion of Encoder that changes while writing.
type encoderState struct {
	// lastType is the kind of the construct written most recently.
	lastType encType
	// indents is the indentation for the current nesting depth; prepareNext
	// grows it by indent after a message opens and shrinks it before a
	// message closes.
	indents []byte
	// out accumulates the encoded output returned by Bytes.
	out []byte
}

// NewEncoder returns an Encoder.
//
// If indent is a non-empty string, it causes every entry in a List or Message
// to be preceded by the indent and trailed by a newline. The indent may only
// consist of space and tab characters; anything else results in an error.
//
// If delims is not the zero value, it controls the delimiter characters used
// for messages (e.g., "{}" vs "<>"). Any other pair results in an error.
//
// If outputASCII is true, strings will be serialized in such a way that
// multi-byte UTF-8 sequences are escaped. This property ensures that the
// overall output is ASCII (as opposed to UTF-8).
57func NewEncoder(indent string, delims [2]byte, outputASCII bool) (*Encoder, error) { 58 e := &Encoder{} 59 if len(indent) > 0 { 60 if strings.Trim(indent, " \t") != "" { 61 return nil, errors.New("indent may only be composed of space and tab characters") 62 } 63 e.indent = indent 64 e.newline = "\n" 65 } 66 switch delims { 67 case [2]byte{0, 0}: 68 e.delims = [2]byte{'{', '}'} 69 case [2]byte{'{', '}'}, [2]byte{'<', '>'}: 70 e.delims = delims 71 default: 72 return nil, errors.New("delimiters may only be \"{}\" or \"<>\"") 73 } 74 e.outputASCII = outputASCII 75 76 return e, nil 77} 78 79// Bytes returns the content of the written bytes. 80func (e *Encoder) Bytes() []byte { 81 return e.out 82} 83 84// StartMessage writes out the '{' or '<' symbol. 85func (e *Encoder) StartMessage() { 86 e.prepareNext(messageOpen) 87 e.out = append(e.out, e.delims[0]) 88} 89 90// EndMessage writes out the '}' or '>' symbol. 91func (e *Encoder) EndMessage() { 92 e.prepareNext(messageClose) 93 e.out = append(e.out, e.delims[1]) 94} 95 96// WriteName writes out the field name and the separator ':'. 97func (e *Encoder) WriteName(s string) { 98 e.prepareNext(name) 99 e.out = append(e.out, s...) 100 e.out = append(e.out, ':') 101} 102 103// WriteBool writes out the given boolean value. 104func (e *Encoder) WriteBool(b bool) { 105 if b { 106 e.WriteLiteral("true") 107 } else { 108 e.WriteLiteral("false") 109 } 110} 111 112// WriteString writes out the given string value. 113func (e *Encoder) WriteString(s string) { 114 e.prepareNext(scalar) 115 e.out = appendString(e.out, s, e.outputASCII) 116} 117 118func appendString(out []byte, in string, outputASCII bool) []byte { 119 out = append(out, '"') 120 i := indexNeedEscapeInString(in) 121 in, out = in[i:], append(out, in[:i]...) 
122 for len(in) > 0 { 123 switch r, n := utf8.DecodeRuneInString(in); { 124 case r == utf8.RuneError && n == 1: 125 // We do not report invalid UTF-8 because strings in the text format 126 // are used to represent both the proto string and bytes type. 127 r = rune(in[0]) 128 fallthrough 129 case r < ' ' || r == '"' || r == '\\': 130 out = append(out, '\\') 131 switch r { 132 case '"', '\\': 133 out = append(out, byte(r)) 134 case '\n': 135 out = append(out, 'n') 136 case '\r': 137 out = append(out, 'r') 138 case '\t': 139 out = append(out, 't') 140 default: 141 out = append(out, 'x') 142 out = append(out, "00"[1+(bits.Len32(uint32(r))-1)/4:]...) 143 out = strconv.AppendUint(out, uint64(r), 16) 144 } 145 in = in[n:] 146 case outputASCII && r >= utf8.RuneSelf: 147 out = append(out, '\\') 148 if r <= math.MaxUint16 { 149 out = append(out, 'u') 150 out = append(out, "0000"[1+(bits.Len32(uint32(r))-1)/4:]...) 151 out = strconv.AppendUint(out, uint64(r), 16) 152 } else { 153 out = append(out, 'U') 154 out = append(out, "00000000"[1+(bits.Len32(uint32(r))-1)/4:]...) 155 out = strconv.AppendUint(out, uint64(r), 16) 156 } 157 in = in[n:] 158 default: 159 i := indexNeedEscapeInString(in[n:]) 160 in, out = in[n+i:], append(out, in[:n+i]...) 161 } 162 } 163 out = append(out, '"') 164 return out 165} 166 167// indexNeedEscapeInString returns the index of the character that needs 168// escaping. If no characters need escaping, this returns the input length. 169func indexNeedEscapeInString(s string) int { 170 for i := 0; i < len(s); i++ { 171 if c := s[i]; c < ' ' || c == '"' || c == '\'' || c == '\\' || c >= utf8.RuneSelf { 172 return i 173 } 174 } 175 return len(s) 176} 177 178// WriteFloat writes out the given float value for given bitSize. 
179func (e *Encoder) WriteFloat(n float64, bitSize int) { 180 e.prepareNext(scalar) 181 e.out = appendFloat(e.out, n, bitSize) 182} 183 184func appendFloat(out []byte, n float64, bitSize int) []byte { 185 switch { 186 case math.IsNaN(n): 187 return append(out, "nan"...) 188 case math.IsInf(n, +1): 189 return append(out, "inf"...) 190 case math.IsInf(n, -1): 191 return append(out, "-inf"...) 192 default: 193 return strconv.AppendFloat(out, n, 'g', -1, bitSize) 194 } 195} 196 197// WriteInt writes out the given signed integer value. 198func (e *Encoder) WriteInt(n int64) { 199 e.prepareNext(scalar) 200 e.out = append(e.out, strconv.FormatInt(n, 10)...) 201} 202 203// WriteUint writes out the given unsigned integer value. 204func (e *Encoder) WriteUint(n uint64) { 205 e.prepareNext(scalar) 206 e.out = append(e.out, strconv.FormatUint(n, 10)...) 207} 208 209// WriteLiteral writes out the given string as a literal value without quotes. 210// This is used for writing enum literal strings. 211func (e *Encoder) WriteLiteral(s string) { 212 e.prepareNext(scalar) 213 e.out = append(e.out, s...) 214} 215 216// prepareNext adds possible space and indentation for the next value based 217// on last encType and indent option. It also updates e.lastType to next. 218func (e *Encoder) prepareNext(next encType) { 219 defer func() { 220 e.lastType = next 221 }() 222 223 // Single line. 224 if len(e.indent) == 0 { 225 // Add space after each field before the next one. 226 if e.lastType&(scalar|messageClose) != 0 && next == name { 227 e.out = append(e.out, ' ') 228 // Add a random extra space to make output unstable. 229 if detrand.Bool() { 230 e.out = append(e.out, ' ') 231 } 232 } 233 return 234 } 235 236 // Multi-line. 237 switch { 238 case e.lastType == name: 239 e.out = append(e.out, ' ') 240 // Add a random extra space after name: to make output unstable. 
241 if detrand.Bool() { 242 e.out = append(e.out, ' ') 243 } 244 245 case e.lastType == messageOpen && next != messageClose: 246 e.indents = append(e.indents, e.indent...) 247 e.out = append(e.out, '\n') 248 e.out = append(e.out, e.indents...) 249 250 case e.lastType&(scalar|messageClose) != 0: 251 if next == messageClose { 252 e.indents = e.indents[:len(e.indents)-len(e.indent)] 253 } 254 e.out = append(e.out, '\n') 255 e.out = append(e.out, e.indents...) 256 } 257} 258 259// Snapshot returns the current snapshot for use in Reset. 260func (e *Encoder) Snapshot() encoderState { 261 return e.encoderState 262} 263 264// Reset resets the Encoder to the given encoderState from a Snapshot. 265func (e *Encoder) Reset(es encoderState) { 266 e.encoderState = es 267} 268