1// Copyright 2018 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package prototext
6
7import (
8	"fmt"
9	"sort"
10	"strconv"
11	"unicode/utf8"
12
13	"google.golang.org/protobuf/encoding/protowire"
14	"google.golang.org/protobuf/internal/encoding/messageset"
15	"google.golang.org/protobuf/internal/encoding/text"
16	"google.golang.org/protobuf/internal/errors"
17	"google.golang.org/protobuf/internal/flags"
18	"google.golang.org/protobuf/internal/genid"
19	"google.golang.org/protobuf/internal/mapsort"
20	"google.golang.org/protobuf/internal/pragma"
21	"google.golang.org/protobuf/internal/strs"
22	"google.golang.org/protobuf/proto"
23	pref "google.golang.org/protobuf/reflect/protoreflect"
24	"google.golang.org/protobuf/reflect/protoregistry"
25)
26
// defaultIndent is the indentation applied by marshal when
// MarshalOptions.Multiline is set but MarshalOptions.Indent is empty.
const defaultIndent = "  "
28
29// Format formats the message as a multiline string.
30// This function is only intended for human consumption and ignores errors.
31// Do not depend on the output being stable. It may change over time across
32// different versions of the program.
33func Format(m proto.Message) string {
34	return MarshalOptions{Multiline: true}.Format(m)
35}
36
37// Marshal writes the given proto.Message in textproto format using default
38// options. Do not depend on the output being stable. It may change over time
39// across different versions of the program.
40func Marshal(m proto.Message) ([]byte, error) {
41	return MarshalOptions{}.Marshal(m)
42}
43
// MarshalOptions is a configurable text format marshaler.
// With the zero value, Marshal reports an error for messages with missing
// required fields, leaves unknown fields out of the output, and resolves
// google.protobuf.Any types through protoregistry.GlobalTypes.
type MarshalOptions struct {
	pragma.NoUnkeyedLiterals

	// Multiline specifies whether the marshaler should format the output in
	// indented-form with every textual element on a new line.
	// If Indent is an empty string, then an arbitrary indent is chosen
	// (currently defaultIndent).
	Multiline bool

	// Indent specifies the set of indentation characters to use in a multiline
	// formatted output such that every entry is preceded by Indent and
	// terminated by a newline. If non-empty, then Multiline is treated as true.
	// Indent can only be composed of space or tab characters.
	Indent string

	// EmitASCII specifies whether to format strings and bytes as ASCII only
	// as opposed to using UTF-8 encoding when possible.
	EmitASCII bool

	// allowInvalidUTF8 specifies whether to permit the encoding of strings
	// with invalid UTF-8. This is unexported as it is intended to only
	// be specified by the Format method.
	allowInvalidUTF8 bool

	// AllowPartial allows messages that have missing required fields to marshal
	// without returning an error. If AllowPartial is false (the default),
	// Marshal will return an error if there are any missing required fields.
	AllowPartial bool

	// EmitUnknown specifies whether to emit unknown fields in the output.
	// If specified, the unmarshaler may be unable to parse the output.
	// The default is to exclude unknown fields.
	EmitUnknown bool

	// Resolver is used for looking up types when expanding google.protobuf.Any
	// messages. If nil, this defaults to using protoregistry.GlobalTypes.
	Resolver interface {
		protoregistry.ExtensionTypeResolver
		protoregistry.MessageTypeResolver
	}
}
85
86// Format formats the message as a string.
87// This method is only intended for human consumption and ignores errors.
88// Do not depend on the output being stable. It may change over time across
89// different versions of the program.
90func (o MarshalOptions) Format(m proto.Message) string {
91	if m == nil || !m.ProtoReflect().IsValid() {
92		return "<nil>" // invalid syntax, but okay since this is for debugging
93	}
94	o.allowInvalidUTF8 = true
95	o.AllowPartial = true
96	o.EmitUnknown = true
97	b, _ := o.Marshal(m)
98	return string(b)
99}
100
101// Marshal writes the given proto.Message in textproto format using options in
102// MarshalOptions object. Do not depend on the output being stable. It may
103// change over time across different versions of the program.
104func (o MarshalOptions) Marshal(m proto.Message) ([]byte, error) {
105	return o.marshal(m)
106}
107
108// marshal is a centralized function that all marshal operations go through.
109// For profiling purposes, avoid changing the name of this function or
110// introducing other code paths for marshal that do not go through this.
111func (o MarshalOptions) marshal(m proto.Message) ([]byte, error) {
112	var delims = [2]byte{'{', '}'}
113
114	if o.Multiline && o.Indent == "" {
115		o.Indent = defaultIndent
116	}
117	if o.Resolver == nil {
118		o.Resolver = protoregistry.GlobalTypes
119	}
120
121	internalEnc, err := text.NewEncoder(o.Indent, delims, o.EmitASCII)
122	if err != nil {
123		return nil, err
124	}
125
126	// Treat nil message interface as an empty message,
127	// in which case there is nothing to output.
128	if m == nil {
129		return []byte{}, nil
130	}
131
132	enc := encoder{internalEnc, o}
133	err = enc.marshalMessage(m.ProtoReflect(), false)
134	if err != nil {
135		return nil, err
136	}
137	out := enc.Bytes()
138	if len(o.Indent) > 0 && len(out) > 0 {
139		out = append(out, '\n')
140	}
141	if o.AllowPartial {
142		return out, nil
143	}
144	return out, proto.CheckInitialized(m)
145}
146
// encoder pairs the low-level text.Encoder with the MarshalOptions in effect
// for one marshal call. marshal constructs it positionally, so the field
// order matters.
type encoder struct {
	*text.Encoder
	opts MarshalOptions
}
151
152// marshalMessage marshals the given protoreflect.Message.
153func (e encoder) marshalMessage(m pref.Message, inclDelims bool) error {
154	messageDesc := m.Descriptor()
155	if !flags.ProtoLegacy && messageset.IsMessageSet(messageDesc) {
156		return errors.New("no support for proto1 MessageSets")
157	}
158
159	if inclDelims {
160		e.StartMessage()
161		defer e.EndMessage()
162	}
163
164	// Handle Any expansion.
165	if messageDesc.FullName() == genid.Any_message_fullname {
166		if e.marshalAny(m) {
167			return nil
168		}
169		// If unable to expand, continue on to marshal Any as a regular message.
170	}
171
172	// Marshal known fields.
173	fieldDescs := messageDesc.Fields()
174	size := fieldDescs.Len()
175	for i := 0; i < size; {
176		fd := fieldDescs.Get(i)
177		if od := fd.ContainingOneof(); od != nil {
178			fd = m.WhichOneof(od)
179			i += od.Fields().Len()
180		} else {
181			i++
182		}
183
184		if fd == nil || !m.Has(fd) {
185			continue
186		}
187
188		name := fd.Name()
189		// Use type name for group field name.
190		if fd.Kind() == pref.GroupKind {
191			name = fd.Message().Name()
192		}
193		val := m.Get(fd)
194		if err := e.marshalField(string(name), val, fd); err != nil {
195			return err
196		}
197	}
198
199	// Marshal extensions.
200	if err := e.marshalExtensions(m); err != nil {
201		return err
202	}
203
204	// Marshal unknown fields.
205	if e.opts.EmitUnknown {
206		e.marshalUnknown(m.GetUnknown())
207	}
208
209	return nil
210}
211
212// marshalField marshals the given field with protoreflect.Value.
213func (e encoder) marshalField(name string, val pref.Value, fd pref.FieldDescriptor) error {
214	switch {
215	case fd.IsList():
216		return e.marshalList(name, val.List(), fd)
217	case fd.IsMap():
218		return e.marshalMap(name, val.Map(), fd)
219	default:
220		e.WriteName(name)
221		return e.marshalSingular(val, fd)
222	}
223}
224
225// marshalSingular marshals the given non-repeated field value. This includes
226// all scalar types, enums, messages, and groups.
227func (e encoder) marshalSingular(val pref.Value, fd pref.FieldDescriptor) error {
228	kind := fd.Kind()
229	switch kind {
230	case pref.BoolKind:
231		e.WriteBool(val.Bool())
232
233	case pref.StringKind:
234		s := val.String()
235		if !e.opts.allowInvalidUTF8 && strs.EnforceUTF8(fd) && !utf8.ValidString(s) {
236			return errors.InvalidUTF8(string(fd.FullName()))
237		}
238		e.WriteString(s)
239
240	case pref.Int32Kind, pref.Int64Kind,
241		pref.Sint32Kind, pref.Sint64Kind,
242		pref.Sfixed32Kind, pref.Sfixed64Kind:
243		e.WriteInt(val.Int())
244
245	case pref.Uint32Kind, pref.Uint64Kind,
246		pref.Fixed32Kind, pref.Fixed64Kind:
247		e.WriteUint(val.Uint())
248
249	case pref.FloatKind:
250		// Encoder.WriteFloat handles the special numbers NaN and infinites.
251		e.WriteFloat(val.Float(), 32)
252
253	case pref.DoubleKind:
254		// Encoder.WriteFloat handles the special numbers NaN and infinites.
255		e.WriteFloat(val.Float(), 64)
256
257	case pref.BytesKind:
258		e.WriteString(string(val.Bytes()))
259
260	case pref.EnumKind:
261		num := val.Enum()
262		if desc := fd.Enum().Values().ByNumber(num); desc != nil {
263			e.WriteLiteral(string(desc.Name()))
264		} else {
265			// Use numeric value if there is no enum description.
266			e.WriteInt(int64(num))
267		}
268
269	case pref.MessageKind, pref.GroupKind:
270		return e.marshalMessage(val.Message(), true)
271
272	default:
273		panic(fmt.Sprintf("%v has unknown kind: %v", fd.FullName(), kind))
274	}
275	return nil
276}
277
278// marshalList marshals the given protoreflect.List as multiple name-value fields.
279func (e encoder) marshalList(name string, list pref.List, fd pref.FieldDescriptor) error {
280	size := list.Len()
281	for i := 0; i < size; i++ {
282		e.WriteName(name)
283		if err := e.marshalSingular(list.Get(i), fd); err != nil {
284			return err
285		}
286	}
287	return nil
288}
289
290// marshalMap marshals the given protoreflect.Map as multiple name-value fields.
291func (e encoder) marshalMap(name string, mmap pref.Map, fd pref.FieldDescriptor) error {
292	var err error
293	mapsort.Range(mmap, fd.MapKey().Kind(), func(key pref.MapKey, val pref.Value) bool {
294		e.WriteName(name)
295		e.StartMessage()
296		defer e.EndMessage()
297
298		e.WriteName(string(genid.MapEntry_Key_field_name))
299		err = e.marshalSingular(key.Value(), fd.MapKey())
300		if err != nil {
301			return false
302		}
303
304		e.WriteName(string(genid.MapEntry_Value_field_name))
305		err = e.marshalSingular(val, fd.MapValue())
306		if err != nil {
307			return false
308		}
309		return true
310	})
311	return err
312}
313
314// marshalExtensions marshals extension fields.
315func (e encoder) marshalExtensions(m pref.Message) error {
316	type entry struct {
317		key   string
318		value pref.Value
319		desc  pref.FieldDescriptor
320	}
321
322	// Get a sorted list based on field key first.
323	var entries []entry
324	m.Range(func(fd pref.FieldDescriptor, v pref.Value) bool {
325		if !fd.IsExtension() {
326			return true
327		}
328		// For MessageSet extensions, the name used is the parent message.
329		name := fd.FullName()
330		if messageset.IsMessageSetExtension(fd) {
331			name = name.Parent()
332		}
333		entries = append(entries, entry{
334			key:   string(name),
335			value: v,
336			desc:  fd,
337		})
338		return true
339	})
340	// Sort extensions lexicographically.
341	sort.Slice(entries, func(i, j int) bool {
342		return entries[i].key < entries[j].key
343	})
344
345	// Write out sorted list.
346	for _, entry := range entries {
347		// Extension field name is the proto field name enclosed in [].
348		name := "[" + entry.key + "]"
349		if err := e.marshalField(name, entry.value, entry.desc); err != nil {
350			return err
351		}
352	}
353	return nil
354}
355
356// marshalUnknown parses the given []byte and marshals fields out.
357// This function assumes proper encoding in the given []byte.
358func (e encoder) marshalUnknown(b []byte) {
359	const dec = 10
360	const hex = 16
361	for len(b) > 0 {
362		num, wtype, n := protowire.ConsumeTag(b)
363		b = b[n:]
364		e.WriteName(strconv.FormatInt(int64(num), dec))
365
366		switch wtype {
367		case protowire.VarintType:
368			var v uint64
369			v, n = protowire.ConsumeVarint(b)
370			e.WriteUint(v)
371		case protowire.Fixed32Type:
372			var v uint32
373			v, n = protowire.ConsumeFixed32(b)
374			e.WriteLiteral("0x" + strconv.FormatUint(uint64(v), hex))
375		case protowire.Fixed64Type:
376			var v uint64
377			v, n = protowire.ConsumeFixed64(b)
378			e.WriteLiteral("0x" + strconv.FormatUint(v, hex))
379		case protowire.BytesType:
380			var v []byte
381			v, n = protowire.ConsumeBytes(b)
382			e.WriteString(string(v))
383		case protowire.StartGroupType:
384			e.StartMessage()
385			var v []byte
386			v, n = protowire.ConsumeGroup(num, b)
387			e.marshalUnknown(v)
388			e.EndMessage()
389		default:
390			panic(fmt.Sprintf("prototext: error parsing unknown field wire type: %v", wtype))
391		}
392
393		b = b[n:]
394	}
395}
396
397// marshalAny marshals the given google.protobuf.Any message in expanded form.
398// It returns true if it was able to marshal, else false.
399func (e encoder) marshalAny(any pref.Message) bool {
400	// Construct the embedded message.
401	fds := any.Descriptor().Fields()
402	fdType := fds.ByNumber(genid.Any_TypeUrl_field_number)
403	typeURL := any.Get(fdType).String()
404	mt, err := e.opts.Resolver.FindMessageByURL(typeURL)
405	if err != nil {
406		return false
407	}
408	m := mt.New().Interface()
409
410	// Unmarshal bytes into embedded message.
411	fdValue := fds.ByNumber(genid.Any_Value_field_number)
412	value := any.Get(fdValue)
413	err = proto.UnmarshalOptions{
414		AllowPartial: true,
415		Resolver:     e.opts.Resolver,
416	}.Unmarshal(value.Bytes(), m)
417	if err != nil {
418		return false
419	}
420
421	// Get current encoder position. If marshaling fails, reset encoder output
422	// back to this position.
423	pos := e.Snapshot()
424
425	// Field name is the proto field name enclosed in [].
426	e.WriteName("[" + typeURL + "]")
427	err = e.marshalMessage(m.ProtoReflect(), true)
428	if err != nil {
429		e.Reset(pos)
430		return false
431	}
432	return true
433}
434