1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc. All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 // * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 // Author: jschorr@google.com (Joseph Schorr)
32 // Based on original Protocol Buffers design by
33 // Sanjay Ghemawat, Jeff Dean, and others.
34 //
35 // Utilities for printing and parsing protocol messages in a human-readable,
36 // text-based format.
37
38 #ifndef GOOGLE_PROTOBUF_TEXT_FORMAT_H__
39 #define GOOGLE_PROTOBUF_TEXT_FORMAT_H__
40
41 #include <map>
42 #include <memory>
43 #ifndef _SHARED_PTR_H
44 #include <google/protobuf/stubs/shared_ptr.h>
45 #endif
46 #include <string>
47 #include <vector>
48
49 #include <google/protobuf/stubs/common.h>
50 #include <google/protobuf/descriptor.h>
51 #include <google/protobuf/message.h>
52 #include <google/protobuf/message_lite.h>
53
54 namespace google {
55 namespace protobuf {
56
57 namespace io {
58 class ErrorCollector; // tokenizer.h
59 }
60
61 // This class implements protocol buffer text format. Printing and parsing
62 // protocol messages in text format is useful for debugging and human editing
63 // of messages.
64 //
65 // This class is really a namespace that contains only static methods.
66 class LIBPROTOBUF_EXPORT TextFormat {
67 public:
68 // Outputs a textual representation of the given message to the given
69 // output stream. Returns false if printing fails.
70 static bool Print(const Message& message, io::ZeroCopyOutputStream* output);
71
72 // Print the fields in an UnknownFieldSet. They are printed by tag number
73 // only. Embedded messages are heuristically identified by attempting to
74 // parse them. Returns false if printing fails.
75 static bool PrintUnknownFields(const UnknownFieldSet& unknown_fields,
76 io::ZeroCopyOutputStream* output);
77
78 // Like Print(), but outputs directly to a string.
79 // Note: output will be cleared prior to printing, and will be left empty
80 // even if printing fails. Returns false if printing fails.
81 static bool PrintToString(const Message& message, string* output);
82
83 // Like PrintUnknownFields(), but outputs directly to a string. Returns false
84 // if printing fails.
85 static bool PrintUnknownFieldsToString(const UnknownFieldSet& unknown_fields,
86 string* output);
87
88 // Outputs a textual representation of the value of the field supplied on
89 // the message supplied. For non-repeated fields, an index of -1 must
90 // be supplied. Note that this method will print the default value for a
91 // field if it is not set.
92 static void PrintFieldValueToString(const Message& message,
93 const FieldDescriptor* field,
94 int index,
95 string* output);
96
97 class LIBPROTOBUF_EXPORT BaseTextGenerator {
98 public:
99 virtual ~BaseTextGenerator();
100 // Print text to the output stream.
101 virtual void Print(const char* text, size_t size) = 0;
102
PrintString(const string & str)103 void PrintString(const string& str) { Print(str.data(), str.size()); }
104
105 template <size_t n>
PrintLiteral(const char (& text)[n])106 void PrintLiteral(const char (&text)[n]) {
107 Print(text, n - 1); // n includes the terminating zero character.
108 }
109 };
110
111 // The default printer that converts scalar values from fields into their
112 // string representation.
113 // You can derive from this FastFieldValuePrinter if you want to have fields
114 // to be printed in a different way and register it at the Printer.
115 class LIBPROTOBUF_EXPORT FastFieldValuePrinter {
116 public:
117 FastFieldValuePrinter();
118 virtual ~FastFieldValuePrinter();
119 virtual void PrintBool(bool val, BaseTextGenerator* generator) const;
120 virtual void PrintInt32(int32 val, BaseTextGenerator* generator) const;
121 virtual void PrintUInt32(uint32 val, BaseTextGenerator* generator) const;
122 virtual void PrintInt64(int64 val, BaseTextGenerator* generator) const;
123 virtual void PrintUInt64(uint64 val, BaseTextGenerator* generator) const;
124 virtual void PrintFloat(float val, BaseTextGenerator* generator) const;
125 virtual void PrintDouble(double val, BaseTextGenerator* generator) const;
126 virtual void PrintString(const string& val,
127 BaseTextGenerator* generator) const;
128 virtual void PrintBytes(const string& val,
129 BaseTextGenerator* generator) const;
130 virtual void PrintEnum(int32 val, const string& name,
131 BaseTextGenerator* generator) const;
132 virtual void PrintFieldName(const Message& message,
133 const Reflection* reflection,
134 const FieldDescriptor* field,
135 BaseTextGenerator* generator) const;
136 virtual void PrintMessageStart(const Message& message, int field_index,
137 int field_count, bool single_line_mode,
138 BaseTextGenerator* generator) const;
139 virtual void PrintMessageEnd(const Message& message, int field_index,
140 int field_count, bool single_line_mode,
141 BaseTextGenerator* generator) const;
142
143 private:
144 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(FastFieldValuePrinter);
145 };
146
147 class LIBPROTOBUF_EXPORT PROTOBUF_RUNTIME_DEPRECATED("Please use FastFieldValuePrinter")
148 FieldValuePrinter {
149 public:
150 FieldValuePrinter();
151 virtual ~FieldValuePrinter();
152 virtual string PrintBool(bool val) const;
153 virtual string PrintInt32(int32 val) const;
154 virtual string PrintUInt32(uint32 val) const;
155 virtual string PrintInt64(int64 val) const;
156 virtual string PrintUInt64(uint64 val) const;
157 virtual string PrintFloat(float val) const;
158 virtual string PrintDouble(double val) const;
159 virtual string PrintString(const string& val) const;
160 virtual string PrintBytes(const string& val) const;
161 virtual string PrintEnum(int32 val, const string& name) const;
162 virtual string PrintFieldName(const Message& message,
163 const Reflection* reflection,
164 const FieldDescriptor* field) const;
165 virtual string PrintMessageStart(const Message& message,
166 int field_index,
167 int field_count,
168 bool single_line_mode) const;
169 virtual string PrintMessageEnd(const Message& message,
170 int field_index,
171 int field_count,
172 bool single_line_mode) const;
173
174 private:
175 FastFieldValuePrinter delegate_;
176 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(FieldValuePrinter);
177 };
178
179 // Class for those users which require more fine-grained control over how
180 // a protobuffer message is printed out.
181 class LIBPROTOBUF_EXPORT Printer {
182 public:
183 Printer();
184 ~Printer();
185
186 // Like TextFormat::Print
187 bool Print(const Message& message, io::ZeroCopyOutputStream* output) const;
188 // Like TextFormat::PrintUnknownFields
189 bool PrintUnknownFields(const UnknownFieldSet& unknown_fields,
190 io::ZeroCopyOutputStream* output) const;
191 // Like TextFormat::PrintToString
192 bool PrintToString(const Message& message, string* output) const;
193 // Like TextFormat::PrintUnknownFieldsToString
194 bool PrintUnknownFieldsToString(const UnknownFieldSet& unknown_fields,
195 string* output) const;
196 // Like TextFormat::PrintFieldValueToString
197 void PrintFieldValueToString(const Message& message,
198 const FieldDescriptor* field,
199 int index,
200 string* output) const;
201
202 // Adjust the initial indent level of all output. Each indent level is
203 // equal to two spaces.
SetInitialIndentLevel(int indent_level)204 void SetInitialIndentLevel(int indent_level) {
205 initial_indent_level_ = indent_level;
206 }
207
208 // If printing in single line mode, then the entire message will be output
209 // on a single line with no line breaks.
SetSingleLineMode(bool single_line_mode)210 void SetSingleLineMode(bool single_line_mode) {
211 single_line_mode_ = single_line_mode;
212 }
213
IsInSingleLineMode()214 bool IsInSingleLineMode() const {
215 return single_line_mode_;
216 }
217
218 // If use_field_number is true, uses field number instead of field name.
SetUseFieldNumber(bool use_field_number)219 void SetUseFieldNumber(bool use_field_number) {
220 use_field_number_ = use_field_number;
221 }
222
223 // Set true to print repeated primitives in a format like:
224 // field_name: [1, 2, 3, 4]
225 // instead of printing each value on its own line. Short format applies
226 // only to primitive values -- i.e. everything except strings and
227 // sub-messages/groups.
SetUseShortRepeatedPrimitives(bool use_short_repeated_primitives)228 void SetUseShortRepeatedPrimitives(bool use_short_repeated_primitives) {
229 use_short_repeated_primitives_ = use_short_repeated_primitives;
230 }
231
232 // Set true to output UTF-8 instead of ASCII. The only difference
233 // is that bytes >= 0x80 in string fields will not be escaped,
234 // because they are assumed to be part of UTF-8 multi-byte
235 // sequences. This will change the default FastFieldValuePrinter.
236 void SetUseUtf8StringEscaping(bool as_utf8);
237
238 // Set the default (Fast)FieldValuePrinter that is used for all fields that
239 // don't have a field-specific printer registered.
240 // Takes ownership of the printer.
241 void SetDefaultFieldValuePrinter(const FastFieldValuePrinter* printer);
242 void SetDefaultFieldValuePrinter(const FieldValuePrinter* printer);
243
244 // Sets whether we want to hide unknown fields or not.
245 // Usually unknown fields are printed in a generic way that includes the
246 // tag number of the field instead of field name. However, sometimes it
247 // is useful to be able to print the message without unknown fields (e.g.
248 // for the python protobuf version to maintain consistency between its pure
249 // python and c++ implementations).
SetHideUnknownFields(bool hide)250 void SetHideUnknownFields(bool hide) {
251 hide_unknown_fields_ = hide;
252 }
253
254 // If print_message_fields_in_index_order is true, print fields of a proto
255 // message using the order defined in source code instead of the field
256 // number. By default, use the field number order.
SetPrintMessageFieldsInIndexOrder(bool print_message_fields_in_index_order)257 void SetPrintMessageFieldsInIndexOrder(
258 bool print_message_fields_in_index_order) {
259 print_message_fields_in_index_order_ =
260 print_message_fields_in_index_order;
261 }
262
263 // If expand==true, expand google.protobuf.Any payloads. The output
264 // will be of form
265 // [type_url] { <value_printed_in_text> }
266 //
267 // If expand==false, print Any using the default printer. The output will
268 // look like
269 // type_url: "<type_url>" value: "serialized_content"
SetExpandAny(bool expand)270 void SetExpandAny(bool expand) {
271 expand_any_ = expand;
272 }
273
274 // If non-zero, we truncate all string fields that are longer than this
275 // threshold. This is useful when the proto message has very long strings,
276 // e.g., dump of encoded image file.
277 //
278 // NOTE(hfgong): Setting a non-zero value breaks round-trip safe
279 // property of TextFormat::Printer. That is, from the printed message, we
280 // cannot fully recover the original string field any more.
SetTruncateStringFieldLongerThan(const int64 truncate_string_field_longer_than)281 void SetTruncateStringFieldLongerThan(
282 const int64 truncate_string_field_longer_than) {
283 truncate_string_field_longer_than_ = truncate_string_field_longer_than;
284 }
285
286 // Register a custom field-specific (Fast)FieldValuePrinter for fields
287 // with a particular FieldDescriptor.
288 // Returns "true" if the registration succeeded, or "false", if there is
289 // already a printer for that FieldDescriptor.
290 // Takes ownership of the printer on successful registration.
291 bool RegisterFieldValuePrinter(const FieldDescriptor* field,
292 const FieldValuePrinter* printer);
293 bool RegisterFieldValuePrinter(const FieldDescriptor* field,
294 const FastFieldValuePrinter* printer);
295
296 private:
297 // Forward declaration of an internal class used to print the text
298 // output to the OutputStream (see text_format.cc for implementation).
299 class TextGenerator;
300
301 // Internal Print method, used for writing to the OutputStream via
302 // the TextGenerator class.
303 void Print(const Message& message, TextGenerator* generator) const;
304
305 // Print a single field.
306 void PrintField(const Message& message, const Reflection* reflection,
307 const FieldDescriptor* field,
308 TextGenerator* generator) const;
309
310 // Print a repeated primitive field in short form.
311 void PrintShortRepeatedField(const Message& message,
312 const Reflection* reflection,
313 const FieldDescriptor* field,
314 TextGenerator* generator) const;
315
316 // Print the name of a field -- i.e. everything that comes before the
317 // ':' for a single name/value pair.
318 void PrintFieldName(const Message& message, const Reflection* reflection,
319 const FieldDescriptor* field,
320 TextGenerator* generator) const;
321
322 // Outputs a textual representation of the value of the field supplied on
323 // the message supplied or the default value if not set.
324 void PrintFieldValue(const Message& message, const Reflection* reflection,
325 const FieldDescriptor* field, int index,
326 TextGenerator* generator) const;
327
328 // Print the fields in an UnknownFieldSet. They are printed by tag number
329 // only. Embedded messages are heuristically identified by attempting to
330 // parse them.
331 void PrintUnknownFields(const UnknownFieldSet& unknown_fields,
332 TextGenerator* generator) const;
333
334 bool PrintAny(const Message& message, TextGenerator* generator) const;
335
336 int initial_indent_level_;
337
338 bool single_line_mode_;
339
340 bool use_field_number_;
341
342 bool use_short_repeated_primitives_;
343
344 bool hide_unknown_fields_;
345
346 bool print_message_fields_in_index_order_;
347
348 bool expand_any_;
349
350 int64 truncate_string_field_longer_than_;
351
352 google::protobuf::scoped_ptr<const FastFieldValuePrinter> default_field_value_printer_;
353 typedef std::map<const FieldDescriptor*, const FastFieldValuePrinter*>
354 CustomPrinterMap;
355 CustomPrinterMap custom_printers_;
356 };
357
358 // Parses a text-format protocol message from the given input stream to
359 // the given message object. This function parses the human-readable format
360 // written by Print(). Returns true on success. The message is cleared first,
361 // even if the function fails -- See Merge() to avoid this behavior.
362 //
363 // Example input: "user {\n id: 123 extra { gender: MALE language: 'en' }\n}"
364 //
365 // One use for this function is parsing handwritten strings in test code.
366 // Another use is to parse the output from google::protobuf::Message::DebugString()
367 // (or ShortDebugString()), because these functions output using
368 // google::protobuf::TextFormat::Print().
369 //
370 // If you would like to read a protocol buffer serialized in the
371 // (non-human-readable) binary wire format, see
372 // google::protobuf::MessageLite::ParseFromString().
373 static bool Parse(io::ZeroCopyInputStream* input, Message* output);
374 // Like Parse(), but reads directly from a string.
375 static bool ParseFromString(const string& input, Message* output);
376
377 // Like Parse(), but the data is merged into the given message, as if
378 // using Message::MergeFrom().
379 static bool Merge(io::ZeroCopyInputStream* input, Message* output);
380 // Like Merge(), but reads directly from a string.
381 static bool MergeFromString(const string& input, Message* output);
382
383 // Parse the given text as a single field value and store it into the
384 // given field of the given message. If the field is a repeated field,
385 // the new value will be added to the end
386 static bool ParseFieldValueFromString(const string& input,
387 const FieldDescriptor* field,
388 Message* message);
389
390 // Interface that TextFormat::Parser can use to find extensions.
391 // This class may be extended in the future to find more information
392 // like fields, etc.
393 class LIBPROTOBUF_EXPORT Finder {
394 public:
395 virtual ~Finder();
396
397 // Try to find an extension of *message by fully-qualified field
398 // name. Returns NULL if no extension is known for this name or number.
399 virtual const FieldDescriptor* FindExtension(
400 Message* message,
401 const string& name) const = 0;
402 };
403
// A location in the parsed text.
struct ParseLocation {
  int line;
  int column;

  // Default location: both coordinates are -1.
  ParseLocation() : line(-1), column(-1) {}

  // Location at the given line and column.
  ParseLocation(int line_param, int column_param)
      : line(line_param), column(column_param) {}
};
413
414 // Data structure which is populated with the locations of each field
415 // value parsed from the text.
416 class LIBPROTOBUF_EXPORT ParseInfoTree {
417 public:
418 ParseInfoTree();
419 ~ParseInfoTree();
420
421 // Returns the parse location for index-th value of the field in the parsed
422 // text. If none exists, returns a location with line = -1. Index should be
423 // -1 for not-repeated fields.
424 ParseLocation GetLocation(const FieldDescriptor* field, int index) const;
425
426 // Returns the parse info tree for the given field, which must be a message
427 // type. The nested information tree is owned by the root tree and will be
428 // deleted when it is deleted.
429 ParseInfoTree* GetTreeForNested(const FieldDescriptor* field,
430 int index) const;
431
432 private:
433 // Allow the text format parser to record information into the tree.
434 friend class TextFormat;
435
436 // Records the starting location of a single value for a field.
437 void RecordLocation(const FieldDescriptor* field, ParseLocation location);
438
439 // Create and records a nested tree for a nested message field.
440 ParseInfoTree* CreateNested(const FieldDescriptor* field);
441
442 // Defines the map from the index-th field descriptor to its parse location.
443 typedef std::map<const FieldDescriptor*,
444 std::vector<ParseLocation> > LocationMap;
445
446 // Defines the map from the index-th field descriptor to the nested parse
447 // info tree.
448 typedef std::map<const FieldDescriptor*,
449 std::vector<ParseInfoTree*> > NestedMap;
450
451 LocationMap locations_;
452 NestedMap nested_;
453
454 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ParseInfoTree);
455 };
456
457 // For more control over parsing, use this class.
458 class LIBPROTOBUF_EXPORT Parser {
459 public:
460 Parser();
461 ~Parser();
462
463 // Like TextFormat::Parse().
464 bool Parse(io::ZeroCopyInputStream* input, Message* output);
465 // Like TextFormat::ParseFromString().
466 bool ParseFromString(const string& input, Message* output);
467 // Like TextFormat::Merge().
468 bool Merge(io::ZeroCopyInputStream* input, Message* output);
469 // Like TextFormat::MergeFromString().
470 bool MergeFromString(const string& input, Message* output);
471
472 // Set where to report parse errors. If NULL (the default), errors will
473 // be printed to stderr.
RecordErrorsTo(io::ErrorCollector * error_collector)474 void RecordErrorsTo(io::ErrorCollector* error_collector) {
475 error_collector_ = error_collector;
476 }
477
478 // Set how parser finds extensions. If NULL (the default), the
479 // parser will use the standard Reflection object associated with
480 // the message being parsed.
SetFinder(Finder * finder)481 void SetFinder(Finder* finder) {
482 finder_ = finder;
483 }
484
485 // Sets where location information about the parse will be written. If NULL
486 // (the default), then no location will be written.
WriteLocationsTo(ParseInfoTree * tree)487 void WriteLocationsTo(ParseInfoTree* tree) {
488 parse_info_tree_ = tree;
489 }
490
491 // Normally parsing fails if, after parsing, output->IsInitialized()
492 // returns false. Call AllowPartialMessage(true) to skip this check.
AllowPartialMessage(bool allow)493 void AllowPartialMessage(bool allow) {
494 allow_partial_ = allow;
495 }
496
497 // Allow field names to be matched case-insensitively.
498 // This is not advisable if there are fields that only differ in case, or
499 // if you want to enforce writing in the canonical form.
500 // This is 'false' by default.
AllowCaseInsensitiveField(bool allow)501 void AllowCaseInsensitiveField(bool allow) {
502 allow_case_insensitive_field_ = allow;
503 }
504
505 // Like TextFormat::ParseFieldValueFromString
506 bool ParseFieldValueFromString(const string& input,
507 const FieldDescriptor* field,
508 Message* output);
509
510
511 // backported from 3.8.0
512 // When an unknown field is met, parsing will fail if this option is set
513 // to false(the default). If true, unknown fields will be ignored and
514 // a warning message will be generated.
515 // Please aware that set this option true may hide some errors (e.g.
516 // spelling error on field name). Avoid to use this option if possible.
AllowUnknownField(bool allow)517 void AllowUnknownField(bool allow) { allow_unknown_field_ = allow; }
518
519
AllowFieldNumber(bool allow)520 void AllowFieldNumber(bool allow) {
521 allow_field_number_ = allow;
522 }
523
524 // backported from 3.8.0
525 // Sets maximum recursion depth which parser can use. This is effectively
526 // the maximum allowed nesting of proto messages.
SetRecursionLimit(int limit)527 void SetRecursionLimit(int limit) { recursion_limit_ = limit; }
528
529 private:
530 // Forward declaration of an internal class used to parse text
531 // representations (see text_format.cc for implementation).
532 class ParserImpl;
533
534 // Like TextFormat::Merge(). The provided implementation is used
535 // to do the parsing.
536 bool MergeUsingImpl(io::ZeroCopyInputStream* input,
537 Message* output,
538 ParserImpl* parser_impl);
539
540 io::ErrorCollector* error_collector_;
541 Finder* finder_;
542 ParseInfoTree* parse_info_tree_;
543 bool allow_partial_;
544 bool allow_case_insensitive_field_;
545 bool allow_unknown_field_;
546 bool allow_unknown_enum_;
547 bool allow_field_number_;
548 bool allow_relaxed_whitespace_;
549 bool allow_singular_overwrites_;
550 int recursion_limit_; // backported from 3.8.0
551 };
552
553
554 private:
555 // Hack: ParseInfoTree declares TextFormat as a friend which should extend
556 // the friendship to TextFormat::Parser::ParserImpl, but unfortunately some
557 // old compilers (e.g. GCC 3.4.6) don't implement this correctly. We provide
558 // helpers for ParserImpl to call methods of ParseInfoTree.
559 static inline void RecordLocation(ParseInfoTree* info_tree,
560 const FieldDescriptor* field,
561 ParseLocation location);
562 static inline ParseInfoTree* CreateNested(ParseInfoTree* info_tree,
563 const FieldDescriptor* field);
564
565 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(TextFormat);
566 };
567
RecordLocation(ParseInfoTree * info_tree,const FieldDescriptor * field,ParseLocation location)568 inline void TextFormat::RecordLocation(ParseInfoTree* info_tree,
569 const FieldDescriptor* field,
570 ParseLocation location) {
571 info_tree->RecordLocation(field, location);
572 }
573
574
CreateNested(ParseInfoTree * info_tree,const FieldDescriptor * field)575 inline TextFormat::ParseInfoTree* TextFormat::CreateNested(
576 ParseInfoTree* info_tree, const FieldDescriptor* field) {
577 return info_tree->CreateNested(field);
578 }
579
580 } // namespace protobuf
581
582 } // namespace google
583 #endif // GOOGLE_PROTOBUF_TEXT_FORMAT_H__
584